blocxx
PosixRegEx.cpp
Go to the documentation of this file.
1 /*******************************************************************************
2 * Copyright (C) 2005 Novell, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * - Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 *
10 * - Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
13 *
14 * - Neither the name of Vintela, Inc., Novell, Inc., nor the names of its
15 * contributors may be used to endorse or promote products derived from this
16 * software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL Vintela, Inc., Novell, Inc., OR THE
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *******************************************************************************/
34 #include "blocxx/PosixRegEx.hpp"
35 #ifdef BLOCXX_HAVE_REGEX
36 #ifdef BLOCXX_HAVE_REGEX_H
37 
38 #include "blocxx/ExceptionIds.hpp"
39 #include "blocxx/Assertion.hpp"
40 #include "blocxx/Format.hpp"
41 
42 
43 namespace BLOCXX_NAMESPACE
44 {
45 
namespace
{
// Success value used for the status codes kept in m_ecode.
// Linux's regex.h does provide a REG_NOERROR enumerator, but it is
// non-standard, so this file declares its own constant rather than
// depending on it.
const int REG_NOERROR(0);
} // anonymous namespace
51 
52 // -------------------------------------------------------------------
53 static String
54 substitute_caps(const PosixRegEx::MatchArray &sub,
55  const String &str, const String &rep)
56 {
57  static const char *cap_refs[] = {
58  NULL, "\\1", "\\2", "\\3", "\\4",
59  "\\5", "\\6", "\\7", "\\8", "\\9", NULL
60  };
61 
62  String res( rep);
63  size_t pos;
64 
65  for(size_t i=1; cap_refs[i] != NULL; i++)
66  {
67  String cap;
68 
69  if( i < sub.size() && sub[i].rm_so >= 0 && sub[i].rm_eo >= 0)
70  {
71  cap = str.substring(sub[i].rm_so, sub[i].rm_eo
72  - sub[i].rm_so);
73  }
74 
75  pos = res.indexOf(cap_refs[i]);
76  while( pos != String::npos)
77  {
78  size_t quotes = 0;
79  size_t at = pos;
80 
81  while( at > 0 && res.charAt(--at) == '\\')
82  quotes++;
83 
84  if( quotes % 2)
85  {
86  quotes = (quotes + 1) / 2;
87 
88  res = res.erase(pos - quotes, quotes);
89 
90  pos = res.indexOf(cap_refs[i],
91  pos + 2 - quotes);
92  }
93  else
94  {
95  quotes = quotes / 2;
96 
97  res = res.substring(0, pos - quotes) +
98  cap +
99  res.substring(pos + 2);
100 
101  pos = res.indexOf(cap_refs[i],
102  pos + cap.length() - quotes);
103  }
104  }
105  }
106  return res;
107 }
108 
109 
110 // -------------------------------------------------------------------
111 static inline String
112 getError(const regex_t *preg, const int code)
113 {
114  char err[256] = { '\0'};
115  ::regerror(code, preg, err, sizeof(err));
116  return String(err);
117 }
118 
119 
120 // -------------------------------------------------------------------
121 PosixRegEx::PosixRegEx()
122  : compiled(false)
123  , m_flags(0)
124  , m_ecode(REG_NOERROR)
125 {
126 }
127 
128 
129 // -------------------------------------------------------------------
130 PosixRegEx::PosixRegEx(const String &regex, int cflags)
131  : compiled(false)
132  , m_flags(0)
133  , m_ecode(REG_NOERROR)
134 {
135  if( !compile(regex, cflags))
136  {
137  BLOCXX_THROW_ERR(RegExCompileException,
138  errorString().c_str(), m_ecode);
139  }
140 }
141 
142 
143 // -------------------------------------------------------------------
144 PosixRegEx::PosixRegEx(const PosixRegEx &ref)
145  : compiled(false)
146  , m_flags(ref.m_flags)
147  , m_ecode(REG_NOERROR)
148  , m_rxstr(ref.m_rxstr)
149 {
150  if( ref.compiled && !compile(ref.m_rxstr, ref.m_flags))
151  {
152  BLOCXX_THROW_ERR(RegExCompileException,
153  errorString().c_str(), m_ecode);
154  }
155 }
156 
157 
158 // -------------------------------------------------------------------
159 PosixRegEx::~PosixRegEx()
160 {
161  if( compiled)
162  {
163  regfree(&m_regex);
164  }
165 }
166 
167 
168 // -------------------------------------------------------------------
169 PosixRegEx &
170 PosixRegEx::operator = (const PosixRegEx &ref)
171 {
172  if( !ref.compiled)
173  {
174  m_ecode = REG_NOERROR;
175  m_error.erase();
176  m_flags = ref.m_flags;
177  m_rxstr = ref.m_rxstr;
178  if( compiled)
179  {
180  regfree(&m_regex);
181  compiled = false;
182  }
183  }
184  else if( !compile(ref.m_rxstr, ref.m_flags))
185  {
186  BLOCXX_THROW_ERR(RegExCompileException,
187  errorString().c_str(), m_ecode);
188  }
189  return *this;
190 }
191 
192 
193 // -------------------------------------------------------------------
194 bool
195 PosixRegEx::compile(const String &regex, int cflags)
196 {
197  if( compiled)
198  {
199  regfree(&m_regex);
200  compiled = false;
201  }
202 
203  m_rxstr = regex;
204  m_flags = cflags;
205  m_ecode = ::regcomp(&m_regex, regex.c_str(), cflags);
206  if( m_ecode == REG_NOERROR)
207  {
208  compiled = true;
209  m_error.erase();
210  return true;
211  }
212  else
213  {
214  m_error = getError(&m_regex, m_ecode);
215  return false;
216  }
217 }
218 
219 
220 // -------------------------------------------------------------------
221 int
222 PosixRegEx::errorCode()
223 {
224  return m_ecode;
225 }
226 
227 
228 // -------------------------------------------------------------------
229 String
230 PosixRegEx::errorString() const
231 {
232  return m_error;
233 }
234 
235 
236 // -------------------------------------------------------------------
237 String
238 PosixRegEx::patternString() const
239 {
240  return m_rxstr;
241 }
242 
243 
244 // -------------------------------------------------------------------
245 int
246 PosixRegEx::compileFlags() const
247 {
248  return m_flags;
249 }
250 
251 
252 // -------------------------------------------------------------------
253 bool
254 PosixRegEx::isCompiled() const
255 {
256  return compiled;
257 }
258 
259 
260 // -------------------------------------------------------------------
261 bool
262 PosixRegEx::execute(MatchArray &sub, const String &str,
263  size_t index, size_t count, int eflags)
264 {
265  if( !compiled)
266  {
267  BLOCXX_THROW(RegExCompileException,
268  "Regular expression is not compiled");
269  }
270 
271  if( index > str.length())
272  {
273  BLOCXX_THROW(OutOfBoundsException,
274  Format("String index out of bounds ("
275  "length = %1, index = %2).",
276  str.length(), index
277  ).c_str());
278  }
279 
280  if( count == 0)
281  {
282  count = m_regex.re_nsub + 1;
283  }
284  AutoPtrVec<regmatch_t> rsub(new regmatch_t[count]);
285  rsub[0].rm_so = -1;
286  rsub[0].rm_eo = -1;
287 
288  sub.clear();
289  m_ecode = ::regexec(&m_regex, str.c_str() + index,
290  count, rsub.get(), eflags);
291  if( m_ecode == REG_NOERROR)
292  {
293  m_error.erase();
294  if( m_flags & REG_NOSUB)
295  {
296  return true;
297  }
298 
299  sub.resize(count);
300  for(size_t n = 0; n < count; n++)
301  {
302  if( rsub[n].rm_so < 0 || rsub[n].rm_eo < 0)
303  {
304  sub[n] = rsub[n];
305  }
306  else
307  {
308  rsub[n].rm_so += index;
309  rsub[n].rm_eo += index;
310  sub[n] = rsub[n];
311  }
312  }
313  return true;
314  }
315  else
316  {
317  m_error = getError(&m_regex, m_ecode);
318  return false;
319  }
320 }
321 
322 
323 // -------------------------------------------------------------------
325 PosixRegEx::capture(const String &str, size_t index, size_t count, int eflags)
326 {
327  if( !compiled)
328  {
329  BLOCXX_THROW(RegExCompileException,
330  "Regular expression is not compiled");
331  }
332 
333  MatchArray rsub;
334  StringArray ssub;
335 
336  bool match = execute(rsub, str, index, count, eflags);
337  if( match)
338  {
339  if( rsub.empty())
340  {
341  BLOCXX_THROW(RegExCompileException,
342  "Non-capturing regular expression");
343  }
344 
345  MatchArray::const_iterator i=rsub.begin();
346  for( ; i != rsub.end(); ++i)
347  {
348  if( i->rm_so >= 0 && i->rm_eo >= 0)
349  {
350  ssub.push_back(str.substring(i->rm_so,
351  i->rm_eo - i->rm_so));
352  }
353  else
354  {
355  ssub.push_back(String(""));
356  }
357  }
358  }
359  else if(m_ecode != REG_NOMATCH)
360  {
361  BLOCXX_THROW_ERR(RegExExecuteException,
362  errorString().c_str(), m_ecode);
363  }
364  return ssub;
365 }
366 
367 
368 // -------------------------------------------------------------------
369 blocxx::String
370 PosixRegEx::replace(const String &str, const String &rep,
371  bool global, int eflags)
372 {
373  if( !compiled)
374  {
375  BLOCXX_THROW(RegExCompileException,
376  "Regular expression is not compiled");
377  }
378 
379  MatchArray rsub;
380  bool match;
381  size_t off = 0;
382  String out = str;
383 
384  do
385  {
386  match = execute(rsub, out, off, 0, eflags);
387  if( match)
388  {
389  if( rsub.empty() ||
390  rsub[0].rm_so < 0 ||
391  rsub[0].rm_eo < 0)
392  {
393  // only if empty (missused as guard).
394  BLOCXX_THROW(RegExCompileException,
395  "Non-capturing regular expression");
396  }
397 
398  String res = substitute_caps(rsub, out, rep);
399 
400  out = out.substring(0, rsub[0].rm_so) +
401  res + out.substring(rsub[0].rm_eo);
402 
403  off = rsub[0].rm_so + res.length();
404  }
405  else if(m_ecode == REG_NOMATCH)
406  {
407  m_ecode = REG_NOERROR;
408  m_error.erase();
409  }
410  else
411  {
412  BLOCXX_THROW_ERR(RegExExecuteException,
413  errorString().c_str(), m_ecode);
414  }
415  } while(global && match && out.length() > off);
416 
417  return out;
418 }
419 
420 // -------------------------------------------------------------------
422 PosixRegEx::split(const String &str, bool empty, int eflags)
423 {
424  if( !compiled)
425  {
426  BLOCXX_THROW(RegExCompileException,
427  "Regular expression is not compiled");
428  }
429 
430  MatchArray rsub;
431  StringArray ssub;
432  bool match;
433  size_t off = 0;
434  size_t len = str.length();
435 
436  do
437  {
438  match = execute(rsub, str, off, 1, eflags);
439  if( match)
440  {
441  if( rsub.empty() ||
442  rsub[0].rm_so < 0 ||
443  rsub[0].rm_eo < 0)
444  {
445  BLOCXX_THROW(RegExCompileException,
446  "Non-capturing regular expression");
447  }
448 
449  if( empty || ((size_t)rsub[0].rm_so > off))
450  {
451  ssub.push_back(str.substring(off,
452  rsub[0].rm_so - off));
453  }
454  off = rsub[0].rm_eo;
455  }
456  else if(m_ecode == REG_NOMATCH)
457  {
458  String tmp = str.substring(off);
459  if( empty || !tmp.empty())
460  {
461  ssub.push_back(tmp);
462  }
463  m_ecode = REG_NOERROR;
464  m_error.erase();
465  }
466  else
467  {
468  BLOCXX_THROW_ERR(RegExExecuteException,
469  errorString().c_str(), m_ecode);
470  }
471  } while(match && len > off);
472 
473  return ssub;
474 }
475 
476 
477 // -------------------------------------------------------------------
479 PosixRegEx::grep(const StringArray &src, int eflags)
480 {
481  if( !compiled)
482  {
483  BLOCXX_THROW(RegExCompileException,
484  "Regular expression is not compiled");
485  }
486 
487  m_ecode = REG_NOERROR;
488  m_error.erase();
489 
490  StringArray out;
491  if( !src.empty())
492  {
493  StringArray::const_iterator i=src.begin();
494  for( ; i != src.end(); ++i)
495  {
496  int ret = ::regexec(&m_regex, i->c_str(),
497  0, NULL, eflags);
498  if( ret == REG_NOERROR)
499  {
500  out.push_back(*i);
501  }
502  else if(ret != REG_NOMATCH)
503  {
504  m_ecode = ret;
505  m_error = getError(&m_regex, m_ecode);
506  BLOCXX_THROW_ERR(RegExExecuteException,
507  errorString().c_str(), m_ecode);
508  }
509  }
510  }
511 
512  return out;
513 }
514 
515 
516 // -------------------------------------------------------------------
517 bool
518 PosixRegEx::match(const String &str, size_t index, int eflags) const
519 {
520  if( !compiled)
521  {
522  BLOCXX_THROW(RegExCompileException,
523  "Regular expression is not compiled");
524  }
525 
526  if( index > str.length())
527  {
528  BLOCXX_THROW(OutOfBoundsException,
529  Format("String index out of bounds ("
530  "length = %1, index = %2).",
531  str.length(), index
532  ).c_str());
533  }
534 
535  m_ecode = ::regexec(&m_regex, str.c_str() + index,
536  0, NULL, eflags);
537 
538  if( m_ecode == REG_NOERROR)
539  {
540  m_error.erase();
541  return true;
542  }
543  else if(m_ecode == REG_NOMATCH)
544  {
545  m_error = getError(&m_regex, m_ecode);
546  return false;
547  }
548  else
549  {
550  m_error = getError(&m_regex, m_ecode);
551  BLOCXX_THROW_ERR(RegExExecuteException,
552  errorString().c_str(), m_ecode);
553  }
554 }
555 
556 
557 // -------------------------------------------------------------------
558 } // namespace BLOCXX_NAMESPACE
559 
560 #endif // BLOCXX_HAVE_REGEX_H
561 #endif // BLOCXX_HAVE_REGEX
562 
563 /* vim: set ts=8 sts=8 sw=8 ai noet: */
564