00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef Tanl_Text_RegExp_h
00025 #define Tanl_Text_RegExp_h
00026
00027
00028 #include "text/text.h"
00029 #include "text/pcre/pcre.h"
00030
00031
00032 #include <string>
00033 #include <vector>
00034 #include <stdexcept>
00035
00036 namespace Tanl {
00037 namespace Text {
00041 namespace RegExp {
00042
/**
 * Exception type declared by the RegExp namespace.
 *
 * It is a plain std::runtime_error subclass: the message handed to the
 * constructor is what what() reports.
 * NOTE(review): presumably thrown when a regular expression fails to
 * compile - confirm against the Pattern implementation.
 */
class ParseException : public std::runtime_error
{
public:
  /**
   * @param msg  description of the failure, forwarded unchanged to
   *             std::runtime_error.
   */
  ParseException(const std::string& msg)
    : std::runtime_error(msg)
  {
  }
};
00047
00048 enum CompileFlags
00049 {
00050 IgnoreCase = PCRE_CASELESS,
00051 NoNewLine = PCRE_MULTILINE,
00052 DotAll = PCRE_DOTALL,
00053 Extended = PCRE_EXTENDED,
00054 Anchored = PCRE_ANCHORED,
00055 DollarEndOnly = PCRE_DOLLAR_ENDONLY,
00056 Extra = PCRE_EXTRA,
00057 UnGreedy = PCRE_UNGREEDY,
00058 Utf8 = PCRE_UTF8
00059 };
00060
00061 enum EvaluateFlags {
00062 NotBol = PCRE_NOTBOL,
00063 NotEmpty = PCRE_NOTEMPTY,
00064 NotEol = PCRE_NOTEOL
00065
00066 };
00067
/**
 * Storage for the offset pairs of the groups captured by a match.
 *
 * The object holds 3 * size ints, all initialised to -1. Group i is exposed
 * by operator[] as a std::pair<int, int> overlaying the ints at positions
 * 2*i and 2*i + 1; the last third of the buffer is never reachable through
 * this interface.
 * NOTE(review): three ints per group with pairs packed at the front looks
 * like the output-vector layout of pcre_exec() (start/end offsets followed
 * by workspace) - confirm against Pattern::match().
 *
 * The std::vector base is private, so only size() and operator[] are
 * visible to users of the class.
 */
class MatchGroups : std::vector<int>
{
  typedef std::vector<int>    Storage;
  typedef std::pair<int, int> Span;

public:
  /**
   * @param size  number of groups to reserve room for; 3 * size ints are
   *              allocated and set to -1.
   */
  MatchGroups(int size) :
    Storage(3 * size, -1)
  { }

  /**
   * @return the number of groups, i.e. the underlying int count divided
   *         by 3. Const-qualified so it can be called through a const
   *         reference.
   */
  size_t size() const { return Storage::size() / 3; }

  /**
   * Mutable access to the pair stored for group @p i.
   *
   * The returned reference aliases the ints at 2*i and 2*i + 1 of the
   * buffer, so writes go straight into the underlying storage. This relies
   * on std::pair<int, int> being laid out as two consecutive ints.
   * No bounds checking is performed.
   */
  std::pair<int, int>& operator [](int i) {
    return *reinterpret_cast<Span*>(&Storage::operator[](2 * i));
  }

  /**
   * Read-only access to the pair stored for group @p i; same indexing and
   * layout assumption as the mutable overload.
   */
  std::pair<int, int> const& operator [](int i) const {
    return *reinterpret_cast<Span const*>(&Storage::operator[](2 * i));
  }
};
00090
00114 class Pattern
00115 {
00116 private:
00117 int _errorCode;
00118 pcre* _pcre;
00119 pcre_extra* _pcre_extra;
00120
00124 int subpatterns;
00125
00126 public:
00127
00128 Pattern() { }
00129
00138 Pattern(std::string& expression, int cflags = 0);
00139
00148 Pattern(char const* expression, int cflags = 0);
00149
00150 ~Pattern();
00151
00155 Pattern& operator =(Pattern const& other) {
00156 _errorCode = other._errorCode;
00157 _pcre = other._pcre;
00158 _pcre_extra = other._pcre_extra;
00159 subpatterns = other.subpatterns;
00160 ((Pattern*)&other)->_pcre = 0;
00161 ((Pattern*)&other)->_pcre_extra = 0;
00162 return *this;
00163 }
00164
00171 bool test(std::string const& str, int eflags = 0);
00172
00181 bool test(char const* str, size_t len = 0, int eflags = 0);
00182
00189 int matchSize(std::string const& text, int eflags = 0);
00190 #ifdef UNICODE
00191
00202 #else
00203
00213 #endif
00214 int match(const char* start, const char* end,
00215 MatchGroups& pos, int eflags = 0);
00216 #ifdef UNICODE
00217
00227 #else
00228
00237 #endif
00238 int match(std::string const& text, MatchGroups& pos, int eflags = 0);
00246 std::vector<std::string> match(std::string const& str, int eflags = 0);
00247
00253 std::string replace(std::string& text, std::string& with, bool replaceAll = false);
00254
00258 static std::string escape(std::string& str);
00259
00264 static const unsigned char* setLocale(char const* locale);
00265
00267 static const unsigned char* CharTables;
00268 };
00269
00270 }
00271 }
00272 }
00273
00274 #endif // Tanl_Text_RegExp_h