00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "TRegularExpression.hh"
00022
00023 TRegularExpression::TRegularExpression( const Tstring& pattern, Tint option )
00024 : thePattern( pattern ),
00025 theOption( option ),
00026 theNumberOfSubMatches( -1 ),
00027 theCompiledPattern( 0 ),
00028 theMatch( 0 ),
00029 theSubMatch( 0 )
00030 {
00031 Compile();
00032 }
00033
00034 TRegularExpression::TRegularExpression( Tint option, const Tstring& pattern )
00035 : thePattern( pattern ),
00036 theOption( option ),
00037 theNumberOfSubMatches( -1 ),
00038 theCompiledPattern( 0 ),
00039 theMatch( 0 ),
00040 theSubMatch( 0 )
00041 {
00042 Compile();
00043 }
00044
00045 TRegularExpression::TRegularExpression( const TRegularExpression& right )
00046 : thePattern( right.thePattern ),
00047 theOption( right.theOption ),
00048 theNumberOfSubMatches( -1 ),
00049 theCompiledPattern( 0 ),
00050 theMatch( 0 ),
00051 theSubMatch( 0 )
00052 {
00053 Compile();
00054 }
00055
00056 TRegularExpression::~TRegularExpression()
00057 {
00058 free();
00059 }
00060
00061 const TRegularExpression& TRegularExpression::operator=( const TRegularExpression& right )
00062 {
00063 thePattern = right.thePattern;
00064 theOption = right.theOption;
00065 free();
00066 theNumberOfSubMatches = -1;
00067 theCompiledPattern = 0;
00068 theMatch = 0;
00069 theSubMatch = 0;
00070 Compile();
00071 return *this;
00072 }
00073
00074 const TRegularExpression& TRegularExpression::operator=( const Tstring& right )
00075 {
00076 *this = TRegularExpression( right );
00077 return *this;
00078 }
00079
00080 Tbool TRegularExpression::operator==( const Tstring& right )
00081 {
00082 return IsMatch( right );
00083 }
00084
00085 Tbool TRegularExpression::operator!=( const Tstring& right )
00086 {
00087 if ( IsMatch( right ) ) {
00088 return Tfalse;
00089 } else {
00090 return Ttrue;
00091 }
00092 }
00093
00094 Tvoid TRegularExpression::Compile()
00095 {
00096 free();
00097
00098 theCompiledPattern = new Tregex_t();
00099 Tint retval = regcomp( theCompiledPattern, thePattern.c_str(), theOption );
00100 if ( retval != 0 ) {
00101 char m[ 128 ];
00102 regerror( retval, theCompiledPattern, m, Tsizeof(m) );
00103 regfree( theCompiledPattern );
00104 delete theCompiledPattern;
00105 theCompiledPattern = 0;
00106 theNumberOfSubMatches = -1;
00107 return;
00108 }
00109
00110
00111
00112 theNumberOfSubMatches = theCompiledPattern -> re_nsub;
00113 Tregmatch_t* buf = new Tregmatch_t[ theNumberOfSubMatches + 1 ];
00114 theMatch = buf;
00115
00116 if ( theNumberOfSubMatches > 0 ) {
00117 buf ++;
00118 theSubMatch = buf;
00119 }
00120
00121 return;
00122 }
00123
00124 Tint TRegularExpression::Index( const Tstring& source, Tint pos )
00125 {
00126 Tint result = -1;
00127 if ( IsMatch( source, pos ) ) {
00128 Tint so = theMatch -> rm_so;
00129 result = so + pos;
00130 }
00131 return result;
00132 }
00133
00134 TintList TRegularExpression::Indexes( const Tstring& source, Tint pos )
00135 {
00136 TintList result;
00137 while ( IsMatch( source, pos ) ) {
00138 Tint so = theMatch -> rm_so;
00139 Tint eo = theMatch -> rm_eo;
00140 Tint pbuf = so + pos;
00141 result.push_back( pbuf );
00142 pos = pos + eo;
00143 }
00144 return result;
00145 }
00146
00147 Tint TRegularExpression::Size( const Tstring& source, Tint pos )
00148 {
00149 Tint result = 0;
00150 if ( IsMatch( source, pos ) ) {
00151 Tint so = theMatch -> rm_so;
00152 Tint eo = theMatch -> rm_eo;
00153 result = eo - so;
00154 }
00155 return result;
00156 }
00157
00158 TintList TRegularExpression::Sizes( const Tstring& source, Tint pos )
00159 {
00160 TintList result;
00161 while ( IsMatch( source, pos ) ) {
00162 Tint so = theMatch -> rm_so;
00163 Tint eo = theMatch -> rm_eo;
00164 Tint sbuf = eo - so;
00165 result.push_back( sbuf );
00166 pos = pos + eo;
00167 }
00168 return result;
00169 }
00170
00171 Tstring TRegularExpression::MatchString( const Tstring& source, Tint pos )
00172 {
00173 Tstring result = "";
00174 if ( IsMatch( source, pos ) ) {
00175 Tint so = theMatch -> rm_so;
00176 Tint eo = theMatch -> rm_eo;
00177 Tint index = so + pos;
00178 Tint size = eo - so;
00179 result = source.substr( index, size );
00180 }
00181 return result;
00182 }
00183
00184 TstringList TRegularExpression::MatchStrings( const Tstring& source, Tint pos )
00185 {
00186 TstringList result;
00187 while ( IsMatch( source, pos ) ) {
00188 Tint so = theMatch -> rm_so;
00189 Tint eo = theMatch -> rm_eo;
00190 Tint index = so + pos;
00191 Tint size = eo - so;
00192 Tstring strbuf = source.substr( index, size );
00193 result.push_back( strbuf );
00194 pos = pos + eo;
00195 }
00196 return result;
00197 }
00198
00199 Tbool TRegularExpression::IsMatch( const Tstring& source, Tint pos )
00200 {
00201 if ( execute( source, pos ) == 0 ) {
00202 return Ttrue;
00203 } else {
00204 return Tfalse;
00205 }
00206 }
00207
00208 Tint TRegularExpression::GetNumberOfMatches( const Tstring& source, Tint pos )
00209 {
00210 Tint nmatch = 0;
00211 while ( IsMatch( source, pos ) ) {
00212 Tint eo = theMatch -> rm_eo;
00213 pos = pos + eo;
00214 nmatch ++;
00215 }
00216 return nmatch;
00217 }
00218
00219 Tstring TRegularExpression::Substitute( const Tstring& source, const Tstring& substr, Tint pos )
00220 {
00221 Tstring result = source;
00222 if ( IsMatch( source, pos ) ) {
00223 Tint so = theMatch -> rm_so;
00224 Tint eo = theMatch -> rm_eo;
00225 Tint index = so + pos;
00226 Tint size = eo - so;
00227 result.replace( index, size, substr );
00228 }
00229 return result;
00230 }
00231
00232 Tstring TRegularExpression::SubstituteAll( const Tstring& source, const Tstring& substr, Tint pos )
00233 {
00234 Tstring result = source;
00235 while ( IsMatch( source, pos ) ) {
00236 Tint so = theMatch -> rm_so;
00237 Tint eo = theMatch -> rm_eo;
00238 Tint index = so + pos;
00239 Tint size = eo - so;
00240 result.replace( index, size, substr );
00241 pos = pos + eo;
00242 }
00243 return result;
00244 }
00245
00246 TstringList TRegularExpression::Split( const Tstring& source, Tint pos )
00247 {
00248 TstringList result;
00249
00250 Tint lastend = -1;
00251 while ( IsMatch( source, pos ) ) {
00252 Tint so = theMatch -> rm_so;
00253 Tint eo = theMatch -> rm_eo;
00254 Tsize_t gbegin = so + pos;
00255 Tsize_t gend = eo + pos - 1;
00256 if ( gbegin != 0 ) {
00257 Tint cpos = lastend + 1;
00258 Tint csize = gbegin - cpos;
00259 Tstring strbuf = source.substr( cpos, csize );
00260 if ( ! strbuf.empty() ) {
00261 result.push_back( strbuf );
00262 }
00263 }
00264 lastend = gend;
00265 pos = pos + eo;
00266 }
00267
00268 Tint srcsize = (Tint)source.size();
00269 if ( lastend + 1 == srcsize ) {
00270 #ifdef __CLDAQ_DEBUG
00271 CLDAQ_DEBUG("last string is matched separater");
00272 #endif
00273 } else if ( lastend < srcsize ) {
00274 #ifdef __CLDAQ_DEBUG
00275 CLDAQ_DEBUG("some string is exist after last separator");
00276 #endif
00277 Tint cpos = lastend + 1;
00278 Tint csize = srcsize - cpos;
00279 Tstring strbuf = source.substr( cpos, csize );
00280 result.push_back( strbuf );
00281 } else if ( lastend > srcsize ) {
00282 CLDAQ_WARN("last separator is exist on out of source");
00283 } else {
00284 CLDAQ_WARN("unexpected operation");
00285 }
00286
00287 return result;
00288 }
00289
00290 TstringList TRegularExpression::Split( const TRegularExpression& regex, const Tstring& source, Tint pos )
00291 {
00292 TRegularExpression r( regex );
00293 return r.Split( source, pos );
00294 }
00295
00296 TstringList TRegularExpression::Split( const Tstring& pattern, const Tstring& source, Tint pos )
00297 {
00298 TRegularExpression regex( pattern );
00299 return Split( regex, source, pos );
00300 }
00301
00302 Tstring TRegularExpression::GetSubMatch( Tint index, const Tstring& source, Tint pos )
00303 {
00304 Tstring result;
00305 if ( index < theNumberOfSubMatches && IsMatch( source, pos ) ) {
00306 Tint so = theSubMatch[ index ].rm_so;
00307 Tint eo = theSubMatch[ index ].rm_eo;
00308 Tint subindex = so + pos;
00309 Tint subsize = eo - so;
00310
00311 Tint sz = (Tint)source.size();
00312 if ( subindex < 0 || subindex >= sz || subsize <= 0 || subsize > sz-subindex ) {
00313 return result;
00314 }
00315
00316 result = source.substr( subindex, subsize );
00317 }
00318 return result;
00319 }
00320
00321 TstringList TRegularExpression::GetSubMatch( const Tstring& source, Tint pos )
00322 {
00323 TstringList result;
00324 if ( theNumberOfSubMatches > 0 && IsMatch( source, pos ) ) {
00325 for ( Tint i = 0; i < theNumberOfSubMatches; i ++ ) {
00326 Tint so = theSubMatch[ i ].rm_so;
00327 Tint eo = theSubMatch[ i ].rm_eo;
00328 Tint index = so + pos;
00329 Tint size = eo - so;
00330
00331 Tint sz = (Tint)source.size();
00332 if ( index < 0 || index >= sz || size <= 0 || size > sz-index ) {
00333 continue;
00334 }
00335
00336 Tstring strbuf = source.substr( index, size );
00337 result.push_back( strbuf );
00338 }
00339 }
00340 return result;
00341 }
00342
00343 Tvoid TRegularExpression::free()
00344 {
00345 if ( theCompiledPattern ) {
00346 regfree( theCompiledPattern );
00347 delete theCompiledPattern;
00348 theCompiledPattern = 0;
00349 theNumberOfSubMatches = -1;
00350 }
00351
00352 if ( theMatch ) {
00353 delete [] theMatch;
00354 theMatch = 0;
00355 theSubMatch = 0;
00356 }
00357
00358
00359
00360
00361
00362
00363
00364
00365 return;
00366 }
00367
00368 Tint TRegularExpression::execute( const Tstring& source, Tint pos )
00369 {
00370 if ( source.empty() || (Tsize_t)pos >= source.size() || pos < 0 ) {
00371 #ifdef __CLDAQ_DEBUG
00372 CLDAQ_DEBUG("source string is empty");
00373 CLDAQ_DEBUG(source.c_str());
00374 #endif
00375 return -1;
00376 } else if ( !theCompiledPattern || !theMatch ) {
00377 Tstring m = "not compiled pattern \"" + thePattern + "\"";
00378 CLDAQ_WARN( m.c_str() );
00379 return -1;
00380 } else {
00381 Tstring strbuf = source.substr( (Tsize_t)pos, (Tsize_t)(source.size()-pos) );
00382 const Tchar* cc = strbuf.c_str();
00383 Tint retval =
00384 regexec( theCompiledPattern, cc, theNumberOfSubMatches + 1, theMatch, 0 );
00385 return retval;
00386 }
00387 }
00388
// NOTE(review): presumably the ROOT class-implementation macro, expanded
// only when building with __CLDAQ_ROOT_DLL defined -- confirm against the
// project's build configuration.
#ifdef __CLDAQ_ROOT_DLL
ClassImp(TRegularExpression)
#endif