メインページ   モジュール   名前空間一覧   クラス階層   アルファベット順一覧   構成   ファイル一覧   構成メンバ   ファイルメンバ   関連ページ    

TRegularExpression.cc

解説を見る。
00001 // ============================================================================
00002 //  $Id: TRegularExpression.cc,v 1.4 2004/03/07 10:30:34 goiwai Exp $
00003 //  $Name: CLDAQ-1-14-03 $
00004 //  $Log: TRegularExpression.cc,v $
00005 //  Revision 1.4  2004/03/07 10:30:34  goiwai
00006 //  ROOTに組みこむためのおまじないマクロを埋めこみました。
00007 //  全てにおいて完全に動作するわけではありません。
00008 //
00009 //  Revision 1.3  2004/02/06 07:31:00  goiwai
00010 //  SubMatchした文字列取得時に発生する不具合を修正しました.
00011 //
00012 //  Revision 1.2  2004/01/29 04:16:37  goiwai
00013 //  GetSubMatches->GetSubMatch にして若干仕様変更
00014 //  Splitをオーバーロード
00015 //
00016 //  Revision 1.1  2003/10/06 16:33:15  goiwai
00017 //  UNIXの正規表現を扱うための部品です.速度よりも可読性を重視したので遅い
00018 //  です.
00019 //
00020 // ============================================================================
00021 #include "TRegularExpression.hh"
00022 
00023 TRegularExpression::TRegularExpression( const Tstring& pattern, Tint option )
00024   : thePattern( pattern ),
00025     theOption( option ),
00026     theNumberOfSubMatches( -1 ),
00027     theCompiledPattern( 0 ),
00028     theMatch( 0 ),
00029     theSubMatch( 0 )
00030 {
00031   Compile();
00032 }
00033 
00034 TRegularExpression::TRegularExpression( Tint option, const Tstring& pattern )
00035   : thePattern( pattern ),
00036     theOption( option ),
00037     theNumberOfSubMatches( -1 ),
00038     theCompiledPattern( 0 ),
00039     theMatch( 0 ),
00040     theSubMatch( 0 )
00041 {
00042   Compile();
00043 }
00044 
00045 TRegularExpression::TRegularExpression( const TRegularExpression& right )
00046   : thePattern( right.thePattern ),
00047     theOption( right.theOption ),
00048     theNumberOfSubMatches( -1 ),
00049     theCompiledPattern( 0 ),
00050     theMatch( 0 ),
00051     theSubMatch( 0 )
00052 {
00053   Compile();
00054 }
00055 
00056 TRegularExpression::~TRegularExpression()
00057 {
00058   free();
00059 }
00060 
00061 const TRegularExpression& TRegularExpression::operator=( const TRegularExpression& right )
00062 {
00063   thePattern = right.thePattern;
00064   theOption = right.theOption;
00065   free();
00066   theNumberOfSubMatches = -1;
00067   theCompiledPattern = 0;
00068   theMatch = 0;
00069   theSubMatch = 0;
00070   Compile();
00071   return *this;
00072 }
00073 
00074 const TRegularExpression& TRegularExpression::operator=( const Tstring& right )
00075 {
00076   *this = TRegularExpression( right );
00077   return *this;
00078 }
00079 
00080 Tbool TRegularExpression::operator==( const Tstring& right )
00081 {
00082   return IsMatch( right );
00083 }
00084 
00085 Tbool TRegularExpression::operator!=( const Tstring& right )
00086 {
00087   if ( IsMatch( right ) ) {
00088     return Tfalse;
00089   } else {
00090     return Ttrue;
00091   }
00092 }
00093 
00094 Tvoid TRegularExpression::Compile()
00095 {
00096   free();
00097 
00098   theCompiledPattern = new Tregex_t();
00099   Tint retval = regcomp( theCompiledPattern, thePattern.c_str(), theOption );
00100   if ( retval != 0 ) {
00101     char m[ 128 ];
00102     regerror( retval, theCompiledPattern, m, Tsizeof(m) );
00103     regfree( theCompiledPattern );
00104     delete theCompiledPattern;
00105     theCompiledPattern = 0;
00106     theNumberOfSubMatches = -1;
00107     return;
00108   }
00109 
00110 
00111 
00112   theNumberOfSubMatches = theCompiledPattern -> re_nsub;
00113   Tregmatch_t* buf = new Tregmatch_t[ theNumberOfSubMatches + 1 ];
00114   theMatch = buf;
00115 
00116   if ( theNumberOfSubMatches > 0 ) {
00117     buf ++;
00118     theSubMatch = buf;
00119   }
00120 
00121   return;
00122 }
00123 
00124 Tint TRegularExpression::Index( const Tstring& source, Tint pos )
00125 {
00126   Tint result = -1;
00127   if ( IsMatch( source, pos ) ) {
00128     Tint so = theMatch -> rm_so;
00129     result = so + pos;
00130   }
00131   return result;
00132 }
00133 
00134 TintList TRegularExpression::Indexes( const Tstring& source, Tint pos )
00135 {
00136   TintList result;
00137   while ( IsMatch( source, pos ) ) {
00138     Tint so = theMatch -> rm_so;
00139     Tint eo = theMatch -> rm_eo;
00140     Tint pbuf = so + pos;
00141     result.push_back( pbuf );
00142     pos = pos + eo;
00143   }
00144   return result;
00145 }
00146 
00147 Tint TRegularExpression::Size( const Tstring& source, Tint pos )
00148 {
00149   Tint result = 0;
00150   if ( IsMatch( source, pos ) ) {
00151     Tint so = theMatch -> rm_so;
00152     Tint eo = theMatch -> rm_eo;
00153     result = eo - so;
00154   }
00155   return result;
00156 }
00157 
00158 TintList TRegularExpression::Sizes( const Tstring& source, Tint pos )
00159 {
00160   TintList result;
00161   while ( IsMatch( source, pos ) ) {
00162     Tint so = theMatch -> rm_so;
00163     Tint eo = theMatch -> rm_eo;
00164     Tint sbuf = eo - so;
00165     result.push_back( sbuf );
00166     pos = pos + eo;
00167   }
00168   return result;
00169 }
00170 
00171 Tstring TRegularExpression::MatchString( const Tstring& source, Tint pos )
00172 {
00173   Tstring result = "";
00174   if ( IsMatch( source, pos ) ) {
00175     Tint so = theMatch -> rm_so;
00176     Tint eo = theMatch -> rm_eo;
00177     Tint index = so + pos;
00178     Tint size = eo - so;
00179     result = source.substr( index, size );
00180   }
00181   return result;
00182 }
00183 
00184 TstringList TRegularExpression::MatchStrings( const Tstring& source, Tint pos )
00185 {
00186   TstringList result;
00187   while ( IsMatch( source, pos ) ) {
00188     Tint so = theMatch -> rm_so;
00189     Tint eo = theMatch -> rm_eo;
00190     Tint index = so + pos;
00191     Tint size = eo - so;
00192     Tstring strbuf = source.substr( index, size );
00193     result.push_back( strbuf );
00194     pos = pos + eo;
00195   }
00196   return result;
00197 }
00198 
00199 Tbool TRegularExpression::IsMatch( const Tstring& source, Tint pos )
00200 {
00201   if ( execute( source, pos ) == 0 ) {
00202     return Ttrue;
00203   } else {
00204     return Tfalse;
00205   }
00206 }
00207 
00208 Tint TRegularExpression::GetNumberOfMatches( const Tstring& source, Tint pos )
00209 {
00210   Tint nmatch = 0;
00211   while ( IsMatch( source, pos ) ) {
00212     Tint eo = theMatch -> rm_eo;
00213     pos = pos + eo;
00214     nmatch ++;
00215   }
00216   return nmatch;
00217 }
00218 
00219 Tstring TRegularExpression::Substitute( const Tstring& source, const Tstring& substr, Tint pos )
00220 {
00221   Tstring result = source;
00222   if ( IsMatch( source, pos ) ) {
00223     Tint so = theMatch -> rm_so;
00224     Tint eo = theMatch -> rm_eo;
00225     Tint index = so + pos;
00226     Tint size = eo - so;
00227     result.replace( index, size, substr );
00228   }
00229   return result;
00230 }
00231 
00232 Tstring TRegularExpression::SubstituteAll( const Tstring& source, const Tstring& substr, Tint pos )
00233 {
00234   Tstring result = source;
00235   while ( IsMatch( source, pos ) ) {
00236     Tint so = theMatch -> rm_so;
00237     Tint eo = theMatch -> rm_eo;
00238     Tint index = so + pos;
00239     Tint size = eo - so;
00240     result.replace( index, size, substr );
00241     pos = pos + eo;
00242   }
00243   return result;
00244 }
00245 
00246 TstringList TRegularExpression::Split( const Tstring& source, Tint pos )
00247 {
00248   TstringList result;
00249 
00250   Tint lastend = -1;
00251   while ( IsMatch( source, pos ) ) {
00252     Tint so = theMatch -> rm_so;
00253     Tint eo = theMatch -> rm_eo;
00254     Tsize_t gbegin = so + pos;
00255     Tsize_t gend = eo + pos - 1;
00256     if ( gbegin != 0 ) {
00257       Tint cpos = lastend + 1;
00258       Tint csize = gbegin - cpos;
00259       Tstring strbuf = source.substr( cpos, csize );
00260       if ( ! strbuf.empty() ) {
00261         result.push_back( strbuf );
00262       }
00263     }
00264     lastend = gend;
00265     pos = pos + eo;
00266   }
00267 
00268   Tint srcsize = (Tint)source.size();
00269   if ( lastend + 1 == srcsize ) {
00270 #ifdef __CLDAQ_DEBUG
00271     CLDAQ_DEBUG("last string is matched separater");
00272 #endif
00273   } else if ( lastend < srcsize ) {
00274 #ifdef __CLDAQ_DEBUG
00275     CLDAQ_DEBUG("some string is exist after last separator");
00276 #endif
00277     Tint cpos = lastend + 1;
00278     Tint csize = srcsize - cpos;
00279     Tstring strbuf = source.substr( cpos, csize );
00280     result.push_back( strbuf );
00281   } else if ( lastend > srcsize ) {
00282     CLDAQ_WARN("last separator is exist on out of source");
00283   } else {
00284     CLDAQ_WARN("unexpected operation");
00285   }
00286 
00287   return result;
00288 }
00289 
00290 TstringList TRegularExpression::Split( const TRegularExpression& regex, const Tstring& source, Tint pos )
00291 {
00292   TRegularExpression r( regex );
00293   return r.Split( source, pos );
00294 }
00295 
00296 TstringList TRegularExpression::Split( const Tstring& pattern, const Tstring& source, Tint pos )
00297 {
00298   TRegularExpression regex( pattern );
00299   return Split( regex, source, pos );
00300 }
00301 
00302 Tstring TRegularExpression::GetSubMatch( Tint index, const Tstring& source, Tint pos )
00303 {
00304   Tstring result;
00305   if ( index < theNumberOfSubMatches && IsMatch( source, pos ) ) {
00306     Tint so = theSubMatch[ index ].rm_so;
00307     Tint eo = theSubMatch[ index ].rm_eo;
00308     Tint subindex = so + pos;
00309     Tint subsize = eo - so;
00310 
00311     Tint sz = (Tint)source.size();
00312     if ( subindex < 0 || subindex >= sz || subsize <= 0 || subsize > sz-subindex ) {
00313       return result;
00314     }
00315 
00316     result = source.substr( subindex, subsize );
00317   }
00318   return result;
00319 }
00320 
00321 TstringList TRegularExpression::GetSubMatch( const Tstring& source, Tint pos )
00322 {
00323   TstringList result;
00324   if ( theNumberOfSubMatches > 0 && IsMatch( source, pos ) ) {
00325     for ( Tint i = 0; i < theNumberOfSubMatches; i ++ ) {
00326       Tint so = theSubMatch[ i ].rm_so;
00327       Tint eo = theSubMatch[ i ].rm_eo;
00328       Tint index = so + pos;
00329       Tint size = eo - so;
00330 
00331       Tint sz = (Tint)source.size();
00332       if ( index < 0 || index >= sz || size <= 0 || size > sz-index  ) {
00333         continue;
00334       }
00335 
00336       Tstring strbuf = source.substr( index, size );
00337       result.push_back( strbuf );
00338     }
00339   }
00340   return result;  
00341 }
00342 
00343 Tvoid TRegularExpression::free()
00344 {
00345   if ( theCompiledPattern ) {
00346     regfree( theCompiledPattern );
00347     delete theCompiledPattern;
00348     theCompiledPattern = 0;
00349     theNumberOfSubMatches = -1;
00350   }
00351 
00352   if ( theMatch ) {
00353     delete [] theMatch;
00354     theMatch = 0;
00355     theSubMatch = 0;
00356   }
00357 
00358   // if ( theSubMatch ) {
00359   //   ここを解放しようとすると落ちるなあ
00360   //   delete theSubMatch;
00361   //   theSubMatch = 0;
00362   //   theNumberOfSubMatches = -1;
00363   // }
00364 
00365   return;
00366 }
00367 
00368 Tint TRegularExpression::execute( const Tstring& source, Tint pos )
00369 {
00370   if ( source.empty() || (Tsize_t)pos >= source.size() || pos < 0 ) {
00371 #ifdef __CLDAQ_DEBUG
00372     CLDAQ_DEBUG("source string is empty");
00373     CLDAQ_DEBUG(source.c_str());
00374 #endif
00375     return -1;
00376   } else if ( !theCompiledPattern || !theMatch ) {
00377     Tstring m = "not compiled pattern \"" + thePattern + "\"";
00378     CLDAQ_WARN( m.c_str() );
00379     return -1;
00380   } else {
00381     Tstring strbuf = source.substr( (Tsize_t)pos, (Tsize_t)(source.size()-pos) );
00382     const Tchar* cc = strbuf.c_str();
00383     Tint retval = 
00384       regexec( theCompiledPattern, cc, theNumberOfSubMatches + 1, theMatch, 0 );
00385     return retval;
00386   }
00387 }
00388 
00389 #ifdef __CLDAQ_ROOT_DLL
00390     ClassImp(TRegularExpression)
00391 #endif


CLDAQ - a Class Library for DataAcQuisition (Version 1.14.3)
Go IWAI -- goiwai at users.sourceforge.jp