Main Page   Class Hierarchy   File List  

ocString.h

00001 #ifndef OC_STRING_H
00002 #define OC_STRING_H
00003 
#include <regex.h>

#include <algorithm>
#include <cctype>
#include <cstring>
#include <sstream>
#include <string>
00008 
00009 using namespace std;
/*
  ocAppend
  Free helper template: formats inVar through a stringstream and appends
  the resulting text to inStr.
  Usable with any type T that has a stream insertion operator (<<).
  Returns a reference to inStr itself.
*/
template<class T> string & ocAppend( string & inStr, T inVar )
{
  stringstream converter;
  converter << inVar;
  return inStr.append( converter.str() );
}
00021 
/*
  ocString
  --------
  A std::string with a few extras layered on top:
    - stateful tokenizing      (parse, tokenParse, remainder, parseInit)
    - non-mutating edits       (replace, replaceAll, remove, titleCase)
    - POSIX extended regex     (regExMatch, RegExStatus)
    - stream-style appending   (append)

  The edit helpers never modify the object itself; they return a new
  ocString so calls can be chained: s.replace(..).replace(..).

  parse(), tokenParse() and remainder() return a reference to an internal
  buffer. That buffer is overwritten by the next call to any method of
  this class that produces a string result, so copy the value if it has
  to outlive the next call.
*/
class ocString: public string
{
private:
  string retVal;          // scratch buffer holding the most recent string result
  string::size_type idx;  // current parse position inside *this
  bool eop;               // set once a parse has handed out the final token
  int  regex_status;      // code from the last regcomp()/regexec() call, 0 == success
public:

  ocString():string(),retVal(""),idx(0),eop(false),regex_status(0){;}
  /* A null pointer is treated as an empty string. */
  ocString( const char * in ):string( in ? in : "" ),retVal(""),idx(0),eop(false),regex_status(0){;}
  ocString( const string & in ):string(in),retVal(""),idx(0),eop(false),regex_status(0){;}

  // std::string flavoured overloads; they forward to the const char * versions
  inline string & parse( const string & pattern ){ return parse(pattern.c_str());}
  inline ocString replace( const string & pattern, const string & replacement ){ return replace(pattern.c_str(),replacement.c_str()); }
  inline ocString replaceAll( const string & pattern, const string & replacement ){ return replaceAll(pattern.c_str(),replacement.c_str()); }
  inline ocString remove( const string & pattern ){ return remove( pattern.c_str()); }

  /*
    Assign new content and reset the parse state.
    The string is copied with its full length, so embedded '\0'
    characters are preserved.
  */
  inline ocString & operator = ( const string & in )
  {
    string::operator = ( in );
    parseInit();
    return *this;
  }

  /*
    Assign new content and reset the parse state.
    A null pointer is treated as an empty string.
  */
  ocString & operator = ( const char * in )
  {
    string::operator = ( in ? in : "" );
    parseInit();
    return *this;
  }

  // append any type with a defined stream operator
  template<class T> ocString & append( T inVar )
  {
    ocAppend( *this, inVar );
    return *this;
  }
  virtual ~ocString(){;}

  /*
    Initializes the class for parsing:
    clears the result buffer, rewinds the parse position to the start
    and clears the end-of-parse flag.
  */
  void parseInit( void )
  {
    retVal = "";
    idx = 0;
    eop = false;
  }

  /*
    Parse the string into tokens separated by the string 'pattern'.
    This is how parse behaves:
    -------------------------------------------------------------
    If the pattern is NOT found:
      Return value is the whole string from the previous parse pos.
      End of parse (eop) is set to true.
      Parse pos is set to start of string.
    If the pattern IS found:
      Return value is the substring from the previous parse pos
      up to the found pattern.
      Parse position is set just beyond the found pattern.
    If eop is set, or the pattern is null or empty:
      Returns an empty string and changes no parse state.
      (Note: a null/empty pattern therefore never sets eop.)
  */
  string & parse( const char * pattern )
  {
    retVal = "";
    if( !pattern || eop )
    {
      return retVal;
    }
    const size_t patLen = strlen( pattern );
    if( patLen == 0 )
    {
      return retVal;
    }
    const string::size_type found = this->find( pattern, idx );
    if( found != string::npos )
    {
      retVal = this->substr( idx, found - idx );
      idx = found + patLen; // move to the next position
    }
    else
    {
      retVal = this->substr( idx );
      idx = 0; // start over
      eop = true;
    }
    return retVal;
  }

  /*
    Like parse, but 'tokens' is a set of single delimiter characters
    rather than one delimiter string.

    Leading delimiters at the parse position are skipped, the run of
    non-delimiter characters that follows is returned, and the parse
    position is moved past the delimiters that end it.

    When the returned token runs to the end of the string, or when
    nothing but delimiters is left (an empty string is returned in
    that case), eop is set and the parse position is rewound to 0.

    An empty 'tokens' set returns an empty string and changes no state.
    Unlike parse, this method does not look at eop on entry.
  */
  string & tokenParse( string tokens )
  {
    retVal = "";
    if( tokens.empty() )
    {
      return retVal;
    }

    const string::size_type tokStart = this->find_first_not_of( tokens, idx );
    if( tokStart == string::npos )
    {
      // only delimiters (or nothing at all) left: there is no token
      idx = 0; // start over
      eop = true;
      return retVal;
    }

    const string::size_type tokEnd = this->find_first_of( tokens, tokStart );
    if( tokEnd == string::npos )
    {
      // the final token runs to the end of the string
      retVal = this->substr( tokStart );
      idx = 0; // start over
      eop = true;
      return retVal;
    }

    retVal = this->substr( tokStart, tokEnd - tokStart );
    // for parse compatibility, go to end of delimiters
    idx = this->find_first_not_of( tokens, tokEnd );
    if( idx == string::npos )
    {
      idx = size();
    }
    return retVal;
  }

  // get what is left after the last parse
  string & remainder( void )
  {
    retVal = this->substr( idx );
    return retVal;
  }

  /*
    Replace the first occurrence of pattern with replacement.
      Return is a temporary ocString,
        so you can say strObj.replace().replace()...
        and have it do the expected thing.
      A null or empty pattern, or a pattern that does not occur,
      yields an unchanged copy. A null replacement deletes the match.
  */
  ocString replace( const char * pattern, const char * replacement )
  {
    retVal.assign( data(), size() );
    const size_t patLen = pattern ? strlen( pattern ) : 0;
    if( patLen > 0 )
    {
      const string::size_type found = this->find( pattern );
      if( found != string::npos )
      {
        // edit the copy in place; an empty result is a valid result
        retVal.replace( found, patLen, replacement ? replacement : "" );
      }
    }
    return retVal;
  }

  /*
    Replace all occurrences of pattern with replacement.
      Return is a temporary ocString.
      A null or empty pattern yields an unchanged copy, the same as
      replace() and remove(). A null replacement deletes every match.
      Matches are searched left to right and do not overlap.
  */
  ocString replaceAll( const char * pattern, const char * replacement )
  {
    const size_t patLen = pattern ? strlen( pattern ) : 0;
    if( patLen == 0 )
    {
      retVal.assign( data(), size() );
      return retVal;
    }

    const char * const fill = replacement ? replacement : "";
    retVal = "";
    string::size_type from = 0;
    string::size_type found = this->find( pattern );
    while( found != string::npos )
    {
      retVal.append( data() + from, found - from ); // text before the match
      retVal += fill;
      from = found + patLen;
      found = this->find( pattern, from );
    }
    // any remainder
    retVal.append( data() + from, size() - from );
    return retVal;
  }

  /*
    Remove the first occurrence of pattern.
      Return is a temporary ocString.
      A null or empty pattern, or a pattern that does not occur,
      yields an unchanged copy.
  */
  ocString remove( const char * pattern )
  {
    retVal.assign( data(), size() );
    const size_t patLen = pattern ? strlen( pattern ) : 0;
    if( patLen > 0 )
    {
      const string::size_type found = this->find( pattern );
      if( found != string::npos )
      {
        retVal.erase( found, patLen );
      }
    }
    return retVal;
  }

  /*
    Returns last parse position in string
  */
  string::size_type lastPos( void ) const
  {
    return idx;
  }

  /*
    Returns true once parse/tokenParse has handed out the final token.
    Cleared again by parseInit() and by assignment.
  */
  bool endOfParse( void ) const
  {
    return eop;
  }

  /*
     Uppercase the first character of every word, lowercase the rest.
     A new word starts at the beginning of the string and after any
     character that is neither a letter nor an apostrophe
     (so "2nd" becomes "2Nd" and "it's" becomes "It's").
     Return is a temporary ocString.
  */
  ocString titleCase( void )
  {
    retVal = "";
    retVal.reserve( size() );
    bool startOfWord = true;
    for( string::size_type pos = 0; pos < size(); ++pos )
    {
      // the <cctype> functions require a value representable as unsigned char
      const unsigned char ch = static_cast<unsigned char>( (*this)[pos] );
      retVal += static_cast<char>( startOfWord ? toupper( ch ) : tolower( ch ) );
      startOfWord = !( isalpha( ch ) || ch == '\'' );
    }
    return retVal;
  }

  /*
    Test the whole string against a POSIX extended regular expression.
    Returns true on a match. Returns false when there is no match, when
    the pattern fails to compile, or when the pattern is null; the
    underlying code is available from RegExStatus().
  */
  bool regExMatch( const char *pattern )
  {
    if( !pattern )
    {
      regex_status = REG_BADPAT;
      return false;
    }
    regex_t compiled;
    // Only POSIX cflags belong here. The GNU syntax bit
    // RE_BACKSLASH_ESCAPE_IN_LISTS that used to be OR-ed in is not a
    // regcomp() flag (on glibc it has the same value as REG_EXTENDED).
    regex_status = regcomp( &compiled, pattern, REG_EXTENDED|REG_NOSUB );
    if( regex_status != 0 )
    {
      return false; // compile error; regfree() is only for compiled patterns
    }
    regex_status = regexec( &compiled, c_str(), static_cast<size_t>( 0 ), NULL, 0 );
    regfree( &compiled );
    return regex_status == 0;
  }

  /*
    Code from the most recent regExMatch() call:
    0 on a match, otherwise the regcomp()/regexec() error code
    (REG_NOMATCH when the pattern was valid but did not match).
  */
  int RegExStatus( void ) const
  {
    return regex_status;
  }
};
00328 
00329 #endif
00330 
00331 #ifdef IN_T2_TESTHARNESS
00332   // unit testing
00333   cout << "=======================================" << endl;
00334   ocString thingy("This is \n\tSome parsable data");
00335   while( !thingy.endOfParse() )
00336   {
00337     cout << "Parsing: [" << thingy.tokenParse(" \t\n\r" ) << "]" << endl;
00338   }
00339   thingy.parseInit();
00340   cout << "=======================================" << endl;
00341   while( !thingy.endOfParse() )
00342   {
00343     cout << "Parsing: [" << thingy.parse(" ") << "]" << endl;
00344   }
00345   
00346   cout << "checking out reg exp matching" << endl;
00347   thingy = "david.mc@w3.sys.com";
00348   cout <<  thingy.regExMatch("^[a-z\\.A-Z1-9\\-]+@[a-z\\.A-Z1-9\\-]+$") << " should be true" << endl;
00349   thingy = "david-mc@w3-sys.com";
00350   cout <<  thingy.regExMatch("^[a-z\\.A-Z1-9\\-]+@[a-z\\.A-Z1-9\\-]+$") << " should be true" << endl;
00351   thingy = "david9mcw3.sys9.com";
00352   cout <<  thingy.regExMatch("^[a-z\\.A-Z1-9\\-]+@[a-z\\.A-Z1-9\\-]+$") << " should be false" << endl;
00353   thingy = "david.mcw@";
00354   cout <<  thingy.regExMatch("^[a-z\\.A-Z1-9\\-]+@[a-z\\.A-Z1-9\\-]+$") << " should be false" << endl;
00355   thingy = "Hutch0099@cs.com";
00356   cout <<  thingy.regExMatch("^[a-z\\.A-Z0-9\\-]+@[a-z\\.A-Z0-9\\-]+$") << " should be true" << endl;
00357   cout << thingy.RegExStatus() << endl;
00358   /*
00359   cout <<  thingy.regExMatch("^this") << " should be false" << endl;
00360   cout <<  thingy.regExMatch("\n\tS") << " should be true" << endl;
00361   cout <<  thingy.regExMatch("^\n\tS") << " should be false" << endl;
00362   cout <<  thingy.regExMatch("data$") << " should be true" << endl;
00363   */
00364 #endif

Generated on Tue Jan 20 09:03:27 2004 for OpenTools by doxygen1.2.18