Main Page   Class Hierarchy   File List  

ocImportParser.h

00001 /*
00002   ocImportParser.h
00003   ================
00004   Imports delimited data into a vector of columns
00005 
00006   (c) 2002 David McCombs w3sys.com
00007 
00008 */
00009 
00010 // Compile sentry
00011 #ifndef OCIMPORTPARSER_H
00012 #define OCIMPORTPARSER_H
00013 
// One column of a delimited import file.
//
//   name    - column label; ocFileParser::parse stores the token here when it
//             is asked to treat the line as a label line.
//   value   - column text; ocFileParser::parse stores (appends) the token here
//             for non-label lines.
//   iMapPos - zero-based position of the column within the parsed line.
//
// Copy construction, assignment and destruction are deliberately left to the
// compiler: every member manages itself, and not declaring them keeps the
// implicitly generated move operations available, so containers of ocFileCol
// can relocate elements without copying the strings.
struct ocFileCol
{
  std::string name;
  std::string value;
  int iMapPos;

  // A new column has empty name/value and map position 0.
  ocFileCol() : iMapPos(0) {}
};
00031 
// Column list type; ocFileParser keeps one ocFileCol per column position in it.
00032 typedef vector<ocFileCol> ocFileCols;
00033 
00034 class ocFileParser
00035 {
00036   string cDelim;
00037   string cField;
00038   int colCount;
00039   ocFileCols cols;
00040   ifstream inFile;
00041   ocString  lineTokens;
00042   bool bTrimQuote;
00043   bool bCheckQuote;
00044   bool notDone;
00045   
00046 public:
00047 
00048   string attrSep;
00049   ocFileParser()
00050   :colCount(0),cDelim("|"),cField("\""),cols(100)
00051   ,bTrimQuote(false),bCheckQuote(false)
00052   ,notDone(false),attrSep("~|~")
00053   {;}
00054   ~ocFileParser(){;}
00055 
00056   // clear values only, the name should stay set
00057   void clearData( void )
00058   {
00059     for( int i=0; i < cols.size(); i++ ) cols[i].value = "";
00060   }
00061 
00062   bool openFile( string filePath )
00063   {
00064     inFile.open(filePath.c_str());
00065     return( inFile.is_open() );
00066   }
00067   bool closeFile( void )
00068   {
00069     if( inFile.is_open() )
00070     {
00071       inFile.close();
00072       inFile.clear();
00073       cols.clear();
00074     }
00075     return true;
00076   }
00077   
00078   bool isEven( int cnt )
00079   {
00080     return cnt%2 == 0;
00081   }
00082   
00083   bool getLine ( void )
00084   {
00085     bool bret = false;
00086     lineTokens = "";
00087     bool eol = false;
00088     char ch;
00089     // if were quote checking, 
00090     // this should always be even at end of line
00091     int quoteCount = 0;
00092 
00093 
00094     if( inFile.is_open() )
00095     {
00096       while( inFile.eof() == false
00097              && eol == false )
00098       {
00099         ch = inFile.get();
00100 
00101         if( ch == EOF ) break;
00102         
00103         // increment if quote checking
00104         if( ch == '\"' &&  bCheckQuote )
00105         {
00106            ++quoteCount;           
00107         }
00108 
00109         lineTokens += ch;
00110 
00111         int len = lineTokens.length();
00112         if( len 
00113             &&  lineTokens[len-1] == '\n' 
00114             &&  isEven(quoteCount)
00115           )
00116         {          
00117           lineTokens.resize(len-1);
00118           eol = true;
00119           len = lineTokens.length();
00120           if( lineTokens[len-1] == '\r' ) // possible carraige ret from Windows files
00121           {
00122             lineTokens.resize(len-1);
00123           }
00124         }
00125       } // end while
00126     }
00127     if(lineTokens.length())
00128     {
00129       bret = true;
00130     }
00131     return bret;
00132   }
00133   bool parseLine( bool isLabel = false )
00134   {
00135     bool bret = false;
00136     if( getLine() )
00137     {
00138       bret = parse(isLabel);
00139     }
00140     return bret;
00141   }
00142   void trimQuote( string & tok )
00143   {
00144     int len = tok.length();
00145     if( len )
00146     {
00147       if( tok[0] == '\"' )
00148       {
00149         tok.erase(0,1);
00150       }
00151       len = tok.length();
00152       if( len )
00153       {
00154         if( tok[len-1] == '\"' )
00155         {
00156           tok.resize(len-1);
00157         }
00158       }
00159     }
00160   }
00161   void checkQuote( string & tok )
00162   {
00163     int len = tok.length();
00164     if( len ) // the token has length
00165     {
00166       if( tok[0] == '\"' )  // the start has a 
00167       {
00168         if( len > 1 && tok[len-1] == '\"' ) // true unless the delim part of text
00169         {
00170           tok.erase(0,1);
00171           len = tok.length();
00172           tok.resize(len-1);
00173         }
00174         else // delim part of text, add it back
00175         {
00176           tok.erase(0,1); // erase the quote
00177     
00178           tok += ","; // add back the delimiter stolen from the field
00179           // create a proper ending delimiter and parse with it
00180           string eDelim = cField;
00181           if( lineTokens.remainder().find(cDelim) != string::npos ) eDelim += cDelim;
00182           // add the rest of the field
00183           tok += lineTokens.parse(eDelim.c_str());    
00184         }
00185       }
00186     }
00187   }
00188   bool parse( bool isLabel = false )
00189   {
00190     lineTokens.parseInit();
00191     int colCount = 0;
00192     clearData();
00193         
00194     while(lineTokens.endOfParse() == false )
00195     {
00196       string tok = lineTokens.parse(cDelim.c_str());
00197       if( bTrimQuote )
00198       {
00199         trimQuote( tok );
00200       }
00201       else if ( bCheckQuote )
00202       {
00203         checkQuote( tok  );
00204       }
00205       ocFileCol & rfc = cols[colCount];
00206       if(isLabel)
00207       {
00208         rfc.name = tok;
00209       }
00210       else
00211       {
00212         // append values instead of assigning them in case of multiple mapping
00213         // Especially useful for attributes
00214         if(rfc.value.length())
00215         {
00216           rfc.value += attrSep;
00217         }
00218         rfc.value += tok;
00219       }
00220       rfc.iMapPos = colCount;
00221       colCount ++;
00222     }
00223 
00224     if( isLabel ) this->colCount = colCount;
00225     
00226     return true;
00227   }
00228   inline int getColCount(void)
00229   {
00230     return colCount;
00231   }
00232   inline void setColCount( int iCount )
00233   {
00234     colCount = iCount ;
00235   }
00236   inline ocFileCols & getCols( void )
00237   {
00238     return cols;
00239   }
00240 
00241   ocFileParser & setDelim(string in)
00242   {
00243     cDelim = in;
00244     return * this;
00245   }
00246   ocFileParser & setTrimQuote( bool in )
00247   {
00248     bTrimQuote = in;
00249     return * this;
00250   }
00251   ocFileParser & setCheckQuote( bool in )
00252   {
00253     bCheckQuote = in;
00254     return * this;
00255   }
00256   ocFileParser & setLineTokens( string in )
00257   {
00258     lineTokens = in;
00259     return * this;
00260   }
00261   string getLineTokens( void )
00262   {
00263     return lineTokens;
00264   }
00265 };
00266 
00267 #endif

Generated on Tue Jan 20 09:03:27 2004 for OpenTools by doxygen1.2.18