00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include <string>
00020 #include <iomanip>
00021 #include <algorithm>
00022 #include <map>
00023 #include <vector>
00024
00025 #include <fstream>
00026 #include "ocString.h"
00027
00028 #ifndef OC_XML_H
00029 #define OC_XML_H
00030
00031 using namespace std;
00032
00033
00034 class parseFobject
00035 {
00036 public:
00037
00038 parseFobject()
00039 {;}
00040
00041 virtual void operator () ( ocString & in )
00042 {;}
00043 };
00044
00045
00046 class ocGenericParser
00047 {
00048 protected:
00049 istream * iStream;
00050 ocString input;
00051 parseFobject * pCurrentFunction;
00052 public:
00053 ocGenericParser( istream * in ):iStream(in),pCurrentFunction(NULL)
00054 {
00055 ;
00056 }
00057 ocGenericParser( string & in ):iStream(NULL),pCurrentFunction(NULL)
00058 {
00059 input = in;
00060 }
00061 virtual ~ocGenericParser()
00062 {;}
00063 bool parse( void )
00064 {
00065
00066 bool bGood=true;
00067 do
00068 {
00069
00070 while( pCurrentFunction && input.length() && !input.endOfParse() )
00071 {
00072 curFunc()(input);
00073 }
00074 if(iStream)
00075 {
00076 iStream->clear();
00077 getline(*iStream,input);
00078 input.parseInit();
00079 bGood = iStream->good();
00080 }
00081 } while( pCurrentFunction && iStream && !iStream->eof() );
00082 return bGood;
00083 }
00084 parseFobject & curFunc(void)
00085 {
00086 return *pCurrentFunction;
00087 }
00088 virtual void callback( void )
00089 {
00090 ;
00091 }
00092 };
00093
00094
00095 class baseFunc: public parseFobject
00096 {
00097 protected:
00098 ocGenericParser & rParser;
00099 public:
00100
00101 baseFunc(ocGenericParser & irParser):parseFobject(),rParser(irParser){;}
00102
00103 virtual ~baseFunc(){;}
00104 };
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
/**
 * Escapes text for use as XML character data or as a quoted attribute value.
 *
 * The five characters with a predefined XML entity are written as that
 * entity (&amp;amp; &amp;lt; &amp;gt; &amp;quot; &amp;apos;); '%' and '+' are
 * written as the decimal character references for code points 37 and 43.
 *
 * The input is converted in a single pass so that the '&' introduced by one
 * replacement is never escaped a second time.
 *
 * Declared inline because the definition lives in a header.
 *
 * @param in text to escape
 * @return the escaped copy; an empty input yields an empty string
 */
inline std::string xmlEscape( std::string in )
{
  std::string out;
  out.reserve( in.size() );
  for( std::string::size_type pos = 0; pos < in.size(); ++pos )
  {
    switch( in[pos] )
    {
      case '&':  out += "&amp;";  break;
      case '<':  out += "&lt;";   break;
      case '>':  out += "&gt;";   break;
      case '"':  out += "&quot;"; break;
      case '\'': out += "&apos;"; break;
      case '%':  out += "&#37;";  break;
      case '+':  out += "&#43;";  break;
      default:   out += in[pos];  break;
    }
  }
  return out;
}
/**
 * Reverses XML escaping: replaces the recognised entities and character
 * references with the character they stand for.
 *
 * Recognised: &amp;amp; &amp;lt; &amp;gt; &amp;quot; &amp;apos; and the decimal
 * references for '"' (34), '%' (37), '\'' (39) and '+' (43).  Anything else,
 * including a lone '&' or an unknown entity, is copied through unchanged.
 *
 * The input is decoded in a single pass, so the output of one replacement
 * is never decoded again ("&amp;amp;lt;" becomes "&amp;lt;", not "<").
 *
 * Declared inline because the definition lives in a header.
 *
 * @param in text to decode
 * @return the decoded copy; an empty input yields an empty string
 */
inline std::string xmlUnescape( std::string in )
{
  struct entity
  {
    const char * text;
    char value;
  };
  static const entity table[] =
  {
    { "&amp;",  '&'  },
    { "&lt;",   '<'  },
    { "&gt;",   '>'  },
    { "&quot;", '"'  },
    { "&apos;", '\'' },
    { "&#34;",  '"'  },
    { "&#37;",  '%'  },
    { "&#39;",  '\'' },
    { "&#43;",  '+'  }
  };
  const std::string::size_type entityCount = sizeof(table) / sizeof(table[0]);

  std::string out;
  out.reserve( in.size() );
  std::string::size_type pos = 0;
  while( pos < in.size() )
  {
    std::string::size_type hit = entityCount;
    std::string::size_type hitLength = 0;
    if( in[pos] == '&' )
    {
      for( std::string::size_type i = 0; i < entityCount; ++i )
      {
        const std::string::size_type length =
          std::char_traits<char>::length( table[i].text );
        // compare() clamps the range to the end of the string, so a
        // truncated entity at the very end simply fails to match.
        if( in.compare( pos, length, table[i].text ) == 0 )
        {
          hit = i;
          hitLength = length;
          break;
        }
      }
    }
    if( hit < entityCount )
    {
      out += table[hit].value;
      pos += hitLength;
    }
    else
    {
      out += in[pos];
      ++pos;
    }
  }
  return out;
}
00142
00143
00144
00145
00146 typedef class map<string,string> node_attr;
00147 class xmlNode
00148 {
00149 public:
00150 string name;
00151 string data;
00152 node_attr attr;
00153 bool monopole;
00154 xmlNode()
00155 :name(""),data(""),attr(),monopole(false)
00156 {;}
00157 xmlNode(const xmlNode & in)
00158 :name(in.name),data(in.data),attr(in.attr),monopole(in.monopole)
00159 {;}
00160 xmlNode & operator = (const xmlNode & in)
00161 {
00162 name=in.name;
00163 data=in.data;
00164 attr=in.attr;
00165 monopole=in.monopole;
00166 return *this;
00167 }
00168 string emit( void )
00169 {
00170 string ret = "<";
00171 ret += name;
00172 if( attr.size() )
00173 {
00174 node_attr::iterator it;
00175 for( it = attr.begin(); it != attr.end(); ++it )
00176 {
00177 ret += " ";
00178 ret += it->first;
00179 ret += "='";
00180 ret += xmlEscape(it->second);
00181 ret += "'";
00182 }
00183 }
00184 if( monopole )
00185 {
00186 ret += "/>";
00187 }
00188 else
00189 {
00190 ret += ">";
00191 ret += xmlEscape(data);
00192 ret += "</";
00193 ret += name;
00194 ret += ">";
00195 }
00196 return ret;
00197 }
00198 };
00199
00200
00201
00202
00203 typedef class vector<xmlNode> node_vector;
00204 typedef class multimap<string,size_t> node_map;
00205 typedef class vector<size_t>node_stack;
00206 struct stateMachine
00207 {
00208 node_vector nodes;
00209 node_stack nodestack;
00210 node_map nodemap;
00211 xmlNode & topNode( void )
00212 {
00213 return nodes[nodestack.back()];
00214 }
00215 };
00216
00217
00218 class findStart: public baseFunc
00219 {
00220 public:
00221 ocString data;
00222 bool foundStart;
00223 findStart(ocGenericParser & irParser):baseFunc(irParser)
00224 {
00225 foundStart = false;
00226 }
00227 void operator () ( ocString & in )
00228 {
00229 data += in.parse("<");
00230 foundStart = (in.lastPos() > 0);
00231 if( foundStart )
00232 {
00233 rParser.callback();
00234 data = "";
00235 }
00236 }
00237 };
00238
00239
00240 class findEnd: public baseFunc
00241 {
00242 public:
00243 ocString data;
00244 bool foundEnd;
00245
00246 findEnd(ocGenericParser & irParser):baseFunc(irParser)
00247 {
00248 foundEnd = false;
00249 }
00250
00251 void operator () ( ocString & in )
00252 {
00253 data += in.parse(">");
00254 foundEnd = (in.lastPos() > 0);
00255 if( foundEnd )
00256 {
00257 rParser.callback();
00258 data = "";
00259 }
00260 }
00261 };
00262
00263
00264
00265 class xmlParser : public ocGenericParser
00266 {
00267 private:
00268
00269 findStart start;
00270 findEnd end;
00271 node_vector::iterator xnode_it;
00272
00273 void startCallback(void)
00274 {
00275 size_t len = start.data.length();
00276 if( len && !states.nodestack.empty() )
00277 {
00278
00279 xmlNode & rNode = states.topNode();
00280 rNode.data += xmlUnescape(start.data);
00281 }
00282 pCurrentFunction = &end;
00283 }
00284 void endCallback(void)
00285 {
00286 ocString parseableData = end.data;
00287 size_t len = end.data.length();
00288 if( len == 0 ) return;
00289
00290
00291 bool isEndTag = end.data[0] == '/';
00292
00293 bool isMonoTag = end.data[len-1] == '/';
00294 if( isMonoTag )
00295 {
00296 len--;
00297 parseableData.resize(len);
00298 }
00299 if( !isEndTag || isMonoTag )
00300 {
00301
00302
00303
00304 xmlNode node;
00305
00306 node.name = parseableData.tokenParse( " \t\n\r" );
00307 if( isMonoTag ) node.monopole = true;
00308
00309 do
00310 {
00311
00312 string attrCandidate = parseableData.parse( "=" );
00313
00314 while( attrCandidate.size() && (
00315 attrCandidate[0] == ' ' ||
00316 attrCandidate[0] == '\n' ||
00317 attrCandidate[0] == '\t' ) ) attrCandidate.erase(0,1);
00318 if(attrCandidate.length())
00319 {
00320 string attrName = attrCandidate;
00321
00322
00323 char pTok[2];
00324 pTok[0] = parseableData.remainder()[0];
00325 pTok[1] = '\0';
00326 parseableData.parse( pTok );
00327 string attrValue = parseableData.parse( pTok );
00328
00329 if( attrName.length() && attrValue.length() )
00330 {
00331 node.attr.insert(make_pair(attrName,xmlUnescape(attrValue)));
00332 }
00333 }
00334 } while(!parseableData.endOfParse());
00335
00336 addNode(node);
00337
00338 if( !isMonoTag )
00339 {
00340
00341
00342
00343 size_t nTop = states.nodes.size()-1;
00344 states.nodestack.push_back(nTop);
00345 }
00346 }
00347 if(isEndTag)
00348 {
00349
00350
00351 states.nodestack.pop_back();
00352 }
00353 pCurrentFunction = &start;
00354 }
00355 public:
00356
00357 bool useXPATH;
00358
00359 stateMachine states;
00360
00361 xmlParser( istream * in ):ocGenericParser(in),start(*this),end(*this),useXPATH(false)
00362 {
00363 pCurrentFunction = &start;
00364 }
00365 xmlParser(string & in):ocGenericParser(in),start(*this),end(*this),useXPATH(false)
00366 {
00367 pCurrentFunction = &start;
00368 }
00369 virtual ~xmlParser(){;}
00370
00371 virtual void callback( void )
00372 {
00373 if( pCurrentFunction == &start )
00374 {
00375 startCallback();
00376 }
00377 else if(pCurrentFunction == &end)
00378 {
00379 endCallback();
00380 }
00381 }
00382
00383 node_vector & nodeList( void )
00384 {
00385 return states.nodes;
00386 }
00387
00388 void addNode( xmlNode &node )
00389 {
00390 states.nodes.push_back(node);
00391 string nodeName;
00392 if( useXPATH )
00393 {
00394 nodeName = "/";
00395 for( int i=0; i<states.nodestack.size(); i++ )
00396 {
00397 size_t sz = states.nodestack[i];
00398 nodeName += states.nodes[sz].name;
00399 nodeName += "/";
00400 }
00401
00402 }
00403 nodeName += node.name;
00404 states.nodemap.insert(make_pair(nodeName,states.nodes.size()-1));
00405 }
00406
00407
00408 string emit( void )
00409 {
00410 string ret;
00411 for( int i=0; i<states.nodes.size(); i++ )
00412 {
00413 ret += states.nodes[i].emit();
00414 }
00415 return ret;
00416 }
00417
00419 node_vector::iterator & findFirstNodeByName( string name )
00420 {
00421 xnode_it = states.nodes.begin();
00422 return seekNodeByName( name );
00423
00424 }
00426 node_vector::iterator & findNextNodeByName( string name )
00427 {
00428 ++xnode_it;
00429 return seekNodeByName( name );
00430 }
00431
00433 node_vector::iterator & findFirstNodeByAttribute( string name, string value )
00434 {
00435 xnode_it = states.nodes.begin();
00436 return seekNodeByAttribute( name, value );
00437 }
00439 node_vector::iterator & findNextNodeByAttribute( string name, string value )
00440 {
00441 ++xnode_it;
00442 return seekNodeByAttribute( name, value );
00443 }
00444
00445 protected:
00446
00447 node_vector::iterator & seekNodeByAttribute( string name, string value )
00448 {
00449 for( ;xnode_it!=states.nodes.end();++xnode_it)
00450 {
00451 node_attr::iterator it = (*xnode_it).attr.find(name);
00452 if( it != (*xnode_it).attr.end() )
00453 {
00454 if( it->second == value )
00455 {
00456 break;
00457 }
00458 }
00459 }
00460 return xnode_it;
00461 }
00462 node_vector::iterator & seekNodeByName( string name )
00463 {
00464 for( ;xnode_it!=states.nodes.end();++xnode_it)
00465 {
00466 if( (*xnode_it).name == name )
00467 {
00468 break;
00469 }
00470 }
00471 return xnode_it;
00472 }
00473 };
00474
00475 #endif
00476
00477 #ifdef IN_T2_TESTHARNESS
00478 xmlParser parser( &cin );
00479 parser.useXPATH = true;
00480 parser.parse();
00481 node_vector & xnodes = parser.nodeList();
00482 int i;
00483
00484 for(i=0;i<xnodes.size();i++)
00485 {
00486 xmlNode & node = xnodes[i];
00487 cout << "Node: " << node.name << endl
00488 << " contains: " << node.data << endl;
00489 cout << "node attributes" << endl << "=================" << endl;
00490 node_attr::iterator x;
00491 for( x=node.attr.begin(); x!=node.attr.end(); ++x)
00492 {
00493 cout << x->first << " = " << x->second << endl;
00494 }
00495 cout << "=================" << endl;
00496 }
00497 node_map::iterator idx = parser.states.nodemap.begin();
00498 while( idx != parser.states.nodemap.end() )
00499 {
00500 cout << "PATH: " << idx->first << " has " << xnodes[idx->second].name << endl;
00501 idx++;
00502 }
00503
00504
00505
00506
00507
00508
00509
00510
00511
00512 #endif