Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

xmlparser.cpp

Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 3 -*- */
00002 // vim:cindent:ts=3:sw=3:et:tw=80:sta:
00003 /*************************************************************** cppdom-cpr beg
00004  * 
00005  * cppdom was forked from the original xmlpp version 0.6 under the LGPL. This
00006  * new, branched xmlpp is under the same LGPL (of course) and is being
00007  * maintained by:
00008  *      Kevin Meinert   <subatomic@users.sourceforge.net>
00009  *      Allen Bierbaum  <allenb@users.sourceforge.net>
00010  *      Ben Scott       <nonchocoboy@users.sourceforge.net>
00011  *
00012  * -----------------------------------------------------------------
00013  *
00014  * xmlpp - an xml parser and validator written in C++
00015  * copyright (c) 2000-2001 Michael Fink
00016  *
00017  * This library is free software; you can redistribute it and/or
00018  * modify it under the terms of the GNU Library General Public
00019  * License as published by the Free Software Foundation; either
00020  * version 2 of the License, or (at your option) any later version.
00021  *
00022  * This library is distributed in the hope that it will be useful,
00023  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00024  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00025  * Library General Public License for more details.
00026  *
00027  * You should have received a copy of the GNU Library General Public
00028  * License along with this library; if not, write to the
00029  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00030  * Boston, MA 02111-1307, USA.
00031  *
00032  * -----------------------------------------------------------------
00033  * File:          $RCSfile: xmlparser.cpp,v $
00034  * Date modified: $Date: 2003/01/03 03:06:47 $
00035  * Version:       $Revision: 1.20 $
00036  * -----------------------------------------------------------------
00037  *
00038  ************************************************************ cppdom-cpr-end */
00045 // needed includes
00046 #include "xmlparser.h"
00047 
00048 // namespace declaration
00049 namespace cppdom
00050 {
00051    // XMLParser methods
00052    XMLParser::XMLParser(std::istream& in, XMLLocation& loc)
00053       : mInput(in), mTokenizer(in, loc)
00054    {}
00055 
00056    bool XMLParser::parseDocument(XMLDocument& doc, XMLContextPtr& context)
00057    {
00058       // set root nodename
00059       doc.mContext = context;
00060       std::string rootstr("root");
00061       doc.mNodeNameHandle = context->insertTagname(rootstr);
00062 
00063       bool handle = context->handleEvents();
00064 
00065       // start parsing
00066       if (handle)
00067       {
00068          context->getEventHandler().startDocument();
00069       }
00070 
00071       parseHeader(doc, context);
00072 
00073       // parse the only one subnode
00074       XMLNodePtr new_subnode(new XMLNode(context));
00075 
00076       bool ret = parseNode(*new_subnode, context);
00077 
00078       // if successful, put node into nodelist
00079       if (ret)
00080       {
00081          doc.addChild(new_subnode);
00082       }
00083 
00084       if (handle)
00085       {
00086          context->getEventHandler().endDocument();
00087       }
00088 
00089       return ret;
00090    }
00091 
00092    // parses the header, ie processing instructions and doctype tag
00094    bool XMLParser::parseHeader(XMLDocument& doc, XMLContextPtr& context)
00095    {
00096       while(true)
00097       {
00098          ++mTokenizer;
00099          XMLToken token1 = *mTokenizer;
00100          if (token1 != '<')
00101          {
00102             throw XMLError(xml_opentag_expected);
00103          }
00104 
00105          // token after opening < is a literal?
00106          mTokenizer++;
00107          XMLToken token2 = *mTokenizer;
00108          if (!token2.isLiteral())
00109          {
00110             // generic string encountered: assume no pi and doctype tags
00111             mTokenizer.putBack();
00112             mTokenizer.putBack(token1);
00113             return false;
00114          }
00115 
00116          // now check for the literal
00117          switch(token2.getLiteral())
00118          {
00119             // comment or doctype tag
00120          case '!':
00121             {
00122                ++mTokenizer;
00123                XMLToken token3 = *mTokenizer;
00124 
00125                if (!token3.isLiteral())
00126                {
00127                   // now a doctype tag or a comment may follow
00128                   if (token3.getGeneric().at(0) == '-' &&
00129                       token3.getGeneric().at(1) == '-')
00130                   {
00131                       parseComment(context);
00132                   }
00133                   else
00134                   {
00135                      std::string doctypestr(token3.getGeneric());
00136 
00137                      std::transform(doctypestr.begin(), doctypestr.end(), doctypestr.begin(), toupper);
00138 
00139                      if (doctypestr == "DOCTYPE")
00140                      {
00141                         // \todo parse doctype tag
00142 
00143                         // read the complete tag till the closing >
00144                         while (*(mTokenizer++) != '>');
00145                      }
00146                      else
00147                      {
00148                         throw XMLError(xml_unknown);
00149                      }
00150                   }
00151                }
00152                else
00153                {
00154                   throw XMLError(xml_pi_doctype_expected);
00155                }
00156 
00157                break;
00158             }
00159          case '?':
00160             {
00161                ++mTokenizer;
00162                XMLToken token3 = *mTokenizer;
00163 
00164                if (token3.isLiteral())
00165                {
00166                   throw XMLError(xml_pi_doctype_expected);
00167                }
00168 
00169                // parse processing instruction
00170                XMLNode pinode(context);
00171 
00172                std::string tagname(token3.getGeneric());
00173                pinode.mNodeNameHandle = context->insertTagname(tagname);
00174 
00175                parseAttributes(pinode.getAttrMap());
00176 
00177                XMLNodePtr nodeptr(new XMLNode(pinode));
00178                doc.mProcInstructions.push_back(nodeptr);
00179 
00180                if (context->handleEvents())
00181                {
00182                   context->getEventHandler().processingInstruction(pinode);
00183                }
00184 
00185                ++mTokenizer;
00186                if (*mTokenizer != '?')
00187                {
00188                   throw XMLError(xml_pi_doctype_expected);
00189                }
00190 
00191                ++mTokenizer;
00192                if (*mTokenizer != '>')
00193                {
00194                   throw XMLError(xml_closetag_expected);
00195                }
00196                break;
00197             }
00198          default:
00199             // unknown literal encountered
00200             throw XMLError(xml_pi_doctype_expected);
00201 
00202          } // end switch
00203 
00204       } // end while
00205    }
00206 
00207    // parses the contents of the current node
00208    bool XMLParser::parseNode(XMLNode& node, XMLContextPtr& context)
00209    {
00210       node.mContext = context;
00211       bool handle = context->handleEvents();
00212 
00213       ++mTokenizer;
00214       XMLToken token1 = *mTokenizer;
00215 
00216       if (token1.isEndOfStream())
00217       {
00218          return false;
00219       }
00220 
00221       XMLToken token2;
00222 
00223       // loop when we encounter a comment
00224       bool again;
00225       do
00226       {
00227          again = false;
00228 
00229          // check if we have cdata
00230          if (!token1.isLiteral())
00231          {
00232             std::string cdataname("cdata");
00233             node.mNodeNameHandle = context->insertTagname(cdataname);
00234 
00235             // parse cdata section(s) and return
00236             node.mNodeType = xml_nt_cdata;
00237             node.mCdata.empty();
00238 
00239             while(!token1.isLiteral())
00240             {
00241                node.mCdata += token1.getGeneric();
00242                ++mTokenizer;
00243                token1 = *mTokenizer;
00244             }
00245             mTokenizer.putBack();
00246 
00247             if (handle)
00248             {
00249                context->getEventHandler().gotCdata( node.mCdata );
00250             }
00251 
00252             return true;
00253          }
00254 
00255          // no cdata, try to continue parsing node content
00256          // Must be a start of a node (ie. < literal)
00257          if (token1 != '<')
00258          {
00259             throw XMLError(xml_opentag_cdata_expected);
00260          }
00261 
00262          // get node name
00263          ++mTokenizer;
00264          token2 = *mTokenizer;
00265          if (token2.isLiteral())
00266          {
00267             // check the following literal
00268             switch(token2.getLiteral())
00269             {
00270                // closing '</...>' follows
00271             case '/':
00272                // return, we have a closing node with no more content
00273                mTokenizer.putBack();
00274                mTokenizer.putBack(token1);
00275                return false;
00276 
00277                // comment follows
00278             case '!':
00279                this->parseComment(context);
00280 
00281                // get next token
00282                ++mTokenizer;
00283                token1 = *mTokenizer;
00284 
00285                // parse again, until we encounter some useful data
00286                again = true;
00287                break;
00288 
00289             default:
00290                throw XMLError(xml_tagname_expected);
00291             }
00292          }
00293       } while (again);
00294 
00295       // insert tag name and set handle for it
00296       std::string tagname(token2.getGeneric());
00297       node.mNodeNameHandle = context->insertTagname(tagname);
00298 
00299       // notify event handler
00300       if (handle)
00301       {
00302          context->getEventHandler().startNode(tagname);
00303       }
00304 
00305       // parse attributes
00306       this->parseAttributes(node.getAttrMap());
00307 
00308       if (handle)
00309       {
00310          context->getEventHandler().parsedAttributes(node.getAttrMap());
00311       }
00312 
00313       // check for leaf
00314       ++mTokenizer;
00315       XMLToken token3 = *mTokenizer;
00316       if (token3 == '/' )
00317       {
00318          // node has finished
00319          ++mTokenizer;
00320          XMLToken token4 = *mTokenizer;
00321          if (token4 != '>' )
00322          {
00323             throw XMLError(xml_closetag_expected);
00324          }
00325 
00326          node.mNodeType = xml_nt_leaf;
00327 
00328          // return, let the caller continue to parse
00329          return true;
00330       }
00331 
00332       // now a closing bracket must follow
00333       if (token3 != '>')
00334       {
00335          throw XMLError(xml_closetag_expected);
00336       }
00337 
00338       // loop to parse all subnodes
00339       while (true)
00340       {
00341          // create subnode
00342          XMLNodePtr new_subnode(new XMLNode(context));
00343 
00344          // try to parse possible sub nodes
00345          if (this->parseNode(*new_subnode, context))
00346          {
00347             // if successful, put node into nodelist
00348    //         XMLNodePtr nodeptr( new XMLNode(subnode) );
00349             node.addChild(new_subnode);
00350          }
00351          else
00352          {
00353             break;
00354          }
00355       }
00356 
00357       // parse end tag
00358       XMLToken token5 = *mTokenizer++;
00359       ++mTokenizer;
00360       if (token5 != '<' && *mTokenizer != '/')
00361       {
00362          throw XMLError(xml_opentag_expected);
00363       }
00364 
00365       ++mTokenizer;
00366       token1 = *mTokenizer;
00367       if (token1.isLiteral())
00368       {
00369          throw XMLError(xml_tagname_expected);
00370       }
00371 
00372       // check if open and close tag names are identical
00373       if (token1.getGeneric() != token2.getGeneric())
00374       {
00375          throw XMLError(xml_tagname_close_mismatch);
00376       }
00377 
00378       ++mTokenizer;
00379       if (*mTokenizer != '>')
00380       {
00381          throw XMLError(xml_opentag_expected);
00382       }
00383 
00384       if (handle)
00385       {
00386          context->getEventHandler().endNode(node);
00387       }
00388 
00389       return true;
00390    }
00391 
00392    // parses tag attributes
00393    bool XMLParser::parseAttributes(XMLAttributes& attr)
00394    {
00395       while(true)
00396       {
00397          ++mTokenizer;
00398          XMLToken token1 = *mTokenizer;
00399 
00400          if (token1.isLiteral())
00401          {
00402             mTokenizer.putBack();
00403             return false;
00404          }
00405 
00406          // guru: get value name here
00407          std::string name = token1.getGeneric();
00408 
00409          ++mTokenizer;
00410          if (*mTokenizer != '=')
00411          {
00412             throw XMLError(xml_attr_equal_expected);
00413          }
00414 
00415          ++mTokenizer;
00416          XMLToken token2 = *mTokenizer;
00417 
00418          if (token2.isLiteral())
00419          {
00420             throw XMLError(xml_attr_value_expected);
00421          }
00422 
00423          // remove "" from attribute value
00424          std::string value(token2.getGeneric());
00425          value.erase(0, 1);
00426          value.erase(value.length()-1, 1);
00427 
00428          // insert attribute into the map
00429          // guru: we got the name already
00430          XMLAttributes::value_type attrpair(name, value);
00431          attr.insert(attrpair);
00432       }
00433       return true;
00434    }
00435 
00436    void XMLParser::parseComment(XMLContextPtr& context)
00437    {
00438       // get tokens until comment is over
00439       while (true)
00440       {
00441          ++mTokenizer;
00442          if (*mTokenizer == "--")
00443          {
00444             ++mTokenizer;
00445             if (*mTokenizer == '>')
00446             {
00447                break;
00448             }
00449          }
00450       }
00451    }
00452 }

Generated on Thu Jan 2 21:29:17 2003 for cppdom by doxygen1.2.15