Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

xmltokenizer.cpp

Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 3 -*- */
00002 // vim:cindent:ts=3:sw=3:et:tw=80:sta:
00003 /*************************************************************** cppdom-cpr beg
00004  * 
00005  * cppdom was forked from the original xmlpp version 0.6 under the LGPL. This
00006  * new, branched xmlpp is under the same LGPL (of course) and is being
00007  * maintained by:
00008  *      Kevin Meinert   <subatomic@users.sourceforge.net>
00009  *      Allen Bierbaum  <allenb@users.sourceforge.net>
00010  *      Ben Scott       <nonchocoboy@users.sourceforge.net>
00011  *
00012  * -----------------------------------------------------------------
00013  *
00014  * xmlpp - an xml parser and validator written in C++
00015  * copyright (c) 2000-2001 Michael Fink
00016  *
00017  * This library is free software; you can redistribute it and/or
00018  * modify it under the terms of the GNU Library General Public
00019  * License as published by the Free Software Foundation; either
00020  * version 2 of the License, or (at your option) any later version.
00021  *
00022  * This library is distributed in the hope that it will be useful,
00023  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00024  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00025  * Library General Public License for more details.
00026  *
00027  * You should have received a copy of the GNU Library General Public
00028  * License along with this library; if not, write to the
00029  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00030  * Boston, MA 02111-1307, USA.
00031  *
00032  * -----------------------------------------------------------------
00033  * File:          $RCSfile: xmltokenizer.cpp,v $
00034  * Date modified: $Date: 2003/01/03 03:06:47 $
00035  * Version:       $Revision: 1.13 $
00036  * -----------------------------------------------------------------
00037  *
00038  ************************************************************ cppdom-cpr-end */
00045 // needed includes
00046 #include "cppdom.h"
00047 #include "xmltokenizer.h"
00048 
00049 
00050 // namespace declaration
00051 namespace cppdom
00052 {
00053    // XMLToken methods
00054    XMLToken::XMLToken()
00055       : mIsLiteral(true)
00056       , mLiteral(0)
00057    {}
00058 
00059    XMLToken::XMLToken(char ch)
00060       : mIsLiteral(true)
00061       , mLiteral(ch)
00062    {}
00063 
00064    XMLToken::XMLToken(const std::string& str)
00065       : mIsLiteral(false)
00066       , mLiteral(0)
00067       , mGeneric(str)
00068    {}
00069 
00070    bool XMLToken::isLiteral() const
00071    {
00072       return mIsLiteral;
00073    }
00074 
00075    bool XMLToken::isEndOfStream() const
00076    {
00077       return mIsLiteral && mLiteral == char(EOF);
00078    }
00079 
00080    char XMLToken::getLiteral() const
00081    {
00082       return mLiteral;
00083    }
00084 
00085    const std::string& XMLToken::getGeneric() const
00086    {
00087       return mGeneric;
00088    }
00089 
00090    bool XMLToken::operator==(char ch) const
00091    {
00092       return !isLiteral() ? false : ch == mLiteral;
00093    }
00094 
00095    bool XMLToken::operator!=(char ch) const
00096    {
00097       return ! operator==(ch);
00098    }
00099 
00100    bool XMLToken::operator==(const std::string& str) const
00101    {
00102       return !isLiteral() ? str == mGeneric : false;
00103    }
00104 
00105    bool XMLToken::operator!=(const std::string& str) const
00106    {
00107       return ! operator==(str);
00108    }
00109 
00110    XMLToken& XMLToken::operator=(const std::string& str)
00111    {
00112       mGeneric = str;
00113       mIsLiteral = false;
00114       return *this;
00115    }
00116 
00117    XMLToken& XMLToken::operator=(char ch)
00118    {
00119       mLiteral = ch;
00120       mIsLiteral = true;
00121       return *this;
00122    }
00123 
00124    // XMLTokenizer methods
00125 
00126    XMLTokenizer::XMLTokenizer(std::istream& in, XMLLocation& loc)
00127       : mInput(in), mLocation(loc)
00128    {}
00129 
00130    XMLTokenizer::~XMLTokenizer()
00131    {}
00132 
00133    XMLToken& XMLTokenizer::operator*()
00134    {
00135       return mCurToken;
00136    }
00137 
00138    const XMLToken* XMLTokenizer::operator->()
00139    {
00140       return &mCurToken;
00141    }
00142 
00143    XMLTokenizer& XMLTokenizer::operator++()
00144    {
00145       getNext();
00146       return *this;
00147    }
00148 
00149    XMLTokenizer& XMLTokenizer::operator++(int)
00150    {
00151       getNext();
00152       return *this;
00153    }
00154 
00155    XMLToken& XMLTokenizer::get()
00156    {
00157       return mCurToken;
00158    }
00159 
00160    void XMLTokenizer::putBack(XMLToken& token)
00161    {
00162       mTokenStack.push(token);
00163    }
00164 
00165    void XMLTokenizer::putBack()
00166    {
00167       mTokenStack.push(mCurToken);
00168    }
00169 
00170    // xmlstream_iterator methods
00171    xmlstream_iterator::xmlstream_iterator(std::istream& in, XMLLocation& loc)
00172       : XMLTokenizer(in, loc)
00173       , mCdataMode(false)
00174       , mPutbackChar(-1)
00175    {}
00176 
00178    void xmlstream_iterator::getNext()
00179    {
00180       // first use the token stack if filled
00181       if (mTokenStack.size() != 0)
00182       {
00183          // get the token from the stack and return it
00184          XMLToken tok;
00185          mCurToken = mTokenStack.top();
00186          mTokenStack.pop();
00187 
00188          return;
00189       }
00190 
00191       bool finished = false;
00192 
00193       std::string generic;
00194 
00195       // get next char
00196       char c;
00197 
00198       do
00199       {
00200          if (mPutbackChar == char(-1))
00201          {
00202             c = mInput.get();
00203             mLocation.step();
00204          }
00205          else
00206          {
00207             c = mPutbackChar;
00208             mPutbackChar = char(-1);
00209             mLocation.step();
00210          }
00211 
00212          // do we have an eof?
00213          // TODO: check for instr.eof()
00214          if (c == char(EOF))
00215          {
00216             if (generic.length() != 0)
00217             {
00218                mCurToken = c;
00219                return;
00220             }
00221             else
00222             {
00223                break;
00224             }
00225          }
00226 
00227          // is it a literal?
00228          if (isLiteral(c))
00229          {
00230             mCdataMode = false;
00231             if (generic.length() == 0)
00232             {
00233                mCurToken = c;
00234 
00235                // quick fix for removing set_cdataMode() functionality
00236                if (c == '>')
00237                {
00238                   mCdataMode = true;
00239                }
00240 
00241                return;
00242             }
00243             mPutbackChar = c;
00244             mLocation.step(-1);
00245             break;
00246          }
00247 
00248          // a string delimiter and not in cdata mode?
00249          if (isStringDelimiter(c) && !mCdataMode)
00250          {
00251             generic = c;
00252             char delim = c;
00253             do
00254             {
00255                c = mInput.get();
00256                mLocation.step();
00257                if (c == char(EOF))
00258                {
00259                   break;
00260                }
00261                generic += c;
00262             }
00263             while (c != delim);
00264             break;
00265          }
00266 
00267          // a whitespace?
00268          if (isWhiteSpace(c))
00269          {
00270             if (generic.length() == 0)
00271             {
00272                continue;
00273             }
00274             else
00275             {
00276                if (!mCdataMode)
00277                {
00278                   break;
00279                }
00280             }
00281          }
00282 
00283          // a newline char?
00284          if (isNewLine(c) )
00285          {
00286             if (mCdataMode && generic.length() != 0)
00287             {
00288                c = ' ';
00289             }
00290             else
00291             {
00292                continue;
00293             }
00294          }
00295 
00296          // add to generic string
00297          generic += c;
00298       }
00299       while (!finished);
00300 
00301       // set the generic string
00302       mCurToken = generic;
00303    }
00304 
00305    // returns if we have a literal char
00306    bool xmlstream_iterator::isLiteral(char c)
00307    {
00308       switch(c)
00309       {
00310       case '?':
00311       case '=':
00312       case '!':
00313       case '/':
00314          if (mCdataMode)
00315          {
00316             return false;
00317          }
00318       case '<':
00319       case '>':
00320          return true;
00321       }
00322       return false;
00323    }
00324 
00325    // returns if we have a white space char
00326    bool xmlstream_iterator::isWhiteSpace(char c)
00327    {
00328       switch(c)
00329       {
00330       case ' ':
00331       case '\t':
00332          return true;
00333       }
00334       return false;
00335    }
00336 
00337    // returns if we have a newline
00338    bool xmlstream_iterator::isNewLine(char c)
00339    {
00340       switch(c)
00341       {
00342       case '\n':
00343          mLocation.newline();
00344       case '\r':
00345          return true;
00346       }
00347       return false;
00348    }
00349 
00350    // returns if we have a string delimiter (separating " and ')
00351    bool xmlstream_iterator::isStringDelimiter(char c)
00352    {
00353       switch(c)
00354       {
00355       case '\"':
00356       case '\'':
00357          return true;
00358       }
00359       return false;
00360    }
00361 }

Generated on Thu Jan 2 21:29:17 2003 for cppdom by doxygen1.2.15