PTLib  Version 2.10.4
pxml.h
Go to the documentation of this file.
00001 /*
00002  * pxml.h
00003  *
00004  * XML parser support
00005  *
00006  * Portable Windows Library
00007  *
00008  * Copyright (c) 2002 Equivalence Pty. Ltd.
00009  *
00010  * The contents of this file are subject to the Mozilla Public License
00011  * Version 1.0 (the "License"); you may not use this file except in
00012  * compliance with the License. You may obtain a copy of the License at
00013  * http://www.mozilla.org/MPL/
00014  *
00015  * Software distributed under the License is distributed on an "AS IS"
00016  * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
00017  * the License for the specific language governing rights and limitations
00018  * under the License.
00019  *
00020  * The Original Code is Portable Windows Library.
00021  *
00022  * The Initial Developer of the Original Code is Equivalence Pty. Ltd.
00023  *
00024  * Contributor(s): ______________________________________.
00025  *
00026  * $Revision: 24995 $
00027  * $Author: csoutheren $
00028  * $Date: 2011-01-04 19:12:33 -0600 (Tue, 04 Jan 2011) $
00029  */
00030 
00031 #ifndef PTLIB_PXML_H
00032 #define PTLIB_PXML_H
00033 
00034 #ifdef P_USE_PRAGMA
00035 #pragma interface
00036 #endif
00037 
00038 #include <ptlib.h>
00039 
00040 #include <ptbuildopts.h>
00041 
00042 #ifndef P_EXPAT
00043 
// Fallback for builds where P_EXPAT is not defined: only the escaping helper
// is declared, inside a namespace called PXML. The P_EXPAT branch declares a
// static member with the same name and signature in class PXML, so callers can
// write PXML::EscapeSpecialChars(str) in either configuration.
// NOTE(review): the ';' after the closing brace is an empty declaration; it is
// harmless, but strict pre-C++11 builds may warn about it.
00044 namespace PXML {
00045 extern PString EscapeSpecialChars(const PString & str);
00046 };
00047 
00048 #else
00049 
00050 #include <ptclib/http.h>
00051 
00053 
// Forward declarations: the classes below refer to each other by pointer
// before their definitions appear (for example PXML uses PXMLElement *).
// PXMLElement and PXMLData are declared twice; the repetition is redundant
// but harmless.
00054 class PXMLElement;
00055 class PXMLData;
00056 
00057 
00058 class PXMLObject;
00059 class PXMLElement;
00060 class PXMLData;
00061 
00063 
// PXMLBase: common base of PXML and PXMLParser (both derive from it further
// down). It carries a single int of option flags plus a virtual
// IsNoIndentElement() query whose default answer is false.
00064 class PXMLBase : public PObject
00065 {
00066   public:
          // Option bit flags. Each named value occupies its own bit and
          // AllOptions sets the low 16 bits. What each flag does is decided by
          // the code that reads GetOptions(), which is not visible in this header.
00067     enum Options {
00068       NoOptions           = 0x0000,
00069       Indent              = 0x0001,
00070       NewLineAfterElement = 0x0002,
00071       NoIgnoreWhiteSpace  = 0x0004,   
00072       CloseExtended       = 0x0008,   
00073       WithNS              = 0x0010,
00074       FragmentOnly        = 0x0020,   
00075       AllOptions          = 0xffff
00076     };
          // Keep the Options type when OR-ing / AND-ing flags; the built-in
          // operators applied to an unscoped enum would yield an integer.
00077     __inline friend Options operator|(Options o1, Options o2) { return (Options)(((unsigned)o1) | ((unsigned)o2)); }
00078     __inline friend Options operator&(Options o1, Options o2) { return (Options)(((unsigned)o1) & ((unsigned)o2)); }
00079 
          // Stand-alone state. The first two values are explicit (-2 and -1);
          // NotStandAlone and IsStandAlone therefore number 0 and 1.
00080     enum StandAloneType {
00081       UninitialisedStandAlone = -2,
00082       UnknownStandAlone = -1,
00083       NotStandAlone,
00084       IsStandAlone
00085     };
00086 
          // Stores opts verbatim. The parameter is an int rather than Options, so
          // any integer bit pattern is accepted.
00087     PXMLBase(int opts = NoOptions)
00088       : m_options(opts) { }
00089 
          // Replaces the whole flag word; it does not merge with the old value.
00090     void SetOptions(int opts)
00091       { m_options = opts; }
00092 
          // Returns the stored flag word.
00093     int GetOptions() const { return m_options; }
00094 
          // Virtual query keyed by element name. This base version ignores the
          // name (the parameter is left unnamed) and always returns false; PXML
          // declares an override below.
00095     virtual PBoolean IsNoIndentElement(
00096       const PString & /*elementName*/
00097     ) const
00098     {
00099       return false;
00100     }
00101 
00102   protected:
          // Option flag word, read and written only through the accessors above
          // within this class.
00103     int m_options;
00104 };
00105 
00106 
// PXML: XML document object. Holds a pointer to the root PXMLElement
// (rootElement) together with load/save, element lookup and validation entry
// points. Most members are only declared here; their behaviour lives in the
// implementation file and is not described by these comments.
// NOTE(review): the class derives from PXMLBase but PCLASSINFO names PObject
// as the parent, whereas PXMLParser below passes PXMLBase -- confirm that the
// difference is intentional.
00107 class PXML : public PXMLBase
00108 {
00109   PCLASSINFO(PXML, PObject);
00110   public:
00111 
          // Constructors. `options` is an int, matching PXMLBase's constructor;
          // `noIndentElements` is optional and defaults to NULL. `data` is
          // presumably XML text to be parsed -- confirm in the implementation.
00112     PXML(
00113       int options = NoOptions,
00114       const char * noIndentElements = NULL
00115     );
00116     PXML(
00117       const PString & data,
00118       int options = NoOptions,
00119       const char * noIndentElements = NULL
00120     );
00121 
00122     PXML(const PXML & xml);
00123 
00124     ~PXML();
00125 
          // IsLoaded() is true exactly when rootElement is non-NULL.
00126     bool IsLoaded() const { return rootElement != NULL; }
00127     bool IsDirty() const;
00128 
00129     bool Load(const PString & data, Options options = NoOptions);
00130 
          // URL loading and timed auto-reload; compiled only when P_HTTP is non-zero.
00131 #if P_HTTP
00132     bool StartAutoReloadURL(
00133       const PURL & url, 
00134       const PTimeInterval & timeout, 
00135       const PTimeInterval & refreshTime,
00136       Options options = NoOptions
00137     );
00138     bool StopAutoReloadURL();
          // Copies autoLoadError while the PWaitAndSignal object built on
          // autoLoadMutex is in scope, then returns the copy.
00139     PString GetAutoReloadStatus() { PWaitAndSignal m(autoLoadMutex); PString str = autoLoadError; return str; }
00140     bool AutoLoadURL();
00141     virtual void OnAutoLoad(PBoolean ok);
00142 
00143     bool LoadURL(const PURL & url);
00144     bool LoadURL(const PURL & url, const PTimeInterval & timeout, Options options = NoOptions);
00145 #endif // P_HTTP
00146 
00147     bool LoadFile(const PFilePath & fn, Options options = NoOptions);
00148 
          // Virtual hook with an empty default body; presumably invoked after a
          // successful load -- confirm at the call site.
00149     virtual void OnLoaded() { }
00150 
00151     bool Save(Options options = NoOptions);
00152     bool Save(PString & data, Options options = NoOptions);
00153     bool SaveFile(const PFilePath & fn, Options options = NoOptions);
00154 
00155     void RemoveAll();
00156 
          // Same signature as the virtual declared in PXMLBase, so this overrides it.
00157     PBoolean IsNoIndentElement(
00158       const PString & elementName
00159     ) const;
00160 
00161     PString AsString() const;
00162     void PrintOn(ostream & strm) const;
00163     void ReadFrom(istream & strm);
00164 
00165 
          // Element lookup. GetRootElement() simply returns rootElement; the other
          // overloads are defined out of line.
00166     PXMLElement * GetElement(const PCaselessString & name, const PCaselessString & attr, const PString & attrval) const;
00167     PXMLElement * GetElement(const PCaselessString & name, PINDEX idx = 0) const;
00168     PXMLElement * GetElement(PINDEX idx) const;
00169     PINDEX        GetNumElements() const; 
00170     PXMLElement * GetRootElement() const { return rootElement; }
00171     PXMLElement * SetRootElement(PXMLElement * p);
00172     PXMLElement * SetRootElement(const PString & documentType);
00173     bool          RemoveElement(PINDEX idx);
00174 
00175     PCaselessString GetDocumentType() const;
00176 
00177 
          // Operation codes stored in ValidationInfo::m_op. Each of the four ...Ex
          // codes is the matching base code plus 0x8000.
00178     enum ValidationOp {
00179       EndOfValidationList,
00180       DocType,
00181       ElementName,
00182       RequiredAttribute,
00183       RequiredNonEmptyAttribute,
00184       RequiredAttributeWithValue,
00185       RequiredElement,
00186       Subtree,
00187       RequiredAttributeWithValueMatching,
00188       RequiredElementWithBodyMatching,
00189       OptionalElement,
00190       OptionalAttribute,
00191       OptionalNonEmptyAttribute,
00192       OptionalAttributeWithValue,
00193       OptionalAttributeWithValueMatching,
00194       OptionalElementWithBodyMatching,
00195       SetDefaultNamespace,
00196       SetNamespace,
00197 
00198       RequiredAttributeWithValueMatchingEx = RequiredAttributeWithValueMatching + 0x8000,
00199       OptionalAttributeWithValueMatchingEx = OptionalAttributeWithValueMatching + 0x8000,
00200       RequiredElementWithBodyMatchingEx    = RequiredElementWithBodyMatching    + 0x8000,
00201       OptionalElementWithBodyMatchingEx    = OptionalElementWithBodyMatching    + 0x8000
00202     };
00203 
          // Namespace state passed by reference through ValidateElements() and
          // ValidateElement().
00204     struct ValidationContext {
00205       PString m_defaultNameSpace;
00206       PStringToString m_nameSpaces;
00207     };
00208 
          // One validation rule. The anonymous union overlays four pointer members
          // in the same storage; which one is meaningful presumably depends on
          // m_op -- confirm in the validator implementation.
00209     struct ValidationInfo {
00210       ValidationOp m_op;
00211       const char * m_name;
00212 
00213       union {
00214         const void     * m_placeHolder;
00215         const char     * m_attributeValues;
00216         ValidationInfo * m_subElement;
00217         const char     * m_namespace;
00218       };
00219 
00220       PINDEX m_minCount;
00221       PINDEX m_maxCount;
00222     };
00223 
          // Validation entry points. Each takes a pointer to ValidationInfo;
          // presumably an array ended by an EndOfValidationList entry -- confirm.
          // LoadAndValidate() takes its options as int, unlike Load() above.
00224     bool Validate(const ValidationInfo * validator);
00225     bool ValidateElements(ValidationContext & context, PXMLElement * baseElement, const ValidationInfo * elements);
00226     bool ValidateElement(ValidationContext & context, PXMLElement * element, const ValidationInfo * elements);
00227     bool LoadAndValidate(const PString & body, const PXML::ValidationInfo * validator, PString & error, int options = NoOptions);
00228 
          // Return copies of the error fields declared in the protected section.
          // m_errorString is a PStringStream and is returned as a PString.
00229     PString  GetErrorString() const { return m_errorString; }
00230     unsigned GetErrorColumn() const { return m_errorColumn; }
00231     unsigned GetErrorLine() const   { return m_errorLine; }
00232 
          // Accessors for the docType member. GetDocumentType() above is a
          // separate function that returns PCaselessString and is defined
          // out of line.
00233     PString GetDocType() const         { return docType; }
00234     void SetDocType(const PString & v) { docType = v; }
00235 
          // Hands out rootMutex by non-const reference.
00236     PMutex & GetMutex() { return rootMutex; }
00237 
          // PTLib notifier macros; presumably they declare the AutoReloadTimeout /
          // AutoReloadThread callbacks used by the auto-reload feature -- confirm
          // against the macro definition.
00238 #if P_HTTP
00239     PDECLARE_NOTIFIER(PTimer,  PXML, AutoReloadTimeout);
00240     PDECLARE_NOTIFIER(PThread, PXML, AutoReloadThread);
00241 #endif // P_HTTP
00242 
00243     // static methods to create XML tags
00244     static PString CreateStartTag (const PString & text);
00245     static PString CreateEndTag (const PString & text);
00246     static PString CreateTagNoData (const PString & text);
00247     static PString CreateTag (const PString & text, const PString & data);
00248 
          // Static counterpart of the free PXML::EscapeSpecialChars() declared in
          // the non-P_EXPAT branch at the top of this header.
00249     static PString EscapeSpecialChars(const PString & string);
00250 
00251   protected:
          // Helper taking the same (options, noIndentElements) pair as the
          // constructors.
00252     void Construct(int options, const char * noIndentElements);
          // Root of the element tree; NULL means "not loaded" (see IsLoaded()).
00253     PXMLElement * rootElement;
          // Mutex returned by GetMutex().
00254     PMutex rootMutex;
00255 
00256     bool loadFromFile;
00257     PFilePath loadFilename;
00258     PString version, encoding;
00259     StandAloneType m_standAlone;
00260 
          // State for the auto-reload feature; present only when P_HTTP is non-zero.
00261 #if P_HTTP
00262     PTimer autoLoadTimer;
00263     PURL autoloadURL;
00264     PTimeInterval autoLoadWaitTime;
          // Held by GetAutoReloadStatus() while it copies autoLoadError.
00265     PMutex autoLoadMutex;
00266     PString autoLoadError;
00267 #endif // P_HTTP
00268 
          // Error details exposed through GetErrorString/Line/Column().
00269     PStringStream m_errorString;
00270     unsigned      m_errorLine;
00271     unsigned      m_errorColumn;
00272 
00273     PSortedStringList noIndentElements;
00274 
00275     PString docType;
00276     PString m_defaultNameSpace;
00277 };
00278 
00280 
// PTLib container macro. It introduces the PXMLObjectArray type that
// PXMLElement uses for its subObjects member; presumably an array of
// PXMLObject pointers -- confirm against the PARRAY definition.
00281 PARRAY(PXMLObjectArray, PXMLObject);
00282 
// PXMLObject: abstract base for the nodes of the tree (PXMLData and
// PXMLElement derive from it). It stores the parent element pointer and a
// dirty flag, and leaves Output(), IsElement() and Clone() pure virtual.
00283 class PXMLObject : public PObject {
00284   PCLASSINFO(PXMLObject, PObject);
00285   public:
          // Records the parent pointer and starts with the dirty flag cleared.
00286     PXMLObject(PXMLElement * par)
00287       : parent(par) { dirty = false; }
00288 
          // Returns the stored parent pointer (may be NULL).
00289     PXMLElement * GetParent() const
00290       { return parent; }
00291 
00292     PXMLObject * GetNextObject() const;
00293 
          // Asserts that no parent is set yet, then stores newParent. The
          // assignment itself is unconditional, so whether a second call takes
          // effect depends on what PAssert does on failure.
          // NOTE(review): the message says "PXMLElement" although this method
          // belongs to PXMLObject and so also applies to PXMLData.
00294     void SetParent(PXMLElement * newParent)
00295     { 
00296       PAssert(parent == NULL, "Cannot reparent PXMLElement");
00297       parent = newParent;
00298     }
00299 
00300     PString AsString() const;
00301 
          // Writes this node to strm; xml and indent are supplied by the caller.
00302     virtual void Output(ostream & strm, const PXMLBase & xml, int indent) const = 0;
00303 
          // Node-kind query: PXMLElement answers true, PXMLData answers false.
00304     virtual PBoolean IsElement() const = 0;
00305 
          // SetDirty() is defined out of line; IsDirty() returns this class's
          // dirty member.
00306     void SetDirty();
00307     bool IsDirty() const { return dirty; }
00308 
          // Returns a copy of this node; the argument is named `parent`,
          // presumably the element the copy is attached to -- confirm.
00309     virtual PXMLObject * Clone(PXMLElement * parent) const = 0;
00310 
00311   protected:
00312     PXMLElement * parent;
00313     bool dirty;
00314 };
00315 
00317 
// PXMLData: tree node that carries a string (member `value`) rather than child
// nodes; IsElement() reports false for it.
00318 class PXMLData : public PXMLObject {
00319   PCLASSINFO(PXMLData, PXMLObject);
00320   public:
          // Construct from a PString, or from a character buffer plus length.
00321     PXMLData(PXMLElement * parent, const PString & data);
00322     PXMLData(PXMLElement * parent, const char * data, int len);
00323 
00324     PBoolean IsElement() const    { return false; }
00325 
          // Note: the `dirty` parameter has the same name as the inherited
          // member PXMLObject::dirty.
00326     void SetString(const PString & str, bool dirty = true);
00327 
          // Returns a copy of the stored string.
00328     PString GetString() const           { return value; }
00329 
00330     void Output(ostream & strm, const PXMLBase & xml, int indent) const;
00331 
00332     PXMLObject * Clone(PXMLElement * parent) const;
00333 
00334   protected:
00335     PString value;
00336 };
00337 
00339 
// PXMLElement: tree node with a name, a string-to-string attribute map, an
// array of child objects, a recorded column/line position and namespace
// bookkeeping. IsElement() reports true for it.
// NOTE(review): this class declares its own `bool dirty`, which hides
// PXMLObject::dirty. PXMLObject::IsDirty() reads the base member, so any
// unqualified use of `dirty` inside PXMLElement member functions touches a
// different variable -- confirm whether the duplicate is needed.
00340 class PXMLElement : public PXMLObject {
00341   PCLASSINFO(PXMLElement, PXMLObject);
00342   public:
00343     PXMLElement(PXMLElement * parent, const char * name = NULL);
00344     PXMLElement(PXMLElement * parent, const PString & name, const PString & data);
00345 
00346     PBoolean IsElement() const { return true; }
00347 
00348     void PrintOn(ostream & strm) const;
00349     void Output(ostream & strm, const PXMLBase & xml, int indent) const;
00350 
          // Returns a copy of the element name.
00351     PCaselessString GetName() const
00352       { return name; }
00353 
00358     PCaselessString GetPathName() const;
00359 
          // Replaces the element name; the PString is converted to the
          // PCaselessString member on assignment.
00360     void SetName(const PString & v)
00361     { name = v; }
00362 
          // Number of entries in subObjects.
00363     PINDEX GetSize() const
00364       { return subObjects.GetSize(); }
00365 
          // Child insertion. The functions taking an existing object accept a
          // `dirty` flag that defaults to true; the AddElement() overloads take
          // names and values instead of an existing object.
00366     PXMLObject  * AddSubObject(PXMLObject * elem, bool dirty = true);
00367 
00368     PXMLElement * AddChild    (PXMLElement * elem, bool dirty = true);
00369     PXMLData    * AddChild    (PXMLData    * elem, bool dirty = true);
00370 
00371     PXMLElement * AddElement(const char * name);
00372     PXMLElement * AddElement(const PString & name, const PString & data);
00373     PXMLElement * AddElement(const PString & name, const PString & attrName, const PString & attrVal);
00374 
          // Attribute access. Keys are PCaselessString; HasAttributes() and
          // GetNumAttributes() are answered from attributes.GetSize().
00375     void SetAttribute(const PCaselessString & key,
00376                       const PString & value,
00377                       bool setDirty = true);
00378 
00379     PString GetAttribute(const PCaselessString & key) const;
00380     PString GetKeyAttribute(PINDEX idx) const;
00381     PString GetDataAttribute(PINDEX idx) const;
00382     bool HasAttribute(const PCaselessString & key) const;
00383     bool HasAttributes() const      { return attributes.GetSize() > 0; }
00384     PINDEX GetNumAttributes() const { return attributes.GetSize(); }
00385 
          // Child lookup. The index-only overload returns PXMLObject *, while the
          // name-based overloads return PXMLElement *.
00386     PXMLElement * GetElement(const PCaselessString & name, const PCaselessString & attr, const PString & attrval) const;
00387     PXMLElement * GetElement(const PCaselessString & name, PINDEX idx = 0) const;
00388     PXMLObject  * GetElement(PINDEX idx = 0) const;
00389     bool          RemoveElement(PINDEX idx);
00390 
00391     PINDEX FindObject(const PXMLObject * ptr) const;
00392 
          // True when subObjects holds at least one entry.
00393     bool HasSubObjects() const
00394       { return subObjects.GetSize() != 0; }
00395 
          // Returns subObjects by value; what that copy shares with the original
          // depends on the PARRAY-generated type -- confirm before mutating it.
00396     PXMLObjectArray  GetSubObjects() const
00397       { return subObjects; }
00398 
00399     PString GetData() const;
00400     void SetData(const PString & data);
00401     void AddData(const PString & data);
00402 
00403     PXMLObject * Clone(PXMLElement * parent) const;
00404 
          // Read / write the recorded position. Argument order is (column, line)
          // in both functions.
00405     void GetFilePosition(unsigned & col, unsigned & line) const { col = column; line = lineNumber; }
00406     void SetFilePosition(unsigned col,   unsigned line)         { column = col; lineNumber = line; }
00407 
00408     void AddNamespace(const PString & prefix, const PString & uri);
00409     void RemoveNamespace(const PString & prefix);
00410 
          // Namespace queries return a bool and write their result through the
          // reference parameter. GetURIForNamespace() is the only one not
          // declared const.
00411     bool GetDefaultNamespace(PCaselessString & str) const;
00412     bool GetNamespace(const PCaselessString & prefix, PCaselessString & str) const;
00413     PCaselessString PrependNamespace(const PCaselessString & name) const;
00414     bool GetURIForNamespace(const PCaselessString & prefix, PCaselessString & uri);
00415 
00416   protected:
00417     PCaselessString name;
00418     PStringToString attributes;
00419     PXMLObjectArray subObjects;
          // Hides PXMLObject::dirty -- see the NOTE(review) above the class.
00420     bool dirty;
00421     unsigned column;
00422     unsigned lineNumber;
00423     PStringToString m_nameSpaces;
00424     PCaselessString m_defaultNamespace;
00425 };
00426 
00428 
// Forward declaration of PConfig, needed by the PXMLSettings constructor and
// ToConfig() declared below.
00429 class PConfig;      // stupid gcc 4 does not recognize PConfig as a class
00430 
// PXMLSettings: PXML subclass addressed by (section, key) pairs, with
// conversion from and to PConfig.
// NOTE(review): Load/LoadFile/Save/SaveFile are re-declared here without the
// Options parameter. They are not virtual in PXML, so these declarations hide
// the base overloads for calls made through a PXMLSettings object.
00431 class PXMLSettings : public PXML
00432 {
00433   PCLASSINFO(PXMLSettings, PXML);
00434   public:
          // Every constructor defaults `options` to NewLineAfterElement. The
          // parameter type is Options here, while PXML's constructors take int.
00435     PXMLSettings(Options options = NewLineAfterElement);
00436     PXMLSettings(const PString & data, Options options = NewLineAfterElement);
00437     PXMLSettings(const PConfig & data, Options options = NewLineAfterElement);
00438 
00439     bool Load(const PString & data);
00440     bool LoadFile(const PFilePath & fn);
00441 
00442     bool Save();
00443     bool Save(PString & data);
00444     bool SaveFile(const PFilePath & fn);
00445 
          // Setting access keyed by section and key.
00446     void SetAttribute(const PCaselessString & section, const PString & key, const PString & value);
00447 
00448     PString GetAttribute(const PCaselessString & section, const PString & key) const;
00449     bool    HasAttribute(const PCaselessString & section, const PString & key) const;
00450 
          // Writes into the caller-supplied PConfig; this object is left
          // unchanged (const member function).
00451     void ToConfig(PConfig & cfg) const;
00452 };
00453 
00454 
00456 
// PXMLParser: parser front end. Parse() accepts a buffer, its length and a
// `final` flag; the virtual member functions below are named after XML parse
// events and are presumably invoked while Parse() runs -- confirm in the
// implementation. The class exists only when P_EXPAT is defined.
00457 class PXMLParser : public PXMLBase
00458 {
00459   PCLASSINFO(PXMLParser, PXMLBase);
00460   public:
00461     PXMLParser(int options = NoOptions);
00462     ~PXMLParser();
          // Feeds dataLen bytes starting at data; `final` presumably marks the
          // last chunk of the document -- confirm.
00463     bool Parse(const char * data, int dataLen, bool final);
          // Reports error text, column and line through the reference arguments.
00464     void GetErrorInfo(PString & errorString, unsigned & errorCol, unsigned & errorLine);
00465 
          // Overridable event handlers (PXMLStreamParser overrides EndElement).
00466     virtual void StartElement(const char * name, const char **attrs);
00467     virtual void EndElement(const char * name);
00468     virtual void AddCharacterData(const char * data, int len);
00469     virtual void XmlDecl(const char * version, const char * encoding, int standAlone);
00470     virtual void StartDocTypeDecl(const char * docTypeName,
00471                                   const char * sysid,
00472                                   const char * pubid,
00473                                   int hasInternalSubSet);
00474     virtual void EndDocTypeDecl();
00475     virtual void StartNamespaceDeclHandler(const char * prefix, const char * uri);
00476     virtual void EndNamespaceDeclHandler(const char * prefix);
00477 
          // Return copies of the version, encoding and stand-alone members.
00478     PString GetVersion() const  { return version; }
00479     PString GetEncoding() const { return encoding; }
00480 
00481     StandAloneType GetStandAlone() const { return m_standAlone; }
00482 
00483     PXMLElement * GetXMLTree() const;
00484     PXMLElement * SetXMLTree(PXMLElement * newRoot);
00485 
00486   protected:
          // Untyped parser handle; presumably the Expat parser object, given the
          // P_EXPAT guard around this class -- confirm.
00487     void * expat;
00488     PXMLElement * rootElement;
00489     bool rootOpen;
00490     PXMLElement * currentElement;
          // Typed PXMLData * despite the "Element" in its name.
00491     PXMLData * lastElement;
00492     PString version, encoding;
00493     StandAloneType m_standAlone;
00494     PStringToString m_tempNamespaceList;
00495 };
00496 
00498 
// PXMLStreamParser: PXMLParser variant that overrides EndElement(), adds
// Read(PChannel *) returning a PXML pointer, and keeps a PQueue<PXML> named
// messages. How the queue is filled and drained is defined in the
// implementation file.
00499 class PXMLStreamParser : public PXMLParser
00500 {
00501   PCLASSINFO(PXMLStreamParser, PXMLParser);
00502   public:
00503     PXMLStreamParser();
00504 
          // Overrides the handler of the same name in PXMLParser.
00505     virtual void EndElement(const char * name);
          // Returns a PXML pointer obtained via the given channel; ownership of
          // the result is not visible here -- confirm before deleting or
          // retaining it.
00506     virtual PXML * Read(PChannel * channel);
00507 
00508   protected:
00509     PQueue<PXML> messages;
00510 };
00511 
00512 
00513 #endif // P_EXPAT
00514 
00515 #endif // PTLIB_PXML_H
00516 
00517 
00518 // End Of File ///////////////////////////////////////////////////////////////
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines