// PTLib Version 2.10.4
00001 /* 00002 * pxml.h 00003 * 00004 * XML parser support 00005 * 00006 * Portable Windows Library 00007 * 00008 * Copyright (c) 2002 Equivalence Pty. Ltd. 00009 * 00010 * The contents of this file are subject to the Mozilla Public License 00011 * Version 1.0 (the "License"); you may not use this file except in 00012 * compliance with the License. You may obtain a copy of the License at 00013 * http://www.mozilla.org/MPL/ 00014 * 00015 * Software distributed under the License is distributed on an "AS IS" 00016 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 00017 * the License for the specific language governing rights and limitations 00018 * under the License. 00019 * 00020 * The Original Code is Portable Windows Library. 00021 * 00022 * The Initial Developer of the Original Code is Equivalence Pty. Ltd. 00023 * 00024 * Contributor(s): ______________________________________. 00025 * 00026 * $Revision: 24995 $ 00027 * $Author: csoutheren $ 00028 * $Date: 2011-01-04 19:12:33 -0600 (Tue, 04 Jan 2011) $ 00029 */ 00030 00031 #ifndef PTLIB_PXML_H 00032 #define PTLIB_PXML_H 00033 00034 #ifdef P_USE_PRAGMA 00035 #pragma interface 00036 #endif 00037 00038 #include <ptlib.h> 00039 00040 #include <ptbuildopts.h> 00041 00042 #ifndef P_EXPAT 00043 00044 namespace PXML { 00045 extern PString EscapeSpecialChars(const PString & str); 00046 }; 00047 00048 #else 00049 00050 #include <ptclib/http.h> 00051 00053 00054 class PXMLElement; 00055 class PXMLData; 00056 00057 00058 class PXMLObject; 00059 class PXMLElement; 00060 class PXMLData; 00061 00063 00064 class PXMLBase : public PObject 00065 { 00066 public: 00067 enum Options { 00068 NoOptions = 0x0000, 00069 Indent = 0x0001, 00070 NewLineAfterElement = 0x0002, 00071 NoIgnoreWhiteSpace = 0x0004, 00072 CloseExtended = 0x0008, 00073 WithNS = 0x0010, 00074 FragmentOnly = 0x0020, 00075 AllOptions = 0xffff 00076 }; 00077 __inline friend Options operator|(Options o1, Options o2) { return (Options)(((unsigned)o1) | 
((unsigned)o2)); } 00078 __inline friend Options operator&(Options o1, Options o2) { return (Options)(((unsigned)o1) & ((unsigned)o2)); } 00079 00080 enum StandAloneType { 00081 UninitialisedStandAlone = -2, 00082 UnknownStandAlone = -1, 00083 NotStandAlone, 00084 IsStandAlone 00085 }; 00086 00087 PXMLBase(int opts = NoOptions) 00088 : m_options(opts) { } 00089 00090 void SetOptions(int opts) 00091 { m_options = opts; } 00092 00093 int GetOptions() const { return m_options; } 00094 00095 virtual PBoolean IsNoIndentElement( 00096 const PString & /*elementName*/ 00097 ) const 00098 { 00099 return false; 00100 } 00101 00102 protected: 00103 int m_options; 00104 }; 00105 00106 00107 class PXML : public PXMLBase 00108 { 00109 PCLASSINFO(PXML, PObject); 00110 public: 00111 00112 PXML( 00113 int options = NoOptions, 00114 const char * noIndentElements = NULL 00115 ); 00116 PXML( 00117 const PString & data, 00118 int options = NoOptions, 00119 const char * noIndentElements = NULL 00120 ); 00121 00122 PXML(const PXML & xml); 00123 00124 ~PXML(); 00125 00126 bool IsLoaded() const { return rootElement != NULL; } 00127 bool IsDirty() const; 00128 00129 bool Load(const PString & data, Options options = NoOptions); 00130 00131 #if P_HTTP 00132 bool StartAutoReloadURL( 00133 const PURL & url, 00134 const PTimeInterval & timeout, 00135 const PTimeInterval & refreshTime, 00136 Options options = NoOptions 00137 ); 00138 bool StopAutoReloadURL(); 00139 PString GetAutoReloadStatus() { PWaitAndSignal m(autoLoadMutex); PString str = autoLoadError; return str; } 00140 bool AutoLoadURL(); 00141 virtual void OnAutoLoad(PBoolean ok); 00142 00143 bool LoadURL(const PURL & url); 00144 bool LoadURL(const PURL & url, const PTimeInterval & timeout, Options options = NoOptions); 00145 #endif // P_HTTP 00146 00147 bool LoadFile(const PFilePath & fn, Options options = NoOptions); 00148 00149 virtual void OnLoaded() { } 00150 00151 bool Save(Options options = NoOptions); 00152 bool Save(PString & 
data, Options options = NoOptions); 00153 bool SaveFile(const PFilePath & fn, Options options = NoOptions); 00154 00155 void RemoveAll(); 00156 00157 PBoolean IsNoIndentElement( 00158 const PString & elementName 00159 ) const; 00160 00161 PString AsString() const; 00162 void PrintOn(ostream & strm) const; 00163 void ReadFrom(istream & strm); 00164 00165 00166 PXMLElement * GetElement(const PCaselessString & name, const PCaselessString & attr, const PString & attrval) const; 00167 PXMLElement * GetElement(const PCaselessString & name, PINDEX idx = 0) const; 00168 PXMLElement * GetElement(PINDEX idx) const; 00169 PINDEX GetNumElements() const; 00170 PXMLElement * GetRootElement() const { return rootElement; } 00171 PXMLElement * SetRootElement(PXMLElement * p); 00172 PXMLElement * SetRootElement(const PString & documentType); 00173 bool RemoveElement(PINDEX idx); 00174 00175 PCaselessString GetDocumentType() const; 00176 00177 00178 enum ValidationOp { 00179 EndOfValidationList, 00180 DocType, 00181 ElementName, 00182 RequiredAttribute, 00183 RequiredNonEmptyAttribute, 00184 RequiredAttributeWithValue, 00185 RequiredElement, 00186 Subtree, 00187 RequiredAttributeWithValueMatching, 00188 RequiredElementWithBodyMatching, 00189 OptionalElement, 00190 OptionalAttribute, 00191 OptionalNonEmptyAttribute, 00192 OptionalAttributeWithValue, 00193 OptionalAttributeWithValueMatching, 00194 OptionalElementWithBodyMatching, 00195 SetDefaultNamespace, 00196 SetNamespace, 00197 00198 RequiredAttributeWithValueMatchingEx = RequiredAttributeWithValueMatching + 0x8000, 00199 OptionalAttributeWithValueMatchingEx = OptionalAttributeWithValueMatching + 0x8000, 00200 RequiredElementWithBodyMatchingEx = RequiredElementWithBodyMatching + 0x8000, 00201 OptionalElementWithBodyMatchingEx = OptionalElementWithBodyMatching + 0x8000 00202 }; 00203 00204 struct ValidationContext { 00205 PString m_defaultNameSpace; 00206 PStringToString m_nameSpaces; 00207 }; 00208 00209 struct ValidationInfo { 
00210 ValidationOp m_op; 00211 const char * m_name; 00212 00213 union { 00214 const void * m_placeHolder; 00215 const char * m_attributeValues; 00216 ValidationInfo * m_subElement; 00217 const char * m_namespace; 00218 }; 00219 00220 PINDEX m_minCount; 00221 PINDEX m_maxCount; 00222 }; 00223 00224 bool Validate(const ValidationInfo * validator); 00225 bool ValidateElements(ValidationContext & context, PXMLElement * baseElement, const ValidationInfo * elements); 00226 bool ValidateElement(ValidationContext & context, PXMLElement * element, const ValidationInfo * elements); 00227 bool LoadAndValidate(const PString & body, const PXML::ValidationInfo * validator, PString & error, int options = NoOptions); 00228 00229 PString GetErrorString() const { return m_errorString; } 00230 unsigned GetErrorColumn() const { return m_errorColumn; } 00231 unsigned GetErrorLine() const { return m_errorLine; } 00232 00233 PString GetDocType() const { return docType; } 00234 void SetDocType(const PString & v) { docType = v; } 00235 00236 PMutex & GetMutex() { return rootMutex; } 00237 00238 #if P_HTTP 00239 PDECLARE_NOTIFIER(PTimer, PXML, AutoReloadTimeout); 00240 PDECLARE_NOTIFIER(PThread, PXML, AutoReloadThread); 00241 #endif // P_HTTP 00242 00243 // static methods to create XML tags 00244 static PString CreateStartTag (const PString & text); 00245 static PString CreateEndTag (const PString & text); 00246 static PString CreateTagNoData (const PString & text); 00247 static PString CreateTag (const PString & text, const PString & data); 00248 00249 static PString EscapeSpecialChars(const PString & string); 00250 00251 protected: 00252 void Construct(int options, const char * noIndentElements); 00253 PXMLElement * rootElement; 00254 PMutex rootMutex; 00255 00256 bool loadFromFile; 00257 PFilePath loadFilename; 00258 PString version, encoding; 00259 StandAloneType m_standAlone; 00260 00261 #if P_HTTP 00262 PTimer autoLoadTimer; 00263 PURL autoloadURL; 00264 PTimeInterval 
autoLoadWaitTime; 00265 PMutex autoLoadMutex; 00266 PString autoLoadError; 00267 #endif // P_HTTP 00268 00269 PStringStream m_errorString; 00270 unsigned m_errorLine; 00271 unsigned m_errorColumn; 00272 00273 PSortedStringList noIndentElements; 00274 00275 PString docType; 00276 PString m_defaultNameSpace; 00277 }; 00278 00280 00281 PARRAY(PXMLObjectArray, PXMLObject); 00282 00283 class PXMLObject : public PObject { 00284 PCLASSINFO(PXMLObject, PObject); 00285 public: 00286 PXMLObject(PXMLElement * par) 00287 : parent(par) { dirty = false; } 00288 00289 PXMLElement * GetParent() const 00290 { return parent; } 00291 00292 PXMLObject * GetNextObject() const; 00293 00294 void SetParent(PXMLElement * newParent) 00295 { 00296 PAssert(parent == NULL, "Cannot reparent PXMLElement"); 00297 parent = newParent; 00298 } 00299 00300 PString AsString() const; 00301 00302 virtual void Output(ostream & strm, const PXMLBase & xml, int indent) const = 0; 00303 00304 virtual PBoolean IsElement() const = 0; 00305 00306 void SetDirty(); 00307 bool IsDirty() const { return dirty; } 00308 00309 virtual PXMLObject * Clone(PXMLElement * parent) const = 0; 00310 00311 protected: 00312 PXMLElement * parent; 00313 bool dirty; 00314 }; 00315 00317 00318 class PXMLData : public PXMLObject { 00319 PCLASSINFO(PXMLData, PXMLObject); 00320 public: 00321 PXMLData(PXMLElement * parent, const PString & data); 00322 PXMLData(PXMLElement * parent, const char * data, int len); 00323 00324 PBoolean IsElement() const { return false; } 00325 00326 void SetString(const PString & str, bool dirty = true); 00327 00328 PString GetString() const { return value; } 00329 00330 void Output(ostream & strm, const PXMLBase & xml, int indent) const; 00331 00332 PXMLObject * Clone(PXMLElement * parent) const; 00333 00334 protected: 00335 PString value; 00336 }; 00337 00339 00340 class PXMLElement : public PXMLObject { 00341 PCLASSINFO(PXMLElement, PXMLObject); 00342 public: 00343 PXMLElement(PXMLElement * parent, const 
char * name = NULL); 00344 PXMLElement(PXMLElement * parent, const PString & name, const PString & data); 00345 00346 PBoolean IsElement() const { return true; } 00347 00348 void PrintOn(ostream & strm) const; 00349 void Output(ostream & strm, const PXMLBase & xml, int indent) const; 00350 00351 PCaselessString GetName() const 00352 { return name; } 00353 00358 PCaselessString GetPathName() const; 00359 00360 void SetName(const PString & v) 00361 { name = v; } 00362 00363 PINDEX GetSize() const 00364 { return subObjects.GetSize(); } 00365 00366 PXMLObject * AddSubObject(PXMLObject * elem, bool dirty = true); 00367 00368 PXMLElement * AddChild (PXMLElement * elem, bool dirty = true); 00369 PXMLData * AddChild (PXMLData * elem, bool dirty = true); 00370 00371 PXMLElement * AddElement(const char * name); 00372 PXMLElement * AddElement(const PString & name, const PString & data); 00373 PXMLElement * AddElement(const PString & name, const PString & attrName, const PString & attrVal); 00374 00375 void SetAttribute(const PCaselessString & key, 00376 const PString & value, 00377 bool setDirty = true); 00378 00379 PString GetAttribute(const PCaselessString & key) const; 00380 PString GetKeyAttribute(PINDEX idx) const; 00381 PString GetDataAttribute(PINDEX idx) const; 00382 bool HasAttribute(const PCaselessString & key) const; 00383 bool HasAttributes() const { return attributes.GetSize() > 0; } 00384 PINDEX GetNumAttributes() const { return attributes.GetSize(); } 00385 00386 PXMLElement * GetElement(const PCaselessString & name, const PCaselessString & attr, const PString & attrval) const; 00387 PXMLElement * GetElement(const PCaselessString & name, PINDEX idx = 0) const; 00388 PXMLObject * GetElement(PINDEX idx = 0) const; 00389 bool RemoveElement(PINDEX idx); 00390 00391 PINDEX FindObject(const PXMLObject * ptr) const; 00392 00393 bool HasSubObjects() const 00394 { return subObjects.GetSize() != 0; } 00395 00396 PXMLObjectArray GetSubObjects() const 00397 { return 
subObjects; } 00398 00399 PString GetData() const; 00400 void SetData(const PString & data); 00401 void AddData(const PString & data); 00402 00403 PXMLObject * Clone(PXMLElement * parent) const; 00404 00405 void GetFilePosition(unsigned & col, unsigned & line) const { col = column; line = lineNumber; } 00406 void SetFilePosition(unsigned col, unsigned line) { column = col; lineNumber = line; } 00407 00408 void AddNamespace(const PString & prefix, const PString & uri); 00409 void RemoveNamespace(const PString & prefix); 00410 00411 bool GetDefaultNamespace(PCaselessString & str) const; 00412 bool GetNamespace(const PCaselessString & prefix, PCaselessString & str) const; 00413 PCaselessString PrependNamespace(const PCaselessString & name) const; 00414 bool GetURIForNamespace(const PCaselessString & prefix, PCaselessString & uri); 00415 00416 protected: 00417 PCaselessString name; 00418 PStringToString attributes; 00419 PXMLObjectArray subObjects; 00420 bool dirty; 00421 unsigned column; 00422 unsigned lineNumber; 00423 PStringToString m_nameSpaces; 00424 PCaselessString m_defaultNamespace; 00425 }; 00426 00428 00429 class PConfig; // stupid gcc 4 does not recognize PConfig as a class 00430 00431 class PXMLSettings : public PXML 00432 { 00433 PCLASSINFO(PXMLSettings, PXML); 00434 public: 00435 PXMLSettings(Options options = NewLineAfterElement); 00436 PXMLSettings(const PString & data, Options options = NewLineAfterElement); 00437 PXMLSettings(const PConfig & data, Options options = NewLineAfterElement); 00438 00439 bool Load(const PString & data); 00440 bool LoadFile(const PFilePath & fn); 00441 00442 bool Save(); 00443 bool Save(PString & data); 00444 bool SaveFile(const PFilePath & fn); 00445 00446 void SetAttribute(const PCaselessString & section, const PString & key, const PString & value); 00447 00448 PString GetAttribute(const PCaselessString & section, const PString & key) const; 00449 bool HasAttribute(const PCaselessString & section, const PString & key) 
const; 00450 00451 void ToConfig(PConfig & cfg) const; 00452 }; 00453 00454 00456 00457 class PXMLParser : public PXMLBase 00458 { 00459 PCLASSINFO(PXMLParser, PXMLBase); 00460 public: 00461 PXMLParser(int options = NoOptions); 00462 ~PXMLParser(); 00463 bool Parse(const char * data, int dataLen, bool final); 00464 void GetErrorInfo(PString & errorString, unsigned & errorCol, unsigned & errorLine); 00465 00466 virtual void StartElement(const char * name, const char **attrs); 00467 virtual void EndElement(const char * name); 00468 virtual void AddCharacterData(const char * data, int len); 00469 virtual void XmlDecl(const char * version, const char * encoding, int standAlone); 00470 virtual void StartDocTypeDecl(const char * docTypeName, 00471 const char * sysid, 00472 const char * pubid, 00473 int hasInternalSubSet); 00474 virtual void EndDocTypeDecl(); 00475 virtual void StartNamespaceDeclHandler(const char * prefix, const char * uri); 00476 virtual void EndNamespaceDeclHandler(const char * prefix); 00477 00478 PString GetVersion() const { return version; } 00479 PString GetEncoding() const { return encoding; } 00480 00481 StandAloneType GetStandAlone() const { return m_standAlone; } 00482 00483 PXMLElement * GetXMLTree() const; 00484 PXMLElement * SetXMLTree(PXMLElement * newRoot); 00485 00486 protected: 00487 void * expat; 00488 PXMLElement * rootElement; 00489 bool rootOpen; 00490 PXMLElement * currentElement; 00491 PXMLData * lastElement; 00492 PString version, encoding; 00493 StandAloneType m_standAlone; 00494 PStringToString m_tempNamespaceList; 00495 }; 00496 00498 00499 class PXMLStreamParser : public PXMLParser 00500 { 00501 PCLASSINFO(PXMLStreamParser, PXMLParser); 00502 public: 00503 PXMLStreamParser(); 00504 00505 virtual void EndElement(const char * name); 00506 virtual PXML * Read(PChannel * channel); 00507 00508 protected: 00509 PQueue<PXML> messages; 00510 }; 00511 00512 00513 #endif // P_EXPAT 00514 00515 #endif // PTLIB_PXML_H 00516 00517 00518 // 
End Of File ///////////////////////////////////////////////////////////////