Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members

data_node_format.h

00001 #ifndef s11n_DATA_NODE_FORMAT_H_INCLUDED 00002 #define s11n_DATA_NODE_FORMAT_H_INCLUDED 00003 //////////////////////////////////////////////////////////////////////////////// 00004 // data_node_format.h 00005 // Contains some helpers related to parsing/formating data_node-style objects. 00006 // 00007 // License: Public Domain 00008 // Author: stephan@s11n.net 00009 //////////////////////////////////////////////////////////////////////////////// 00010 00011 00012 #include <string> 00013 #include <list> 00014 #include <map> 00015 #include <stdexcept> 00016 // #include <typeinfo> 00017 00018 00019 #include <s11n/to_string.h> // to/from_string() 00020 #include <s11n/debuggering_macros.h> // COUT/CERR 00021 #include <s11n/file_util.h> // get_i/ostream() 00022 #include <s11n/string_util.h> // translate_entities() 00023 00024 00025 #include <s11n/s11n_core.h> // classload() 00026 00027 #include "data_node_functor.h" // some utility functors 00028 #include "data_node_serialize.h" // data_node_serializer<> and friends 00029 #include "data_node_io.h" // default serializer interfaces 00030 //////////////////////////////////////////////////////////////////////////////// 00031 // NO DEPS ON data_node.h ALLOWED! 00032 //////////////////////////////////////////////////////////////////////////////// 00033 00034 #ifndef yyFlexLexer // aaarrrgggg! 00035 # include "FlexLexer.h" 00036 #endif 00037 s11n_CLASSLOADER_ABSTRACT_BASE(FlexLexer); 00038 00039 00040 namespace s11n { 00041 namespace io { 00042 00043 namespace Private { 00044 /** 00045 A helper to hide FlexLexer subclasses from needing to be 00046 included in header files. (FlexLexer's subclassing technique 00047 (via macros) makes them impracticle to mix together 00048 in the same headers.) 00049 00050 May throw std::runtime_error. 00051 */ 00052 int lex_api_hider_yylex( FlexLexer *, std::istream & ); 00053 00054 } 00055 00056 /** 00057 A typedef representing a map of tokens used for 00058 "entity translations" by s11n parsers/serializers. 00059 */ 00060 typedef std::map<std::string,std::string> entity_translation_map; 00061 00062 00063 /** 00064 tree_builder exists mainly so some lex-based code 00065 can get access to a non-templated type (so we don't 00066 have to hard-code the parsers to a node_type). 00067 00068 It provides only the interface needed by the current 00069 lex-based parsers, not some ultimately reusable 00070 interface. 00071 00072 It is not functionally useful by itself - it must 00073 be subclassed and all of it's virtual methods must be 00074 implemented. 00075 */ 00076 class tree_builder 00077 { 00078 public: 00079 tree_builder() : m_autodel(true) {} 00080 00081 virtual ~tree_builder() {} 00082 00083 /** 00084 Starts a new node with the the given class 00085 name and node name. 00086 00087 Return value indicates success or failure. 00088 */ 00089 virtual bool 00090 open_node( const std::string & classname, const std::string & nodename ) = 0; 00091 00092 /** 00093 Closes the current node. 00094 00095 Return value indicates success or failure. 00096 */ 00097 virtual bool 00098 close_node() = 0; 00099 00100 /** 00101 Sets property key to val for the current node. 00102 00103 Return value indicates success or failure. 00104 */ 00105 virtual bool 00106 add_property( const std::string & key, const std::string & val ) = 0; 00107 00108 /** 00109 Returns the depth level of the parser, 00110 where the root node is 1. 00111 */ 00112 virtual size_t node_depth() const = 0; 00113 00114 00115 /** 00116 Changes the implementation class name of 00117 the current node. 00118 */ 00119 virtual bool change_node_class( const std::string & newclassname ) = 0; 00120 00121 /** 00122 If auto_delete() is on (the default) then 00123 this object should delete it's children 00124 when it is destroyed, otherwise it will 00125 not. It is up to subclasses to honor this, 00126 as this base type does no handling of 00127 children. 00128 */ 00129 void auto_delete( bool b ) 00130 { 00131 this->m_autodel = b; 00132 } 00133 00134 /** 00135 This is the getter for auto_delete( bool ). 00136 */ 00137 bool auto_delete() const 00138 { 00139 return this->m_autodel; 00140 } 00141 00142 00143 private: 00144 bool m_autodel; 00145 00146 }; 00147 00148 00149 00150 /** 00151 tree_builder_context is a helper for accessing some 00152 template-dependent code from non-template-aware 00153 lexer code. It's usage is admitedly a bit strange 00154 (indeed, it's whole existance is). 00155 00156 This object sets up a "context channel" where 00157 a given FlexLexer can, in a thread-safe manner, 00158 communicate data back to a data_tree_builder<NodeType> 00159 without knowing the exact NodeType. 00160 00161 For samples see the s11n lexers, under src/node/lex. 00162 00163 All of it's methods, except for builder(), mirror 00164 those of a tree_builder object, so see that class 00165 for the API docs. For the "mirrored" functions, the 00166 object being proxied is that set via builder(). It 00167 is intended that only the lexers associated with 00168 this context actually use it's API. 00169 00170 Calling the proxied functions when no builder is 00171 set has no effect. Calling them with no bind()ed 00172 FlexLexer may be fatal. 00173 00174 */ 00175 template <typename ContextT> 00176 class tree_builder_context 00177 { 00178 public: 00179 /** The context type for this class. */ 00180 typedef ContextT context_type; 00181 /** 00182 Sets the current builder object for this context. 00183 00184 Pointer ownership does not change by 00185 calling this function. 00186 00187 This must be carefully marshalled: it must 00188 always be set immediately before the 00189 matching lexer is used, and unbind(lexer) 00190 should be called immediately afterwards to 00191 free up the internal marshaling data. Failing 00192 to call unbind will mean a resource leak 00193 (albeit a small one). 00194 00195 Preconditions: 00196 00197 - lexer and builder must be valid pointers 00198 and must out-live the expected lifetime of 00199 this context object, which internally 00200 associates these two objects. 00201 */ 00202 static void bind( const FlexLexer * lexer, tree_builder * builder ) 00203 { 00204 lmap()[lexer].builder = builder; 00205 } 00206 00207 /** 00208 Frees up the internal resources used by the 00209 marshaling process for the given lexer. 00210 */ 00211 static void unbind( const FlexLexer * lexer ) 00212 { 00213 lmap().erase( lexer ); 00214 } 00215 00216 /** 00217 Gets the current builder object for this 00218 context, which must have been previously 00219 set up via a call to bind(lexer,builder). 00220 00221 Ownership of the returned pointer does not 00222 change by calling this function. 00223 */ 00224 static tree_builder * builder( const FlexLexer * lexer ) 00225 { 00226 return lmap()[lexer].builder; 00227 } 00228 00229 00230 #define IFNOLEXER(RET) if( lmap().end() == lmap().find(lexer) ) return RET; 00231 /** 00232 See tree_builder::open_node(). 00233 */ 00234 static bool open_node( const FlexLexer * lexer, 00235 const std::string & classname, 00236 const std::string & nodename ) 00237 { 00238 IFNOLEXER(false); 00239 return lmap()[lexer].builder->open_node( classname, nodename ); 00240 } 00241 00242 /** 00243 See tree_builder::clode_node(). 00244 */ 00245 static bool close_node( const FlexLexer * lexer ) 00246 { 00247 IFNOLEXER(false); 00248 return lmap()[lexer].builder->close_node(); 00249 } 00250 00251 /** 00252 See tree_builder::add_property(). 00253 */ 00254 static bool 00255 add_property( const FlexLexer * lexer, 00256 const std::string & key, 00257 const std::string & val ) 00258 { 00259 IFNOLEXER(false); 00260 return lmap()[lexer].builder->add_property( key, val ); 00261 } 00262 00263 /** 00264 See tree_builder::node_depth(). 00265 */ 00266 static size_t node_depth( const FlexLexer * lexer ) 00267 { 00268 IFNOLEXER(0); 00269 return lmap()[lexer].builder->node_depth(); 00270 } 00271 00272 /** 00273 See tree_builder::change_node_class(). 00274 */ 00275 static bool change_node_class( const FlexLexer * lexer, 00276 const std::string & newclassname ) 00277 { 00278 IFNOLEXER(false); 00279 return lmap()[lexer].builder->change_node_class( newclassname ); 00280 } 00281 #undef IFNOLEXER 00282 00283 /** 00284 This is intended for direct access by a lexer associated 00285 with this context, and ONLY by such lexers. 00286 00287 Except for the builder member, these are 00288 temporary holding points for vars common to 00289 most lexers, placed here to avoid using 00290 global data in the lexer code. 00291 */ 00292 struct lexer_metadata 00293 { 00294 tree_builder * builder; 00295 00296 size_t internaldepth; // current internal depth (not always the same as node_depth()) 00297 std::string nodename; // name of current node 00298 std::string nodeclass; // class name of current node 00299 std::string property; // property value buffer 00300 std::string bufferyy; // lexer-dependent 00301 lexer_metadata() 00302 { 00303 builder = 0; 00304 internaldepth = 0; 00305 nodename = nodeclass = property = bufferyy = ""; 00306 } 00307 }; 00308 00309 /** 00310 Returns the lexer_metadata for the given lexer, creating one 00311 if needed. It is assumed that the lexer has been bound via a 00312 call to bind(). 00313 */ 00314 static lexer_metadata & metadata( const FlexLexer * lexer ) 00315 { 00316 return lmap()[lexer]; 00317 } 00318 00319 private: 00320 /** Convenience typedef. */ 00321 typedef tree_builder_context<context_type> this_type; 00322 /** lexer-to-metadata map */ 00323 typedef std::map<const FlexLexer *,lexer_metadata> lexer_map; 00324 static lexer_map & lmap() 00325 { 00326 return s11n::phoenix< 00327 lexer_map, 00328 this_type 00329 >::instance(); 00330 } 00331 00332 }; 00333 00334 /** 00335 data_node_tree_builder is a helper class for 00336 building trees from deserialized data, designed 00337 particularly for use with lex/callback-based tree 00338 builders. 00339 00340 It owns all objects which build up it's tree. If 00341 you want them you must manually remove them from the 00342 container. You normally do not want them, however - they're 00343 mostly throwaway nodes on their way to becoming fully 00344 deserialized objects. 00345 00346 This class only provides methods for building a tree, not 00347 for traversing it. Once you have built a tree, traverse it 00348 starting at the root_node(). 00349 00350 Based on usage conventions this type supports only 00351 a single root node. 00352 */ 00353 template <typename NodeType> 00354 class data_node_tree_builder : public tree_builder 00355 { 00356 public: 00357 typedef NodeType node_type; 00358 00359 typedef std::list< node_type * > child_list_type; 00360 00361 /** Creates a default builder. */ 00362 data_node_tree_builder() : m_node_count(0), m_node(0),m_root(0) 00363 { 00364 } 00365 00366 /** 00367 Deletes this object's children if 00368 auto_delete() returns true. 00369 */ 00370 virtual ~data_node_tree_builder() 00371 { 00372 if( this->auto_delete() && this->m_root ) 00373 { 00374 //CERR << "data_node_tree_builder<> cleaning up root node.\n"; 00375 delete( this->m_root ); 00376 } 00377 else 00378 { 00379 //CERR << "data_node_tree_builder<> was relieved of child duty (or had no child).\n"; 00380 } 00381 } 00382 00383 00384 /** 00385 Opens a new node, making that the current node. 00386 classname will be used for the node's impl_class() 00387 (see docs for node_type::impl_class()). name will 00388 be the object's name, which is important for 00389 de/serializing the node (see node_type::name()). 00390 00391 It returns false on error, else true. The default 00392 implementation has no error conditions, and 00393 therefor always returns true. 00394 00395 Node that classnames and node names need not be 00396 unique (nor make up unique combinations). Any 00397 number of nodes may have the same name or 00398 classname. 00399 */ 00400 bool open_node( const std::string & classname, const std::string & nodename ) 00401 { 00402 ++m_node_count; 00403 00404 this->m_node = ( this->m_nodestack.empty() 00405 ? 0 00406 : this->m_nodestack.back() ); 00407 node_type * newnode = new node_type(); 00408 if ( m_node ) 00409 { // if we're in a node, add new node as a child to that one: 00410 m_node->children().push_back( newnode ); 00411 } 00412 this->m_node = newnode; 00413 m_node->name( nodename ); 00414 m_node->impl_class( classname ); 00415 this->m_nodestack.push_back( m_node ); 00416 bool ret = true; 00417 if ( 1 == this->m_nodestack.size() ) 00418 { 00419 if( m_root ) 00420 { 00421 CERR << "open_node("<<classname<<","<<nodename<<") WARNING: deleting extra root node!\n"; 00422 delete( m_node ); 00423 ret = false; 00424 } 00425 else 00426 { 00427 m_root = m_node; 00428 } 00429 } 00430 return ret; 00431 } 00432 00433 /** 00434 Closes the most-recently-opened node, effectively 00435 popping the previous node off of the node stack (it 00436 is not destroyed). It is an error to call this more 00437 often than calling open_node(). 00438 00439 It returns false on error (e.g., called 00440 with no opened node). 00441 */ 00442 virtual bool close_node() 00443 { 00444 if ( !m_node || m_nodestack.empty() ) 00445 { 00446 CERR << "close_node() error: called with an empty node stack!" << std::endl; 00447 return false; 00448 } 00449 m_nodestack.pop_back(); 00450 if ( m_nodestack.empty() ) 00451 { 00452 m_node = NULL; 00453 } 00454 else 00455 { 00456 m_node = m_nodestack.back(); 00457 } 00458 return true; 00459 } 00460 00461 00462 /** 00463 Adds the given key/value pair to the 00464 current node and returns true. If no node 00465 is currently opened it returns false. 00466 */ 00467 virtual bool add_property( const std::string & key, const std::string & val ) 00468 { 00469 if( ! this->m_node ) return false; 00470 this->m_node->set( key, val ); 00471 return true; 00472 } 00473 00474 /** 00475 Returns the total number of nodes opened via open_node(). 00476 */ 00477 size_t node_count() const 00478 { 00479 return m_node_count; 00480 } 00481 00482 00483 /** 00484 Returns the current depth of opened nodes. A return 00485 value of 1 means the current node is the root node, 00486 for example, and 0 means that no node has yet been 00487 opened. 00488 */ 00489 size_t node_depth() const 00490 { 00491 return m_nodestack.size(); 00492 } 00493 00494 00495 /** 00496 Returns the most recent root node parsed out of the 00497 input object. 00498 00499 Use auto_delete() to determine ownership of 00500 the returned pointer. 00501 */ 00502 node_type * root_node() const 00503 { 00504 return m_root; 00505 } 00506 00507 00508 /** 00509 Returns the current node. 00510 00511 Use auto_delete() to determine ownership of 00512 the returned pointer. 00513 */ 00514 node_type * current_node() const 00515 { 00516 return m_node; 00517 } 00518 00519 /** 00520 Changes class name of current node, if one 00521 is set. Returns false only if no node is 00522 currently opened, else it returns true. 00523 */ 00524 virtual bool change_node_class( const std::string & newclassname ) 00525 { 00526 if( ! this->m_node ) return false; 00527 this->m_node->impl_class( newclassname ); 00528 return true; 00529 } 00530 00531 private: 00532 size_t m_node_count; 00533 node_type * m_node; 00534 node_type * m_root; 00535 typedef std::deque < node_type * > node_stack; 00536 node_stack m_nodestack; 00537 }; 00538 00539 00540 00541 /** 00542 This function exists for a really long, strange 00543 reason involving accessing templatized types from 00544 template-free code (FlexLexers). 00545 00546 - lexerClassName is the name of a FlexLexer 00547 subclass. It must be registered with the FlexLexer 00548 classloader. 00549 00550 - src is the stream to pass on to the lexer. 00551 00552 - BuilderContext should be the same one expected by 00553 the specific lexer. See the existing lexers for 00554 examples. You want to pass the actual 00555 BuilderContext's context here, not a 00556 tree_builder_context<> type. 00557 00558 The caller owns the returned poiner, which may be 0. 00559 */ 00560 template <typename NodeType, typename BuilderContext> 00561 NodeType * deserialize_lex_forwarder( const std::string & lexerClassName, 00562 std::istream & src 00563 ) 00564 { 00565 // CERR << "deserialize_lex_forwarder("<<lexerClassName<<")\n"; 00566 FlexLexer * lexer = s11n::classload<FlexLexer>( lexerClassName ); 00567 if( ! lexer ) 00568 { 00569 CERR << "Lexer '"<<lexerClassName 00570 <<"' was not found by classload<FlexLexer>()." 00571 << " It is probably not registered with class_loader<FlexLexer>.\n"; 00572 return 0; 00573 } 00574 00575 typedef s11n::io::data_node_tree_builder<NodeType> BuilderType; 00576 typedef tree_builder_context<BuilderContext> BC; 00577 00578 NodeType * ret = 0; 00579 BuilderType * treebuilder = new BuilderType(); 00580 treebuilder->auto_delete( false ); // we want to steal it's nodes. 00581 bool err = false; 00582 try 00583 { 00584 BC::bind( lexer, treebuilder ); 00585 // ^^^ sets up the comm channel between the builder and lexer 00586 /** 00587 Bug-in-waiting: we don't yet have a way of knowing 00588 if a lexer partially populates the builder. 00589 */ 00590 Private::lex_api_hider_yylex(lexer,src); 00591 } 00592 catch ( std::runtime_error & ex ) 00593 { 00594 err = true; 00595 CERR << "deserialize_lex_forwarder(): Doh! Exception during lexing: " << ex.what() << "\n"; 00596 } 00597 catch (...) 00598 { 00599 err = true; 00600 CERR << "deserialize_lex_forwarder(): Doh! Unknown exception during lexing:\n"; 00601 } 00602 BC::unbind( lexer ); // free up lexer-to-builder binding 00603 delete( lexer ); 00604 if( err ) 00605 { 00606 treebuilder->auto_delete( true ); // let it delete it's children 00607 } 00608 else 00609 { 00610 ret = treebuilder->root_node(); 00611 } 00612 // CERR << "Loaded node: " << std::hex << ret << '\n'; 00613 delete( treebuilder ); 00614 return ret; 00615 } 00616 00617 /** 00618 tree_builder_lexer is a type intended to ease the 00619 implementation of lex-based node tree parsers. 00620 00621 It is useless standalone: it must be subclassed. 00622 00623 It holds the class name of a FlexLexer type so it 00624 can be dynamically loaded as needed. Also, for 00625 subclasses it takes the responsibility of 00626 instantiating this type and passing off input to 00627 it. 00628 */ 00629 template <typename NodeType, typename LexerSharingContext> 00630 class tree_builder_lexer : public data_node_serializer<NodeType> 00631 { 00632 public: 00633 00634 typedef NodeType node_type; 00635 typedef LexerSharingContext sharing_context; 00636 00637 /** 00638 lexerClassName = the class name of the FlexLexer subtype 00639 associated with this serializer. 00640 */ 00641 explicit tree_builder_lexer( const std::string & lexerClassName ) 00642 : m_impl(lexerClassName) 00643 {} 00644 00645 virtual ~tree_builder_lexer(){} 00646 00647 /** 00648 Overridden to parse src using this object's lexer. 00649 It uses <code>deserialize_lex_forwarder<sharing_context>()</code>, 00650 passing it this object's lexer_class(). 00651 */ 00652 virtual node_type * deserialize( std::istream & src ) 00653 { 00654 return deserialize_lex_forwarder< 00655 node_type, 00656 sharing_context 00657 >( this->lexer_class(), src ); 00658 } 00659 00660 00661 /** 00662 Returns this object's lexer class name. 00663 */ 00664 std::string lexer_class() const { return this->m_impl; } 00665 00666 00667 protected: 00668 /** 00669 Sets this object's lexer class name. 00670 */ 00671 void lexer_class( const std::string & classname ) 00672 { 00673 this->m_impl = classname; 00674 } 00675 00676 private: 00677 std::string m_impl; // implementation class name for a FlexLexer subclass 00678 }; 00679 00680 } // namespace io 00681 } // namespace s11n 00682 00683 s11n_CLASSLOADER_ABSTRACT_BASE(s11n::io::tree_builder); 00684 00685 #endif // s11n_DATA_NODE_FORMAT_H_INCLUDED

Generated on Wed Jul 28 16:04:14 2004 for s11n by doxygen 1.3.7