Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members

data_node_format.h

00001 #ifndef s11n_DATA_NODE_FORMAT_H_INCLUDED
00002 #define s11n_DATA_NODE_FORMAT_H_INCLUDED
00003 ////////////////////////////////////////////////////////////////////////////////
00004 // data_node_format.h
00005 // Contains some helpers related to parsing/formatting data_node-style objects.
00006 //
00007 // License: Public Domain
00008 // Author: stephan@s11n.net
00009 ////////////////////////////////////////////////////////////////////////////////
00010 
00011 
00012 #include <string>
00013 #include <list>
00014 #include <map>
00015 #include <stdexcept>
00016 // #include <typeinfo>
00017 #include <deque> // std::deque, used for the node stack of data_node_tree_builder
00018 
00019 #include <s11n/to_string.h> // to/from_string()
00020 #include <s11n/debuggering_macros.h> // COUT/CERR
00021 #include <s11n/file_util.h> // get_i/ostream()
00022 #include <s11n/string_util.h> // translate_entities()
00023 
00024 
00025 #include <s11n/s11n_core.h> // classload()
00026 
00027 #include "data_node_functor.h" // some utility functors
00028 #include "data_node_serialize.h" // data_node_serializer<> and friends
00029 #include "data_node_io.h" // default serializer interfaces
00030 ////////////////////////////////////////////////////////////////////////////////
00031 // NO DEPS ON data_node.h ALLOWED!
00032 ////////////////////////////////////////////////////////////////////////////////
00033 
00034 #ifndef yyFlexLexer // FlexLexer.h is only included if yyFlexLexer is not already defined
00035 #  include "FlexLexer.h"
00036 #endif
00037 s11n_CLASSLOADER_ABSTRACT_BASE(FlexLexer); // NOTE(review): macro is defined elsewhere; presumably what allows s11n::classload<FlexLexer>() to be used further down - confirm
00038 
00039 
00040 namespace s11n {
00041         namespace io {
00042 
00043                 namespace Private {
00044                         /**
00045                            A helper which spares FlexLexer subclasses from having to be
00046                            included in header files. (FlexLexer's subclassing technique
00047                            (via macros) makes them impractical to mix together in the same
00048                            headers.) Only declared here. NOTE(review): presumably runs the lexer's yylex() on the stream; the meaning of the int result is not visible here.
00049 
00050                            May throw std::runtime_error.
00051                          */
00052                         int lex_api_hider_yylex( FlexLexer *, std::istream & );
00053 
00054                 }
00055 
00056                 /**
00057                    A string-to-string map holding the tokens used for "entity
00058                    translations" by s11n parsers/serializers. NOTE(review): which side (key or value) is the translated form is not visible here.
00059                 */
00060                 typedef std::map<std::string,std::string> entity_translation_map;
00061 
00062 
00063                 /**
00064                    tree_builder is the non-templated callback interface
00065                    through which lex-based parsers assemble a node tree,
00066                    sparing the parsers from being hard-coded to any one
00067                    node_type.
00068 
00069                    Only the operations needed by the current lex-based
00070                    parsers are offered; it is not meant as a general,
00071                    reusable tree-building interface.
00072 
00073                    On its own the class does nothing useful: it must be
00074                    subclassed and every pure virtual method implemented.
00075                 */
00076                 class tree_builder
00077                 {
00078                 public:
00079                         /** Creates a builder with auto_delete() switched on. */
00080                         tree_builder() : m_autodel( true )
00081                         {
00082                         }
00083 
00084                         /** Virtual, so subclasses may be destroyed via a tree_builder pointer. */
00085                         virtual ~tree_builder()
00086                         {
00087                         }
00088 
00089                         /**
00090                            Begins a new node carrying the given class name
00091                            and node name.
00092 
00093                            Returns true on success and false on failure.
00094                         */
00095                         virtual bool open_node( const std::string & classname, const std::string & nodename ) = 0;
00096 
00097                         /**
00098                            Closes the current node.
00099 
00100                            Returns true on success and false on failure.
00101                         */
00102                         virtual bool close_node() = 0;
00103 
00104                         /**
00105                            Sets property key to val on the current node.
00106 
00107                            Returns true on success and false on failure.
00108                         */
00109                         virtual bool add_property( const std::string & key, const std::string & val ) = 0;
00110 
00111                         /**
00112                            Reports how deep the parser currently is; the
00113                            root node counts as depth 1.
00114                         */
00115                         virtual size_t node_depth() const = 0;
00116 
00117                         /**
00118                            Replaces the implementation class name of the
00119                            current node with newclassname.
00120                         */
00121                         virtual bool change_node_class( const std::string & newclassname ) = 0;
00122 
00123                         /**
00124                            Turns auto-deletion on or off. While it is on
00125                            (the default) this object should delete its
00126                            children when it is destroyed; while off it
00127                            should leave them alone. Honoring the flag is
00128                            the job of subclasses, because this base type
00129                            does no handling of children itself.
00130                         */
00131                         void auto_delete( bool b )
00132                         {
00133                                 m_autodel = b;
00134                         }
00135 
00136                         /**
00137                            Getter counterpart of auto_delete( bool ).
00138                         */
00139                         bool auto_delete() const
00140                         {
00141                                 return m_autodel;
00142                         }
00143 
00144                 private:
00145                         bool m_autodel; // the auto_delete() flag
00146                 };
00147 
00148 
00149 
00150                 /**
00151                    tree_builder_context is a helper for accessing some
00152                    template-dependent code from non-template-aware
00153                    lexer code. Its usage is admittedly a bit strange
00154                    (indeed, its whole existence is).
00155 
00156                    This object sets up a "context channel" through which
00157                    a given FlexLexer can communicate data back to a
00158                    data_node_tree_builder<NodeType> without knowing the
00159                    exact NodeType. Each lexer gets its own entry, so lexers
00160                    do not share state with one another. NOTE(review): the
00161                    map holding those entries is not locked by this class;
00162                    confirm before (un)binding lexers from several threads.
00163 
00164                    For samples see the s11n lexers, under src/node/lex.
00165 
00166                    bind(), unbind(), builder() and metadata() manage the
00167                    lexer-to-builder association. All other methods mirror
00168                    those of a tree_builder object, so see that class for
00169                    the API docs; they forward to the builder which was
00170                    bind()ed to the given lexer. It is intended that only
00171                    the lexers associated with this context use this API.
00172 
00173                    Calling a mirrored function for a lexer which has no
00174                    builder (never bound, already unbound, or bound to a
00175                    null builder) has no effect: false (or 0) is returned.
00176                 */
00177                 template <typename ContextT>
00178                 class tree_builder_context
00179                 {
00180                 public:
00181                         /** The context type for this class. */
00182                         typedef ContextT context_type;
00183 
00184                         /**
00185                            Associates builder with lexer for this context.
00186 
00187                            Pointer ownership does not change by
00188                            calling this function.
00189 
00190                            This must be carefully marshalled: it must
00191                            always be called immediately before the
00192                            matching lexer is used, and unbind(lexer)
00193                            should be called immediately afterwards to
00194                            free up the internal marshaling data. Failing
00195                            to call unbind() will mean a resource leak
00196                            (albeit a small one).
00197 
00198                            Preconditions:
00199 
00200                            - lexer and builder must be valid pointers
00201                            and must stay alive for as long as they are
00202                            bound, because only the pointers are stored.
00203                         */
00204                         static void bind( const FlexLexer * lexer, tree_builder * builder )
00205                         {
00206                                 lmap()[lexer].builder = builder;
00207                         }
00208 
00209                         /**
00210                            Frees up the internal resources used by the
00211                            marshaling process for the given lexer. Unbinding
00212                            a lexer which is not bound is harmless.
00213                         */
00214                         static void unbind( const FlexLexer * lexer )
00215                         {
00216                                 lmap().erase( lexer );
00217                         }
00218 
00219                         /**
00220                            Returns the builder bound to lexer via
00221                            bind(lexer,builder), or 0 if there is none.
00222                            Looking up an unbound lexer does not create
00223                            an entry for it.
00224 
00225                            Ownership of the returned pointer does not
00226                            change by calling this function.
00227                         */
00228                         static tree_builder * builder( const FlexLexer * lexer )
00229                         {
00230                                 typename lexer_map::iterator it = lmap().find( lexer );
00231                                 return ( lmap().end() == it ) ? 0 : it->second.builder;
00232                         }
00233 
00234                         /**
00235                            See tree_builder::open_node(). Returns false
00236                            if lexer has no builder.
00237                         */
00238                         static bool open_node( const FlexLexer * lexer,
00239                                                const std::string & classname,
00240                                                const std::string & nodename )
00241                         {
00242                                 tree_builder * const target = builder( lexer );
00243                                 return target ? target->open_node( classname, nodename ) : false;
00244                         }
00245 
00246                         /**
00247                            See tree_builder::close_node(). Returns false
00248                            if lexer has no builder.
00249                         */
00250                         static bool close_node( const FlexLexer * lexer )
00251                         {
00252                                 tree_builder * const target = builder( lexer );
00253                                 return target ? target->close_node() : false;
00254                         }
00255 
00256                         /**
00257                            See tree_builder::add_property(). Returns false
00258                            if lexer has no builder.
00259                         */
00260                         static bool
00261                         add_property(  const FlexLexer * lexer,
00262                                        const std::string & key,
00263                                        const std::string & val )
00264                         {
00265                                 tree_builder * const target = builder( lexer );
00266                                 return target ? target->add_property( key, val ) : false;
00267                         }
00268 
00269                         /**
00270                            See tree_builder::node_depth(). Returns 0
00271                            if lexer has no builder.
00272                         */
00273                         static size_t node_depth(  const FlexLexer * lexer )
00274                         {
00275                                 tree_builder * const target = builder( lexer );
00276                                 return target ? target->node_depth() : 0;
00277                         }
00278 
00279                         /**
00280                            See tree_builder::change_node_class(). Returns
00281                            false if lexer has no builder.
00282                         */
00283                         static bool change_node_class(  const FlexLexer * lexer,
00284                                                         const std::string & newclassname )
00285                         {
00286                                 tree_builder * const target = builder( lexer );
00287                                 return target ? target->change_node_class( newclassname ) : false;
00288                         }
00289 
00290                         /**
00291                            This is intended for direct access by a lexer associated
00292                            with this context, and ONLY by such lexers.
00293 
00294                            Except for the builder member, these are
00295                            temporary holding points for vars common to
00296                            most lexers, placed here to avoid using
00297                            global data in the lexer code.
00298                         */
00299                         struct lexer_metadata
00300                         {
00301                                 tree_builder * builder; // builder bound to the lexer, or 0
00302                                 size_t internaldepth; // current internal depth (not always the same as node_depth())
00303                                 std::string nodename; // name of current node
00304                                 std::string nodeclass; // class name of current node
00305                                 std::string property; // property value buffer
00306                                 std::string bufferyy; // lexer-dependent
00307                                 lexer_metadata() : builder(0), internaldepth(0)
00308                                 { // the string members default-construct to ""
00309                                 }
00310                         };
00311 
00312                         /**
00313                            Returns the lexer_metadata for the given lexer, creating one
00314                            (with a null builder) if needed. It is assumed that the
00315                            lexer has been bound via a call to bind().
00316                         */
00317                         static lexer_metadata & metadata( const FlexLexer * lexer )
00318                         {
00319                                 return lmap()[lexer];
00320                         }
00321 
00322                 private:
00323                         /** Convenience typedef. */
00324                         typedef tree_builder_context<context_type> this_type;
00325                         /** lexer-to-metadata map */
00326                         typedef std::map<const FlexLexer *,lexer_metadata> lexer_map;
00327                         /** Returns the lexer map obtained from s11n::phoenix for this context type. */
00328                         static lexer_map & lmap()
00329                         {
00330                                 return s11n::phoenix< lexer_map, this_type >::instance();
00331                         }
00332                 };
00333 
00334                 /**
00335                    data_node_tree_builder is a helper class for
00336                    building trees from deserialized data, designed
00337                    particularly for use with lex/callback-based tree
00338                    builders.
00339 
00340                    It owns all objects which build up it's tree. If
00341                    you want them you must manually remove them from the
00342                    container. You normally do not want them, however - they're
00343                    mostly throwaway nodes on their way to becoming fully
00344                    deserialized objects.
00345 
00346                    This class only provides methods for building a tree, not
00347                    for traversing it. Once you have built a tree, traverse it
00348                    starting at the root_node().
00349 
00350                    Based on usage conventions this type supports only
00351                    a single root node.
00352                 */
00353                 template <typename NodeType>
00354                 class data_node_tree_builder : public tree_builder
00355                 {
00356                 public:
00357                         typedef NodeType node_type;
00358 
00359                         typedef std::list< node_type * > child_list_type;
00360 
00361                         /** Creates a default builder. */
00362                         data_node_tree_builder() : m_node_count(0), m_node(0),m_root(0)
00363                         {
00364                         }
00365 
00366                         /**
00367                            Deletes this object's children if
00368                            auto_delete() returns true.
00369                         */
00370                         virtual ~data_node_tree_builder()
00371                         {
00372                                 if( this->auto_delete() && this->m_root )
00373                                 {
00374                                         //CERR << "data_node_tree_builder<> cleaning up root node.\n";
00375                                         delete( this->m_root );
00376                                 }
00377                                 else
00378                                 {
00379                                         //CERR << "data_node_tree_builder<> was relieved of child duty (or had no child).\n";
00380                                 }
00381                         }
00382 
00383 
00384                         /**
00385                            Opens a new node, making that the current node.
00386                            classname will be used for the node's impl_class()
00387                            (see docs for node_type::impl_class()). name will
00388                            be the object's name, which is important for
00389                            de/serializing the node (see node_type::name()).
00390 
00391                            It returns false on error, else true. The default
00392                            implementation has no error conditions, and
00393                            therefor always returns true.
00394 
00395                            Node that classnames and node names need not be
00396                            unique (nor make up unique combinations). Any
00397                            number of nodes may have the same name or
00398                            classname.
00399                         */
00400                         bool open_node( const std::string & classname, const std::string & nodename )
00401                         {
00402                                 ++m_node_count;
00403 
00404                                 this->m_node = ( this->m_nodestack.empty() 
00405                                                  ? 0
00406                                                  : this->m_nodestack.back() );
00407                                 node_type * newnode = new node_type();
00408                                 if ( m_node )
00409                                 { // if we're in a node, add new node as a child to that one:
00410                                         m_node->children().push_back( newnode );
00411                                 }
00412                                 this->m_node = newnode;
00413                                 m_node->name( nodename );
00414                                 m_node->impl_class( classname );
00415                                 this->m_nodestack.push_back( m_node );
00416                                 bool ret = true;
00417                                 if ( 1 == this->m_nodestack.size() )
00418                                 {
00419                                         if( m_root )
00420                                         {
00421                                                 CERR << "open_node("<<classname<<","<<nodename<<") WARNING: deleting extra root node!\n";
00422                                                 delete( m_node );
00423                                                 ret = false;
00424                                         }
00425                                         else
00426                                         {
00427                                                 m_root = m_node;
00428                                         }
00429                                 }
00430                                 return ret;
00431                         }
00432 
00433                         /**
00434                            Closes the most-recently-opened node, effectively
00435                            popping the previous node off of the node stack (it
00436                            is not destroyed).  It is an error to call this more
00437                            often than calling open_node().
00438 
00439                            It returns false on error (e.g., called
00440                            with no opened node).
00441                         */
00442                         virtual bool close_node()
00443                         {
00444                                 if ( !m_node || m_nodestack.empty() )
00445                                 {
00446                                         CERR << "close_node() error: called with an empty node stack!" << std::endl;
00447                                         return false;
00448                                 }
00449                                 m_nodestack.pop_back();
00450                                 if ( m_nodestack.empty() )
00451                                 {
00452                                         m_node = NULL;
00453                                 }
00454                                 else
00455                                 {
00456                                         m_node = m_nodestack.back();
00457                                 }
00458                                 return true;
00459                         }
00460 
00461 
00462                         /**
00463                            Adds the given key/value pair to the
00464                            current node and returns true. If no node
00465                            is currently opened it returns false.
00466                         */
00467                         virtual bool add_property( const std::string & key, const std::string & val )
00468                         {
00469                                 if( ! this->m_node ) return false;
00470                                 this->m_node->set( key, val );
00471                                 return true;
00472                         }
00473 
00474                         /**
00475                            Returns the total number of nodes opened via open_node().
00476                         */
00477                         size_t node_count() const
00478                         {
00479                                 return m_node_count;
00480                         }
00481 
00482 
00483                         /**
00484                            Returns the current depth of opened nodes. A return
00485                            value of 1 means the current node is the root node,
00486                            for example, and 0 means that no node has yet been
00487                            opened.
00488                         */
00489                         size_t node_depth() const
00490                         {
00491                                 return m_nodestack.size();
00492                         }
00493 
00494 
00495                         /**
00496                            Returns the most recent root node parsed out of the
00497                            input object.
00498 
00499                            Use auto_delete() to determine ownership of
00500                            the returned pointer.
00501                         */
00502                         node_type * root_node() const
00503                         {
00504                                 return m_root;
00505                         }
00506 
00507 
00508                         /**
00509                            Returns the current node.
00510 
00511                            Use auto_delete() to determine ownership of
00512                            the returned pointer.
00513                         */
00514                         node_type * current_node() const
00515                         {
00516                                 return m_node;
00517                         }
00518 
00519                         /**
00520                            Changes class name of current node, if one
00521                            is set. Returns false only if no node is
00522                            currently opened, else it returns true.
00523                         */
00524                         virtual bool change_node_class( const std::string & newclassname )
00525                         {
00526                                 if( ! this->m_node ) return false;
00527                                 this->m_node->impl_class( newclassname );
00528                                 return true;
00529                         }
00530 
00531                 private:
00532                         size_t m_node_count;
00533                         node_type * m_node;
00534                         node_type * m_root;
00535                         typedef std::deque < node_type * > node_stack;
00536                         node_stack m_nodestack;                        
00537                 };
00538 
00539 
00540 
00541                 /**
00542                    Loads the named FlexLexer, runs it over src and hands
00543                    back the root of the node tree which the lexer built.
00544                    It exists because template-free code (FlexLexers)
00545                    needs a way to feed templatized node types.
00546 
00547                    - lexerClassName is the name of a FlexLexer
00548                    subclass. It must be registered with the FlexLexer
00549                    classloader.
00550 
00551                    - src is the stream to pass on to the lexer.
00552 
00553                    - BuilderContext should be the same one expected by
00554                    the specific lexer. See the existing lexers for
00555                    examples. You want to pass the actual
00556                    BuilderContext's context here, not a
00557                    tree_builder_context<> type.
00558 
00559                    The caller owns the returned pointer, which may be 0:
00560                    0 is returned if the lexer class cannot be loaded, if
00561                    lexing throws, or if the lexer opened no node at all.
00562                 */
00563                 template <typename NodeType, typename BuilderContext>
00564                 NodeType * deserialize_lex_forwarder( const std::string & lexerClassName,
00565                                                       std::istream & src )
00566                 {
00567                         typedef s11n::io::data_node_tree_builder<NodeType> BuilderType;
00568                         typedef tree_builder_context<BuilderContext> BC;
00569 
00570                         FlexLexer * const flexer = s11n::classload<FlexLexer>( lexerClassName );
00571                         if( 0 == flexer )
00572                         {
00573                                 CERR << "Lexer '"<<lexerClassName
00574                                      <<"' was not found by classload<FlexLexer>()."
00575                                      << " It is probably not registered with class_loader<FlexLexer>.\n";
00576                                 return 0;
00577                         }
00578 
00579                         // The builder collects the nodes which the lexer reports.
00580                         BuilderType * const bob = new BuilderType();
00581                         bob->auto_delete( false ); // on success the root node is handed to the caller
00582                         bool lexed = false; // becomes true only if lexing finishes without throwing
00583                         try
00584                         {
00585                                 // set up the comm channel between the builder and lexer:
00586                                 BC::bind( flexer, bob );
00587                                 // Bug-in-waiting: we don't yet have a way of knowing
00588                                 // if a lexer partially populates the builder.
00589                                 Private::lex_api_hider_yylex( flexer, src );
00590                                 lexed = true;
00591                         }
00592                         catch ( std::runtime_error & ex )
00593                         {
00594                                 CERR << "deserialize_lex_forwarder(): Doh! Exception during lexing: " << ex.what() << "\n";
00595                         }
00596                         catch (...)
00597                         {
00598                                 CERR << "deserialize_lex_forwarder(): Doh! Unknown exception during lexing:\n";
00599                         }
00600                         BC::unbind( flexer ); // free up lexer-to-builder binding
00601                         delete( flexer );
00602 
00603                         // The tree is only handed out after a clean lexer run.
00604                         NodeType * tree = 0;
00605                         if( lexed )
00606                         {
00607                                 tree = bob->root_node();
00608                         }
00609                         else
00610                         {
00611                                 bob->auto_delete( true ); // let the builder clean up the partial tree
00612                         }
00613                         delete( bob );
00614                         return tree;
00615                 }
00616 
00617                 /**
00618                    tree_builder_lexer eases the implementation of
00619                    lex-based node tree parsers.
00620 
00621                    It is useless standalone: it must be subclassed.
00622 
00623                    It stores the class name of a FlexLexer type so that
00624                    the lexer can be dynamically loaded when needed. On
00625                    behalf of its subclasses it also takes care of
00626                    instantiating that lexer and passing off input to
00627                    it.
00628                 */
00629                 template <typename NodeType, typename LexerSharingContext>
00630                 class tree_builder_lexer : public data_node_serializer<NodeType>
00631                 {
00632                 public:
00633                         typedef NodeType node_type;
00634 
00635                         typedef LexerSharingContext sharing_context;
00636 
00637                         /**
00638                            Stores lexerClassName, the class name of the
00639                            FlexLexer subtype associated with this serializer.
00640                         */
00641                         explicit tree_builder_lexer( const std::string & lexerClassName )
00642                                 : m_impl( lexerClassName )
00643                         {
00644                         }
00645 
00646                         /** Virtual, as this type is meant to be subclassed. */
00647                         virtual ~tree_builder_lexer() {}
00648 
00649                         /**
00650                            Overridden to parse src using this object's lexer.
00651                            It forwards to <code>deserialize_lex_forwarder<node_type,sharing_context>()</code>,
00652                            passing it this object's lexer_class() and src, and
00653                            returns whatever that call returns.
00654                         */
00655                         virtual node_type * deserialize( std::istream & src )
00656                         {
00657                                 const std::string lexername( lexer_class() );
00658                                 return deserialize_lex_forwarder< node_type, sharing_context >( lexername, src );
00659                         }
00660 
00661                         /**
00662                            Returns (a copy of) this object's lexer class
00663                            name.
00664                         */
00665                         std::string lexer_class() const
00666                         {
00667                                 return m_impl;
00668                         }
00669 
00670                 protected:
00671                         /** Sets this object's lexer class name. */
00672                         void lexer_class( const std::string & classname )
00673                         {
00674                                 m_impl = classname;
00675                         }
00676 
00677                 private:
00678                         std::string m_impl; // implementation class name for a FlexLexer subclass
00679 
00680         } // namespace io
00681 } // namespace s11n
00682 
00683 s11n_CLASSLOADER_ABSTRACT_BASE(s11n::io::tree_builder); // NOTE(review): macro is defined elsewhere; presumably registers the abstract tree_builder with the s11n classloader - confirm
00684 
00685 #endif // s11n_DATA_NODE_FORMAT_H_INCLUDED

Generated on Tue Oct 26 18:25:59 2004 for s11n by  doxygen 1.3.9.1