Main Page   Class Hierarchy   Alphabetical List   Compound List   Examples  
tokenizer.h
00001 /***************************************************************************
00002     copyright            : (C) 2002-2008 by Stefano Barbato
00003     email                : stefano@codesink.org
00004 
00005     $Id: tokenizer.h,v 1.18 2008-10-07 11:44:38 tat Exp $
00006  ***************************************************************************/
00007 #ifndef _MIMETIC_TOKENIZER_H_
00008 #define _MIMETIC_TOKENIZER_H_
00009 #include <iterator>
00010 #include <algorithm>
00011 #include <set>
00012 #include <string>
00013 #include <cstring>
00014 
00015 namespace mimetic
00016 {
00017 
/// Predicate that reports whether a value belongs to a configurable set of
/// delimiters.
///
/// The delimiter set starts empty. Delimiters are stored in a std::set, so
/// value_type must be usable as a std::set key.
template<typename value_type>
struct IsDelim
{
    // Function-object typedefs declared directly instead of deriving from
    // std::unary_function, which was removed from the library in C++17.
    typedef value_type argument_type;
    typedef bool result_type;

    /// Returns true if val is currently registered as a delimiter.
    bool operator()(const value_type& val) const
    {
        return m_delims.find(val) != m_delims.end();
    }
    /// Adds every element of cont to the delimiter set.
    /// Delimiters registered earlier are kept.
    template<typename Container>
    void setDelimList(const Container& cont)
    {
        setDelimList(cont.begin(), cont.end());
    }
    /// Adds every element of [bit, eit) to the delimiter set.
    /// Delimiters registered earlier are kept.
    template<typename Iterator>
    void setDelimList(Iterator bit, Iterator eit)
    {
        m_delims.insert(bit, eit);
    }
    /// Registers a single delimiter.
    void addDelim(const value_type& value)
    {
        m_delims.insert(value);
    }
    /// Unregisters a single delimiter; does nothing if it was not registered.
    void removeDelim(const value_type& value)
    {
        m_delims.erase(value);
    }
private:
    std::set<value_type> m_delims;
};

/// Delimiter predicate specialised for char: membership is a lookup in a
/// 256-entry table indexed by the character's unsigned value.
///
/// Unlike the generic template, setDelimList() here REPLACES the current
/// delimiter set instead of adding to it.
template<>
struct IsDelim<char>
{
    typedef char argument_type;
    typedef bool result_type;

    /// Creates a predicate with no delimiters.
    IsDelim()
    {
        // the table must be cleared here: operator() may be called before
        // any setDelimList()
        std::memset(m_lookup, 0, sizeof(m_lookup));
    }
    /// Replaces the delimiter set with the characters of delims.
    void setDelimList(const std::string& delims)
    {
        setDelimList(delims.begin(), delims.end());
    }
    /// Replaces the delimiter set with the characters in [bit, eit).
    template<typename Iterator>
    void setDelimList(Iterator bit, Iterator eit)
    {
        std::memset(m_lookup, 0, sizeof(m_lookup));
        for(; bit != eit; ++bit)
            // go through unsigned char: a plain char may be negative, which
            // would index before the start of the table
            m_lookup[static_cast<unsigned char>(*bit)] = 1;
    }
    /// Registers a single delimiter, keeping the existing ones.
    void addDelim(char value)
    {
        m_lookup[static_cast<unsigned char>(value)] = 1;
    }
    /// Unregisters a single delimiter; does nothing if it was not registered.
    void removeDelim(char value)
    {
        m_lookup[static_cast<unsigned char>(value)] = 0;
    }
    /// Returns true if val is currently registered as a delimiter.
    bool operator()(unsigned char val) const
    {
        return m_lookup[val] != 0;
    }
private:
    char m_lookup[256]; // 1 = delimiter, 0 = ordinary character
};
00072 
00073 
00074 /// Iterator tokenizer template class
00075 template<class Iterator,typename value_type>
00076 class ItTokenizer
00077 {
00078 public:
00079     ItTokenizer(Iterator bit, Iterator eit)
00080     : m_bit(bit), m_eit(eit), m_tok_eit(bit)
00081     {
00082     }
00083     void setSource(Iterator bit, Iterator eit)
00084     {
00085         m_bit = bit;
00086         m_eit = eit;
00087         m_tok_eit = bit;
00088     }
00089     template<typename DelimCont>
00090     void setDelimList(const DelimCont& cont)
00091     {
00092         m_delimPred.setDelimList(cont);
00093     }
00094     template<typename It>
00095     void setDelimList(It bit, It eit)
00096     {
00097         m_delimPred.setDelimList(bit, eit);
00098     }
00099     template<typename DestCont>
00100     bool next(DestCont& dst)
00101     {
00102         dst.erase(dst.begin(), dst.end());
00103         if(m_tok_eit == m_eit)
00104             return false;
00105         m_tok_eit = std::find_if(m_bit, m_eit, m_delimPred);
00106         m_matched = 0; // end of input
00107         if(m_tok_eit != m_eit)
00108             m_matched = *m_tok_eit; // matched delimiter
00109         std::copy(m_bit, m_tok_eit, std::back_inserter<DestCont>(dst));
00110         m_bit = (m_tok_eit != m_eit && ++m_tok_eit != m_eit ? m_tok_eit :m_eit);
00111         return true;
00112     }
00113     const value_type& matched() const
00114     {
00115         return m_matched;
00116     }
00117     void addDelim(const value_type& value)
00118     {
00119         m_delimPred.addDelim(value);
00120     }
00121     void removeDelim(const value_type& value)
00122     {
00123         m_delimPred.removeDelim(value);
00124     }
00125 private:
00126     Iterator m_bit, m_eit, m_tok_eit;
00127     IsDelim<value_type> m_delimPred;
00128     value_type m_matched;
00129 };
00130 
00131 
00132 /// char container tokenizer template class
00133 template<typename Container>
00134 struct ContTokenizer: public ItTokenizer<typename Container::const_iterator,typename Container::value_type>
00135 {
00136     typedef typename Container::value_type value_type;
00137     typedef typename Container::iterator iterator;
00138     typedef typename Container::const_iterator const_iterator;
00139     // i want to be fast here so i don't want to copy "cont"
00140     // so "cont" MUST be in scope for all following calls
00141     // to next(...). 
00142     ContTokenizer(const Container* cont)
00143     : ItTokenizer<const_iterator, value_type>(cont.begin(), cont.end())
00144     {
00145     }
00146     template<typename DelimCont>
00147     ContTokenizer(const Container* cont, const DelimCont& delims)
00148     : ItTokenizer<const_iterator,value_type>(cont->begin(), cont->end())
00149     {
00150         this->setDelimList(delims);
00151     }
00152     void setSource(const Container* cont)
00153     {
00154         ItTokenizer<const_iterator,value_type>::setSource(cont->begin(), cont->end());
00155     }
00156 private:
00157     ContTokenizer(const ContTokenizer&);
00158     ContTokenizer& operator=(const ContTokenizer&);
00159 };
00160 
00161 /// std::string tokenizer: ContTokenizer instantiated for std::string, yielding tokens of char elements
00162 typedef ContTokenizer<std::string> StringTokenizer;
00163 
00164 }
00165 
00166 #endif
00167