System documentation of the GNU Image-Finding Tool

CAcSQLInvertedFile.h

00001 /* -*- mode: c++ -*- 
00002 */
00003 /* 
00004 
00005     GIFT, a flexible content based image retrieval system.
00006     Copyright (C) 1998, 1999, 2000, 2001, 2002, CUI University of Geneva
00007 
00008      Copyright (C) 2003, 2004 Bayreuth University
00009       2005 Bamberg University
00010     This program is free software; you can redistribute it and/or modify
00011     it under the terms of the GNU General Public License as published by
00012     the Free Software Foundation; either version 2 of the License, or
00013     (at your option) any later version.
00014 
00015     This program is distributed in the hope that it will be useful,
00016     but WITHOUT ANY WARRANTY; without even the implied warranty of
00017     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018     GNU General Public License for more details.
00019 
00020     You should have received a copy of the GNU General Public License
00021     along with this program; if not, write to the Free Software
00022     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 
00024 */
00025 // -*- mode: c++ -*-
00026 
00027 
00028 class CXMLElement;
00029 
00049 #ifndef _CINVERTEDFILEACCESSOR
00050 #define _CINVERTEDFILEACCESSOR
00051 #include "libGIFTAcInvertedFile/include/uses-declarations.h"
00052 #include <string>
00053 #include "libMRML/include/TID.h"
00054 #include "libMRML/include/CSelfDestroyPointer.h"
00055 #include "libMRML/include/CArraySelfDestroyPointer.h"
00056 #include "libGIFTAcInvertedFile/include/CDocumentFrequencyList.h"
00057 #include "CCollectionFrequencyList.h"
00058 #include "libGIFTAcInvertedFile/include/CADIHash.h"
00059 #include "libGIFTAcURL2FTS/include/CAcURL2FTS.h"
00060 #include <iostream>
00061 #include <fstream>
00062 #include <map>
00063 #include <vector>
00064 #ifdef HAS_HASH_MAP
00065 #include <hash_map>
00066 #else
00067 #define hash_map map
00068 #endif
00069 #include <functional>
00070 #include <algorithm>
00071 
00072 #include "libMRML/include/CMagic.h"
00073 
00074 
// TFeatureID is an alias of TID; the class below uses it for the parameter of
// FeatureToList() and FeatureToCollectionFrequency().
00075 typedef TID TFeatureID ;
00076 
// CAcInvertedFile: accessor class publicly derived from CAcURL2FTS.
// Going by its members it works with three files (inverted file, offset file,
// feature description file) and several TID-keyed lookup tables. The
// implementation is not part of this listing, so notes below that rely on
// naming alone are marked as assumptions.
// NOTE(review): istream, ifstream, ostream, string and list are used
//   unqualified; presumably uses-declarations.h provides the using-declarations,
//   and <list> is not among the includes visible in this header -- confirm.
// NOTE(review): when HAS_HASH_MAP is not defined, the preprocessor block above
//   #defines hash_map to map, so every hash_map member below is then a map.
00083 class CAcInvertedFile:public CAcURL2FTS{  
00084 
00085 protected:
        // Presumably the largest feature ID known to this accessor (see
        // getMaximumFeatureID()) -- confirm in the implementation.
00087   TID mMaximumFeatureID;
        // Self-destroying char array; presumably holds inverted-file content
        // in memory -- TODO confirm when it is filled.
00090   CArraySelfDestroyPointer<char> mInvertedFileBuffer;
        // Owned input stream; mutable, so const member functions may modify it.
00092   mutable CSelfDestroyPointer<istream> mInvertedFile;
00093 
        // Input file stream for the offset file; mutable for the same reason.
00095   mutable ifstream mOffsetFile;
00096 
        // Input file stream for the feature description file (not mutable).
00098   ifstream mFeatureDescriptionFile;
00099 
        // File names, presumably matching the three streams above.
00101   string mInvertedFileName;
00102 
00104   string mOffsetFileName;
00105 
00107   string mFeatureDescriptionFileName;
00108 
        // Table type mapping a TID to an unsigned int; by its name the value
        // is an offset (presumably into the inverted file -- confirm).
00110   typedef hash_map<TID,unsigned int> CIDToOffset;//new hash
00112   CIDToOffset mIDToOffset;
00113 
        // TID -> double table; mutable, so const member functions can modify
        // it (presumably used as a cache by FeatureToCollectionFrequency()).
00115   mutable hash_map<TID,double> mFeatureToCollectionFrequency;//new hash
00116 
        // TID -> unsigned int table; presumably backs getFeatureDescription().
00120   hash_map<TID,unsigned int> mFeatureDescription;//new hash_
00121 
        // Per-document information of project type CADIHash; presumably the
        // source for the DIDTo...() accessors below -- confirm.
00125   CADIHash mDocumentInformation;
00127 
        // Writes to the given, already open output stream. The exact record
        // layout for (inFeatureID, inPosition) is not visible here.
00130   void writeOffsetFileElement(TID inFeatureID,
00131                               int inPosition,
00132                               ostream& inOpenOffsetFile);
        // Takes the file name by value and returns a raw pointer.
        // NOTE(review): ownership of the returned list is not visible here.
00134   CDocumentFrequencyList* getFeatureFile(string inFileName)const;
00135 public:
        // Nullary call operator returning bool; presumably reports whether the
        // accessor is in a usable state -- confirm.
00137   bool operator()()const;
00138 
        // Constructs the accessor from an XML element describing a collection.
        // NOTE(review): single-argument constructor is not explicit, so
        // CXMLElement converts implicitly to CAcInvertedFile.
00153   CAcInvertedFile(const CXMLElement& inCollectionElement);
        // Initialisation step returning bool; the meaning of the unnamed bool
        // parameter is not visible in this header.
00155   bool init(bool);
00156 
00158   ~CAcInvertedFile();
00159   
        // ID <-> URL translation, both const.
00161   string IDToURL(TID inID)const;
00162 
00164   TID URLToID(const string& inURL)const;
00165   
        // The three functions below return raw CDocumentFrequencyList pointers
        // for a feature ID, a URL and a document ID respectively.
        // NOTE(review): ownership of the results is not visible here -- confirm
        // whether callers must delete them.
00169   CDocumentFrequencyList* FeatureToList(TFeatureID)const;
00170 
00172   CDocumentFrequencyList* URLToFeatureList(string inURL)const;
00173 
00175   CDocumentFrequencyList* DIDToFeatureList(TID inDID)const;
00176 
00178 
00179 
        // Returns a double for the given feature ID (collection frequency, by
        // name).
00183   double FeatureToCollectionFrequency(TFeatureID)const;
00184 
        // Returns an unsigned int describing the given feature ID; the meaning
        // of the value is not visible here.
00186   unsigned int getFeatureDescription(TID inFeatureID)const;
00188 
        // Per-document statistics, each taking a TID (presumably a document
        // ID, given the DID prefix) and returning a double.
00192   double DIDToMaxDocumentFrequency(TID)const;
00193 
00195   double DIDToDFSquareSum(TID)const;
00196 
00198   double DIDToSquareDFLogICFSum(TID)const;
00200 
00201   /*@name Inverted File Generation and Consistency Checking*/
00203 
        // Two generation entry points, both returning bool; how they differ is
        // not visible in this header.
00211   bool generateInvertedFile();
00212 
00220   bool newGenerateInvertedFile();
00221 
        // Non-const consistency check returning bool.
00224   bool checkConsistency();
00225 
        // Const lookup taking a feature ID, a document ID and a document
        // frequency; presumably true when that entry is found -- confirm.
00229   bool findWithinStream(TID inFeatureID,
00230                         TID inDocumentID,
00231                         double inDocumentFrequency)const;
00232   
00234 
        // Presumably returns mMaximumFeatureID -- confirm.
00236   TID getMaximumFeatureID()const;
        // Returns a raw pointer to a list of TIDs.
        // NOTE(review): ownership of the list is not visible here.
00244   list<TID>* getAllFeatureIDs()const;
00245 };
00246 
00247 #endif

Need to discuss something? Want to contribute? Contact help-gift@gnu.org.
Generated using Doxygen.