00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
// Forward declaration of CXMLElement. It sits before the include guard below,
// so it is re-declared on every inclusion of this header (harmless for a
// forward declaration). No declaration visible in this header refers to it.
00028 class CXMLElement;
00029
00052 #ifndef _CINVERTEDFILEACCESSOR
00053 #define _CINVERTEDFILEACCESSOR
00054 #include "libGIFTAcInvertedFile/include/uses-declarations.h"
00055 #include <string>
00056 #include "libMRML/include/TID.h"
00057 #include "libMRML/include/CAccessor.h"
00058 #include "libGIFTAcInvertedFile/include/CDocumentFrequencyList.h"
00059 #include <iostream>
00060 #include <fstream>
00061 #include <map>
00062 #include <vector>
00063 #ifdef HAS_HASH_MAP
00064 #include <hash_map>
00065 #else
00066 #define hash_map map
00067 #endif
00068
00069 #include <functional>
00070 #include <algorithm>
00071
00072 #include "libMRML/include/CMagic.h"
00073
00074
// TFeatureID is an alias of TID, so the two are interchangeable to the compiler;
// the separate name only documents intent at the call site.
// NOTE(review): TID is presumably declared in libMRML/include/TID.h -- confirm.
00075 typedef TID TFeatureID ;
00076
// Abstract accessor interface, publicly derived from CAccessor.
// Every member declared here is pure virtual, so the class cannot be
// instantiated; concrete accessors must override all of them.
//
// NOTE(review): `string`, `pair` and `list` are used unqualified; presumably
//   the using-declarations come from uses-declarations.h -- confirm.
// NOTE(review): no destructor is declared in this class. Deleting a derived
//   object through a CAcInvertedFile* is only safe if CAccessor declares a
//   virtual destructor -- confirm.
// NOTE(review): several members return raw pointers; the signatures do not say
//   who owns the pointee. Confirm against the implementations whether the
//   caller is expected to delete the result.
00083 class CAcInvertedFile:public CAccessor{
00084
00085 public:
// Const call operator taking no arguments and returning bool.
// Presumably reports whether the accessor is in a usable state -- confirm.
00087 virtual bool operator()()const =0;
00088
// Takes an ID and returns a string by value; presumably the URL associated
// with inID (inferred from the name) -- confirm.
00092 virtual string IDToURL(TID inID)const =0;
00093
// Takes a URL by const reference and returns a (bool, TID) pair.
// Presumably .first signals whether the URL was found and .second is the
// corresponding ID -- confirm the meaning of .second when .first is false.
00095 virtual pair<bool,TID> URLToID(const string& inURL)const =0;
00096
// Returns a pointer to a CDocumentFrequencyList for the given feature ID.
// Presumably the list of documents containing the feature -- confirm.
// Ownership of the returned pointer: see the review note on the class.
00113 virtual CDocumentFrequencyList* FeatureToList(TFeatureID inFID)const =0;
00114
// Returns a pointer to a CDocumentFrequencyList for the given URL.
// Note that inURL is taken by value here, whereas URLToID takes const string&.
// Presumably the list of features of the document at inURL -- confirm.
00116 virtual CDocumentFrequencyList* URLToFeatureList(string inURL)const =0;
00117
// Same return type as URLToFeatureList, but keyed by an ID instead of a URL.
// "DID" presumably stands for document ID -- confirm.
00119 virtual CDocumentFrequencyList* DIDToFeatureList(TID inDID)const =0;
00120
00133
00134
// Maps a feature ID (unnamed parameter) to a double; presumably the feature's
// collection frequency (inferred from the name) -- confirm.
00138 virtual double FeatureToCollectionFrequency(TFeatureID)const =0;
00139
// Returns an unsigned int for the given feature ID. The parameter is spelled
// TID rather than TFeatureID; both name the same type via the typedef in this
// header. The encoding of the returned value is not visible here.
00141 virtual unsigned int getFeatureDescription(TID inFeatureID)const =0;
00143
// Maps an ID (unnamed parameter) to a double; presumably the maximum document
// frequency within that document -- confirm.
00147 virtual double DIDToMaxDocumentFrequency(TID)const =0;
00148
// Maps an ID (unnamed parameter) to a double; presumably the sum of squared
// document frequencies for that document -- confirm.
00150 virtual double DIDToDFSquareSum(TID)const =0;
00151
// Maps an ID (unnamed parameter) to a double; presumably a sum over squared
// (document frequency * log inverse collection frequency) terms -- confirm
// the exact formula against an implementation.
00153 virtual double DIDToSquareDFLogICFSum(TID)const =0;
00155
00156
00158
// Non-const: may modify the accessor. Returns bool, presumably true on
// success -- confirm.
00161 virtual bool generateInvertedFile() =0;
00162
// Non-const, returns bool; presumably true when the underlying data is
// consistent -- confirm. Why it is non-const is not visible here.
00165 virtual bool checkConsistency() =0;
00166
00168
// Returns a pointer to a list of TID; presumably every feature ID known to
// the accessor. Ownership of the returned list: see the review note on the
// class.
// NOTE(review): <list> is not included directly by this header; it is
//   presumably pulled in transitively -- confirm.
00176 virtual list<TID>* getAllFeatureIDs()const =0;
00177 };
00178
00179 #endif