kdecore Library API Documentation

ksycocadict.cpp

00001 /* This file is part of the KDE libraries 00002 * Copyright (C) 1999 Waldo Bastian <bastian@kde.org> 00003 * 00004 * This library is free software; you can redistribute it and/or 00005 * modify it under the terms of the GNU Library General Public 00006 * License version 2 as published by the Free Software Foundation; 00007 * 00008 * This library is distributed in the hope that it will be useful, 00009 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00010 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00011 * Library General Public License for more details. 00012 * 00013 * You should have received a copy of the GNU Library General Public License 00014 * along with this library; see the file COPYING.LIB. If not, write to 00015 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 00016 * Boston, MA 02111-1307, USA. 00017 **/ 00018 00019 #include "ksycocadict.h" 00020 #include "ksycocaentry.h" 00021 #include "ksycoca.h" 00022 00023 #include <qptrlist.h> 00024 #include <qvaluelist.h> 00025 #include <kdebug.h> 00026 #include <stdlib.h> 00027 00028 namespace 00029 { 00030 struct string_entry { 00031 string_entry(QString _key, KSycocaEntry *_payload) 00032 { keyStr = _key; key = keyStr.unicode(); length = keyStr.length(); payload = _payload; hash = 0; } 00033 uint hash; 00034 int length; 00035 const QChar *key; 00036 QString keyStr; 00037 KSycocaEntry *payload; 00038 }; 00039 } 00040 00041 template class QPtrList<string_entry>; 00042 00043 class KSycocaDictStringList : public QPtrList<string_entry> 00044 { 00045 public: 00046 KSycocaDictStringList(); 00047 }; 00048 00049 KSycocaDictStringList::KSycocaDictStringList() 00050 { 00051 setAutoDelete(true); 00052 } 00053 00054 KSycocaDict::KSycocaDict() 00055 : d(0), mStr(0), mOffset(0) 00056 { 00057 } 00058 00059 KSycocaDict::KSycocaDict(QDataStream *str, int offset) 00060 : d(0), mStr(str), mOffset(offset) 00061 { 00062 Q_UINT32 test1, test2; 00063 str->device()->at(offset); 00064 (*str) >> test1 >> test2; 00065 if ((test1 > 0x000fffff) || (test2 > 1024)) 00066 { 00067 KSycoca::flagError(); 00068 mHashTableSize = 0; 00069 mOffset = 0; 00070 return; 00071 } 00072 00073 str->device()->at(offset); 00074 (*str) >> mHashTableSize; 00075 (*str) >> mHashList; 00076 mOffset = str->device()->at(); // Start of hashtable 00077 } 00078 00079 KSycocaDict::~KSycocaDict() 00080 { 00081 delete d; 00082 } 00083 00084 void 00085 KSycocaDict::add(const QString &key, KSycocaEntry *payload) 00086 { 00087 if (key.isEmpty()) return; // Not allowed (should never happen) 00088 if (!payload) return; // Not allowed! 00089 if (!d) 00090 { 00091 d = new KSycocaDictStringList(); 00092 } 00093 00094 string_entry *entry= new string_entry(key, payload); 00095 d->append(entry); 00096 } 00097 00098 void 00099 KSycocaDict::remove(const QString &key) 00100 { 00101 if (d) 00102 { 00103 for(string_entry *entry=d->first(); entry; entry = d->next()) 00104 { 00105 if (entry->keyStr == key) 00106 { 00107 d->remove(); 00108 break; 00109 } 00110 } 00111 } 00112 } 00113 00114 int 00115 KSycocaDict::find_string(const QString &key ) 00116 { 00117 //kdDebug(7011) << QString("KSycocaDict::find_string(%1)").arg(key) << endl; 00118 00119 if (!mStr || !mOffset) 00120 { 00121 kdError(7011) << "No database available!" << endl; 00122 return 0; 00123 } 00124 00125 if (mHashTableSize == 0) 00126 return 0; // Unlikely to find anything :-] 00127 00128 // Read hash-table data 00129 uint hash = hashKey(key) % mHashTableSize; 00130 //kdDebug(7011) << QString("hash is %1").arg(hash) << endl; 00131 00132 uint off = mOffset+sizeof(Q_INT32)*hash; 00133 //kdDebug(7011) << QString("off is %1").arg(off,8,16) << endl; 00134 mStr->device()->at( off ); 00135 00136 Q_INT32 offset; 00137 (*mStr) >> offset; 00138 00139 //kdDebug(7011) << QString("offset is %1").arg(offset,8,16) << endl; 00140 if (offset == 0) 00141 return 0; 00142 00143 if (offset > 0) 00144 return offset; // Positive ID 00145 00146 // Lookup duplicate list. 00147 offset = -offset; 00148 00149 mStr->device()->at(offset); 00150 //kdDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16) << endl; 00151 00152 while(true) 00153 { 00154 (*mStr) >> offset; 00155 if (offset == 0) break; 00156 QString dupkey; 00157 (*mStr) >> dupkey; 00158 //kdDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey) << endl; 00159 if (dupkey == key) return offset; 00160 } 00161 //kdWarning(7011) << "Not found!" << endl; 00162 00163 return 0; 00164 } 00165 00166 uint 00167 KSycocaDict::count() 00168 { 00169 if (!d) return 0; 00170 00171 return d->count(); 00172 } 00173 00174 void 00175 KSycocaDict::clear() 00176 { 00177 delete d; 00178 d = 0; 00179 } 00180 00181 uint 00182 KSycocaDict::hashKey( const QString &key) 00183 { 00184 int l = key.length(); 00185 register uint h = 0; 00186 00187 for(uint i = 0; i < mHashList.count(); i++) 00188 { 00189 int pos = mHashList[i]; 00190 if (pos < 0) 00191 { 00192 pos = -pos-1; 00193 if (pos < l) 00194 h = ((h * 13) + (key[l-pos].cell() % 29)) & 0x3ffffff; 00195 } 00196 else 00197 { 00198 pos = pos-1; 00199 if (pos < l) 00200 h = ((h * 13) + (key[pos].cell() % 29)) & 0x3ffffff; 00201 } 00202 } 00203 return h; 00204 } 00205 00206 // 00207 // Calculate the diversity of the strings at position 'pos' 00208 static int 00209 calcDiversity(KSycocaDictStringList *d, int pos, int sz) 00210 { 00211 if (pos == 0) return 0; 00212 bool *matrix = (bool *) calloc(sz, sizeof(bool)); 00213 uint usz = sz; 00214 00215 if (pos < 0) 00216 { 00217 pos = -pos-1; 00218 for(string_entry *entry=d->first(); entry; entry = d->next()) 00219 { 00220 register int l = entry->length; 00221 if (pos < l && pos != 0) 00222 { 00223 register uint hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3ffffff; 00224 matrix[ hash % usz ] = true; 00225 } 00226 } 00227 } 00228 else 00229 { 00230 pos = pos-1; 00231 for(string_entry *entry=d->first(); entry; entry = d->next()) 00232 { 00233 if (pos < entry->length) 00234 { 00235 register uint hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3ffffff; 00236 matrix[ hash % usz ] = true; 00237 } 00238 } 00239 } 00240 int diversity = 0; 00241 for(int i=0;i< sz;i++) 00242 if (matrix[i]) diversity++; 00243 00244 free((void *) matrix); 00245 00246 return diversity; 00247 } 00248 00249 // 00250 // Add the diversity of the strings at position 'pos' 00251 static void 00252 addDiversity(KSycocaDictStringList *d, int pos) 00253 { 00254 if (pos == 0) return; 00255 if (pos < 0) 00256 { 00257 pos = -pos-1; 00258 for(string_entry *entry=d->first(); entry; entry = d->next()) 00259 { 00260 register int l = entry->length; 00261 if (pos < l) 00262 entry->hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3fffffff; 00263 } 00264 } 00265 else 00266 { 00267 pos = pos - 1; 00268 for(string_entry *entry=d->first(); entry; entry = d->next()) 00269 { 00270 if (pos < entry->length) 00271 entry->hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3fffffff; 00272 } 00273 } 00274 } 00275 00276 00277 void 00278 KSycocaDict::save(QDataStream &str) 00279 { 00280 if (count() == 0) 00281 { 00282 mHashTableSize = 0; 00283 mHashList.clear(); 00284 str << mHashTableSize; 00285 str << mHashList; 00286 return; 00287 } 00288 00289 mOffset = str.device()->at(); 00290 00291 //kdDebug(7011) << QString("KSycocaDict: %1 entries.").arg(count()) << endl; 00292 00293 //kdDebug(7011) << "Calculating hash keys.." << endl; 00294 00295 int maxLength = 0; 00296 //kdDebug(7011) << "Finding maximum string length" << endl; 00297 for(string_entry *entry=d->first(); entry; entry = d->next()) 00298 { 00299 entry->hash = 0; 00300 if (entry->length > maxLength) 00301 maxLength = entry->length; 00302 } 00303 00304 //kdDebug(7011) << QString("Max string length = %1").arg(maxLength) << endl; 00305 00306 // use "almost prime" number for sz (to calculate diversity) and later 00307 // for the table size of big tables 00308 // int sz = d->count()*5-1; 00309 register unsigned int sz = count()*4 + 1; 00310 while(!(((sz % 3) && (sz % 5) && (sz % 7) && (sz % 11) && (sz % 13)))) sz+=2; 00311 00312 int maxDiv = 0; 00313 int maxPos = 0; 00314 int lastDiv = 0; 00315 00316 mHashList.clear(); 00317 00318 // try to limit diversity scan by "predicting" positions 00319 // with high diversity 00320 int *oldvec=new int[maxLength*2+1]; 00321 for (int i=0; i<(maxLength*2+1); i++) oldvec[i]=0; 00322 int mindiv=0; 00323 00324 while(true) 00325 { 00326 int divsum=0,divnum=0; 00327 00328 maxDiv = 0; 00329 maxPos = 0; 00330 for(int pos=-maxLength; pos <= maxLength; pos++) 00331 { 00332 // cut off 00333 if (oldvec[pos+maxLength]<mindiv) 00334 { oldvec[pos+maxLength]=0; continue; } 00335 00336 int diversity = calcDiversity(d, pos, sz); 00337 if (diversity > maxDiv) 00338 { 00339 maxDiv = diversity; 00340 maxPos = pos; 00341 } 00342 oldvec[pos+maxLength]=diversity; 00343 divsum+=diversity; divnum++; 00344 } 00345 // arbitrary cut-off value 3/4 of average seems to work 00346 if (divnum) 00347 mindiv=(3*divsum)/(4*divnum); 00348 00349 if (maxDiv <= lastDiv) 00350 break; 00351 // qWarning("Max Div = %d at pos %d", maxDiv, maxPos); 00352 lastDiv = maxDiv; 00353 addDiversity(d, maxPos); 00354 mHashList.append(maxPos); 00355 } 00356 00357 delete [] oldvec; 00358 00359 for(string_entry *entry=d->first(); entry; entry = d->next()) 00360 { 00361 entry->hash = hashKey(entry->keyStr); 00362 } 00363 // fprintf(stderr, "Calculating minimum table size..\n"); 00364 00365 mHashTableSize = sz; 00366 00367 struct hashtable_entry { 00368 string_entry *entry; 00369 QPtrList<string_entry> *duplicates; 00370 int duplicate_offset; 00371 }; 00372 00373 hashtable_entry *hashTable = new hashtable_entry[ sz ]; 00374 00375 //kdDebug(7011) << "Clearing hashtable..." << endl; 00376 for (unsigned int i=0; i < sz; i++) 00377 { 00378 hashTable[i].entry = 0; 00379 hashTable[i].duplicates = 0; 00380 } 00381 00382 //kdDebug(7011) << "Filling hashtable..." << endl; 00383 for(string_entry *entry=d->first(); entry; entry = d->next()) 00384 { 00385 int hash = entry->hash % sz; 00386 if (!hashTable[hash].entry) 00387 { // First entry 00388 hashTable[hash].entry = entry; 00389 } 00390 else 00391 { 00392 if (!hashTable[hash].duplicates) 00393 { // Second entry, build duplicate list. 00394 hashTable[hash].duplicates = new QPtrList<string_entry>(); 00395 hashTable[hash].duplicates->append(hashTable[hash].entry); 00396 hashTable[hash].duplicate_offset = 0; 00397 } 00398 hashTable[hash].duplicates->append(entry); 00399 } 00400 } 00401 00402 str << mHashTableSize; 00403 str << mHashList; 00404 00405 mOffset = str.device()->at(); // mOffset points to start of hashTable 00406 //kdDebug(7011) << QString("Start of Hash Table, offset = %1").arg(mOffset,8,16) << endl; 00407 00408 for(int pass = 1; pass <= 2; pass++) 00409 { 00410 str.device()->at(mOffset); 00411 //kdDebug(7011) << QString("Writing hash table (pass #%1)").arg(pass) << endl; 00412 for(uint i=0; i < mHashTableSize; i++) 00413 { 00414 Q_INT32 tmpid; 00415 if (!hashTable[i].entry) 00416 tmpid = (Q_INT32) 0; 00417 else if (!hashTable[i].duplicates) 00418 tmpid = (Q_INT32) hashTable[i].entry->payload->offset(); // Positive ID 00419 else 00420 tmpid = (Q_INT32) -hashTable[i].duplicate_offset; // Negative ID 00421 str << tmpid; 00422 //kdDebug(7011) << QString("Hash table : %1").arg(tmpid,8,16) << endl; 00423 } 00424 //kdDebug(7011) << QString("End of Hash Table, offset = %1").arg(str.device()->at(),8,16) << endl; 00425 00426 //kdDebug(7011) << QString("Writing duplicate lists (pass #%1)").arg(pass) << endl; 00427 for(uint i=0; i < mHashTableSize; i++) 00428 { 00429 if (hashTable[i].duplicates) 00430 { 00431 QPtrList<string_entry> *dups = hashTable[i].duplicates; 00432 hashTable[i].duplicate_offset = str.device()->at(); 00433 00434 /*kdDebug(7011) << QString("Duplicate lists: Offset = %1 list_size = %2") .arg(hashTable[i].duplicate_offset,8,16).arg(dups->count()) << endl; 00435 */ 00436 for(string_entry *dup = dups->first(); dup; dup=dups->next()) 00437 { 00438 str << (Q_INT32) dup->payload->offset(); // Positive ID 00439 str << dup->keyStr; // Key (QString) 00440 } 00441 str << (Q_INT32) 0; // End of list marker (0) 00442 } 00443 } 00444 //kdDebug(7011) << QString("End of Dict, offset = %1").arg(str.device()->at(),8,16) << endl; 00445 } 00446 00447 //kdDebug(7011) << "Cleaning up hash table." << endl; 00448 for(uint i=0; i < mHashTableSize; i++) 00449 { 00450 delete hashTable[i].duplicates; 00451 } 00452 delete [] hashTable; 00453 } 00454
KDE Logo
This file is part of the documentation for kdecore Library Version 3.2.3.
Documentation copyright © 1996-2004 the KDE developers.
Generated on Wed Mar 16 17:21:43 2005 by doxygen 1.3.7 written by Dimitri van Heesch, © 1997-2003