• Skip to content
  • Skip to link menu
KDE 4.1 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KDECore

ksycocadict.cpp

Go to the documentation of this file.
00001 /*  This file is part of the KDE libraries
00002  *  Copyright (C) 1999 Waldo Bastian <bastian@kde.org>
00003  *
00004  *  This library is free software; you can redistribute it and/or
00005  *  modify it under the terms of the GNU Library General Public
00006  *  License version 2 as published by the Free Software Foundation;
00007  *
00008  *  This library is distributed in the hope that it will be useful,
00009  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00010  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00011  *  Library General Public License for more details.
00012  *
00013  *  You should have received a copy of the GNU Library General Public License
00014  *  along with this library; see the file COPYING.LIB.  If not, write to
00015  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00016  *  Boston, MA 02110-1301, USA.
00017  **/
00018 
00019 #include "ksycocadict.h"
00020 #include "ksycocaentry.h"
00021 #include "ksycoca.h"
00022 #include "kdebug.h"
00023 
00024 #include <QtCore/QBitArray>
00025 
00026 namespace
00027 {
00028 struct string_entry {
00029     string_entry(const QString& _key, const KSycocaEntry::Ptr& _payload)
00030       : hash(0), length(_key.length()), keyStr(_key), key(keyStr.unicode()), payload(_payload)
00031     {}
00032     uint hash;
00033     const int length;
00034     const QString keyStr;
00035     const QChar * const key; // always points to keyStr.unicode(); just an optimization
00036     const KSycocaEntry::Ptr payload;
00037 };
00038 }
00039 
00040 class KSycocaDictStringList : public QList<string_entry*>
00041 {
00042 public:
00043    ~KSycocaDictStringList() {
00044        qDeleteAll(*this);
00045    }
00046 };
00047 
00048 class KSycocaDict::Private
00049 {
00050 public:
00051     Private()
00052         : stringlist( 0 ),
00053           stream( 0 ),
00054           offset( 0 )
00055     {
00056     }
00057 
00058     ~Private()
00059     {
00060         delete stringlist;
00061     }
00062 
00063     // Helper for find_string and findMultiString
00064     qint32 offsetForKey(const QString& key) const;
00065 
00066     // Calculate hash - can be used during loading and during saving.
00067     quint32 hashKey(const QString & key) const;
00068 
00069     KSycocaDictStringList *stringlist;
00070     QDataStream *stream;
00071     qint32 offset;
00072     quint32 hashTableSize;
00073     QList<qint32> hashList;
00074 };
00075 
00076 KSycocaDict::KSycocaDict()
00077   : d( new Private )
00078 {
00079 }
00080 
00081 KSycocaDict::KSycocaDict(QDataStream *str, int offset)
00082   : d( new Private )
00083 {
00084    d->stream = str;
00085    d->offset = offset;
00086 
00087    quint32 test1, test2;
00088    str->device()->seek(offset);
00089    (*str) >> test1 >> test2;
00090    if ((test1 > 0x000fffff) || (test2 > 1024))
00091    {
00092        KSycoca::flagError();
00093        d->hashTableSize = 0;
00094        d->offset = 0;
00095        return;
00096    }
00097 
00098    str->device()->seek(offset);
00099    (*str) >> d->hashTableSize;
00100    (*str) >> d->hashList;
00101    d->offset = str->device()->pos(); // Start of hashtable
00102 }
00103 
00104 KSycocaDict::~KSycocaDict()
00105 {
00106    delete d;
00107 }
00108 
00109 void
00110 KSycocaDict::add(const QString &key, const KSycocaEntry::Ptr& payload)
00111 {
00112    if (key.isEmpty()) return; // Not allowed (should never happen)
00113    if (!payload) return; // Not allowed!
00114    if (!d->stringlist)
00115    {
00116        d->stringlist = new KSycocaDictStringList;
00117    }
00118 
00119    string_entry *entry = new string_entry(key, payload);
00120    d->stringlist->append(entry);
00121 }
00122 
00123 void
00124 KSycocaDict::remove(const QString &key)
00125 {
00126    if ( !d || !d->stringlist )
00127       return;
00128 
00129    for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it) {
00130       string_entry* entry = *it;
00131       if (entry->keyStr == key) {
00132          d->stringlist->erase(it);
00133          delete entry;
00134          break;
00135       }
00136    }
00137 }
00138 
00139 int KSycocaDict::find_string(const QString &key ) const
00140 {
00141     Q_ASSERT(d);
00142 
00143     //kDebug(7011) << QString("KSycocaDict::find_string(%1)").arg(key);
00144     qint32 offset = d->offsetForKey(key);
00145 
00146     //kDebug(7011) << QString("offset is %1").arg(offset,8,16);
00147     if (offset == 0)
00148         return 0;
00149 
00150     if (offset > 0)
00151         return offset; // Positive ID
00152 
00153     // Lookup duplicate list.
00154     offset = -offset;
00155 
00156     d->stream->device()->seek(offset);
00157     //kDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16);
00158 
00159    while(true)
00160    {
00161        (*d->stream) >> offset;
00162        if (offset == 0) break;
00163        QString dupkey;
00164        (*d->stream) >> dupkey;
00165        //kDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey);
00166        if (dupkey == key) return offset;
00167    }
00168    //kWarning(7011) << "Not found!";
00169 
00170    return 0;
00171 }
00172 
00173 
00174 QList<int> KSycocaDict::findMultiString(const QString &key ) const
00175 {
00176     qint32 offset = d->offsetForKey(key);
00177     QList<int> offsetList;
00178     if (offset == 0)
00179         return offsetList;
00180 
00181     if (offset > 0) { // Positive ID: one entry found
00182         offsetList.append(offset);
00183         return offsetList;
00184     }
00185 
00186     // Lookup duplicate list.
00187     offset = -offset;
00188 
00189     d->stream->device()->seek(offset);
00190     //kDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16);
00191 
00192     while(true)
00193     {
00194         (*d->stream) >> offset;
00195         if (offset == 0) break;
00196         QString dupkey;
00197         (*d->stream) >> dupkey;
00198         //kDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey);
00199         if (dupkey == key)
00200             offsetList.append(offset);
00201     }
00202     return offsetList;
00203 }
00204 
00205 uint KSycocaDict::count()
00206 {
00207    if ( !d || !d->stringlist ) return 0;
00208 
00209    return d->stringlist->count();
00210 }
00211 
00212 void
00213 KSycocaDict::clear()
00214 {
00215    delete d;
00216    d = 0;
00217 }
00218 
00219 uint KSycocaDict::Private::hashKey( const QString &key) const
00220 {
00221    int l = key.length();
00222    register uint h = 0;
00223 
00224    for(int i = 0; i < hashList.count(); i++)
00225    {
00226       int pos = hashList[i];
00227       if (pos < 0)
00228       {
00229          pos = -pos-1;
00230          if (pos < l)
00231             h = ((h * 13) + (key[l-pos].cell() % 29)) & 0x3ffffff;
00232       }
00233       else
00234       {
00235          pos = pos-1;
00236          if (pos < l)
00237             h = ((h * 13) + (key[pos].cell() % 29)) & 0x3ffffff;
00238       }
00239    }
00240    return h;
00241 }
00242 
00243 //
00244 // Calculate the diversity of the strings at position 'pos'
00245 static int
00246 calcDiversity(KSycocaDictStringList *stringlist, int pos, int sz)
00247 {
00248    if (pos == 0) return 0;
00249    QBitArray matrix(sz);
00250    uint usz = sz;
00251 
00252    if (pos < 0)
00253    {
00254       pos = -pos-1;
00255       for(KSycocaDictStringList::Iterator it = stringlist->begin(); it != stringlist->end(); ++it)
00256       {
00257         string_entry* entry = *it;
00258     register int l = entry->length;
00259          if (pos < l && pos != 0)
00260          {
00261            register uint hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3ffffff;
00262        matrix.setBit( hash % usz, true );
00263          }
00264       }
00265    }
00266    else
00267    {
00268       pos = pos-1;
00269       for(KSycocaDictStringList::Iterator it = stringlist->begin(); it != stringlist->end(); ++it)
00270       {
00271          string_entry* entry = *it;
00272          if (pos < entry->length)
00273          {
00274             register uint hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3ffffff;
00275         matrix.setBit( hash % usz, true );
00276          }
00277       }
00278    }
00279    int diversity = 0;
00280    for(int i=0;i< sz;i++)
00281       if (matrix.testBit(i)) diversity++;
00282 
00283    return diversity;
00284 }
00285 
00286 //
00287 // Add the diversity of the strings at position 'pos'
00288 static void
00289 addDiversity(KSycocaDictStringList *stringlist, int pos)
00290 {
00291    if (pos == 0) return;
00292    if (pos < 0)
00293    {
00294       pos = -pos-1;
00295       for(KSycocaDictStringList::Iterator it = stringlist->begin(); it != stringlist->end(); ++it)
00296       {
00297          string_entry* entry = *it;
00298          register int l = entry->length;
00299          if (pos < l)
00300             entry->hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3fffffff;
00301       }
00302    }
00303    else
00304    {
00305       pos = pos - 1;
00306       for(KSycocaDictStringList::Iterator it = stringlist->begin(); it != stringlist->end(); ++it)
00307       {
00308          string_entry* entry = *it;
00309          if (pos < entry->length)
00310             entry->hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3fffffff;
00311       }
00312    }
00313 }
00314 
00315 
00316 void
00317 KSycocaDict::save(QDataStream &str)
00318 {
00319    if (count() == 0)
00320    {
00321       d->hashTableSize = 0;
00322       d->hashList.clear();
00323       str << d->hashTableSize;
00324       str << d->hashList;
00325       return;
00326    }
00327 
00328    d->offset = str.device()->pos();
00329 
00330    //kDebug(7011) << QString("KSycocaDict: %1 entries.").arg(count());
00331 
00332    //kDebug(7011) << "Calculating hash keys..";
00333 
00334    int maxLength = 0;
00335    //kDebug(7011) << "Finding maximum string length";
00336    for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it)
00337    {
00338       string_entry* entry = *it;
00339       entry->hash = 0;
00340       if (entry->length > maxLength)
00341          maxLength = entry->length;
00342    }
00343 
00344    //kDebug(7011) << QString("Max string length = %1").arg(maxLength);
00345 
00346    // use "almost prime" number for sz (to calculate diversity) and later
00347    // for the table size of big tables
00348    // int sz = d->stringlist->count()*5-1;
00349    register unsigned int sz = count()*4 + 1;
00350    while(!(((sz % 3) && (sz % 5) && (sz % 7) && (sz % 11) && (sz % 13))))
00351       sz+=2;
00352 
00353    int maxDiv = 0;
00354    int maxPos = 0;
00355    int lastDiv = 0;
00356 
00357    d->hashList.clear();
00358 
00359    // try to limit diversity scan by "predicting" positions
00360    // with high diversity
00361    int *oldvec=new int[maxLength*2+1];
00362    for (int i=0; i<(maxLength*2+1); i++) oldvec[i]=0;
00363    int mindiv=0;
00364 
00365    while(true)
00366    {
00367       int divsum=0,divnum=0;
00368 
00369       maxDiv = 0;
00370       maxPos = 0;
00371       for(int pos=-maxLength; pos <= maxLength; pos++)
00372       {
00373          // cut off
00374          if (oldvec[pos+maxLength]<mindiv)
00375          { oldvec[pos+maxLength]=0; continue; }
00376 
00377          int diversity = calcDiversity(d->stringlist, pos, sz);
00378          if (diversity > maxDiv)
00379          {
00380             maxDiv = diversity;
00381             maxPos = pos;
00382          }
00383          oldvec[pos+maxLength]=diversity;
00384          divsum+=diversity; divnum++;
00385       }
00386       // arbitrary cut-off value 3/4 of average seems to work
00387       if (divnum)
00388          mindiv=(3*divsum)/(4*divnum);
00389 
00390       if (maxDiv <= lastDiv)
00391          break;
00392       // qWarning("Max Div = %d at pos %d", maxDiv, maxPos);
00393       lastDiv = maxDiv;
00394       addDiversity(d->stringlist, maxPos);
00395       d->hashList.append(maxPos);
00396    }
00397 
00398    delete [] oldvec;
00399 
00400    for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it)
00401       (*it)->hash = d->hashKey((*it)->keyStr);
00402 // fprintf(stderr, "Calculating minimum table size..\n");
00403 
00404    d->hashTableSize = sz;
00405 
00406    struct hashtable_entry {
00407       string_entry *entry;
00408       QList<string_entry*>* duplicates;
00409       int duplicate_offset;
00410    };
00411 
00412    hashtable_entry *hashTable = new hashtable_entry[ sz ];
00413 
00414    //kDebug(7011) << "Clearing hashtable...";
00415    for (unsigned int i=0; i < sz; i++)
00416    {
00417       hashTable[i].entry = 0;
00418       hashTable[i].duplicates = 0;
00419    }
00420 
00421    //kDebug(7011) << "Filling hashtable...";
00422    for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it)
00423    {
00424       string_entry* entry = *it;
00425       int hash = entry->hash % sz;
00426       if (!hashTable[hash].entry)
00427       { // First entry
00428          hashTable[hash].entry = entry;
00429       }
00430       else
00431       {
00432          if (!hashTable[hash].duplicates)
00433          { // Second entry, build duplicate list.
00434             hashTable[hash].duplicates = new QList<string_entry*>;
00435             hashTable[hash].duplicates->append(hashTable[hash].entry);
00436             hashTable[hash].duplicate_offset = 0;
00437          }
00438          hashTable[hash].duplicates->append(entry);
00439       }
00440    }
00441 
00442    str << d->hashTableSize;
00443    str << d->hashList;
00444 
00445    d->offset = str.device()->pos(); // d->offset points to start of hashTable
00446    //kDebug(7011) << QString("Start of Hash Table, offset = %1").arg(d->offset,8,16);
00447 
00448    // Write the hashtable + the duplicates twice.
00449    // The duplicates are after the normal hashtable, but the offset of each
00450    // duplicate entry is written into the normal hashtable.
00451    for(int pass = 1; pass <= 2; pass++)
00452    {
00453       str.device()->seek(d->offset);
00454       //kDebug(7011) << QString("Writing hash table (pass #%1)").arg(pass);
00455       for(uint i=0; i < d->hashTableSize; i++)
00456       {
00457          qint32 tmpid;
00458          if (!hashTable[i].entry)
00459             tmpid = (qint32) 0;
00460          else if (!hashTable[i].duplicates)
00461             tmpid = (qint32) hashTable[i].entry->payload->offset(); // Positive ID
00462          else
00463             tmpid = (qint32) -hashTable[i].duplicate_offset; // Negative ID
00464          str << tmpid;
00465          //kDebug(7011) << QString("Hash table : %1").arg(tmpid,8,16);
00466       }
00467       //kDebug(7011) << QString("End of Hash Table, offset = %1").arg(str.device()->at(),8,16);
00468 
00469       //kDebug(7011) << QString("Writing duplicate lists (pass #%1)").arg(pass);
00470       for(uint i=0; i < d->hashTableSize; i++)
00471       {
00472          const QList<string_entry*> *dups = hashTable[i].duplicates;
00473          if (dups)
00474          {
00475             hashTable[i].duplicate_offset = str.device()->pos();
00476 
00477             /*kDebug(7011) << QString("Duplicate lists: Offset = %1 list_size = %2")                           .arg(hashTable[i].duplicate_offset,8,16).arg(dups->count());
00478 */
00479         for(QList<string_entry*>::ConstIterator dup = dups->begin(); dup != dups->end(); ++dup)
00480             {
00481                const qint32 offset = (*dup)->payload->offset();
00482                // save() must have been called on the entry
00483                Q_ASSERT_X( offset, "KSycocaDict::save",
00484                            QByteArray("entry offset is 0, save() was not called on ")
00485                            + (*dup)->payload->name().toLatin1() );
00486                str << offset ;                       // Positive ID
00487                str << (*dup)->keyStr;                // Key (QString)
00488             }
00489             str << (qint32) 0;               // End of list marker (0)
00490          }
00491       }
00492       //kDebug(7011) << QString("End of Dict, offset = %1").arg(str.device()->at(),8,16);
00493    }
00494 
00495    //kDebug(7011) << "Cleaning up hash table.";
00496    for(uint i=0; i < d->hashTableSize; i++)
00497    {
00498       delete hashTable[i].duplicates;
00499    }
00500    delete [] hashTable;
00501 }
00502 
00503 qint32 KSycocaDict::Private::offsetForKey(const QString& key) const
00504 {
00505    if ( !stream || !offset )
00506    {
00507       kError() << "No ksycoca4 database available!" << endl;
00508       return 0;
00509    }
00510 
00511    if (hashTableSize == 0)
00512       return 0; // Unlikely to find anything :-]
00513 
00514    // Read hash-table data
00515    const uint hash = hashKey(key) % hashTableSize;
00516    //kDebug(7011) << "hash is" << hash;
00517 
00518    const uint off = offset+sizeof(qint32)*hash;
00519    //kDebug(7011) << QString("off is %1").arg(off,8,16);
00520    stream->device()->seek( off );
00521 
00522    qint32 retOffset;
00523    (*stream) >> retOffset;
00524    return retOffset;
00525 }

KDECore

Skip menu "KDECore"
  • Main Page
  • Modules
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • KIO
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • Kross
  • KUtils
  • Nepomuk
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.5.4
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal