• Skip to content
  • Skip to link menu
KDE 4.1 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KDECore

kmimetypefactory.cpp

Go to the documentation of this file.
00001 /*  This file is part of the KDE libraries
00002  *  Copyright (C) 1999 Waldo Bastian <bastian@kde.org>
00003  *  Copyright (C) 2006 David Faure <faure@kde.org>
00004  *
00005  *  This library is free software; you can redistribute it and/or
00006  *  modify it under the terms of the GNU Library General Public
00007  *  License version 2 as published by the Free Software Foundation;
00008  *
00009  *  This library is distributed in the hope that it will be useful,
00010  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012  *  Library General Public License for more details.
00013  *
00014  *  You should have received a copy of the GNU Library General Public License
00015  *  along with this library; see the file COPYING.LIB.  If not, write to
00016  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00017  *  Boston, MA 02110-1301, USA.
00018  */
00019 
00020 #include "kmimetypefactory.h"
00021 #include "kmimetype.h"
00022 #include "kfoldermimetype.h"
00023 #include <ksycoca.h>
00024 #include <ksycocadict.h>
00025 #include <kshell.h>
00026 #include <kdebug.h>
00027 
00029 
00030 KMimeTypeFactory::KMimeTypeFactory()
00031     : KSycocaFactory( KST_KMimeTypeFactory ),
00032       m_magicFilesParsed(false)
00033 {
00034     _self = this;
00035     m_fastPatternOffset = 0;
00036     m_otherPatternOffset = 0;
00037     if (m_str) {
00038         // Read Header
00039         qint32 i;
00040         (*m_str) >> i;
00041         m_fastPatternOffset = i;
00042         (*m_str) >> i;
00043         m_otherPatternOffset = i;
00044 
00045         // alias map
00046         qint32 n;
00047         (*m_str) >> n;
00048         QString str1, str2;
00049         for(;n;n--) {
00050             KSycocaEntry::read(*m_str, str1);
00051             KSycocaEntry::read(*m_str, str2);
00052             m_aliases.insert(str1, str2);
00053         }
00054 
00055         const int saveOffset = m_str->device()->pos();
00056         // Init index tables
00057         m_fastPatternDict = new KSycocaDict(m_str, m_fastPatternOffset);
00058         m_str->device()->seek(saveOffset);
00059 
00060     } else {
00061         // Build new database
00062         m_fastPatternDict = new KSycocaDict();
00063     }
00064 }
00065 
00066 KMimeTypeFactory::~KMimeTypeFactory()
00067 {
00068     _self = 0;
00069     delete m_fastPatternDict;
00070 }
00071 
00072 KMimeTypeFactory * KMimeTypeFactory::self()
00073 {
00074     if (!_self)
00075         _self = new KMimeTypeFactory();
00076     return _self;
00077 }
00078 
00079 KMimeType::Ptr KMimeTypeFactory::findMimeTypeByName(const QString &_name, KMimeType::FindByNameOption options)
00080 {
00081     if (!sycocaDict()) return KMimeType::Ptr(); // Error!
00082     assert (!KSycoca::self()->isBuilding());
00083 
00084     QString name = _name;
00085     if (options & KMimeType::ResolveAliases) {
00086         QMap<QString, QString>::const_iterator it = m_aliases.find(_name);
00087         if (it != m_aliases.end())
00088             name = *it;
00089     }
00090 
00091     int offset = sycocaDict()->find_string( name );
00092     if (!offset) return KMimeType::Ptr(); // Not found
00093     KMimeType::Ptr newMimeType(createEntry(offset));
00094 
00095     // Check whether the dictionary was right.
00096     if (newMimeType && (newMimeType->name() != name))
00097     {
00098         // No it wasn't...
00099         newMimeType = 0; // Not found
00100     }
00101     return newMimeType;
00102 }
00103 
00104 bool KMimeTypeFactory::checkMimeTypes()
00105 {
00106    QDataStream *str = KSycoca::self()->findFactory( factoryId() );
00107    if (!str) return false;
00108 
00109    // check if there are mimetypes
00110    return !isEmpty();
00111 }
00112 
00113 KMimeType * KMimeTypeFactory::createEntry(int offset) const
00114 {
00115    KMimeType *newEntry = 0;
00116    KSycocaType type;
00117    QDataStream *str = KSycoca::self()->findEntry(offset, type);
00118    if (!str) return 0;
00119 
00120    switch(type)
00121    {
00122      case KST_KMimeType:
00123      case KST_KDEDesktopMimeType: // old, compat only
00124         newEntry = new KMimeType(*str, offset);
00125         break;
00126      case KST_KFolderMimeType:
00127         newEntry = new KFolderMimeType(*str, offset);
00128         break;
00129 
00130      default:
00131         kError(7011) << QString("KMimeTypeFactory: unexpected object entry in KSycoca database (type = %1)").arg((int)type) << endl;
00132         break;
00133    }
00134    if (newEntry && !newEntry->isValid())
00135    {
00136       kError(7011) << "KMimeTypeFactory: corrupt object in KSycoca database!\n" << endl;
00137       delete newEntry;
00138       newEntry = 0;
00139    }
00140    return newEntry;
00141 }
00142 
00143 
00144 QString KMimeTypeFactory::resolveAlias(const QString& mime)
00145 {
00146     return m_aliases.value(mime);
00147 }
00148 
00149 QList<KMimeType::Ptr> KMimeTypeFactory::findFromFileName( const QString &filename, QString *matchingExtension )
00150 {
00151     // Assume we're NOT building a database
00152     if (!m_str) return QList<KMimeType::Ptr>();
00153 
00154     // "Applications MUST first try a case-sensitive match, then try again with
00155     // the filename converted to lower-case if that fails. This is so that
00156     // main.C will be seen as a C++ file, but IMAGE.GIF will still use the
00157     // *.gif pattern."
00158     QList<KMimeType::Ptr> mimeList = findFromFileNameHelper(filename, matchingExtension);
00159     if (mimeList.isEmpty()) {
00160         const QString lowerCase = filename.toLower();
00161         if (lowerCase != filename)
00162             mimeList = findFromFileNameHelper(lowerCase, matchingExtension);
00163     }
00164     return mimeList;
00165 }
00166 
00167 QList<KMimeType::Ptr> KMimeTypeFactory::findFromFastPatternDict(const QString &extension)
00168 {
00169     QList<KMimeType::Ptr> mimeList;
00170     if (!m_fastPatternDict) return mimeList; // Error!
00171 
00172     // Warning : this assumes we're NOT building a database
00173 
00174     const QList<int> offsetList = m_fastPatternDict->findMultiString(extension);
00175     foreach(int offset, offsetList) {
00176         KMimeType::Ptr newMimeType(createEntry(offset));
00177         // Check whether the dictionary was right.
00178         if (newMimeType && newMimeType->patterns().contains("*."+extension)) {
00179             mimeList.append(newMimeType);
00180         }
00181     }
00182     return mimeList;
00183 }
00184 
00185 static bool matchFileName( const QString &filename, const QString &pattern )
00186 {
00187     int pattern_len = pattern.length();
00188     if (!pattern_len)
00189         return false;
00190     int len = filename.length();
00191 
00192     // Patterns like "*~", "*.extension"
00193     if (pattern[0] == '*'  && pattern.indexOf('[') == -1)
00194     {
00195         if ( len + 1 < pattern_len ) return false;
00196 
00197         const QChar *c1 = pattern.unicode() + pattern_len - 1;
00198         const QChar *c2 = filename.unicode() + len - 1;
00199         int cnt = 1;
00200         while (cnt < pattern_len && *c1-- == *c2--)
00201             ++cnt;
00202         return cnt == pattern_len;
00203     }
00204 
00205     // Patterns like "README*" (well this is currently the only one like that...)
00206     if (pattern[pattern_len - 1] == '*') {
00207         if ( len + 1 < pattern_len ) return false;
00208         if (pattern[0] == '*')
00209             return filename.indexOf(pattern.mid(1, pattern_len - 2)) != -1;
00210 
00211         const QChar *c1 = pattern.unicode();
00212         const QChar *c2 = filename.unicode();
00213         int cnt = 1;
00214         while (cnt < pattern_len && *c1++ == *c2++)
00215            ++cnt;
00216         return cnt == pattern_len;
00217     }
00218 
00219     // Names without any wildcards like "README"
00220     if (pattern.indexOf('[') == -1 && pattern.indexOf('*') == -1 && pattern.indexOf('?'))
00221         return (pattern == filename);
00222 
00223     // Other patterns, like "[Mm]akefile": use slow but correct method
00224     QRegExp rx(pattern);
00225     rx.setPatternSyntax(QRegExp::Wildcard);
00226     return rx.exactMatch(filename);
00227 }
00228 
00229 QList<KMimeType::Ptr> KMimeTypeFactory::findFromFileNameHelper( const QString &_filename, QString *matchingExtension )
00230 {
00231     QList<KMimeType::Ptr> matchingMimeTypes;
00232 
00233     // Get stream to the header
00234     QDataStream *str = m_str;
00235 
00236     // Extract extension
00237     const int lastDot = _filename.lastIndexOf('.');
00238     int matchingPatternLength = 0;
00239 
00240     if (lastDot != -1) { // if no '.', skip the extension lookup
00241         const int ext_len = _filename.length() - lastDot - 1;
00242         const QString simpleExtension = _filename.right( ext_len );
00243 
00244         matchingMimeTypes = findFromFastPatternDict(simpleExtension);
00245         if (!matchingMimeTypes.isEmpty()) {
00246             matchingPatternLength = simpleExtension.length() + 2; // *.foo -> length=5
00247             if (matchingExtension)
00248                 *matchingExtension = simpleExtension;
00249             // Keep going, there might be some matches from the 'other' list, like *.tar.bz2
00250         }
00251     }
00252 
00253     // Now try the "other" Pattern table
00254     if ( m_otherPatterns.isEmpty() ) {
00255         // Load it only once
00256         str->device()->seek( m_otherPatternOffset );
00257 
00258         QString pattern;
00259         qint32 mimetypeOffset;
00260 
00261         while (true)
00262         {
00263             KSycocaEntry::read(*str, pattern);
00264             if (pattern.isEmpty()) // end of list
00265                 break;
00266             (*str) >> mimetypeOffset;
00267             m_otherPatterns.push_back( pattern );
00268             m_otherPatterns_offsets.push_back( mimetypeOffset );
00269         }
00270     }
00271 
00272     assert( m_otherPatterns.size() == m_otherPatterns_offsets.size() );
00273 
00274     QStringList::const_iterator it = m_otherPatterns.begin();
00275     const QStringList::const_iterator end = m_otherPatterns.end();
00276     QList<qint32>::const_iterator it_offset = m_otherPatterns_offsets.begin();
00277 
00278     for ( ; it != end; ++it, ++it_offset ) {
00279         const QString pattern = *it;
00280         if ( matchFileName( _filename, pattern ) ) {
00281             // Is this a shorter or a longer match than an existing one, or same length?
00282             if (pattern.length() < matchingPatternLength) {
00283                 continue; // too short, ignore
00284             } else if (pattern.length() > matchingPatternLength) {
00285                 // longer: clear any previous match (like *.bz2, when pattern is *.tar.bz2)
00286                 matchingMimeTypes.clear();
00287                 // remember the new "longer" length
00288                 matchingPatternLength = pattern.length();
00289             }
00290             KMimeType *newMimeType = createEntry( *it_offset );
00291             assert (newMimeType && newMimeType->isType( KST_KMimeType ));
00292             matchingMimeTypes.append( KMimeType::Ptr( newMimeType ) );
00293             if (matchingExtension && pattern.startsWith("*."))
00294                 *matchingExtension = pattern.mid(2);
00295         }
00296     }
00297 
00298     return matchingMimeTypes;
00299 }
00300 
00301 // TODO: remove unused whichPriority argument, once XDG shared-mime is updated
00302 KMimeType::Ptr KMimeTypeFactory::findFromContent(QIODevice* device, WhichPriority whichPriority, int* accuracy, QByteArray& beginning)
00303 {
00304     Q_ASSERT(device->isOpen());
00305     if (device->size() == 0) {
00306         if (accuracy)
00307             *accuracy = 100;
00308         return findMimeTypeByName("application/x-zerosize");
00309     }
00310 
00311     if (!m_magicFilesParsed) {
00312         parseMagic();
00313         m_magicFilesParsed = true;
00314     }
00315 
00316     for ( QList<KMimeMagicRule>::const_iterator it = m_magicRules.begin(), end = m_magicRules.end();
00317           it != end; ++it ) {
00318         const KMimeMagicRule& rule = *it;
00319         // HighPriorityRules: select rules with priority >= 80
00320         // LowPriorityRules: select rules with priority < 80
00321         if ( ( whichPriority == AllRules ) ||
00322              ( (rule.priority() >= 80) == (whichPriority == HighPriorityRules) ) ) {
00323             if (rule.match(device, beginning)) {
00324                 if (accuracy)
00325                     *accuracy = rule.priority();
00326                 return findMimeTypeByName(rule.mimetype());
00327             }
00328         }
00329         // Rules are sorted by decreasing priority, so we can abort when we're past high-prio rules
00330         if (whichPriority == HighPriorityRules && rule.priority() < 80)
00331             break;
00332     }
00333 
00334     // Do fallback code so that we never return 0 - unless we were only looking for HighPriorityRules
00335     if (whichPriority != HighPriorityRules) {
00336         // Nothing worked, check if the file contents looks like binary or text
00337         if (!KMimeType::isBufferBinaryData(beginning)) {
00338             if (accuracy)
00339                 *accuracy = 5;
00340             return findMimeTypeByName("text/plain");
00341         }
00342         if (accuracy)
00343             *accuracy = 0;
00344         return KMimeType::defaultMimeTypePtr();
00345     }
00346 
00347     return KMimeType::Ptr();
00348 }
00349 
00350 KMimeType::List KMimeTypeFactory::allMimeTypes()
00351 {
00352     KMimeType::List result;
00353     const KSycocaEntry::List list = allEntries();
00354     for( KSycocaEntry::List::ConstIterator it = list.begin();
00355          it != list.end();
00356          ++it)
00357     {
00358         Q_ASSERT( (*it)->isType( KST_KMimeType ) );
00359         result.append( KMimeType::Ptr::staticCast( *it ) );
00360     }
00361     return result;
00362 }
00363 
00364 QMap<QString, QString>& KMimeTypeFactory::aliases()
00365 {
00366     return m_aliases;
00367 }
00368 
00369 KMimeTypeFactory *KMimeTypeFactory::_self = 0;
00370 
00371 void KMimeTypeFactory::virtual_hook( int id, void* data )
00372 { KSycocaFactory::virtual_hook( id, data ); }
00373 
00374 
00375 #include <arpa/inet.h> // for ntohs
00376 #include <kstandarddirs.h>
00377 #include <QFile>
00378 
00379 // Sort them in descending order of priority
00380 static bool mimeMagicRuleCompare(const KMimeMagicRule& lhs, const KMimeMagicRule& rhs) {
00381     return lhs.priority() > rhs.priority();
00382 }
00383 
00384 
00385 void KMimeTypeFactory::parseMagic()
00386 {
00387     const QStringList magicFiles = KGlobal::dirs()->findAllResources("xdgdata-mime", "magic");
00388     //kDebug() << magicFiles;
00389     QListIterator<QString> magicIter( magicFiles );
00390     magicIter.toBack();
00391     while (magicIter.hasPrevious()) { // global first, then local. Turns out it doesn't matter though.
00392         const QString fileName = magicIter.previous();
00393         QFile magicFile(fileName);
00394         kDebug() << "Now parsing " << fileName;
00395         if (magicFile.open(QIODevice::ReadOnly))
00396             m_magicRules += parseMagicFile(&magicFile, fileName);
00397     }
00398     qSort(m_magicRules.begin(), m_magicRules.end(), mimeMagicRuleCompare);
00399 }
00400 
00401 static char readNumber(qint64& value, QIODevice* file)
00402 {
00403     char ch;
00404     while (file->getChar(&ch)) {
00405         if (ch < '0' || ch > '9')
00406             return ch;
00407         value = 10 * value + ch - '0';
00408     }
00409     // eof
00410     return '\0';
00411 }
00412 
00413 
00414 #define MAKE_LITTLE_ENDIAN16(val) val = (quint16)(((quint16)(val) << 8)|((quint16)(val) >> 8))
00415 
00416 #define MAKE_LITTLE_ENDIAN32(val) \
00417    val = (((quint32)(val) & 0xFF000000U) >> 24) | \
00418          (((quint32)(val) & 0x00FF0000U) >> 8) | \
00419          (((quint32)(val) & 0x0000FF00U) << 8) | \
00420          (((quint32)(val) & 0x000000FFU) << 24)
00421 
00422 QList<KMimeMagicRule> KMimeTypeFactory::parseMagicFile(QIODevice* file, const QString& fileName) const
00423 {
00424     QList<KMimeMagicRule> rules;
00425     QByteArray header = file->read(12);
00426     if (header != QByteArray::fromRawData("MIME-Magic\0\n", 12)) {
00427         kWarning(7009) << "Invalid magic file " << fileName << " starts with " << header;
00428         return rules;
00429     }
00430     QList<KMimeMagicMatch> matches; // toplevel matches (indent==0)
00431     int priority = 0; // to avoid warning
00432     QString mimeTypeName;
00433 
00434     Q_FOREVER {
00435         char ch = '\0';
00436         bool chOk = file->getChar(&ch);
00437 
00438         if (!chOk || ch == '[') {
00439             // Finish previous section
00440             if (!mimeTypeName.isEmpty()) {
00441                 rules.append(KMimeMagicRule(mimeTypeName, priority, matches));
00442                 matches.clear();
00443                 mimeTypeName.clear();
00444             }
00445             if (file->atEnd())
00446                 break; // done
00447 
00448             // Parse new section
00449             const QString line = file->readLine();
00450             const int pos = line.indexOf(':');
00451             if (pos == -1) { // syntax error
00452                 kWarning(7009) << "Syntax error in " << mimeTypeName
00453                                << " ':' not present in section name" << endl;
00454                 break;
00455             }
00456             priority = line.left(pos).toInt();
00457             mimeTypeName = line.mid(pos+1);
00458             mimeTypeName = mimeTypeName.left(mimeTypeName.length()-2); // remove ']\n'
00459             //kDebug(7009) << "New rule for " << mimeTypeName
00460             //             << " with priority " << priority << endl;
00461         } else {
00462             // Parse line in the section
00463             // [ indent ] ">" start-offset "=" value
00464             //   [ "&" mask ] [ "~" word-size ] [ "+" range-length ] "\n"
00465             qint64 indent = 0;
00466             if (ch != '>') {
00467                 indent = ch - '0';
00468                 ch = readNumber(indent, file);
00469                 if (ch != '>') {
00470                     kWarning(7009) << "Invalid magic file " << fileName << " '>' not found, got " << ch << " at pos " << file->pos();
00471                     break;
00472                 }
00473             }
00474 
00475             KMimeMagicMatch match;
00476             match.m_rangeStart = 0;
00477             ch = readNumber(match.m_rangeStart, file);
00478             if (ch != '=') {
00479                 kWarning(7009) << "Invalid magic file " << fileName << " '=' not found";
00480                 break;
00481             }
00482 
00483             char lengthBuffer[2];
00484             if (file->read(lengthBuffer, 2) != 2)
00485                 break;
00486             const short valueLength = ntohs(*(short*)lengthBuffer);
00487             //kDebug() << "indent=" << indent << " rangeStart=" << match.m_rangeStart
00488             //         << " valueLength=" << valueLength << endl;
00489 
00490             match.m_data.resize(valueLength);
00491             if (file->read(match.m_data.data(), valueLength) != valueLength)
00492                 break;
00493 
00494             match.m_rangeLength = 1;
00495             bool invalidLine = false;
00496 
00497             if (!file->getChar(&ch))
00498                 break;
00499             qint64 wordSize = 1;
00500 
00501             Q_FOREVER {
00502                 // We get 'ch' before coming here, or as part of the parsing in each case below.
00503                 switch (ch) {
00504                 case '\n':
00505                     break;
00506                 case '&':
00507                     match.m_mask.resize(valueLength);
00508                     if (file->read(match.m_mask.data(), valueLength) != valueLength)
00509                         invalidLine = true;
00510                     if (!file->getChar(&ch))
00511                         invalidLine = true;
00512                     break;
00513                 case '~': {
00514                     wordSize = 0;
00515                     ch = readNumber(wordSize, file);
00516                     //kDebug() << "wordSize=" << wordSize;
00517                     break;
00518                 }
00519                 case '+':
00520                     // Parse range length
00521                     match.m_rangeLength = 0;
00522                     ch = readNumber(match.m_rangeLength, file);
00523                     if (ch == '\n')
00524                         break;
00525                     // fall-through intended
00526                 default:
00527                     // "If an unknown character is found where a newline is expected
00528                     // then the whole line should be ignored (there will be no binary
00529                     // data after the new character, so the next line starts after the
00530                     // next "\n" character). This is for future extensions.", says spec
00531                     while (ch != '\n' && !file->atEnd()) {
00532                         file->getChar(&ch);
00533                     }
00534                     invalidLine = true;
00535                     kDebug(7009) << "invalid line - garbage found - ch=" << ch;
00536                     break;
00537                 }
00538                 if (ch == '\n' || invalidLine)
00539                     break;
00540             }
00541             if (!invalidLine) {
00542                 // Finish match, doing byte-swapping on little endian hosts
00543 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
00544                 if (wordSize > 1) {
00545                     //kDebug() << "data before swapping: " << match.m_data;;
00546                     if ((wordSize != 2 && wordSize != 4) || (valueLength % wordSize != 0))
00547                         continue; // invalid word size
00548                     char* data = match.m_data.data();
00549                     char* mask = match.m_mask.data();
00550                     for (int i = 0; i < valueLength; i += wordSize) {
00551                         if (wordSize == 2)
00552                             MAKE_LITTLE_ENDIAN16( *((quint16 *) data + i) );
00553                         else if (wordSize == 4)
00554                             MAKE_LITTLE_ENDIAN32( *((quint32 *) data + i) );
00555                         if (!match.m_mask.isEmpty()) {
00556                             if (wordSize == 2)
00557                                 MAKE_LITTLE_ENDIAN16( *((quint16 *) mask + i) );
00558                             else if (wordSize == 4)
00559                                 MAKE_LITTLE_ENDIAN32( *((quint32 *) mask + i) );
00560                         }
00561                     }
00562                     //kDebug() << "data after swapping: " << match.m_data;
00563                 }
00564 #endif
00565                 // Append match at the right place depending on indent:
00566                 if (indent == 0) {
00567                     matches.append(match);
00568                 } else {
00569                     KMimeMagicMatch* m = &matches.last();
00570                     Q_ASSERT(m);
00571                     for (int i = 1 /* nothing to do for indent==1 */; i < indent; ++i) {
00572                         m = &m->m_subMatches.last();
00573                         Q_ASSERT(m);
00574                     }
00575                     m->m_subMatches.append(match);
00576                 }
00577             }
00578         }
00579     }
00580     return rules;
00581 }

KDECore

Skip menu "KDECore"
  • Main Page
  • Modules
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • KIO
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • Kross
  • KUtils
  • Nepomuk
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.5.4
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal