• Skip to content
  • Skip to link menu
KDE 4.1 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KDEUI

kcharselectdata.cpp

Go to the documentation of this file.
00001 /* This file is part of the KDE libraries
00002 
00003    Copyright (C) 2007 Daniel Laidig <d.laidig@gmx.de>
00004 
00005    This library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Library General Public
00007    License as published by the Free Software Foundation; either
00008    version 2 of the License, or (at your option) any later version.
00009 
00010    This library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Library General Public License for more details.
00014 
00015    You should have received a copy of the GNU Library General Public License
00016    along with this library; see the file COPYING.LIB.  If not, write to
00017    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018    Boston, MA 02110-1301, USA.
00019 */
00020 
00021 #include "kcharselectdata_p.h"
00022 
00023 #include <QStringList>
00024 #include <QFile>
00025 #include <qendian.h>
00026 
00027 #include <string.h>
00028 #include <klocalizedstring.h>
00029 #include <kstandarddirs.h>
00030 
/*
 * Constants for Hangul syllable (de)composition, see UAX #15.
 *
 * SBase, LBase, VBase and TBase are the base code points used by the
 * algorithm; LCount, VCount and TCount are the number of leading
 * consonants, vowels and trailing consonants (and therefore the sizes of
 * the JAMO_*_TABLE arrays). NCount and SCount are derived from them.
 *
 * These are typed constants rather than preprocessor macros so that they
 * obey normal scoping rules and cannot silently rewrite unrelated tokens.
 */
static const int SBase = 0xAC00;
static const int LBase = 0x1100;
static const int VBase = 0x1161;
static const int TBase = 0x11A7;
static const int LCount = 19;
static const int VCount = 21;
static const int TCount = 28;
static const int NCount = VCount * TCount;  /* syllables per leading consonant */
static const int SCount = LCount * NCount;  /* total number of computed syllables */
00041 
/* Name fragments for the 19 leading consonants, indexed by LIndex. */
static const char JAMO_L_TABLE[][4] = {
    "G",  "GG", "N",  "D",  "DD",
    "R",  "M",  "B",  "BB", "S",
    "SS", "",   "J",  "JJ", "C",
    "K",  "T",  "P",  "H"
};

/* Name fragments for the 21 vowels, indexed by VIndex. */
static const char JAMO_V_TABLE[][4] = {
    "A",   "AE",  "YA",  "YAE", "EO",
    "E",   "YEO", "YE",  "O",   "WA",
    "WAE", "OE",  "YO",  "U",   "WEO",
    "WE",  "WI",  "YU",  "EU",  "YI",
    "I"
};

/* Name fragments for the 28 trailing consonants, indexed by TIndex.
   Entry 0 is empty: a syllable without a trailing consonant. */
static const char JAMO_T_TABLE[][4] = {
    "",   "G",  "GG", "GS", "N",  "NJ", "NH",
    "D",  "L",  "LG", "LM", "LB", "LS", "LT",
    "LP", "LH", "M",  "B",  "BS", "S",  "SS",
    "NG", "J",  "C",  "K",  "T",  "P",  "H"
};
00061 
00062 bool KCharSelectData::openDataFile()
00063 {
00064     if(!dataFile.isEmpty()) {
00065         return true;
00066     } else {
00067         QFile file(KStandardDirs::locate("data", "kcharselect/kcharselect-data"));
00068         if (!file.open(QIODevice::ReadOnly)) {
00069             return false;
00070         }
00071         dataFile = file.readAll();
00072         file.close();
00073         return true;
00074     }
00075 }
00076 
00077 quint32 KCharSelectData::getDetailIndex(const QChar& c) const
00078 {
00079     const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00080     // Convert from little-endian, so that this code works on PPC too.
00081     // http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=482286
00082     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+12);
00083     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+16);
00084 
00085     int min = 0;
00086     int mid;
00087     int max = ((offsetEnd - offsetBegin) / 27) - 1;
00088 
00089     quint16 unicode = c.unicode();
00090 
00091     static quint16 most_recent_searched;
00092     static quint32 most_recent_result;
00093 
00094 
00095     if (unicode == most_recent_searched)
00096         return most_recent_result;
00097 
00098     most_recent_searched = unicode;
00099 
00100     while (max >= min) {
00101         mid = (min + max) / 2;
00102         const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*27);
00103         if (unicode > midUnicode)
00104             min = mid + 1;
00105         else if (unicode < midUnicode)
00106             max = mid - 1;
00107         else {
00108             most_recent_result = offsetBegin + mid*27;
00109 
00110             return most_recent_result;
00111         }
00112     }
00113 
00114     most_recent_result = 0;
00115     return 0;
00116 }
00117 
00118 QString KCharSelectData::formatCode(ushort code, int length, const QString& prefix, int base)
00119 {
00120     QString s = QString::number(code, base).toUpper();
00121     while (s.size() < length)
00122         s.prepend('0');
00123     s.prepend(prefix);
00124     return s;
00125 }
00126 
00127 QList<QChar> KCharSelectData::blockContents(int block)
00128 {
00129     if(!openDataFile()) {
00130         return QList<QChar>();
00131     }
00132 
00133     const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00134     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20);
00135     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24);
00136 
00137     int max = ((offsetEnd - offsetBegin) / 4) - 1;
00138 
00139     QList<QChar> res;
00140 
00141     if(block > max)
00142         return res;
00143 
00144     quint16 unicodeBegin = qFromLittleEndian<quint16>(data + offsetBegin + block*4);
00145     quint16 unicodeEnd = qFromLittleEndian<quint16>(data + offsetBegin + block*4 + 2);
00146 
00147     while(unicodeBegin < unicodeEnd) {
00148         res.append(unicodeBegin);
00149         unicodeBegin++;
00150     }
00151     res.append(unicodeBegin); // Be carefull when unicodeEnd==0xffff
00152 
00153     return res;
00154 }
00155 
00156 QList<int> KCharSelectData::sectionContents(int section)
00157 {
00158     if(!openDataFile()) {
00159         return QList<int>();
00160     }
00161 
00162     const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00163     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28);
00164     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32);
00165 
00166     int max = ((offsetEnd - offsetBegin) / 4) - 1;
00167 
00168     QList<int> res;
00169 
00170     if(section > max)
00171         return res;
00172 
00173     for(int i = 0; i <= max; i++) {
00174         const quint16 currSection = qFromLittleEndian<quint16>(data + offsetBegin + i*4);
00175         if(currSection == section) {
00176             res.append( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) );
00177         }
00178     }
00179 
00180     return res;
00181 }
00182 
00183 QStringList KCharSelectData::sectionList()
00184 {
00185     if(!openDataFile()) {
00186         return QStringList();
00187     }
00188 
00189     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00190     const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24);
00191     const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28);
00192 
00193     const char* data = dataFile.constData();
00194     QStringList list;
00195     quint32 i = stringBegin;
00196     while(i < stringEnd) {
00197         list.append(i18nc("KCharSelect section name", data + i));
00198         i += strlen(data + i) + 1;
00199     }
00200 
00201     return list;
00202 }
00203 
00204 QString KCharSelectData::block(const QChar& c)
00205 {
00206     return blockName(blockIndex(c));
00207 }
00208 
00209 QString KCharSelectData::name(const QChar& c)
00210 {
00211     if(!openDataFile()) {
00212         return QString();
00213     }
00214 
00215     ushort unicode = c.unicode();
00216     if ((unicode >= 0x3400 && unicode <= 0x4DB5)
00217             || (unicode >= 0x4e00 && unicode <= 0x9fa5)) {
00218         // || (unicode >= 0x20000 && unicode <= 0x2A6D6) // useless, since limited to 16 bit
00219         return "CJK UNIFIED IDEOGRAPH-" + QString::number(unicode, 16);
00220     } else if (c >= 0xac00 && c <= 0xd7af) {
00221         /* compute hangul syllable name as per UAX #15 */
00222         int SIndex = c.unicode() - SBase;
00223         int LIndex, VIndex, TIndex;
00224 
00225         if (SIndex < 0 || SIndex >= SCount)
00226             return QString();
00227 
00228         LIndex = SIndex / NCount;
00229         VIndex = (SIndex % NCount) / TCount;
00230         TIndex = SIndex % TCount;
00231 
00232         return QString("HANGUL SYLLABLE ") + JAMO_L_TABLE[LIndex] + JAMO_V_TABLE[VIndex] + JAMO_T_TABLE[TIndex];
00233     } else if (unicode >= 0xD800 && unicode <= 0xDB7F)
00234         return i18n("<Non Private Use High Surrogate>");
00235     else if (unicode >= 0xDB80 && unicode <= 0xDBFF)
00236         return i18n("<Private Use High Surrogate>");
00237     else if (unicode >= 0xDC00 && unicode <= 0xDFFF)
00238         return i18n("<Low Surrogate>");
00239     else if (unicode >= 0xE000 && unicode <= 0xF8FF)
00240         return i18n("<Private Use>");
00241 //  else if (unicode >= 0xF0000 && unicode <= 0xFFFFD) // 16 bit!
00242 //   return i18n("<Plane 15 Private Use>");
00243 //  else if (unicode >= 0x100000 && unicode <= 0x10FFFD)
00244 //   return i18n("<Plane 16 Private Use>");
00245     else {
00246         const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00247         const quint32 offsetBegin = qFromLittleEndian<quint32>(data+4);
00248         const quint32 offsetEnd = qFromLittleEndian<quint32>(data+8);
00249 
00250         int min = 0;
00251         int mid;
00252         int max = ((offsetEnd - offsetBegin) / 6) - 1;
00253         QString s;
00254 
00255         while (max >= min) {
00256             mid = (min + max) / 2;
00257             const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*6);
00258             if (unicode > midUnicode)
00259                 min = mid + 1;
00260             else if (unicode < midUnicode)
00261                 max = mid - 1;
00262             else {
00263                 quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid*6 + 2);
00264                 s = QString(dataFile.constData() + offset);
00265                 break;
00266             }
00267         }
00268 
00269         if (s.isNull()) {
00270             return i18n("<not assigned>");
00271         } else {
00272             return s;
00273         }
00274     }
00275 }
00276 
00277 int KCharSelectData::blockIndex(const QChar& c)
00278 {
00279     if(!openDataFile()) {
00280         return 0;
00281     }
00282 
00283     const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00284     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20);
00285     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24);
00286     const quint16 unicode = c.unicode();
00287 
00288     int max = ((offsetEnd - offsetBegin) / 4) - 1;
00289 
00290     int i = 0;
00291 
00292     while (unicode > qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) && i < max) {
00293         i++;
00294     }
00295 
00296     return i;
00297 }
00298 
00299 int KCharSelectData::sectionIndex(int block)
00300 {
00301     if(!openDataFile()) {
00302         return 0;
00303     }
00304 
00305     const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00306     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28);
00307     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32);
00308 
00309     int max = ((offsetEnd - offsetBegin) / 4) - 1;
00310 
00311     for(int i = 0; i <= max; i++) {
00312         if( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) == block) {
00313             return qFromLittleEndian<quint16>(data + offsetBegin + i*4);
00314         }
00315     }
00316 
00317     return 0;
00318 }
00319 
00320 QString KCharSelectData::blockName(int index)
00321 {
00322     if(!openDataFile()) {
00323         return QString();
00324     }
00325 
00326     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00327     const quint32 stringBegin = qFromLittleEndian<quint32>(udata+16);
00328     const quint32 stringEnd = qFromLittleEndian<quint32>(udata+20);
00329 
00330     quint32 i = stringBegin;
00331     int currIndex = 0;
00332 
00333     const char* data = dataFile.constData();
00334     while(i < stringEnd && currIndex < index) {
00335         i += strlen(data + i) + 1;
00336         currIndex++;
00337     }
00338 
00339     return i18nc("KCharselect unicode block name", data + i);
00340 }
00341 
00342 QStringList KCharSelectData::aliases(const QChar& c)
00343 {
00344     if(!openDataFile()) {
00345         return QStringList();
00346     }
00347     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00348     const int detailIndex = getDetailIndex(c);
00349     if(detailIndex == 0) {
00350         return QStringList();
00351     }
00352 
00353     const quint8 count = * (quint8 *)(udata + detailIndex + 6);
00354     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 2);
00355 
00356     QStringList aliases;
00357 
00358     const char* data = dataFile.constData();
00359     for (int i = 0;  i < count;  i++) {
00360         aliases.append(QString::fromUtf8(data + offset));
00361         offset += strlen(data + offset) + 1;
00362     }
00363     return aliases;
00364 }
00365 
00366 QStringList KCharSelectData::notes(const QChar& c)
00367 {
00368     if(!openDataFile()) {
00369         return QStringList();
00370     }
00371     const int detailIndex = getDetailIndex(c);
00372     if(detailIndex == 0) {
00373         return QStringList();
00374     }
00375 
00376     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00377     const quint8 count = * (quint8 *)(udata + detailIndex + 11);
00378     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 7);
00379 
00380     QStringList notes;
00381 
00382     const char* data = dataFile.constData();
00383     for (int i = 0;  i < count;  i++) {
00384         notes.append(QString::fromLatin1(data + offset));
00385         offset += strlen(data + offset) + 1;
00386     }
00387 
00388     return notes;
00389 }
00390 
00391 QList<QChar> KCharSelectData::seeAlso(const QChar& c)
00392 {
00393     if(!openDataFile()) {
00394         return QList<QChar>();
00395     }
00396     const int detailIndex = getDetailIndex(c);
00397     if(detailIndex == 0) {
00398         return QList<QChar>();
00399     }
00400 
00401     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00402     const quint8 count = * (quint8 *)(udata + detailIndex + 26);
00403     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 22);
00404 
00405     QList<QChar> seeAlso;
00406 
00407     for (int i = 0;  i < count;  i++) {
00408         seeAlso.append(qFromLittleEndian<quint16> (udata + offset));
00409         offset += 2;
00410     }
00411 
00412     return seeAlso;
00413 }
00414 
00415 QStringList KCharSelectData::equivalents(const QChar& c)
00416 {
00417     if(!openDataFile()) {
00418         return QStringList();
00419     }
00420     const int detailIndex = getDetailIndex(c);
00421     if(detailIndex == 0) {
00422         return QStringList();
00423     }
00424 
00425     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00426     const quint8 count = * (quint8 *)(udata + detailIndex + 21);
00427     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 17);
00428 
00429     QStringList equivalents;
00430 
00431     const char* data = dataFile.constData();
00432     for (int i = 0;  i < count;  i++) {
00433         equivalents.append(QString::fromUtf8(data + offset));
00434         offset += strlen(data + offset) + 1;
00435     }
00436 
00437     return equivalents;
00438 }
00439 
00440 QStringList KCharSelectData::approximateEquivalents(const QChar& c)
00441 {
00442     if(!openDataFile()) {
00443         return QStringList();
00444     }
00445     const int detailIndex = getDetailIndex(c);
00446     if(detailIndex == 0) {
00447         return QStringList();
00448     }
00449 
00450     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00451     const quint8 count = * (quint8 *)(udata + detailIndex + 16);
00452     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 12);
00453 
00454     QStringList approxEquivalents;
00455 
00456     const char* data = dataFile.constData();
00457     for (int i = 0;  i < count;  i++) {
00458         approxEquivalents.append(QString::fromUtf8(data + offset));
00459         offset += strlen(data + offset) + 1;
00460     }
00461 
00462     return approxEquivalents;
00463 }
00464 
00465 QStringList KCharSelectData::unihanInfo(const QChar& c)
00466 {
00467     if(!openDataFile()) {
00468         return QStringList();
00469     }
00470 
00471     const char* data = dataFile.constData();
00472     const uchar* udata = reinterpret_cast<const uchar*>(data);
00473     const quint32 offsetBegin = qFromLittleEndian<quint32>(udata+36);
00474     const quint32 offsetEnd = dataFile.size();
00475 
00476     int min = 0;
00477     int mid;
00478     int max = ((offsetEnd - offsetBegin) / 30) - 1;
00479     quint16 unicode = c.unicode();
00480 
00481     while (max >= min) {
00482         mid = (min + max) / 2;
00483         const quint16 midUnicode = qFromLittleEndian<quint16>(udata + offsetBegin + mid*30);
00484         if (unicode > midUnicode)
00485             min = mid + 1;
00486         else if (unicode < midUnicode)
00487             max = mid - 1;
00488         else {
00489             QStringList res;
00490             for(int i = 0; i < 7; i++) {
00491                 quint32 offset = qFromLittleEndian<quint32>(udata + offsetBegin + mid*30 + 2 + i*4);
00492                 if(offset != 0) {
00493                     res.append(QString::fromUtf8(data + offset));
00494                 } else {
00495                     res.append(QString());
00496                 }
00497             }
00498             return res;
00499         }
00500     }
00501 
00502     return QStringList();
00503 }
00504 
00505 QString KCharSelectData::categoryText(QChar::Category category)
00506 {
00507     switch (category) {
00508     case QChar::Other_Control: return i18n("Other, Control");
00509     case QChar::Other_Format: return i18n("Other, Format");
00510     case QChar::Other_NotAssigned: return i18n("Other, Not Assigned");
00511     case QChar::Other_PrivateUse: return i18n("Other, Private Use");
00512     case QChar::Other_Surrogate: return i18n("Other, Surrogate");
00513     case QChar::Letter_Lowercase: return i18n("Letter, Lowercase");
00514     case QChar::Letter_Modifier: return i18n("Letter, Modifier");
00515     case QChar::Letter_Other: return i18n("Letter, Other");
00516     case QChar::Letter_Titlecase: return i18n("Letter, Titlecase");
00517     case QChar::Letter_Uppercase: return i18n("Letter, Uppercase");
00518     case QChar::Mark_SpacingCombining: return i18n("Mark, Spacing Combining");
00519     case QChar::Mark_Enclosing: return i18n("Mark, Enclosing");
00520     case QChar::Mark_NonSpacing: return i18n("Mark, Non-Spacing");
00521     case QChar::Number_DecimalDigit: return i18n("Number, Decimal Digit");
00522     case QChar::Number_Letter: return i18n("Number, Letter");
00523     case QChar::Number_Other: return i18n("Number, Other");
00524     case QChar::Punctuation_Connector: return i18n("Punctuation, Connector");
00525     case QChar::Punctuation_Dash: return i18n("Punctuation, Dash");
00526     case QChar::Punctuation_Close: return i18n("Punctuation, Close");
00527     case QChar::Punctuation_FinalQuote: return i18n("Punctuation, Final Quote");
00528     case QChar::Punctuation_InitialQuote: return i18n("Punctuation, Initial Quote");
00529     case QChar::Punctuation_Other: return i18n("Punctuation, Other");
00530     case QChar::Punctuation_Open: return i18n("Punctuation, Open");
00531     case QChar::Symbol_Currency: return i18n("Symbol, Currency");
00532     case QChar::Symbol_Modifier: return i18n("Symbol, Modifier");
00533     case QChar::Symbol_Math: return i18n("Symbol, Math");
00534     case QChar::Symbol_Other: return i18n("Symbol, Other");
00535     case QChar::Separator_Line: return i18n("Separator, Line");
00536     case QChar::Separator_Paragraph: return i18n("Separator, Paragraph");
00537     case QChar::Separator_Space: return i18n("Separator, Space");
00538     default: return i18n("Unknown");
00539     }
00540 }
00541 
00542 QList<QChar> KCharSelectData::find(const QString& needle)
00543 {
00544     QList<QChar> res;
00545     QStringList searchStrings = needle.simplified().split(' ');
00546 
00547     if (searchStrings.count() == 0) {
00548         return res;
00549     }
00550 
00551     if(searchStrings.count() == 1 && searchStrings[0].length() == 1) {
00552         res.append(searchStrings[0].at(0));
00553         return res;
00554     }
00555 
00556     QRegExp regExp("^(|u\\+|U\\+|0x|0X)([A-Fa-f0-9]{4})$");
00557     foreach(const QString &s, searchStrings) {
00558         if(regExp.exactMatch(s)) {
00559             res.append(regExp.cap(2).toInt(0, 16));
00560         }
00561     }
00562 
00563     QString firstString = searchStrings.takeFirst();
00564 
00565     const char* data = dataFile.constData();
00566     const uchar* udata = reinterpret_cast<const uchar*>(data);
00567     const quint32 offsetBegin = qFromLittleEndian<quint32>(udata+4);
00568     const quint32 offsetEnd = qFromLittleEndian<quint32>(udata+8);
00569 
00570     int max = ((offsetEnd - offsetBegin) / 6) - 1;
00571 
00572     for (int i = 0; i <= max; i++) {
00573         quint32 offset = qFromLittleEndian<quint32>(udata + offsetBegin + i*6 + 2);
00574         QString name(data + offset);
00575         if (name.contains(firstString, Qt::CaseInsensitive)) {
00576             bool valid = true;
00577             foreach(const QString &s, searchStrings) {
00578                 if (!name.contains(s, Qt::CaseInsensitive)) {
00579                     valid = false;
00580                     break;
00581                 }
00582             }
00583             if (valid) {
00584                 res.append(qFromLittleEndian<quint16>(udata + offsetBegin + i*6));
00585             }
00586         }
00587     }
00588     return res;
00589 }

KDEUI

Skip menu "KDEUI"
  • Main Page
  • Modules
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • KIO
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • Kross
  • KUtils
  • Nepomuk
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.5.4
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal