00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "kcharselectdata_p.h"
00022
00023 #include <QStringList>
00024 #include <QFile>
00025 #include <qendian.h>
00026
00027 #include <string.h>
00028 #include <klocalizedstring.h>
00029 #include <kstandarddirs.h>
00030
00031
// Parameters of the arithmetic Hangul syllable decomposition performed in
// KCharSelectData::name(). The names follow the ones used by the Unicode
// Standard's Hangul syllable algorithm. They are typed constants rather than
// preprocessor macros so that these short names obey normal C++ scoping.
// LBase, VBase and TBase are not referenced by the code visible in this file.
static const int SBase = 0xAC00;            // subtracted from the code unit to get the syllable index
static const int LBase = 0x1100;
static const int VBase = 0x1161;
static const int TBase = 0x11A7;
static const int LCount = 19;               // number of entries in JAMO_L_TABLE
static const int VCount = 21;               // number of entries in JAMO_V_TABLE
static const int TCount = 28;               // number of entries in JAMO_T_TABLE
static const int NCount = VCount * TCount;  // 588 syllables per leading consonant
static const int SCount = LCount * NCount;  // 11172 syllables in total
00041
// Short names of the leading consonants, indexed by LIndex in
// KCharSelectData::name(). Entry 11 is the empty string, i.e. that consonant
// contributes no letters to the generated syllable name.
static const char JAMO_L_TABLE[][4] = {
    "G",  "GG", "N",  "D",  "DD",   //  0 ..  4
    "R",  "M",  "B",  "BB", "S",    //  5 ..  9
    "SS", "",   "J",  "JJ", "C",    // 10 .. 14
    "K",  "T",  "P",  "H"           // 15 .. 18
};
00047
// Short names of the vowels, indexed by VIndex in KCharSelectData::name().
// The longest entries have three letters, which together with the
// terminating NUL exactly fill the four-byte rows.
static const char JAMO_V_TABLE[][4] = {
    "A",   "AE", "YA", "YAE", "EO", "E",  "YEO",   //  0 ..  6
    "YE",  "O",  "WA", "WAE", "OE", "YO", "U",     //  7 .. 13
    "WEO", "WE", "WI", "YU",  "EU", "YI", "I"      // 14 .. 20
};
00054
// Short names of the trailing consonants, indexed by TIndex in
// KCharSelectData::name(). Entry 0 is the empty string, so a syllable whose
// TIndex is 0 gets no trailing letters appended.
static const char JAMO_T_TABLE[][4] = {
    "",   "G",  "GG", "GS", "N",  "NJ", "NH",   //  0 ..  6
    "D",  "L",  "LG", "LM", "LB", "LS", "LT",   //  7 .. 13
    "LP", "LH", "M",  "B",  "BS", "S",  "SS",   // 14 .. 20
    "NG", "J",  "C",  "K",  "T",  "P",  "H"     // 21 .. 27
};
00061
00062 bool KCharSelectData::openDataFile()
00063 {
00064 if(!dataFile.isEmpty()) {
00065 return true;
00066 } else {
00067 QFile file(KStandardDirs::locate("data", "kcharselect/kcharselect-data"));
00068 if (!file.open(QIODevice::ReadOnly)) {
00069 return false;
00070 }
00071 dataFile = file.readAll();
00072 file.close();
00073 return true;
00074 }
00075 }
00076
00077 quint32 KCharSelectData::getDetailIndex(const QChar& c) const
00078 {
00079 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00080
00081
00082 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+12);
00083 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+16);
00084
00085 int min = 0;
00086 int mid;
00087 int max = ((offsetEnd - offsetBegin) / 27) - 1;
00088
00089 quint16 unicode = c.unicode();
00090
00091 static quint16 most_recent_searched;
00092 static quint32 most_recent_result;
00093
00094
00095 if (unicode == most_recent_searched)
00096 return most_recent_result;
00097
00098 most_recent_searched = unicode;
00099
00100 while (max >= min) {
00101 mid = (min + max) / 2;
00102 const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*27);
00103 if (unicode > midUnicode)
00104 min = mid + 1;
00105 else if (unicode < midUnicode)
00106 max = mid - 1;
00107 else {
00108 most_recent_result = offsetBegin + mid*27;
00109
00110 return most_recent_result;
00111 }
00112 }
00113
00114 most_recent_result = 0;
00115 return 0;
00116 }
00117
00118 QString KCharSelectData::formatCode(ushort code, int length, const QString& prefix, int base)
00119 {
00120 QString s = QString::number(code, base).toUpper();
00121 while (s.size() < length)
00122 s.prepend('0');
00123 s.prepend(prefix);
00124 return s;
00125 }
00126
00127 QList<QChar> KCharSelectData::blockContents(int block)
00128 {
00129 if(!openDataFile()) {
00130 return QList<QChar>();
00131 }
00132
00133 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00134 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20);
00135 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24);
00136
00137 int max = ((offsetEnd - offsetBegin) / 4) - 1;
00138
00139 QList<QChar> res;
00140
00141 if(block > max)
00142 return res;
00143
00144 quint16 unicodeBegin = qFromLittleEndian<quint16>(data + offsetBegin + block*4);
00145 quint16 unicodeEnd = qFromLittleEndian<quint16>(data + offsetBegin + block*4 + 2);
00146
00147 while(unicodeBegin < unicodeEnd) {
00148 res.append(unicodeBegin);
00149 unicodeBegin++;
00150 }
00151 res.append(unicodeBegin);
00152
00153 return res;
00154 }
00155
00156 QList<int> KCharSelectData::sectionContents(int section)
00157 {
00158 if(!openDataFile()) {
00159 return QList<int>();
00160 }
00161
00162 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00163 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28);
00164 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32);
00165
00166 int max = ((offsetEnd - offsetBegin) / 4) - 1;
00167
00168 QList<int> res;
00169
00170 if(section > max)
00171 return res;
00172
00173 for(int i = 0; i <= max; i++) {
00174 const quint16 currSection = qFromLittleEndian<quint16>(data + offsetBegin + i*4);
00175 if(currSection == section) {
00176 res.append( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) );
00177 }
00178 }
00179
00180 return res;
00181 }
00182
00183 QStringList KCharSelectData::sectionList()
00184 {
00185 if(!openDataFile()) {
00186 return QStringList();
00187 }
00188
00189 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00190 const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24);
00191 const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28);
00192
00193 const char* data = dataFile.constData();
00194 QStringList list;
00195 quint32 i = stringBegin;
00196 while(i < stringEnd) {
00197 list.append(i18nc("KCharSelect section name", data + i));
00198 i += strlen(data + i) + 1;
00199 }
00200
00201 return list;
00202 }
00203
00204 QString KCharSelectData::block(const QChar& c)
00205 {
00206 return blockName(blockIndex(c));
00207 }
00208
00209 QString KCharSelectData::name(const QChar& c)
00210 {
00211 if(!openDataFile()) {
00212 return QString();
00213 }
00214
00215 ushort unicode = c.unicode();
00216 if ((unicode >= 0x3400 && unicode <= 0x4DB5)
00217 || (unicode >= 0x4e00 && unicode <= 0x9fa5)) {
00218
00219 return "CJK UNIFIED IDEOGRAPH-" + QString::number(unicode, 16);
00220 } else if (c >= 0xac00 && c <= 0xd7af) {
00221
00222 int SIndex = c.unicode() - SBase;
00223 int LIndex, VIndex, TIndex;
00224
00225 if (SIndex < 0 || SIndex >= SCount)
00226 return QString();
00227
00228 LIndex = SIndex / NCount;
00229 VIndex = (SIndex % NCount) / TCount;
00230 TIndex = SIndex % TCount;
00231
00232 return QString("HANGUL SYLLABLE ") + JAMO_L_TABLE[LIndex] + JAMO_V_TABLE[VIndex] + JAMO_T_TABLE[TIndex];
00233 } else if (unicode >= 0xD800 && unicode <= 0xDB7F)
00234 return i18n("<Non Private Use High Surrogate>");
00235 else if (unicode >= 0xDB80 && unicode <= 0xDBFF)
00236 return i18n("<Private Use High Surrogate>");
00237 else if (unicode >= 0xDC00 && unicode <= 0xDFFF)
00238 return i18n("<Low Surrogate>");
00239 else if (unicode >= 0xE000 && unicode <= 0xF8FF)
00240 return i18n("<Private Use>");
00241
00242
00243
00244
00245 else {
00246 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00247 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+4);
00248 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+8);
00249
00250 int min = 0;
00251 int mid;
00252 int max = ((offsetEnd - offsetBegin) / 6) - 1;
00253 QString s;
00254
00255 while (max >= min) {
00256 mid = (min + max) / 2;
00257 const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*6);
00258 if (unicode > midUnicode)
00259 min = mid + 1;
00260 else if (unicode < midUnicode)
00261 max = mid - 1;
00262 else {
00263 quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid*6 + 2);
00264 s = QString(dataFile.constData() + offset);
00265 break;
00266 }
00267 }
00268
00269 if (s.isNull()) {
00270 return i18n("<not assigned>");
00271 } else {
00272 return s;
00273 }
00274 }
00275 }
00276
00277 int KCharSelectData::blockIndex(const QChar& c)
00278 {
00279 if(!openDataFile()) {
00280 return 0;
00281 }
00282
00283 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00284 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20);
00285 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24);
00286 const quint16 unicode = c.unicode();
00287
00288 int max = ((offsetEnd - offsetBegin) / 4) - 1;
00289
00290 int i = 0;
00291
00292 while (unicode > qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) && i < max) {
00293 i++;
00294 }
00295
00296 return i;
00297 }
00298
00299 int KCharSelectData::sectionIndex(int block)
00300 {
00301 if(!openDataFile()) {
00302 return 0;
00303 }
00304
00305 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00306 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28);
00307 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32);
00308
00309 int max = ((offsetEnd - offsetBegin) / 4) - 1;
00310
00311 for(int i = 0; i <= max; i++) {
00312 if( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) == block) {
00313 return qFromLittleEndian<quint16>(data + offsetBegin + i*4);
00314 }
00315 }
00316
00317 return 0;
00318 }
00319
00320 QString KCharSelectData::blockName(int index)
00321 {
00322 if(!openDataFile()) {
00323 return QString();
00324 }
00325
00326 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00327 const quint32 stringBegin = qFromLittleEndian<quint32>(udata+16);
00328 const quint32 stringEnd = qFromLittleEndian<quint32>(udata+20);
00329
00330 quint32 i = stringBegin;
00331 int currIndex = 0;
00332
00333 const char* data = dataFile.constData();
00334 while(i < stringEnd && currIndex < index) {
00335 i += strlen(data + i) + 1;
00336 currIndex++;
00337 }
00338
00339 return i18nc("KCharselect unicode block name", data + i);
00340 }
00341
00342 QStringList KCharSelectData::aliases(const QChar& c)
00343 {
00344 if(!openDataFile()) {
00345 return QStringList();
00346 }
00347 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00348 const int detailIndex = getDetailIndex(c);
00349 if(detailIndex == 0) {
00350 return QStringList();
00351 }
00352
00353 const quint8 count = * (quint8 *)(udata + detailIndex + 6);
00354 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 2);
00355
00356 QStringList aliases;
00357
00358 const char* data = dataFile.constData();
00359 for (int i = 0; i < count; i++) {
00360 aliases.append(QString::fromUtf8(data + offset));
00361 offset += strlen(data + offset) + 1;
00362 }
00363 return aliases;
00364 }
00365
00366 QStringList KCharSelectData::notes(const QChar& c)
00367 {
00368 if(!openDataFile()) {
00369 return QStringList();
00370 }
00371 const int detailIndex = getDetailIndex(c);
00372 if(detailIndex == 0) {
00373 return QStringList();
00374 }
00375
00376 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00377 const quint8 count = * (quint8 *)(udata + detailIndex + 11);
00378 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 7);
00379
00380 QStringList notes;
00381
00382 const char* data = dataFile.constData();
00383 for (int i = 0; i < count; i++) {
00384 notes.append(QString::fromLatin1(data + offset));
00385 offset += strlen(data + offset) + 1;
00386 }
00387
00388 return notes;
00389 }
00390
00391 QList<QChar> KCharSelectData::seeAlso(const QChar& c)
00392 {
00393 if(!openDataFile()) {
00394 return QList<QChar>();
00395 }
00396 const int detailIndex = getDetailIndex(c);
00397 if(detailIndex == 0) {
00398 return QList<QChar>();
00399 }
00400
00401 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00402 const quint8 count = * (quint8 *)(udata + detailIndex + 26);
00403 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 22);
00404
00405 QList<QChar> seeAlso;
00406
00407 for (int i = 0; i < count; i++) {
00408 seeAlso.append(qFromLittleEndian<quint16> (udata + offset));
00409 offset += 2;
00410 }
00411
00412 return seeAlso;
00413 }
00414
00415 QStringList KCharSelectData::equivalents(const QChar& c)
00416 {
00417 if(!openDataFile()) {
00418 return QStringList();
00419 }
00420 const int detailIndex = getDetailIndex(c);
00421 if(detailIndex == 0) {
00422 return QStringList();
00423 }
00424
00425 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00426 const quint8 count = * (quint8 *)(udata + detailIndex + 21);
00427 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 17);
00428
00429 QStringList equivalents;
00430
00431 const char* data = dataFile.constData();
00432 for (int i = 0; i < count; i++) {
00433 equivalents.append(QString::fromUtf8(data + offset));
00434 offset += strlen(data + offset) + 1;
00435 }
00436
00437 return equivalents;
00438 }
00439
00440 QStringList KCharSelectData::approximateEquivalents(const QChar& c)
00441 {
00442 if(!openDataFile()) {
00443 return QStringList();
00444 }
00445 const int detailIndex = getDetailIndex(c);
00446 if(detailIndex == 0) {
00447 return QStringList();
00448 }
00449
00450 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00451 const quint8 count = * (quint8 *)(udata + detailIndex + 16);
00452 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 12);
00453
00454 QStringList approxEquivalents;
00455
00456 const char* data = dataFile.constData();
00457 for (int i = 0; i < count; i++) {
00458 approxEquivalents.append(QString::fromUtf8(data + offset));
00459 offset += strlen(data + offset) + 1;
00460 }
00461
00462 return approxEquivalents;
00463 }
00464
00465 QStringList KCharSelectData::unihanInfo(const QChar& c)
00466 {
00467 if(!openDataFile()) {
00468 return QStringList();
00469 }
00470
00471 const char* data = dataFile.constData();
00472 const uchar* udata = reinterpret_cast<const uchar*>(data);
00473 const quint32 offsetBegin = qFromLittleEndian<quint32>(udata+36);
00474 const quint32 offsetEnd = dataFile.size();
00475
00476 int min = 0;
00477 int mid;
00478 int max = ((offsetEnd - offsetBegin) / 30) - 1;
00479 quint16 unicode = c.unicode();
00480
00481 while (max >= min) {
00482 mid = (min + max) / 2;
00483 const quint16 midUnicode = qFromLittleEndian<quint16>(udata + offsetBegin + mid*30);
00484 if (unicode > midUnicode)
00485 min = mid + 1;
00486 else if (unicode < midUnicode)
00487 max = mid - 1;
00488 else {
00489 QStringList res;
00490 for(int i = 0; i < 7; i++) {
00491 quint32 offset = qFromLittleEndian<quint32>(udata + offsetBegin + mid*30 + 2 + i*4);
00492 if(offset != 0) {
00493 res.append(QString::fromUtf8(data + offset));
00494 } else {
00495 res.append(QString());
00496 }
00497 }
00498 return res;
00499 }
00500 }
00501
00502 return QStringList();
00503 }
00504
00505 QString KCharSelectData::categoryText(QChar::Category category)
00506 {
00507 switch (category) {
00508 case QChar::Other_Control: return i18n("Other, Control");
00509 case QChar::Other_Format: return i18n("Other, Format");
00510 case QChar::Other_NotAssigned: return i18n("Other, Not Assigned");
00511 case QChar::Other_PrivateUse: return i18n("Other, Private Use");
00512 case QChar::Other_Surrogate: return i18n("Other, Surrogate");
00513 case QChar::Letter_Lowercase: return i18n("Letter, Lowercase");
00514 case QChar::Letter_Modifier: return i18n("Letter, Modifier");
00515 case QChar::Letter_Other: return i18n("Letter, Other");
00516 case QChar::Letter_Titlecase: return i18n("Letter, Titlecase");
00517 case QChar::Letter_Uppercase: return i18n("Letter, Uppercase");
00518 case QChar::Mark_SpacingCombining: return i18n("Mark, Spacing Combining");
00519 case QChar::Mark_Enclosing: return i18n("Mark, Enclosing");
00520 case QChar::Mark_NonSpacing: return i18n("Mark, Non-Spacing");
00521 case QChar::Number_DecimalDigit: return i18n("Number, Decimal Digit");
00522 case QChar::Number_Letter: return i18n("Number, Letter");
00523 case QChar::Number_Other: return i18n("Number, Other");
00524 case QChar::Punctuation_Connector: return i18n("Punctuation, Connector");
00525 case QChar::Punctuation_Dash: return i18n("Punctuation, Dash");
00526 case QChar::Punctuation_Close: return i18n("Punctuation, Close");
00527 case QChar::Punctuation_FinalQuote: return i18n("Punctuation, Final Quote");
00528 case QChar::Punctuation_InitialQuote: return i18n("Punctuation, Initial Quote");
00529 case QChar::Punctuation_Other: return i18n("Punctuation, Other");
00530 case QChar::Punctuation_Open: return i18n("Punctuation, Open");
00531 case QChar::Symbol_Currency: return i18n("Symbol, Currency");
00532 case QChar::Symbol_Modifier: return i18n("Symbol, Modifier");
00533 case QChar::Symbol_Math: return i18n("Symbol, Math");
00534 case QChar::Symbol_Other: return i18n("Symbol, Other");
00535 case QChar::Separator_Line: return i18n("Separator, Line");
00536 case QChar::Separator_Paragraph: return i18n("Separator, Paragraph");
00537 case QChar::Separator_Space: return i18n("Separator, Space");
00538 default: return i18n("Unknown");
00539 }
00540 }
00541
00542 QList<QChar> KCharSelectData::find(const QString& needle)
00543 {
00544 QList<QChar> res;
00545 QStringList searchStrings = needle.simplified().split(' ');
00546
00547 if (searchStrings.count() == 0) {
00548 return res;
00549 }
00550
00551 if(searchStrings.count() == 1 && searchStrings[0].length() == 1) {
00552 res.append(searchStrings[0].at(0));
00553 return res;
00554 }
00555
00556 QRegExp regExp("^(|u\\+|U\\+|0x|0X)([A-Fa-f0-9]{4})$");
00557 foreach(const QString &s, searchStrings) {
00558 if(regExp.exactMatch(s)) {
00559 res.append(regExp.cap(2).toInt(0, 16));
00560 }
00561 }
00562
00563 QString firstString = searchStrings.takeFirst();
00564
00565 const char* data = dataFile.constData();
00566 const uchar* udata = reinterpret_cast<const uchar*>(data);
00567 const quint32 offsetBegin = qFromLittleEndian<quint32>(udata+4);
00568 const quint32 offsetEnd = qFromLittleEndian<quint32>(udata+8);
00569
00570 int max = ((offsetEnd - offsetBegin) / 6) - 1;
00571
00572 for (int i = 0; i <= max; i++) {
00573 quint32 offset = qFromLittleEndian<quint32>(udata + offsetBegin + i*6 + 2);
00574 QString name(data + offset);
00575 if (name.contains(firstString, Qt::CaseInsensitive)) {
00576 bool valid = true;
00577 foreach(const QString &s, searchStrings) {
00578 if (!name.contains(s, Qt::CaseInsensitive)) {
00579 valid = false;
00580 break;
00581 }
00582 }
00583 if (valid) {
00584 res.append(qFromLittleEndian<quint16>(udata + offsetBegin + i*6));
00585 }
00586 }
00587 }
00588 return res;
00589 }