KDECore
textbreaks.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "textbreaks.h"
00021
00022 namespace Sonnet
00023 {
00024
00025 QHash<quint32,qint8> TextBreaks::data;
00026 QHash<QString,qint8> TextBreaks::catalog;
00027
00028 TextBreaks::TextBreaks(const QString &text)
00029 : m_text(text)
00030 {
00031 init();
00032 }
00033
00034 void TextBreaks::init()
00035 {
00036 if ( catalog.size() || data.size() )
00037 return;
00038
00039 QFile sin(":/ucd/all.map");
00040 sin.open(QIODevice::ReadOnly);
00041
00042 QDataStream in(&sin);
00043
00044 QString cat;
00045 quint8 catKey;
00046 quint32 keys, code;
00047
00048 in >> keys;
00049 while (keys) {
00050 in >> catKey >> cat;
00051 catalog.insert(cat,catKey);
00052 --keys;
00053 }
00054
00055 in >> keys;
00056 while (keys) {
00057 in >> code >> catKey;
00058 data.insert(code,catKey);
00059 --keys;
00060 }
00061 }
00062
00063 QString TextBreaks::text() const
00064 {
00065 return m_text;
00066 }
00067
00068 void TextBreaks::setText(const QString &text)
00069 {
00070 m_text = text;
00071 }
00072
00073 TextBreaks::Positions TextBreaks::graphemeBreaks(const QString &text)
00074 {
00075 init();
00076 Q_UNUSED(text);
00077 return Positions();
00078 }
00079
00080 TextBreaks::Positions TextBreaks::wordBreaks(const QString &text)
00081 {
00082 init();
00083 const QChar * grapheme = text.constData();
00084 Positions breaks;
00085
00086 if (text.isEmpty())
00087 return breaks;
00088
00089 breaks.append(0);
00090
00091 bool bk = true;
00092 int pos(1);
00093 while (!grapheme->isNull())
00094 {
00095 qint8 catagory0( data[(quint32)(grapheme-1)->unicode()] );
00096 qint8 catagory( data[(quint32)grapheme->unicode()] );
00097 qint8 catagory2( data[(quint32)(grapheme+1)->unicode()] );
00098 qint8 catagory3(0);
00099 if ( ( !(grapheme+1)->isNull() )
00100 &&
00101 ( !(grapheme+2)->isNull() )
00102 )
00103 catagory3 = data[(quint32)(grapheme+2)->unicode()];
00104
00105
00106
00107
00108 if ( ( catagory == catalog["ALetter"] )
00109 &&
00110 ( catagory2 == catalog["ALetter"] )
00111 )
00112 {
00113
00114 bk=false;
00115 }
00116
00117
00118 if ( ( catagory == catalog["ALetter"] )
00119 &&
00120 ( catagory2 == catalog["MidLetter"] )
00121 &&
00122 ( catagory3 == catalog["ALetter"] )
00123 )
00124 {
00125
00126 bk=false;
00127 }
00128
00129
00130 if ( ( catagory0 == catalog["ALetter"] )
00131 &&
00132 ( catagory == catalog["MidLetter"] )
00133 &&
00134 ( catagory2 == catalog["ALetter"] )
00135 )
00136 {
00137
00138 bk=false;
00139 }
00140
00141
00142 if ( ( catagory == catalog["Numeric"] )
00143 &&
00144 ( catagory2 == catalog["Numeric"] )
00145 )
00146 {
00147
00148 bk=false;
00149 }
00150
00151
00152 if ( ( catagory == catalog["ALetter"] )
00153 &&
00154 ( catagory2 == catalog["Numeric"] )
00155 )
00156 {
00157
00158 bk=false;
00159 }
00160
00161
00162 if ( ( catagory == catalog["Numeric"] )
00163 &&
00164 ( catagory2 == catalog["ALetter"] )
00165 )
00166 {
00167
00168 bk=false;
00169 }
00170
00171
00172 if ( ( catagory == catalog["Numeric"] )
00173 &&
00174 ( catagory2 == catalog["MidNum"] )
00175 &&
00176 ( catagory3 == catalog["Numeric"] )
00177 )
00178 {
00179
00180 bk=false;
00181 }
00182
00183
00184 if ( ( catagory0 == catalog["Numeric"] )
00185 &&
00186 ( catagory == catalog["MidNum"] )
00187 &&
00188 ( catagory2 == catalog["Numeric"] )
00189 )
00190 {
00191
00192 bk=false;
00193 }
00194
00195
00196 if ( ( catagory == catalog["Katakana"] )
00197 &&
00198 ( catagory2 == catalog["Katakana"] )
00199 )
00200 {
00201
00202 bk=false;
00203 }
00204
00205
00206 if ( ( catagory == catalog["ALetter"] ||
00207 catagory == catalog["Numeric"] ||
00208 catagory == catalog["Katakana"] ||
00209 catagory == catalog["ExtendNumLet"] )
00210 &&
00211 ( catagory2 == catalog["ExtendNumLet"] )
00212 )
00213 {
00214
00215 bk=false;
00216 }
00217
00218
00219 if ( ( catagory == catalog["ExtendNumLet"] )
00220 &&
00221 ( catagory2 == catalog["ALetter"] ||
00222 catagory2 == catalog["Numeric"] ||
00223 catagory2 == catalog["Katakana"] )
00224 )
00225 {
00226
00227 bk=false;
00228 }
00229
00230
00231
00232 if (bk)
00233 breaks.append(pos);
00234 bk=true;
00235
00236 ++pos;
00237 ++grapheme;
00238 }
00239
00240
00241
00242 QString modText = text;
00243 int off(0);
00244 foreach(pos,breaks)
00245 {
00246 modText.insert(pos+off, '|');
00247 ++off;
00248 }
00249
00250 qDebug() << modText;
00251
00252
00253 return Positions();
00254 }
00255
00256 TextBreaks::Positions TextBreaks::sentenceBreaks(const QString &text)
00257 {
00258 init();
00259 Q_UNUSED(text);
00260 return Positions();
00261 }
00262 TextBreaks::Positions TextBreaks::graphemeBreaks() const
00263 {
00264 return graphemeBreaks(m_text);
00265 }
00266
00267 TextBreaks::Positions TextBreaks::wordBreaks() const
00268 {
00269 return wordBreaks(m_text);
00270 }
00271
00272 TextBreaks::Positions TextBreaks::sentenceBreaks() const
00273 {
00274 return sentenceBreaks(m_text);
00275 }
00276
00277 }