• Skip to content
  • Skip to link menu
KDE 4.1 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KDECore

textbreaks.cpp

Go to the documentation of this file.
00001 /*  This file is part of the KDE libraries
00002     Copyright (c) 2006 Jacob R Rideout <kde@jacobrideout.net>
00003 
00004     This library is free software; you can redistribute it and/or
00005     modify it under the terms of the GNU Library General Public
00006     License as published by the Free Software Foundation; either
00007     version 2 of the License, or (at your option) any later version.
00008 
00009     This library is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012     Library General Public License for more details.
00013 
00014     You should have received a copy of the GNU Library General Public License
00015     along with this library; see the file COPYING.LIB.  If not, write to
00016     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00017     Boston, MA 02110-1301, USA.
00018 */
00019 
00020 #include "textbreaks.h"
00021 
00022 namespace Sonnet
00023 {
00024 
00025 QHash<quint32,qint8> TextBreaks::data;
00026 QHash<QString,qint8> TextBreaks::catalog;
00027 
00028 TextBreaks::TextBreaks(const QString &text)
00029     : m_text(text)
00030 {
00031     init();
00032 }
00033 
00034 void TextBreaks::init()
00035 {
00036     if ( catalog.size() || data.size() )
00037         return;
00038 
00039     QFile sin(":/ucd/all.map");
00040     sin.open(QIODevice::ReadOnly);
00041 
00042     QDataStream in(&sin);
00043 
00044     QString cat;
00045     quint8 catKey;
00046     quint32 keys, code;
00047 
00048     in >> keys;
00049     while (keys) {
00050         in >> catKey >> cat;
00051         catalog.insert(cat,catKey);
00052         --keys;
00053     }
00054 
00055     in >> keys;
00056     while (keys) {
00057         in >> code >> catKey;
00058         data.insert(code,catKey);
00059         --keys;
00060     }
00061 }
00062 
00063 QString TextBreaks::text() const
00064 {
00065     return    m_text;
00066 }
00067 
00068 void TextBreaks::setText(const QString &text)
00069 {
00070     m_text = text;
00071 }
00072 
00073 TextBreaks::Positions TextBreaks::graphemeBreaks(const QString &text)
00074 {
00075     init();
00076     Q_UNUSED(text);
00077     return Positions();
00078 }
00079 
00080 TextBreaks::Positions TextBreaks::wordBreaks(const QString &text)
00081 {
00082     init();
00083     const QChar * grapheme  = text.constData();
00084     Positions breaks;
00085 
00086     if (text.isEmpty())
00087         return breaks;
00088 
00089     breaks.append(0); //WB1
00090 
00091     bool bk = true;
00092     int pos(1);
00093     while (!grapheme->isNull())
00094     {
00095         qint8 catagory0( data[(quint32)(grapheme-1)->unicode()] );
00096         qint8 catagory( data[(quint32)grapheme->unicode()] );
00097         qint8 catagory2( data[(quint32)(grapheme+1)->unicode()] );
00098         qint8 catagory3(0);
00099         if ( ( !(grapheme+1)->isNull() )
00100                 &&
00101                 ( !(grapheme+2)->isNull() )
00102            )
00103             catagory3 = data[(quint32)(grapheme+2)->unicode()];
00104 
00105         // start rule based checking
00106 
00107         //WB5
00108         if ( ( catagory == catalog["ALetter"] )
00109                 &&
00110                 ( catagory2 == catalog["ALetter"] )
00111            )
00112         {
00113             //qDebug() << "WB5";
00114             bk=false;
00115         }
00116 
00117         //WB6
00118         if ( ( catagory == catalog["ALetter"] )
00119                 &&
00120                 ( catagory2 == catalog["MidLetter"] )
00121                 &&
00122                 ( catagory3 == catalog["ALetter"] )
00123            )
00124         {
00125             //qDebug() << "WB6";
00126             bk=false;
00127         }
00128 
00129         //WB7
00130         if ( ( catagory0 == catalog["ALetter"] )
00131                 &&
00132                 ( catagory == catalog["MidLetter"] )
00133                 &&
00134                 ( catagory2 == catalog["ALetter"] )
00135            )
00136         {
00137             //qDebug() << "WB7";
00138             bk=false;
00139         }
00140 
00141         //WB8
00142         if ( ( catagory == catalog["Numeric"] )
00143                 &&
00144                 ( catagory2 == catalog["Numeric"] )
00145            )
00146         {
00147             //qDebug() << "WB8";
00148             bk=false;
00149         }
00150 
00151         //WB9
00152         if ( ( catagory == catalog["ALetter"] )
00153                 &&
00154                 ( catagory2 == catalog["Numeric"] )
00155            )
00156         {
00157             //qDebug() << "WB9";
00158             bk=false;
00159         }
00160 
00161         //WB10
00162         if ( ( catagory == catalog["Numeric"] )
00163                 &&
00164                 ( catagory2 == catalog["ALetter"] )
00165            )
00166         {
00167             //qDebug() << "WB10";
00168             bk=false;
00169         }
00170 
00171         //WB11
00172         if ( ( catagory == catalog["Numeric"] )
00173                 &&
00174                 ( catagory2 == catalog["MidNum"] )
00175                 &&
00176                 ( catagory3 == catalog["Numeric"] )
00177            )
00178         {
00179             //qDebug() << "WB11";
00180             bk=false;
00181         }
00182 
00183         //WB12
00184         if ( ( catagory0 == catalog["Numeric"] )
00185                 &&
00186                 ( catagory == catalog["MidNum"] )
00187                 &&
00188                 ( catagory2 == catalog["Numeric"] )
00189            )
00190         {
00191             //qDebug() << "WB12";
00192             bk=false;
00193         }
00194 
00195         //WB13
00196         if ( ( catagory == catalog["Katakana"] )
00197                 &&
00198                 ( catagory2 == catalog["Katakana"] )
00199            )
00200         {
00201             //qDebug() << "WB13";
00202             bk=false;
00203         }
00204 
00205         //WB13a
00206         if ( ( catagory == catalog["ALetter"] ||
00207                 catagory == catalog["Numeric"] ||
00208                 catagory == catalog["Katakana"] ||
00209                 catagory == catalog["ExtendNumLet"] )
00210                 &&
00211                 ( catagory2 == catalog["ExtendNumLet"] )
00212            )
00213         {
00214             //qDebug() << "WB13a";
00215             bk=false;
00216         }
00217 
00218         //WB13b
00219         if ( ( catagory == catalog["ExtendNumLet"] )
00220                 &&
00221                 ( catagory2 == catalog["ALetter"] ||
00222                   catagory2 == catalog["Numeric"] ||
00223                   catagory2 == catalog["Katakana"] )
00224            )
00225         {
00226             //qDebug() << "WB13b";
00227             bk=false;
00228         }
00229 
00230         //  qDebug() << "cat="<< data[(quint32)grapheme->unicode()];
00231 
00232         if (bk)
00233             breaks.append(pos);
00234         bk=true; // WB14
00235 
00236         ++pos;
00237         ++grapheme;
00238     }
00239     //WB2
00240     //qDebug() << "WB2";
00241 
00242     QString modText = text;
00243     int off(0);
00244     foreach(pos,breaks)
00245     {
00246         modText.insert(pos+off, '|');
00247         ++off;
00248     }
00249     //qDebug() << text;
00250     qDebug() << modText;
00251     //qDebug() << "==============";
00252 
00253     return Positions();
00254 }
00255 
00256 TextBreaks::Positions TextBreaks::sentenceBreaks(const QString &text)
00257 {
00258     init();
00259     Q_UNUSED(text);
00260     return Positions();
00261 }
00262 TextBreaks::Positions TextBreaks::graphemeBreaks() const
00263 {
00264     return graphemeBreaks(m_text);
00265 }
00266 
00267 TextBreaks::Positions TextBreaks::wordBreaks() const
00268 {
00269     return wordBreaks(m_text);
00270 }
00271 
00272 TextBreaks::Positions TextBreaks::sentenceBreaks() const
00273 {
00274     return sentenceBreaks(m_text);
00275 }
00276 
00277 }

KDECore

Skip menu "KDECore"
  • Main Page
  • Modules
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • KIO
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • Kross
  • KUtils
  • Nepomuk
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.5.4
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal