Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

utf8greekaccents.cpp

00001 /******************************************************************************
00002  *
00003  * UTF8GreekAccents - SWFilter descendant to remove UTF-8 Greek accents
00004  *
00005  */
00006 
00007 
00008 #include <stdlib.h>
00009 #include <stdio.h>
00010 #include <utf8greekaccents.h>
00011 
00012 
00013 const char UTF8GreekAccents::on[] = "On";
00014 const char UTF8GreekAccents::off[] = "Off";
00015 const char UTF8GreekAccents::optName[] = "Greek Accents";
00016 const char UTF8GreekAccents::optTip[] = "Toggles Greek Accents";
00017 
00018 UTF8GreekAccents::UTF8GreekAccents() {
00019         option = true;
00020         options.push_back(on);
00021         options.push_back(off);
00022 }
00023 
00024 UTF8GreekAccents::~UTF8GreekAccents(){};
00025 
00026 void UTF8GreekAccents::setOptionValue(const char *ival)
00027 {
00028         option = (!stricmp(ival, on));
00029 }
00030 
00031 const char *UTF8GreekAccents::getOptionValue()
00032 {
00033         return (option) ? on:off;
00034 }
00035 
00036 char UTF8GreekAccents::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
00037 {
00038         if (!option) {
00039     unsigned char *to, *from;
00040 
00041         to = (unsigned char*)text;
00042         for (from = (unsigned char*)text; *from; from++) {
00043           //first just remove combining characters
00044           if (*from == 0xE2 && *(from + 1) == 0x80 && *(from + 2) == 0x99)
00045             from += 2;
00046           else if (*from == 0xCC && *(from + 1)) {
00047             if (*(from + 1) == 0x80 || *(from + 1) == 0x81 || *(from + 1) == 0x82 || *(from + 1) == 0x88 || *(from + 1) == 0x93 || *(from + 1) == 0x94)
00048               from++;
00049           }
00050           else if (*from == 0xCD && *(from + 1) == 0xBA)
00051             from++;
00052           //now converted pre-composed characters to their alphabetic bases, discarding the accents
00053 
00054           //Greek
00055           //capital alpha
00056           else if ((*from == 0xCE && *(from + 1) == 0x86)) {
00057             *to++ = 0xCE;
00058             *to++ = 0x91;
00059             from++;
00060           }
00061           //capital epsilon
00062           else if ((*from == 0xCE && *(from + 1) == 0x88)) {
00063             *to++ = 0xCE;
00064             *to++ = 0x95;
00065             from++;
00066           }
00067           //capital eta
00068           else if ((*from == 0xCE && *(from + 1) == 0x89)) {
00069             *to++ = 0xCE;
00070             *to++ = 0x97;
00071             from++;
00072           }
00073           //capital iota
00074           else if ((*from == 0xCE && (*(from + 1) == 0x8A || *(from + 1) == 0xAA))) {
00075             *to++ = 0xCE;
00076             *to++ = 0x99;
00077             from++;
00078           }
00079           //capital omicron
00080           else if ((*from == 0xCE && *(from + 1) == 0x8C)) {
00081             *to++ = 0xCE;
00082             *to++ = 0x9F;
00083             from++;
00084           }
00085           //capital upsilon
00086           else if ((*from == 0xCE && (*(from + 1) == 0x8E || *(from + 1) == 0xAB))) {
00087             *to++ = 0xCE;
00088             *to++ = 0xA5;
00089             from++;
00090           }
00091           //capital omega
00092           else if ((*from == 0xCE && *(from + 1) == 0x8F)) {
00093             *to++ = 0xCE;
00094             *to++ = 0xA9;
00095             from++;
00096           }
00097 
00098           //alpha
00099           else if ((*from == 0xCE && *(from + 1) == 0xAC)) {
00100             *to++ = 0xCE;
00101             *to++ = 0xB1;
00102             from++;
00103           }
00104           //epsilon
00105           else if ((*from == 0xCE && *(from + 1) == 0xAD)) {
00106             *to++ = 0xCE;
00107             *to++ = 0xB5;
00108             from++;
00109           }
00110           //eta
00111           else if ((*from == 0xCE && *(from + 1) == 0xAE)) {
00112             *to++ = 0xCE;
00113             *to++ = 0xB7;
00114             from++;
00115           }
00116           //iota
00117           else if ((*from == 0xCE && *(from + 1) == 0xAF) || (*from == 0xCF && *(from + 1) == 0x8A)) {
00118             *to++ = 0xCE;
00119             *to++ = 0xB9;
00120             from++;
00121           }
00122           //omicron
00123           else if ((*from == 0xCF && *(from + 1) == 0x8C)) {
00124             *to++ = 0xCE;
00125             *to++ = 0xBF;
00126             from++;
00127           }
00128           //upsilon
00129           else if ((*from == 0xCE && *(from + 1) == 0x88) || (*from == 0xCF && (*(from + 1) == 0x8B || *(from + 1) == 0x8D))) {
00130             *to++ = 0xCF;
00131             *to++ = 0x85;
00132             from++;
00133           }
00134           //omega
00135           else if ((*from == 0xCF && *(from + 1) == 0x8E)) {
00136             *to++ = 0xCF;
00137             *to++ = 0x89;
00138             from++;
00139           }
00140 
00141           //Extended Greek
00142           //capital alpha
00143           else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x88 && *(from + 2) <= 0x8F) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBC)) {
00144             *to++ = 0xCE;
00145             *to++ = 0x91;
00146             from+=2;
00147           }
00148           //capital epsilon
00149           else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x98 && *(from + 2) <= 0x9D) || (*(from + 1) == 0xBF && (*(from + 2) == 0x88 || *(from + 2) == 0x89)))) {
00150             *to++ = 0xCE;
00151             *to++ = 0x95;
00152             from+=2;
00153           }
00154           //capital eta
00155           else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBE && *(from + 2) >= 0x98 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0x8A && *(from + 2) <= 0x8C))) {
00156             *to++ = 0xCE;
00157             *to++ = 0x97;
00158             from+=2;
00159           }
00160           //capital iota
00161           else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBF) || (*(from + 1) == 0xBF && *(from + 2) >= 0x98 && *(from + 2) <= 0x9B))) {
00162             *to++ = 0xCE;
00163             *to++ = 0x99;
00164             from+=2;
00165           }
00166           //capital omicron
00167           else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x88 && *(from + 2) <= 0x8D) || (*(from + 1) == 0xBF && *(from + 2) == 0xB8 || *(from + 2) == 0xB9))) {
00168             *to++ = 0xCE;
00169             *to++ = 0x9F;
00170             from+=2;
00171           }
00172           //capital upsilon
00173           else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x99 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAB))) {
00174             *to++ = 0xCE;
00175             *to++ = 0xA5;
00176             from+=2;
00177           }
00178           //capital omega
00179           else if (*from == 0xE1 && (((*(from + 1) == 0xBD || *(from + 1) == 0xBE) && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBF && *(from + 2) >= 0xBA && *(from + 2) <= 0xBC))) {
00180             *to++ = 0xCE;
00181             *to++ = 0xA9;
00182             from+=2;
00183           }
00184           //capital rho
00185           else if (*from == 0xE1 && *(from + 1) == 0xBF && *(from + 2) == 0xAC) {
00186             *to++ = 0xCE;
00187             *to++ = 0xA1;
00188             from+=2;
00189           }
00190 
00191           //alpha
00192           else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x80 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB0 || *(from + 2) == 0xB1)) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7)) {
00193             *to++ = 0xCE;
00194             *to++ = 0xB1;
00195             from+=2;
00196           }
00197           //epsilon
00198           else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x90 && *(from + 2) <= 0x95) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB2 || *(from + 2) == 0xB3)))) {
00199             *to++ = 0xCE;
00200             *to++ = 0xB5;
00201             from+=2;
00202           }
00203           //eta
00204           else if (*from == 0xE1 && ((*(from + 1) == 0xBE && *(from + 2) >= 0x90 && *(from + 2) <= 0x97) || (*(from + 1) == 0xBC && *(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 1) == 0xBF && *(from + 2) >= 0x82 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB4 || *(from + 2) == 0xB5)))) {
00205             *to++ = 0xCE;
00206             *to++ = 0xB7;
00207             from+=2;
00208           }
00209           //iota
00210           else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB6 || *(from + 2) == 0xB7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0x90 && *(from + 2) <= 0x97))) {
00211             *to++ = 0xCE;
00212             *to++ = 0xB9;
00213             from+=2;
00214           }
00215           //omicron
00216           else if (*from == 0xE1 && (*(from + 1) == 0xBD && ((*(from + 2) >= 0x80 && *(from + 2) <= 0x85) || (*(from + 2) == 0xB8 || *(from + 2) == 0xB9)))) {
00217             *to++ = 0xCE;
00218             *to++ = 0xBF;
00219             from+=2;
00220           }
00221           //upsilon
00222           else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0x90 && *(from + 2) <= 0x97) || *(from + 2) == 0xBA || *(from + 2) == 0xBB)) || (*(from + 1) == 0xBF && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA3) || *(from + 2) == 0xA6 || *(from + 2) == 0xA7)))) {
00223             *to++ = 0xCF;
00224             *to++ = 0x85;
00225             from+=2;
00226           }
00227           //omega
00228           else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 2) == 0xBC || *(from + 2) == 0xBD))) || (*(from + 1) == 0xBE && (*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0xB2 && *(from + 2) <= 0xB7))) {
00229             *to++ = 0xCF;
00230             *to++ = 0x89;
00231             from+=2;
00232           }
00233           //rho
00234           else if (*from == 0xE1 && *(from + 1) == 0xBF && (*(from + 2) == 0xA4 && *(from + 2) == 0xA5)) {
00235             *to++ = 0xCF;
00236             *to++ = 0x81;
00237             from+=2;
00238           }
00239           else
00240             *to++ = *from;
00241         }
00242         *to++ = 0;
00243         *to = 0;
00244      }
00245         return 0;
00246 }
00247 
00248 
00249 
00250 
00251 
00252 

Generated on Thu Jun 20 22:13:01 2002 for The Sword Project by doxygen1.2.15