Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

utf8latin1.cpp

00001 /******************************************************************************
00002  *
00003  * UTF8Latin1 - SWFilter descendant to convert UTF-8 to Latin-1
00004  *
00005  */
00006 
00007 #include <stdlib.h>
00008 #include <stdio.h>
00009 
00010 #include <utf8latin1.h>
00011 
00012 UTF8Latin1::UTF8Latin1(char rchar) : replacementChar(rchar) {
00013 }
00014 
00015 
00016 char UTF8Latin1::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
00017 {
00018   unsigned char *from;
00019   unsigned short *to;
00020 
00021   int len;
00022   unsigned long uchar;
00023   unsigned char significantFirstBits, subsequent;
00024   
00025   len = strlen(text) + 1;                                               // shift string to right of buffer
00026   if (len < maxlen) {
00027     memmove(&text[maxlen - len], text, len);
00028     from = (unsigned char*)&text[maxlen - len];
00029   }
00030   else
00031     from = (unsigned char*)text;
00032   
00033   
00034   // -------------------------------
00035   
00036   for (to = (unsigned short*)text; *from; from++) {
00037     uchar = 0;
00038     if ((*from & 128) != 128) {
00039       //                if (*from != ' ')
00040       uchar = *from;
00041     }
00042     else if ((*from & 128) && ((*from & 64) != 64)) {
00043       // error, do nothing
00044       continue;
00045     }
00046     else {
00047       *from <<= 1;
00048       for (subsequent = 1; (*from & 128); subsequent++) {
00049         *from <<= 1;
00050         from[subsequent] &= 63;
00051         uchar <<= 6;
00052         uchar |= from[subsequent];
00053       }
00054       subsequent--;
00055       *from <<=1;
00056       significantFirstBits = 8 - (2+subsequent);
00057       
00058       uchar |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8));
00059       from += subsequent;
00060     }
00061 
00062     if (uchar < 0xff) {
00063         *to++ = (unsigned char)uchar;
00064     }
00065     else {
00066         *to++ = replacementChar;
00067     }
00068   }
00069   *to++ = 0;
00070   *to = 0;
00071 
00072   return 0;
00073 }
00074 

Generated on Thu Jun 20 22:13:01 2002 for The Sword Project by doxygen1.2.15