Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

rawstr.cpp

00001 /******************************************************************************
00002  *  rawstr.cpp   - code for class 'RawStr' - a module that reads raw text
00003  *                              files using an index file (<path>.idx) and a data
00004  *                              file (<path>.dat), and provides lookup and parsing
00005  *                              functions based on class StrKey
00006  */
00007 
00008 
00009 #include <stdio.h>
00010 #include <fcntl.h>
00011 #include <errno.h>
00012 
00013 #ifndef __GNUC__
00014 #include <io.h>
00015 #else
00016 #include <unistd.h>
00017 #endif
00018 
00019 #include <string.h>
00020 #include <stdlib.h>
00021 #include <utilfuns.h>
00022 #include <rawstr.h>
00023 #include <sysdata.h>
00024 /******************************************************************************
00025  * RawStr Statics
00026  */
00027 
00028 int RawStr::instance = 0;	// incremented by the constructor, decremented by the destructor
00029 char RawStr::nl = '\n';	// line-break character written into the output by preptext()
00030 
00031 
00032 /******************************************************************************
00033  * RawStr Constructor - Initializes data for instance of RawStr
00034  *
00035  * ENT: ipath - path of the directory where data and index files are located.
00036  *              be sure to include the trailing separator (e.g. '/' or '\')
00037  *              (e.g. 'modules/texts/rawtext/webster/')
00038  */
00039 
00040 RawStr::RawStr(const char *ipath, int fileMode)
00041 {
00042         char buf[127];
00043 
00044         lastoff = -1;
00045         path = 0;
00046         stdstr(&path, ipath);
00047 
00048 #ifndef O_BINARY                // O_BINARY is needed in Borland C++ 4.53
00049 #define O_BINARY 0              // If it hasn't been defined than we probably
00050 #endif                          // don't need it.
00051 
00052         if (fileMode == -1) { // try read/write if possible
00053                 fileMode = O_RDWR;
00054         }
00055                 
00056         sprintf(buf, "%s.idx", path);
00057         idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true);
00058 
00059         sprintf(buf, "%s.dat", path);
00060         datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true);
00061 
00062         if (datfd < 0) {
00063                 sprintf(buf, "Error: %d", errno);
00064                 perror(buf);
00065         }
00066 
00067         instance++;
00068 }
00069 
00070 
00071 /******************************************************************************
00072  * RawStr Destructor - Cleans up instance of RawStr
00073  */
00074 
00075 RawStr::~RawStr()
00076 {
00077         if (path)
00078                 delete [] path;
00079 
00080         --instance;
00081 
00082         FileMgr::systemFileMgr.close(idxfd);
00083         FileMgr::systemFileMgr.close(datfd);
00084 }
00085 
00086 
00087 /******************************************************************************
00088  * RawStr::getidxbufdat - Gets the index string at the given idx offset
00089  *                                              NOTE: buf is allocated and must be freed by
00090  *                                                      calling function
00091  *
00092  * ENT: ioffset - offset in dat file to lookup
00093  *              buf             - address of pointer to allocate for storage of string
00094  */
00095 
00096 void RawStr::getidxbufdat(long ioffset, char **buf)
00097 {
00098         int size;
00099         char ch;
00100         if (datfd > 0) {
00101                 lseek(datfd->getFd(), ioffset, SEEK_SET);
00102                 for (size = 0; read(datfd->getFd(), &ch, 1) == 1; size++) {
00103                         if ((ch == '\\') || (ch == 10) || (ch == 13))
00104                                 break;
00105                 }
00106                 *buf = (*buf) ? (char *)realloc(*buf, size + 1) : (char *)malloc(size + 1);
00107                 if (size) {
00108                         lseek(datfd->getFd(), ioffset, SEEK_SET);
00109                         read(datfd->getFd(), *buf, size);
00110                 }
00111                 (*buf)[size] = 0;
00112                 for (size--; size > 0; size--)
00113                         (*buf)[size] = SW_toupper((*buf)[size]);
00114         }
00115         else {
00116                 *buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1);
00117                 **buf = 0;
00118         }
00119 }
00120 
00121 
00122 /******************************************************************************
00123  * RawStr::getidxbuf    - Gets the index string at the given idx offset
00124  *                                              NOTE: buf is allocated and must be freed by
00125  *                                                      calling function
00126  *
00127  * ENT: ioffset - offset in idx file to lookup
00128  *              buf             - address of pointer to allocate for storage of string
00129  */
00130 
00131 void RawStr::getidxbuf(long ioffset, char **buf)
00132 {
00133         char *trybuf, *targetbuf;
00134         long offset;
00135         
00136         if (idxfd > 0) {
00137                 lseek(idxfd->getFd(), ioffset, SEEK_SET);
00138                 read(idxfd->getFd(), &offset, 4);
00139 
00140                 offset = swordtoarch32(offset);
00141 
00142                 getidxbufdat(offset, buf);
00143                 for (trybuf = targetbuf = *buf; *trybuf; trybuf++, targetbuf++) {
00144 /*
00145                         if (*trybuf == '-') {           // ignore '-' because alphabetized silly in file
00146                                 targetbuf--;
00147                                 continue;
00148                         }
00149 */
00150                         *targetbuf = SW_toupper(*trybuf);
00151                 }
00152                 *targetbuf = 0;
00153                 trybuf = 0;
00154         }
00155 }
00156 
00157 
00158 /******************************************************************************
00159  * RawStr::findoffset   - Finds the offset of the key string from the indexes
00160  *
00161  * ENT: key             - key string to lookup
00162  *              start   - address to store the starting offset
00163  *              size            - address to store the size of the entry
00164  *              away            - number of entries before of after to jump
00165  *                                      (default = 0)
00166  *
00167  * RET: error status
00168  */
00169 
00170 signed char RawStr::findoffset(const char *ikey, long *start, unsigned short *size, long away, long *idxoff)
00171 {
00172         char *trybuf, *targetbuf, *key, quitflag = 0;
00173         signed char retval = 0;
00174         long headoff, tailoff, tryoff = 0, maxoff = 0;
00175 
00176         if (idxfd->getFd() >=0) {
00177                 tailoff = maxoff = lseek(idxfd->getFd(), 0, SEEK_END) - 6;
00178                 if (*ikey) {
00179                         headoff = 0;
00180 
00181                         key = new char [ strlen(ikey) + 1 ];
00182                         strcpy(key, ikey);
00183 
00184                         for (trybuf = targetbuf = key; *trybuf; trybuf++, targetbuf++) {
00185         /*
00186                                 if (*trybuf == '-') {           // ignore '-' because alphabetized silly in file
00187                                         targetbuf--;
00188                                         continue;
00189                                 }
00190         */
00191                                 *targetbuf = SW_toupper(*trybuf);
00192                         }
00193                         *targetbuf = 0;
00194                         trybuf = 0;
00195 
00196                         while (headoff < tailoff) {
00197                                 tryoff = (lastoff == -1) ? headoff + ((((tailoff / 6) - (headoff / 6))) / 2) * 6 : lastoff; 
00198                                 lastoff = -1;
00199                                 getidxbuf(tryoff, &trybuf);
00200 
00201                                 if (!*trybuf && tryoff) {               // In case of extra entry at end of idx (not first entry)
00202                                         tryoff += (tryoff > (maxoff / 2))?-6:6;
00203                                         retval = -1;
00204                                         break;
00205                                 }
00206                                         
00207                                 if (!strcmp(key, trybuf))
00208                                         break;
00209 
00210                                 int diff = strcmp(key, trybuf);
00211                                 if (diff < 0)
00212                                         tailoff = (tryoff == headoff) ? headoff : tryoff;
00213                                 else headoff = tryoff;
00214                                 if (tailoff == headoff + 6) {
00215                                         if (quitflag++)
00216                                                 headoff = tailoff;
00217                                 }
00218                         }
00219                         if (headoff >= tailoff)
00220                                 tryoff = headoff;
00221                         if (trybuf)
00222                                 free(trybuf);
00223                         delete [] key;
00224                 }
00225                 else    tryoff = 0;
00226 
00227                 lseek(idxfd->getFd(), tryoff, SEEK_SET);
00228 
00229                 *start = *size = 0;
00230                 read(idxfd->getFd(), start, 4);
00231                 read(idxfd->getFd(), size, 2);
00232                 if (idxoff)
00233                         *idxoff = tryoff;
00234 
00235                 *start = swordtoarch32(*start);
00236                 *size  = swordtoarch16(*size);
00237 
00238                 while (away) {
00239                         long laststart = *start;
00240                         unsigned short lastsize = *size;
00241                         long lasttry = tryoff;
00242                         tryoff += (away > 0) ? 6 : -6;
00243                 
00244                         bool bad = false;
00245                         if (((tryoff + (away*6)) < -6) || (tryoff + (away*6) > (maxoff+6)))
00246                                 bad = true;
00247                         else if (lseek(idxfd->getFd(), tryoff, SEEK_SET) < 0)
00248                                 bad = true;
00249                         if (bad) {
00250                                 retval = -1;
00251                                 *start = laststart;
00252                                 *size = lastsize;
00253                                 tryoff = lasttry;
00254                                 if (idxoff)
00255                                         *idxoff = tryoff;
00256                                 break;
00257                         }
00258                         read(idxfd->getFd(), start, 4);
00259                         read(idxfd->getFd(), size, 2);
00260                         if (idxoff)
00261                                 *idxoff = tryoff;
00262 
00263                         *start = swordtoarch32(*start);
00264                         *size  = swordtoarch16(*size);
00265 
00266                         if (((laststart != *start) || (lastsize != *size)) && (*start >= 0) && (*size)) 
00267                                 away += (away < 0) ? 1 : -1;
00268                 }
00269         
00270                 lastoff = tryoff;
00271         }
00272         else {
00273                 *start = 0;
00274                 *size  = 0;
00275                 if (idxoff)
00276                         *idxoff = 0;
00277                 retval = -1;
00278         }
00279         return retval;
00280 }
00281 
00282 
00283 /******************************************************************************
00284  * RawStr::preptext     - Prepares the text before returning it to external
00285  *                                      objects
00286  *
00287  * ENT: buf     - buffer where text is stored and where to store the prep'd
00288  *                              text.
00289  */
00290 
00291 void RawStr::preptext(char *buf) {
00292         char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0;
00293 
00294         for (to = from = buf; *from; from++) {
00295                 switch (*from) {
00296                 case 10:
00297                         if (!realdata)
00298                                 continue;
00299                         space = (cr) ? 0 : 1;
00300                         cr = 0;
00301                         nlcnt++;
00302                         if (nlcnt > 1) {
00303 //                              *to++ = nl;
00304                                 *to++ = nl;
00305 //                              nlcnt = 0;
00306                         }
00307                         continue;
00308                 case 13:
00309                         if (!realdata)
00310                                 continue;
00311                         *to++ = nl;
00312                         space = 0;
00313                         cr = 1;
00314                         continue;
00315                 }
00316                 realdata = 1;
00317                 nlcnt = 0;
00318                 if (space) {
00319                         space = 0;
00320                         if (*from != ' ') {
00321                                 *to++ = ' ';
00322                                 from--;
00323                                 continue;
00324                         }
00325                 }
00326                 *to++ = *from;
00327         }
00328         *to = 0;
00329 
00330         while (to > (buf+1)) {                  // remove trailing excess
00331                 to--;
00332                 if ((*to == 10) || (*to == ' '))
00333                         *to = 0;
00334                 else break;
00335         }
00336 }
00337 
00338 
00339 /******************************************************************************
00340  * RawStr::gettext      - gets text at a given offset
00341  *
00342  * ENT:
00343  *      start   - starting offset where the text is located in the file
00344  *      size            - size of text entry
00345  *      buf             - buffer to store text
00346  *
00347  */
00348 
00349 void RawStr::gettext(long istart, unsigned short isize, char *idxbuf, char *buf)
00350 {
00351         char *ch;
00352         char *idxbuflocal = 0;
00353         getidxbufdat(istart, &idxbuflocal);
00354         long start = istart;
00355         unsigned short size = isize;
00356 
00357         do {
00358                 memset(buf, 0, size);
00359                 lseek(datfd->getFd(), start, SEEK_SET);
00360                 read(datfd->getFd(), buf, (int)(size - 2));
00361 
00362                 for (ch = buf; *ch; ch++) {             // skip over index string
00363                         if (*ch == 10) {
00364                                 ch++;
00365                                 break;
00366                         }
00367                 }
00368                 size -= (unsigned short)(ch-buf);
00369                 memmove(buf, ch, size);
00370                 buf[size] = 0;
00371                 buf[size+1] = 0;
00372 
00373                 // resolve link
00374                 if (!strncmp(buf, "@LINK", 5)) {
00375                         for (ch = buf; *ch; ch++) {             // null before nl
00376                                 if (*ch == 10) {
00377                                         *ch = 0;
00378                                         break;
00379                                 }
00380                         }
00381                         findoffset(buf + 6, &start, &size);
00382                         // TODO: FIX!  THIS IS WRONG!!!  buf is not reallocated for the appropriate size!
00383                 }
00384                 else break;
00385         }
00386         while (true);   // while we're resolving links
00387 
00388         if (idxbuflocal) {
00389                 int localsize = strlen(idxbuflocal);
00390                 localsize = (localsize < (size - 1)) ? localsize : (size - 1);
00391                 strncpy(idxbuf, idxbuflocal, localsize);
00392                 idxbuf[localsize] = 0;
00393                 free(idxbuflocal);
00394         }
00395 }
00396 
00397 
00398 /******************************************************************************
00399  * RawLD::settext       - Sets text for current offset
00400  *
00401  * ENT: key     - key for this entry
00402  *      buf     - buffer to store
00403  *      len     - length of buffer (0 - null terminated)
00404  */
00405 
00406 void RawStr::settext(const char *ikey, const char *buf, long len)
00407 {
00408 
00409         long start, outstart;
00410         long idxoff;
00411         long endoff;
00412         long shiftSize;
00413         unsigned short size;
00414         unsigned short outsize;
00415         static const char nl[] = {13, 10};
00416         char *tmpbuf = 0;
00417         char *key = 0;
00418         char *dbKey = 0;
00419         char *idxBytes = 0;
00420         char *outbuf = 0;
00421         char *ch = 0;
00422 
00423         findoffset(ikey, &start, &size, 0, &idxoff);
00424         stdstr(&key, ikey);
00425         for (ch = key; *ch; ch++)
00426                 *ch = SW_toupper(*ch);
00427         ch = 0;
00428 
00429         getidxbufdat(start, &dbKey);
00430 
00431         if (strcmp(key, dbKey) < 0) {
00432         }
00433         else if (strcmp(key, dbKey) > 0) {
00434                 idxoff += 6;
00435         } else if ((!strcmp(key, dbKey)) && (len || strlen(buf) /*we're not deleting*/)) { // got absolute entry
00436                 do {
00437                         tmpbuf = new char [ size + 2 ];
00438                         memset(tmpbuf, 0, size + 2);
00439                         lseek(datfd->getFd(), start, SEEK_SET);
00440                         read(datfd->getFd(), tmpbuf, (int)(size - 1));
00441 
00442                         for (ch = tmpbuf; *ch; ch++) {          // skip over index string
00443                                 if (*ch == 10) {
00444                                         ch++;
00445                                         break;
00446                                 }
00447                         }
00448                         memmove(tmpbuf, ch, size - (unsigned short)(ch-tmpbuf));
00449 
00450                         // resolve link
00451                         if (!strncmp(tmpbuf, "@LINK", 5) && (len ? len : strlen(buf))) {
00452                                 for (ch = tmpbuf; *ch; ch++) {          // null before nl
00453                                         if (*ch == 10) {
00454                                                 *ch = 0;
00455                                                 break;
00456                                         }
00457                                 }
00458                                 findoffset(tmpbuf + 6, &start, &size, 0, &idxoff);
00459                         }
00460                         else break;
00461                 }
00462                 while (true);   // while we're resolving links
00463         }
00464 
00465         endoff = lseek(idxfd->getFd(), 0, SEEK_END);
00466 
00467         shiftSize = endoff - idxoff;
00468 
00469         if (shiftSize > 0) {
00470                 idxBytes = new char [ shiftSize ];
00471                 lseek(idxfd->getFd(), idxoff, SEEK_SET);
00472                 read(idxfd->getFd(), idxBytes, shiftSize);
00473         }
00474 
00475         outbuf = new char [ (len ? len : strlen(buf)) + strlen(key) + 5 ];
00476         sprintf(outbuf, "%s%c%c", key, 13, 10);
00477         size = strlen(outbuf);
00478         memcpy (outbuf + size, buf, len ? len : strlen(buf));
00479         size = outsize = size + (len ? len : strlen(buf));
00480 
00481         start = outstart = lseek(datfd->getFd(), 0, SEEK_END);
00482 
00483         outstart = archtosword32(start);
00484         outsize  = archtosword16(size);
00485 
00486         lseek(idxfd->getFd(), idxoff, SEEK_SET);
00487         if (len ? len : strlen(buf)) {
00488                 lseek(datfd->getFd(), start, SEEK_SET);
00489                 write(datfd->getFd(), outbuf, (int)size);
00490 
00491                 // add a new line to make data file easier to read in an editor
00492                 write(datfd->getFd(), &nl, 2);
00493                 
00494                 write(idxfd->getFd(), &outstart, 4);
00495                 write(idxfd->getFd(), &outsize, 2);
00496                 if (idxBytes) {
00497                         write(idxfd->getFd(), idxBytes, shiftSize);
00498                         delete [] idxBytes;
00499                 }
00500         }
00501         else {  // delete entry
00502                 if (idxBytes) {
00503                         write(idxfd->getFd(), idxBytes+6, shiftSize-6);
00504                         lseek(idxfd->getFd(), -1, SEEK_CUR);    // last valid byte
00505                         FileMgr::systemFileMgr.trunc(idxfd);    // truncate index
00506                         delete [] idxBytes;
00507                 }
00508         }
00509 
00510         delete [] key;
00511         delete [] outbuf;
00512         free(dbKey);
00513 }
00514 
00515 
00516 /******************************************************************************
00517  * RawLD::linkentry     - links one entry to another
00518  *
00519  * ENT: testmt  - testament to find (0 - Bible/module introduction)
00520  *      destidxoff      - dest offset into .vss
00521  *      srcidxoff               - source offset into .vss
00522  */
00523 
00524 void RawStr::linkentry(const char *destkey, const char *srckey) {
00525         char *text = new char [ strlen(destkey) + 7 ];
00526         sprintf(text, "@LINK %s", destkey);
00527         settext(srckey, text);
00528         delete [] text;
00529 }
00530 
00531 
00532 /******************************************************************************
00533  * RawLD::CreateModule  - Creates new module files
00534  *
00535  * ENT: path    - directory to store module files
00536  * RET: error status
00537  */
00538 
00539 signed char RawStr::createModule(const char *ipath)
00540 {
00541         char *path = 0;
00542         char *buf = new char [ strlen (ipath) + 20 ];
00543         FileDesc *fd, *fd2;
00544 
00545         stdstr(&path, ipath);
00546 
00547         if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
00548                 path[strlen(path)-1] = 0;
00549 
00550         sprintf(buf, "%s.dat", path);
00551         unlink(buf);
00552         fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
00553         fd->getFd();
00554         FileMgr::systemFileMgr.close(fd);
00555 
00556         sprintf(buf, "%s.idx", path);
00557         unlink(buf);
00558         fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
00559         fd2->getFd();
00560         FileMgr::systemFileMgr.close(fd2);
00561 
00562         delete [] path;
00563         
00564         return 0;
00565 }

Generated on Thu Jun 20 22:13:00 2002 for The Sword Project by doxygen1.2.15