kpilot Library API Documentation

makedoc9.cc

00001 // based on: MakeDoc, version 2 00002 // I only took the tBuf class from there and adapted it. 00003 // 00004 // Compresses text files into a format that is ready to export to a Pilot 00005 // and work with Rick Bram's PilotDOC reader. 00006 // Copyright (C) Reinhold Kainhofer, 2002 00007 // Copyrigth (C) Pat Beirne, 2000 00008 // 00009 // Original file (makedoc9.cpp) copyright by: 00010 // Copyright (C) Pat Beirne, 2000. 00011 // Distributable under the GNU General Public License Version 2 or later. 00012 // 00013 // ver 0.6 enforce 31 char limit on database names 00014 // ver 0.7 change header and record0 to structs 00015 // ver 2.0 added category control on the command line 00016 // changed extensions from .prc to .pdb 00017 00018 /* 00019 ** This program is free software; you can redistribute it and/or modify 00020 ** it under the terms of the GNU General Public License as published by 00021 ** the Free Software Foundation; either version 2 of the License, or 00022 ** (at your option) any later version. 00023 ** 00024 ** This program is distributed in the hope that it will be useful, 00025 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 00026 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00027 ** GNU General Public License for more details. 00028 ** 00029 ** You should have received a copy of the GNU General Public License 00030 ** along with this program in a file called COPYING; if not, write to 00031 ** the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, 00032 ** MA 02111-1307, USA. 00033 */ 00034 00035 00036 #include <stdio.h> 00037 #include <stdlib.h> 00038 #include <string.h> 00039 00040 #include <iostream> 00041 00042 00043 #include "makedoc9.h" 00044 00045 00046 00047 // 00048 // Issue() 00049 // 00050 // action: handle the details of writing a single 00051 // character to the compressed stream 00052 // 00053 unsigned 00054 tBuf::Issue(byte src, int &bSpace) 00055 { 00056 unsigned int iDest = len; 00057 byte *dest = buf; 00058 00059 // TODO: which of the if parts should really be included??? 00060 #if 0 00061 // modified version of issue 00062 // just issue the char 00063 if (src >= 0x80 || src <= 8) 00064 dest[iDest++] = 1; 00065 dest[iDest++] = src; 00066 00067 #else 00068 // if there is an outstanding space char, see if 00069 // we can squeeze it in with an ASCII char 00070 if (bSpace) 00071 { 00072 if (src >= 0x40 && src <= 0x7F) 00073 dest[iDest++] = src ^ 0x80; 00074 else 00075 { 00076 // couldn't squeeze it in, so issue the space char by itself 00077 // most chars go out simple, except the range 1...8,0x80...0xFF 00078 dest[iDest++] = ' '; 00079 if (src < 0x80 && (src == 0 || src > 8)) 00080 dest[iDest++] = src; 00081 else 00082 dest[iDest++] = 1, dest[iDest++] = src; 00083 } 00084 // knock down the space flag 00085 bSpace = 0; 00086 } 00087 else 00088 { 00089 // check for a space char 00090 if (src == ' ') 00091 bSpace = 1; 00092 else 00093 { 00094 if (src < 0x80 && (src == 0 || src > 8)) 00095 dest[iDest++] = src; 00096 else 00097 dest[iDest++] = 1, dest[iDest++] = src; 00098 00099 } 00100 } 00101 #endif 00102 len = iDest; 00103 return iDest; 00104 } 00105 00106 // 00107 // Compress 00108 // 00109 // params: none 00110 // 00111 // action: takes the given buffer, 00112 // and compresses 00113 // the original data down into a second buffer 00114 // 00115 // comment: This version make heavy use of walking pointers. 00116 // 00117 unsigned tBuf::Compress() 00118 { 00119 if (!buf) 00120 return 0; 00121 if (isCompressed) { 00122 // cout<<"Buffer is already compressed!"<<endl; 00123 return len; 00124 // } else { 00125 // cout<<" Compressing buffer!!!"<<endl; 00126 } 00127 00128 unsigned int i; 00129 00130 // run through the input buffer 00131 byte *pBuffer; // points to the input buffer 00132 byte *pHit; // points to a walking test hit; works upwards on successive matches 00133 byte *pPrevHit; // previous value of pHit; also, start of next test 00134 byte *pTestHead; // current test string 00135 byte *pTestTail; // current walking pointer; one past the current test buffer 00136 byte *pEnd; // 1 past the end of the input buffer 00137 00138 pHit = pPrevHit = pTestHead = pBuffer = buf; 00139 pTestTail = pTestHead + 1; 00140 pEnd = buf + len; // should point to a 0! 00141 00142 // make a dest buffer and reassign the local buffer 00143 buf = new byte[6000]; 00144 len = 0; // used to walk through the output buffer 00145 00146 // loop, absorbing one more char from the input buffer on each pass 00147 for (; pTestHead != pEnd; pTestTail++) 00148 { 00149 // if we already have 10 char match, don't bother scanning again for the 11th (wasted time) 00150 if (pTestTail - pTestHead != (1 << COUNT_BITS) + 3) 00151 { 00152 // scan in the previous data for a match 00153 // terminate the test string (and the matcher string, as well!) in a 0 00154 byte tmp = *pTestTail; 00155 00156 *pTestTail = 0; 00157 pHit = (byte *) strstr((const char *) pPrevHit, 00158 (const char *) pTestHead); 00159 *pTestTail = tmp; // restore the char 00160 } 00161 00162 // on a mismatch or end of buffer, issued codes 00163 if (pHit == pTestHead 00164 || pTestTail - pTestHead > (1 << COUNT_BITS) + 2 00165 || pTestTail == pEnd) 00166 { 00167 // issue the codes 00168 // first, check for short runs 00169 if (pTestTail - pTestHead < 4) 00170 { 00171 if (pTestHead[0] > 0x7F || pTestHead[0] <= 8) 00172 buf[len++] = 1; 00173 buf[len++] = pTestHead[0]; 00174 pTestHead++; 00175 } 00176 // for longer runs, issue a run-code 00177 else 00178 { 00179 unsigned int dist = pTestHead - pPrevHit; 00180 unsigned int compound = 00181 (dist << COUNT_BITS) + pTestTail - pTestHead - 4; 00182 00183 //if (dist>=(1<<DISP_BITS)) printf("\n!! error dist overflow"); 00184 //if (pTestTail-pTestHead-4>7) printf("\n!! error len overflow"); 00185 00186 buf[len++] = 0x80 + (compound >> 8); 00187 buf[len++] = compound & 0xFF; 00188 //printf("\nissuing code for sequence len %d <%c%c%c>",pTestTail-pTestHead-1,pTestHead[0],pTestHead[1],pTestHead[2]); 00189 //printf("\n <%x%x>",pOut[-2],pOut[-1]); 00190 // and start again 00191 pTestHead = pTestTail - 1; 00192 } 00193 // start the search again 00194 pPrevHit = pBuffer; 00195 // within range 00196 if (pTestHead - pPrevHit > ((1 << DISP_BITS) - 1)) 00197 pPrevHit = pTestHead - ((1 << DISP_BITS) - 1); 00198 } 00199 // got a match 00200 else 00201 { 00202 pPrevHit = pHit; 00203 } 00204 // when we get to the end of the buffer, don't inc past the end 00205 // this forces the residue chars out one at a time 00206 if (pTestTail == pEnd) 00207 pTestTail--; 00208 } 00209 00210 00211 // final scan to merge consecutive high chars together 00212 // and merge space chars 00213 unsigned int k; 00214 00215 for (i = k = 0; i < len; i++, k++) 00216 { 00217 buf[k] = buf[i]; 00218 // skip the run-length codes 00219 if (buf[k] >= 0x80 && buf[k] < 0xC0) 00220 buf[++k] = buf[++i]; 00221 // if we hit a high char marker, look ahead for another 00222 // and merge multiples together 00223 else if (buf[k] == 1) 00224 { 00225 buf[k + 1] = buf[i + 1]; 00226 while (i + 2 < len && buf[i + 2] == 1 && buf[k] < 8) 00227 { 00228 buf[k]++; 00229 buf[k + buf[k]] = buf[i + 3]; 00230 i += 2; 00231 } 00232 k += buf[k]; 00233 i++; 00234 } 00235 else if (buf[k] == ' ' && i < len - 1 && buf[i + 1] <= 0x7F 00236 && buf[i + 1] >= 0x40) 00237 buf[k] = 0x80 | buf[++i]; 00238 } 00239 00240 // delete original buffer 00241 delete[]pBuffer; 00242 len = k; 00243 00244 isCompressed = true; 00245 return k; 00246 } 00247 00248 /* 00249 Decompress 00250 00251 params: none 00252 00253 action: make a new buffer 00254 run through the source data 00255 check the 4 cases: 00256 0,9...7F represent self 00257 1...8 escape n chars 00258 80...bf reference earlier run 00259 c0...ff space+ASCII 00260 00261 */ 00262 unsigned tBuf::Decompress() 00263 { 00264 if (!buf) 00265 return 0; 00266 if (!isCompressed) { 00267 // cout<<"Buffer already uncompressed. Doing nothing"<<endl; 00268 return len; 00269 // } else { 00270 // cout<<"Decompressing buffer"<<endl; 00271 } 00272 00273 // we "know" that all decompresses fit within 4096, right? 00274 byte *pOut = new byte[6000]; 00275 byte *in_buf = buf; 00276 byte *out_buf = pOut; 00277 00278 unsigned int i, j; 00279 00280 for (j = i = 0; j < len;) 00281 { 00282 unsigned int c; 00283 00284 // take a char from the input buffer 00285 c = in_buf[j++]; 00286 00287 // separate the char into zones: 0, 1...8, 9...0x7F, 0x80...0xBF, 0xC0...0xFF 00288 00289 // codes 1...8 mean copy that many bytes; for accented chars & binary 00290 if (c > 0 && c < 9) 00291 while (c--) 00292 out_buf[i++] = in_buf[j++]; 00293 00294 // codes 0, 9...0x7F represent themselves 00295 else if (c < 0x80) 00296 out_buf[i++] = c; 00297 00298 // codes 0xC0...0xFF represent "space + ascii char" 00299 else if (c >= 0xC0) 00300 out_buf[i++] = ' ', out_buf[i++] = c ^ 0x80; 00301 00302 // codes 0x80...0xBf represent sequences 00303 else 00304 { 00305 int m, n; 00306 00307 c <<= 8; 00308 c += in_buf[j++]; 00309 m = (c & 0x3FFF) >> COUNT_BITS; 00310 n = c & ((1 << COUNT_BITS) - 1); 00311 n += 3; 00312 while (n--) 00313 { 00314 out_buf[i] = out_buf[i - m]; 00315 i++; 00316 } 00317 } 00318 } 00319 out_buf[i++]='\0'; 00320 out_buf[i++]='\0'; 00321 delete[]buf; 00322 buf = pOut; 00323 len = i; 00324 00325 isCompressed = false; 00326 return i; 00327 } 00328 00329 unsigned tBuf::DuplicateCR() 00330 { 00331 if (!buf) 00332 return 0; 00333 byte *pBuf = new byte[2 * len]; 00334 00335 unsigned int k, j; 00336 00337 for (j = k = 0; j < len; j++, k++) 00338 { 00339 pBuf[k] = buf[j]; 00340 if (pBuf[k] == 0x0A) 00341 pBuf[k++] = 0x0D, pBuf[k] = 0x0A; 00342 } 00343 delete[]buf; 00344 buf = pBuf; 00345 len = k; 00346 return k; 00347 } 00348 00349 00350 00351 // this nasty little beast removes really low ASCII and 0's 00352 // and handles the CR problem 00353 // 00354 // if a cr appears before a lf, then remove the cr 00355 // if a cr appears in isolation, change to a lf 00356 unsigned tBuf::RemoveBinary() 00357 { 00358 if (!buf) 00359 return 0; 00360 byte *in_buf = buf; 00361 byte *out_buf = new byte[len]; 00362 00363 unsigned int k, j; 00364 00365 for (j = k = 0; j < len; j++, k++) 00366 { 00367 // copy each byte 00368 out_buf[k] = in_buf[j]; 00369 00370 // throw away really low ASCII 00371 if (( /*out_buf[k]>=0 && */ out_buf[k] < 9)) 00372 k--; 00373 00374 // for CR 00375 if (out_buf[k] == 0x0D) 00376 { 00377 // if next is LF, then drop it 00378 if (j < len - 1 && in_buf[j + 1] == 0x0A) 00379 k--; 00380 else // turn it into a LF 00381 out_buf[k] = 0x0A; 00382 } 00383 } 00384 delete[]buf; 00385 buf = out_buf; 00386 len = k; 00387 return k; 00388 } 00389 00390 void tBuf::setText(const byte * text, unsigned txtlen, bool txtcomp) 00391 { 00392 if (buf) 00393 delete[]buf; 00394 buf = 0L; 00395 00396 if (txtlen <= 0) 00397 txtlen = strlen((const char *) text); 00398 len = txtlen; 00399 buf = new byte[len]; 00400 00401 memcpy(buf, text, len*sizeof(char)); 00402 // strncpy((char *) buf, (const char *) text, len); 00403 isCompressed = txtcomp; 00404 // cout<<"Setting text, compressed="<<txtcomp<<endl; 00405 }
KDE Logo
This file is part of the documentation for kpilot Library Version 3.2.2.
Documentation copyright © 1996-2004 the KDE developers.
Generated on Wed Jul 28 23:57:49 2004 by doxygen 1.3.7 written by Dimitri van Heesch, © 1997-2003