00001
00002
00003
00004
00005
00006
00007
00008
00009 #include <aconf.h>
00010
00011 #ifdef USE_GCC_PRAGMAS
00012 #pragma implementation
00013 #endif
00014
#include <stdlib.h>
#include <stddef.h>
#include <limits.h>
#include <string.h>
#include <ctype.h>
#include "Lexer.h"
#include "Error.h"
00021
00022
00023
00024
00025
// Character classification table, indexed by byte value (0..255):
//   0 = ordinary character (may appear inside a name or command token)
//   1 = whitespace (NUL, TAB, LF, FF, CR, SPACE) -- skipped between tokens
//   2 = delimiter  (% ( ) / < > [ ] { })         -- terminates a token
// Any non-zero entry ends a name or command token.
static char specialChars[256] = {
  /* 0x00 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2,
  /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,
  /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,
  /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,
  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
00044
00045
00046
00047
00048
00049 Lexer::Lexer(XRef *xref, Stream *str) {
00050 Object obj;
00051
00052 curStr.initStream(str);
00053 streams = new Array(xref);
00054 streams->add(curStr.copy(&obj));
00055 strPtr = 0;
00056 freeArray = gTrue;
00057 curStr.streamReset();
00058 }
00059
00060 Lexer::Lexer(XRef *xref, Object *obj) {
00061 Object obj2;
00062
00063 if (obj->isStream()) {
00064 streams = new Array(xref);
00065 freeArray = gTrue;
00066 streams->add(obj->copy(&obj2));
00067 } else {
00068 streams = obj->getArray();
00069 freeArray = gFalse;
00070 }
00071 strPtr = 0;
00072 if (streams->getLength() > 0) {
00073 streams->get(strPtr, &curStr);
00074 curStr.streamReset();
00075 }
00076 }
00077
00078 Lexer::~Lexer() {
00079 if (!curStr.isNone()) {
00080 curStr.streamClose();
00081 curStr.free();
00082 }
00083 if (freeArray) {
00084 delete streams;
00085 }
00086 }
00087
00088 int Lexer::getChar() {
00089 int c;
00090
00091 c = EOF;
00092 while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) {
00093 curStr.streamClose();
00094 curStr.free();
00095 ++strPtr;
00096 if (strPtr < streams->getLength()) {
00097 streams->get(strPtr, &curStr);
00098 curStr.streamReset();
00099 }
00100 }
00101 return c;
00102 }
00103
00104 int Lexer::lookChar() {
00105 if (curStr.isNone()) {
00106 return EOF;
00107 }
00108 return curStr.streamLookChar();
00109 }
00110
00111 Object *Lexer::getObj(Object *obj) {
00112 char *p;
00113 int c, c2;
00114 GBool comment, neg, done;
00115 int numParen;
00116 int xi;
00117 double xf, scale;
00118 GString *s;
00119 int n, m;
00120
00121
00122 comment = gFalse;
00123 while (1) {
00124 if ((c = getChar()) == EOF) {
00125 return obj->initEOF();
00126 }
00127 if (comment) {
00128 if (c == '\r' || c == '\n')
00129 comment = gFalse;
00130 } else if (c == '%') {
00131 comment = gTrue;
00132 } else if (specialChars[c] != 1) {
00133 break;
00134 }
00135 }
00136
00137
00138 switch (c) {
00139
00140
00141 case '0': case '1': case '2': case '3': case '4':
00142 case '5': case '6': case '7': case '8': case '9':
00143 case '-': case '.':
00144 neg = gFalse;
00145 xi = 0;
00146 if (c == '-') {
00147 neg = gTrue;
00148 } else if (c == '.') {
00149 goto doReal;
00150 } else {
00151 xi = c - '0';
00152 }
00153 while (1) {
00154 c = lookChar();
00155 if (isdigit(c)) {
00156 getChar();
00157 xi = xi * 10 + (c - '0');
00158 } else if (c == '.') {
00159 getChar();
00160 goto doReal;
00161 } else {
00162 break;
00163 }
00164 }
00165 if (neg)
00166 xi = -xi;
00167 obj->initInt(xi);
00168 break;
00169 doReal:
00170 xf = xi;
00171 scale = 0.1;
00172 while (1) {
00173 c = lookChar();
00174 if (!isdigit(c)) {
00175 break;
00176 }
00177 getChar();
00178 xf = xf + scale * (c - '0');
00179 scale *= 0.1;
00180 }
00181 if (neg)
00182 xf = -xf;
00183 obj->initReal(xf);
00184 break;
00185
00186
00187 case '(':
00188 p = tokBuf;
00189 n = 0;
00190 numParen = 1;
00191 done = gFalse;
00192 s = NULL;
00193 do {
00194 c2 = EOF;
00195 switch (c = getChar()) {
00196
00197 case EOF:
00198 #if 0
00199
00200 case '\r':
00201 case '\n':
00202 #endif
00203 error(getPos(), "Unterminated string");
00204 done = gTrue;
00205 break;
00206
00207 case '(':
00208 ++numParen;
00209 c2 = c;
00210 break;
00211
00212 case ')':
00213 if (--numParen == 0) {
00214 done = gTrue;
00215 } else {
00216 c2 = c;
00217 }
00218 break;
00219
00220 case '\\':
00221 switch (c = getChar()) {
00222 case 'n':
00223 c2 = '\n';
00224 break;
00225 case 'r':
00226 c2 = '\r';
00227 break;
00228 case 't':
00229 c2 = '\t';
00230 break;
00231 case 'b':
00232 c2 = '\b';
00233 break;
00234 case 'f':
00235 c2 = '\f';
00236 break;
00237 case '\\':
00238 case '(':
00239 case ')':
00240 c2 = c;
00241 break;
00242 case '0': case '1': case '2': case '3':
00243 case '4': case '5': case '6': case '7':
00244 c2 = c - '0';
00245 c = lookChar();
00246 if (c >= '0' && c <= '7') {
00247 getChar();
00248 c2 = (c2 << 3) + (c - '0');
00249 c = lookChar();
00250 if (c >= '0' && c <= '7') {
00251 getChar();
00252 c2 = (c2 << 3) + (c - '0');
00253 }
00254 }
00255 break;
00256 case '\r':
00257 c = lookChar();
00258 if (c == '\n') {
00259 getChar();
00260 }
00261 break;
00262 case '\n':
00263 break;
00264 case EOF:
00265 error(getPos(), "Unterminated string");
00266 done = gTrue;
00267 break;
00268 default:
00269 c2 = c;
00270 break;
00271 }
00272 break;
00273
00274 default:
00275 c2 = c;
00276 break;
00277 }
00278
00279 if (c2 != EOF) {
00280 if (n == tokBufSize) {
00281 if (!s)
00282 s = new GString(tokBuf, tokBufSize);
00283 else
00284 s->append(tokBuf, tokBufSize);
00285 p = tokBuf;
00286 n = 0;
00287 }
00288 *p++ = (char)c2;
00289 ++n;
00290 }
00291 } while (!done);
00292 if (!s)
00293 s = new GString(tokBuf, n);
00294 else
00295 s->append(tokBuf, n);
00296 obj->initString(s);
00297 break;
00298
00299
00300 case '/':
00301 p = tokBuf;
00302 n = 0;
00303 while ((c = lookChar()) != EOF && !specialChars[c]) {
00304 getChar();
00305 if (c == '#') {
00306 c2 = lookChar();
00307 if (c2 >= '0' && c2 <= '9') {
00308 c = c2 - '0';
00309 } else if (c2 >= 'A' && c2 <= 'F') {
00310 c = c2 - 'A' + 10;
00311 } else if (c2 >= 'a' && c2 <= 'f') {
00312 c = c2 - 'a' + 10;
00313 } else {
00314 goto notEscChar;
00315 }
00316 getChar();
00317 c <<= 4;
00318 c2 = getChar();
00319 if (c2 >= '0' && c2 <= '9') {
00320 c += c2 - '0';
00321 } else if (c2 >= 'A' && c2 <= 'F') {
00322 c += c2 - 'A' + 10;
00323 } else if (c2 >= 'a' && c2 <= 'f') {
00324 c += c2 - 'a' + 10;
00325 } else {
00326 error(getPos(), "Illegal digit in hex char in name");
00327 }
00328 }
00329 notEscChar:
00330 if (++n == tokBufSize) {
00331 error(getPos(), "Name token too long");
00332 break;
00333 }
00334 *p++ = c;
00335 }
00336 *p = '\0';
00337 obj->initName(tokBuf);
00338 break;
00339
00340
00341 case '[':
00342 case ']':
00343 tokBuf[0] = c;
00344 tokBuf[1] = '\0';
00345 obj->initCmd(tokBuf);
00346 break;
00347
00348
00349 case '<':
00350 c = lookChar();
00351
00352
00353 if (c == '<') {
00354 getChar();
00355 tokBuf[0] = tokBuf[1] = '<';
00356 tokBuf[2] = '\0';
00357 obj->initCmd(tokBuf);
00358
00359
00360 } else {
00361 p = tokBuf;
00362 m = n = 0;
00363 c2 = 0;
00364 s = NULL;
00365 while (1) {
00366 c = getChar();
00367 if (c == '>') {
00368 break;
00369 } else if (c == EOF) {
00370 error(getPos(), "Unterminated hex string");
00371 break;
00372 } else if (specialChars[c] != 1) {
00373 c2 = c2 << 4;
00374 if (c >= '0' && c <= '9')
00375 c2 += c - '0';
00376 else if (c >= 'A' && c <= 'F')
00377 c2 += c - 'A' + 10;
00378 else if (c >= 'a' && c <= 'f')
00379 c2 += c - 'a' + 10;
00380 else
00381 error(getPos(), "Illegal character <%02x> in hex string", c);
00382 if (++m == 2) {
00383 if (n == tokBufSize) {
00384 if (!s)
00385 s = new GString(tokBuf, tokBufSize);
00386 else
00387 s->append(tokBuf, tokBufSize);
00388 p = tokBuf;
00389 n = 0;
00390 }
00391 *p++ = (char)c2;
00392 ++n;
00393 c2 = 0;
00394 m = 0;
00395 }
00396 }
00397 }
00398 if (!s)
00399 s = new GString(tokBuf, n);
00400 else
00401 s->append(tokBuf, n);
00402 if (m == 1)
00403 s->append((char)(c2 << 4));
00404 obj->initString(s);
00405 }
00406 break;
00407
00408
00409 case '>':
00410 c = lookChar();
00411 if (c == '>') {
00412 getChar();
00413 tokBuf[0] = tokBuf[1] = '>';
00414 tokBuf[2] = '\0';
00415 obj->initCmd(tokBuf);
00416 } else {
00417 error(getPos(), "Illegal character '>'");
00418 obj->initError();
00419 }
00420 break;
00421
00422
00423 case ')':
00424 case '{':
00425 case '}':
00426 error(getPos(), "Illegal character '%c'", c);
00427 obj->initError();
00428 break;
00429
00430
00431 default:
00432 p = tokBuf;
00433 *p++ = c;
00434 n = 1;
00435 while ((c = lookChar()) != EOF && !specialChars[c]) {
00436 getChar();
00437 if (++n == tokBufSize) {
00438 error(getPos(), "Command token too long");
00439 break;
00440 }
00441 *p++ = c;
00442 }
00443 *p = '\0';
00444 if (tokBuf[0] == 't' && !strcmp(tokBuf, "true")) {
00445 obj->initBool(gTrue);
00446 } else if (tokBuf[0] == 'f' && !strcmp(tokBuf, "false")) {
00447 obj->initBool(gFalse);
00448 } else if (tokBuf[0] == 'n' && !strcmp(tokBuf, "null")) {
00449 obj->initNull();
00450 } else {
00451 obj->initCmd(tokBuf);
00452 }
00453 break;
00454 }
00455
00456 return obj;
00457 }
00458
00459 void Lexer::skipToNextLine() {
00460 int c;
00461
00462 while (1) {
00463 c = getChar();
00464 if (c == EOF || c == '\n') {
00465 return;
00466 }
00467 if (c == '\r') {
00468 if ((c = lookChar()) == '\n') {
00469 getChar();
00470 }
00471 return;
00472 }
00473 }
00474 }