00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <qstringlist.h>
#include "ailexer.h"
00024
/*
 * Character categories usable in the `c` column of the transition table.
 * They are negative so that they cannot collide with the literal characters
 * that appear in the table.
 */
#define CATEGORY_WHITESPACE (-1)
#define CATEGORY_ALPHA      (-2)
#define CATEGORY_DIGIT      (-3)
#define CATEGORY_SPECIAL    (-4)
#define CATEGORY_LETTERHEX  (-5)
/* Matches when the lexer buffer already holds more than MAX_INTLEN characters. */
#define CATEGORY_INTTOOLONG (-6)

/* Wildcard: matches every input character. */
#define CATEGORY_ANY        (-127)

/* Longest buffer that is still treated as an integer. */
#define MAX_INTLEN   9
/* Buffers shorter than this are reported as tokens instead of byte arrays. */
#define MIN_HEXCHARS 6

/* Value of the `c` column that marks the end of the transition table. */
#define STOP 0
00038
/**
 * Tells whether @p c is a blank, newline, tab or carriage return.
 *
 * @return 1 for one of those four characters, 0 otherwise.
 */
int iswhitespace(char c)
{
  switch (c)
  {
    case ' ':
    case '\n':
    case '\t':
    case '\r':
      return 1;
    default:
      return 0;
  }
}
00042
/**
 * Tells whether @p c is one of the punctuation characters that the
 * transition table accepts inside tokens and references
 * (CATEGORY_SPECIAL).
 *
 * @return 1 if @p c is one of  * _ ? ~ - ^ ` ! . @ & $ =  and 0 otherwise.
 */
int isSpecial(char c)
{
  switch (c)
  {
    case '*': case '_': case '?': case '~':
    case '-': case '^': case '`': case '!':
    case '.': case '@': case '&': case '$':
    case '=':
      return 1;
    default:
      return 0;
  }
}
00046
/**
 * Tells whether @p c is an upper-case hexadecimal letter ('A' to 'F').
 * Lower-case letters and decimal digits are not accepted.
 *
 * @return 1 for 'A'..'F', 0 otherwise.
 */
int isletterhex(char c)
{
  switch (c)
  {
    case 'A': case 'B': case 'C':
    case 'D': case 'E': case 'F':
      return 1;
    default:
      return 0;
  }
}
00050
00051 const char*statetoa (State state){
00052 switch (state)
00053 {
00054 case State_Comment : return "comment";
00055 case State_Integer : return "integer";
00056 case State_Float : return "float";
00057 case State_String : return "string";
00058 case State_Token : return "token";
00059 case State_Reference : return "reference";
00060 case State_Start : return "start";
00061 case State_BlockStart : return "block start";
00062 case State_BlockEnd : return "block end";
00063 case State_ArrayStart : return "array start";
00064 case State_ArrayEnd : return "array end";
00065 case State_Byte : return "byte";
00066 case State_ByteArray : return "byte array";
00067 case State_StringEncodedChar : return "encoded char (string)";
00068 case State_CommentEncodedChar : return "encoded char (comment)";
00069 case State_ByteArray2 : return "byte array (mode 2)";
00070 default : return "unknown";
00071 }
00072 }
00073
00074 typedef struct {
00075 State oldState;
00076 char c;
00077 State newState;
00078 Action action;
00079 } Transition;
00080
00081 static Transition transitions[] = {
00082 { State_Comment, '\n', State_Start, Action_Output},
00083 { State_Comment, '\r', State_Start, Action_Output},
00084 { State_Comment, '\\', State_CommentEncodedChar, Action_InitTemp},
00085 { State_Comment, CATEGORY_ANY, State_Comment, Action_Copy},
00086 { State_Integer, CATEGORY_DIGIT, State_Integer, Action_Copy},
00087 { State_Integer, CATEGORY_WHITESPACE, State_Start, Action_Output},
00088 { State_Integer, '.', State_Float, Action_Copy},
00089 { State_Integer, ']', State_Start, Action_OutputUnget},
00090 { State_Integer, '}', State_Start, Action_OutputUnget},
00091 { State_Integer, '#', State_Byte, Action_Copy },
00092 { State_Integer, '/', State_Start, Action_OutputUnget },
00093 { State_Integer, '{', State_Start, Action_OutputUnget },
00094 { State_Integer, '%', State_Start, Action_OutputUnget },
00095 { State_Integer, CATEGORY_LETTERHEX, State_ByteArray2, Action_Copy },
00096 { State_Integer, CATEGORY_INTTOOLONG, State_ByteArray2, Action_Copy },
00097 { State_Integer, CATEGORY_ANY, State_Start, Action_Abort},
00098 { State_Float, CATEGORY_DIGIT, State_Float, Action_Copy},
00099 { State_Float, CATEGORY_WHITESPACE, State_Start, Action_Output},
00100 { State_Float, ']', State_Start, Action_OutputUnget},
00101 { State_Float, '}', State_Start, Action_OutputUnget},
00102 { State_Float, CATEGORY_ANY, State_Start, Action_Abort},
00103 { State_Token, CATEGORY_ALPHA, State_Token, Action_Copy},
00104 { State_Token, CATEGORY_DIGIT, State_Token, Action_Copy},
00105 { State_Token, CATEGORY_SPECIAL, State_Token, Action_Copy},
00106 { State_Token, '}', State_Start, Action_OutputUnget},
00107 { State_Token, ']', State_Start, Action_OutputUnget},
00108 { State_Token, '{', State_BlockStart, Action_Output},
00109 { State_Token, '}', State_BlockEnd, Action_Output},
00110 { State_Token, '/', State_Start, Action_OutputUnget},
00111 { State_Token, CATEGORY_WHITESPACE, State_Start, Action_Output},
00112 { State_Token, CATEGORY_ANY, State_Start, Action_Abort},
00113 { State_String, ')', State_Start, Action_Output},
00114 { State_String, '\\', State_StringEncodedChar, Action_InitTemp},
00115 { State_String, CATEGORY_ANY, State_String, Action_Copy},
00116
00117
00118
00119 { State_BlockStart, CATEGORY_ANY, State_Start, Action_OutputUnget },
00120 { State_BlockEnd, CATEGORY_ANY, State_Start, Action_OutputUnget },
00121 { State_ArrayStart, CATEGORY_ANY, State_Start, Action_OutputUnget },
00122 { State_ArrayEnd, CATEGORY_ANY, State_Start, Action_OutputUnget },
00123 { State_Reference, '#', State_Reference, Action_Copy },
00124 { State_Reference, CATEGORY_ALPHA, State_Reference, Action_Copy },
00125 { State_Reference, CATEGORY_DIGIT, State_Reference, Action_Copy },
00126 { State_Reference, CATEGORY_SPECIAL, State_Reference, Action_Copy },
00127 { State_Reference, CATEGORY_ANY, State_Start, Action_OutputUnget },
00128 { State_Byte, '/', State_Start, Action_OutputUnget },
00129 { State_Byte, CATEGORY_DIGIT, State_Byte, Action_Copy},
00130 { State_Byte, CATEGORY_ALPHA, State_Byte, Action_Copy},
00131 { State_Byte, CATEGORY_WHITESPACE, State_Start, Action_Output},
00132 { State_ByteArray, '>', State_Start, Action_Output },
00133 { State_ByteArray, CATEGORY_ALPHA, State_ByteArray, Action_Copy },
00134 { State_ByteArray, CATEGORY_DIGIT, State_ByteArray, Action_Copy },
00135 { State_ByteArray, CATEGORY_WHITESPACE, State_ByteArray, Action_Ignore },
00136 { State_ByteArray, CATEGORY_ANY, State_Start, Action_Abort },
00137 { State_StringEncodedChar, '\\', State_String, Action_Copy},
00138 { State_StringEncodedChar, CATEGORY_DIGIT, State_StringEncodedChar, Action_CopyTemp},
00139 { State_StringEncodedChar, CATEGORY_ANY, State_String, Action_DecodeUnget},
00140 { State_CommentEncodedChar, '\\', State_Comment, Action_Copy},
00141 { State_CommentEncodedChar, CATEGORY_DIGIT, State_CommentEncodedChar, Action_CopyTemp},
00142 { State_CommentEncodedChar, CATEGORY_ANY, State_Comment, Action_DecodeUnget},
00143 { State_ByteArray2, '\n', State_Start, Action_Output},
00144 { State_ByteArray2, '\r', State_Start, Action_Output},
00145 { State_ByteArray2, '}', State_Start, Action_ByteArraySpecial},
00146 { State_ByteArray2, CATEGORY_WHITESPACE, State_Start, Action_Output},
00147 { State_ByteArray2, CATEGORY_DIGIT, State_ByteArray2, Action_Copy},
00148 { State_ByteArray2, CATEGORY_LETTERHEX, State_ByteArray2, Action_Copy},
00149 { State_ByteArray2, CATEGORY_ALPHA, State_Token, Action_Copy},
00150 { State_ByteArray2, CATEGORY_ANY, State_Start, Action_Abort},
00151 { State_Start, '%', State_Comment, Action_Ignore},
00152 { State_Start, CATEGORY_DIGIT, State_Integer, Action_Copy},
00153 { State_Start, '-', State_Integer, Action_Copy},
00154 { State_Start, '+', State_Integer, Action_Copy},
00155 { State_Start, '.', State_Float, Action_Copy},
00156 { State_Start, '/', State_Reference, Action_Ignore },
00157 { State_Start, '(', State_String, Action_Ignore},
00158 { State_Start, '{', State_BlockStart, Action_Copy},
00159 { State_Start, '}', State_BlockEnd, Action_Copy},
00160 { State_Start, '[', State_ArrayStart, Action_Copy},
00161 { State_Start, ']', State_ArrayEnd, Action_Copy},
00162 { State_Start, '<', State_ByteArray, Action_Ignore},
00163 { State_Start, CATEGORY_ALPHA, State_Token, Action_Copy},
00164 { State_Start, CATEGORY_WHITESPACE, State_Start, Action_Output},
00165 { State_Start, CATEGORY_SPECIAL, State_Token, Action_Copy},
00166 { State_Start, CATEGORY_LETTERHEX, State_ByteArray2, Action_Copy},
00167 { State_Start, CATEGORY_ANY, State_Start, Action_Abort},
00168 { State_Start, STOP, State_Start, Action_Abort}
00169 };
00170
00171 AILexer::AILexer(){
00172 }
00173 AILexer::~AILexer(){
00174 }
00175
00176 bool AILexer::parse (QIODevice& fin){
00177 char c;
00178
00179 m_buffer.clear();
00180 m_curState = State_Start;
00181
00182 parsingStarted();
00183
00184 while (!fin.atEnd())
00185 {
00186 c = fin.getch ();
00187
00188
00189
00190 State newState;
00191 Action action;
00192
00193 nextStep (c, &newState, &action);
00194
00195 switch (action)
00196 {
00197 case Action_Copy :
00198 m_buffer.append (c);
00199 break;
00200 case Action_CopyOutput :
00201 m_buffer.append (c);
00202 doOutput();
00203 break;
00204 case Action_Output :
00205 doOutput();
00206 break;
00207 case Action_OutputUnget :
00208 doOutput();
00209 fin.ungetch(c);
00210 break;
00211 case Action_Ignore :
00212
00213 break;
00214 case Action_Abort :
00215 qWarning ( "state %s / %s char %c (%d)" , statetoa(m_curState), statetoa(newState), c, c );
00216 parsingAborted();
00217 return false;
00218 break;
00219 case Action_InitTemp :
00220 m_temp.clear();
00221 break;
00222 case Action_CopyTemp :
00223 m_temp.append (c);
00224 break;
00225 case Action_DecodeUnget :
00226 m_buffer.append (decode());
00227 fin.ungetch(c);
00228 break;
00229
00230 case Action_ByteArraySpecial :
00231 m_curState = State_Token;
00232 doOutput();
00233 fin.ungetch(c);
00234 break;
00235 default :
00236 qWarning ( "unknown action: %d ", action);
00237 }
00238
00239 m_curState = newState;
00240 }
00241
00242 parsingFinished();
00243 return true;
00244 }
00245
00246 void AILexer::doOutput ()
00247 {
00248 if (m_buffer.length() == 0) return;
00249 switch (m_curState)
00250 {
00251 case State_Comment :
00252 gotComment (m_buffer.latin1());
00253 break;
00254 case State_Integer :
00255 gotIntValue (m_buffer.toInt());
00256 break;
00257 case State_Float :
00258 gotDoubleValue (m_buffer.toFloat());
00259 break;
00260 case State_String :
00261 gotStringValue (m_buffer.latin1());
00262 break;
00263 case State_Token :
00264 gotToken (m_buffer.latin1());
00265 break;
00266 case State_Reference :
00267 gotReference (m_buffer.latin1());
00268 break;
00269 case State_BlockStart :
00270 gotBlockStart ();
00271 break;
00272 case State_BlockEnd :
00273 gotBlockEnd ();
00274 break;
00275 case State_Start :
00276 break;
00277 case State_ArrayStart :
00278 gotArrayStart ();
00279 break;
00280 case State_ArrayEnd :
00281 gotArrayEnd ();
00282 break;
00283 case State_Byte :
00284 gotByte (getByte());
00285 break;
00286 case State_ByteArray :
00287 case State_ByteArray2 :
00288 doHandleByteArray ();
00289 break;
00290 default:
00291 qWarning ( "unknown state: %d", m_curState );
00292 }
00293
00294 m_buffer.clear();
00295 }
00296
00297 void AILexer::gotComment (const char *value) {
00298 qDebug ( "gotComment: %s ", value );
00299 }
00300
00301 void AILexer::gotIntValue (int value) {
00302 qDebug ( "gotInt: %d ", value );
00303 }
00304
00305 void AILexer::gotDoubleValue (double value) {
00306 qDebug ( "gotDouble: %f ", value );
00307 }
00308
00309 void AILexer::gotStringValue (const char *value) {
00310 qDebug ( "gotString: %s ", value );
00311 }
00312
00313 void AILexer::gotToken (const char *value) {
00314 qDebug ( "gotToken: %s ", value );
00315 }
00316
00317 void AILexer::gotReference (const char *value) {
00318 qDebug ( "gotReference: %s ", value );
00319 }
00320
00321 void AILexer::gotBlockStart (){
00322 qDebug ( "gotBlockStart" );
00323 }
00324
00325 void AILexer::gotBlockEnd (){
00326 qDebug ( "gotBlockEnd" );
00327 }
00328
00329 void AILexer::gotArrayStart (){
00330 qDebug ( "gotArrayStart" );
00331 }
00332
00333 void AILexer::gotArrayEnd (){
00334 qDebug ( "gotArrayEnd" );
00335 }
00336
00337 void AILexer::parsingStarted() {
00338 qDebug ( "parsing started" );
00339 }
00340
00341 void AILexer::parsingFinished() {
00342 qDebug ( "parsing finished" );
00343 }
00344
00345 void AILexer::parsingAborted() {
00346 qDebug ( "parsing aborted" );
00347 }
00348
00349 void AILexer::gotByte (uchar value) {
00350 qDebug ( "got byte %d" , value );
00351 }
00352
00353 void AILexer::gotByteArray (const QByteArray &data) {
00354 qDebug ( "got byte array" );
00355
00356
00357
00358
00359
00360
00361
00362 }
00363
00364
00365 void AILexer::nextStep (char c, State *newState, Action *newAction) {
00366 int i=0;
00367
00368 while (true) {
00369 Transition trans = transitions[i];
00370
00371 if (trans.c == STOP) {
00372 *newState = trans.newState;
00373 *newAction = trans.action;
00374 return;
00375 }
00376
00377 bool found = false;
00378
00379 if (trans.oldState == m_curState) {
00380 switch (trans.c) {
00381 case CATEGORY_WHITESPACE : found = isspace(c); break;
00382 case CATEGORY_ALPHA : found = isalpha(c); break;
00383 case CATEGORY_DIGIT : found = isdigit(c); break;
00384 case CATEGORY_SPECIAL : found = isSpecial(c); break;
00385 case CATEGORY_LETTERHEX : found = isletterhex(c); break;
00386 case CATEGORY_INTTOOLONG : found = m_buffer.length() > MAX_INTLEN; break;
00387 case CATEGORY_ANY : found = true; break;
00388 default : found = (trans.c == c);
00389 }
00390
00391 if (found) {
00392 *newState = trans.newState;
00393 *newAction = trans.action;
00394
00395 return;
00396 }
00397 }
00398
00399
00400 i++;
00401 }
00402 }
00403
00404 void AILexer::doHandleByteArray ()
00405 {
00406
00407 if (m_buffer.length () < MIN_HEXCHARS)
00408 {
00409 gotToken (m_buffer.latin1());
00410 return;
00411 }
00412
00413 uint strIdx = 0;
00414 uint arrayIdx = 0;
00415
00416 QByteArray data (m_buffer.length() >> 1);
00417
00418 while (strIdx < m_buffer.length())
00419 {
00420 const QString &item = m_buffer.mid (strIdx, 2);
00421 uchar val = item.toShort(NULL, 16);
00422 data[arrayIdx] = val;
00423 strIdx += 2;
00424 arrayIdx++;
00425 }
00426
00427 gotByteArray (data);
00428 }
00429
00430 uchar AILexer::getByte()
00431 {
00432
00433
00434 QStringList list = QStringList::split ("#", m_buffer.toString());
00435 int radix = list[0].toShort();
00436 uchar value = list[1].toShort (NULL, radix);
00437
00438 return value;
00439 }
00440
00441 uchar AILexer::decode()
00442 {
00443 uchar value = m_temp.toString().toShort(NULL, 8);
00444
00445 return value;
00446 }
00447
00448
00449
00450 int initialSize = 20;
00451 int addSize = 10;
00452
00453 StringBuffer::StringBuffer () {
00454 m_buffer = (char*)calloc (initialSize, sizeof(char));
00455 m_length = 0;
00456 m_capacity = initialSize;
00457 }
00458
00459 StringBuffer::~StringBuffer (){
00460 free(m_buffer);
00461 }
00462
00463 void StringBuffer::append (char c){
00464 ensureCapacity(m_length + 1);
00465 m_buffer[m_length] = c;
00466 m_length++;
00467 }
00468
00469 void StringBuffer::clear(){
00470 for (uint i=0; i<m_length; i++) m_buffer[i] = '\0';
00471 m_length = 0;
00472 }
00473
00474 QString StringBuffer::toString() const {
00475 QString ret(m_buffer);
00476 return ret;
00477 }
00478
00479 void StringBuffer::ensureCapacity (int p_capacity) {
00480 if (m_capacity >= p_capacity) return;
00481
00482 int newSize = m_capacity + addSize;
00483 if (p_capacity > newSize) newSize = p_capacity;
00484
00485 char* oldBuffer = m_buffer;
00486 char *newBuffer = (char*)calloc (newSize, sizeof(char));
00487 strcpy (newBuffer, m_buffer);
00488 free(oldBuffer);
00489 m_buffer = newBuffer;
00490 m_capacity = newSize;
00491 }
00492
00493 uint StringBuffer::length() {
00494 return m_length;
00495 }
00496
00497 double StringBuffer::toFloat() {
00498 QString data = toString();
00499 return data.toFloat();
00500 }
00501
00502 int StringBuffer::toInt() {
00503 QString data = toString();
00504 return data.toInt();
00505 }
00506
00507 const char *StringBuffer::latin1() {
00508 return m_buffer;
00509 }
00510
00511 QString StringBuffer::mid( uint index, uint len) const {
00512 QString data = toString();
00513 return data.mid(index,len);
00514 }