filters

pscommentlexer.cc

00001 /* This file is part of the KDE project
00002    Copyright (C) 2002, Dirk Schönberger <dirk.schoenberger@sz-online.de>
00003 
00004    This library is free software; you can redistribute it and/or
00005    modify it under the terms of the GNU Library General Public
00006    License as published by the Free Software Foundation; either
00007    version 2 of the License, or (at your option) any later version.
00008 
00009    This library is distributed in the hope that it will be useful,
00010    but WITHOUT ANY WARRANTY; without even the implied warranty of
00011    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012    Library General Public License for more details.
00013 
00014    You should have received a copy of the GNU Library General Public License
00015    along with this library; see the file COPYING.LIB.  If not, write to
00016    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00017  * Boston, MA 02110-1301, USA.
00018 */
00019 
00020 #include <stdlib.h>
00021 #include <ctype.h>
00022 #include <qstringlist.h>
00023 #include "pscommentlexer.h"
00024 
00025 #define CATEGORY_WHITESPACE -1 /* table wildcard: nextStep() matches it with isspace() */
00026 #define CATEGORY_ALPHA -2 /* table wildcard: nextStep() matches it with isalpha() */
00027 #define CATEGORY_DIGIT -3 /* table wildcard: nextStep() matches it with isdigit() */
00028 #define CATEGORY_SPECIAL -4 /* table wildcard: nextStep() matches it with isSpecial() */
00029 #define CATEGORY_LETTERHEX -5 /* table wildcard: nextStep() matches it with isletterhex() */
00030 #define CATEGORY_INTTOOLONG -6 /* pseudo category: true when m_buffer is longer than MAX_INTLEN */
00031 
00032 #define CATEGORY_ANY -127 /* table wildcard: nextStep() accepts every character */
00033 
00034 #define MAX_INTLEN 9 /* length limit tested by CATEGORY_INTTOOLONG */
00035 #define MIN_HEXCHARS 6 /* NOTE(review): not referenced in the visible part of this file */
00036 
00037 #define STOP 0 /* marks the terminating row of transitions[]; nextStep() stops scanning there */
00038 
/* Returns 1 when c is a blank, newline, tab or carriage return, otherwise 0. */
int iswhitespace(char c){
  switch (c)
  {
    case ' ' :
    case '\n' :
    case '\t' :
    case '\r' :
      return 1;
    default :
      return 0;
  }
}
00042 
/* Returns 1 when c is one of the punctuation characters
   * _ ? ~ - ^ ` ! . @ & $ =   and 0 for every other character. */
int isSpecial(char c){
  switch (c)
  {
    case '*' : case '_' : case '?' : case '~' :
    case '-' : case '^' : case '`' : case '!' :
    case '.' : case '@' : case '&' : case '$' :
    case '=' :
      return 1;
    default :
      return 0;
  }
}
00046 
/* Returns 1 for the upper-case hexadecimal letters 'A' through 'F' and 0
   otherwise; lower-case letters and decimal digits are not accepted. */
int isletterhex(char c){
  const bool upperHexLetter = (c >= 'A') && (c <= 'F');
  return upperHexLetter;
}
00050 
00051 const char*statetoa (State state){
00052   switch (state)
00053   {
00054     case State_Comment : return "comment";
00055     case State_CommentEncodedChar : return "encoded char (comment)";
00056     default : return "unknown";
00057   }
00058 }
00059 
00060 typedef struct {
00061   State oldState;
00062   char c;
00063   State newState;
00064   Action action;
00065 } Transition;
00066 
00067 static Transition transitions[] = {
00068   { State_Comment, '\n', State_Start, Action_Output},
00069   { State_Comment, '\r', State_Start, Action_Output},
00070   { State_Comment, '\\', State_CommentEncodedChar, Action_InitTemp},
00071   { State_Comment, CATEGORY_ANY, State_Comment, Action_Copy},
00072   { State_CommentEncodedChar, '\\', State_Comment, Action_Copy},
00073   { State_CommentEncodedChar, CATEGORY_DIGIT, State_CommentEncodedChar, Action_CopyTemp},
00074   { State_CommentEncodedChar, CATEGORY_ANY, State_Comment, Action_DecodeUnget},
00075   { State_Start, '%', State_Comment, Action_Ignore},
00076   { State_Start, CATEGORY_ANY, State_Start, Action_Ignore},
00077   { State_Start, STOP, State_Start, Action_Abort}
00078 };
00079 
00080 PSCommentLexer::PSCommentLexer(){
00081 }
00082 PSCommentLexer::~PSCommentLexer(){
00083 }
00084 
00085 bool PSCommentLexer::parse (QIODevice& fin){
00086   char c;
00087 
00088   m_buffer.clear();
00089   m_curState = State_Start;
00090 
00091   parsingStarted();
00092 
00093   while (!fin.atEnd())
00094   {
00095     c = fin.getch ();
00096 
00097 //    qDebug ("got %c", c);
00098 
00099     State newState;
00100     Action action;
00101 
00102     nextStep (c, &newState, &action);
00103 
00104     switch (action)
00105     {
00106       case Action_Copy :
00107         m_buffer.append (c);
00108         break;
00109       case Action_CopyOutput :
00110         m_buffer.append (c);
00111         doOutput();
00112         break;
00113       case Action_Output :
00114         doOutput();
00115         break;
00116       case Action_OutputUnget :
00117         doOutput();
00118         fin.ungetch(c);
00119         break;
00120       case Action_Ignore :
00121         /* ignore */
00122         break;
00123       case Action_Abort :
00124         qWarning ( "state %s / %s char %c (%d)" , statetoa(m_curState), statetoa(newState), c, c );
00125         parsingAborted();
00126         return false;
00127         break;
00128       case Action_InitTemp :
00129         m_temp.clear();
00130         break;
00131       case Action_CopyTemp :
00132         m_temp.append (c);
00133         break;
00134       case Action_DecodeUnget :
00135         m_buffer.append (decode());
00136         fin.ungetch(c);
00137         break;
00138       default :
00139         qWarning ( "unknown action: %d ", action);
00140     }
00141 
00142     m_curState = newState;
00143   }
00144 
00145   parsingFinished();
00146   return true;
00147 }
00148 
00149 void PSCommentLexer::doOutput ()
00150 {
00151   if (m_buffer.length() == 0) return;
00152   switch (m_curState)
00153   {
00154     case State_Comment :
00155       gotComment (m_buffer.latin1());
00156       break;
00157     default:
00158       qWarning ( "unknown state: %d", m_curState );
00159   }
00160 
00161   m_buffer.clear();
00162 }
00163 
00164 void PSCommentLexer::gotComment (const char *value) {
00165   qDebug ( "gotComment: %s ", value );
00166 }
00167 
00168 void PSCommentLexer::parsingStarted() {
00169   qDebug ( "parsing started" );
00170 }
00171 
00172 void PSCommentLexer::parsingFinished() {
00173   qDebug ( "parsing finished" );
00174 }
00175 
00176 void PSCommentLexer::parsingAborted() {
00177   qDebug ( "parsing aborted" );
00178 }
00179 
00180 void PSCommentLexer::nextStep (char c, State *newState, Action *newAction) {
00181   int i=0;
00182 
00183   while (true) {
00184     Transition trans = transitions[i];
00185 
00186     if (trans.c == STOP) {
00187       *newState = trans.newState;
00188       *newAction = trans.action;
00189       return;
00190     }
00191 
00192     bool found = false;
00193 
00194     if (trans.oldState == m_curState) {
00195       switch (trans.c) {
00196         case CATEGORY_WHITESPACE : found = isspace(c); break;
00197         case CATEGORY_ALPHA : found = isalpha(c); break;
00198         case CATEGORY_DIGIT : found = isdigit(c); break;
00199         case CATEGORY_SPECIAL : found = isSpecial(c); break;
00200         case CATEGORY_LETTERHEX : found = isletterhex(c); break;
00201         case CATEGORY_INTTOOLONG : found = m_buffer.length() > MAX_INTLEN; break;
00202         case CATEGORY_ANY : found = true; break;
00203         default : found = (trans.c == c);
00204       }
00205 
00206       if (found) {
00207         *newState = trans.newState;
00208         *newAction = trans.action;
00209 
00210         return;
00211       }
00212     }
00213 
00214 
00215     i++;
00216   }
00217 }
00218 
00219 uchar PSCommentLexer::decode()
00220 {
00221   uchar value = m_temp.toString().toShort(NULL, 8);
00222 //  qDebug ("got encoded char %c",value);
00223   return value;
00224 }
00225 
00226 /* StringBuffer implementation */
00227 
00228 int initialSize = 20; /* number of chars the StringBuffer constructor allocates */
00229 int addSize = 10; /* minimum number of chars ensureCapacity() adds when it grows the buffer */
00230 
00231 StringBuffer::StringBuffer () {
00232   m_buffer = (char*)calloc (initialSize, sizeof(char));
00233   m_length = 0;
00234   m_capacity = initialSize;
00235 }
00236 
00237 StringBuffer::~StringBuffer (){
00238   free(m_buffer);
00239 }
00240 
00241 void StringBuffer::append (char c){
00242   ensureCapacity(m_length + 1);
00243   m_buffer[m_length] = c;
00244   m_length++;
00245 }
00246 
00247 void StringBuffer::clear(){
00248   for (uint i=0; i<m_length; i++) m_buffer[i] = '\0';
00249   m_length = 0;
00250 }
00251 
00252 QString StringBuffer::toString() const {
00253   QString ret(m_buffer);
00254   return ret;
00255 }
00256 
00257 void StringBuffer::ensureCapacity (int p_capacity) {
00258   if (m_capacity >= p_capacity) return;
00259 
00260   int newSize = m_capacity + addSize;
00261   if (p_capacity > newSize) newSize = p_capacity;
00262 
00263   char* oldBuffer = m_buffer;
00264   char *newBuffer = (char*)calloc (newSize, sizeof(char));
00265   strcpy (newBuffer, m_buffer);
00266   free(oldBuffer);
00267   m_buffer = newBuffer;
00268   m_capacity = newSize;
00269 }
00270 
00271 uint StringBuffer::length() {
00272   return m_length;
00273 }
00274 
00275 double StringBuffer::toFloat() {
00276   QString data = toString();
00277   return data.toFloat();
00278 }
00279 
00280 int StringBuffer::toInt() {
00281   QString data = toString();
00282   return data.toInt();
00283 }
00284 
00285 const char *StringBuffer::latin1() {
00286   return m_buffer;
00287 }
00288 
00289 QString StringBuffer::mid( uint index, uint len) const {
00290   QString data = toString();
00291   return data.mid(index,len);
00292 }
00293 
00294 /* BoundingBoxExtractor */
00295 BoundingBoxExtractor::	BoundingBoxExtractor() : m_llx(0), m_lly(0), m_urx(0), m_ury(0) {}
00296 BoundingBoxExtractor::~BoundingBoxExtractor() {}
00297 
00298 void BoundingBoxExtractor::gotComment (const char *value)
00299 {
00300   QString data (value);
00301   if (data.find("%BoundingBox:")==-1) return;
00302 
00303   getRectangle (value, m_llx, m_lly, m_urx, m_ury);
00304 }
00305 
00306 bool BoundingBoxExtractor::getRectangle (const char* input, int &llx, int &lly, int &urx, int &ury)
00307 {
00308   if (input == NULL) return false;
00309 
00310   QString s(input);
00311   if (s.contains ("(atend)")) return false;
00312 
00313   QString s2 = s.remove("%BoundingBox:");
00314   QStringList values = QStringList::split (" ", s2.latin1());
00315   qDebug("size is %d",values.size());
00316 //  if (values.size() < 5) return false;
00317   llx = values[0].toInt();
00318   lly = values[1].toInt();
00319   urx = values[2].toInt();
00320   ury = values[3].toInt();
00321 
00322   return true;
00323 }
00324 
KDE Home | KDE Accessibility Home | Description of Access Keys