filters

parser.cpp

00001 /* This file is part of the KDE project
00002    Copyright (C) 2001 Ariya Hidayat <ariyahidayat@yahoo.de>
00003 
00004    This library is free software; you can redistribute it and/or
00005    modify it under the terms of the GNU Library General Public
00006    License as published by the Free Software Foundation; either
00007    version 2 of the License, or (at your option) any later version.
00008 
00009    This library is distributed in the hope that it will be useful,
00010    but WITHOUT ANY WARRANTY; without even the implied warranty of
00011    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012    Library General Public License for more details.
00013 
00014    You should have received a copy of the GNU Library General Public License
00015    along with this library; see the file COPYING.LIB.  If not, write to
00016    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00017  * Boston, MA 02110-1301, USA.
00018 */
00019 
00020 #include "parser.h"
00021 
00022 #include <qmemarray.h>
00023 #include <qdatastream.h>
00024 #include <qfile.h>
00025 #include <qptrlist.h>
00026 #include <qstring.h>
00027 
00028 using namespace WP;
00029 
00030 static Token::Attr
00031 mapToAttr (int data)
00032 {
00033   switch (data)
00034     {
00035     case 0:
00036       return Token::ExtraLarge;
00037     case 1:
00038       return Token::VeryLarge;
00039     case 2:
00040       return Token::Large;
00041     case 3:
00042       return Token::Small;
00043     case 4:
00044       return Token::Fine;
00045     case 5:
00046       return Token::Superscript;
00047     case 6:
00048       return Token::Subscript;
00049     case 7:
00050       return Token::Outline;
00051     case 8:
00052       return Token::Italic;
00053     case 9:
00054       return Token::Shadow;
00055     case 10:
00056       return Token::Redline;
00057     case 11:
00058       return Token::DoubleUnderline;
00059     case 12:
00060       return Token::Bold;
00061     case 13:
00062       return Token::StrikedOut;
00063     case 14:
00064       return Token::Underline;
00065     case 15:
00066       return Token::SmallCaps;
00067     case 16:
00068       return Token::Blink;
00069     };
00070   return Token::None;
00071 }
00072 
00073 static Token::Align
00074 mapToAlign (int data)
00075 {
00076   switch (data)
00077     {
00078     case 0:
00079       return Token::Left;
00080     case 1:
00081       return Token::Full;
00082     case 2:
00083       return Token::Center;
00084     case 3:
00085       return Token::Right;
00086     case 4:
00087       return Token::All;
00088     };
00089   return Token::Left;
00090 }
00091 
00092 Parser::Parser ()
00093 {
00094   tokens.setAutoDelete( TRUE );
00095   packets.setAutoDelete( TRUE );
00096 }
00097 
00098 bool
00099 Parser::parse (const QString & filename)
00100 {
00101   // initialize
00102   tokens.clear();
00103   packets.clear();
00104   docTitle = docAuthor = docAbstract = "";
00105 
00106   // open input file and assign the stream
00107   QDataStream stream;
00108   QFile in (filename);
00109   if (!in.open (IO_ReadOnly))
00110     return FALSE;
00111   stream.setDevice (&in);
00112 
00113   // must be little-endian
00114   stream.setByteOrder (QDataStream::LittleEndian);
00115 
00116   // read 16-bytes document header
00117   Q_UINT8 header[16];
00118   for (int k = 0; k < 16; k++)
00119     stream >> header[k];
00120 
00121   // check first 4 bytes, must be (in hex): 0xFF, 0x57, 0x50, 0x43
00122   if ((header[0] != 0xFF) || (header[1] != 0x57) ||
00123       (header[2] != 0x50) || (header[3] != 0x43))
00124     return FALSE;
00125 
00126   // get document start
00127   unsigned m_docstart = header[4] + (header[5] << 8) +
00128     (header[6] << 16) + (header[7] << 24);
00129   if (m_docstart > stream.device ()->size ())
00130     return FALSE;
00131 
00132   // check document type
00133   unsigned product_type = header[8];
00134   unsigned file_type = header[9];
00135   if ((product_type != 1) || (file_type != 10))
00136     return FALSE;
00137 
00138   // check document format version: for WP 5.x or WP 6/7/8
00139   // major 0 means WP 5.x, otherwise WP 6/7/8
00140   unsigned major_version = header[10];
00141   unsigned minor_version = header[11];
00142   version = (major_version<<8) + minor_version;
00143 
00144   // do not accept other than 0 (WP 5.x) and 2 (WP 6/7/8)
00145   if ( (major_version != 0) && (major_version != 2) )
00146     return FALSE;
00147 
00148   // check if this is encrypted
00149   unsigned encrypt_hash = header[12] + (header[13] << 8);
00150   if ( encrypt_hash ) return FALSE;
00151 
00152   in.close ();
00153 
00154   // parse packets in prefix area
00155   if( major_version==0 ) parsePacketWP5( filename );
00156   else parsePacketWP6( filename );
00157 
00158   // parse document area
00159   if ( major_version == 0 ) parseDocWP5 ( filename, m_docstart );
00160   else parseDocWP6 ( filename, m_docstart );
00161 
00162   return TRUE;
00163 }
00164 
00165 void
00166 Parser::parsePacketWP5( const QString & filename )
00167 {
00168   // open input file and assign the stream
00169   QDataStream stream;
00170   QFile in (filename);
00171   if (!in.open (IO_ReadOnly))
00172     return;
00173   stream.setDevice (&in);
00174 
00175   unsigned filesize = stream.device()->size();
00176 
00177   // must be little-endian
00178   stream.setByteOrder (QDataStream::LittleEndian);
00179 
00180   for( unsigned next_block=16; next_block>0; )
00181   {
00182     QMemArray<Q_UINT8> buf( 10 );
00183     stream.device()->at( next_block );
00184     for( int c=0; c<10; c++ )
00185       stream >> buf.at( c );
00186 
00187     unsigned type = buf[0] + (buf[1]<<8);
00188     unsigned count = buf[2]+ (buf[3]<<8);
00189     unsigned size = buf[4] + (buf[5]<<8);
00190     next_block = buf[6] + (buf[7] << 8) + (buf[8] << 16) + (buf[9] << 24);
00191 
00192     if( type != 0xfffb ) break;
00193     if( size != 10*count ) break;
00194 
00195     for( unsigned v=0; v<count; v++ )
00196     {
00197       Q_UINT16 packet_type;
00198       Q_UINT32 packet_size, packet_pos;
00199       stream >> packet_type;
00200       stream >> packet_size;
00201       stream >> packet_pos;
00202 
00203       if( packet_type == 0 ) break;
00204       if( packet_pos <= 0 ) continue;
00205       if( packet_size < 0 ) continue;
00206       if( packet_pos + packet_size > filesize ) continue;
00207 
00208       Packet* p = new Packet;
00209       p->type = packet_type;
00210       p->pos = packet_pos;
00211       p->size = packet_size;
00212       packets.append( p );
00213     }
00214 
00215   }
00216 
00217   // load all packets
00218   for( QPtrListIterator<Packet> it(packets); it; ++it )
00219   {
00220       Packet* p = it.current();
00221       stream.device()->at( p->pos );
00222       p->data.resize( p->size );
00223       for( unsigned q = 0; q < p->size; q++ )
00224         stream >> p->data.at(q );
00225   }
00226 
00227   in.close();
00228 
00229   // process all known packets
00230   for( QPtrListIterator<Packet> i(packets); i; ++i )
00231   {
00232     Packet* p = i.current();
00233     if( p->data.size()==0 ) continue;
00234 
00235     // document sumary
00236     if( p->type== 1 )
00237     {
00238       unsigned c;
00239       QString desc, desc_type, subject, author, typist, abstract;
00240 
00241       // handle difference between WP 5.0 and WP 5.1
00242       unsigned limit = (p->data[p->data.size()-1]==0xff) ? 94 : 57;
00243 
00244       for( c=26; c<limit; c++)
00245         desc.append( p->data[c] );
00246       if( limit==94 )
00247         for( c=94; (c<p->data.size())&&(p->data[c]); c++ )
00248           desc_type.append( p->data[c] );
00249 
00250       for( c++; (c<p->data.size())&&(p->data[c]); c++)
00251         subject.append( p->data[c] );
00252       for( c++; (c<p->data.size())&&(p->data[c]); c++)
00253         author.append( p->data[c] );
00254       for( c++; (c<p->data.size())&&(p->data[c]); c++)
00255         typist.append( p->data[c] );
00256       for( c++; (c<p->data.size())&&(p->data[c]); c++)
00257         abstract.append( p->data[c] );
00258 
00259       docTitle = desc.stripWhiteSpace();
00260       docAuthor = author.stripWhiteSpace();
00261       docAbstract = abstract.stripWhiteSpace();
00262     }
00263 
00264   }
00265 }
00266 
00267 void
00268 Parser::parsePacketWP6( const QString & filename )
00269 {
00270   // open input file and assign the stream
00271   QDataStream stream;
00272   QFile in (filename);
00273   if (!in.open (IO_ReadOnly))
00274     return;
00275   stream.setDevice (&in);
00276 
00277   unsigned filesize = stream.device()->size();
00278 
00279   // must be little-endian
00280   stream.setByteOrder (QDataStream::LittleEndian);
00281 
00282   Q_UINT16 flag, count;
00283   stream.device()->at( 0x200 );
00284 
00285   stream >> flag;  // FIXME should be checked == 2 ?
00286   stream >> count;
00287 
00288   stream.device()->at( 0x20e );
00289   for( unsigned c=0; c<count; c++ )
00290   {
00291     Q_UINT8 packet_type, packet_flag;
00292     Q_UINT16 count, hidcount;
00293     Q_UINT32 packet_size, packet_pos;
00294 
00295     stream >> packet_flag;
00296     stream >> packet_type;
00297     stream >> count;
00298     stream >> hidcount;
00299     stream >> packet_size;
00300     stream >> packet_pos;
00301 
00302     if( packet_pos + packet_size > filesize ) continue;
00303 
00304     Packet* p = new Packet;
00305     p->type = packet_type;
00306     p->pos = packet_pos;
00307     p->size = packet_size;
00308     packets.append( p );
00309   }
00310 
00311   // load all packets
00312   for( QPtrListIterator<Packet> it(packets); it; ++it )
00313   {
00314     Packet* p = it.current();
00315     stream.device()->at( p->pos );
00316     p->data.resize( p->size );
00317     for( unsigned q = 0; q < p->size; q++ )
00318       stream >> p->data.at(q );
00319   }
00320 
00321   in.close();
00322 
00323   // process all known packets
00324   for( QPtrListIterator<Packet> i(packets); i; ++i )
00325   {
00326     Packet* p = i.current();
00327     if( p->data.size()==0 ) continue;
00328 
00329     // extended document summary
00330     if( p->type == 18 )
00331     {
00332       for( unsigned j=0; j<p->data.size();)
00333       {
00334         unsigned size = p->data[j] + (p->data[j+1]<<8);
00335         unsigned tag = p->data[j+2] + (p->data[j+3]<<8);
00336         QString str;
00337         for( unsigned k=0; k<size-8; k++)
00338           if(!((j+8+k)&1))
00339             if( p->data[j+8+k]==0 ) break;
00340             else str.append( p->data[j+8+k] );
00341 
00342         str = str.stripWhiteSpace();
00343 
00344         if( tag==1 ) docAbstract = str;
00345         if( tag==5 ) docAuthor = str;
00346         if( tag==17 ) docTitle = str;
00347 
00348         j+= size;
00349       }
00350     }
00351 
00352   }
00353 
00354 }
00355 
00356 void
00357 Parser::parseDocWP5( const QString & filename, int start )
00358 {
00359   // open input file and assign the stream
00360   QDataStream stream;
00361   QFile in (filename);
00362   if (!in.open (IO_ReadOnly))
00363     return;
00364   stream.setDevice (&in);
00365 
00366   // sentinel
00367   if ( start < 0 ) return;
00368   if ( start >= stream.device ()->size () ) return;
00369 
00370   // must be little-endian
00371   stream.setByteOrder (QDataStream::LittleEndian);
00372 
00373   // seek to start of document area
00374   stream.device ()->at (start);
00375 
00376   // main loop
00377   QString text;
00378   while (!stream.atEnd ())
00379     {
00380 
00381       // read one byte
00382       Q_UINT8 code;
00383       stream >> code;
00384 
00385       // ASCII printable characters ?
00386       if ((code >= 33) && (code <= 127))
00387         text.append ((char) code);
00388       else
00389         {
00390           // either fixed-length or variable-length function
00391 
00392           QMemArray < Q_UINT8 > data;
00393           QMemArray < Q_UINT16 > pid;
00394           Q_UINT8 subfunction = 0;
00395 
00396           if ((code >= 0xC0) && (code <= 0xCF))
00397             {
00398               Q_UINT8 dummy;
00399               unsigned lentab[] =
00400                 { 2, 7, 9, 1, 1, 3, 4, 5, 2, 3, 4, 4, 6, 8, 8, 10 };
00401               unsigned length = lentab[code & 0x0F];
00402               data.resize (length);
00403               for (unsigned c = 0; c < length; c++)
00404                 stream >> data.at (c);
00405               stream >> dummy;  // FIXME should be checked == code ?
00406             }
00407           else if ((code >= 0xD0) && (code <= 0xFF))
00408             {
00409               Q_UINT16 length;
00410               stream >> subfunction;
00411               stream >> length;
00412 
00413               data.resize (length);
00414               for (unsigned c = 0; (c < length) && !stream.atEnd (); c++)
00415                 stream >> data.at (c);
00416             }
00417 
00418           // NOTE: code < 32 is single-byte function
00419 
00420           // this is to simplify
00421           unsigned function = (code << 8) + subfunction;
00422 
00423           // flush previous text first
00424           if (!text.isEmpty ())
00425             {
00426               tokens.append (new Token (text));
00427               text = "";
00428             }
00429 
00430           switch (function)
00431             {
00432 
00433             case 0x2000:
00434             case 0x0d00:
00435             case 0x0b00:
00436               tokens.append (new Token (Token::SoftSpace));
00437               break;
00438 
00439             case 0xa900:
00440               tokens.append (new Token (Token::HardHyphen));
00441               break;
00442 
00443             case 0x0a00:
00444             case 0x9900:
00445             case 0x8c00:
00446               tokens.append (new Token (Token::HardReturn));
00447               break;
00448 
00449             case 0xc000:
00450               tokens.append (new Token (Token::ExtChar, data[1], data[0]));
00451               break;
00452 
00453             case 0xc300:
00454               tokens.append (new Token (Token::AttrOn, mapToAttr (data[0])));
00455               break;
00456 
00457             case 0xc400:
00458               tokens.append (new Token (Token::AttrOff, mapToAttr (data[0])));
00459               break;
00460 
00461             case 0xd001:
00462               tokens.append (new Token (Token::LeftMargin,
00463                                         data[4] + (data[5] << 8)));
00464               tokens.append (new Token (Token::RightMargin,
00465                                         data[6] + (data[7] << 8)));
00466               break;
00467 
00468             case 0xd005:
00469               tokens.append (new Token (Token::TopMargin,
00470                                         data[4] + (data[5] << 8)));
00471               tokens.append (new Token (Token::BottomMargin,
00472                                         data[6] + (data[7] << 8)));
00473               break;
00474 
00475               // NOTE we store linespace as 1/65536th, i.e 655536 means single space
00476               // on WP 5.x, (data[3]<<8)+data[2] is in 1/256th, so make 8-bit adjustment
00477             case 0xd002:
00478               tokens.append (new Token (Token::Linespace,
00479                                         (data[2] << 8) + (data[3] << 16)));
00480               break;
00481 
00482             case 0xd006:
00483               tokens.append (new Token (Token::Justification,
00484                                         mapToAlign (data[1])));
00485               break;
00486 
00487             case 0xd100:
00488               tokens.append (new Token (Token::FontColor,
00489                                         data[3], data[4], data[5]));
00490               break;
00491 
00492             case 0xd101:
00493               tokens.append (new Token (Token::FontSize,
00494                                         (data[29] << 8) + data[28]));
00495               break;
00496 
00497             case 0x8300:
00498               tokens.append (new Token (Token::SoftReturn));
00499               // FIXME this is actually Soft End of Center/Align
00500               break;
00501 
00502             case 0xc500:
00503               // TODO block protect on/off
00504               break;
00505 
00506             case 0xd311:
00507               // TODO set language [Lang]
00508               break;
00509 
00510             case 0xd301:
00511               // TODO set underline mode
00512               break;
00513 
00514             case 0xd700:
00515               tokens.append (new Token (Token::MarkTocStart));
00516               break;
00517 
00518             case 0xd701:
00519               tokens.append (new Token (Token::MarkTocEnd));
00520               break;
00521 
00522             case 0xdb00:
00523               // TODO begin style on
00524               break;
00525 
00526             case 0xdb01:
00527               // TODO end style on
00528               break;
00529 
00530             case 0xdb02:
00531               // TODO global on
00532               break;
00533 
00534             case 0xdb03:
00535               // TODO style off
00536               break;
00537 
00538             case 0xdc00:
00539               tokens.append (new Token (Token::TableCell));
00540               break;
00541 
00542             case 0xdc01:
00543               tokens.append (new Token (Token::TableRow));
00544               break;
00545 
00546             case 0xdc02:
00547               tokens.append (new Token (Token::TableOff));
00548               break;
00549 
00550             default:
00551               tokens.append (new Token (Token::Function, function));
00552             }
00553 
00554         }
00555     }
00556 
00557   // flush left-over text if any
00558   if (!text.isEmpty ())
00559     {
00560       tokens.append (new Token (text));
00561       text = "";
00562     }
00563 }
00564 
00565 void
00566 Parser::parseDocWP6 (const QString & filename, int start)
00567 {
00568   // open input file and assign the stream
00569   QDataStream stream;
00570   QFile in (filename);
00571   if (!in.open (IO_ReadOnly))
00572     return;
00573   stream.setDevice (&in);
00574 
00575   // sentinel
00576   if ( start < 0 ) return;
00577   if ( start >= stream.device ()->size () ) return;
00578 
00579   // must be little-endian
00580   stream.setByteOrder (QDataStream::LittleEndian);
00581 
00582   // seek to start of document area
00583   stream.device ()->at (start);
00584 
00585   // main loop
00586   QString text;
00587   while (!stream.atEnd ())
00588     {
00589 
00590       // read one byte
00591       Q_UINT8 code;
00592       stream >> code;
00593 
00594       // ASCII printable characters ?
00595       if ((code >= 33) && (code <= 127))
00596         text.append ((char) code);
00597 
00598       // WP default extended international characters ?
00599       else if ((code >= 1) && (code <= 32))
00600         {
00601           // flush previous text first
00602           if (!text.isEmpty ())
00603             {
00604               tokens.append (new Token (text));
00605               text = "";
00606             }
00607 
00608           int xlate[] = {
00609             35, 34, 7, 36, 31, 30, 27, 33, 29, 77, 76, 39, 38, 45, 41, 40,
00610             47, 43, 49, 57, 56, 81, 80, 83, 82, 63, 62, 71, 70, 67, 73, 23
00611           };
00612           tokens.append (new Token (Token::ExtChar, 1, xlate[code - 1]));
00613         }
00614 
00615 
00616       else
00617         {
00618           // either fixed-length or variable-length function
00619 
00620           QMemArray < Q_UINT8 > data;
00621           QMemArray < Q_UINT16 > pid;
00622           Q_UINT8 subfunction = 0;
00623 
00624           if ((code >= 0xF0) && (code <= 0xFF))
00625             {
00626               Q_UINT8 dummy;
00627               unsigned lentab[] =
00628                 { 2, 3, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 6, 6, 0 };
00629               unsigned length = lentab[code & 0x0F];
00630               data.resize (length);
00631               for (unsigned c = 0; c < length; c++)
00632                 stream >> data.at (c);
00633               stream >> dummy;  // FIXME should be checked == code ?
00634             }
00635           else if ((code >= 0xD0) && (code <= 0xFF))
00636             {
00637               Q_UINT16 length;
00638               Q_UINT8 flag = 0, numpid = 0;
00639               stream >> subfunction;
00640               stream >> length;
00641               stream >> flag;
00642               length -= 10;
00643 
00644               if (flag & 0x80)
00645                 {
00646                   stream >> numpid;
00647                   pid.resize (numpid);
00648                   for (unsigned n = 0; n < numpid; n++)
00649                     stream >> pid.at (n);
00650                   length = length - 1 - numpid * 2;
00651                 }
00652 
00653               Q_UINT16 nondel;
00654               stream >> nondel;
00655 
00656               data.resize (length);
00657               for (int c = 0; length && !stream.atEnd (); length--, c++)
00658                 stream >> data[c];
00659 
00660               Q_UINT16 dummy_length;
00661               Q_UINT8 dummy_code;
00662               stream >> dummy_length;   // FIXME should be checked == length ?
00663               stream >> dummy_code;     // FIXME should be checked == code ?
00664 
00665             }
00666 
00667           // this is to simplify
00668           unsigned function = (code << 8) + subfunction;
00669 
00670           // flush previous text first
00671           if (!text.isEmpty ())
00672             {
00673               tokens.append (new Token (text));
00674               text = "";
00675             }
00676 
00677           int val;
00678 
00679           switch (function)
00680             {
00681 
00682             case 0x8000:
00683               tokens.append (new Token (Token::SoftSpace));
00684               break;
00685 
00686             case 0x8100:
00687               tokens.append (new Token (Token::HardSpace));
00688               break;
00689 
00690             case 0x8400:
00691               tokens.append (new Token (Token::HardHyphen));
00692               break;
00693 
00694             case 0x8700:
00695               tokens.append (new Token (Token::DormantHardReturn));
00696               break;
00697 
00698             case 0x8800:
00699               tokens.append (new Token (Token::SoftReturn));
00700               // FIXME this is actually Soft End of Center/Align
00701               break;
00702 
00703             case 0xcc00:
00704               tokens.append (new Token (Token::HardReturn));
00705               break;
00706 
00707             case 0xd001:
00708             case 0xd014:
00709               tokens.append (new Token (Token::SoftReturn));
00710               break;
00711 
00712             case 0xd004:
00713               tokens.append (new Token (Token::HardReturn));
00714               break;
00715 
00716             case 0xd00a:
00717               tokens.append (new Token (Token::TableCell));
00718               break;
00719 
00720             case 0xd00b:
00721             case 0xd00c:
00722             case 0xd00d:
00723             case 0xd00e:
00724             case 0xd00f:
00725             case 0xd010:
00726               tokens.append (new Token (Token::TableRow));
00727               break;
00728 
00729             case 0xd011:
00730             case 0xd012:
00731             case 0xd013:
00732               tokens.append (new Token (Token::TableOff));
00733               break;
00734 
00735             case 0xd100:
00736               tokens.append (new Token (Token::TopMargin,
00737                                         data[0] + (data[1] << 8)));
00738               break;
00739 
00740             case 0xd101:
00741               tokens.append (new Token (Token::BottomMargin,
00742                                         data[0] + (data[1] << 8)));
00743               break;
00744 
00745             case 0xd200:
00746               tokens.append (new Token (Token::LeftMargin,
00747                                         data[0] + (data[1] << 8)));
00748               break;
00749 
00750             case 0xd201:
00751               tokens.append (new Token (Token::RightMargin,
00752                                         data[0] + (data[1] << 8)));
00753               break;
00754 
00755             case 0xd301:
00756               tokens.append (new Token (Token::Linespace,
00757                                         (data[2] << 16) + (data[1] << 8) +
00758                                         data[0]));
00759               break;
00760 
00761             case 0xd304:
00762               handleTab (data);
00763               break;
00764 
00765             case 0xd305:
00766               tokens.append (new Token (Token::Justification,
00767                                         mapToAlign (data[0])));
00768               break;
00769 
00770             case 0xd30a:
00771               // TODO spacing after paragraph
00772               break;
00773 
00774             case 0xd30b:
00775               tokens.append (new Token (Token::ParagraphIndent,
00776                                         data[0] + (data[1] << 8)));
00777               break;
00778 
00779             case 0xd30c:
00780               val = data[0] + (data[1] << 8);
00781               if (val > 32767)
00782                 val = val - 65536;
00783               tokens.append (new Token (Token::LeftMarginAdjust, val));
00784               break;
00785 
00786             case 0xd30d:
00787               val = data[0] + (data[1] << 8);
00788               if (val > 32767)
00789                 val = val - 65536;
00790               tokens.append (new Token (Token::RightMarginAdjust, val));
00791               break;
00792 
00793             case 0xd402:
00794             case 0xd403:
00795               // TODO underline spaces ? tabs ?
00796               break;
00797 
00798             case 0xd40a:
00799               tokens.append (new Token (Token::MarkTocStart));
00800               break;
00801 
00802             case 0xd40b:
00803               tokens.append (new Token (Token::MarkTocEnd));
00804               break;
00805 
00806             case 0xd418:
00807               tokens.append (new Token (Token::FontColor,
00808                                         data[0], data[1], data[2]));
00809               break;
00810 
00811             case 0xd41b:
00812               tokens.append (new Token (Token::FontSize,
00813                                         (data[1] << 8) + data[0]));
00814               break;
00815 
00816             case 0xd41c:
00817               // TODO set language [Lang]
00818               break;
00819 
00820             case 0xd41a:
00821               {
00822                 unsigned fontpid = pid[0];
00823                 if( fontpid == 0 || fontpid > packets.count() ) continue;
00824                 Packet* p = packets.at( fontpid-1 );
00825                 if( p->type==85 )
00826                 {
00827                   // read the typeface (stored as WP word-string)
00828                   QString typeface;
00829                   unsigned strlen = p->data[22];
00830                   for( unsigned i=24; strlen && (i<p->data.size()); i+=2, strlen-=2 )
00831                     if( p->data[i]) typeface.append( p->data[i] );
00832                     else break;
00833                   typeface = typeface.stripWhiteSpace();
00834 
00835                   // hack: get rid of "Regular" as font name suffix
00836                   QString suffix = "Regular";
00837                   if( typeface.right( suffix.length() ) == suffix )
00838                      typeface = typeface.left( typeface.length() -  suffix.length() ).stripWhiteSpace();
00839 
00840                   tokens.append( new Token( Token::FontFace, typeface ) );
00841                 }
00842               }
00843               break;
00844 
00845             case 0xd426:
00846             case 0xd427:
00847               // TODO block protect on/off
00848               break;
00849 
00850             case 0xd42a:
00851               tokens.append (new Token (Token::TableOn));
00852               break;
00853 
00854             case 0xd42b:
00855               tokens.append (new Token (Token::TableEnd));
00856               break;
00857 
00858             case 0xd42c:
00859               tokens.append (new Token (Token::TableColumn));
00860               break;
00861 
00862             case 0xd45f:
00863               // FIXME check what it does
00864               // this always appears within table cell
00865               break;
00866 
00867             case 0xdd0a:
00868             case 0xdd0b:
00869               // TODO global on/off
00870               break;
00871 
00872             case 0xf000:
00873               tokens.append (new Token (Token::ExtChar, data[1], data[0]));
00874               break;
00875 
00876             case 0xf200:
00877               tokens.append (new Token (Token::AttrOn, mapToAttr (data[0])));
00878               break;
00879 
00880             case 0xf300:
00881               tokens.append (new Token (Token::AttrOff, mapToAttr (data[0])));
00882               break;
00883 
00884             case 0xdd00:
00885             case 0xdd01:
00886               // TODO style begin on/off
00887               break;
00888 
00889             case 0xdd02:
00890             case 0xdd03:
00891               // TODO style end on/off
00892               break;
00893 
00894             case 0xe011:
00895               // TODO soft left-tab
00896               break;
00897 
00898             case 0xe040:
00899               // TODO center on margin
00900               break;
00901 
00902             case 0xe080:
00903               tokens.append (new Token (Token::TabHardFlushRight));
00904               break;
00905 
00906             case 0xd111:
00907               // FIXME unknown functions
00908               break;
00909 
00910             case 0xfb00:
00911               // text highlight (or background color)
00912               tokens.append (new Token (Token::HighlightOn,
00913                                         data[0], data[1], data[2]));
00914               break;
00915 
00916             case 0xfc00:
00917               // turn off highlight, data is last highlight color
00918               tokens.append (new Token (Token::HighlightOff,
00919                                         data[0], data[1], data[2]));
00920               break;
00921 
00922             default:
00923               tokens.append (new Token (Token::Function, function));
00924               break;
00925 
00926             };
00927 
00928 
00929         }
00930 
00931     }
00932 
00933   // flush left-over text if any
00934   if (!text.isEmpty ())
00935     {
00936       tokens.append (new Token (text));
00937       text = "";
00938     }
00939 }
00940 
00941 
00942 static Token::TabType
00943 mapToTabType (int t)
00944 {
00945   switch (t)
00946     {
00947     case 0:
00948       return Token::LeftTab;
00949     case 1:
00950       return Token::CenterTab;
00951     case 2:
00952       return Token::RightTab;
00953     case 3:
00954       return Token::DecimalTab;
00955     case 4:
00956       return Token::VerticalTab;
00957     }
00958   return Token::LeftTab;
00959 }
00960 
00961 void
00962 Parser::handleTab (QMemArray < Q_UINT8 > data)
00963 {
00964   QPtrList < Token::Tab > tabs;
00965   bool relative = data[0];
00966   int adjust = data[1] + (data[2] << 8);
00967   int num = data[3];
00968   int p = 4;
00969   int tabtype = 0;
00970   int tabpos = 0;
00971 
00972   for (int i = 0; i < num; i++)
00973     {
00974       int tt = data[p];
00975 
00976       if (tt & 0x80)
00977         {
00978           int rep = tt & 0x7F;
00979           p++;
00980           for (int c = 0; c < rep; c++)
00981             {
00982               int diff = data[p] + (data[p + 1] << 8);
00983               tabs.
00984                 append (new Token::
00985                         Tab (mapToTabType (tabtype), tabpos + diff));
00986               p += 2;
00987             }
00988         }
00989       else
00990         {
00991           tabtype = tt & 0x7f;
00992           tabpos = data[p + 1] + (data[p + 2] << 8);
00993           tabs.append (new Token::Tab (mapToTabType (tabtype), tabpos));
00994           p += 3;
00995         }
00996 
00997     }
00998 
00999 
01000   if (tabs.count ())
01001     tokens.append (new Token (tabs));
01002 
01003 }
01004 
01005 // The following tables map WP charset/charcode pairs to Unicode characters.
01006 
// WP multinational characters (charset 1).
// Entry i is the Unicode code point for character code i.
// Read-only lookup data, hence const.
static const unsigned multinational_map[] = {
  0x0300, 0x00b7, 0x0303, 0x0302, 0x0335, 0x0338, 0x0301, 0x0308,
  0x0304, 0x0313, 0x0315, 0x02bc, 0x0326, 0x0315, 0x030a, 0x0307,
  0x030b, 0x0327, 0x0328, 0x030c, 0x0337, 0x0305, 0x0306, 0x00df,
  0x0138, 0xf801, 0x00c1, 0x00e1, 0x00c2, 0x00e2, 0x00c4, 0x00e4,
  0x00c0, 0x00e0, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00c7, 0x00e7,
  0x00c9, 0x00e9, 0x00ca, 0x00ea, 0x00cb, 0x00eb, 0x00c8, 0x00e8,
  0x00cd, 0x00ed, 0x00ce, 0x00ee, 0x00cf, 0x00ef, 0x00cc, 0x00ec,
  0x00d1, 0x00f1, 0x00d3, 0x00f3, 0x00d4, 0x00f4, 0x00d6, 0x00f6,
  0x00d2, 0x00f2, 0x00da, 0x00fa, 0x00db, 0x00fb, 0x00dc, 0x00fc,
  0x00d9, 0x00f9, 0x0178, 0x00ff, 0x00c3, 0x00e3, 0x0110, 0x0111,
  0x00d8, 0x00f8, 0x00d5, 0x00f5, 0x00dd, 0x00fd, 0x00d0, 0x00f0,
  0x00de, 0x00fe, 0x0102, 0x0103, 0x0100, 0x0101, 0x0104, 0x0105,
  0x0106, 0x0107, 0x010c, 0x010d, 0x0108, 0x0109, 0x010a, 0x010b,
  0x010e, 0x010f, 0x011a, 0x011b, 0x0116, 0x0117, 0x0112, 0x0113,
  0x0118, 0x0119, 0x0047, 0x0067, 0x011e, 0x011f, 0x0047, 0x0067,
  0x0122, 0x0123, 0x011c, 0x011d, 0x0120, 0x0121, 0x0124, 0x0125,
  0x0126, 0x0127, 0x0130, 0x0069, 0x012a, 0x012b, 0x012e, 0x012f,
  0x0128, 0x0129, 0x0132, 0x0133, 0x0134, 0x0135, 0x0136, 0x0137,
  0x0139, 0x013a, 0x013d, 0x013e, 0x013b, 0x013c, 0x013f, 0x0140,
  0x0141, 0x0142, 0x0143, 0x0144, 0xf802, 0x0149, 0x0147, 0x0148,
  0x0145, 0x0146, 0x0150, 0x0151, 0x014c, 0x014d, 0x0152, 0x0153,
  0x0154, 0x0155, 0x0158, 0x0159, 0x0156, 0x0157, 0x015a, 0x015b,
  0x0160, 0x0161, 0x015e, 0x015f, 0x015c, 0x015d, 0x0164, 0x0165,
  0x0162, 0x0163, 0x0166, 0x0167, 0x016c, 0x016d, 0x0170, 0x0171,
  0x016a, 0x016b, 0x0172, 0x0173, 0x016e, 0x016f, 0x0168, 0x0169,
  0x0174, 0x0175, 0x0176, 0x0177, 0x0179, 0x017a, 0x017d, 0x017e,
  0x017b, 0x017c, 0x014a, 0x014b, 0xf000, 0xf001, 0xf002, 0xf003,
  0xf004, 0xf005, 0xf006, 0xf007, 0xf008, 0xf009, 0xf00a, 0xf00b,
  0xf00c, 0xf00d, 0xf00e, 0xf00f, 0x010e, 0x010f, 0x01a0, 0x01a1,
  0x01af, 0x01b0, 0x0114, 0x0115, 0x012c, 0x012d, 0x0049, 0x0131,
  0x014e, 0x014f
};
01041 
// WP phonetic symbols (charset 2).
// Entry i is the Unicode code point for character code i.
// Read-only lookup data, hence const.
static const unsigned phonetic_map[] = {
  0x02b9, 0x02ba, 0x02bb, 0xf813, 0x02bd, 0x02bc, 0xf814, 0x02be,
  0x02bf, 0x0310, 0x02d0, 0x02d1, 0x0306, 0x032e, 0x0329, 0x02c8,
  0x02cc, 0x02c9, 0x02ca, 0x02cb, 0x02cd, 0x02ce, 0x02cf, 0x02c6,
  0x02c7, 0x02dc, 0x0325, 0x02da, 0x032d, 0x032c, 0x0323, 0x0308,
  0x0324, 0x031c, 0x031d, 0x031e, 0x031f, 0x0320, 0x0321, 0x0322,
  0x032a, 0x032b, 0x02d2, 0x02d3, 0xf815, 0xf816, 0x005f, 0x2017,
  0x033e, 0x02db, 0x0327, 0x0233, 0x030d, 0x02b0, 0x02b6, 0x0250,
  0x0251, 0x0252, 0x0253, 0x0299, 0x0254, 0x0255, 0x0297, 0x0256,
  0x0257, 0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x029a,
  0x025e, 0x025f, 0x0278, 0x0261, 0x0260, 0x0262, 0x029b, 0x0263,
  0x0264, 0x0265, 0x0266, 0x0267, 0x029c, 0x0268, 0x026a, 0x0269,
  0x029d, 0x029e, 0x026b, 0x026c, 0x026d, 0x029f, 0x026e, 0x028e,
  0x026f, 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0276, 0x0277,
  0x02a0, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f,
  0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287,
  0x0288, 0x0275, 0x0289, 0x028a, 0x028c, 0x028b, 0x028d, 0x03c7,
  0x028f, 0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296,
  0x02a1, 0x02a2, 0x0298, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7,
  0x02a8
};
01064 
// WP typographic symbols (charset 4).
// Entry i is the Unicode code point for character code i.
// Read-only lookup data, hence const.
static const unsigned typographic_map[] = {
  0x25cf, 0x25cb, 0x25a0, 0x2022, 0xf817, 0x00b6, 0x00a7, 0x00a1,
  0x00bf, 0x00ab, 0x00bb, 0x00a3, 0x00a5, 0x20a7, 0x0192, 0x00aa,
  0x00ba, 0x00bd, 0x00bc, 0x00a2, 0x00b2, 0x207f, 0x00ae, 0x00a9,
  0x00a4, 0x00be, 0x00b3, 0x201b, 0x2019, 0x2018, 0x201f, 0x201d,
  0x201c, 0x2013, 0x2014, 0x2039, 0x203a, 0x25cb, 0x25a1, 0x2020,
  0x2021, 0x2122, 0x2120, 0x211e, 0x25cf, 0x25e6, 0x25a0, 0x25aa,
  0x25a1, 0x25ab, 0x2012, 0xfb00, 0xfb03, 0xfb04, 0xfb01, 0xfb02,
  0x2026, 0x0024, 0x20a3, 0x20a2, 0x20a0, 0x20a4, 0x201a, 0x201e,
  0x2153, 0x2154, 0x215b, 0x215c, 0x215d, 0x215e, 0x24c2, 0x24c5,
  0x20ac, 0x2105, 0x2106, 0x2030, 0x2116, 0xf818, 0x00b9, 0x2409,
  0x240c, 0x240d, 0x240a, 0x2424, 0x240b, 0xf819, 0x20a9, 0x20a6,
  0x20a8, 0xf81a, 0xf81b, 0xf81c, 0xf81d, 0xf81e, 0xf81f, 0xf820,
  0xf821, 0xf822, 0xf823, 0xf824, 0xf825, 0xf826
};
01081 
// WP iconic symbols (charset 5).
// Entry i is the Unicode code point for character code i.
// Read-only lookup data, hence const.
static const unsigned iconic_map[] = {
  0x2661, 0x2662, 0x2667, 0x2664, 0x2642, 0x2640, 0x263c, 0x263a,
  0x263b, 0x266a, 0x266c, 0x25ac, 0x2302, 0x203c, 0x221a, 0x21a8,
  0x2310, 0x2319, 0x25d8, 0x25d9, 0x21b5, 0x2104, 0x261c, 0x2007,
  0x2610, 0x2612, 0x2639, 0x266f, 0x266d, 0x266e, 0x260e, 0x231a,
  0x231b, 0x2701, 0x2702, 0x2703, 0x2704, 0x260e, 0x2706, 0x2707,
  0x2708, 0x2709, 0x261b, 0x261e, 0x270c, 0x270d, 0x270e, 0x270f,
  0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717,
  0x2718, 0x2719, 0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f,
  0x2720, 0x2721, 0x2722, 0x2723, 0x2724, 0x2725, 0x2726, 0x2727,
  0x2605, 0x2606, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e, 0x272f,
  0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737,
  0x2738, 0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f,
  0x2740, 0x2741, 0x2742, 0x2743, 0x2744, 0x2745, 0x2746, 0x2747,
  0x2748, 0x2749, 0x274a, 0x274b, 0x25cf, 0x274d, 0x25a0, 0x274f,
  0x2750, 0x2751, 0x2752, 0x25b2, 0x25bc, 0x25c6, 0x2756, 0x25d7,
  0x2758, 0x2759, 0x275a, 0x275b, 0x275c, 0x275d, 0x275e, 0x2036,
  0x2033, 0xf827, 0xf828, 0xf829, 0xf82a, 0x2329, 0x232a, 0x005b,
  0x005d, 0xf82b, 0xf82c, 0xf82d, 0xf82e, 0xf82f, 0xf830, 0xf831,
  0x2190, 0xf832, 0xf833, 0xf834, 0xf835, 0xf836, 0x21e8, 0x21e6,
  0x2794, 0xf838, 0xf839, 0xf83a, 0xf83b, 0xf83c, 0x25d6, 0xf83d,
  0xf83e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767,
  0x2663, 0x2666, 0x2665, 0x2660, 0x2780, 0x2781, 0x2782, 0x2783,
  0x2784, 0x2785, 0x2786, 0x2787, 0x2788, 0x2789, 0x2776, 0x2777,
  0x2778, 0x2779, 0x277a, 0x277b, 0x277c, 0x277d, 0x277e, 0x277f,
  0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786, 0x2787,
  0x2788, 0x2789, 0x278a, 0x278b, 0x278c, 0x278d, 0x278e, 0x278f,
  0x2790, 0x2791, 0x2792, 0x2793, 0x2794, 0x2192, 0x2194, 0x2195,
  0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e, 0x279f,
  0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7,
  0x27a8, 0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af,
  0xf83f, 0x27b1, 0x27b2, 0x27b3, 0x27b4, 0x27b5, 0x27b6, 0x27b7,
  0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd, 0x27be
};
01117 
// WP math/scientific symbols (charset 6).
// Entry i is the Unicode code point for character code i.
// Read-only lookup data, hence const.
static const unsigned math_map[] = {
  0x2212, 0x00b1, 0x2264, 0x2265, 0x221d, 0x01c0, 0x2215, 0x2216,
  0x00f7, 0x2223, 0x2329, 0x232a, 0x223c, 0x2248, 0x2261, 0x2208,
  0x2229, 0x2225, 0x2211, 0x221e, 0x00ac, 0x2192, 0x2190, 0x2191,
  0x2193, 0x2194, 0x2195, 0x25b8, 0x25c2, 0x25b4, 0x25be, 0x22c5,
  0xf850, 0x2218, 0x2219, 0x212b, 0x00b0, 0x00b5, 0x203e, 0x00d7,
  0x222b, 0x220f, 0x2213, 0x2207, 0x2202, 0x02b9, 0x02ba, 0x2192,
  0x212f, 0x2113, 0x210f, 0x2111, 0x211c, 0x2118, 0x21c4, 0x21c6,
  0x21d2, 0x21d0, 0x21d1, 0x21d3, 0x21d4, 0x21d5, 0x2197, 0x2198,
  0x2196, 0x2199, 0x222a, 0x2282, 0x2283, 0x2286, 0x2287, 0x220d,
  0x2205, 0x2308, 0x2309, 0x230a, 0x230b, 0x226a, 0x226b, 0x2220,
  0x2297, 0x2295, 0x2296, 0xf851, 0x2299, 0x2227, 0x2228, 0x22bb,
  0x22a4, 0x22a5, 0x2312, 0x22a2, 0x22a3, 0x25a1, 0x25a0, 0x25ca,
  0xf852, 0xf853, 0xf854, 0x2260, 0x2262, 0x2235, 0x2234, 0x2237,
  0x222e, 0x2112, 0x212d, 0x2128, 0x2118, 0x20dd, 0xf855, 0x25c7,
  0x22c6, 0x2034, 0x2210, 0x2243, 0x2245, 0x227a, 0x227c, 0x227b,
  0x227d, 0x2203, 0x2200, 0x22d8, 0x22d9, 0x228e, 0x228a, 0x228b,
  0x2293, 0x2294, 0x228f, 0x2291, 0x22e4, 0x2290, 0x2292, 0x22e5,
  0x25b3, 0x25bd, 0x25c3, 0x25b9, 0x22c8, 0x2323, 0x2322, 0xf856,
  0x219d, 0x21a9, 0x21aa, 0x21a3, 0x21bc, 0x21bd, 0x21c0, 0x21c1,
  0x21cc, 0x21cb, 0x21bf, 0x21be, 0x21c3, 0x21c2, 0x21c9, 0x21c7,
  0x22d3, 0x22d2, 0x22d0, 0x22d1, 0x229a, 0x229b, 0x229d, 0x2127,
  0x2221, 0x2222, 0x25c3, 0x25b9, 0x25b5, 0x25bf, 0x2214, 0x2250,
  0x2252, 0x2253, 0x224e, 0x224d, 0x22a8, 0xf857, 0x226c, 0x0285,
  0x2605, 0x226e, 0x2270, 0x226f, 0x2271, 0x2241, 0x2244, 0x2247,
  0x2249, 0x2280, 0x22e0, 0x2281, 0x22e1, 0x2284, 0x2285, 0x2288,
  0x2289, 0xf858, 0xf859, 0x22e2, 0x22e3, 0x2226, 0x2224, 0x226d,
  0x2204, 0x2209, 0xf85a, 0x2130, 0x2131, 0x2102, 0xf85b, 0x2115,
  0x211d, 0x225f, 0x221f, 0x220b, 0x22ef, 0xf85c, 0x22ee, 0x22f1,
  0xf85d, 0x20e1, 0x002b, 0x002d, 0x003d, 0x002a, 0xf85e, 0xf85f,
  0xf860, 0x210c, 0x2118, 0x2272, 0x2273, 0xf861
};
01151 
// WP math/scientific extended symbols (charset 7).
// Entry i is the Unicode code point for character code i.
// Read-only lookup data, hence const.
static const unsigned mathext_map[] = {
  0x2320, 0x2321, 0xf702, 0xf703, 0x221a, 0xf705, 0xf706, 0xf707,
  0xf708, 0xf709, 0xf70a, 0xf70b, 0xf70c, 0xf70d, 0xf70e, 0xf70f,
  0xf710, 0xf711, 0xf712, 0xf713, 0xf714, 0xf715, 0xf716, 0xf717,
  0xf718, 0xf719, 0xf71a, 0xf71b, 0xf71c, 0xf71d, 0xf71e, 0xf71f,
  0xf720, 0xf721, 0xf722, 0xf723, 0xf724, 0xf725, 0xf726, 0xf727,
  0xf728, 0xf729, 0xf72a, 0xf72b, 0xf72c, 0xf72d, 0xf72e, 0xf72f,
  0xf730, 0xf731, 0xf732, 0xf733, 0xf734, 0xf735, 0xf736, 0xf737,
  0xf738, 0xf739, 0xf73a, 0xf73b, 0xf73c, 0xf73d, 0xf73e, 0xf73f,
  0xf740, 0xf741, 0xf742, 0xf743, 0xf744, 0xf745, 0xf746, 0xf747,
  0xf748, 0xf749, 0xf74a, 0xf74b, 0xf74c, 0xf74d, 0xf74e, 0xf74f,
  0xf750, 0xf751, 0xf752, 0xf753, 0xf754, 0xf755, 0xf756, 0xf757,
  0xf758, 0xf759, 0xf75a, 0xf75b, 0xf75c, 0xf75d, 0xf75e, 0xf75f,
  0xf760, 0xf761, 0xf762, 0xf763, 0xf764, 0xf765, 0xf766, 0xf767,
  0xf768, 0xf769, 0xf76a, 0xf76b, 0xf76c, 0xf76d, 0xf76e, 0xf76f,
  0xf770, 0xf771, 0xf772, 0xf773, 0xf774, 0xf775, 0xf776, 0xf777,
  0xf778, 0xf779, 0x20aa, 0xf77b, 0xf77c, 0xf77d, 0xf77e, 0xf77f,
  0xf780, 0xf781, 0xf782, 0xf783, 0xf784, 0xf785, 0xf786, 0xf787,
  0xf788, 0xf789, 0xf78a, 0xf78b, 0xf78c, 0xf78d, 0xf78e, 0xf78f,
  0xf790, 0xf791, 0xf792, 0xf793, 0xf794, 0xf795, 0xf796, 0xf797,
  0xf798, 0xf799, 0xf79a, 0xf79b, 0xf79c, 0xf79d, 0xf79e, 0xf79f,
  0xf7a0, 0xf7a1, 0xf7a2, 0xf7a3, 0xf7a4, 0xf7a5, 0xf7a6, 0xf7a7,
  0xf7a8, 0xf7a9, 0xf7aa, 0xf7ab, 0xf7ac, 0xf7ad, 0xf7ae, 0xf7af,
  0xf7b0, 0xf7b1, 0xf7b2, 0xf7b3, 0xf7b4, 0xf7b5, 0xf7b6, 0xf7b7,
  0xf7b8, 0xf7b9, 0xf7ba, 0xf7bb, 0xf7bc, 0xf7bd, 0xf7be, 0xf7bf,
  0xf7c0, 0xf7c1, 0xf7c2, 0xf7c3, 0xf7c4, 0xf7c5, 0xf7c6, 0xf7c7
};
01180 
// WP greek characters (charset 8).
// Entry i is the Unicode code point for character code i.
// Read-only lookup data, hence const.
static const unsigned greek_map[] = {
  0x0391, 0x03b1, 0x0392, 0x03b2, 0x0392, 0x03d0, 0x0393, 0x03b3,
  0x0394, 0x03b4, 0x0395, 0x03b5, 0x0396, 0x03b6, 0x0397, 0x03b7,
  0x0398, 0x03b8, 0x0399, 0x03b9, 0x039a, 0x03ba, 0x039b, 0x03bb,
  0x039c, 0x03bc, 0x039d, 0x03bd, 0x039e, 0x03be, 0x039f, 0x03bf,
  0x03a0, 0x03c0, 0x03a1, 0x03c1, 0x03a3, 0x03c3, 0x03a3, 0x03c2,
  0x03a4, 0x03c4, 0x03a5, 0x03c5, 0x03a6, 0x03c6, 0x03a7, 0x03c7,
  0x03a8, 0x03c8, 0x03a9, 0x03c9, 0x0386, 0x03ac, 0x0388, 0x03ad,
  0x0389, 0x03ae, 0x038a, 0x03af, 0x03aa, 0x03ca, 0x038c, 0x03cc,
  0x038e, 0x03cd, 0x03ab, 0x03cb, 0x038f, 0x03ce, 0x03b5, 0x03d1,
  0x03f0, 0x03d6, 0x03f1, 0x03db, 0x03d2, 0x03d5, 0x03d6, 0x03d7,
  0x00b7, 0x0374, 0x0375, 0x0301, 0x0308, 0xf216, 0xf217, 0x0300,
  0x0311, 0x0313, 0x0314, 0x0345, 0x1fce, 0x1fde, 0x1fcd, 0x1fdd,
  0xf200, 0xf201, 0xf022, 0xf021, 0xf202, 0xf203, 0xf204, 0xf300,
  0xf301, 0xf302, 0xf303, 0xf304, 0xf305, 0x1f70, 0xf100, 0x1fb3,
  0x1fb4, 0x1fb2, 0xf205, 0x1f00, 0x1f04, 0x1f02, 0xf206, 0x1f80,
  0x1f84, 0x1f82, 0xf306, 0x1f01, 0x1f05, 0x1f03, 0xf207, 0x1f81,
  0x1f85, 0x1f83, 0xf307, 0x1f72, 0x1f10, 0x1f14, 0x1f12, 0x1f11,
  0x1f15, 0x1f13, 0x1f74, 0xf101, 0x1fc3, 0x1fc4, 0x1fc2, 0xf208,
  0x1f20, 0x1f24, 0x1f22, 0xf209, 0x1f90, 0x1f94, 0x1f92, 0xf308,
  0x1f21, 0x1f25, 0x1f23, 0xf20a, 0x1f91, 0x1f95, 0x1f93, 0xf309,
  0x1f76, 0xf102, 0xf20b, 0xf20c, 0x1f30, 0x1f34, 0x1f32, 0xf20d,
  0x1f31, 0x1f35, 0x1f33, 0xf20e, 0x1f78, 0x1f40, 0x1f44, 0x1f42,
  0x1f41, 0x1f45, 0x1f43, 0x1fe5, 0x1fe4, 0x1f7a, 0xf103, 0xf20f,
  0xf210, 0x1f50, 0x1f54, 0x1f52, 0xf211, 0x1f51, 0x1f55, 0x1f53,
  0xf212, 0x1f7c, 0xf104, 0x1ff3, 0x1ff4, 0x1ff2, 0xf213, 0x1f60,
  0x1f64, 0x1f62, 0xf214, 0x1fa0, 0x1fa4, 0x1fa2, 0xf30a, 0x1f61,
  0x1f65, 0x1f63, 0xf215, 0x1fa1, 0x1fa5, 0x1fa3, 0xf30b, 0x03da,
  0x03dc, 0x03de, 0x03e0
};
01212 
// WP hebrew characters (charset 9).
// Entry i is the Unicode code point for character code i.
// Read-only lookup data, hence const.
static const unsigned hebrew_map[] = {
  0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
  0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
  0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
  0x05e8, 0x05e9, 0x05ea, 0xf862, 0x05c0, 0x05c3, 0x05f3, 0x05f4,
  0x05b0, 0x05b1, 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7,
  0x05b8, 0x05b9, 0x05b9, 0x05bb, 0x05bc, 0x05bd, 0x05bf, 0x05b7,
  0xfb1e, 0x05f0, 0x05f1, 0x05f2, 0xf114, 0xf8b0, 0xf863, 0xf864,
  0xf865, 0xf866, 0xf867, 0xf868, 0xf869, 0xf86a, 0xf86b, 0xf86c,
  0xf86d, 0xf86e, 0xf86f, 0xf870, 0xf871, 0xf872, 0xf873, 0xf874,
  0x05f3, 0x05f3, 0x05f4, 0xf876, 0xf877, 0xf878, 0xf879, 0xf87a,
  0xf87b, 0xf87c, 0xf87d, 0xf87e, 0xf115, 0xf116, 0xf87f, 0xf117,
  0xf118, 0xf119, 0xf11a, 0xf11b, 0xf11c, 0xf11d, 0xf11e, 0xf11f,
  0xf120, 0xf121, 0xf122, 0xf123, 0xf124, 0xf125, 0xf126, 0xf127,
  0xf218, 0xf128, 0xf129, 0xf12a, 0xf12b, 0xf12c, 0xf12d, 0xf880,
  0xf12e, 0xf12f, 0xf130, 0xf219, 0x05e9, 0xf131, 0xf132, 0xf140,
  0xf141, 0xf142, 0x20aa
};
01232 
// WP cyrillic characters (charset 10).
// Entry i is the Unicode code point for character code i.
// Read-only lookup data, hence const.
static const unsigned cyrillic_map[] = {
  0x0410, 0x0430, 0x0411, 0x0431, 0x0412, 0x0432, 0x0413, 0x0433,
  0x0414, 0x0434, 0x0415, 0x0435, 0x0401, 0x0451, 0x0416, 0x0436,
  0x0417, 0x0437, 0x0418, 0x0438, 0x0419, 0x0439, 0x041a, 0x043a,
  0x041b, 0x043b, 0x041c, 0x043c, 0x041d, 0x043d, 0x041e, 0x043e,
  0x041f, 0x043f, 0x0420, 0x0440, 0x0421, 0x0441, 0x0422, 0x0442,
  0x0423, 0x0443, 0x0424, 0x0444, 0x0425, 0x0445, 0x0426, 0x0446,
  0x0427, 0x0447, 0x0428, 0x0448, 0x0429, 0x0449, 0x042a, 0x044a,
  0x042b, 0x044b, 0x042c, 0x044c, 0x042d, 0x044d, 0x042e, 0x044e,
  0x042f, 0x044f, 0x04d8, 0x04d9, 0x0403, 0x0453, 0x0490, 0x0491,
  0x0492, 0x0493, 0x0402, 0x0452, 0x0404, 0x0454, 0x0404, 0x0454,
  0x0496, 0x0497, 0x0405, 0x0455, 0xf159, 0xf889, 0xf15e, 0xf15f,
  0x0406, 0x0456, 0x0407, 0x0457, 0xf88c, 0xf88d, 0x0408, 0x0458,
  0x040c, 0x045c, 0x049a, 0x049b, 0xf160, 0xf161, 0x049c, 0x049d,
  0x0409, 0x0459, 0x04a2, 0x04a3, 0x040a, 0x045a, 0x047a, 0x047b,
  0x0460, 0x0461, 0x040b, 0x045b, 0x040e, 0x045e, 0x04ee, 0x04ef,
  0x04ae, 0x04af, 0x04b0, 0x04b1, 0x0194, 0x0263, 0x04b2, 0x04b3,
  0xf162, 0xf163, 0x04ba, 0x04bb, 0x047e, 0x047f, 0x040f, 0x045f,
  0x04b6, 0x04b7, 0x04b8, 0x04b9, 0xf164, 0xf165, 0x0462, 0x0463,
  0x0466, 0x0467, 0x046a, 0x046b, 0x046e, 0x046f, 0x0470, 0x0471,
  0x0472, 0x0473, 0x0474, 0x0475, 0xf400, 0xf401, 0xf402, 0xf403,
  0xf404, 0xf405, 0xf406, 0xf407, 0xf408, 0xf409, 0xf40a, 0xf40b,
  0xf40c, 0xf40d, 0xf40e, 0xf40f, 0xf410, 0xf411, 0xf412, 0xf413,
  0xf414, 0xf415, 0xf416, 0xf417, 0xf418, 0xf419, 0xf41a, 0xf41b,
  0xf41c, 0xf41d, 0xf41e, 0xf41f, 0xf420, 0xf421, 0xf422, 0xf423,
  0xf424, 0xf425, 0xf426, 0xf427, 0xf428, 0xf429, 0xf42a, 0xf42b,
  0x0301, 0x0300, 0x0308, 0x0306, 0x0326, 0x0328, 0x0304, 0xf893,
  0x201e, 0x201c, 0x10d0, 0x10d1, 0x10d2, 0x10d3, 0x10d4, 0x10d5,
  0x10d6, 0x10f1, 0x10d7, 0x10d8, 0x10d9, 0x10da, 0x10db, 0x10dc,
  0x10f2, 0x10dd, 0x10de, 0x10df, 0x10e0, 0x10e1, 0x10e2, 0x10e3,
  0x10f3, 0x10e4, 0x10e5, 0x10e6, 0x10e7, 0x10e8, 0x10e9, 0x10ea,
  0x10eb, 0x10ec, 0x10ed, 0x10ee, 0x10f4, 0x10ef, 0x10f0, 0x10f5,
  0x10f6, 0xf42c
};
01268 
// WP japanese characters (charset 11).
// Entry i is the Unicode code point for character code i.
// Read-only lookup data, hence const.
static const unsigned japanese_map[] = {
  0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 0xff66, 0xff67, 0xff68,
  0xff69, 0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f, 0xff70,
  0xff71, 0xff72, 0xff73, 0xff74, 0xff75, 0xff76, 0xff77, 0xff78,
  0xff79, 0xff7a, 0xff7b, 0xff7c, 0xff7d, 0xff7e, 0xff7f, 0xff80,
  0xff81, 0xff82, 0xff83, 0xff84, 0xff85, 0xff86, 0xff87, 0xff88,
  0xff89, 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, 0xff8f, 0xff90,
  0xff91, 0xff92, 0xff93, 0xff94, 0xff95, 0xff96, 0xff97, 0xff98,
  0xff99, 0xff9a, 0xff9b, 0xff9c, 0xff9d, 0xff9e, 0xff9f
};
01280 
01281 unsigned
01282 Parser::ExtCharToUnicode (int charset, int charcode)
01283 {
01284   unsigned ucode = 0;
01285   int cmax;
01286 
01287   if (charcode < 0)
01288     return 0;
01289 
01290   switch (charset)
01291     {
01292     case 1:
01293       // multinatinal character
01294       cmax = sizeof (multinational_map) / sizeof (multinational_map[0]);
01295       if (charcode < cmax)
01296         ucode = multinational_map[charcode];
01297       break;
01298     case 2:
01299       // phonetic symbol
01300       cmax = sizeof (phonetic_map) / sizeof (phonetic_map[0]);
01301       if (charcode < cmax)
01302         ucode = phonetic_map[charcode];
01303       break;
01304     case 3:
01305       // TODO box drawing
01306       break;
01307     case 4:
01308       // typographic symbol
01309       cmax = sizeof (typographic_map) / sizeof (typographic_map[0]);
01310       if (charcode < cmax)
01311         ucode = typographic_map[charcode];
01312       break;
01313     case 5:
01314       // iconic symbol
01315       cmax = sizeof (iconic_map) / sizeof (iconic_map[0]);
01316       if (charcode < cmax)
01317         ucode = iconic_map[charcode];
01318       break;
01319     case 6:
01320       // math/scientific
01321       cmax = sizeof (math_map) / sizeof (math_map[0]);
01322       if (charcode < cmax)
01323         ucode = math_map[charcode];
01324       break;
01325     case 7:
01326       // math/scientific extended
01327       cmax = sizeof (mathext_map) / sizeof (mathext_map[0]);
01328       if (charcode < cmax)
01329         ucode = mathext_map[charcode];
01330       break;
01331     case 8:
01332       // greek
01333       cmax = sizeof (greek_map) / sizeof (greek_map[0]);
01334       if (charcode < cmax)
01335         ucode = greek_map[charcode];
01336       break;
01337     case 9:
01338       // hebrew
01339       cmax = sizeof (hebrew_map) / sizeof (hebrew_map[0]);
01340       if (charcode < cmax)
01341         ucode = hebrew_map[charcode];
01342       break;
01343     case 10:
01344       // cyrillic
01345       cmax = sizeof (cyrillic_map) / sizeof (cyrillic_map[0]);
01346       if (charcode < cmax)
01347         ucode = cyrillic_map[charcode];
01348       break;
01349     case 11:
01350       // japanese
01351       cmax = sizeof (japanese_map) / sizeof (japanese_map[0]);
01352       if (charcode < cmax)
01353         ucode = japanese_map[charcode];
01354       break;
01355     }
01356 
01357   // FIXME not sure about this but I think codepoint >= 0xf000 is really
01358   // specific only to WP
01359   if( ucode >= 0xf000 ) ucode = 0;
01360 
01361   return ucode;
01362 }
KDE Home | KDE Accessibility Home | Description of Access Keys