filters

textzone.cc

00001 /* MEMO: to see the unicode table
00002  * xset +fp /usr/X11R6/lib/X11/fonts/ucs/
00003  * xfd -fn '-misc-fixed-medium-r-semicondensed--13-120-75-75-c-60-iso10646-1'
00004  */
00005 /*
00006 ** A program to convert the XML rendered by KWord into LATEX.
00007 **
00008 ** Copyright (C) 2000 - 2003 Robert JACOLIN
00009 **
00010 ** This library is free software; you can redistribute it and/or
00011 ** modify it under the terms of the GNU Library General Public
00012 ** License as published by the Free Software Foundation; either
00013 ** version 2 of the License, or (at your option) any later version.
00014 **
00015 ** This library is distributed in the hope that it will be useful,
00016 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 ** Library General Public License for more details.
00019 **
00020 ** To receive a copy of the GNU Library General Public License, write to the
00021 ** Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00022  * Boston, MA 02110-1301, USA.
00023 **
00024 */
00025 
00026 #include <kdebug.h>     /* for kdDebug() stream */
00027 #include <qregexp.h>        /* for QRegExp() --> escapeLatin1 */
00028 
00029 #include "textzone.h"
00030 #include "para.h"
00031 
00032 #define CSTART 0x00C0
00033 
00034 /*******************************************/
00035 /* TextZone                                */
00036 /*******************************************/
00037 TextZone::TextZone(Para *para)
00038 {
00039     setPara(para);
00040     if(para != NULL)
00041     {
00042         setSize(para->getSize());
00043         setWeight(para->getWeight());
00044         setItalic(para->isItalic());
00045         setUnderlined(para->getUnderlineType());
00046         setStrikeout(para->isStrikeout());
00047     }
00048 }
00049 
00050 /*******************************************/
00051 /* TextZone                                */
00052 /*******************************************/
00053 TextZone::TextZone(QString texte, Para *para): _texte(texte)
00054 {
00055     setPara(para);
00056     if(para != NULL)
00057     {
00058         setSize(para->getSize());
00059         setWeight(para->getWeight());
00060         setItalic(para->isItalic());
00061         setUnderlined(para->getUnderlineType());
00062         setStrikeout(para->isStrikeout());
00063     }
00064 }
00065 
00066 /*******************************************/
00067 /* ~TextZone                               */
00068 /*******************************************/
00069 TextZone::~TextZone()
00070 {
00071     kdDebug(30522) << "Destruction of a area" << endl;
00072 }
00073 
00074 /*******************************************/
00075 /* useFormat                               */
00076 /*******************************************/
00077 /* Use the format only if the user wants   */
00078 /* that and it's not a title.              */
00079 /*******************************************/
00080 bool TextZone::useFormat() const
00081 {
00082     return !getPara()->isChapter();
00083 }
00084 
00085 /*******************************************/
00086 /* escapeLatin1                            */
00087 /*******************************************/
00088 /* Convert special caracters (unicode) in  */
00089 /* latex usable caracters.                 */
00090 /*******************************************/
00091 QString TextZone::escapeLatin1(QString text)
00092 {
00093     static const char *escapes[64] =
00094     {
00095         "\\`{A}", "\\'{A}", "\\^{A}", "\\~{A}",
00096         "\\\"{A}", "\\AA", "\\AE", "\\c{C}",
00097         "\\`{E}", "\\'{E}", "\\^{E}", "\\\"{E}",
00098         "\\`{I}", "\\'{I}", "\\^{I}", "\\\"{I}",
00099 
00100         "\\DH{}", "\\~{N}", "\\`{O}", "\\'{O}",
00101         "\\^{O}", "\\~{O}", "\\\"{O}", "\\texttimes{}",
00102         "\\O{}", "\\`{U}", "\\'{U}", "\\^{U}",
00103         "\\\"{U}", "\\'{Y}", "\\TH{}", "\\ss{}",
00104 
00105         "\\`{a}", "\\'{a}", "\\^{a}", "\\~{a}",
00106         "\\\"{a}", "\\aa", "\\ae{}", "\\c{c}",
00107         "\\`{e}", "\\'{e}", "\\^{e}", "\\\"{e}",
00108         "\\`{\\i}", "\\'{\\i}", "\\^{\\i}", "\\\"{\\i}",
00109 
00110         "\\dh{}", "\\~{n}", "\\`{o}", "\\'{o}",
00111         "\\^{o}", "\\~{o}", "\\\"{o}", "\\textdiv{}",
00112         "\\o{}", "\\`{u}", "\\'{u}", "\\^{u}",
00113         "\\\"{u}", "\\'{y}", "\\th{}", "\\\"{y}"
00114     };
00115 
00116     QString escapedText;
00117     int unicode;         /* the character to be escaped */
00118 
00119     escapedText = text;  /* copy input text */
00120     
00121     /***************************************************************************
00122      * Escape the special punctuation and other symbols in the Latin1 supplement
00123     ****************************************************************************/
00124     /* We must begin by this char because else, all special char will
00125      * be backslahed !
00126      */
00127     convert(escapedText, 0X005C, "\\textbackslash{}");
00128 
00129     //convert(escapedText, 0X22, "\\textquotestraightdblbase");/* textcomp */
00130     convert(escapedText, 0X0023, "\\#{}");
00131     convert(escapedText, 0X0024, "\\${}");  /* add a \$ at the end of the paragraphes ! */
00132     convert(escapedText, 0X0025, "\\%{}");
00133     convert(escapedText, 0X0026, "\\&{}");
00134     //convert(escapedText, 0X0027, "\\textquotestraightbase");  /* textcomp */
00135 //  convert(escapedText, 0X002A, "\\textasteriskcentered"); /* textcomp */
00136 
00137     convert(escapedText, 0X003C, "\\textless{}");
00138     convert(escapedText, 0X003E, "\\textgreater{} ");
00139 
00140     convert(escapedText, 0X005E, "\\^{}");
00141     convert(escapedText, 0X005F, "\\_{}");      
00142     
00143     convert(escapedText, 0X007B, "\\{");
00144     convert(escapedText, 0X007C, "\\textbar{}");
00145     convert(escapedText, 0X007D, "\\}");
00146     convert(escapedText, 0X007E, "\\textasciitilde{}");
00147     
00148     convert(escapedText, 0X00A1, "!`{}");
00149     convert(escapedText, 0X00A2, "\\textcent{}");       /* textcomp */
00150     convert(escapedText, 0X00A3, "\\pounds{}");
00151     convert(escapedText, 0X00A4, "\\textcurrency{}");   /* textcomp */
00152     convert(escapedText, 0X00A5, "\\textyen{}");        /* textcomp */
00153     convert(escapedText, 0X00A6, "\\textbrokenbar{}");
00154     convert(escapedText, 0X00A7, "\\S{}");
00155     convert(escapedText, 0X00A8, "\\textasciidieresis{}");  /*? not good */
00156     convert(escapedText, 0X00A9, "\\copyright{}");
00157     convert(escapedText, 0X00AA, "\\textordfeminine{}");    /* textcomp */
00158     convert(escapedText, 0X00AB, "\\guillemotleft{}");  /* textcomp */
00159     convert(escapedText, 0X00AC, "\\textlnot{}");       /* textcomp */
00160 
00161     convert(escapedText, 0X00AE, "\\textregistered{}");
00162     convert(escapedText, 0X00AF, "\\textmacron{}");     /* textcomp */
00163     convert(escapedText, 0X00B0, "\\textdegree{}");     /* textcomp */
00164     convert(escapedText, 0X00B1, "\\textpm{}");     /* textcomp */
00165     convert(escapedText, 0X00B2, "\\texttwosuperior{}");    /* textcomp */
00166     convert(escapedText, 0X00B3, "\\textthreesuperior{}");  /* textcomp */
00167     convert(escapedText, 0X00B4, "' ");         /* textcomp */
00168     convert(escapedText, 0X00B5, "\\textmu{}");     /* textcomp */
00169     convert(escapedText, 0X00B6, "\\P{}");
00170     convert(escapedText, 0X00B7, "\\textperiodcentered{}"); /* not good textcomp */
00171 //  convert(escapedText, 0X00B8, "\\textthreesuperior{}");  /* textcomp */
00172     convert(escapedText, 0X00B9, "\\textonesuperior{}");    /* textcomp */
00173     convert(escapedText, 0X00BA, "\\textordmasculine{}");   /* textcomp */
00174     convert(escapedText, 0X00BB, "\\guillemotright{}"); /* textcomp */
00175     convert(escapedText, 0X00BC, "\\textonequarter{}"); /* textcomp */
00176     convert(escapedText, 0X00BD, "\\textonehalf{}");    /* textcomp */
00177     convert(escapedText, 0X00BE, "\\textthreequarters{}");  /* textcomp */
00178     convert(escapedText, 0X00BF, "?`{}");
00179     
00180     
00181     /* begin making escape sequences for the 64 consecutive letters starting at C0
00182      * LaTeX has a different escape code when a char is followed by a space so
00183      * two escape sequences are needed for each character.
00184      */
00185 
00186     for(int index = 0; index < 64; index++)
00187     {
00188         unicode = CSTART + index;
00189         convert(escapedText, unicode, escapes[index]);
00190     }
00191 
00192     convert(escapedText, 0X2020, "\\textdied{}");       /* textcomp */
00193     convert(escapedText, 0X2021, "\\textdaggerdbl{}");  /* textcomp */
00194     convert(escapedText, 0X2022, "'' ");            /* textcomp */
00195     convert(escapedText, 0X2023, "\\textdaggerdbl{}");  /* textcomp */
00196     convert(escapedText, 0X2024, "\\textdaggerdbl{}");  /* textcomp */
00197     convert(escapedText, 0X2025, "\\textdaggerdbl{}");  /* textcomp */
00198     convert(escapedText, 0X2026, "\\&{}");          /* textcomp */
00199     convert(escapedText, 0X2027, "\\textperiodcentered{}"); /* textcomp */
00200     convert(escapedText, 0X2030, "\\textperthousand{}");    /* textcomp */
00201     convert(escapedText, 0X2031, "\\textpertenthousand{}"); /* textcomp */
00202     convert(escapedText, 0X2032, "\\textasciiacute{}"); /* textcomp */
00203     convert(escapedText, 0X2033, "\\textgravedbl{}");   /* textcomp */
00204     convert(escapedText, 0X2034, "\\textdaggerdbl{}");  /* textcomp */
00205     convert(escapedText, 0X2035, "\\textasciigrave{}"); /* textcomp */
00206     convert(escapedText, 0X2036, "\\textacutedbl{}");   /* textcomp */
00207     convert(escapedText, 0X2037, "\\textdaggerdbl{}");  /* textcomp */
00208     convert(escapedText, 0X2038, "\\textdaggerdbl{}");  /* textcomp */
00209     convert(escapedText, 0X2039, "\\textdaggerdbl{}");  /* textcomp */
00210     convert(escapedText, 0X203A, "\\textdaggerdbl{}");  /* textcomp */
00211     convert(escapedText, 0X203B, "\\textreferencemark{}");  /* textcomp */
00212     convert(escapedText, 0X203D, "\\textinterrobang{}");    /* textcomp */
00213 
00214     convert(escapedText, 0X2045, "\\textlquill{}");     /* textcomp */
00215     convert(escapedText, 0X2046, "\\textrquill{}");     /* textcomp */
00216 
00217 
00218     convert(escapedText, 0X2080, "\\textzerooldstyle{}");   /* textcomp */
00219     convert(escapedText, 0X2081, "\\textoneoldstyle{}");    /* textcomp */
00220     convert(escapedText, 0X2082, "\\texttwooldstyle{}");    /* textcomp */
00221     convert(escapedText, 0X2083, "\\textthreeoldstyle{}");  /* textcomp */
00222     convert(escapedText, 0X2084, "\\textfouroldstyle{}");   /* textcomp */
00223     convert(escapedText, 0X2085, "\\textfiveoldstyle{}");   /* textcomp */
00224     convert(escapedText, 0X2086, "\\textsixoldstyle{}");    /* textcomp */
00225     convert(escapedText, 0X2087, "\\textsevenoldstyle{}");  /* textcomp */
00226     convert(escapedText, 0X2088, "\\texteightoldstyle{}");  /* textcomp */
00227     convert(escapedText, 0X2089, "\\textnineoldstyle{}");   /* textcomp */
00228     convert(escapedText, 0X208C, "\\textdblhyphen{}");  /* textcomp */
00229 
00230     convert(escapedText, 0X20A4, "\\textsterling{}");   /* textcomp */
00231     convert(escapedText, 0X20A6, "\\textnaria{}");      /* textcomp */
00232     convert(escapedText, 0X20AA, "\\textwon{}");        /* textcomp */
00233     convert(escapedText, 0X20AB, "\\textdong{}");       /* textcomp */
00234     convert(escapedText, 0X20AC, "\\texteuro{}");       /* textcomp */
00235 
00236     convert(escapedText, 0X2103, "\\textcelsius{}");    /* textcomp */
00237     convert(escapedText, 0X2116, "\\textnumero{}");     /* textcomp */
00238     convert(escapedText, 0X2117, "\\textcircledP{}");   /* textcomp */
00239     convert(escapedText, 0X2120, "\\textservicemark{}");    /* textcomp */
00240     convert(escapedText, 0X2122, "\\texttrademark{}");  /* textcomp */
00241     convert(escapedText, 0X2126, "\\textohm{}");        /* textcomp */
00242     convert(escapedText, 0X2127, "\\textmho{}");        /* textcomp */
00243     convert(escapedText, 0X212E, "\\textestimated{}");  /* textcomp */
00244 
00245     convert(escapedText, 0X2190, "\\textleftarrow{}");  /* textcomp */
00246     convert(escapedText, 0X2191, "\\textuparrow{}");    /* textcomp */
00247     convert(escapedText, 0X2192, "\\textrightarrow{}"); /* textcomp */
00248     convert(escapedText, 0X2193, "\\textdownarrow{}");  /* textcomp */
00249 //  convert(escapedText, 0X2194, "\\texteuro{}");       /* textcomp */
00250 //  convert(escapedText, 0X2195, "\\texteuro{}");       /* textcomp */
00251 //  convert(escapedText, 0X2196, "\\texteuro{}");       /* textcomp */
00252 
00253     return escapedText;
00254 }
00255 
00256 /*******************************************/
00257 /* convert                                 */
00258 /*******************************************/
00259 /* Convert all the instance of one         */
00260 /* character in latex usable caracter.     */
00261 /*******************************************/
00262 void TextZone::convert(QString& text, int unicode, const char* escape)
00263 {
00264     QString expression;
00265     QString value;
00266 
00267     expression = QString("\\x") + value.setNum(unicode, 16);
00268 
00269     if( !QString(escape).isEmpty() )
00270     {
00271         /*1. translate special characters with a space after. */
00272         text = text.replace( QRegExp( expression), QString(escape));
00273     }
00274 }
00275 
00276 /*******************************************/
00277 /* analyse                                 */
00278 /*******************************************/
00279 /* Analyse a text format, get the text used*/
00280 /* by this format.                         */
00281 /*******************************************/
00282 void TextZone::analyse(const QDomNode balise)
00283 {
00284     kdDebug(30522) << "FORMAT" << endl;
00285     /* Get header information (size, position)
00286      * Get infos. to format the text
00287      */
00288     //if(balise != 0)
00289         analyseFormat(balise);
00290     
00291     /* Format the text */
00292     setTexte(getTexte().mid(getPos(), getLength()));
00293     
00294     kdDebug(30522) << getTexte().length() << endl;
00295     kdDebug(30522) << getTexte().latin1() << endl;
00296     kdDebug(30522) << "END FORMAT" << endl;
00297 }
00298 
00299 /*******************************************/
00300 /* analyse                                 */
00301 /*******************************************/
00302 /* Analyse a text format, get the text used*/
00303 /* by this format.                         */
00304 /*******************************************/
00305 void TextZone::analyse()
00306 {
00307     kdDebug(30522) << "ZONE" << endl;
00308     
00309     /* Format the text */
00310     setTexte(getTexte().mid(getPos(), getLength()));
00311     
00312     kdDebug(30522) << "String of " << getTexte().length() << " caracters :" << endl;
00313     kdDebug(30522) << getTexte().latin1() << endl;
00314     kdDebug(30522) << "END ZONE" << endl;
00315 }
00316 
00317 /*******************************************/
00318 /* generate                                */
00319 /*******************************************/
00320 /* Generate the text formated (if needed). */
00321 /*******************************************/
00322 void TextZone::generate(QTextStream &out)
00323 {
00324 
00325     if(useFormat())
00326         generate_format_begin(out);
00327 
00328     /* Display the text */
00329     if(Config::instance()->getEncoding() == "latin1")
00330         display(_texte, out);
00331     else if(Config::instance()->mustUseUnicode())
00332         display(_texte, out);
00333     else
00334         display(escapeLatin1(_texte), out); 
00335 
00336     if(useFormat())
00337         generate_format_end(out);
00338 }
00339 
00340 /*******************************************/
00341 /* display                                 */
00342 /*******************************************/
00343 /* Trunc the text in about 80 caracters of */
00344 /* width except if there are not spaces.   */
00345 /*******************************************/
00346 void TextZone::display(QString texte, QTextStream& out)
00347 {
00348     QString line;
00349     int index = 0, end = 0;
00350     end = texte.find(' ', 60, false);
00351     if(end != -1)
00352         line = texte.mid(index, end - index);
00353     else
00354         line = texte;
00355     while(end < (signed int) texte.length() && end != -1)
00356     {
00357         /* There are something to display */
00358         if(Config::instance()->mustUseUnicode())
00359             out << line.utf8() << endl;
00360         else if(Config::instance()->mustUseLatin1())
00361             out << line << endl;
00362         Config::instance()->writeIndent(out);
00363         index = end;
00364         end = texte.find(' ', index + 60, false);
00365         line = texte.mid(index, end - index);
00366     }
00367     kdDebug(30522) << line << endl;
00368     if(Config::instance()->mustUseUnicode())
00369         out << line.utf8();
00370     else if(Config::instance()->getEncoding() == "ascii")
00371         out << line.ascii();
00372     else
00373         out << line;
00374 }
00375 
00376 /*******************************************/
00377 /* generate_format_begin                   */
00378 /*******************************************/
00379 /* Write the begining format markup.       */
00380 /*******************************************/
00381 void TextZone::generate_format_begin(QTextStream & out)
00382 {
00383     kdDebug(30522) << "GENERATE FORMAT BEGIN" << endl;
00384 
00385     /* Bold, Italic or underlined */
00386     if(getWeight() > 50)
00387         out << "\\textbf{";
00388     if(isItalic())
00389         out << "\\textit{";
00390     if(getUnderlineType() == UNDERLINE_SIMPLE)
00391         out << "\\uline{";
00392     else if(getUnderlineType() == UNDERLINE_DOUBLE)
00393         out << "\\uuline{";
00394     else if(getUnderlineType() == UNDERLINE_WAVE)
00395         out << "\\uwave{";
00396     if (isStrikeout())
00397         out << "\\sout{";
00398     
00399     /* Size */
00400     if(getSize() != Config::instance()->getDefaultFontSize() &&
00401             Config::instance()->isKwordStyleUsed())
00402     {
00403         out << "\\fontsize{" << getSize() << "}{1}%" << endl;
00404         Config::instance()->writeIndent(out);
00405         out << "\\selectfont" << endl;
00406         Config::instance()->writeIndent(out);
00407     }
00408 
00409     /* background color */
00410     if(isBkColored())
00411     {
00412         float red, green, blue;
00413 
00414         red   = ((float) getBkColorRed()) / 255;
00415         green = ((float) getBkColorGreen()) / 255;
00416         blue  = ((float) getBkColorBlue()) / 255;
00417 
00418         out << "\\colorbox[rgb]{";
00419         out << red << ", " << green << ", " << blue << "}{";
00420     }
00421     
00422     /* Color */
00423     if(isColor())
00424     {
00425         float red, green, blue;
00426 
00427         red   = ((float) getColorRed()) / 255;
00428         green = ((float) getColorGreen()) / 255;
00429         blue  = ((float) getColorBlue()) / 255;
00430 
00431         out << "\\textcolor[rgb]{";
00432         out << red << ", " << green << ", " << blue << "}{";
00433     }
00434 
00435     /* Alignement */
00436     switch(getAlign())
00437     {
00438         case EA_NONE:
00439             break;
00440         case EA_SUB: /* pass in math mode !! */
00441             out << "$_{";
00442             break;
00443         case EA_SUPER:
00444             out << "\\textsuperscript{";
00445             break;
00446     }
00447 }
00448 
00449 /*******************************************/
00450 /* generate_format_end                     */
00451 /*******************************************/
00452 /* Write the format end markup.            */
00453 /*******************************************/
00454 void TextZone::generate_format_end(QTextStream & out)
00455 {
00456     kdDebug(30522) << "GENERATE FORMAT END" << endl;
00457     
00458     /* Alignement */
00459     if(getAlign() == EA_SUPER)
00460         out << "}";
00461     if(getAlign() == EA_SUB)
00462         out << "}$";
00463 
00464     /* Color */
00465     if(isColor() || isBkColored())
00466         out << "}";
00467 
00468     /* Size */
00469     if(getSize() != Config::instance()->getDefaultFontSize() && Config::instance()->isKwordStyleUsed())
00470     {
00471         out << "\\fontsize{" << Config::instance()->getDefaultFontSize() << "}{1}%" << endl;
00472         Config::instance()->writeIndent(out);
00473         out << "\\selectfont" << endl;
00474         Config::instance()->writeIndent(out);
00475     }
00476 
00477     /* Bold, Italic or underlined */
00478     if(isUnderlined())
00479         out << "}";
00480     if(isItalic())
00481         out << "}";
00482     if(getWeight() > 50)
00483         out << "}";
00484     if(isStrikeout())
00485         out << "}";
00486 }
00487 
00488 QString convertSpecialChar(int c)
00489 {
00490     QString output;
00491 
00492     switch(c)
00493     {
00494         case 183: return output = "\\textminus";
00495             break;
00496         default: return output.setNum(c);
00497     }
00498 }
KDE Home | KDE Accessibility Home | Description of Access Keys