filters

asciiimport.cc

00001 /* This file is part of the KDE project
00002    Copyright (C) 1998, 1999 Reginald Stadlbauer <reggie@kde.org>
00003    Copyright (C) 2000 Michael Johnson <mikej@xnet.com>
00004    Copyright (C) 2001, 2002, 2004 Nicolas GOUTTE <goutte@kde.org>
00005 
00006    This library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Library General Public
00008    License as published by the Free Software Foundation; either
00009    version 2 of the License, or (at your option) any later version.
00010 
00011    This library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Library General Public License for more details.
00015 
00016    You should have received a copy of the GNU Library General Public License
00017    along with this library; see the file COPYING.LIB.  If not, write to
00018    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019  * Boston, MA 02110-1301, USA.
00020 */
00021 
00022 #include <config.h>
00023 
00024 #ifdef HAVE_UNISTD_H
00025 #include <unistd.h>
00026 #endif
00027 
00028 #include <qobject.h>
00029 #include <qstring.h>
00030 // #include <qregexp.h> // Currently not needed (due to disabled code)
00031 #include <qtextcodec.h>
00032 #include <qfile.h>
00033 #include <qtextstream.h>
00034 #include <qdom.h>
00035 #include <qfontinfo.h>
00036 
00037 #include <kdebug.h>
00038 #include <kglobal.h>
00039 #include <kgenericfactory.h>
00040 
00041 #include <KoGlobal.h>
00042 #include <KoPageLayout.h>
00043 #include <KoStore.h>
00044 #include <KoFilterChain.h>
00045 #include <KoFilterManager.h>
00046 
00047 #include <asciiimport.h>
00048 #include <asciiimport.moc>
00049 #include "ImportDialog.h"
00050 
00051 class ASCIIImportFactory : KGenericFactory<ASCIIImport, KoFilter>
00052 {
00053 public:
00054     ASCIIImportFactory(void) : KGenericFactory<ASCIIImport, KoFilter> ("kwordasciiimport")
00055     {}
00056 protected:
00057     virtual void setupTranslations( void )
00058     {
00059         KGlobal::locale()->insertCatalogue( "kofficefilters" );
00060     }
00061 };
00062 
00063 K_EXPORT_COMPONENT_FACTORY( libasciiimport, ASCIIImportFactory() )
00064 
00065 ASCIIImport::ASCIIImport(KoFilter *, const char *, const QStringList &) :
00066                      KoFilter() {
00067 }
00068 
00069 void ASCIIImport::prepareDocument(QDomDocument& mainDocument, QDomElement& mainFramesetElement)
00070 {
00071     // TODO: other paper formats
00072     KoFormat paperFormat=PG_DIN_A4; // ISO A4
00073     KoOrientation paperOrientation=PG_PORTRAIT;
00074 
00075     mainDocument.appendChild(
00076         mainDocument.createProcessingInstruction(
00077         "xml","version=\"1.0\" encoding=\"UTF-8\""));
00078 
00079     QDomElement elementDoc;
00080     elementDoc=mainDocument.createElement("DOC");
00081     elementDoc.setAttribute("editor","KWord's Plain Text Import Filter");
00082     elementDoc.setAttribute("mime","application/x-kword");
00083     // TODO: We claim to be syntax version 2, but we should verify that it is also true.
00084     elementDoc.setAttribute("syntaxVersion",2);
00085     mainDocument.appendChild(elementDoc);
00086 
00087     QDomElement element;
00088     element=mainDocument.createElement("ATTRIBUTES");
00089     element.setAttribute("processing",0);
00090     element.setAttribute("standardpage",1);
00091     element.setAttribute("hasHeader",0);
00092     element.setAttribute("hasFooter",0);
00093     //element.setAttribute("unit","mm"); // use KWord default instead
00094     elementDoc.appendChild(element);
00095 
00096     QDomElement elementPaper=mainDocument.createElement("PAPER");
00097     elementPaper.setAttribute("format",paperFormat);
00098     elementPaper.setAttribute("width" ,KoPageFormat::width (paperFormat,paperOrientation) * 72.0 / 25.4);
00099     elementPaper.setAttribute("height",KoPageFormat::height(paperFormat,paperOrientation) * 72.0 / 25.4);
00100     elementPaper.setAttribute("orientation",PG_PORTRAIT);
00101     elementPaper.setAttribute("columns",1);
00102     elementPaper.setAttribute("columnspacing",2);
00103     elementPaper.setAttribute("hType",0);
00104     elementPaper.setAttribute("fType",0);
00105     elementPaper.setAttribute("spHeadBody",9);
00106     elementPaper.setAttribute("spFootBody",9);
00107     elementPaper.setAttribute("zoom",100);
00108     elementDoc.appendChild(elementPaper);
00109 
00110     element=mainDocument.createElement("PAPERBORDERS");
00111     element.setAttribute("left",28);
00112     element.setAttribute("top",42);
00113     element.setAttribute("right",28);
00114     element.setAttribute("bottom",42);
00115     elementPaper.appendChild(element);
00116 
00117     QDomElement framesetsPluralElementOut=mainDocument.createElement("FRAMESETS");
00118     mainDocument.documentElement().appendChild(framesetsPluralElementOut);
00119 
00120     mainFramesetElement=mainDocument.createElement("FRAMESET");
00121     mainFramesetElement.setAttribute("frameType",1);
00122     mainFramesetElement.setAttribute("frameInfo",0);
00123     mainFramesetElement.setAttribute("autoCreateNewFrame",1);
00124     mainFramesetElement.setAttribute("removable",0);
00125     // TODO: "name" attribute (needs I18N)
00126     framesetsPluralElementOut.appendChild(mainFramesetElement);
00127 
00128     QDomElement frameElementOut=mainDocument.createElement("FRAME");
00129     frameElementOut.setAttribute("left",28);
00130     frameElementOut.setAttribute("top",42);
00131     frameElementOut.setAttribute("bottom",566);
00132     frameElementOut.setAttribute("right",798);
00133     frameElementOut.setAttribute("runaround",1);
00134     mainFramesetElement.appendChild(frameElementOut);
00135 
00136     QDomElement elementStylesPlural=mainDocument.createElement("STYLES");
00137     elementDoc.appendChild(elementStylesPlural);
00138 
00139     QDomElement elementStyleStandard=mainDocument.createElement("STYLE");
00140     elementStylesPlural.appendChild(elementStyleStandard);
00141 
00142     element=mainDocument.createElement("NAME");
00143     element.setAttribute("value","Standard");
00144     elementStyleStandard.appendChild(element);
00145 
00146     element=mainDocument.createElement("FOLLOWING");
00147     element.setAttribute("name","Standard");
00148     elementStyleStandard.appendChild(element);
00149 
00150     QDomElement elementFormat=mainDocument.createElement("FORMAT");
00151     elementStyleStandard.appendChild(elementFormat);
00152 
00153     // Use QFontInfo, as it does not give back -1 as point size.
00154     QFontInfo defaultFontInfo(KoGlobal::defaultFont());
00155 
00156     element=mainDocument.createElement("FONT");
00157     element.setAttribute("name",defaultFontInfo.family());
00158     elementFormat.appendChild(element);
00159 
00160     element=mainDocument.createElement("SIZE");
00161     element.setAttribute("value",defaultFontInfo.pointSize());
00162     elementFormat.appendChild(element);
00163 
00164     element=mainDocument.createElement("ITALIC");
00165     element.setAttribute("value",0);
00166     elementFormat.appendChild(element);
00167 
00168     element=mainDocument.createElement("WEIGHT");
00169     element.setAttribute("value",50);
00170     elementFormat.appendChild(element);
00171 
00172     element=mainDocument.createElement("UNDERLINE");
00173     element.setAttribute("value",0);
00174     elementFormat.appendChild(element);
00175 
00176     element=mainDocument.createElement("STRIKEOUT");
00177     element.setAttribute("value",0);
00178     elementFormat.appendChild(element);
00179 
00180     element=mainDocument.createElement("VERTALIGN");
00181     element.setAttribute("value",0);
00182     elementFormat.appendChild(element);
00183 
00184     element=mainDocument.createElement("COLOR");
00185     element.setAttribute("red",  -1);
00186     element.setAttribute("green",1);
00187     element.setAttribute("blue", -1);
00188     elementFormat.appendChild(element);
00189 
00190     element=mainDocument.createElement("TEXTBACKGROUNDCOLOR");
00191     element.setAttribute("red",  -1);
00192     element.setAttribute("green",-1);
00193     element.setAttribute("blue", -1);
00194     elementFormat.appendChild(element);
00195 
00196 }
00197 
00198 KoFilter::ConversionStatus ASCIIImport::convert( const QCString& from, const QCString& to )
00199 {
00200     if (to!="application/x-kword" || from!="text/plain")
00201         return KoFilter::NotImplemented;
00202 
00203     AsciiImportDialog* dialog = 0;
00204     if (!m_chain->manager()->getBatchMode())
00205     {
00206         dialog = new AsciiImportDialog();
00207     if (!dialog)
00208     {
00209       kdError(30502) << "Dialog has not been created! Aborting!" << endl;
00210       return KoFilter::StupidError;
00211     }
00212     if (!dialog->exec())
00213     {
00214       kdDebug(30502) << "Dialog was aborted! Aborting filter!" << endl; // this isn't an error!
00215       return KoFilter::UserCancelled;
00216     }
00217     }
00218 
00219     QTextCodec* codec;
00220     int paragraphStrategy;
00221     if (dialog)
00222     {
00223         codec = dialog->getCodec();
00224     paragraphStrategy=dialog->getParagraphStrategy();
00225     delete dialog;
00226     }
00227     else
00228     {
00229       codec = QTextCodec::codecForName("UTF-8");
00230       paragraphStrategy=0;
00231     }
00232 
00233     QFile in(m_chain->inputFile());
00234     if(!in.open(IO_ReadOnly)) {
00235         kdError(30502) << "Unable to open input file!" << endl;
00236         in.close();
00237         return KoFilter::FileNotFound;
00238     }
00239 
00240     QString tbl;  // string for table XML
00241 
00242     QDomDocument mainDocument;
00243     QDomElement mainFramesetElement;
00244 
00245     prepareDocument(mainDocument,mainFramesetElement);
00246 
00247     QTextStream stream(&in);
00248 
00249     if (!codec)
00250     {
00251         kdError(30502) << "Could not create QTextCodec! Aborting" << endl;
00252         in.close();
00253         return KoFilter::StupidError;
00254     }
00255 
00256     kdDebug(30502) << "Charset used: " << codec->name() << endl;
00257 
00258     stream.setCodec(codec);
00259 
00260     if (1==paragraphStrategy)
00261         sentenceConvert(stream, mainDocument, mainFramesetElement);
00262     else if (999==paragraphStrategy)
00263         oldWayConvert(stream, mainDocument, mainFramesetElement);
00264     else
00265         asIsConvert(stream, mainDocument, mainFramesetElement);
00266 
00267 
00268 #if 0
00269     kdDebug(30502) << mainDocument.toString() << endl;
00270 #endif
00271 
00272     KoStoreDevice* out=m_chain->storageFile( "root", KoStore::Write );
00273     if(!out) {
00274         kdError(30502) << "Unable to open output file!" << endl;
00275         in.close();
00276         return KoFilter::StorageCreationError;
00277     }
00278     QCString cstr=mainDocument.toCString();
00279     // WARNING: we cannot use KoStore::write(const QByteArray&) because it gives an extra NULL character at the end.
00280     out->writeBlock(cstr,cstr.length());
00281     in.close();
00282     return KoFilter::OK;
00283 }
00284 
00285 void ASCIIImport::oldWayConvert(QTextStream& stream, QDomDocument& mainDocument,
00286     QDomElement& mainFramesetElement)
00287 {
00288     kdDebug(30502) << "Entering: ASCIIImport::oldWayConvert" << endl;
00289     QStringList paragraph;  // lines of the paragraph
00290     int linecount = 0;  // line counter used to position tables
00291     //int table_no = 0;  // used for table identifiers
00292     int numLines; // Number of lines of the paragraph
00293 
00294     bool lastCharWasCr=false; // Was the previous character a Carriage Return?
00295     QString strLine;
00296     while(!stream.atEnd())
00297     {
00298         paragraph.clear();
00299         // Read in paragraph
00300         for(int line_no = numLines = 0; line_no < MAXLINES; line_no++, numLines++)
00301         {
00302             // We need to read a line
00303             // NOTE: we cannot use QStreamText::readLine,
00304             //   as it does not know anything about Carriage Returns
00305             strLine=readLine(stream,lastCharWasCr);
00306             if (strLine.isEmpty())
00307             {
00308                 paragraph.append(QString::null);
00309                 break;
00310             }
00311 
00312             const int length = strLine.length();
00313             if (strLine.at(length-1) == '-')
00314                 // replace the hard hyphen - at line end by a soft hyphen
00315                 strLine.at(length-1)=QChar(173);
00316             else
00317                 strLine += ' '; // add space to end of line
00318 
00319             paragraph.append(strLine);
00320         } // for(line_no = 0;
00321 
00322          //   process tables
00323 #if 0
00324         if ( Table( &Line[0], &linecount, numLines, table_no, tbl, str))
00325             table_no++;
00326         else
00327 #else
00328         if (true)
00329 #endif
00330         {
00331         // Process bullet and dash lists
00332 #if 0
00333         if(ListItem( &Line[0], numLines, str))
00334             linecount += (numLines + 1);
00335         else
00336            {
00337 #else
00338             if (true)
00339             {
00340 #endif
00341                 processParagraph(mainDocument,mainFramesetElement,paragraph);
00342                 linecount += ( numLines + 1);  // increment the line count
00343             }  // else
00344         }  // else
00345      }  // while(!eof)
00346 
00347 #if 0
00348     // Add table info
00349     if( table_no > 0) str += tbl;
00350 #endif
00351 }
00352 
00353 void ASCIIImport::asIsConvert(QTextStream& stream, QDomDocument& mainDocument,
00354     QDomElement& mainFramesetElement)
00355 // Paragraph strategy: one line, one paragraph
00356 {
00357     kdDebug(30502) << "Entering: ASCIIImport::asIsConvert" << endl;
00358     bool lastCharWasCr=false; // Was the previous character a Carriage Return?
00359     QString strLine;
00360     while(!stream.atEnd())
00361     {
00362         // Read one line and consider it being a paragraph
00363         strLine=readLine(stream,lastCharWasCr);
00364         writeOutParagraph(mainDocument,mainFramesetElement, "Standard", strLine,
00365             0, 0);
00366     }
00367 }
00368 
00369 void ASCIIImport::sentenceConvert(QTextStream& stream, QDomDocument& mainDocument,
00370     QDomElement& mainFramesetElement)
00371 // Strategy:
00372 // - end a paragraph when a line ends with a point or similar punctuation.
00373 // - search the punctuation at the end of the line, even if the sentence is quoted or in parentheses.
00374 // - an empty line also ends the paragraph
00375 // TODO/FIXME: we have a little problem with empty lines. Perhaps we should not allow empty paragraphs!
00376 {
00377     kdDebug(30502) << "Entering: ASCIIImport::sentenceConvert" << endl;
00378     QStringList paragraph;  // lines of the paragraph
00379     bool lastCharWasCr=false; // Was the previous character a Carriage Return?
00380     QString strLine;
00381     QString stoppingPunctuation(".!?");
00382     QString skippingQuotes("\"')");
00383     while (!stream.atEnd())
00384     {
00385         paragraph.clear();
00386         for(;;)
00387         {
00388             // We need to read a line
00389             // NOTE: we cannot use QStreamText::readLine,
00390             //   as it does not know anything about Carriage Returns
00391             strLine=readLine(stream,lastCharWasCr);
00392             if (strLine.isEmpty())
00393             {
00394                 break;
00395             }
00396 
00397             paragraph.append(strLine);
00398 
00399             uint lastPos=strLine.length()-1;
00400             QChar lastChar;
00401             // Skip a maximum of 10 quotes (or similar) at the end of the line
00402             for (int i=0;i<10;i++)
00403             {
00404                 lastChar=strLine[lastPos];
00405                 if (lastChar.isNull())
00406                     break;
00407                 else if (skippingQuotes.find(lastChar)==-1)
00408                     break;
00409                 else
00410                     lastPos--;
00411             }
00412 
00413             lastChar=strLine[lastPos];
00414             if (lastChar.isNull())
00415                 continue;
00416             else if (stoppingPunctuation.find(lastChar)!=-1)
00417                 break;
00418         }
00419 #if 1
00420         writeOutParagraph(mainDocument,mainFramesetElement, "Standard",
00421             paragraph.join(" ").simplifyWhiteSpace(), 0, 0);
00422 #else
00423         // FIXME/TODO: why is this not working?
00424         //processParagraph(mainDocument,mainFramesetElement,paragraph);
00425 #endif
00426      }  // while(!eof)
00427 }
00428 
00429 void ASCIIImport::processParagraph(QDomDocument& mainDocument,
00430     QDomElement& mainFramesetElement, const QStringList& paragraph)
00431 {
00432     // Paragraph with no tables or lists
00433     QString text;
00434     QStringList::ConstIterator it=paragraph.begin(); // Current line (at start, the first one)
00435     QStringList::ConstIterator previousLine=it; // The previous one (at start, also the first one)
00436     int firstindent=Indent(*it);
00437 
00438     // We work with one line in advance (therefore the two it++)
00439     for( it++; it!=paragraph.end(); it++)
00440     {
00441         text += *previousLine; // add previous line to paragraph
00442         // check for a short line - if short make it a paragraph
00443         if( (*previousLine).length() <= shortline)
00444         {
00445             if((*it).length() > shortline)
00446             // skip if short last line of normal paragraph
00447             {
00448                 const int secondindent = Indent(*previousLine);
00449                 writeOutParagraph(mainDocument,mainFramesetElement,
00450                     "Standard", text.simplifyWhiteSpace(), firstindent, secondindent);
00451 
00452                 firstindent = Indent(*it);
00453                 text = QString::null;  // reinitialize paragraph text
00454             }
00455         }
00456         previousLine=it;
00457     }
00458     // write out paragraph begin to end
00459     const int secondindent = Indent(*previousLine);
00460     writeOutParagraph(mainDocument,mainFramesetElement,
00461         "Standard", text.simplifyWhiteSpace(), firstindent, secondindent);
00462 }
00463 
00464 void ASCIIImport::writeOutParagraph(QDomDocument& mainDocument,
00465     QDomElement& mainFramesetElement, const QString& name,
00466     const QString& text, const int firstindent, const int secondindent)
00467 {
00468     QDomElement paragraphElementOut=mainDocument.createElement("PARAGRAPH");
00469     mainFramesetElement.appendChild(paragraphElementOut);
00470     QDomElement textElement=mainDocument.createElement("TEXT");
00471     paragraphElementOut.appendChild(textElement);
00472     //QDomElement formatsPluralElementOut=mainDocument.createElement("FORMATS");
00473     //paragraphElementOut.appendChild(formatsPluralElementOut);
00474     QDomElement layoutElement=mainDocument.createElement("LAYOUT");
00475     paragraphElementOut.appendChild(layoutElement);
00476 
00477     QDomElement element;
00478     element=mainDocument.createElement("NAME");
00479     element.setAttribute("value",name);
00480     layoutElement.appendChild(element);
00481 
00482     double size;
00483     element=mainDocument.createElement("INDENTS");
00484     size = firstindent-secondindent;
00485     size *= ptsperchar;  // convert indent spaces to points
00486     element.setAttribute("first",QString::number(size));
00487     size = secondindent;
00488     size *= ptsperchar;  // convert indent spaces to points
00489     element.setAttribute("left",QString::number(size));
00490     element.setAttribute("right",0);
00491 
00492     textElement.appendChild(mainDocument.createTextNode(text));
00493     textElement.normalize(); // Put text together (not sure if needed)
00494 
00495 #if 0
00496     // If the paragraph is indented, write out indentation elements.
00497     // TODO: why not always write identation?
00498     if (firstindent > 0 || secondindent > 0)         \
00499              WriteOutIndents( firstindent, secondindent, str);
00500 
00501     // If this is a bullet or a dash list, write out a COUNTER element
00502     if(type.isEmpty())
00503        {
00504        str += "<COUNTER type=\"";
00505        str += type;  // "6" = bullet "7" = dash
00506        str += "\" depth=\"0\" start=\"1\" numberingtype=\"0\" lefttext=\"\" righttext=\".\" />\n";
00507        }
00508 
00509     str += "<FORMAT>\n";
00510     // for now we try with style (TODO: verify that KWord 1.2 works correctly)
00511     //str += "<FONT name=\"times\"/>\n";
00512     str += "</FORMAT>\n";
00513     str += "</LAYOUT>\n";
00514     str += "</PARAGRAPH>\n";
00515 #endif
00516 }  // WriteOutParagraph
00517 
00518    /* The Indent method determines the equivalent number of spaces
00519       at the beginning of a line   */
00520 
00521 int ASCIIImport::Indent(const QString& line) const
00522 {
00523 
00524     QChar c;  // for reading string a character at a time
00525     int count=0;  // amount of white space at the begining of the line
00526 
00527     for( uint i = 0; i < line.length(); i++ )
00528     {
00529          c = line.at(i);
00530          if( c == QChar(' '))
00531             count++;
00532          else if( c == QChar('\t'))
00533             count += spacespertab;
00534          else
00535             break;
00536     }
00537 
00538    return count;
00539 
00540 }  // Indent
00541 
00542     /* The WriteOutTableCell method writes out a single table cell
00543         in Kword XML to the output string str. The table is sized according
00544        to the spacing in the ascii document.
00545        Arguments
00546          int table_no - used to keep the group number count
00547          int row - row number
00548          int col - column number
00549          Position *pos - pointer to the table position
00550          QString str - the output string.
00551 
00552        Returns - void
00553         */
00554 
00555 #if 0
00556    void ASCIIImport::WriteOutTableCell( int table_no, int row,
00557                          int col, Position *pos, QString &str)
00558    {
00559 
00560    str += "<FRAMESET frameType=\"1\" frameInfo=\"0\" grpMgr=\"grpmgr_";
00561 
00562    QString buf = QString::number(table_no); // convert int to string
00563    str += buf;
00564 
00565    str += "\" row=\"";
00566    buf = QString::number(row); // convert row to string
00567    str += buf;
00568 
00569    str += "\" col=\"";
00570    buf = QString::number(col); // convert col to string
00571    str += buf;
00572 
00573    str += "\" rows=\"1\" cols=\"1\" removable=\"0\" visible=\"1\" name=\"Cell ";
00574    buf = QString::number(row); // convert row to string
00575    str += buf;
00576    str += ",";
00577    buf = QString::number(col); // convert col to string
00578    str += buf;
00579 
00580    str += "\">\n";
00581 
00582    str += "<FRAME left=\"";
00583    buf = QString::number(pos->left); // convert left coordinate to string
00584    str += buf;
00585 
00586    str += "\" top=\"";
00587    buf = QString::number(pos->top); // convert top coordinate to string
00588    str += buf;
00589 
00590    str += "\" right=\"";
00591    buf = QString::number(pos->right); // convert right coordinate to string
00592    str += buf;
00593 
00594    str += "\" bottom=\"";
00595    buf = QString::number(pos->bottom); // convert bottom coordinate to string
00596    str += buf;
00597 
00598    str += "\" runaround=\"0\" autoCreateNewFrame=\"0\" newFrameBehavior=\"1\" />\n";
00599 
00600    return;
00601 
00602    }  // WriteOutTableCell
00603 
00604 
00605 
00606 
// Disabled table detection. The function is compiled out by the enclosing
// "#if 0" and, in addition, returns false as its very first statement, so
// everything after that statement is unreachable.
// It calls WriteOutParagraph() with a string-based signature (style, type,
// text, indents, output string) that differs from the DOM-based
// writeOutParagraph() defined in this file.
bool ASCIIImport::Table( QString *Line, int *linecount, int no_lines,
                         int table_no, QString &tbl, QString &str )
{
    return false; // this method is disabled

   /* This method examines a paragraph for embedded tables.
      If none are found, it returns. If tables are found then it
      writes out the paragraph including the tables. It does not
      check for embedded bullet or dash lists which is a separate
      method.
      Arguments
         QString *Line - pointer to the array of text lines in the paragraph.
         int *linecount - pointer to the line counter used to position tables
         int no_lines - the number of lines in the paragraph
         int table_no - the table number, passed on to WriteOutTableCell
         QString tbl  - the output string for the table cells
         QString str  - the output string for ordinary paragraphs
      Returns - bool value indicating whether a table was processed. */

      enum LiType{paragraph, table} linetype[MAXLINES];
      struct Tabs tabs[MAXLINES];  // array of structures for tables
      struct Position pos;  // struct to pass cell position
      int width[MAXCOLUMNS]; // array of column widths
      QString *currentline;  // pointer to the current line
      QString *beginline;  // pointer to the beginning line
      QString text;  // text buffer
      QString text1;  // another text buffer
      QString No_indents;  // Line with leading white space removed
      int index, index1, index2, index3; // used to define column positions in a line

      int no_cols;  // column counter
      int firstindent;
      int secondindent;
      int i;  // counter
      int j;  // counter
      int k;  // counter
      QString *firstline;
      firstline = Line;
      int begin = 0;  // beginning line number
      int tabcount = 0;   // counts tab characters

      // First pass: check all lines for tabs and record the fields and
      // field widths of every line in tabs[]
      for(i = 0; i < no_lines; i++)
         {
         no_cols = 0;  // initialize the number of columns
         index = 0;  // initialize the line index

         // check the line for leading tabs and indentation
         if( (tabs[i].indent = Indent( *Line)) > 0)
             {
             // save and remove tabs at beginning of the line
             text = *Line;  // copy
             Line->remove(QRegExp("^[\\ \t]*"));
             }

         // find column positions and record text fields
         // (a column separator is either a tab or a run of multiple spaces)
         while((index2 = Line->find( QRegExp("\t"),index)) > index
         || (index3 = MultSpaces( *Line, index)) > index )
           {
           index1 = kMax(index2, index3);
           if( index2 > index3)
           index1 = Line->find( QRegExp("[^\t]"), index1);
           tabcount++;
           tabs[i].field[no_cols] = Line->mid(index, (index1 - index -1));
           tabs[i].width[no_cols] = index1 - index + spacespertab - 1;

           no_cols++;
           index = index1;
           }  // while( (index1 = ...

         // record last column
         if( no_cols > 0)
            {
            index1 = Line->length() - (index + 1);  // +1 for space at line end
            tabs[i].field[no_cols] = Line->mid(index,index1);
            tabs[i].width[no_cols] = index1;
            }

         if(tabs[i].indent > 0) *Line = text;  // restore original line
         Line++;  // increment the line pointer
         tabs[i].columns = no_cols;
         }  // for(i = 0; ...
         // All lines of the paragraph have been checked for tabs

     // check for a tab in at least one line
     if( tabcount == 0) return false;
     tabcount = 0;

     Line = firstline;  // reset the line pointer

     // Second pass: classify every line as paragraph or table line and write
     // out each run of equally classified lines when the classification changes
     for(j = 0; j < MAXCOLUMNS; j++) width[j] = 0;
     // NOTE(review): this loop runs up to and including i == no_lines, whereas
     // the first pass only filled tabs[0 .. no_lines-1] - confirm before
     // re-enabling this code.
     for( i = 0; i <= no_lines; i++)
        {
        if(tabs[i].columns == 0 )  // indicates no tabs in line
           {
           text += *Line;  // add line of text to the paragraph
           linetype[i] = paragraph;
           }

        else  // this means a tabulated line
           {
           linetype[i] = table;

           // calculate the column widths
           for(j = 0; j <= tabs[i].columns; j++)
              width[j] = kMax(tabs[i].width[j], width[j] );
           }  // else

        if(i > 0)
           {
           if( linetype[i] != linetype[(i - 1)] || i == no_lines) // change of type
              {
              if( linetype[(begin)] == paragraph )  // write out a paragraph
                 {
                 text = "";  // initialize a null string for paragraph text
                 currentline = firstline;
                 currentline += begin;  //starting line
                 beginline = currentline;
                 firstindent = Indent(*currentline);
                 currentline += 1;  // second line
                 if( (i - begin) > 1)
                        secondindent = Indent( *currentline );
                 else secondindent = 0;

                 for( j= begin; j < i; j++)
                    {
                    text += *beginline;  // form text entry
                    beginline++;   // increment pointer
                    }
                 // process the white space to eliminate unwanted spaces
                 QString text1 = text.simplifyWhiteSpace();
                 WriteOutParagraph( "Standard", "", text1 , firstindent, secondindent, str);
                 *linecount += (i - begin);

                 }  // if( linetype[(i - 1)]

              else       // this implies a table
                 {
                 // Write out a blank line to align text and table
                 WriteOutParagraph( "Standard", "", "" , 0, 0, str);
                 for( j = begin; j < i; j++ )
                    {
                    int row = j - begin;
                    pos.top = (double)((*linecount) + 1) * ptsperline;
                    pos.bottom = pos.top + ptsperline;
                    // initialize position of first col
                    pos.left = leftmargin + (double)tabs[i].indent * ptsperchar;

                    // write out a cell for each column
                    for( k = 0; k <= tabs[j].columns; k++)
                       {

                       text = tabs[j].field[k];
                       // process the white space to eliminate unwanted spaces
                       text1 = text.simplifyWhiteSpace();

                       // calculate position of table cell
                       pos.right = pos.left + (double)width[k] * ptsperchar;

                       WriteOutTableCell( table_no, row, k, &pos, tbl);
                       WriteOutParagraph( "Standard", "", text1 , 0, 0, tbl);
                       tbl += "</FRAMESET>\n";
                       pos.left += ptsperchar * (double)width[k];
                       }  // for( k = 0; ...
                    (*linecount)  += 1;
                    }   // for( j = begin; ...


                 }  // else...
              begin = i;
              }  // if( linetype[i]...

           }  // if(i > 0)
        Line++;
        }  // for( i = 0; ...

   (*linecount) += 1;

   return true;
   } // end of Table()
#endif
00787 
00788    // the following method finds the location of multiple spaces in a string
00789 int ASCIIImport::MultSpaces(const QString& text, const int index) const
00790 {
00791 
00792     QChar c;
00793     QChar lastchar = 'c'; // previous character - initialize non blank
00794     bool found = false;
00795     // kdDebug(30502) << "length = "  << text.length() << endl;
00796     for (uint i = index; i < text.length(); i++)
00797     {
00798         c = text.at(i);
00799     // kdDebug(30502) << "i = " << i << " found = " << found << " c = " << c << " lastchar = " << lastchar << endl;
00800         if ( (c != ' ') && found)
00801             return i;
00802         else if (c == ' ' && lastchar == ' ')
00803             found = true;
00804         lastchar = c;
00805     }
00806     return -1;
00807 } // MultSpaces
00808 
#if 0
/* The ListItem method examines a paragraph for embedded lists.
   If none are found, it returns without writing anything. If lists are
   found then it writes out the whole paragraph including the lists; a new
   output paragraph is started in front of every list item and in front of
   every empty line.

   Arguments
      QString *Line - pointer to the array of text lines in the paragraph.
      int no_lines  - the number of lines in the paragraph
      QString &str  - the output string

   Returns - bool value indicating whether a list item was processed.

   NOTE: this method is disabled (#if 0). It uses QRegExp, so <qregexp.h>
   has to be included again before it can be re-enabled. */
bool ASCIIImport::ListItem( QString *Line, int no_lines,
          QString &str )
{
    enum Listtype{none, bullet, dash} listtype[MAXLINES];

    // listtype[] has room for MAXLINES entries only; never index past it.
    // NOTE(review): returning false reports "no list handled" - confirm that
    // the caller then writes the paragraph out by itself.
    if( no_lines > MAXLINES ) return false;

    QString * const firstline = Line;  // first line of the paragraph
    int listcount = 0;                 // counts lines with list items
    int i;                             // line counter

    // classify all lines: bullet item (*), dash item (-) or plain text
    for( i = 0; i < no_lines; i++ )
    {
        if( IsListItem( firstline[i], QChar('*') ) )
        {
            listtype[i] = bullet;
            listcount++;
        }
        else if( IsListItem( firstline[i], QChar('-') ) )
        {
            listtype[i] = dash;
            listcount++;
        }
        else
            listtype[i] = none;
    }

    // check for at least one list
    if( listcount == 0 ) return false;

    QString text = "";  // text collected for the output paragraph in progress
    int begin = 0;      // line number on which that output paragraph started

    // The loop runs one step past the last line so that the final output
    // paragraph is written by the same code as all the others.
    for( i = 0; i <= no_lines; i++ )
    {
        const bool atEnd = ( i == no_lines );

        // atEnd is tested first: listtype[i] and firstline[i] must not be
        // read for i == no_lines.
        if( i > 0 && ( atEnd || listtype[i] != none || firstline[i].isEmpty() ) )
        {
            QString type = "";  // type of list; stays empty for a plain paragraph

            if( listtype[begin] == bullet )  // write out a bullet list
            {
                type = "6";
                // delete * at beginning of line
                text.remove( QRegExp("^[ \t]*\\* ") );
            }
            else if( listtype[begin] == dash )  // write out a dash list
            {
                type = "7";
                // delete - at beginning of line
                text.remove( QRegExp("^[ \t]*\\- ") );
            }

            // indentation of the first line and, when the output paragraph
            // spans more than one line, of its second line
            const int firstindent = Indent( firstline[begin] );
            int secondindent = 0;
            if( ( i - begin ) > 1 )
                secondindent = Indent( firstline[begin + 1] );

            // process the white space to eliminate unwanted spaces
            QString text1 = text.simplifyWhiteSpace();
            WriteOutParagraph( "Standard", type, text1 , firstindent, secondindent, str);

            begin = i;
            text = "";  // reset text
        }

        if( atEnd ) break;

        text += firstline[i];  // add text to paragraph
    }

    return true;
} // end of ListItem()
#endif
00949 
00950 /* The IsListItem method checks a paragraph's first line and determines if
00951    the text appears to be a list item (bullet or dash).
00952 
00953    Arguments
00954      QString FirstLine  the first line of text in a paragraph
00955      QChar mark         the mark - or * that indicates a list item
00956 
00957    Return
00958      true if the line appears as a list item, false if not. */
00959 
00960 bool ASCIIImport::IsListItem( QString FirstLine, QChar mark )
00961   {
00962 
00963      int i;  // counter
00964      QChar c; // to stream into one character at a time
00965 
00966 
00967 
00968      int k = FirstLine.find(mark);
00969 
00970      if( k < 0) return false;  // list item mark not on line
00971 
00972      // find first non white character on the line
00973      for( i=0; IsWhiteSpace( c = FirstLine.at((uint)i) ); i++);
00974 
00975      if(i != k ) return false;  // mark is in wrong position
00976 
00977      c = FirstLine.at((uint)++i);
00978 
00979      return IsWhiteSpace(c);  // character after the mark must be a white space
00980 
00981   }  // IsListItem
00982 
00983 
00984 bool ASCIIImport::IsWhiteSpace(const QChar& c) const
00985 {
00986      if ( c == QChar(' ') || c == QChar('\t') )
00987         return true;
00988      else
00989         return false;
00990 } // IsWhiteSpace
00991 
00992 QString ASCIIImport::readLine(QTextStream& textstream, bool& lastCharWasCr)
00993 {
00994     // We need to read a line, character by character
00995     // NOTE: we cannot use QStreamText::readLine,
00996     //   as it does not know anything about Carriage Returns
00997     QString strLine;
00998     QChar ch;
00999     while (!textstream.atEnd())
01000     {
01001         textstream >> ch; // Read one character
01002         if (ch=="\n")
01003         {
01004             if (lastCharWasCr)
01005             {
01006                 // We have a line feed following a Carriage Return
01007                 // As the Carriage Return has already ended the previous line,
01008                 // discard this Line Feed.
01009                 lastCharWasCr=false;
01010             }
01011             else
01012             {
01013                 // We have a normal Line Feed, therefore we end the line
01014                 break;
01015             }
01016         }
01017         else if (ch=="\r")
01018         {
01019             // We have a Carriage Return, therefore we end the line
01020             lastCharWasCr=true;
01021             break;
01022         }
01023         else if (ch==char(12)) // Form Feed
01024         {
01025             // Ignore the form feed
01026             continue;
01027         }
01028         else
01029         {
01030             strLine+=ch;
01031             lastCharWasCr=false;
01032         }
01033     } // while
01034     return strLine;
01035 }
KDE Home | KDE Accessibility Home | Description of Access Keys