filters

starwriterimport.cc

00001 /*
00002    This file is part of the KDE project
00003    Copyright (C) 2002 Marco Zanon <info@marcozanon.com>
00004                   and Ariya Hidayat <ariya@kde.org>
00005 
00006    This library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Library General Public
00008    License as published by the Free Software Foundation; either
00009    version 2 of the License, or (at your option) any later version.
00010 
00011    This library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Library General Public License for more details.
00015 
00016    You should have received a copy of the GNU Library General Public License
00017    along with this library; see the file COPYING.LIB.  If not, write to
00018    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019  * Boston, MA 02110-1301, USA.
00020 */
00021 
00022 #include <starwriterimport.h>
00023 
00024 #include <qdatastream.h>
00025 #include <qiodevice.h>
00026 #include <qstring.h>
00027 
00028 #include <KoFilterChain.h>
00029 #include <kgenericfactory.h>
00030 
00031 #include "pole.h"
00032 
00033 typedef KGenericFactory<StarWriterImport, KoFilter> StarWriterImportFactory;
00034 K_EXPORT_COMPONENT_FACTORY(libstarwriterimport, StarWriterImportFactory("kofficefilters"))
00035 
00036 // Get unsigned 24-bits integer at given offset
00037 static inline Q_UINT32 readU24(QByteArray array, Q_UINT32 p)
00038 {
00039    Q_UINT8* ptr = (Q_UINT8*) array.data();
00040    return (Q_UINT32) (ptr[p] + (ptr[p+1] << 8) + (ptr[p+2] << 16));
00041 }
00042 
00043 // Get unsigned 16-bits integer at given offset
00044 static inline Q_UINT16 readU16(QByteArray array, Q_UINT32 p)
00045 {
00046    Q_UINT8* ptr = (Q_UINT8*) array.data();
00047    return (Q_UINT16) (ptr[p] + (ptr[p+1] << 8));
00048 }
00049 
00050 StarWriterImport::StarWriterImport(KoFilter *, const char *, const QStringList&) : KoFilter()
00051 {
00052     hasHeader = false;
00053     hasFooter = false;
00054     tablesNumber = 1;
00055 }
00056 
00057 StarWriterImport::~StarWriterImport()
00058 {
00059 }
00060 
00061 KoFilter::ConversionStatus StarWriterImport::convert(const QCString& from, const QCString& to)
00062 {
00063     // Check for proper conversion
00064     // When 4.x is supported, use also: || (from != "application/x-starwriter")
00065     if ((to != "application/x-kword") || (from != "application/vnd.stardivision.writer"))
00066         return KoFilter::NotImplemented;
00067 
00068     // Read streams
00069     POLE::Storage storage;
00070     storage.open(m_chain->inputFile().latin1());
00071 
00072     POLE::Stream* stream;
00073 
00074     stream = storage.stream("StarWriterDocument");
00075     if (!stream) return KoFilter::WrongFormat;
00076     StarWriterDocument.resize(stream->size());
00077     stream->read((unsigned char*)StarWriterDocument.data(), StarWriterDocument.size());
00078     delete stream;
00079 
00080     stream = storage.stream("SwPageStyleSheets");
00081     if (!stream) return KoFilter::WrongFormat;
00082     SwPageStyleSheets.resize(stream->size());
00083     stream->read((unsigned char*)SwPageStyleSheets.data(), SwPageStyleSheets.size());
00084     delete stream;
00085 
00086     // Check document version
00087     if (!checkDocumentVersion()) return KoFilter::WrongFormat;
00088 
00089     // Algorithm for creating the main document
00090     if (!addBody()) return KoFilter::ParsingError;
00091     if (!addHeaders()) return KoFilter::ParsingError;
00092     if (!addFooters()) return KoFilter::ParsingError;
00093     if (!addStyles()) return KoFilter::ParsingError;
00094     if (!addPageProperties()) return KoFilter::ParsingError;
00095     maindoc = bodyStuff + tablesStuff + picturesStuff;   // + lots of other things :)
00096 
00097     if (!addKWordHeader()) return KoFilter::ParsingError;
00098 
00099     // Prepare storage device and return
00100     KoStoreDevice *out = m_chain->storageFile("maindoc.xml", KoStore::Write);
00101     if (out) {
00102         QCString cstring = maindoc.utf8();
00103         cstring.prepend("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
00104         out->writeBlock((const char*) cstring, cstring.length());
00105     }
00106 
00107     return KoFilter::OK;
00108 }
00109 
00110 bool StarWriterImport::checkDocumentVersion()
00111 {
00112     if (SwPageStyleSheets.size() < 0x0B) return false;
00113     if (StarWriterDocument.size() < 0x0B) return false;
00114 
00115     if (SwPageStyleSheets[0x00] != 'S') return false;
00116     if (SwPageStyleSheets[0x01] != 'W') return false;
00117     // When 4.x is supported use also: || (SwPageStyleSheets[0x02] != '4')
00118     if (SwPageStyleSheets[0x02] != '5') return false;
00119     if (SwPageStyleSheets[0x03] != 'H') return false;
00120     if (SwPageStyleSheets[0x04] != 'D') return false;
00121     if (SwPageStyleSheets[0x05] != 'R') return false;
00122 
00123     if (StarWriterDocument[0x00] != 'S') return false;
00124     if (StarWriterDocument[0x01] != 'W') return false;
00125     // When 4.x is supported use also: || (StarWriterDocument[0x02] != '4')
00126     if (StarWriterDocument[0x02] != '5') return false;
00127     if (StarWriterDocument[0x03] != 'H') return false;
00128     if (StarWriterDocument[0x04] != 'D') return false;
00129     if (StarWriterDocument[0x05] != 'R') return false;
00130 
00131     // Password-protection is not supported for the moment
00132     Q_UINT16 flags = readU16(StarWriterDocument, 0x0A);
00133     if (flags & 0x0008) return false;
00134 
00135     return true;
00136 }
00137 
00138 bool StarWriterImport::addKWordHeader()
00139 {
00140     // Proper prolog and epilog
00141     QString prolog;
00142     prolog = "<!DOCTYPE DOC>\n";
00143     prolog.append("<DOC mime=\"application/x-kword\" syntaxVersion=\"2\" editor=\"KWord\">\n");
00144     prolog.append("<PAPER width=\"595\" height=\"841\" format=\"1\" fType=\"0\" orientation=\"0\" hType=\"0\" columns=\"1\">\n");
00145     prolog.append(" <PAPERBORDERS left=\"36\" right=\"36\" top=\"36\" bottom=\"36\" />\n");
00146     prolog.append("</PAPER>\n");
00147     prolog.append("<ATTRIBUTES standardpage=\"1\" hasFooter=\"0\" hasHeader=\"0\" processing=\"0\" />\n");
00148     prolog.append("<FRAMESETS>\n");
00149 
00150     maindoc.prepend(prolog);
00151 
00152     maindoc.append("</FRAMESETS>\n");
00153     maindoc.append("</DOC>");
00154 
00155     return true;
00156 }
00157 
00158 bool StarWriterImport::addPageProperties()
00159 {
00160     return true;
00161 }
00162 
00163 bool StarWriterImport::addStyles()
00164 {
00165     return true;
00166 }
00167 
00168 // FIXME
00169 // 1. search for the right starting point
00170 // 2. determine the length
00171 // 3. parse everything with parseNodes()
00172 bool StarWriterImport::addHeaders()
00173 {
00174     return true;
00175 }
00176 
00177 bool StarWriterImport::addFooters()
00178 {
00179     return true;
00180 }
00181 
00182 bool StarWriterImport::addBody()
00183 {
00184     // Find the starting point, by:
00185     // 1. skipping the header
00186     Q_UINT32 len = StarWriterDocument[0x07];
00187     Q_UINT32 p = len;
00188 
00189     // 2. skipping 8 more bytes
00190     p += 0x08;
00191 
00192     // 3. skipping useless sections
00193     char c = StarWriterDocument[p];
00194     while (c != 'N') {
00195         len = readU24(StarWriterDocument, p+1);
00196         p += len;
00197         c = StarWriterDocument[p];
00198     };   // there is at least one empty paragraph!
00199 
00200     // Select nodes and pass them to parseNodes()
00201     len = readU24(StarWriterDocument, p+1);
00202     QByteArray data(len);
00203     for (Q_UINT32 k=0; k<len; k++)
00204       data[k] = StarWriterDocument[p+k];
00205     bool retval = parseNodes(data);
00206 
00207     // add proper tags
00208     bodyStuff.prepend(" <FRAME right=\"567\" left=\"28\" top=\"42\" bottom=\"799\" />\n");
00209     bodyStuff.prepend(" <FRAMESET removable=\"0\" frameType=\"1\" frameInfo=\"0\" name=\"Text Frameset 1\" autoCreateNewFrame=\"1\">\n");
00210     bodyStuff.append(" </FRAMESET>\n");
00211 
00212     return retval;
00213 }
00214 
00215 QString StarWriterImport::convertToKWordString(QByteArray s)
00216 {
00217     QString result;
00218 
00219     for (Q_UINT32 i = 0x00; i < s.size(); i++)
00220         if (s[i] == '&') result += "&amp;";
00221         else if (s[i] == '<') result += "&lt;";
00222         else if (s[i] == '>') result += "&gt;";
00223         else if (s[i] == '"') result += "&quot;";
00224         else if (s[i] == 0x27) result += "&apos;";
00225         else if (s[i] == 0x09) result += "\t";
00226         // FIXME: more to add here
00227         //        (manual breaks, soft-hyphens, non-breaking spaces, variables)
00228         else result += QChar(s[i]);
00229 
00230     return result;
00231 }
00232 
00233 bool StarWriterImport::parseNodes(QByteArray n)
00234 {
00235     QByteArray s;
00236     Q_UINT32 len, p;
00237 
00238     // Loop
00239     p = 0x09;   // is this a fixed value? is it the same for headers/footers?
00240 
00241     while (p < n.size()) {
00242         char c = n[p];
00243         len = readU24(n, p+1);
00244 
00245         s.resize(len);
00246         for (Q_UINT32 k = 0x00; k < len; k++)
00247             s[k] = n[p+k];
00248 
00249         switch (c) {
00250             case 'T':
00251                 //if ((s[0x0A] == 0x01) && (s[0x0B] == 0x00) && (s[0x0C] == 0xFF)) {
00252                 //    if (!parseGraphics(s)) return false;
00253                 //}
00254                 //else {
00255                     if (!parseText(s)) return false;
00256                 //}
00257                 break;
00258             case 'E':
00259                 if (!parseTable(s)) return false;
00260                 break;
00261             default:
00262                 break;
00263         };
00264         p += len;
00265     };
00266 
00267     return true;
00268 }
00269 
00270 bool StarWriterImport::parseText(QByteArray n)
00271 {
00272     QByteArray s;
00273     Q_UINT16 len;
00274     Q_UINT32 p;
00275     QString text;
00276     // Q_UINT16 attributeStart, attributeEnd, formatPos, formatLen;
00277     // QString pAttributes, cAttributes, tempCAttributes;
00278     // QStringList cAttributesList;
00279 
00280     // Retrieve the paragraph (text-only)
00281     len = readU16(n, 0x09);
00282     s.resize(len);
00283     for (Q_UINT16 k = 0x00; k < len; k++)
00284         s[k] = n[0x0B+k];
00285 
00286     /*
00287     // Retrieve paragraph and character attributes
00288     P = len;
00289     while (n[p] == 'S') {
00290         p += 0x04;
00291         // parse 'A' sub-sections and write to pAttributes
00292         // FIXME: all this part
00293         // get section length
00294         // if (length > ...) {
00295         //    if (special characters found) {
00296         //        get start, len, type
00297         //        write to the list
00298         //    }
00299         // }
00300         // increment p
00301     }
00302     while (n[p] == 'A') {
00303         // parse 'A' sections and fill cAttributesList
00304         // FIXME: similar as above
00305     }
00306     // Parse list
00307     while ((!cAttributesList.isEmpty()) || (formatPos < len)) {
00308         formatLen = 65535;
00309         // FIXME: point to first list item
00310         while (FIXME: current list item < last list item) {
00311             // FIXME: get item members
00312             if ((attributeStart <= formatPos) && (formatPos <= attributeEnd)) {   // this attribute has to be considered
00313                 // FIXME: write attribute to tempCAttributes
00314                 formatLen = min(formatLen, (attributeStart - attributeEnd));
00315             }
00316             else if (attributeLen < formatPos) [   // this attribute has to be removed
00317                 // FIXME: remove list item
00318             }
00319             // FIXME: point to next list item
00320         }
00321         // FIXME: copy tempCAttributes to cAttributes
00322         formatPos += formatLen;
00323     }
00324     */
00325 
00326     // Write everything to the variable
00327     text = convertToKWordString(s);
00328     bodyStuff.append("  <PARAGRAPH>\n");
00329     bodyStuff.append("   <TEXT xml:space=\"preserve\">" + text + "</TEXT>\n");
00330     // FIXME: add FORMATS for pAttributes and cAttributes
00331     bodyStuff.append("  </PARAGRAPH>\n");
00332 
00333     return true;
00334 }
00335 
00336 bool StarWriterImport::parseTable(QByteArray n)
00337 {
00338 /*
00339     QByteArray s;
00340     Q_UINT32 len, len2;
00341     Q_UINT16 len3;
00342     Q_UINT32 p, p2;
00343     QString text;
00344     QString tableCell, tableText, tableName;
00345     Q_UINT8 row, column;
00346 
00347     // Set table name
00348     tableName = QString("Table %1").arg(tablesNumber);
00349     tablesNumber++;
00350 
00351     // Skip useless sections and retrieve the right point
00352     p = 0x13;
00353     while (n[p] != 'L') {
00354         len = readU24(n, p+1);
00355         p += len;
00356     }
00357 
00358     row = 0;
00359 
00360     // Read rows
00361     while (n[p] == 'L') {
00362         column = 0;
00363 
00364         // Find the first 't'
00365         while (n[p] != 't') p++;
00366 
00367         // Read cells
00368         while (n[p] == 't') {
00369             // Get cell length
00370             len2 = readU24(n, p+1);
00371             p2 = p + len2;
00372 
00373             // Find the 'T' section
00374             while (n[p] != 'T') p++;
00375 
00376             // Get cell text/value
00377             len3 = readU16(n, p+0x09);
00378             s.resize(len3);
00379             for (Q_UINT16 k = 0x00; k < len3; k++)
00380                 s[k] = n[p+0x0B+k];
00381             text = convertToKWordString(s);
00382 
00383             // FIXME: check this stuff
00384             QString frameName = QString("%1 Cell %2,%3").arg(tableName).arg(row).arg(column);
00385             tableText.append(QString(" <FRAMESET name=\"%1\" frameType=\"1\" frameInfo=\"0\" removable=\"0\" visible=\"1\" grpMgr=\"%2\" row=\"%3\" col=\"%4\" rows=\"1\" cols=\"1\" protectSize=\"0\">\n").arg(frameName).arg(tableName).arg(row).arg(column));
00386             tableText.append(" <FRAME runaround=\"1\" copy=\"0\" newFrameBehavior=\"1\" runaroundSide=\"biggest\" autoCreateNewFrame=\"0\" bleftpt=\"2.8\" brightpt=\"2.8\" btoppt=\"2.8\" bbottompt=\"2.8\" runaroundGap=\"2.8\" />\n");
00387             tableText.append("  <PARAGRAPH>\n");
00388             tableText.append("   <TEXT xml:space=\"preserve\">" + text + "</TEXT>\n");
00389             tableText.append("  </PARAGRAPH>\n");
00390             tableText.append(" </FRAMESET>\n");
00391 
00392             // Skip other sections or bytes
00393             p = p2;
00394 
00395             // Increase column pointers
00396             column++;
00397         }
00398 
00399         // Increase row pointer
00400         row++;
00401     }
00402 
00403     // Add everything to tablesStuff
00404     tablesStuff.append(tableText);
00405 
00406     // Add anchor to bodyStuff
00407     bodyStuff.append("  <PARAGRAPH>\n");
00408     bodyStuff.append("   <TEXT xml:space=\"preserve\">#</TEXT>\n");
00409     bodyStuff.append("   <FORMATS>\n");
00410     bodyStuff.append("    <FORMAT id=\"6\" pos=\"0\" len=\"1\">\n");
00411     bodyStuff.append(QString("    <ANCHOR type=\"frameset\" instance=\"%1\" />\n").arg(tableName));
00412     bodyStuff.append("    </FORMAT>\n");
00413     bodyStuff.append("   </FORMATS>\n");
00414     bodyStuff.append("  </PARAGRAPH>\n");
00415 
00416 */
00417     return true;
00418 }
00419 
00420 bool StarWriterImport::parseGraphics(QByteArray n)
00421 {
00422     return true;
00423 }
00424 
00425 #include <starwriterimport.moc>
KDE Home | KDE Accessibility Home | Description of Access Keys