00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "khtmlreader.h"
00020
00021 #include "khtmlreader.moc"
00022 #include <kdebug.h>
00023 #include <dom/dom_text.h>
00024 #include <dom/dom2_views.h>
00025 #include <dom/dom_doc.h>
00026 #include <qcolor.h>
00027 #include <dom/dom_element.h>
00028 #include <dom/html_table.h>
00029 #include <khtmlview.h>
00030 #include <qwidget.h>
00031 #include <kapplication.h>
00032 #include <dom/html_misc.h>
00033 #include <qregexp.h>
00034
00035 KHTMLReader::KHTMLReader(KWDWriter *w){
00036 _html=new KHTMLPart();
00037 _writer=w;
00038 _it_worked=false;
00039 }
00040
00041
00042 void qt_enter_modal( QWidget *widget );
00043 void qt_leave_modal( QWidget *widget );
00044
00045
00046 bool KHTMLReader::filter(KURL url) {
00047 kdDebug(30503) << "KHTMLReader::filter" << endl;
00048 QObject::connect(_html,SIGNAL(completed()),this,SLOT(completed()));
00049
00050 _state.clear();
00051 _list_depth=0;
00052
00053 _html->view()->resize(600,530);
00054 _html->setAutoloadImages(false);
00055 _html->setJScriptEnabled(false);
00056 _html->setPluginsEnabled(false);
00057 _html->setJavaEnabled(false);
00058 _html->setMetaRefreshEnabled(false);
00059 if (_html->openURL(url) == false) {
00060 kdWarning(30503) << "openURL returned false" << endl;
00061 return false;
00062 }
00063
00064
00065 QWidget dummy(0,0,WType_Dialog | WShowModal);
00066 qt_enter_modal(&dummy);
00067 qApp->enter_loop();
00068 qt_leave_modal(&dummy);
00069 return _it_worked;
00070 }
00071
00072 HTMLReader_state *KHTMLReader::state() {
00073 if (_state.count() == 0) {
00074 HTMLReader_state *s=new HTMLReader_state;
00075 s->frameset=_writer->mainFrameset();
00076 s->paragraph = _writer->addParagraph(s->frameset);
00077 s->format=_writer->currentFormat(s->paragraph,true);
00078 s->layout=_writer->currentLayout(s->paragraph);
00079 s->in_pre_mode = false;
00080 _state.push(s);
00081 }
00082 return _state.top();
00083 }
00084
00085 HTMLReader_state *KHTMLReader::pushNewState() {
00086 HTMLReader_state *s=new HTMLReader_state;
00087 s->frameset=state()->frameset;
00088 s->paragraph=state()->paragraph;
00089 s->format=state()->format;
00090 s->layout=state()->layout;
00091 s->in_pre_mode=state()->in_pre_mode;
00092 _writer->cleanUpParagraph(s->paragraph);
00093 _state.push(s);
00094 return s;
00095 }
00096
00097
00098 void KHTMLReader::popState() {
00099
00100 HTMLReader_state *s=_state.pop();
00101
00110 if (s->frameset == state()->frameset)
00111 {
00112 state()->paragraph=s->paragraph;
00113 if ((state()->layout != s->layout)) {
00114 startNewLayout(false,state()->layout);
00115 }
00116 state()->format=_writer->startFormat(state()->paragraph, state()->format);
00117 }
00118 delete(s);
00119 }
00120
00121 void KHTMLReader::startNewLayout(bool startNewFormat) {
00122 QDomElement layout;
00123 startNewLayout(startNewFormat,layout);
00124 }
00125
00126 void KHTMLReader::startNewLayout(bool startNewFormat, QDomElement layout) {
00127 if (!(_writer->getText(state()->paragraph).isEmpty())) {
00128 startNewParagraph(startNewFormat,true);
00129 }
00130 state()->layout=_writer->setLayout(state()->paragraph,layout);
00131 }
00132
00133
00134 void KHTMLReader::completed() {
00135 kdDebug(30503) << "KHTMLReader::completed" << endl;
00136 qApp->exit_loop();
00137 DOM::Document doc=_html->document();
00138 DOM::NodeList list=doc.getElementsByTagName("body");
00139 DOM::Node docbody=list.item(0);
00140
00141 if (docbody.isNull()) {
00142 kdWarning(30503) << "no <BODY>, giving up" << endl;
00143 _it_worked=false;
00144 return;
00145 }
00146
00147
00148 parseNode(docbody);
00149
00150 list = doc.getElementsByTagName("head");
00151 DOM::Node dochead=list.item(0);
00152 if (!dochead.isNull())
00153 parse_head(dochead);
00154 else
00155 kdWarning(30503) << "WARNING: no html <HEAD> section" << endl;
00156
00157 _writer->cleanUpParagraph(state()->paragraph);
00158 _it_worked=_writer->writeDoc();
00159 }
00160
00161
00162 void KHTMLReader::parseNode(DOM::Node node) {
00163
00164
00165 DOM::Text t=node;
00166 if (!t.isNull()) {
00167 _writer->addText(state()->paragraph,t.data().string(),1,state()->in_pre_mode);
00168 return;
00169 }
00170
00171
00172 state()->format=_writer->currentFormat(state()->paragraph,true);
00173 state()->layout=_writer->currentLayout(state()->paragraph);
00174 pushNewState();
00175
00176 DOM::Element e=node;
00177
00178 bool go_recursive=true;
00179
00180 if (!e.isNull()) {
00181
00182 parseStyle(e);
00183
00184 go_recursive=parseTag(e);
00185 }
00186 if (go_recursive) {
00187 for (DOM::Node q=node.firstChild(); !q.isNull(); q=q.nextSibling()) {
00188 parseNode(q);
00189 }
00190 }
00191 popState();
00192
00193
00194 }
00195
00196 void KHTMLReader::parse_head(DOM::Element e) {
00197 for (DOM::Element items=e.firstChild();!items.isNull();items=items.nextSibling()) {
00198 if (items.tagName().string().lower() == "title") {
00199 DOM::Text t=items.firstChild();
00200 if (!t.isNull()) {
00201 _writer->createDocInfo("HTML import filter",t.data().string());
00202 }
00203 }
00204 }
00205 }
00206
// _PP(x): inside parseTag(), if the element `e` has tag name x, return the
// result of the dedicated handler parse_x(e). Use as a statement: _PP(x);
#define _PP(x) do { \
    if (e.tagName().lower() == #x) \
        return parse_##x(e); \
} while (0)
00211
// _PF(x,a,b,c): inside parseTag(), if the element `e` has tag name x, set
// format attribute b="c" on format child a of the current paragraph and
// return true. Use as a statement: _PF(x,a,b,c);
#define _PF(x,a,b,c) do { \
    if (e.tagName().lower() == #x) { \
        _writer->formatAttribute(state()->paragraph, #a, #b, #c); \
        return true; \
    } \
} while (0)
00219
00220
00221
00222
// _PL(x,a,b,c): inside parseTag(), if the element `e` has tag name x,
// re-apply the current layout, start a new paragraph when the current one
// already holds text, set layout attribute b="c" on layout child a and
// return true. Use as a statement: _PL(x,a,b,c);
#define _PL(x,a,b,c) do { \
    if (e.tagName().lower() == #x) { \
        state()->layout = _writer->setLayout(state()->paragraph, state()->layout); \
        if (!_writer->getText(state()->paragraph).isEmpty()) \
            startNewParagraph(false, false); \
        _writer->layoutAttribute(state()->paragraph, #a, #b, #c); \
        return true; \
    } \
} while (0)
00233
00234
00235 bool KHTMLReader::parseTag(DOM::Element e) {
00236 _PP(a);
00237 _PP(p);
00238 _PP(br);
00239 _PP(table);
00240 _PP(pre);
00241 _PP(ul);
00242 _PP(ol);
00243 _PP(font);
00244 _PP(hr);
00245
00246
00247
00248 _PF(b,WEIGHT,value,75);
00249 _PF(strong,WEIGHT,value,75);
00250 _PF(u,UNDERLINE,value,1);
00251 _PF(i,ITALIC,value,1);
00252
00253 _PL(center,FLOW,align,center);
00254 _PL(right,FLOW,align,right);
00255 _PL(left,FLOW,align,left);
00256
00257 _PL(h1,NAME,value,h1);
00258 _PL(h2,NAME,value,h2);
00259 _PL(h3,NAME,value,h3);
00260 _PL(h4,NAME,value,h4);
00261 _PL(h5,NAME,value,h5);
00262 _PL(h6,NAME,value,h6);
00263
00264
00265 if(e.nodeType() == DOM::Node::COMMENT_NODE || e.tagName().lower() == "script") {
00266 return false;
00267 }
00268
00269 return true;
00270 }
00271
00272
00273
00274 void KHTMLReader::parseStyle(DOM::Element e) {
00275
00276
00277
00278 kdDebug(30503) << "entering parseStyle" << endl;
00279 DOM::CSSStyleDeclaration s1=e.style();
00280 DOM::Document doc=_html->document();
00281 DOM::CSSStyleDeclaration s2=doc.defaultView().getComputedStyle(e,"");
00282
00283 kdDebug(30503) << "font-weight=" << s1.getPropertyValue("font-weight").string() << endl;
00284 if ( s1.getPropertyValue("font-weight").string() == "bolder" )
00285 {
00286 _writer->formatAttribute(state()->paragraph,"WEIGHT","value","75");
00287 }
00288 if ( s1.getPropertyValue("font-weight").string() == "bold" )
00289 {
00290 _writer->formatAttribute(state()->paragraph,"WEIGHT","value","75");
00291 }
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308 }
00309
00310 void KHTMLReader::startNewParagraph(bool startnewformat, bool startnewlayout) {
00311
00312 QDomElement qf=state()->format;
00313 QDomElement ql=state()->layout;
00314
00315 _writer->cleanUpParagraph(state()->paragraph);
00316
00317 if ((startnewlayout==true) || ql.isNull())
00318 {state()->paragraph=_writer->addParagraph(state()->frameset);}
00319 else
00320 {state()->paragraph=
00321 _writer->addParagraph(state()->frameset,state()->layout);}
00322
00323
00324
00325 if (qf.isNull() || (startnewformat==true)) {
00326 state()->format=_writer->startFormat(state()->paragraph);
00327 } else {
00328 state()->format=_writer->startFormat(state()->paragraph,qf);
00329 }
00330
00336 QString ct=_writer->getLayoutAttribute(state()->paragraph,"COUNTER","type");
00337 if ((!ct.isNull()) && (ct != "0")) {
00338 _writer->layoutAttribute(state()->paragraph,"COUNTER","type","0");
00339 _writer->layoutAttribute(state()->paragraph,"COUNTER","numberingtype","0");
00340 _writer->layoutAttribute(state()->paragraph,"COUNTER","righttext","");
00341 int currdepth=(_writer->getLayoutAttribute(state()->paragraph,"COUNTER","depth")).toInt();
00342 _writer->layoutAttribute(state()->paragraph,"COUNTER","depth",QString("%1").arg(currdepth+1));
00343 }
00344 }
00345
00346 KHTMLReader::~KHTMLReader(){
00347 delete _html;
00348 }
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360 bool KHTMLReader::parse_CommonAttributes(DOM::Element e) {
00361 kdDebug(30503) << "entering KHTMLReader::parse_CommonAttributes" << endl;
00362 kdDebug(30503) << "tagName is " << e.tagName().string() << endl;
00363 QString s=e.getAttribute("align").string();
00364 if (!s.isEmpty())
00365 {
00366 _writer->formatAttribute(state()->paragraph,"FLOW","align",s);
00367 }
00368 QRegExp rx( "h[0-9]+" );
00369 if ( 0 == rx.search( e.getAttribute("class").string() ) )
00370
00371 {
00372 _writer->layoutAttribute(state()->paragraph,"NAME","value",e.getAttribute("class").string());
00373 }
00374 return true;
00375 }
00376
00377 bool KHTMLReader::parse_a(DOM::Element e) {
00378 QString url = e.getAttribute("href").string();
00379 if (!url.isEmpty())
00380 {
00381 QString linkName;
00382 DOM::Text t = e.firstChild();
00383 if (t.isNull()) {
00384
00385 return false;
00386 }
00387 linkName = t.data().string().simplifyWhiteSpace();
00388 t.setData(DOM::DOMString("#"));
00389 _writer->createLink(state()->paragraph, linkName, url);
00390 }
00391 return true;
00392 }
00393
00394 bool KHTMLReader::parse_p(DOM::Element e) {
00395
00396
00397 static bool firstparagraph=true;
00398 if (firstparagraph)
00399 {
00400 firstparagraph=false;
00401 }
00402 else {
00403 startNewParagraph();
00404 }
00405 parse_CommonAttributes(e);
00406 return true;
00407 }
00408
00409 bool KHTMLReader::parse_hr(DOM::Element ) {
00410 startNewParagraph();
00411 _writer->createHR(state()->paragraph);
00412 startNewParagraph();
00413 return true;
00414 }
00415
00416 bool KHTMLReader::parse_br(DOM::Element ) {
00417 startNewParagraph(false,false);
00418 return false;
00419 }
00420
00421 static const QColor parsecolor(const QString& colorstring) {
00422 QColor color;
00423 if (colorstring[0]=='#') {
00424 color.setRgb(
00425 colorstring.mid(1,2).toInt(0,16),
00426 colorstring.mid(3,2).toInt(0,16),
00427 colorstring.mid(5,2).toInt(0,16)
00428 );
00429 } else {
00430 QString colorlower=colorstring.lower();
00431
00432 if (colorlower=="black")
00433 color.setRgb(0,0,0);
00434 else if (colorlower=="white")
00435 color.setRgb(255,255,255);
00436 else if (colorlower=="silver")
00437 color.setRgb(0xc0,0xc0,0xc0);
00438 else if (colorlower=="gray")
00439 color.setRgb(128,128,128);
00440
00441 else if (colorlower=="red")
00442 color.setRgb(255,0,0);
00443 else if (colorlower=="lime")
00444 color.setRgb(0,255,0);
00445 else if (colorlower=="blue")
00446 color.setRgb(0,0,255);
00447 else if (colorlower=="yellow")
00448 color.setRgb(255,255,0);
00449 else if (colorlower=="fuchsia")
00450 color.setRgb(255,0,255);
00451 else if (colorlower=="aqua")
00452 color.setRgb(0,255,255);
00453
00454 else if (colorlower=="maroon")
00455 color.setRgb(128,0,0);
00456 else if (colorlower=="green")
00457 color.setRgb(0,128,0);
00458 else if (colorlower=="navy")
00459 color.setRgb(0,0,128);
00460 else if (colorlower=="olive")
00461 color.setRgb(128,128,0);
00462 else if (colorlower=="purple")
00463 color.setRgb(128,0,128);
00464 else if (colorlower=="teal")
00465 color.setRgb(0,128,128);
00466 else {
00467
00468
00469 color.setNamedColor(colorstring);
00470 }
00471 }
00472 return colorstring;
00473 }
00474
00475
00476 bool KHTMLReader::parse_table(DOM::Element e) {
00477 if(_writer->isInTable()) {
00478
00479
00480 for (DOM::Node rows=e.firstChild().firstChild();!rows.isNull();rows=rows.nextSibling())
00481 if (!rows.isNull() && rows.nodeName().string().lower() == "tr")
00482 for (DOM::Node cols=rows.firstChild();!cols.isNull();cols=cols.nextSibling())
00483 if (!cols.isNull())
00484 parseNode(cols);
00485 return false;
00486 }
00487
00488 DOM::Element table_body=e.firstChild();
00489 if(table_body.isNull()) {
00490
00491
00492
00493 return true;
00494 }
00495
00496 int tableno=_writer->createTable();
00497 int nrow=0;
00498 int ncol=0;
00499 bool has_borders=false;
00500 QColor bgcolor=parsecolor("#FFFFFF");
00501
00502 if (!table_body.getAttribute("bgcolor").string().isEmpty())
00503 bgcolor=parsecolor(table_body.getAttribute("bgcolor").string());
00504 if ((e.getAttribute("border").string().toInt() > 0))
00505 has_borders=true;
00506
00507
00508
00509 for (DOM::Node rowsnode=table_body.firstChild();!rowsnode.isNull();rowsnode=rowsnode.nextSibling()) {
00510 DOM::Element rows = rowsnode;
00511 if (!rows.isNull() && rows.tagName().string().lower() == "tr") {
00512 QColor obgcolor=bgcolor;
00513 if (!rows.getAttribute("bgcolor").string().isEmpty())
00514 bgcolor=parsecolor(rows.getAttribute("bgcolor").string());
00515
00516 ncol=0;
00517 for (DOM::Node colsnode=rows.firstChild();!colsnode.isNull();colsnode=colsnode.nextSibling()) {
00518 DOM::Element cols = colsnode;
00519 const QString nodename = cols.isNull() ? QString::null : cols.nodeName().string().lower();
00520 if (nodename == "td" || nodename == "th") {
00521 QColor bbgcolor=bgcolor;
00522 if (!cols.getAttribute("bgcolor").string().isEmpty())
00523 bgcolor=parsecolor(cols.getAttribute("bgcolor").string());
00524
00525 pushNewState();
00526 QRect colrect=cols.getRect();
00527 state()->frameset=_writer->createTableCell(tableno,nrow,ncol,1,colrect);
00528 state()->frameset.firstChild().toElement().setAttribute("bkRed",bgcolor.red());
00529 state()->frameset.firstChild().toElement().setAttribute("bkGreen",bgcolor.green());
00530 state()->frameset.firstChild().toElement().setAttribute("bkBlue",bgcolor.blue());
00531 if (has_borders) {
00532 state()->frameset.firstChild().toElement().setAttribute("lWidth",1);
00533 state()->frameset.firstChild().toElement().setAttribute("rWidth",1);
00534 state()->frameset.firstChild().toElement().setAttribute("bWidth",1);
00535 state()->frameset.firstChild().toElement().setAttribute("tWidth",1);
00536 }
00537
00538
00539 state()->paragraph=_writer->addParagraph(state()->frameset);
00540 parseNode(cols);
00541 _writer->cleanUpParagraph(state()->paragraph);
00542 popState();
00543 ncol++;
00544 bgcolor=bbgcolor;
00545 }
00546 }
00547 nrow++;
00548 bgcolor=obgcolor;
00549 }
00550 }
00551 _writer->finishTable(tableno);
00552 startNewParagraph(false,false);
00553 _writer->createInline(state()->paragraph,_writer->fetchTableCell(tableno,0,0));
00554 startNewParagraph(false,false);
00555 return false;
00556 }
00557
00558 bool KHTMLReader::parse_img(DOM::Element ) {
00559
00560 return true;
00561 }
00562
00563 bool KHTMLReader::parse_pre(DOM::Element e) {
00564 #if 0 // see Bug #74601 (normal): kword doesn't recognize PRE-tags in HTML
00565
00567 DOM::HTMLElement htmlelement(e);
00568 if(! htmlelement.isNull())
00569 _writer->addText(state()->paragraph,htmlelement.innerHTML().string(),1);
00570 startNewParagraph();
00571
00572 return false;
00573 #else
00574 pushNewState();
00575 state()->in_pre_mode=true;
00576 for (DOM::Node q=e.firstChild(); !q.isNull(); q=q.nextSibling()) {
00577 parseNode(q);
00578 }
00579 popState();
00580 return false;
00581 #endif
00582 }
00583
00584 bool KHTMLReader::parse_ol(DOM::Element e) {
00585 return parse_ul(e);
00586 }
00587
00588 bool KHTMLReader::parse_font(DOM::Element e) {
00589
00590 QString face=e.getAttribute("face").string();
00591 QColor color=parsecolor("#000000");
00592 if (!e.getAttribute("color").string().isEmpty())
00593 color=parsecolor(e.getAttribute("color").string());
00594 QString size=e.getAttribute("size").string();
00595 int isize=-1;
00596 if (size.startsWith("+"))
00597 isize=12+size.right(size.length()-1).toInt();
00598 else if (size.startsWith("-"))
00599 isize=12-size.right(size.length()-1).toInt();
00600 else
00601 isize=12+size.toInt();
00602
00603 _writer->formatAttribute(state()->paragraph,"FONT","name",face);
00604 if ((isize>=0) && (isize != 12))
00605 _writer->formatAttribute(state()->paragraph,"SIZE","value",QString("%1").arg(isize));
00606
00607 _writer->formatAttribute(state()->paragraph,"COLOR","red",QString("%1").arg(color.red()));
00608 _writer->formatAttribute(state()->paragraph,"COLOR","green",QString("%1").arg(color.green()));
00609 _writer->formatAttribute(state()->paragraph,"COLOR","blue",QString("%1").arg(color.blue()));
00610 return true;
00611 }
00612
00613 bool KHTMLReader::parse_ul(DOM::Element e) {
00614 _list_depth++;
00615 bool popstateneeded = false;
00616 for (DOM::Node items=e.firstChild();!items.isNull();items=items.nextSibling()) {
00617 if (items.nodeName().string().lower() == "li") {
00618 if (popstateneeded) {
00619 popState();
00620
00621 }
00622 pushNewState();
00623 startNewLayout();
00624 popstateneeded = true;
00625 _writer->layoutAttribute(state()->paragraph,"COUNTER","numberingtype","1");
00626 _writer->layoutAttribute(state()->paragraph,"COUNTER","righttext",".");
00627 if (e.tagName().string().lower() == "ol")
00628 {
00629 _writer->layoutAttribute(state()->paragraph,"COUNTER","type","1");
00630 _writer->layoutAttribute(state()->paragraph,"COUNTER","numberingtype","1");
00631 _writer->layoutAttribute(state()->paragraph,"COUNTER","righttext",".");
00632 }
00633 else
00634 {
00635 _writer->layoutAttribute(state()->paragraph,"COUNTER","type","10");
00636 _writer->layoutAttribute(state()->paragraph,"COUNTER","numberingtype","");
00637 _writer->layoutAttribute(state()->paragraph,"COUNTER","righttext","");
00638 }
00639 _writer->layoutAttribute(state()->paragraph,"COUNTER","depth",QString("%1").arg(_list_depth-1));
00640 }
00641 parseNode(items);
00642 }
00643 if (popstateneeded)
00644 popState();
00645 _list_depth--;
00646 return false;
00647 }
00648