00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "khtmlreader.h"
00020
00021 #include "khtmlreader.moc"
00022 #include <kdebug.h>
00023 #include <dom/dom_text.h>
00024 #include <dom/dom2_views.h>
00025 #include <dom/dom_doc.h>
00026 #include <qcolor.h>
00027 #include <dom/dom_element.h>
00028 #include <dom/html_table.h>
00029 #include <khtmlview.h>
00030 #include <qwidget.h>
00031 #include <kapplication.h>
00032 #include <dom/html_misc.h>
00033 #include <qregexp.h>
00034
00035 KHTMLReader::KHTMLReader(KWDWriter *w){
00036 _html=new KHTMLPart();
00037 _writer=w;
00038 _it_worked=false;
00039 }
00040
00041
// Forward declarations of the modal-state helpers that filter() calls around
// its nested event loop. They are not defined in this file
// (presumably Qt-internal functions -- TODO confirm).
void qt_enter_modal( QWidget *widget );
void qt_leave_modal( QWidget *widget );
00044
00045
00046 bool KHTMLReader::filter(KURL url) {
00047 kdDebug(30503) << "KHTMLReader::filter" << endl;
00048 QObject::connect(_html,SIGNAL(completed()),this,SLOT(completed()));
00049
00050 _state.clear();
00051 _list_depth=0;
00052
00053 _html->view()->resize(600,530);
00054 _html->setAutoloadImages(false);
00055 _html->setJScriptEnabled(false);
00056 _html->setPluginsEnabled(false);
00057 _html->setJavaEnabled(false);
00058 _html->setMetaRefreshEnabled(false);
00059 if (_html->openURL(url) == false) {
00060 kdWarning(30503) << "openURL returned false" << endl;
00061 return false;
00062 }
00063
00064
00065 QWidget dummy(0,0,WType_Dialog | WShowModal);
00066 qt_enter_modal(&dummy);
00067 qApp->enter_loop();
00068 qt_leave_modal(&dummy);
00069 return _it_worked;
00070 }
00071
00072 HTMLReader_state *KHTMLReader::state() {
00073 if (_state.count() == 0) {
00074 HTMLReader_state *s=new HTMLReader_state;
00075 s->frameset=_writer->mainFrameset();
00076 s->paragraph = _writer->addParagraph(s->frameset);
00077 s->format=_writer->currentFormat(s->paragraph,true);
00078 s->layout=_writer->currentLayout(s->paragraph);
00079 s->in_pre_mode = false;
00080 _state.push(s);
00081 }
00082 return _state.top();
00083 }
00084
00085 HTMLReader_state *KHTMLReader::pushNewState() {
00086 HTMLReader_state *s=new HTMLReader_state;
00087 s->frameset=state()->frameset;
00088 s->paragraph=state()->paragraph;
00089 s->format=state()->format;
00090 s->layout=state()->layout;
00091 s->in_pre_mode=state()->in_pre_mode;
00092 _writer->cleanUpParagraph(s->paragraph);
00093 _state.push(s);
00094 return s;
00095 }
00096
00097
00098 void KHTMLReader::popState() {
00099 kdDebug(30503) << "Entering popState" << endl;
00100
00101 HTMLReader_state *s=_state.pop();
00102
00111 if (s->frameset == state()->frameset)
00112 {
00113 state()->paragraph=s->paragraph;
00114 if ((state()->layout != s->layout)) {
00115 if (_writer->getText(state()->paragraph).length()!=0) startNewLayout(false,state()->layout);
00116 }
00117 state()->format=_writer->startFormat(state()->paragraph, state()->format);
00118 }
00119 delete(s);
00120 }
00121
00122 void KHTMLReader::startNewLayout(bool startNewFormat) {
00123 QDomElement layout;
00124 startNewLayout(startNewFormat,layout);
00125 }
00126
00127 void KHTMLReader::startNewLayout(bool startNewFormat, QDomElement layout) {
00128 kdDebug() << "entering startNewLayout" << endl;
00129 startNewParagraph(startNewFormat,true);
00130 state()->layout=_writer->setLayout(state()->paragraph,layout);
00131 }
00132
00133
00134 void KHTMLReader::completed() {
00135 kdDebug(30503) << "KHTMLReader::completed" << endl;
00136 qApp->exit_loop();
00137 DOM::Document doc=_html->document();
00138 DOM::NodeList list=doc.getElementsByTagName("body");
00139 DOM::Node docbody=list.item(0);
00140
00141 if (docbody.isNull()) {
00142 kdWarning(30503) << "no <BODY>, giving up" << endl;
00143 _it_worked=false;
00144 return;
00145 }
00146
00147
00148 parseNode(docbody);
00149
00150 list = doc.getElementsByTagName("head");
00151 DOM::Node dochead=list.item(0);
00152 if (!dochead.isNull())
00153 parse_head(dochead);
00154 else
00155 kdWarning(30503) << "WARNING: no html <HEAD> section" << endl;
00156
00157 _writer->cleanUpParagraph(state()->paragraph);
00158 _it_worked=_writer->writeDoc();
00159 }
00160
00161
00162 void KHTMLReader::parseNode(DOM::Node node) {
00163 kdDebug(30503) << "Entering parseNode" << endl;
00164
00165 DOM::Text t=node;
00166 if (!t.isNull()) {
00167 _writer->addText(state()->paragraph,t.data().string(),1,state()->in_pre_mode);
00168 return;
00169 }
00170
00171
00172 state()->format=_writer->currentFormat(state()->paragraph,true);
00173 state()->layout=_writer->currentLayout(state()->paragraph);
00174 pushNewState();
00175
00176 DOM::Element e=node;
00177
00178 bool go_recursive=true;
00179
00180 if (!e.isNull()) {
00181
00182 parseStyle(e);
00183
00184 go_recursive=parseTag(e);
00185 }
00186 if (go_recursive) {
00187 for (DOM::Node q=node.firstChild(); !q.isNull(); q=q.nextSibling()) {
00188 parseNode(q);
00189 }
00190 }
00191 popState();
00192
00193
00194 }
00195
00196 void KHTMLReader::parse_head(DOM::Element e) {
00197 for (DOM::Element items=e.firstChild();!items.isNull();items=items.nextSibling()) {
00198 if (items.tagName().string().lower() == "title") {
00199 DOM::Text t=items.firstChild();
00200 if (!t.isNull()) {
00201 _writer->createDocInfo("HTML import filter",t.data().string());
00202 }
00203 }
00204 }
00205 }
00206
// _PP(x): tag dispatcher for use inside parseTag(). Expects a DOM::Element
// named `e` in scope. If the lower-cased tag name of `e` equals the
// stringified argument, the enclosing function returns the result of the
// member function parse_<x>(e).
#define _PP(x) { \
if (e.tagName().lower() == #x) \
return parse_##x(e); \
}
00211
// _PF(x,a,b,c): format-attribute shortcut for use inside parseTag().
// Expects a DOM::Element named `e` in scope. If the lower-cased tag name of
// `e` equals #x, calls formatAttribute(paragraph, #a, #b, #c) on the current
// paragraph (all three arguments are stringified) and makes the enclosing
// function return true.
#define _PF(x,a,b,c) { \
if (e.tagName().lower() == #x) \
{ \
_writer->formatAttribute(state()->paragraph, #a,#b,#c); \
return true; \
} \
}
00219
00220
00221
00222
// _PL(x,a,b,c): layout-attribute shortcut for use inside parseTag().
// Expects a DOM::Element named `e` in scope. If the lower-cased tag name of
// `e` equals #x, it
//   1. re-applies the current layout to the current paragraph via setLayout(),
//   2. starts a new paragraph when the current one already contains text,
//   3. calls layoutAttribute(paragraph, #a, #b, #c) (arguments stringified),
// and makes the enclosing function return true.
#define _PL(x,a,b,c) { \
if (e.tagName().lower() == #x) \
{ \
state()->layout=_writer->setLayout(state()->paragraph,state()->layout);\
if (!(_writer->getText(state()->paragraph).isEmpty())) \
startNewParagraph(false,false); \
_writer->layoutAttribute(state()->paragraph, #a,#b,#c); \
return true; \
} \
}
00233
00234
// Dispatches on the tag name of element e.
//
// Each _PP/_PF/_PL line below expands to an `if (tag matches) return ...;`,
// so the first matching entry ends the function. The return value tells
// parseNode() whether to descend into the children of e (true) or not
// (false).
bool KHTMLReader::parseTag(DOM::Element e) {
kdDebug(30503) << "Entering parseTag for " << e.tagName().lower() << endl;
// Tags with a dedicated parse_<tag>() handler; its result is returned.
_PP(a);
_PP(p);
_PP(br);
_PP(table);
_PP(pre);
_PP(ul);
_PP(ol);
_PP(font);
_PP(hr);



// Inline formatting tags: set one format attribute and return true.
_PF(b,WEIGHT,value,75);
_PF(strong,WEIGHT,value,75);
_PF(u,UNDERLINE,value,1);
_PF(i,ITALIC,value,1);

// Alignment tags: set the FLOW/align layout attribute and return true.
_PL(center,FLOW,align,center);
_PL(right,FLOW,align,right);
_PL(left,FLOW,align,left);

// Headings: set the NAME layout attribute to h1..h6 and return true.
_PL(h1,NAME,value,h1);
_PL(h2,NAME,value,h2);
_PL(h3,NAME,value,h3);
_PL(h4,NAME,value,h4);
_PL(h5,NAME,value,h5);
_PL(h6,NAME,value,h6);
// Only reached when none of the entries above matched.
kdDebug(30503) << "Leaving parseTag" << endl;


// Comment nodes and <script> elements: do not descend into their children.
if(e.nodeType() == DOM::Node::COMMENT_NODE || e.tagName().lower() == "script") {
return false;
}

// Any other tag: nothing special to do, just process the children.
return true;
}
00273
00274
00275
00276
00277
00278 void KHTMLReader::startNewParagraph(bool startnewformat, bool startnewlayout) {
00279 kdDebug() << "Entering startNewParagraph" << endl;
00280
00281 QDomElement qf=state()->format;
00282 QDomElement ql=state()->layout;
00283
00284 _writer->cleanUpParagraph(state()->paragraph);
00285
00286 if ((startnewlayout==true) || ql.isNull())
00287 {state()->paragraph=_writer->addParagraph(state()->frameset);}
00288 else
00289 {state()->paragraph=
00290 _writer->addParagraph(state()->frameset,state()->layout);}
00291
00292
00293
00294 if (qf.isNull() || (startnewformat==true)) {
00295 state()->format=_writer->startFormat(state()->paragraph);
00296 } else {
00297 state()->format=_writer->startFormat(state()->paragraph,qf);
00298 }
00299
00305 QString ct=_writer->getLayoutAttribute(state()->paragraph,"COUNTER","type");
00306 if ((!ct.isNull()) && (ct != "0")) {
00307 _writer->layoutAttribute(state()->paragraph,"COUNTER","type","0");
00308 _writer->layoutAttribute(state()->paragraph,"COUNTER","numberingtype","0");
00309 _writer->layoutAttribute(state()->paragraph,"COUNTER","righttext","");
00310 int currdepth=(_writer->getLayoutAttribute(state()->paragraph,"COUNTER","depth")).toInt();
00311 _writer->layoutAttribute(state()->paragraph,"COUNTER","depth",QString("%1").arg(currdepth+1));
00312 }
00313 }
00314
// Releases the KHTMLPart allocated in the constructor. The writer pointer
// is not deleted here.
KHTMLReader::~KHTMLReader(){
delete _html;
}
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329 bool KHTMLReader::parse_CommonAttributes(DOM::Element e) {
00330 kdDebug(30503) << "entering KHTMLReader::parse_CommonAttributes" << endl;
00331 kdDebug(30503) << "tagName is " << e.tagName().string() << endl;
00332 QString s=e.getAttribute("align").string();
00333 if (!s.isEmpty())
00334 {
00335 _writer->formatAttribute(state()->paragraph,"FLOW","align",s);
00336 }
00337 QRegExp rx( "h[0-9]+" );
00338 if ( 0 == rx.search( e.getAttribute("class").string() ) )
00339
00340 {
00341 _writer->layoutAttribute(state()->paragraph,"NAME","value",e.getAttribute("class").string());
00342 }
00343 kdDebug(30503) << "leaving parse_CommonAttributes" << endl;
00344 return true;
00345 }
00346
00347 bool KHTMLReader::parse_a(DOM::Element e) {
00348 QString url = e.getAttribute("href").string();
00349 if (!url.isEmpty())
00350 {
00351 QString linkName;
00352 DOM::Text t = e.firstChild();
00353 if (t.isNull()) {
00354
00355 return false;
00356 }
00357 linkName = t.data().string().simplifyWhiteSpace();
00358 t.setData(DOM::DOMString("#"));
00359 _writer->createLink(state()->paragraph, linkName, url);
00360 }
00361 return true;
00362 }
00363
00364 bool KHTMLReader::parse_p(DOM::Element e)
00365 {
00366
00367
00368 kdDebug() << "entering parse_p" << endl;
00369 if (!(_writer->getText(state()->paragraph).isEmpty()))
00370 startNewParagraph(false,false);
00371 parse_CommonAttributes(e);
00372 kdDebug() << "leaving parse_p" << endl;
00373 return true;
00374 }
00375
00376 bool KHTMLReader::parse_hr(DOM::Element ) {
00377 startNewParagraph();
00378 _writer->createHR(state()->paragraph);
00379 startNewParagraph();
00380 return true;
00381 }
00382
// Handles <br>: starts a new paragraph that keeps the current format and
// layout. Returns false, so parseNode() does not descend into children.
bool KHTMLReader::parse_br(DOM::Element ) {
startNewParagraph(false,false);
return false;
}
00387
00388 static const QColor parsecolor(const QString& colorstring) {
00389 QColor color;
00390 if (colorstring[0]=='#') {
00391 color.setRgb(
00392 colorstring.mid(1,2).toInt(0,16),
00393 colorstring.mid(3,2).toInt(0,16),
00394 colorstring.mid(5,2).toInt(0,16)
00395 );
00396 } else {
00397 QString colorlower=colorstring.lower();
00398
00399 if (colorlower=="black")
00400 color.setRgb(0,0,0);
00401 else if (colorlower=="white")
00402 color.setRgb(255,255,255);
00403 else if (colorlower=="silver")
00404 color.setRgb(0xc0,0xc0,0xc0);
00405 else if (colorlower=="gray")
00406 color.setRgb(128,128,128);
00407
00408 else if (colorlower=="red")
00409 color.setRgb(255,0,0);
00410 else if (colorlower=="lime")
00411 color.setRgb(0,255,0);
00412 else if (colorlower=="blue")
00413 color.setRgb(0,0,255);
00414 else if (colorlower=="yellow")
00415 color.setRgb(255,255,0);
00416 else if (colorlower=="fuchsia")
00417 color.setRgb(255,0,255);
00418 else if (colorlower=="aqua")
00419 color.setRgb(0,255,255);
00420
00421 else if (colorlower=="maroon")
00422 color.setRgb(128,0,0);
00423 else if (colorlower=="green")
00424 color.setRgb(0,128,0);
00425 else if (colorlower=="navy")
00426 color.setRgb(0,0,128);
00427 else if (colorlower=="olive")
00428 color.setRgb(128,128,0);
00429 else if (colorlower=="purple")
00430 color.setRgb(128,0,128);
00431 else if (colorlower=="teal")
00432 color.setRgb(0,128,128);
00433 else {
00434
00435
00436 color.setNamedColor(colorstring);
00437 }
00438 }
00439 return colorstring;
00440 }
00441
00442 void KHTMLReader::parseStyle(DOM::Element e) {
00443
00444
00445
00446 kdDebug(30503) << "entering parseStyle" << endl;
00447 DOM::CSSStyleDeclaration s1=e.style();
00448 DOM::Document doc=_html->document();
00449 DOM::CSSStyleDeclaration s2=doc.defaultView().getComputedStyle(e,"");
00450
00451 kdDebug(30503) << "font-weight=" << s1.getPropertyValue("font-weight").string() << endl;
00452 if ( s1.getPropertyValue("font-weight").string() == "bolder" )
00453 {
00454 _writer->formatAttribute(state()->paragraph,"WEIGHT","value","75");
00455 }
00456 if ( s1.getPropertyValue("font-weight").string() == "bold" )
00457 {
00458 _writer->formatAttribute(state()->paragraph,"WEIGHT","value","75");
00459 }
00460
00461
00462 if ( s1.getPropertyValue("color").string() != QString() )
00463 {
00464 QColor c=parsecolor(s1.getPropertyValue("color").string());
00465 _writer->formatAttribute(state()->paragraph,"COLOR","red",QString::number(c.red()));
00466 _writer->formatAttribute(state()->paragraph,"COLOR","green",QString::number(c.green()));
00467 _writer->formatAttribute(state()->paragraph,"COLOR","blue",QString::number(c.blue()));
00468 }
00469
00470
00471 if ( s1.getPropertyValue("font-size").string() != QString() )
00472 {
00473 QString size=s1.getPropertyValue("font-size").string();
00474 if (size.endsWith("pt"))
00475 {
00476 size=size.left(size.length()-2);
00477 }
00478 _writer->formatAttribute(state()->paragraph,"SIZE","value",size);
00479 }
00480
00481
00482 if ( s1.getPropertyValue("text-align").string() != QString() )
00483 {
00484 state()->layout=_writer->setLayout(state()->paragraph,state()->layout);
00485 _writer->layoutAttribute(state()->paragraph, "FLOW","align",s1.getPropertyValue("text-align").string());
00486 }
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503 }
00504
00505 bool KHTMLReader::parse_table(DOM::Element e) {
00506 if(_writer->isInTable()) {
00507
00508
00509 for (DOM::Node rows=e.firstChild().firstChild();!rows.isNull();rows=rows.nextSibling())
00510 if (!rows.isNull() && rows.nodeName().string().lower() == "tr")
00511 for (DOM::Node cols=rows.firstChild();!cols.isNull();cols=cols.nextSibling())
00512 if (!cols.isNull())
00513 parseNode(cols);
00514 return false;
00515 }
00516
00517 DOM::Element table_body=e.firstChild();
00518 if(table_body.isNull()) {
00519
00520
00521
00522 return true;
00523 }
00524
00525 int tableno=_writer->createTable();
00526 int nrow=0;
00527 int ncol=0;
00528 bool has_borders=false;
00529 QColor bgcolor=parsecolor("#FFFFFF");
00530
00531 if (!table_body.getAttribute("bgcolor").string().isEmpty())
00532 bgcolor=parsecolor(table_body.getAttribute("bgcolor").string());
00533 if ((e.getAttribute("border").string().toInt() > 0))
00534 has_borders=true;
00535
00536
00537
00538 for (DOM::Node rowsnode=table_body.firstChild();!rowsnode.isNull();rowsnode=rowsnode.nextSibling()) {
00539 DOM::Element rows = rowsnode;
00540 if (!rows.isNull() && rows.tagName().string().lower() == "tr") {
00541 QColor obgcolor=bgcolor;
00542 if (!rows.getAttribute("bgcolor").string().isEmpty())
00543 bgcolor=parsecolor(rows.getAttribute("bgcolor").string());
00544
00545 ncol=0;
00546 for (DOM::Node colsnode=rows.firstChild();!colsnode.isNull();colsnode=colsnode.nextSibling()) {
00547 DOM::Element cols = colsnode;
00548 const QString nodename = cols.isNull() ? QString::null : cols.nodeName().string().lower();
00549 if (nodename == "td" || nodename == "th") {
00550 QColor bbgcolor=bgcolor;
00551 if (!cols.getAttribute("bgcolor").string().isEmpty())
00552 bgcolor=parsecolor(cols.getAttribute("bgcolor").string());
00553
00554 pushNewState();
00555 QRect colrect=cols.getRect();
00556 state()->frameset=_writer->createTableCell(tableno,nrow,ncol,1,colrect);
00557 state()->frameset.firstChild().toElement().setAttribute("bkRed",bgcolor.red());
00558 state()->frameset.firstChild().toElement().setAttribute("bkGreen",bgcolor.green());
00559 state()->frameset.firstChild().toElement().setAttribute("bkBlue",bgcolor.blue());
00560 if (has_borders) {
00561 state()->frameset.firstChild().toElement().setAttribute("lWidth",1);
00562 state()->frameset.firstChild().toElement().setAttribute("rWidth",1);
00563 state()->frameset.firstChild().toElement().setAttribute("bWidth",1);
00564 state()->frameset.firstChild().toElement().setAttribute("tWidth",1);
00565 }
00566
00567
00568 state()->paragraph=_writer->addParagraph(state()->frameset);
00569 parseNode(cols);
00570 _writer->cleanUpParagraph(state()->paragraph);
00571 popState();
00572 ncol++;
00573 bgcolor=bbgcolor;
00574 }
00575 }
00576 nrow++;
00577 bgcolor=obgcolor;
00578 }
00579 }
00580 _writer->finishTable(tableno);
00581 startNewParagraph(false,false);
00582 _writer->createInline(state()->paragraph,_writer->fetchTableCell(tableno,0,0));
00583 startNewParagraph(false,false);
00584 return false;
00585 }
00586
// Placeholder for <img>: does nothing and returns true.
// NOTE(review): parseTag() in this file has no dispatch entry for "img".
bool KHTMLReader::parse_img(DOM::Element ) {

return true;
}
00591
// Handles <pre>.
//
// Active (#else) branch: pushes a new state with in_pre_mode switched on,
// parses all children of e under that state, pops the state again and
// returns false so that parseNode() does not visit the children a second
// time. parseNode() passes in_pre_mode to KWDWriter::addText().
bool KHTMLReader::parse_pre(DOM::Element e) {
#if 0 // see Bug #74601 (normal): kword doesn't recognize PRE-tags in HTML

// Disabled variant: adds the element's innerHTML as a single text run.
DOM::HTMLElement htmlelement(e);
if(! htmlelement.isNull())
_writer->addText(state()->paragraph,htmlelement.innerHTML().string(),1);
startNewParagraph();

return false;
#else
pushNewState();
state()->in_pre_mode=true;
for (DOM::Node q=e.firstChild(); !q.isNull(); q=q.nextSibling()) {
parseNode(q); // parse everything...
}
popState();
return false; // children are already handled above
#endif
}
00612
// Handles <ol> by delegating to parse_ul(), which checks the tag name of e
// to choose between numbered and bulleted counters.
bool KHTMLReader::parse_ol(DOM::Element e) {
return parse_ul(e);
}
00616
00617 bool KHTMLReader::parse_font(DOM::Element e) {
00618
00619 QString face=e.getAttribute("face").string();
00620 QColor color=parsecolor("#000000");
00621 if (!e.getAttribute("color").string().isEmpty())
00622 color=parsecolor(e.getAttribute("color").string());
00623 QString size=e.getAttribute("size").string();
00624 int isize=-1;
00625 if (size.startsWith("+"))
00626 isize=12+size.right(size.length()-1).toInt();
00627 else if (size.startsWith("-"))
00628 isize=12-size.right(size.length()-1).toInt();
00629 else
00630 isize=12+size.toInt();
00631
00632 _writer->formatAttribute(state()->paragraph,"FONT","name",face);
00633 if ((isize>=0) && (isize != 12))
00634 _writer->formatAttribute(state()->paragraph,"SIZE","value",QString("%1").arg(isize));
00635
00636 _writer->formatAttribute(state()->paragraph,"COLOR","red",QString("%1").arg(color.red()));
00637 _writer->formatAttribute(state()->paragraph,"COLOR","green",QString("%1").arg(color.green()));
00638 _writer->formatAttribute(state()->paragraph,"COLOR","blue",QString("%1").arg(color.blue()));
00639 return true;
00640 }
00641
00642 bool KHTMLReader::parse_ul(DOM::Element e) {
00643
00644 _list_depth++;
00645 for (DOM::Node items=e.firstChild();!items.isNull();items=items.nextSibling()) {
00646 if (items.nodeName().string().lower() == "li") {
00647 if (!(_writer->getText(state()->paragraph).isEmpty())) startNewLayout();
00648 _writer->layoutAttribute(state()->paragraph,"COUNTER","numberingtype","1");
00649 _writer->layoutAttribute(state()->paragraph,"COUNTER","righttext",".");
00650 if (e.tagName().string().lower() == "ol")
00651 {
00652 _writer->layoutAttribute(state()->paragraph,"COUNTER","type","1");
00653 _writer->layoutAttribute(state()->paragraph,"COUNTER","numberingtype","1");
00654 _writer->layoutAttribute(state()->paragraph,"COUNTER","righttext",".");
00655 }
00656 else
00657 {
00658 _writer->layoutAttribute(state()->paragraph,"COUNTER","type","10");
00659 _writer->layoutAttribute(state()->paragraph,"COUNTER","numberingtype","");
00660 _writer->layoutAttribute(state()->paragraph,"COUNTER","righttext","");
00661 }
00662 _writer->layoutAttribute(state()->paragraph,"COUNTER","depth",QString("%1").arg(_list_depth-1));
00663 }
00664 parseNode(items);
00665 }
00666 startNewLayout();
00667 _list_depth--;
00668 return false;
00669 }
00670