filters

Catalog.cc

00001 //========================================================================
00002 //
00003 // Catalog.cc
00004 //
00005 // Copyright 1996-2002 Glyph & Cog, LLC
00006 //
00007 //========================================================================
00008 
00009 #include <aconf.h>
00010 
00011 #ifdef USE_GCC_PRAGMAS
00012 #pragma implementation
00013 #endif
00014 
00015 #include <limits.h>
00016 #include <stddef.h>
00017 #include "gmem.h"
00018 #include "Object.h"
00019 #include "XRef.h"
00020 #include "Array.h"
00021 #include "Dict.h"
00022 #include "Page.h"
00023 #include "Error.h"
00024 #include "Link.h"
00025 #include "Catalog.h"
00026 
00027 // Upper bound on the recursion depth of Catalog::readPageTree().
00028 // Page tree nodes can reference their own ancestors, which would
00029 // otherwise leave the traversal recursing forever.
00030 // Most sane PDF documents don't nest their page tree deeper than 10.
00031 #define MAX_CALL_DEPTH 1000
00032 
00033 //------------------------------------------------------------------------
00034 // Catalog
00035 //------------------------------------------------------------------------
00036 
00037 Catalog::Catalog(XRef *xrefA) {
00038   Object catDict, pagesDict;
00039   Object obj, obj2;
00040   int numPages0;
00041   int i;
00042 
00043   ok = gTrue;
00044   xref = xrefA;
00045   pages = NULL;
00046   pageRefs = NULL;
00047   numPages = pagesSize = 0;
00048   baseURI = NULL;
00049 
00050   xref->getCatalog(&catDict);
00051   if (!catDict.isDict()) {
00052     error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
00053     goto err1;
00054   }
00055 
00056   // read page tree
00057   catDict.dictLookup("Pages", &pagesDict);
00058   // This should really be isDict("Pages"), but I've seen at least one
00059   // PDF file where the /Type entry is missing.
00060   if (!pagesDict.isDict()) {
00061     error(-1, "Top-level pages object is wrong type (%s)",
00062       pagesDict.getTypeName());
00063     goto err2;
00064   }
00065   pagesDict.dictLookup("Count", &obj);
00066   if (!obj.isInt()) {
00067     error(-1, "Page count in top-level pages object is wrong type (%s)",
00068       obj.getTypeName());
00069     goto err3;
00070   }
00071   pagesSize = numPages0 = obj.getInt();
00072   obj.free();
00073   if ((unsigned) pagesSize >= INT_MAX / sizeof(Page *) ||
00074       (unsigned) pagesSize >= INT_MAX / sizeof(Ref)) {
00075     error(-1, "Invalid 'pagesSize'");
00076     ok = gFalse;
00077     return;
00078   }
00079   pages = (Page **)gmalloc(pagesSize * sizeof(Page *));
00080   pageRefs = (Ref *)gmalloc(pagesSize * sizeof(Ref));
00081   for (i = 0; i < pagesSize; ++i) {
00082     pages[i] = NULL;
00083     pageRefs[i].num = -1;
00084     pageRefs[i].gen = -1;
00085   }
00086   numPages = readPageTree(pagesDict.getDict(), NULL, 0, 0);
00087   if (numPages != numPages0) {
00088     error(-1, "Page count in top-level pages object is incorrect");
00089   }
00090   pagesDict.free();
00091 
00092   // read named destination dictionary
00093   catDict.dictLookup("Dests", &dests);
00094 
00095   // read root of named destination tree
00096   if (catDict.dictLookup("Names", &obj)->isDict())
00097     obj.dictLookup("Dests", &nameTree);
00098   else
00099     nameTree.initNull();
00100   obj.free();
00101 
00102   // read base URI
00103   if (catDict.dictLookup("URI", &obj)->isDict()) {
00104     if (obj.dictLookup("Base", &obj2)->isString()) {
00105       baseURI = obj2.getString()->copy();
00106     }
00107     obj2.free();
00108   }
00109   obj.free();
00110 
00111   // get the metadata stream
00112   catDict.dictLookup("Metadata", &metadata);
00113 
00114   // get the structure tree root
00115   catDict.dictLookup("StructTreeRoot", &structTreeRoot);
00116 
00117   // get the outline dictionary
00118   catDict.dictLookup("Outlines", &outline);
00119 
00120   catDict.free();
00121   return;
00122 
00123  err3:
00124   obj.free();
00125  err2:
00126   pagesDict.free();
00127  err1:
00128   catDict.free();
00129   dests.initNull();
00130   nameTree.initNull();
00131   ok = gFalse;
00132 }
00133 
00134 Catalog::~Catalog() {
00135   int i;
00136 
00137   if (pages) {
00138     for (i = 0; i < pagesSize; ++i) {
00139       if (pages[i]) {
00140     delete pages[i];
00141       }
00142     }
00143     gfree(pages);
00144     gfree(pageRefs);
00145   }
00146   dests.free();
00147   nameTree.free();
00148   if (baseURI) {
00149     delete baseURI;
00150   }
00151   metadata.free();
00152   structTreeRoot.free();
00153   outline.free();
00154 }
00155 
00156 GString *Catalog::readMetadata() {
00157   GString *s;
00158   Dict *dict;
00159   Object obj;
00160   int c;
00161 
00162   if (!metadata.isStream()) {
00163     return NULL;
00164   }
00165   dict = metadata.streamGetDict();
00166   if (!dict->lookup("Subtype", &obj)->isName("XML")) {
00167     error(-1, "Unknown Metadata type: '%s'",
00168       obj.isName() ? obj.getName() : "???");
00169   }
00170   obj.free();
00171   s = new GString();
00172   metadata.streamReset();
00173   while ((c = metadata.streamGetChar()) != EOF) {
00174     s->append(c);
00175   }
00176   metadata.streamClose();
00177   return s;
00178 }
00179 
00180 int Catalog::readPageTree(Dict *pagesDict, PageAttrs *attrs, int start, int callDepth) {
00181   Object kids;
00182   Object kid;
00183   Object kidRef;
00184   PageAttrs *attrs1, *attrs2;
00185   Page *page;
00186   int i, j;
00187 
00188   attrs1 = new PageAttrs(attrs, pagesDict);
00189   pagesDict->lookup("Kids", &kids);
00190   if (!kids.isArray()) {
00191     error(-1, "Kids object (page %d) is wrong type (%s)",
00192       start+1, kids.getTypeName());
00193     goto err1;
00194   }
00195   for (i = 0; i < kids.arrayGetLength(); ++i) {
00196     kids.arrayGet(i, &kid);
00197     if (kid.isDict("Page")) {
00198       attrs2 = new PageAttrs(attrs1, kid.getDict());
00199       page = new Page(xref, start+1, kid.getDict(), attrs2);
00200       if (!page->isOk()) {
00201     ++start;
00202     goto err3;
00203       }
00204       if (start >= pagesSize) {
00205     pagesSize += 32;
00206         if ((unsigned) pagesSize >= INT_MAX / sizeof(Page *) ||
00207             (unsigned) pagesSize >= INT_MAX / sizeof(Ref)) {
00208           error(-1, "Invalid 'pagesSize' parameter.");
00209           goto err3;
00210         }
00211     pages = (Page **)grealloc(pages, pagesSize * sizeof(Page *));
00212     pageRefs = (Ref *)grealloc(pageRefs, pagesSize * sizeof(Ref));
00213     for (j = pagesSize - 32; j < pagesSize; ++j) {
00214       pages[j] = NULL;
00215       pageRefs[j].num = -1;
00216       pageRefs[j].gen = -1;
00217     }
00218       }
00219       pages[start] = page;
00220       kids.arrayGetNF(i, &kidRef);
00221       if (kidRef.isRef()) {
00222     pageRefs[start].num = kidRef.getRefNum();
00223     pageRefs[start].gen = kidRef.getRefGen();
00224       }
00225       kidRef.free();
00226       ++start;
00227     // This should really be isDict("Pages"), but I've seen at least one
00228     // PDF file where the /Type entry is missing.
00229     } else if (kid.isDict()) {
00230       if (callDepth > MAX_CALL_DEPTH) {
00231         error(-1, "Limit of %d recursive calls reached while reading the page tree. If your document is correct and not a test to try to force a crash, please report a bug.", MAX_CALL_DEPTH);
00232       } else {
00233         if ((start = readPageTree(kid.getDict(), attrs1, start, callDepth + 1))
00234         < 0)
00235       goto err2;
00236       }
00237     } else {
00238       error(-1, "Kid object (page %d) is wrong type (%s)",
00239         start+1, kid.getTypeName());
00240       goto err2;
00241     }
00242     kid.free();
00243   }
00244   delete attrs1;
00245   kids.free();
00246   return start;
00247 
00248  err3:
00249   delete page;
00250  err2:
00251   kid.free();
00252  err1:
00253   kids.free();
00254   delete attrs1;
00255   ok = gFalse;
00256   return -1;
00257 }
00258 
00259 int Catalog::findPage(int num, int gen) {
00260   int i;
00261 
00262   for (i = 0; i < numPages; ++i) {
00263     if (pageRefs[i].num == num && pageRefs[i].gen == gen)
00264       return i + 1;
00265   }
00266   return 0;
00267 }
00268 
00269 LinkDest *Catalog::findDest(GString *name) {
00270   LinkDest *dest;
00271   Object obj1, obj2;
00272   GBool found;
00273 
00274   // try named destination dictionary then name tree
00275   found = gFalse;
00276   if (dests.isDict()) {
00277     if (!dests.dictLookup(name->getCString(), &obj1)->isNull())
00278       found = gTrue;
00279     else
00280       obj1.free();
00281   }
00282   if (!found && nameTree.isDict()) {
00283     if (!findDestInTree(&nameTree, name, &obj1)->isNull())
00284       found = gTrue;
00285     else
00286       obj1.free();
00287   }
00288   if (!found)
00289     return NULL;
00290 
00291   // construct LinkDest
00292   dest = NULL;
00293   if (obj1.isArray()) {
00294     dest = new LinkDest(obj1.getArray());
00295   } else if (obj1.isDict()) {
00296     if (obj1.dictLookup("D", &obj2)->isArray())
00297       dest = new LinkDest(obj2.getArray());
00298     else
00299       error(-1, "Bad named destination value");
00300     obj2.free();
00301   } else {
00302     error(-1, "Bad named destination value");
00303   }
00304   obj1.free();
00305   if (dest && !dest->isOk()) {
00306     delete dest;
00307     dest = NULL;
00308   }
00309 
00310   return dest;
00311 }
00312 
00313 Object *Catalog::findDestInTree(Object *tree, GString *name, Object *obj) {
00314   Object names, name1;
00315   Object kids, kid, limits, low, high;
00316   GBool done, found;
00317   int cmp, i;
00318 
00319   // leaf node
00320   if (tree->dictLookup("Names", &names)->isArray()) {
00321     done = found = gFalse;
00322     for (i = 0; !done && i < names.arrayGetLength(); i += 2) {
00323       if (names.arrayGet(i, &name1)->isString()) {
00324     cmp = name->cmp(name1.getString());
00325     if (cmp == 0) {
00326       names.arrayGet(i+1, obj);
00327       found = gTrue;
00328       done = gTrue;
00329     } else if (cmp < 0) {
00330       done = gTrue;
00331     }
00332     name1.free();
00333       }
00334     }
00335     names.free();
00336     if (!found)
00337       obj->initNull();
00338     return obj;
00339   }
00340   names.free();
00341 
00342   // root or intermediate node
00343   done = gFalse;
00344   if (tree->dictLookup("Kids", &kids)->isArray()) {
00345     for (i = 0; !done && i < kids.arrayGetLength(); ++i) {
00346       if (kids.arrayGet(i, &kid)->isDict()) {
00347     if (kid.dictLookup("Limits", &limits)->isArray()) {
00348       if (limits.arrayGet(0, &low)->isString() &&
00349           name->cmp(low.getString()) >= 0) {
00350         if (limits.arrayGet(1, &high)->isString() &&
00351         name->cmp(high.getString()) <= 0) {
00352           findDestInTree(&kid, name, obj);
00353           done = gTrue;
00354         }
00355         high.free();
00356       }
00357       low.free();
00358     }
00359     limits.free();
00360       }
00361       kid.free();
00362     }
00363   }
00364   kids.free();
00365 
00366   // name was outside of ranges of all kids
00367   if (!done)
00368     obj->initNull();
00369 
00370   return obj;
00371 }
KDE Home | KDE Accessibility Home | Description of Access Keys