filters

pole.cpp

00001 /* POLE - Portable C++ library to access OLE Storage
00002    Copyright (C) 2002-2005 Ariya Hidayat <ariya@kde.org>
00003 
00004    Redistribution and use in source and binary forms, with or without
00005    modification, are permitted provided that the following conditions
00006    are met:
00007    * Redistributions of source code must retain the above copyright notice,
00008      this list of conditions and the following disclaimer.
00009    * Redistributions in binary form must reproduce the above copyright notice,
00010      this list of conditions and the following disclaimer in the documentation
00011      and/or other materials provided with the distribution.
00012    * Neither the name of the authors nor the names of its contributors may be
00013      used to endorse or promote products derived from this software without
00014      specific prior written permission.
00015 
00016    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00017    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00018    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00019    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00020    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00021    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00022    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00023    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00024    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00025    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
00026    THE POSSIBILITY OF SUCH DAMAGE.
00027 */
00028 
00029 #include <fstream>
00030 #include <iostream>
00031 #include <list>
00032 #include <string>
00033 #include <vector>
00034 
00035 #include "pole.h"
00036 
00037 // enable to activate debugging output
00038 // #define POLE_DEBUG
00039 
00040 namespace POLE
00041 {
00042 
00043 class Header
00044 {
00045   public:
00046     unsigned char id[8];       // signature, or magic identifier
00047     unsigned b_shift;          // bbat->blockSize = 1 << b_shift
00048     unsigned s_shift;          // sbat->blockSize = 1 << s_shift
00049     unsigned num_bat;          // blocks allocated for big bat
00050     unsigned dirent_start;     // starting block for directory info
00051     unsigned threshold;        // switch from small to big file (usually 4K)
00052     unsigned sbat_start;       // starting block index to store small bat
00053     unsigned num_sbat;         // blocks allocated for small bat
00054     unsigned mbat_start;       // starting block to store meta bat
00055     unsigned num_mbat;         // blocks allocated for meta bat
00056     unsigned long bb_blocks[109];
00057 
00058     Header();
00059     bool valid();
00060     void load( const unsigned char* buffer );
00061     void save( unsigned char* buffer );
00062     void debug();
00063 };
00064 
00065 class AllocTable
00066 {
00067   public:
00068     static const unsigned Eof;
00069     static const unsigned Avail;
00070     static const unsigned Bat;
00071     static const unsigned MetaBat;
00072     unsigned blockSize;
00073     AllocTable();
00074     void clear();
00075     unsigned long count();
00076     void resize( unsigned long newsize );
00077     void preserve( unsigned long n );
00078     void set( unsigned long index, unsigned long val );
00079     unsigned unused();
00080     void setChain( std::vector<unsigned long> );
00081     std::vector<unsigned long> follow( unsigned long start );
00082     unsigned long operator[](unsigned long index );
00083     void load( const unsigned char* buffer, unsigned len );
00084     void save( unsigned char* buffer );
00085     unsigned size();
00086     void debug();
00087   private:
00088     std::vector<unsigned long> data;
00089     AllocTable( const AllocTable& );
00090     AllocTable& operator=( const AllocTable& );
00091 };
00092 
00093 class DirEntry
00094 {
00095   public:
00096     bool valid;            // false if invalid (should be skipped)
00097     std::string name;      // the name, not in unicode anymore
00098     bool dir;              // true if directory
00099     unsigned long size;    // size (not valid if directory)
00100     unsigned long start;   // starting block
00101     unsigned prev;         // previous sibling
00102     unsigned next;         // next sibling
00103     unsigned child;        // first child
00104 };
00105 
00106 class DirTree
00107 {
00108   public:
00109     static const unsigned End;
00110     DirTree();
00111     void clear();
00112     unsigned entryCount();
00113     DirEntry* entry( unsigned index );
00114     DirEntry* entry( const std::string& name, bool create=false );
00115     int indexOf( DirEntry* e );
00116     int parent( unsigned index );
00117     std::string fullName( unsigned index );
00118     std::vector<unsigned> children( unsigned index );
00119     void load( unsigned char* buffer, unsigned len );
00120     void save( unsigned char* buffer );
00121     unsigned size();
00122     void debug();
00123   private:
00124     std::vector<DirEntry> entries;
00125     DirTree( const DirTree& );
00126     DirTree& operator=( const DirTree& );
00127 };
00128 
00129 class StorageIO
00130 {
00131   public:
00132     Storage* storage;         // owner
00133     std::string filename;     // filename
00134     std::fstream file;        // associated with above name
00135     int result;               // result of operation
00136     bool opened;              // true if file is opened
00137     unsigned long filesize;   // size of the file
00138 
00139     Header* header;           // storage header
00140     DirTree* dirtree;         // directory tree
00141     AllocTable* bbat;         // allocation table for big blocks
00142     AllocTable* sbat;         // allocation table for small blocks
00143 
00144     unsigned long lastBlockIndex;         // last read, for simple caching
00145     unsigned char* lastBlockData;
00146 
00147     std::vector<unsigned long> sb_blocks; // blocks for "small" files
00148 
00149     std::list<Stream*> streams;
00150 
00151     StorageIO( Storage* storage, const char* filename );
00152     ~StorageIO();
00153 
00154     bool open();
00155     void close();
00156     void flush();
00157     void load();
00158     void create();
00159 
00160     unsigned long loadBigBlocks( std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen );
00161 
00162     unsigned long loadBigBlock( unsigned long block, unsigned char* buffer, unsigned long maxlen );
00163 
00164     unsigned long loadSmallBlocks( std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen );
00165 
00166     unsigned long loadSmallBlock( unsigned long block, unsigned char* buffer, unsigned long maxlen );
00167 
00168     StreamIO* streamIO( const std::string& name );
00169 
00170   private:
00171     // no copy or assign
00172     StorageIO( const StorageIO& );
00173     StorageIO& operator=( const StorageIO& );
00174 
00175 };
00176 
00177 class StreamIO
00178 {
00179   public:
00180     StorageIO* io;
00181     DirEntry* entry;
00182     std::string fullName;
00183     bool eof;
00184     bool fail;
00185 
00186     StreamIO( StorageIO* io, DirEntry* entry );
00187     ~StreamIO();
00188     unsigned long size();
00189     void seek( unsigned long pos );
00190     unsigned long tell();
00191     int getch();
00192     unsigned long read( unsigned char* data, unsigned long maxlen );
00193     unsigned long read( unsigned long pos, unsigned char* data, unsigned long maxlen );
00194 
00195 
00196   private:
00197     std::vector<unsigned long> blocks;
00198 
00199     // no copy or assign
00200     StreamIO( const StreamIO& );
00201     StreamIO& operator=( const StreamIO& );
00202 
00203     // pointer for read
00204     unsigned long m_pos;
00205 
00206     // simple cache system to speed-up getch()
00207     unsigned char* cache_data;
00208     unsigned long cache_size;
00209     unsigned long cache_pos;
00210     void updateCache();
00211 };
00212 
00213 } // namespace POLE
00214 
00215 using namespace POLE;
00216 
00217 static inline unsigned long readU16( const unsigned char* ptr )
00218 {
00219   return ptr[0]+(ptr[1]<<8);
00220 }
00221 
00222 static inline unsigned long readU32( const unsigned char* ptr )
00223 {
00224   return ptr[0]+(ptr[1]<<8)+(ptr[2]<<16)+(ptr[3]<<24);
00225 }
00226 
00227 static inline void writeU16( unsigned char* ptr, unsigned long data )
00228 {
00229   ptr[0] = (unsigned char)(data & 0xff);
00230   ptr[1] = (unsigned char)((data >> 8) & 0xff);
00231 }
00232 
00233 static inline void writeU32( unsigned char* ptr, unsigned long data )
00234 {
00235   ptr[0] = (unsigned char)(data & 0xff);
00236   ptr[1] = (unsigned char)((data >> 8) & 0xff);
00237   ptr[2] = (unsigned char)((data >> 16) & 0xff);
00238   ptr[3] = (unsigned char)((data >> 24) & 0xff);
00239 }
00240 
00241 static const unsigned char pole_magic[] =
00242  { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
00243 
00244 // =========== Header ==========
00245 
00246 Header::Header()
00247 {
00248   b_shift = 9;
00249   s_shift = 6;
00250   num_bat = 0;
00251   dirent_start = 0;
00252   threshold = 4096;
00253   sbat_start = 0;
00254   num_sbat = 0;
00255   mbat_start = 0;
00256   num_mbat = 0;
00257 
00258   for( unsigned i = 0; i < 8; i++ )
00259     id[i] = pole_magic[i];
00260   for( unsigned i=0; i<109; i++ )
00261     bb_blocks[i] = AllocTable::Avail;
00262 }
00263 
00264 bool Header::valid()
00265 {
00266   if( threshold != 4096 ) return false;
00267   if( num_bat == 0 ) return false;
00268   if( (num_bat > 109) && (num_bat > (num_mbat * 127) + 109)) return false;
00269   if( (num_bat < 109) && (num_mbat != 0) ) return false;
00270   if( s_shift > b_shift ) return false;
00271   if( b_shift <= 6 ) return false;
00272   if( b_shift >=31 ) return false;
00273 
00274   return true;
00275 }
00276 
00277 void Header::load( const unsigned char* buffer )
00278 {
00279   b_shift      = readU16( buffer + 0x1e );
00280   s_shift      = readU16( buffer + 0x20 );
00281   num_bat      = readU32( buffer + 0x2c );
00282   dirent_start = readU32( buffer + 0x30 );
00283   threshold    = readU32( buffer + 0x38 );
00284   sbat_start   = readU32( buffer + 0x3c );
00285   num_sbat     = readU32( buffer + 0x40 );
00286   mbat_start   = readU32( buffer + 0x44 );
00287   num_mbat     = readU32( buffer + 0x48 );
00288 
00289   for( unsigned i = 0; i < 8; i++ )
00290     id[i] = buffer[i];
00291   for( unsigned i=0; i<109; i++ )
00292     bb_blocks[i] = readU32( buffer + 0x4C+i*4 );
00293 }
00294 
00295 void Header::save( unsigned char* buffer )
00296 {
00297   memset( buffer, 0, 0x4c );
00298   memcpy( buffer, pole_magic, 8 );        // ole signature
00299   writeU32( buffer + 8, 0 );              // unknown
00300   writeU32( buffer + 12, 0 );             // unknown
00301   writeU32( buffer + 16, 0 );             // unknown
00302   writeU16( buffer + 24, 0x003e );        // revision ?
00303   writeU16( buffer + 26, 3 );             // version ?
00304   writeU16( buffer + 28, 0xfffe );        // unknown
00305   writeU16( buffer + 0x1e, b_shift );
00306   writeU16( buffer + 0x20, s_shift );
00307   writeU32( buffer + 0x2c, num_bat );
00308   writeU32( buffer + 0x30, dirent_start );
00309   writeU32( buffer + 0x38, threshold );
00310   writeU32( buffer + 0x3c, sbat_start );
00311   writeU32( buffer + 0x40, num_sbat );
00312   writeU32( buffer + 0x44, mbat_start );
00313   writeU32( buffer + 0x48, num_mbat );
00314 
00315   for( unsigned i=0; i<109; i++ )
00316     writeU32( buffer + 0x4C+i*4, bb_blocks[i] );
00317 }
00318 
00319 void Header::debug()
00320 {
00321   std::cout << std::endl;
00322   std::cout << "b_shift " << b_shift << std::endl;
00323   std::cout << "s_shift " << s_shift << std::endl;
00324   std::cout << "num_bat " << num_bat << std::endl;
00325   std::cout << "dirent_start " << dirent_start << std::endl;
00326   std::cout << "threshold " << threshold << std::endl;
00327   std::cout << "sbat_start " << sbat_start << std::endl;
00328   std::cout << "num_sbat " << num_sbat << std::endl;
00329   std::cout << "mbat_start " << mbat_start << std::endl;
00330   std::cout << "num_mbat " << num_mbat << std::endl;
00331 
00332   unsigned s = (num_bat<=109) ? num_bat : 109;
00333   std::cout << "bat blocks: ";
00334   for( unsigned i = 0; i < s; i++ )
00335     std::cout << bb_blocks[i] << " ";
00336   std::cout << std::endl;
00337 }
00338 
00339 // =========== AllocTable ==========
00340 
00341 const unsigned AllocTable::Avail = 0xffffffff;
00342 const unsigned AllocTable::Eof = 0xfffffffe;
00343 const unsigned AllocTable::Bat = 0xfffffffd;
00344 const unsigned AllocTable::MetaBat = 0xfffffffc;
00345 
00346 AllocTable::AllocTable()
00347 {
00348   blockSize = 4096;
00349   // initial size
00350   resize( 128 );
00351 }
00352 
00353 unsigned long AllocTable::count()
00354 {
00355   return data.size();
00356 }
00357 
00358 void AllocTable::resize( unsigned long newsize )
00359 {
00360   unsigned oldsize = data.size();
00361   data.resize( newsize );
00362   if( newsize > oldsize )
00363     for( unsigned i = oldsize; i<newsize; i++ )
00364       data[i] = Avail;
00365 }
00366 
00367 // make sure there're still free blocks
00368 void AllocTable::preserve( unsigned long n )
00369 {
00370   std::vector<unsigned long> pre;
00371   for( unsigned i=0; i < n; i++ )
00372     pre.push_back( unused() );
00373 }
00374 
00375 unsigned long AllocTable::operator[]( unsigned long index )
00376 {
00377   unsigned long result;
00378   result = data[index];
00379   return result;
00380 }
00381 
00382 void AllocTable::set( unsigned long index, unsigned long value )
00383 {
00384   if( index >= count() ) resize( index + 1);
00385   data[ index ] = value;
00386 }
00387 
00388 void AllocTable::setChain( std::vector<unsigned long> chain )
00389 {
00390   if( chain.size() )
00391   {
00392     for( unsigned i=0; i<chain.size()-1; i++ )
00393       set( chain[i], chain[i+1] );
00394     set( chain[ chain.size()-1 ], AllocTable::Eof );
00395   }
00396 }
00397 
00398 // follow
00399 std::vector<unsigned long> AllocTable::follow( unsigned long start )
00400 {
00401   std::vector<unsigned long> chain;
00402 
00403   if( start >= count() ) return chain;
00404 
00405   unsigned long p = start;
00406   while( p < count() )
00407   {
00408     if( p == (unsigned long)Eof ) break;
00409     if( p == (unsigned long)Bat ) break;
00410     if( p == (unsigned long)MetaBat ) break;
00411     if( p >= count() ) break;
00412     chain.push_back( p );
00413     if( data[p] >= count() ) break;
00414     p = data[ p ];
00415   }
00416 
00417   return chain;
00418 }
00419 
00420 unsigned AllocTable::unused()
00421 {
00422   // find first available block
00423   for( unsigned i = 0; i < data.size(); i++ )
00424     if( data[i] == Avail )
00425       return i;
00426 
00427   // completely full, so enlarge the table
00428   unsigned block = data.size();
00429   resize( data.size()+10 );
00430   return block;
00431 }
00432 
00433 void AllocTable::load( const unsigned char* buffer, unsigned len )
00434 {
00435   resize( len / 4 );
00436   for( unsigned i = 0; i < count(); i++ )
00437     set( i, readU32( buffer + i*4 ) );
00438 }
00439 
00440 // return space required to save this dirtree
00441 unsigned AllocTable::size()
00442 {
00443   return count() * 4;
00444 }
00445 
00446 void AllocTable::save( unsigned char* buffer )
00447 {
00448   for( unsigned i = 0; i < count(); i++ )
00449     writeU32( buffer + i*4, data[i] );
00450 }
00451 
00452 void AllocTable::debug()
00453 {
00454   std::cout << "block size " << data.size() << std::endl;
00455   for( unsigned i=0; i< data.size(); i++ )
00456   {
00457      if( data[i] == Avail ) continue;
00458      std::cout << i << ": ";
00459      if( data[i] == Eof ) std::cout << "[eof]";
00460      else if( data[i] == Bat ) std::cout << "[bat]";
00461      else if( data[i] == MetaBat ) std::cout << "[metabat]";
00462      else std::cout << data[i];
00463      std::cout << std::endl;
00464   }
00465 }
00466 
00467 // =========== DirTree ==========
00468 
00469 const unsigned DirTree::End = 0xffffffff;
00470 
00471 DirTree::DirTree()
00472 {
00473   clear();
00474 }
00475 
00476 void DirTree::clear()
00477 {
00478   // leave only root entry
00479   entries.resize( 1 );
00480   entries[0].valid = true;
00481   entries[0].name = "Root Entry";
00482   entries[0].dir = true;
00483   entries[0].size = 0;
00484   entries[0].start = End;
00485   entries[0].prev = End;
00486   entries[0].next = End;
00487   entries[0].child = End;
00488 }
00489 
00490 unsigned DirTree::entryCount()
00491 {
00492   return entries.size();
00493 }
00494 
00495 DirEntry* DirTree::entry( unsigned index )
00496 {
00497   if( index >= entryCount() ) return (DirEntry*) 0;
00498   return &entries[ index ];
00499 }
00500 
00501 int DirTree::indexOf( DirEntry* e )
00502 {
00503   for( unsigned i = 0; i < entryCount(); i++ )
00504     if( entry( i ) == e ) return i;
00505 
00506   return -1;
00507 }
00508 
00509 int DirTree::parent( unsigned index )
00510 {
00511   // brute-force, basically we iterate for each entries, find its children
00512   // and check if one of the children is 'index'
00513   for( unsigned j=0; j<entryCount(); j++ )
00514   {
00515     std::vector<unsigned> chi = children( j );
00516     for( unsigned i=0; i<chi.size();i++ )
00517       if( chi[i] == index )
00518         return j;
00519   }
00520 
00521   return -1;
00522 }
00523 
00524 std::string DirTree::fullName( unsigned index )
00525 {
00526   // don't use root name ("Root Entry"), just give "/"
00527   if( index == 0 ) return "/";
00528 
00529   std::string result = entry( index )->name;
00530   result.insert( 0,  "/" );
00531   int p = parent( index );
00532   DirEntry * _entry = 0;
00533   while( p > 0 )
00534   {
00535     _entry = entry( p );
00536     if (_entry->dir && _entry->valid)
00537     {
00538       result.insert( 0,  _entry->name);
00539       result.insert( 0,  "/" );
00540     }
00541     --p;
00542     index = p;
00543     if( index <= 0 ) break;
00544   }
00545   return result;
00546 }
00547 
00548 // given a fullname (e.g "/ObjectPool/_1020961869"), find the entry
00549 // if not found and create is false, return 0
00550 // if create is true, a new entry is returned
00551 DirEntry* DirTree::entry( const std::string& name, bool create )
00552 {
00553    if( !name.length() ) return (DirEntry*)0;
00554 
00555    // quick check for "/" (that's root)
00556    if( name == "/" ) return entry( 0 );
00557 
00558    // split the names, e.g  "/ObjectPool/_1020961869" will become:
00559    // "ObjectPool" and "_1020961869"
00560    std::list<std::string> names;
00561    std::string::size_type start = 0, end = 0;
00562    if( name[0] == '/' ) start++;
00563    while( start < name.length() )
00564    {
00565      end = name.find_first_of( '/', start );
00566      if( end == std::string::npos ) end = name.length();
00567      names.push_back( name.substr( start, end-start ) );
00568      start = end+1;
00569    }
00570 
00571    // start from root
00572    int index = 0 ;
00573 
00574    // trace one by one
00575    std::list<std::string>::iterator it;
00576 
00577    for( it = names.begin(); it != names.end(); ++it )
00578    {
00579      // find among the children of index
00580      std::vector<unsigned> chi = children( index );
00581      unsigned child = 0;
00582      for( unsigned i = 0; i < chi.size(); i++ )
00583      {
00584        DirEntry* ce = entry( chi[i] );
00585        if( ce )
00586        if( ce->valid && ( ce->name.length()>1 ) )
00587        if( ce->name == *it )
00588              child = chi[i];
00589      }
00590 
00591      // traverse to the child
00592      if( child > 0 ) index = child;
00593      else
00594      {
00595        // not found among children
00596        if( !create ) return (DirEntry*)0;
00597 
00598        // create a new entry
00599        unsigned parent = index;
00600        entries.push_back( DirEntry() );
00601        index = entryCount()-1;
00602        DirEntry* e = entry( index );
00603        e->valid = true;
00604        e->name = *it;
00605        e->dir = false;
00606        e->size = 0;
00607        e->start = 0;
00608        e->child = End;
00609        e->prev = End;
00610        e->next = entry(parent)->child;
00611        entry(parent)->child = index;
00612      }
00613    }
00614 
00615    return entry( index );
00616 }
00617 
00618 // helper function: recursively find siblings of index
00619 void dirtree_find_siblings( DirTree* dirtree, std::vector<unsigned>& result,
00620   unsigned index )
00621 {
00622   DirEntry* e = dirtree->entry( index );
00623   if( !e ) return;
00624   if( !e->valid ) return;
00625 
00626   // prevent infinite loop
00627   for( unsigned i = 0; i < result.size(); i++ )
00628     if( result[i] == index ) return;
00629 
00630   // add myself
00631   result.push_back( index );
00632 
00633   // visit previous sibling, don't go infinitely
00634   unsigned prev = e->prev;
00635   if( ( prev > 0 ) && ( prev < dirtree->entryCount() ) )
00636   {
00637     for( unsigned i = 0; i < result.size(); i++ )
00638       if( result[i] == prev ) prev = 0;
00639     if( prev ) dirtree_find_siblings( dirtree, result, prev );
00640   }
00641 
00642   // visit next sibling, don't go infinitely
00643   unsigned next = e->next;
00644   if( ( next > 0 ) && ( next < dirtree->entryCount() ) )
00645   {
00646     for( unsigned i = 0; i < result.size(); i++ )
00647       if( result[i] == next ) next = 0;
00648     if( next ) dirtree_find_siblings( dirtree, result, next );
00649   }
00650 }
00651 
00652 std::vector<unsigned> DirTree::children( unsigned index )
00653 {
00654   std::vector<unsigned> result;
00655 
00656   DirEntry* e = entry( index );
00657   if( e ) if( e->valid && e->child < entryCount() )
00658     dirtree_find_siblings( this, result, e->child );
00659 
00660   return result;
00661 }
00662 
00663 void DirTree::load( unsigned char* buffer, unsigned size )
00664 {
00665   entries.clear();
00666 
00667   for( unsigned i = 0; i < size/128; i++ )
00668   {
00669     unsigned p = i * 128;
00670 
00671     // would be < 32 if first char in the name isn't printable
00672     unsigned prefix = 32;
00673 
00674     // parse name of this entry, which stored as Unicode 16-bit
00675     std::string name;
00676     int name_len = readU16( buffer + 0x40+p );
00677     if( name_len > 64 ) name_len = 64;
00678     for( int j=0; ( buffer[j+p]) && (j<name_len); j+= 2 )
00679       name.append( 1, buffer[j+p] );
00680 
00681     // first char isn't printable ? remove it...
00682     if( buffer[p] < 32 )
00683     {
00684       prefix = buffer[0];
00685       name.erase( 0,1 );
00686     }
00687 
00688     // 2 = file (aka stream), 1 = directory (aka storage), 5 = root
00689     unsigned type = buffer[ 0x42 + p];
00690 
00691     DirEntry e;
00692     e.valid = true;
00693     e.name = name;
00694     e.start = readU32( buffer + 0x74+p );
00695     e.size = readU32( buffer + 0x78+p );
00696     e.prev = readU32( buffer + 0x44+p );
00697     e.next = readU32( buffer + 0x48+p );
00698     e.child = readU32( buffer + 0x4C+p );
00699     e.dir = ( type!=2 );
00700 
00701     // sanity checks
00702     if( (type != 2) && (type != 1 ) && (type != 5 ) ) e.valid = false;
00703     if( name_len < 1 ) e.valid = false;
00704 
00705     entries.push_back( e );
00706   }
00707 }
00708 
00709 // return space required to save this dirtree
00710 unsigned DirTree::size()
00711 {
00712   return entryCount() * 128;
00713 }
00714 
00715 void DirTree::save( unsigned char* buffer )
00716 {
00717   memset( buffer, 0, size() );
00718 
00719   // root is fixed as "Root Entry"
00720   DirEntry* root = entry( 0 );
00721   std::string name = "Root Entry";
00722   for( unsigned j = 0; j < name.length(); j++ )
00723     buffer[ j*2 ] = name[j];
00724   writeU16( buffer + 0x40, name.length()*2 + 2 );
00725   writeU32( buffer + 0x74, 0xffffffff );
00726   writeU32( buffer + 0x78, 0 );
00727   writeU32( buffer + 0x44, 0xffffffff );
00728   writeU32( buffer + 0x48, 0xffffffff );
00729   writeU32( buffer + 0x4c, root->child );
00730   buffer[ 0x42 ] = 5;
00731   buffer[ 0x43 ] = 1;
00732 
00733   for( unsigned i = 1; i < entryCount(); i++ )
00734   {
00735     DirEntry* e = entry( i );
00736     if( !e ) continue;
00737     if( e->dir )
00738     {
00739       e->start = 0xffffffff;
00740       e->size = 0;
00741     }
00742 
00743     // max length for name is 32 chars
00744     std::string name = e->name;
00745     if( name.length() > 32 )
00746       name.erase( 32, name.length() );
00747 
00748     // write name as Unicode 16-bit
00749     for( unsigned j = 0; j < name.length(); j++ )
00750       buffer[ i*128 + j*2 ] = name[j];
00751 
00752     writeU16( buffer + i*128 + 0x40, name.length()*2 + 2 );
00753     writeU32( buffer + i*128 + 0x74, e->start );
00754     writeU32( buffer + i*128 + 0x78, e->size );
00755     writeU32( buffer + i*128 + 0x44, e->prev );
00756     writeU32( buffer + i*128 + 0x48, e->next );
00757     writeU32( buffer + i*128 + 0x4c, e->child );
00758     buffer[ i*128 + 0x42 ] = e->dir ? 1 : 2;
00759     buffer[ i*128 + 0x43 ] = 1; // always black
00760   }
00761 }
00762 
00763 void DirTree::debug()
00764 {
00765   for( unsigned i = 0; i < entryCount(); i++ )
00766   {
00767     DirEntry* e = entry( i );
00768     if( !e ) continue;
00769     std::cout << i << ": ";
00770     if( !e->valid ) std::cout << "INVALID ";
00771     std::cout << e->name << " ";
00772     if( e->dir ) std::cout << "(Dir) ";
00773     else std::cout << "(File) ";
00774     std::cout << e->size << " ";
00775     std::cout << "s:" << e->start << " ";
00776     std::cout << "(";
00777     if( e->child == End ) std::cout << "-"; else std::cout << e->child;
00778     std::cout << " ";
00779     if( e->prev == End ) std::cout << "-"; else std::cout << e->prev;
00780     std::cout << ":";
00781     if( e->next == End ) std::cout << "-"; else std::cout << e->next;
00782     std::cout << ")";
00783     std::cout << std::endl;
00784   }
00785 }
00786 
00787 // =========== StorageIO ==========
00788 
00789 StorageIO::StorageIO( Storage* st, const char* fname )
00790 {
00791   storage = st;
00792   filename = fname;
00793   result = Storage::Ok;
00794   opened = false;
00795 
00796   header = new Header();
00797   dirtree = new DirTree();
00798   bbat = new AllocTable();
00799   sbat = new AllocTable();
00800 
00801   lastBlockIndex = 0;
00802   lastBlockData = 0;
00803 
00804   filesize = 0;
00805   bbat->blockSize = 1 << header->b_shift;
00806   sbat->blockSize = 1 << header->s_shift;
00807 }
00808 
00809 StorageIO::~StorageIO()
00810 {
00811   if( opened ) close();
00812 
00813   delete [] lastBlockData;
00814   delete sbat;
00815   delete bbat;
00816   delete dirtree;
00817   delete header;
00818 }
00819 
00820 bool StorageIO::open()
00821 {
00822   // already opened ? close first
00823   if( opened ) close();
00824 
00825   load();
00826 
00827   return result == Storage::Ok;
00828 }
00829 
00830 void StorageIO::load()
00831 {
00832   unsigned char* buffer = 0;
00833   unsigned long buflen = 0;
00834   std::vector<unsigned long> blocks;
00835 
00836   // open the file, check for error
00837   result = Storage::OpenFailed;
00838   file.open( filename.c_str(), std::ios::binary | std::ios::in );
00839   if( !file.good() ) return;
00840 
00841   // find size of input file
00842   file.seekg( 0, std::ios::end );
00843   filesize = file.tellg();
00844 
00845   // load header
00846   buffer = new unsigned char[512];
00847   file.seekg( 0 );
00848   file.read( (char*)buffer, 512 );
00849   header->load( buffer );
00850   delete[] buffer;
00851 
00852   // check OLE magic id
00853   result = Storage::NotOLE;
00854   for( unsigned i=0; i<8; i++ )
00855     if( header->id[i] != pole_magic[i] )
00856       return;
00857 
00858   // sanity checks
00859   result = Storage::BadOLE;
00860   if( !header->valid() ) return;
00861   if( header->threshold != 4096 ) return;
00862 
00863   // important block size
00864   bbat->blockSize = 1 << header->b_shift;
00865   sbat->blockSize = 1 << header->s_shift;
00866 
00867   // find blocks allocated to store big bat
00868   // the first 109 blocks are in header, the rest in meta bat
00869   blocks.clear();
00870   blocks.resize( header->num_bat );
00871   for( unsigned i = 0; i < 109; i++ )
00872     if( i >= header->num_bat ) break;
00873     else blocks[i] = header->bb_blocks[i];
00874   if( (header->num_bat > 109) && (header->num_mbat > 0) )
00875   {
00876     unsigned char* buffer2 = new unsigned char[ bbat->blockSize ];
00877     unsigned k = 109;
00878     unsigned mblock = header->mbat_start;
00879     for( unsigned r = 0; r < header->num_mbat; r++ )
00880     {
00881       loadBigBlock( mblock, buffer2, bbat->blockSize );
00882       for( unsigned s=0; s < bbat->blockSize-4; s+=4 )
00883       {
00884         if( k >= header->num_bat ) break;
00885         else  blocks[k++] = readU32( buffer2 + s );
00886       }
00887       mblock = readU32( buffer2 + bbat->blockSize-4 );
00888      }
00889     delete[] buffer2;
00890   }
00891 
00892   // load big bat
00893   buflen = blocks.size()*bbat->blockSize;
00894   if( buflen > 0 )
00895   {
00896     buffer = new unsigned char[ buflen ];
00897     loadBigBlocks( blocks, buffer, buflen );
00898     bbat->load( buffer, buflen );
00899     delete[] buffer;
00900   }
00901 
00902   // load small bat
00903   blocks.clear();
00904   blocks = bbat->follow( header->sbat_start );
00905   buflen = blocks.size()*bbat->blockSize;
00906   if( buflen > 0 )
00907   {
00908     buffer = new unsigned char[ buflen ];
00909     loadBigBlocks( blocks, buffer, buflen );
00910     sbat->load( buffer, buflen );
00911     delete[] buffer;
00912   }
00913 
00914   // load directory tree
00915   blocks.clear();
00916   blocks = bbat->follow( header->dirent_start );
00917   buflen = blocks.size()*bbat->blockSize;
00918   buffer = new unsigned char[ buflen ];
00919   loadBigBlocks( blocks, buffer, buflen );
00920   dirtree->load( buffer, buflen );
00921   unsigned sb_start = readU32( buffer + 0x74 );
00922   delete[] buffer;
00923 
00924   // fetch block chain as data for small-files
00925   sb_blocks = bbat->follow( sb_start ); // small files
00926 
00927   // for troubleshooting, just enable this block
00928 #if 0
00929   header->debug();
00930   sbat->debug();
00931   bbat->debug();
00932   dirtree->debug();
00933 #endif
00934 
00935   // so far so good
00936   result = Storage::Ok;
00937   opened = true;
00938 }
00939 
00940 void StorageIO::create()
00941 {
00942   // std::cout << "Creating " << filename << std::endl;
00943 
00944   file.open( filename.c_str(), std::ios::out|std::ios::binary );
00945   if( !file.good() )
00946   {
00947     std::cerr << "Can't create " << filename << std::endl;
00948     result = Storage::OpenFailed;
00949     return;
00950   }
00951 
00952   // so far so good
00953   opened = true;
00954   result = Storage::Ok;
00955 }
00956 
00957 void StorageIO::flush()
00958 {
00959   /* Note on Microsoft implementation:
00960      - directory entries are stored in the last block(s)
00961      - BATs are as second to the last
00962      - Meta BATs are third to the last
00963   */
00964 }
00965 
00966 void StorageIO::close()
00967 {
00968   if( !opened ) return;
00969 
00970   file.close();
00971   opened = false;
00972 
00973   std::list<Stream*>::iterator it;
00974   for( it = streams.begin(); it != streams.end(); ++it )
00975     delete *it;
00976 }
00977 
00978 StreamIO* StorageIO::streamIO( const std::string& name )
00979 {
00980   // sanity check
00981   if( !name.length() ) return (StreamIO*)0;
00982 
00983   // search in the entries
00984   DirEntry* entry = dirtree->entry( name );
00985   //if( entry) std::cout << "FOUND\n";
00986   if( !entry ) return (StreamIO*)0;
00987   //if( !entry->dir ) std::cout << "  NOT DIR\n";
00988   if( entry->dir ) return (StreamIO*)0;
00989 
00990   StreamIO* result = new StreamIO( this, entry );
00991   result->fullName = name;
00992 
00993   return result;
00994 }
00995 
00996 unsigned long StorageIO::loadBigBlocks( std::vector<unsigned long> blocks,
00997   unsigned char* data, unsigned long maxlen )
00998 {
00999   // sentinel
01000   if( !data ) return 0;
01001   if( !file.good() ) return 0;
01002   if( blocks.size() < 1 ) return 0;
01003   if( maxlen == 0 ) return 0;
01004 
01005   // read block one by one, seems fast enough
01006   unsigned long bytes = 0;
01007   for( unsigned long i=0; (i < blocks.size() ) && ( bytes<maxlen ); i++ )
01008   {
01009     unsigned long block = blocks[i];
01010     unsigned long pos =  bbat->blockSize * ( block+1 );
01011     unsigned long p = (bbat->blockSize < maxlen-bytes) ? bbat->blockSize : maxlen-bytes;
01012     if( pos + p > filesize ) p = filesize - pos;
01013     file.seekg( pos );
01014     file.read( (char*)data + bytes, p );
01015     bytes += p;
01016   }
01017 
01018   return bytes;
01019 }
01020 
01021 // this will avoid reading the same big block from the disk
01022 #define POLE_CACHE_BLOCK
01023 
01024 unsigned long StorageIO::loadBigBlock( unsigned long block,
01025   unsigned char* data, unsigned long maxlen )
01026 {
01027   // sentinel
01028   if( !data ) return 0;
01029   if( !file.good() ) return 0;
01030 
01031 #ifdef POLE_CACHE_BLOCK
01032   // just before, we also did read this block
01033   // so just reuse from cached data
01034   if( block == lastBlockIndex )
01035   if( lastBlockData) if( maxlen <= bbat->blockSize )
01036   {
01037     memcpy( data, lastBlockData, maxlen );
01038     return maxlen;
01039   }
01040 #endif
01041 
01042   // wraps call for loadBigBlocks
01043   std::vector<unsigned long> blocks;
01044   blocks.resize( 1 );
01045   blocks[0] = block;
01046 
01047   unsigned long result = loadBigBlocks( blocks, data, maxlen );
01048 
01049 #ifdef POLE_CACHE_BLOCK
01050   // save cached data for future use
01051   if(maxlen == bbat->blockSize )
01052   {
01053     if( !lastBlockData )
01054       lastBlockData = new unsigned char[bbat->blockSize];
01055     memcpy(lastBlockData, data, bbat->blockSize);
01056     lastBlockIndex = block;
01057   }
01058 #endif
01059 
01060   return result;
01061 }
01062 
01063 // return number of bytes which has been read
01064 unsigned long StorageIO::loadSmallBlocks( std::vector<unsigned long> blocks,
01065   unsigned char* data, unsigned long maxlen )
01066 {
01067   // sentinel
01068   if( !data ) return 0;
01069   if( !file.good() ) return 0;
01070   if( blocks.size() < 1 ) return 0;
01071   if( maxlen == 0 ) return 0;
01072 
01073   // our own local buffer
01074   unsigned char* buf = new unsigned char[ bbat->blockSize ];
01075 
01076   // read small block one by one
01077   unsigned long bytes = 0;
01078   for( unsigned long i=0; ( i<blocks.size() ) && ( bytes<maxlen ); i++ )
01079   {
01080     unsigned long block = blocks[i];
01081 
01082     // find where the small-block exactly is
01083     unsigned long pos = block * sbat->blockSize;
01084     unsigned long bbindex = pos / bbat->blockSize;
01085     if( bbindex >= sb_blocks.size() ) break;
01086 
01087     loadBigBlock( sb_blocks[ bbindex ], buf, bbat->blockSize );
01088 
01089     // copy the data
01090     unsigned offset = pos % bbat->blockSize;
01091     unsigned long p = (maxlen-bytes < bbat->blockSize-offset ) ? maxlen-bytes :  bbat->blockSize-offset;
01092     p = (sbat->blockSize<p ) ? sbat->blockSize : p;
01093     memcpy( data + bytes, buf + offset, p );
01094     bytes += p;
01095   }
01096 
01097   delete[] buf;
01098 
01099   return bytes;
01100 }
01101 
01102 unsigned long StorageIO::loadSmallBlock( unsigned long block,
01103   unsigned char* data, unsigned long maxlen )
01104 {
01105   // sentinel
01106   if( !data ) return 0;
01107   if( !file.good() ) return 0;
01108 
01109   // wraps call for loadSmallBlocks
01110   std::vector<unsigned long> blocks;
01111   blocks.resize( 1 );
01112   blocks.assign( 1, block );
01113 
01114   return loadSmallBlocks( blocks, data, maxlen );
01115 }
01116 
01117 // =========== StreamIO ==========
01118 
01119 StreamIO::StreamIO( StorageIO* s, DirEntry* e)
01120 {
01121   io = s;
01122   entry = e;
01123   eof = false;
01124   fail = false;
01125 
01126   m_pos = 0;
01127 
01128   if( entry->size >= io->header->threshold )
01129     blocks = io->bbat->follow( entry->start );
01130   else
01131     blocks = io->sbat->follow( entry->start );
01132 
01133   // prepare cache
01134   cache_pos = 0;
01135   cache_size = 4096; // optimal ?
01136   cache_data = new unsigned char[cache_size];
01137   updateCache();
01138 }
01139 
01140 // FIXME tell parent we're gone
01141 StreamIO::~StreamIO()
01142 {
01143   delete[] cache_data;
01144 }
01145 
01146 void StreamIO::seek( unsigned long pos )
01147 {
01148   m_pos = pos;
01149 }
01150 
01151 unsigned long StreamIO::tell()
01152 {
01153   return m_pos;
01154 }
01155 
01156 int StreamIO::getch()
01157 {
01158   // past end-of-file ?
01159   if( m_pos > entry->size ) return -1;
01160 
01161   // need to update cache ?
01162   if( !cache_size || ( m_pos < cache_pos ) ||
01163     ( m_pos >= cache_pos + cache_size ) )
01164       updateCache();
01165 
01166   // something bad if we don't get good cache
01167   if( !cache_size ) return -1;
01168 
01169   int data = cache_data[m_pos - cache_pos];
01170   m_pos++;
01171 
01172   return data;
01173 }
01174 
01175 unsigned long StreamIO::read( unsigned long pos, unsigned char* data, unsigned long maxlen )
01176 {
01177   // sanity checks
01178   if( !data ) return 0;
01179   if( maxlen == 0 ) return 0;
01180 
01181   unsigned long totalbytes = 0;
01182 
01183   if ( entry->size < io->header->threshold )
01184   {
01185     // small file
01186     unsigned long index = pos / io->sbat->blockSize;
01187 
01188     if( index >= blocks.size() ) return 0;
01189 
01190     unsigned char* buf = new unsigned char[ io->sbat->blockSize ];
01191     unsigned long offset = pos % io->sbat->blockSize;
01192     while( totalbytes < maxlen )
01193     {
01194       if( index >= blocks.size() ) break;
01195       io->loadSmallBlock( blocks[index], buf, io->bbat->blockSize );
01196       unsigned long count = io->sbat->blockSize - offset;
01197       if( count > maxlen-totalbytes ) count = maxlen-totalbytes;
01198       memcpy( data+totalbytes, buf + offset, count );
01199       totalbytes += count;
01200       offset = 0;
01201       index++;
01202     }
01203     delete[] buf;
01204 
01205   }
01206   else
01207   {
01208     // big file
01209     unsigned long index = pos / io->bbat->blockSize;
01210 
01211     // sanity check
01212     if( index >= blocks.size() ) return 0;
01213 
01214     unsigned char* buf = new unsigned char[ io->bbat->blockSize ];
01215     unsigned long offset = pos % io->bbat->blockSize;
01216     while( totalbytes < maxlen )
01217     {
01218       if( index >= blocks.size() ) break;
01219       io->loadBigBlock( blocks[index], buf, io->bbat->blockSize );
01220       unsigned long count = io->bbat->blockSize - offset;
01221       if( count > maxlen-totalbytes ) count = maxlen-totalbytes;
01222       memcpy( data+totalbytes, buf + offset, count );
01223       totalbytes += count;
01224       index++;
01225       offset = 0;
01226     }
01227     delete [] buf;
01228 
01229   }
01230 
01231   return totalbytes;
01232 }
01233 
01234 unsigned long StreamIO::read( unsigned char* data, unsigned long maxlen )
01235 {
01236   unsigned long bytes = read( tell(), data, maxlen );
01237   m_pos += bytes;
01238   return bytes;
01239 }
01240 
01241 void StreamIO::updateCache()
01242 {
01243   // sanity check
01244   if( !cache_data ) return;
01245 
01246   cache_pos = m_pos - ( m_pos % cache_size );
01247   unsigned long bytes = cache_size;
01248   if( cache_pos + bytes > entry->size ) bytes = entry->size - cache_pos;
01249   cache_size = read( cache_pos, cache_data, bytes );
01250 }
01251 
01252 
01253 // =========== Storage ==========
01254 
01255 Storage::Storage( const char* filename )
01256 {
01257   io = new StorageIO( this, filename );
01258 }
01259 
01260 Storage::~Storage()
01261 {
01262   delete io;
01263 }
01264 
01265 int Storage::result()
01266 {
01267   return io->result;
01268 }
01269 
01270 bool Storage::open()
01271 {
01272   return io->open();
01273 }
01274 
01275 void Storage::close()
01276 {
01277   io->close();
01278 }
01279 
01280 std::list<std::string> Storage::entries( const std::string& path )
01281 {
01282   std::list<std::string> result;
01283   DirTree* dt = io->dirtree;
01284   DirEntry* e = dt->entry( path, false );
01285   if( e  && e->dir )
01286   {
01287     unsigned parent = dt->indexOf( e );
01288     std::vector<unsigned> children = dt->children( parent );
01289     for( unsigned i = 0; i < children.size(); i++ )
01290       result.push_back( dt->entry( children[i] )->name );
01291   }
01292 
01293   return result;
01294 }
01295 
01296 bool Storage::isDirectory( const std::string& name )
01297 {
01298   DirEntry* e = io->dirtree->entry( name, false );
01299   return e ? e->dir : false;
01300 }
01301 
01302 // =========== Stream ==========
01303 
01304 Stream::Stream( Storage* storage, const std::string& name )
01305 {
01306   io = storage->io->streamIO( name );
01307 }
01308 
01309 // FIXME tell parent we're gone
01310 Stream::~Stream()
01311 {
01312   delete io;
01313 }
01314 
01315 std::string Stream::fullName()
01316 {
01317   return io ? io->fullName : std::string();
01318 }
01319 
01320 unsigned long Stream::tell()
01321 {
01322   return io ? io->tell() : 0;
01323 }
01324 
01325 void Stream::seek( unsigned long newpos )
01326 {
01327   if( io ) io->seek( newpos );
01328 }
01329 
01330 unsigned long Stream::size()
01331 {
01332   return io ? io->entry->size : 0;
01333 }
01334 
01335 int Stream::getch()
01336 {
01337   return io ? io->getch() : 0;
01338 }
01339 
01340 unsigned long Stream::read( unsigned char* data, unsigned long maxlen )
01341 {
01342   return io ? io->read( data, maxlen ) : 0;
01343 }
01344 
01345 bool Stream::eof()
01346 {
01347   return io ? io->eof : false;
01348 }
01349 
01350 bool Stream::fail()
01351 {
01352   return io ? io->fail : true;
01353 }
KDE Home | KDE Accessibility Home | Description of Access Keys