filters

pole.cpp

00001 /* POLE - Portable C++ library to access OLE Storage 
00002    Copyright (C) 2002-2005 Ariya Hidayat <ariya@kde.org>
00003 
00004    Redistribution and use in source and binary forms, with or without 
00005    modification, are permitted provided that the following conditions 
00006    are met:
00007    * Redistributions of source code must retain the above copyright notice, 
00008      this list of conditions and the following disclaimer.
00009    * Redistributions in binary form must reproduce the above copyright notice, 
00010      this list of conditions and the following disclaimer in the documentation 
00011      and/or other materials provided with the distribution.
00012    * Neither the name of the authors nor the names of its contributors may be 
00013      used to endorse or promote products derived from this software without 
00014      specific prior written permission.
00015 
00016    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
00017    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
00018    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
00019    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
00020    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
00021    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
00022    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
00023    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
00024    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
00025    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 
00026    THE POSSIBILITY OF SUCH DAMAGE.
00027 */
00028 
00029 #include <fstream>
00030 #include <iostream>
00031 #include <list>
00032 #include <string>
00033 #include <vector>
00034 
00035 #include "pole.h"
00036 
00037 // enable to activate debugging output
00038 // #define POLE_DEBUG
00039 
00040 namespace POLE
00041 {
00042 
// In-memory copy of the fixed header found at the very start of a storage
// file. Header::load() fills the fields from a raw byte buffer,
// Header::save() writes them back out, and Header::valid() runs sanity
// checks on the decoded values.
00043 class Header
00044 {
00045   public:
00046     unsigned char id[8];       // signature, or magic identifier
00047     unsigned b_shift;          // bbat->blockSize = 1 << b_shift
00048     unsigned s_shift;          // sbat->blockSize = 1 << s_shift
00049     unsigned num_bat;          // blocks allocated for big bat
00050     unsigned dirent_start;     // starting block for directory info
00051     unsigned threshold;        // switch from small to big file (usually 4K)
00052     unsigned sbat_start;       // starting block index to store small bat
00053     unsigned num_sbat;         // blocks allocated for small bat
00054     unsigned mbat_start;       // starting block to store meta bat
00055     unsigned num_mbat;         // blocks allocated for meta bat
    // block indices of the first 109 big-bat blocks; load()/save() keep them
    // as 32-bit values starting at byte offset 0x4C of the header
00056     unsigned long bb_blocks[109];
00057     
    // sets defaults for an empty storage (see the constructor definition)
00058     Header();
    // true when the decoded fields pass the sanity checks
00059     bool valid();
    // decode the fields from a raw header buffer
00060     void load( const unsigned char* buffer );
    // encode the fields into a raw header buffer
00061     void save( unsigned char* buffer );
    // print the fields to std::cout
00062     void debug();
00063 };
00064 
// Block allocation table: a growable array in which entry i holds either the
// index of the block that follows block i in a chain or one of the reserved
// marker values below. Copying is disabled (private, undefined copy members).
00065 class AllocTable
00066 {
00067   public:
    // reserved entry values; their numeric values are defined further down
00068     static const unsigned Eof;
00069     static const unsigned Avail;
00070     static const unsigned Bat;    
00071     static const unsigned MetaBat;    
    // size in bytes of the blocks this table describes; assigned by StorageIO
00072     unsigned blockSize;
00073     AllocTable();
    // NOTE(review): no definition of clear() is visible in this part of the file
00074     void clear();
    // number of entries currently held
00075     unsigned long count();
    // change the entry count; added entries are marked Avail
00076     void resize( unsigned long newsize );
00077     void preserve( unsigned long n );
    // store val at index, growing the table when index is past the end
00078     void set( unsigned long index, unsigned long val );
    // index of the first Avail entry; grows the table when none is free
00079     unsigned unused();
    // link the listed blocks into one chain terminated by Eof
00080     void setChain( std::vector<unsigned long> );
    // collect the chain of block indices beginning at start
00081     std::vector<unsigned long> follow( unsigned long start );
    // raw entry lookup; the definition does no bounds checking
00082     unsigned long operator[](unsigned long index );
    // decode len/4 little-endian 32-bit entries from buffer
00083     void load( const unsigned char* buffer, unsigned len );
    // encode every entry into buffer as little-endian 32-bit values
00084     void save( unsigned char* buffer );
    // bytes needed by save(): four per entry
00085     unsigned size();
    // print the non-Avail entries to std::cout
00086     void debug();
00087   private:
00088     std::vector<unsigned long> data;
    // no copy or assign
00089     AllocTable( const AllocTable& );
00090     AllocTable& operator=( const AllocTable& );
00091 };
00092 
// One record of the directory tree, as decoded by DirTree::load(). The
// prev/next/child fields are indices into the owning DirTree's entry list;
// DirTree::End marks "no link".
00093 class DirEntry
00094 {
00095   public:
00096     bool valid;            // false if invalid (should be skipped)
00097     std::string name;      // the name, not in unicode anymore 
00098     bool dir;              // true if directory   
00099     unsigned long size;    // size (not valid if directory)
00100     unsigned long start;   // starting block
00101     unsigned prev;         // previous sibling
00102     unsigned next;         // next sibling
00103     unsigned child;        // first child
00104 };
00105 
// Directory of a storage: a flat list of DirEntry records linked into a tree
// through their prev/next/child indices. Entry 0 is the root. Copying is
// disabled (private, undefined copy members).
00106 class DirTree
00107 {
00108   public:
    // index value meaning "no entry"
00109     static const unsigned End;
00110     DirTree();
    // reset to a tree holding only the root entry
00111     void clear();
00112     unsigned entryCount();
    // entry at index, or null when index is out of range
00113     DirEntry* entry( unsigned index );
    // entry addressed by a '/'-separated path; optionally created when missing
00114     DirEntry* entry( const std::string& name, bool create=false );
    // index of e in the list, or -1 when e does not belong to this tree
00115     int indexOf( DirEntry* e );
    // index of the entry that lists 'index' among its children, or -1
00116     int parent( unsigned index );
00117     std::string fullName( unsigned index );
    // indices of the entries reachable through the child link of 'index'
00118     std::vector<unsigned> children( unsigned index );
    // rebuild the list from raw 128-byte records
00119     void load( unsigned char* buffer, unsigned len );
    // write the list back as raw 128-byte records
00120     void save( unsigned char* buffer );
    // bytes needed by save(): 128 per entry
00121     unsigned size();
    // print every entry to std::cout
00122     void debug();
00123   private:
00124     std::vector<DirEntry> entries;
    // no copy or assign
00125     DirTree( const DirTree& );
00126     DirTree& operator=( const DirTree& );
00127 };
00128 
// Implementation object behind Storage: owns the file stream plus the
// decoded header, directory tree and both allocation tables. The four
// pointer members are allocated in the constructor and released in the
// destructor.
00129 class StorageIO
00130 {
00131   public:
00132     Storage* storage;         // owner
00133     std::string filename;     // filename
00134     std::fstream file;        // associated with above name
00135     int result;               // result of operation
00136     bool opened;              // true if file is opened
00137     unsigned long filesize;   // size of the file
00138     
00139     Header* header;           // storage header 
00140     DirTree* dirtree;         // directory tree
00141     AllocTable* bbat;         // allocation table for big blocks
00142     AllocTable* sbat;         // allocation table for small blocks
00143     
00144     std::vector<unsigned long> sb_blocks; // blocks for "small" files
00145        
    // streams deleted by close()
00146     std::list<Stream*> streams;
00147 
00148     StorageIO( Storage* storage, const char* filename );
00149     ~StorageIO();
00150     
    // (re)load the file; true when result ends up as Storage::Ok
00151     bool open();
00152     void close();
    // currently an empty function
00153     void flush();
    // open the file for reading and decode header, tables and directory
00154     void load();
    // open the file for writing
00155     void create();
00156 
    // read the listed big blocks back to back into buffer, at most maxlen
    // bytes; returns the byte count
00157     unsigned long loadBigBlocks( std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen );
00158 
    // single-block convenience form of loadBigBlocks()
00159     unsigned long loadBigBlock( unsigned long block, unsigned char* buffer, unsigned long maxlen );
00160 
00161     unsigned long loadSmallBlocks( std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen );
00162 
00163     unsigned long loadSmallBlock( unsigned long block, unsigned char* buffer, unsigned long maxlen );
00164     
    // new StreamIO for the named non-directory entry, or null
00165     StreamIO* streamIO( const std::string& name ); 
00166 
00167   private:  
00168     // no copy or assign
00169     StorageIO( const StorageIO& );
00170     StorageIO& operator=( const StorageIO& );
00171 
00172 };
00173 
// Read-side state for one stream of a storage. Instances are created by
// StorageIO::streamIO(), which also fills in fullName.
// NOTE(review): the member function definitions are outside this part of the
// file, so the notes below are limited to what the declarations show.
00174 class StreamIO
00175 {
00176   public:
    // storage this stream belongs to
00177     StorageIO* io;
    // directory entry describing the stream
00178     DirEntry* entry;
    // path the stream was opened with
00179     std::string fullName;
00180     bool eof;
00181     bool fail;
00182 
00183     StreamIO( StorageIO* io, DirEntry* entry );
00184     ~StreamIO();
00185     unsigned long size();
00186     void seek( unsigned long pos );
00187     unsigned long tell();
00188     int getch();
00189     unsigned long read( unsigned char* data, unsigned long maxlen );
00190     unsigned long read( unsigned long pos, unsigned char* data, unsigned long maxlen );
00191 
00192 
00193   private:
    // presumably the block chain backing this stream - confirm in the constructor
00194     std::vector<unsigned long> blocks;
00195 
00196     // no copy or assign
00197     StreamIO( const StreamIO& );
00198     StreamIO& operator=( const StreamIO& );
00199 
00200     // pointer for read
00201     unsigned long m_pos;
00202 
00203     // simple cache system to speed-up getch()
00204     unsigned char* cache_data;
00205     unsigned long cache_size;
00206     unsigned long cache_pos;
00207     void updateCache();
00208 };
00209 
00210 } // namespace POLE
00211 
00212 using namespace POLE;
00213 
// Decode the unsigned 16-bit little-endian value stored in the two bytes
// at ptr (ptr[0] is the low byte).
static inline unsigned long readU16( const unsigned char* ptr )
{
  const unsigned long low  = ptr[0];
  const unsigned long high = ptr[1];
  return low | ( high << 8 );
}
00218 
// Decode the unsigned 32-bit little-endian value stored in the four bytes
// at ptr (ptr[0] is the least significant byte).
//
// Each byte is widened to unsigned long before it is shifted. Shifting the
// promoted int instead overflows for ptr[3] >= 0x80 and, where unsigned long
// is wider than 32 bits, sign-extends the result, so that e.g. 0xfffffffe
// would no longer compare equal to AllocTable::Eof.
static inline unsigned long readU32( const unsigned char* ptr )
{
  return   static_cast<unsigned long>( ptr[0] )
       | ( static_cast<unsigned long>( ptr[1] ) << 8 )
       | ( static_cast<unsigned long>( ptr[2] ) << 16 )
       | ( static_cast<unsigned long>( ptr[3] ) << 24 );
}
00223 
// Store the low 16 bits of data at ptr in little-endian order
// (ptr[0] receives the least significant byte).
static inline void writeU16( unsigned char* ptr, unsigned long data )
{
  for( int k = 0; k < 2; ++k )
    ptr[k] = static_cast<unsigned char>( ( data >> ( 8 * k ) ) & 0xff );
}
00229 
// Store the low 32 bits of data at ptr in little-endian order
// (ptr[0] receives the least significant byte).
static inline void writeU32( unsigned char* ptr, unsigned long data )
{
  for( int k = 0; k < 4; ++k )
    ptr[k] = static_cast<unsigned char>( ( data >> ( 8 * k ) ) & 0xff );
}
00237 
// Eight-byte file signature. Header's constructor and Header::save() write
// it, and StorageIO::load() compares the first eight bytes of a file against
// it to decide whether the file is an OLE storage at all.
00238 static const unsigned char pole_magic[] = 
00239  { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
00240 
00241 // =========== Header ==========
00242 
00243 Header::Header()
00244 {
00245   b_shift = 9;
00246   s_shift = 6;
00247   num_bat = 0;
00248   dirent_start = 0;
00249   threshold = 4096;
00250   sbat_start = 0;
00251   num_sbat = 0;
00252   mbat_start = 0;
00253   num_mbat = 0;
00254 
00255   for( unsigned i = 0; i < 8; i++ )
00256     id[i] = pole_magic[i];  
00257   for( unsigned i=0; i<109; i++ )
00258     bb_blocks[i] = AllocTable::Avail;
00259 }
00260 
00261 bool Header::valid()
00262 {
00263   if( threshold != 4096 ) return false;
00264   if( num_bat == 0 ) return false;
00265   if( (num_bat > 109) && (num_bat > (num_mbat * 127) + 109)) return false;
00266   if( (num_bat < 109) && (num_mbat != 0) ) return false;
00267   if( s_shift > b_shift ) return false;
00268   if( b_shift <= 6 ) return false;
00269   if( b_shift >=31 ) return false;
00270   
00271   return true;
00272 }
00273 
00274 void Header::load( const unsigned char* buffer )
00275 {
00276   b_shift      = readU16( buffer + 0x1e );
00277   s_shift      = readU16( buffer + 0x20 );
00278   num_bat      = readU32( buffer + 0x2c );
00279   dirent_start = readU32( buffer + 0x30 );
00280   threshold    = readU32( buffer + 0x38 );
00281   sbat_start   = readU32( buffer + 0x3c );
00282   num_sbat     = readU32( buffer + 0x40 );
00283   mbat_start   = readU32( buffer + 0x44 );
00284   num_mbat     = readU32( buffer + 0x48 );
00285   
00286   for( unsigned i = 0; i < 8; i++ )
00287     id[i] = buffer[i];  
00288   for( unsigned i=0; i<109; i++ )
00289     bb_blocks[i] = readU32( buffer + 0x4C+i*4 );
00290 }
00291 
00292 void Header::save( unsigned char* buffer )
00293 {
00294   memset( buffer, 0, 0x4c );
00295   memcpy( buffer, pole_magic, 8 );        // ole signature
00296   writeU32( buffer + 8, 0 );              // unknown 
00297   writeU32( buffer + 12, 0 );             // unknown
00298   writeU32( buffer + 16, 0 );             // unknown
00299   writeU16( buffer + 24, 0x003e );        // revision ?
00300   writeU16( buffer + 26, 3 );             // version ?
00301   writeU16( buffer + 28, 0xfffe );        // unknown
00302   writeU16( buffer + 0x1e, b_shift );
00303   writeU16( buffer + 0x20, s_shift );
00304   writeU32( buffer + 0x2c, num_bat );
00305   writeU32( buffer + 0x30, dirent_start );
00306   writeU32( buffer + 0x38, threshold );
00307   writeU32( buffer + 0x3c, sbat_start );
00308   writeU32( buffer + 0x40, num_sbat );
00309   writeU32( buffer + 0x44, mbat_start );
00310   writeU32( buffer + 0x48, num_mbat );
00311   
00312   for( unsigned i=0; i<109; i++ )
00313     writeU32( buffer + 0x4C+i*4, bb_blocks[i] );
00314 }
00315 
00316 void Header::debug()
00317 {
00318   std::cout << std::endl;
00319   std::cout << "b_shift " << b_shift << std::endl;
00320   std::cout << "s_shift " << s_shift << std::endl;
00321   std::cout << "num_bat " << num_bat << std::endl;
00322   std::cout << "dirent_start " << dirent_start << std::endl;
00323   std::cout << "threshold " << threshold << std::endl;
00324   std::cout << "sbat_start " << sbat_start << std::endl;
00325   std::cout << "num_sbat " << num_sbat << std::endl;
00326   std::cout << "mbat_start " << mbat_start << std::endl;
00327   std::cout << "num_mbat " << num_mbat << std::endl;
00328   
00329   unsigned s = (num_bat<=109) ? num_bat : 109;
00330   std::cout << "bat blocks: ";
00331   for( unsigned i = 0; i < s; i++ )
00332     std::cout << bb_blocks[i] << " ";
00333   std::cout << std::endl;
00334 }
00335  
00336 // =========== AllocTable ==========
00337 
// Reserved table entry values.
//  Avail   - value given to every entry added by resize(); unused() looks for it
//  Eof     - written by setChain() after the last block of a chain
//  Bat     - follow() stops on it; debug() prints it as "[bat]"
//  MetaBat - follow() stops on it; debug() prints it as "[metabat]"
00338 const unsigned AllocTable::Avail = 0xffffffff;
00339 const unsigned AllocTable::Eof = 0xfffffffe;
00340 const unsigned AllocTable::Bat = 0xfffffffd;
00341 const unsigned AllocTable::MetaBat = 0xfffffffc;
00342 
00343 AllocTable::AllocTable()
00344 {
00345   blockSize = 4096;
00346   // initial size
00347   resize( 128 );
00348 }
00349 
00350 unsigned long AllocTable::count()
00351 {
00352   return data.size();
00353 }
00354 
00355 void AllocTable::resize( unsigned long newsize )
00356 {
00357   unsigned oldsize = data.size();
00358   data.resize( newsize );
00359   if( newsize > oldsize )
00360     for( unsigned i = oldsize; i<newsize; i++ )
00361       data[i] = Avail;
00362 }
00363 
00364 // make sure there're still free blocks
00365 void AllocTable::preserve( unsigned long n )
00366 {
00367   std::vector<unsigned long> pre;
00368   for( unsigned i=0; i < n; i++ )
00369     pre.push_back( unused() );
00370 }
00371 
00372 unsigned long AllocTable::operator[]( unsigned long index )
00373 {
00374   unsigned long result;
00375   result = data[index];
00376   return result;
00377 }
00378 
00379 void AllocTable::set( unsigned long index, unsigned long value )
00380 {
00381   if( index >= count() ) resize( index + 1);
00382   data[ index ] = value;
00383 }
00384 
00385 void AllocTable::setChain( std::vector<unsigned long> chain )
00386 {
00387   if( chain.size() )
00388   {
00389     for( unsigned i=0; i<chain.size()-1; i++ )
00390       set( chain[i], chain[i+1] );
00391     set( chain[ chain.size()-1 ], AllocTable::Eof );
00392   }
00393 }
00394 
00395 // follow 
00396 std::vector<unsigned long> AllocTable::follow( unsigned long start )
00397 {
00398   std::vector<unsigned long> chain;
00399 
00400   if( start >= count() ) return chain; 
00401 
00402   unsigned long p = start;
00403   while( p < count() )
00404   {
00405     if( p == (unsigned long)Eof ) break;
00406     if( p == (unsigned long)Bat ) break;
00407     if( p == (unsigned long)MetaBat ) break;
00408     if( p >= count() ) break;
00409     chain.push_back( p );
00410     if( data[p] >= count() ) break;
00411     p = data[ p ];
00412   }
00413 
00414   return chain;
00415 }
00416 
00417 unsigned AllocTable::unused()
00418 {
00419   // find first available block
00420   for( unsigned i = 0; i < data.size(); i++ )
00421     if( data[i] == Avail )
00422       return i;
00423   
00424   // completely full, so enlarge the table
00425   unsigned block = data.size();
00426   resize( data.size()+10 );
00427   return block;      
00428 }
00429 
00430 void AllocTable::load( const unsigned char* buffer, unsigned len )
00431 {
00432   resize( len / 4 );
00433   for( unsigned i = 0; i < count(); i++ )
00434     set( i, readU32( buffer + i*4 ) );
00435 }
00436 
00437 // return space required to save this dirtree
00438 unsigned AllocTable::size()
00439 {
00440   return count() * 4;
00441 }
00442 
00443 void AllocTable::save( unsigned char* buffer )
00444 {
00445   for( unsigned i = 0; i < count(); i++ )
00446     writeU32( buffer + i*4, data[i] );
00447 }
00448 
00449 void AllocTable::debug()
00450 {
00451   std::cout << "block size " << data.size() << std::endl;
00452   for( unsigned i=0; i< data.size(); i++ )
00453   {
00454      if( data[i] == Avail ) continue;
00455      std::cout << i << ": ";
00456      if( data[i] == Eof ) std::cout << "[eof]";
00457      else if( data[i] == Bat ) std::cout << "[bat]";
00458      else if( data[i] == MetaBat ) std::cout << "[metabat]";
00459      else std::cout << data[i];
00460      std::cout << std::endl;
00461   }
00462 }
00463 
00464 // =========== DirTree ==========
00465 
// Link value meaning "no entry"; DirTree::clear() gives the root's prev,
// next and child links this value.
00466 const unsigned DirTree::End = 0xffffffff;
00467 
00468 DirTree::DirTree()
00469 {
00470   clear();
00471 }
00472 
00473 void DirTree::clear()
00474 {
00475   // leave only root entry
00476   entries.resize( 1 );
00477   entries[0].valid = true;
00478   entries[0].name = "Root Entry";
00479   entries[0].dir = true;
00480   entries[0].size = 0;
00481   entries[0].start = End;
00482   entries[0].prev = End;
00483   entries[0].next = End;
00484   entries[0].child = End;
00485 }
00486 
00487 unsigned DirTree::entryCount()
00488 {
00489   return entries.size();
00490 }
00491 
00492 DirEntry* DirTree::entry( unsigned index )
00493 {
00494   if( index >= entryCount() ) return (DirEntry*) 0;
00495   return &entries[ index ];
00496 }
00497 
00498 int DirTree::indexOf( DirEntry* e )
00499 {
00500   for( unsigned i = 0; i < entryCount(); i++ )
00501     if( entry( i ) == e ) return i;
00502     
00503   return -1;
00504 }
00505 
00506 int DirTree::parent( unsigned index )
00507 {
00508   // brute-force, basically we iterate for each entries, find its children
00509   // and check if one of the children is 'index'
00510   for( unsigned j=0; j<entryCount(); j++ )
00511   {
00512     std::vector<unsigned> chi = children( j );
00513     for( unsigned i=0; i<chi.size();i++ )
00514       if( chi[i] == index )
00515         return j;
00516   }
00517         
00518   return -1;
00519 }
00520 
00521 std::string DirTree::fullName( unsigned index )
00522 {
00523   // don't use root name ("Root Entry"), just give "/"
00524   if( index == 0 ) return "/";
00525 
00526   std::string result = entry( index )->name;
00527   result.insert( 0,  "/" );
00528   int p = parent( index );
00529   DirEntry * _entry = 0;
00530   while( p > 0 )
00531   {
00532     _entry = entry( p );
00533     if (_entry->dir && _entry->valid)
00534     {
00535       result.insert( 0,  _entry->name);
00536       result.insert( 0,  "/" );
00537     }
00538     --p;
00539     index = p;
00540     if( index <= 0 ) break;
00541   }
00542   return result;
00543 }
00544 
00545 // given a fullname (e.g "/ObjectPool/_1020961869"), find the entry
00546 // if not found and create is false, return 0
00547 // if create is true, a new entry is returned
00548 DirEntry* DirTree::entry( const std::string& name, bool create )
00549 {
00550    if( !name.length() ) return (DirEntry*)0;
00551  
00552    // quick check for "/" (that's root)
00553    if( name == "/" ) return entry( 0 );
00554    
00555    // split the names, e.g  "/ObjectPool/_1020961869" will become:
00556    // "ObjectPool" and "_1020961869" 
00557    std::list<std::string> names;
00558    std::string::size_type start = 0, end = 0;
00559    if( name[0] == '/' ) start++;
00560    while( start < name.length() )
00561    {
00562      end = name.find_first_of( '/', start );
00563      if( end == std::string::npos ) end = name.length();
00564      names.push_back( name.substr( start, end-start ) );
00565      start = end+1;
00566    }
00567   
00568    // start from root 
00569    int index = 0 ;
00570 
00571    // trace one by one   
00572    std::list<std::string>::iterator it; 
00573 
00574    for( it = names.begin(); it != names.end(); ++it )
00575    {
00576      // find among the children of index
00577      std::vector<unsigned> chi = children( index );
00578      unsigned child = 0;
00579      for( unsigned i = 0; i < chi.size(); i++ )
00580      {
00581        DirEntry* ce = entry( chi[i] );
00582        if( ce ) 
00583        if( ce->valid && ( ce->name.length()>1 ) )
00584        if( ce->name == *it )
00585              child = chi[i];
00586      }
00587      
00588      // traverse to the child
00589      if( child > 0 ) index = child;
00590      else
00591      {
00592        // not found among children
00593        if( !create ) return (DirEntry*)0;
00594        
00595        // create a new entry
00596        unsigned parent = index;
00597        entries.push_back( DirEntry() );
00598        index = entryCount()-1;
00599        DirEntry* e = entry( index );
00600        e->valid = true;
00601        e->name = *it;
00602        e->dir = false;
00603        e->size = 0;
00604        e->start = 0;
00605        e->child = End;
00606        e->prev = End;
00607        e->next = entry(parent)->child;
00608        entry(parent)->child = index;
00609      }
00610    }
00611 
00612    return entry( index );
00613 }
00614 
00615 // helper function: recursively find siblings of index
00616 void dirtree_find_siblings( DirTree* dirtree, std::vector<unsigned>& result, 
00617   unsigned index )
00618 {
00619   DirEntry* e = dirtree->entry( index );
00620   if( !e ) return;
00621   if( !e->valid ) return;
00622 
00623   // prevent infinite loop  
00624   for( unsigned i = 0; i < result.size(); i++ )
00625     if( result[i] == index ) return;
00626 
00627   // add myself    
00628   result.push_back( index );
00629   
00630   // visit previous sibling, don't go infinitely
00631   unsigned prev = e->prev;
00632   if( ( prev > 0 ) && ( prev < dirtree->entryCount() ) )
00633   {
00634     for( unsigned i = 0; i < result.size(); i++ )
00635       if( result[i] == prev ) prev = 0;
00636     if( prev ) dirtree_find_siblings( dirtree, result, prev );
00637   }
00638     
00639   // visit next sibling, don't go infinitely
00640   unsigned next = e->next;
00641   if( ( next > 0 ) && ( next < dirtree->entryCount() ) )
00642   {
00643     for( unsigned i = 0; i < result.size(); i++ )
00644       if( result[i] == next ) next = 0;
00645     if( next ) dirtree_find_siblings( dirtree, result, next );
00646   }
00647 }
00648 
00649 std::vector<unsigned> DirTree::children( unsigned index )
00650 {
00651   std::vector<unsigned> result;
00652   
00653   DirEntry* e = entry( index );
00654   if( e ) if( e->valid && e->child < entryCount() )
00655     dirtree_find_siblings( this, result, e->child );
00656     
00657   return result;
00658 }
00659 
00660 void DirTree::load( unsigned char* buffer, unsigned size )
00661 {
00662   entries.clear();
00663   
00664   for( unsigned i = 0; i < size/128; i++ )
00665   {
00666     unsigned p = i * 128;
00667     
00668     // would be < 32 if first char in the name isn't printable
00669     unsigned prefix = 32;
00670     
00671     // parse name of this entry, which stored as Unicode 16-bit
00672     std::string name;
00673     int name_len = readU16( buffer + 0x40+p );
00674     if( name_len > 64 ) name_len = 64;
00675     for( int j=0; ( buffer[j+p]) && (j<name_len); j+= 2 )
00676       name.append( 1, buffer[j+p] );
00677       
00678     // first char isn't printable ? remove it...
00679     if( buffer[p] < 32 )
00680     { 
00681       prefix = buffer[0]; 
00682       name.erase( 0,1 ); 
00683     }
00684     
00685     // 2 = file (aka stream), 1 = directory (aka storage), 5 = root
00686     unsigned type = buffer[ 0x42 + p];
00687     
00688     DirEntry e;
00689     e.valid = true;
00690     e.name = name;
00691     e.start = readU32( buffer + 0x74+p );
00692     e.size = readU32( buffer + 0x78+p );
00693     e.prev = readU32( buffer + 0x44+p );
00694     e.next = readU32( buffer + 0x48+p );
00695     e.child = readU32( buffer + 0x4C+p );
00696     e.dir = ( type!=2 );
00697     
00698     // sanity checks
00699     if( (type != 2) && (type != 1 ) && (type != 5 ) ) e.valid = false;
00700     if( name_len < 1 ) e.valid = false;
00701     
00702     entries.push_back( e );
00703   }  
00704 }
00705 
00706 // return space required to save this dirtree
00707 unsigned DirTree::size()
00708 {
00709   return entryCount() * 128;
00710 }
00711 
00712 void DirTree::save( unsigned char* buffer )
00713 {
00714   memset( buffer, 0, size() );
00715   
00716   // root is fixed as "Root Entry"
00717   DirEntry* root = entry( 0 );
00718   std::string name = "Root Entry";
00719   for( unsigned j = 0; j < name.length(); j++ )
00720     buffer[ j*2 ] = name[j];
00721   writeU16( buffer + 0x40, name.length()*2 + 2 );    
00722   writeU32( buffer + 0x74, 0xffffffff );
00723   writeU32( buffer + 0x78, 0 );
00724   writeU32( buffer + 0x44, 0xffffffff );
00725   writeU32( buffer + 0x48, 0xffffffff );
00726   writeU32( buffer + 0x4c, root->child );
00727   buffer[ 0x42 ] = 5;
00728   buffer[ 0x43 ] = 1; 
00729 
00730   for( unsigned i = 1; i < entryCount(); i++ )
00731   {
00732     DirEntry* e = entry( i );
00733     if( !e ) continue;
00734     if( e->dir )
00735     {
00736       e->start = 0xffffffff;
00737       e->size = 0;
00738     }
00739     
00740     // max length for name is 32 chars
00741     std::string name = e->name;
00742     if( name.length() > 32 )
00743       name.erase( 32, name.length() );
00744       
00745     // write name as Unicode 16-bit
00746     for( unsigned j = 0; j < name.length(); j++ )
00747       buffer[ i*128 + j*2 ] = name[j];
00748 
00749     writeU16( buffer + i*128 + 0x40, name.length()*2 + 2 );    
00750     writeU32( buffer + i*128 + 0x74, e->start );
00751     writeU32( buffer + i*128 + 0x78, e->size );
00752     writeU32( buffer + i*128 + 0x44, e->prev );
00753     writeU32( buffer + i*128 + 0x48, e->next );
00754     writeU32( buffer + i*128 + 0x4c, e->child );
00755     buffer[ i*128 + 0x42 ] = e->dir ? 1 : 2;
00756     buffer[ i*128 + 0x43 ] = 1; // always black
00757   }  
00758 }
00759 
00760 void DirTree::debug()
00761 {
00762   for( unsigned i = 0; i < entryCount(); i++ )
00763   {
00764     DirEntry* e = entry( i );
00765     if( !e ) continue;
00766     std::cout << i << ": ";
00767     if( !e->valid ) std::cout << "INVALID ";
00768     std::cout << e->name << " ";
00769     if( e->dir ) std::cout << "(Dir) ";
00770     else std::cout << "(File) ";
00771     std::cout << e->size << " ";
00772     std::cout << "s:" << e->start << " ";
00773     std::cout << "(";
00774     if( e->child == End ) std::cout << "-"; else std::cout << e->child;
00775     std::cout << " ";
00776     if( e->prev == End ) std::cout << "-"; else std::cout << e->prev;
00777     std::cout << ":";
00778     if( e->next == End ) std::cout << "-"; else std::cout << e->next;
00779     std::cout << ")";    
00780     std::cout << std::endl;
00781   }
00782 }
00783 
00784 // =========== StorageIO ==========
00785 
00786 StorageIO::StorageIO( Storage* st, const char* fname )
00787 {
00788   storage = st;
00789   filename = fname;
00790   result = Storage::Ok;
00791   opened = false;
00792   
00793   header = new Header();
00794   dirtree = new DirTree();
00795   bbat = new AllocTable();
00796   sbat = new AllocTable();
00797   
00798   filesize = 0;
00799   bbat->blockSize = 1 << header->b_shift;
00800   sbat->blockSize = 1 << header->s_shift;
00801 }
00802 
00803 StorageIO::~StorageIO()
00804 {
00805   if( opened ) close();
00806   delete sbat;
00807   delete bbat;
00808   delete dirtree;
00809   delete header;
00810 }
00811 
00812 bool StorageIO::open()
00813 {
00814   // already opened ? close first
00815   if( opened ) close();
00816   
00817   load();
00818   
00819   return result == Storage::Ok;
00820 }
00821 
00822 void StorageIO::load()
00823 {
00824   unsigned char* buffer = 0;
00825   unsigned long buflen = 0;
00826   std::vector<unsigned long> blocks;
00827   
00828   // open the file, check for error
00829   result = Storage::OpenFailed;
00830   file.open( filename.c_str(), std::ios::binary | std::ios::in );
00831   if( !file.good() ) return;
00832   
00833   // find size of input file
00834   file.seekg( 0, std::ios::end );
00835   filesize = file.tellg();
00836 
00837   // load header
00838   buffer = new unsigned char[512];
00839   file.seekg( 0 ); 
00840   file.read( (char*)buffer, 512 );
00841   header->load( buffer );
00842   delete[] buffer;
00843 
00844   // check OLE magic id
00845   result = Storage::NotOLE;
00846   for( unsigned i=0; i<8; i++ )
00847     if( header->id[i] != pole_magic[i] )
00848       return;
00849   
00850   // sanity checks
00851   result = Storage::BadOLE;
00852   if( !header->valid() ) return;
00853   if( header->threshold != 4096 ) return;
00854 
00855   // important block size
00856   bbat->blockSize = 1 << header->b_shift;
00857   sbat->blockSize = 1 << header->s_shift;
00858   
00859   // find blocks allocated to store big bat
00860   // the first 109 blocks are in header, the rest in meta bat
00861   blocks.clear();
00862   blocks.resize( header->num_bat );
00863   for( unsigned i = 0; i < 109; i++ )
00864     if( i >= header->num_bat ) break;
00865     else blocks[i] = header->bb_blocks[i];
00866   if( (header->num_bat > 109) && (header->num_mbat > 0) )
00867   {
00868     unsigned char* buffer2 = new unsigned char[ bbat->blockSize ];
00869     unsigned k = 109;
00870     unsigned mblock = header->mbat_start;
00871     for( unsigned r = 0; r < header->num_mbat; r++ )
00872     {
00873       loadBigBlock( mblock, buffer2, bbat->blockSize );
00874       for( unsigned s=0; s < bbat->blockSize-4; s+=4 )
00875       {
00876         if( k >= header->num_bat ) break;
00877         else  blocks[k++] = readU32( buffer2 + s );
00878       }
00879       mblock = readU32( buffer2 + bbat->blockSize-4 );
00880      }    
00881     delete[] buffer2;
00882   }
00883 
00884   // load big bat
00885   buflen = blocks.size()*bbat->blockSize;
00886   if( buflen > 0 )
00887   {
00888     buffer = new unsigned char[ buflen ];  
00889     loadBigBlocks( blocks, buffer, buflen );
00890     bbat->load( buffer, buflen );
00891     delete[] buffer;
00892   }  
00893 
00894   // load small bat
00895   blocks.clear();
00896   blocks = bbat->follow( header->sbat_start );
00897   buflen = blocks.size()*bbat->blockSize;
00898   if( buflen > 0 )
00899   {
00900     buffer = new unsigned char[ buflen ];  
00901     loadBigBlocks( blocks, buffer, buflen );
00902     sbat->load( buffer, buflen );
00903     delete[] buffer;
00904   }  
00905   
00906   // load directory tree
00907   blocks.clear();
00908   blocks = bbat->follow( header->dirent_start );
00909   buflen = blocks.size()*bbat->blockSize;
00910   buffer = new unsigned char[ buflen ];  
00911   loadBigBlocks( blocks, buffer, buflen );
00912   dirtree->load( buffer, buflen );
00913   unsigned sb_start = readU32( buffer + 0x74 );
00914   delete[] buffer;
00915   
00916   // fetch block chain as data for small-files
00917   sb_blocks = bbat->follow( sb_start ); // small files
00918   
00919   // for troubleshooting, just enable this block
00920 #if 0
00921   header->debug();
00922   sbat->debug();
00923   bbat->debug();
00924   dirtree->debug();
00925 #endif
00926   
00927   // so far so good
00928   result = Storage::Ok;
00929   opened = true;
00930 }
00931 
00932 void StorageIO::create()
00933 {
00934   // std::cout << "Creating " << filename << std::endl; 
00935   
00936   file.open( filename.c_str(), std::ios::out|std::ios::binary );
00937   if( !file.good() )
00938   {
00939     std::cerr << "Can't create " << filename << std::endl;
00940     result = Storage::OpenFailed;
00941     return;
00942   }
00943   
00944   // so far so good
00945   opened = true;
00946   result = Storage::Ok;
00947 }
00948 
00949 void StorageIO::flush()
00950 {
00951   /* Note on Microsoft implementation:
00952      - directory entries are stored in the last block(s)
00953      - BATs are as second to the last
00954      - Meta BATs are third to the last  
00955   */
00956 }
00957 
00958 void StorageIO::close()
00959 {
00960   if( !opened ) return;
00961   
00962   file.close(); 
00963   opened = false;
00964   
00965   std::list<Stream*>::iterator it;
00966   for( it = streams.begin(); it != streams.end(); ++it )
00967     delete *it;
00968 }
00969 
00970 StreamIO* StorageIO::streamIO( const std::string& name )
00971 {
00972   // sanity check
00973   if( !name.length() ) return (StreamIO*)0;
00974 
00975   // search in the entries
00976   DirEntry* entry = dirtree->entry( name );
00977   //if( entry) std::cout << "FOUND\n";
00978   if( !entry ) return (StreamIO*)0;
00979   //if( !entry->dir ) std::cout << "  NOT DIR\n";
00980   if( entry->dir ) return (StreamIO*)0;
00981 
00982   StreamIO* result = new StreamIO( this, entry );
00983   result->fullName = name;
00984   
00985   return result;
00986 }
00987 
00988 unsigned long StorageIO::loadBigBlocks( std::vector<unsigned long> blocks,
00989   unsigned char* data, unsigned long maxlen )
00990 {
00991   // sentinel
00992   if( !data ) return 0;
00993   if( !file.good() ) return 0;
00994   if( blocks.size() < 1 ) return 0;
00995   if( maxlen == 0 ) return 0;
00996 
00997   // read block one by one, seems fast enough
00998   unsigned long bytes = 0;
00999   for( unsigned long i=0; (i < blocks.size() ) & ( bytes<maxlen ); i++ )
01000   {
01001     unsigned long block = blocks[i];
01002     unsigned long pos =  bbat->blockSize * ( block+1 );
01003     unsigned long p = (bbat->blockSize < maxlen-bytes) ? bbat->blockSize : maxlen-bytes;
01004     if( pos + p > filesize ) p = filesize - pos;
01005     file.seekg( pos );
01006     file.read( (char*)data + bytes, p );
01007     bytes += p;
01008   }
01009 
01010   return bytes;
01011 }
01012 
01013 unsigned long StorageIO::loadBigBlock( unsigned long block,
01014   unsigned char* data, unsigned long maxlen )
01015 {
01016   // sentinel
01017   if( !data ) return 0;
01018   if( !file.good() ) return 0;
01019   
01020   // wraps call for loadBigBlocks
01021   std::vector<unsigned long> blocks;
01022   blocks.resize( 1 );
01023   blocks[ 0 ] = block;
01024   
01025   return loadBigBlocks( blocks, data, maxlen );
01026 }
01027 
01028 // return number of bytes which has been read
01029 unsigned long StorageIO::loadSmallBlocks( std::vector<unsigned long> blocks,
01030   unsigned char* data, unsigned long maxlen )
01031 {
01032   // sentinel
01033   if( !data ) return 0;
01034   if( !file.good() ) return 0;
01035   if( blocks.size() < 1 ) return 0;
01036   if( maxlen == 0 ) return 0;
01037 
01038   // our own local buffer
01039   unsigned char* buf = new unsigned char[ bbat->blockSize ];
01040 
01041   // read small block one by one
01042   unsigned long bytes = 0;
01043   for( unsigned long i=0; ( i<blocks.size() ) & ( bytes<maxlen ); i++ )
01044   {
01045     unsigned long block = blocks[i];
01046 
01047     // find where the small-block exactly is
01048     unsigned long pos = block * sbat->blockSize;
01049     unsigned long bbindex = pos / bbat->blockSize;
01050     if( bbindex >= sb_blocks.size() ) break;
01051 
01052     loadBigBlock( sb_blocks[ bbindex ], buf, bbat->blockSize );
01053 
01054     // copy the data
01055     unsigned offset = pos % bbat->blockSize;
01056     unsigned long p = (maxlen-bytes < bbat->blockSize-offset ) ? maxlen-bytes :  bbat->blockSize-offset;
01057     p = (sbat->blockSize<p ) ? sbat->blockSize : p;
01058     memcpy( data + bytes, buf + offset, p );
01059     bytes += p;
01060   }
01061   
01062   delete[] buf;
01063 
01064   return bytes;
01065 }
01066 
01067 unsigned long StorageIO::loadSmallBlock( unsigned long block,
01068   unsigned char* data, unsigned long maxlen )
01069 {
01070   // sentinel
01071   if( !data ) return 0;
01072   if( !file.good() ) return 0;
01073 
01074   // wraps call for loadSmallBlocks
01075   std::vector<unsigned long> blocks;
01076   blocks.resize( 1 );
01077   blocks.assign( 1, block );
01078 
01079   return loadSmallBlocks( blocks, data, maxlen );
01080 }
01081 
01082 // =========== StreamIO ==========
01083 
01084 StreamIO::StreamIO( StorageIO* s, DirEntry* e)
01085 {
01086   io = s;
01087   entry = e;
01088   eof = false;
01089   fail = false;
01090   
01091   m_pos = 0;
01092 
01093   if( entry->size >= io->header->threshold ) 
01094     blocks = io->bbat->follow( entry->start );
01095   else
01096     blocks = io->sbat->follow( entry->start );
01097 
01098   // prepare cache
01099   cache_pos = 0;
01100   cache_size = 4096; // optimal ?
01101   cache_data = new unsigned char[cache_size];
01102   updateCache();
01103 }
01104 
01105 // FIXME tell parent we're gone
01106 StreamIO::~StreamIO()
01107 {
01108   delete[] cache_data;  
01109 }
01110 
01111 void StreamIO::seek( unsigned long pos )
01112 {
01113   m_pos = pos;
01114 }
01115 
01116 unsigned long StreamIO::tell()
01117 {
01118   return m_pos;
01119 }
01120 
01121 int StreamIO::getch()
01122 {
01123   // past end-of-file ?
01124   if( m_pos > entry->size ) return -1;
01125 
01126   // need to update cache ?
01127   if( !cache_size || ( m_pos < cache_pos ) ||
01128     ( m_pos >= cache_pos + cache_size ) )
01129       updateCache();
01130 
01131   // something bad if we don't get good cache
01132   if( !cache_size ) return -1;
01133 
01134   int data = cache_data[m_pos - cache_pos];
01135   m_pos++;
01136 
01137   return data;
01138 }
01139 
01140 unsigned long StreamIO::read( unsigned long pos, unsigned char* data, unsigned long maxlen )
01141 {
01142   // sanity checks
01143   if( !data ) return 0;
01144   if( maxlen == 0 ) return 0;
01145 
01146   unsigned long totalbytes = 0;
01147   
01148   if ( entry->size < io->header->threshold )
01149   {
01150     // small file
01151     unsigned long index = pos / io->sbat->blockSize;
01152 
01153     if( index >= blocks.size() ) return 0;
01154 
01155     unsigned char* buf = new unsigned char[ io->sbat->blockSize ];
01156     unsigned long offset = pos % io->sbat->blockSize;
01157     while( totalbytes < maxlen )
01158     {
01159       if( index >= blocks.size() ) break;
01160       io->loadSmallBlock( blocks[index], buf, io->bbat->blockSize );
01161       unsigned long count = io->sbat->blockSize - offset;
01162       if( count > maxlen-totalbytes ) count = maxlen-totalbytes;
01163       memcpy( data+totalbytes, buf + offset, count );
01164       totalbytes += count;
01165       offset = 0;
01166       index++;
01167     }
01168     delete[] buf;
01169 
01170   }
01171   else
01172   {
01173     // big file
01174     unsigned long index = pos / io->bbat->blockSize;
01175     
01176     if( index >= blocks.size() ) return 0;
01177     
01178     unsigned char* buf = new unsigned char[ io->bbat->blockSize ];
01179     unsigned long offset = pos % io->bbat->blockSize;
01180     while( totalbytes < maxlen )
01181     {
01182       if( index >= blocks.size() ) break;
01183       io->loadBigBlock( blocks[index], buf, io->bbat->blockSize );
01184       unsigned long count = io->bbat->blockSize - offset;
01185       if( count > maxlen-totalbytes ) count = maxlen-totalbytes;
01186       memcpy( data+totalbytes, buf + offset, count );
01187       totalbytes += count;
01188       index++;
01189       offset = 0;
01190     }
01191     delete [] buf;
01192 
01193   }
01194 
01195   return totalbytes;
01196 }
01197 
01198 unsigned long StreamIO::read( unsigned char* data, unsigned long maxlen )
01199 {
01200   unsigned long bytes = read( tell(), data, maxlen );
01201   m_pos += bytes;
01202   return bytes;
01203 }
01204 
01205 void StreamIO::updateCache()
01206 {
01207   // sanity check
01208   if( !cache_data ) return;
01209 
01210   cache_pos = m_pos - ( m_pos % cache_size );
01211   unsigned long bytes = cache_size;
01212   if( cache_pos + bytes > entry->size ) bytes = entry->size - cache_pos;
01213   cache_size = read( cache_pos, cache_data, bytes );
01214 }
01215 
01216 
01217 // =========== Storage ==========
01218 
01219 Storage::Storage( const char* filename )
01220 {
01221   io = new StorageIO( this, filename );
01222 }
01223 
01224 Storage::~Storage()
01225 {
01226   delete io;
01227 }
01228 
01229 int Storage::result()
01230 {
01231   return io->result;
01232 }
01233 
01234 bool Storage::open()
01235 {
01236   return io->open();
01237 }
01238 
01239 void Storage::close()
01240 {
01241   io->close();
01242 }
01243 
01244 std::list<std::string> Storage::entries( const std::string& path )
01245 {
01246   std::list<std::string> result;
01247   DirTree* dt = io->dirtree;
01248   DirEntry* e = dt->entry( path, false );
01249   if( e  && e->dir )
01250   {
01251     unsigned parent = dt->indexOf( e );
01252     std::vector<unsigned> children = dt->children( parent );
01253     for( unsigned i = 0; i < children.size(); i++ )
01254       result.push_back( dt->entry( children[i] )->name );
01255   }
01256   
01257   return result;
01258 }
01259 
01260 bool Storage::isDirectory( const std::string& name )
01261 {
01262   DirEntry* e = io->dirtree->entry( name, false );
01263   return e ? e->dir : false;
01264 }
01265 
01266 // =========== Stream ==========
01267 
01268 Stream::Stream( Storage* storage, const std::string& name )
01269 {
01270   io = storage->io->streamIO( name );
01271 }
01272 
01273 // FIXME tell parent we're gone
01274 Stream::~Stream()
01275 {
01276   delete io;
01277 }
01278 
01279 std::string Stream::fullName()
01280 {
01281   return io ? io->fullName : std::string();
01282 }
01283 
01284 unsigned long Stream::tell()
01285 {
01286   return io ? io->tell() : 0;
01287 }
01288 
01289 void Stream::seek( unsigned long newpos )
01290 {
01291   if( io ) io->seek( newpos );
01292 }
01293 
01294 unsigned long Stream::size()
01295 {
01296   return io ? io->entry->size : 0;
01297 }
01298 
01299 int Stream::getch()
01300 {
01301   return io ? io->getch() : 0;
01302 }
01303 
01304 unsigned long Stream::read( unsigned char* data, unsigned long maxlen )
01305 {
01306   return io ? io->read( data, maxlen ) : 0;
01307 }
01308 
01309 bool Stream::eof()
01310 {
01311   return io ? io->eof : false;
01312 }
01313 
01314 bool Stream::fail()
01315 {
01316   return io ? io->fail : true;
01317 }
KDE Home | KDE Accessibility Home | Description of Access Keys