filters

pole.cpp

00001 /* POLE - Portable C++ library to access OLE Storage 
00002    Copyright (C) 2002-2005 Ariya Hidayat <ariya@kde.org>
00003 
00004    Redistribution and use in source and binary forms, with or without 
00005    modification, are permitted provided that the following conditions 
00006    are met:
00007    * Redistributions of source code must retain the above copyright notice, 
00008      this list of conditions and the following disclaimer.
00009    * Redistributions in binary form must reproduce the above copyright notice, 
00010      this list of conditions and the following disclaimer in the documentation 
00011      and/or other materials provided with the distribution.
00012    * Neither the name of the authors nor the names of its contributors may be 
00013      used to endorse or promote products derived from this software without 
00014      specific prior written permission.
00015 
00016    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
00017    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
00018    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
00019    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
00020    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
00021    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
00022    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
00023    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
00024    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
00025    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 
00026    THE POSSIBILITY OF SUCH DAMAGE.
00027 */
00028 
#include <cstring>
#include <fstream>
#include <iostream>
#include <list>
#include <string>
#include <vector>

#include "pole.h"
00036 
00037 // enable to activate debugging output
00038 // #define POLE_DEBUG
00039 
00040 namespace POLE
00041 {
00042 
// In-memory copy of the fields kept in the first block of an OLE storage
// file. load()/save() convert between this object and the raw bytes.
class Header
{
public:
  Header();

  bool valid();                             // sanity-check the loaded fields
  void load( const unsigned char* buffer ); // parse fields from raw bytes
  void save( unsigned char* buffer );       // serialize fields to raw bytes
  void debug();                             // dump fields to std::cout

  unsigned char id[8];           // magic identifier (file signature)
  unsigned b_shift;              // big block size is 1 << b_shift
  unsigned s_shift;              // small block size is 1 << s_shift
  unsigned num_bat;              // number of blocks used by the big bat
  unsigned dirent_start;         // first block of the directory info
  unsigned threshold;            // small/big file switch-over (usually 4K)
  unsigned sbat_start;           // first block of the small bat
  unsigned num_sbat;             // number of blocks used by the small bat
  unsigned mbat_start;           // first block of the meta bat
  unsigned num_mbat;             // number of blocks used by the meta bat
  unsigned long bb_blocks[109];  // big bat block indices kept in the header
};
00064 
// Block allocation table: entry i holds the index of the block that
// follows block i in a chain, or one of the special marker values below.
class AllocTable
{
public:
  // special marker values stored in table entries
  static const unsigned Eof;
  static const unsigned Avail;
  static const unsigned Bat;
  static const unsigned MetaBat;

  unsigned blockSize;            // size in bytes of one block

  AllocTable();

  // table management
  void clear();
  unsigned long count();                       // number of entries
  void resize( unsigned long newsize );
  void preserve( unsigned long n );
  void set( unsigned long index, unsigned long val );
  unsigned unused();                           // index of a free entry
  void setChain( std::vector<unsigned long> );
  std::vector<unsigned long> follow( unsigned long start );
  unsigned long operator[]( unsigned long index );

  // (de)serialization, 4 bytes per entry
  void load( const unsigned char* buffer, unsigned len );
  void save( unsigned char* buffer );
  unsigned size();                             // bytes needed by save()

  void debug();

private:
  std::vector<unsigned long> data;

  // no copy or assign
  AllocTable( const AllocTable& );
  AllocTable& operator=( const AllocTable& );
};
00092 
// One node of the directory tree, already converted from its on-disk form.
class DirEntry
{
public:
  bool valid;            // false when the entry is invalid and must be skipped
  std::string name;      // entry name (no longer Unicode)
  bool dir;              // true for a directory
  unsigned long size;    // size; meaningless for a directory
  unsigned long start;   // first block of the data
  unsigned prev;         // index of the previous sibling
  unsigned next;         // index of the next sibling
  unsigned child;        // index of the first child
};
00105 
00106 class DirTree
00107 {
00108   public:
00109     static const unsigned End;
00110     DirTree();
00111     void clear();
00112     unsigned entryCount();
00113     DirEntry* entry( unsigned index );
00114     DirEntry* entry( const std::string& name, bool create=false );
00115     int indexOf( DirEntry* e );
00116     int parent( unsigned index );
00117     std::string fullName( unsigned index );
00118     std::vector<unsigned> children( unsigned index );
00119     void load( unsigned char* buffer, unsigned len );
00120     void save( unsigned char* buffer );
00121     unsigned size();
00122     void debug();
00123   private:
00124     std::vector<DirEntry> entries;
00125     DirTree( const DirTree& );
00126     DirTree& operator=( const DirTree& );
00127 };
00128 
00129 class StorageIO
00130 {
00131   public:
00132     Storage* storage;         // owner
00133     std::string filename;     // filename
00134     std::fstream file;        // associated with above name
00135     int result;               // result of operation
00136     bool opened;              // true if file is opened
00137     unsigned long filesize;   // size of the file
00138     
00139     Header* header;           // storage header 
00140     DirTree* dirtree;         // directory tree
00141     AllocTable* bbat;         // allocation table for big blocks
00142     AllocTable* sbat;         // allocation table for small blocks
00143     
00144     std::vector<unsigned long> sb_blocks; // blocks for "small" files
00145        
00146     std::list<Stream*> streams;
00147 
00148     StorageIO( Storage* storage, const char* filename );
00149     ~StorageIO();
00150     
00151     bool open();
00152     void close();
00153     void flush();
00154     void load();
00155     void create();
00156 
00157     unsigned long loadBigBlocks( std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen );
00158 
00159     unsigned long loadBigBlock( unsigned long block, unsigned char* buffer, unsigned long maxlen );
00160 
00161     unsigned long loadSmallBlocks( std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen );
00162 
00163     unsigned long loadSmallBlock( unsigned long block, unsigned char* buffer, unsigned long maxlen );
00164     
00165     StreamIO* streamIO( const std::string& name ); 
00166 
00167   private:  
00168     // no copy or assign
00169     StorageIO( const StorageIO& );
00170     StorageIO& operator=( const StorageIO& );
00171 
00172 };
00173 
00174 class StreamIO
00175 {
00176   public:
00177     StorageIO* io;
00178     DirEntry* entry;
00179     std::string fullName;
00180     bool eof;
00181     bool fail;
00182 
00183     StreamIO( StorageIO* io, DirEntry* entry );
00184     ~StreamIO();
00185     unsigned long size();
00186     void seek( unsigned long pos );
00187     unsigned long tell();
00188     int getch();
00189     unsigned long read( unsigned char* data, unsigned long maxlen );
00190     unsigned long read( unsigned long pos, unsigned char* data, unsigned long maxlen );
00191 
00192 
00193   private:
00194     std::vector<unsigned long> blocks;
00195 
00196     // no copy or assign
00197     StreamIO( const StreamIO& );
00198     StreamIO& operator=( const StreamIO& );
00199 
00200     // pointer for read
00201     unsigned long m_pos;
00202 
00203     // simple cache system to speed-up getch()
00204     unsigned char* cache_data;
00205     unsigned long cache_size;
00206     unsigned long cache_pos;
00207     void updateCache();
00208 };
00209 
00210 } // namespace POLE
00211 
00212 using namespace POLE;
00213 
// Decode the 16-bit little-endian value stored in ptr[0..1].
static inline unsigned long readU16( const unsigned char* ptr )
{
  const unsigned long low  = ptr[0];
  const unsigned long high = ptr[1];
  return low | ( high << 8 );
}
00218 
// Decode the 32-bit little-endian value stored in ptr[0..3].
//
// Each byte is widened to unsigned long before it is shifted. Shifting the
// promoted int (as in ptr[3]<<24) overflows for bytes >= 0x80, and where
// unsigned long is wider than 32 bits the negative result sign-extends, so
// markers such as 0xfffffffe would not compare equal to their constants.
static inline unsigned long readU32( const unsigned char* ptr )
{
  return   static_cast<unsigned long>( ptr[0] )
       | ( static_cast<unsigned long>( ptr[1] ) << 8 )
       | ( static_cast<unsigned long>( ptr[2] ) << 16 )
       | ( static_cast<unsigned long>( ptr[3] ) << 24 );
}
00223 
// Store the low 16 bits of 'data' at ptr[0..1], least significant byte first.
static inline void writeU16( unsigned char* ptr, unsigned long data )
{
  for( unsigned k = 0; k < 2; ++k )
    ptr[k] = static_cast<unsigned char>( ( data >> ( 8 * k ) ) & 0xff );
}
00229 
// Store the low 32 bits of 'data' at ptr[0..3], least significant byte first.
static inline void writeU32( unsigned char* ptr, unsigned long data )
{
  for( unsigned k = 0; k < 4; ++k )
    ptr[k] = static_cast<unsigned char>( ( data >> ( 8 * k ) ) & 0xff );
}
00237 
// The eight signature bytes compared against the start of a file to decide
// whether it is an OLE storage.
static const unsigned char pole_magic[] = {
  0xd0, 0xcf, 0x11, 0xe0,
  0xa1, 0xb1, 0x1a, 0xe1
};
00240 
00241 // =========== Header ==========
00242 
00243 Header::Header()
00244 {
00245   b_shift = 9;
00246   s_shift = 6;
00247   num_bat = 0;
00248   dirent_start = 0;
00249   threshold = 4096;
00250   sbat_start = 0;
00251   num_sbat = 0;
00252   mbat_start = 0;
00253   num_mbat = 0;
00254 
00255   for( unsigned i = 0; i < 8; i++ )
00256     id[i] = pole_magic[i];  
00257   for( unsigned i=0; i<109; i++ )
00258     bb_blocks[i] = AllocTable::Avail;
00259 }
00260 
00261 bool Header::valid()
00262 {
00263   if( threshold != 4096 ) return false;
00264   if( num_bat == 0 ) return false;
00265   if( (num_bat > 109) && (num_bat > (num_mbat * 127) + 109)) return false;
00266   if( (num_bat < 109) && (num_mbat != 0) ) return false;
00267   if( s_shift > b_shift ) return false;
00268   if( b_shift <= 6 ) return false;
00269   if( b_shift >=31 ) return false;
00270   
00271   return true;
00272 }
00273 
00274 void Header::load( const unsigned char* buffer )
00275 {
00276   b_shift      = readU16( buffer + 0x1e );
00277   s_shift      = readU16( buffer + 0x20 );
00278   num_bat      = readU32( buffer + 0x2c );
00279   dirent_start = readU32( buffer + 0x30 );
00280   threshold    = readU32( buffer + 0x38 );
00281   sbat_start   = readU32( buffer + 0x3c );
00282   num_sbat     = readU32( buffer + 0x40 );
00283   mbat_start   = readU32( buffer + 0x44 );
00284   num_mbat     = readU32( buffer + 0x48 );
00285   
00286   for( unsigned i = 0; i < 8; i++ )
00287     id[i] = buffer[i];  
00288   for( unsigned i=0; i<109; i++ )
00289     bb_blocks[i] = readU32( buffer + 0x4C+i*4 );
00290 }
00291 
00292 void Header::save( unsigned char* buffer )
00293 {
00294   memset( buffer, 0, 0x4c );
00295   memcpy( buffer, pole_magic, 8 );        // ole signature
00296   writeU32( buffer + 8, 0 );              // unknown 
00297   writeU32( buffer + 12, 0 );             // unknown
00298   writeU32( buffer + 16, 0 );             // unknown
00299   writeU16( buffer + 24, 0x003e );        // revision ?
00300   writeU16( buffer + 26, 3 );             // version ?
00301   writeU16( buffer + 28, 0xfffe );        // unknown
00302   writeU16( buffer + 0x1e, b_shift );
00303   writeU16( buffer + 0x20, s_shift );
00304   writeU32( buffer + 0x2c, num_bat );
00305   writeU32( buffer + 0x30, dirent_start );
00306   writeU32( buffer + 0x38, threshold );
00307   writeU32( buffer + 0x3c, sbat_start );
00308   writeU32( buffer + 0x40, num_sbat );
00309   writeU32( buffer + 0x44, mbat_start );
00310   writeU32( buffer + 0x48, num_mbat );
00311   
00312   for( unsigned i=0; i<109; i++ )
00313     writeU32( buffer + 0x4C+i*4, bb_blocks[i] );
00314 }
00315 
00316 void Header::debug()
00317 {
00318   std::cout << std::endl;
00319   std::cout << "b_shift " << b_shift << std::endl;
00320   std::cout << "s_shift " << s_shift << std::endl;
00321   std::cout << "num_bat " << num_bat << std::endl;
00322   std::cout << "dirent_start " << dirent_start << std::endl;
00323   std::cout << "threshold " << threshold << std::endl;
00324   std::cout << "sbat_start " << sbat_start << std::endl;
00325   std::cout << "num_sbat " << num_sbat << std::endl;
00326   std::cout << "mbat_start " << mbat_start << std::endl;
00327   std::cout << "num_mbat " << num_mbat << std::endl;
00328   
00329   unsigned s = (num_bat<=109) ? num_bat : 109;
00330   std::cout << "bat blocks: ";
00331   for( unsigned i = 0; i < s; i++ )
00332     std::cout << bb_blocks[i] << " ";
00333   std::cout << std::endl;
00334 }
00335  
00336 // =========== AllocTable ==========
00337 
00338 const unsigned AllocTable::Avail = 0xffffffff;
00339 const unsigned AllocTable::Eof = 0xfffffffe;
00340 const unsigned AllocTable::Bat = 0xfffffffd;
00341 const unsigned AllocTable::MetaBat = 0xfffffffc;
00342 
00343 AllocTable::AllocTable()
00344 {
00345   blockSize = 4096;
00346   // initial size
00347   resize( 128 );
00348 }
00349 
00350 unsigned long AllocTable::count()
00351 {
00352   return data.size();
00353 }
00354 
00355 void AllocTable::resize( unsigned long newsize )
00356 {
00357   unsigned oldsize = data.size();
00358   data.resize( newsize );
00359   if( newsize > oldsize )
00360     for( unsigned i = oldsize; i<newsize; i++ )
00361       data[i] = Avail;
00362 }
00363 
00364 // make sure there're still free blocks
00365 void AllocTable::preserve( unsigned long n )
00366 {
00367   std::vector<unsigned long> pre;
00368   for( unsigned i=0; i < n; i++ )
00369     pre.push_back( unused() );
00370 }
00371 
00372 unsigned long AllocTable::operator[]( unsigned long index )
00373 {
00374   unsigned long result;
00375   result = data[index];
00376   return result;
00377 }
00378 
00379 void AllocTable::set( unsigned long index, unsigned long value )
00380 {
00381   if( index >= count() ) resize( index + 1);
00382   data[ index ] = value;
00383 }
00384 
00385 void AllocTable::setChain( std::vector<unsigned long> chain )
00386 {
00387   if( chain.size() )
00388   {
00389     for( unsigned i=0; i<chain.size()-1; i++ )
00390       set( chain[i], chain[i+1] );
00391     set( chain[ chain.size()-1 ], AllocTable::Eof );
00392   }
00393 }
00394 
00395 // follow 
00396 std::vector<unsigned long> AllocTable::follow( unsigned long start )
00397 {
00398   std::vector<unsigned long> chain;
00399 
00400   if( start >= count() ) return chain; 
00401 
00402   unsigned long p = start;
00403   while( p < count() )
00404   {
00405     if( p == (unsigned long)Eof ) break;
00406     if( p == (unsigned long)Bat ) break;
00407     if( p == (unsigned long)MetaBat ) break;
00408     if( p >= count() ) break;
00409     chain.push_back( p );
00410     if( data[p] >= count() ) break;
00411     p = data[ p ];
00412   }
00413 
00414   return chain;
00415 }
00416 
00417 unsigned AllocTable::unused()
00418 {
00419   // find first available block
00420   for( unsigned i = 0; i < data.size(); i++ )
00421     if( data[i] == Avail )
00422       return i;
00423   
00424   // completely full, so enlarge the table
00425   unsigned block = data.size();
00426   resize( data.size()+10 );
00427   return block;      
00428 }
00429 
00430 void AllocTable::load( const unsigned char* buffer, unsigned len )
00431 {
00432   resize( len / 4 );
00433   for( unsigned i = 0; i < count(); i++ )
00434     set( i, readU32( buffer + i*4 ) );
00435 }
00436 
00437 // return space required to save this dirtree
00438 unsigned AllocTable::size()
00439 {
00440   return count() * 4;
00441 }
00442 
00443 void AllocTable::save( unsigned char* buffer )
00444 {
00445   for( unsigned i = 0; i < count(); i++ )
00446     writeU32( buffer + i*4, data[i] );
00447 }
00448 
00449 void AllocTable::debug()
00450 {
00451   std::cout << "block size " << data.size() << std::endl;
00452   for( unsigned i=0; i< data.size(); i++ )
00453   {
00454      if( data[i] == Avail ) continue;
00455      std::cout << i << ": ";
00456      if( data[i] == Eof ) std::cout << "[eof]";
00457      else if( data[i] == Bat ) std::cout << "[bat]";
00458      else if( data[i] == MetaBat ) std::cout << "[metabat]";
00459      else std::cout << data[i];
00460      std::cout << std::endl;
00461   }
00462 }
00463 
00464 // =========== DirTree ==========
00465 
00466 const unsigned DirTree::End = 0xffffffff;
00467 
00468 DirTree::DirTree()
00469 {
00470   clear();
00471 }
00472 
00473 void DirTree::clear()
00474 {
00475   // leave only root entry
00476   entries.resize( 1 );
00477   entries[0].valid = true;
00478   entries[0].name = "Root Entry";
00479   entries[0].dir = true;
00480   entries[0].size = 0;
00481   entries[0].start = End;
00482   entries[0].prev = End;
00483   entries[0].next = End;
00484   entries[0].child = End;
00485 }
00486 
00487 unsigned DirTree::entryCount()
00488 {
00489   return entries.size();
00490 }
00491 
00492 DirEntry* DirTree::entry( unsigned index )
00493 {
00494   if( index >= entryCount() ) return (DirEntry*) 0;
00495   return &entries[ index ];
00496 }
00497 
00498 int DirTree::indexOf( DirEntry* e )
00499 {
00500   for( unsigned i = 0; i < entryCount(); i++ )
00501     if( entry( i ) == e ) return i;
00502     
00503   return -1;
00504 }
00505 
00506 int DirTree::parent( unsigned index )
00507 {
00508   // brute-force, basically we iterate for each entries, find its children
00509   // and check if one of the children is 'index'
00510   for( unsigned j=0; j<entryCount(); j++ )
00511   {
00512     std::vector<unsigned> chi = children( j );
00513     for( unsigned i=0; i<chi.size();i++ )
00514       if( chi[i] == index )
00515         return j;
00516   }
00517         
00518   return -1;
00519 }
00520 
00521 std::string DirTree::fullName( unsigned index )
00522 {
00523   // don't use root name ("Root Entry"), just give "/"
00524   if( index == 0 ) return "/";
00525 
00526   std::string result = entry( index )->name;
00527   result.insert( 0,  "/" );
00528   int p = parent( index );
00529   DirEntry * _entry = 0;
00530   while( p > 0 )
00531   {
00532     _entry = entry( p );
00533     if (_entry->dir && _entry->valid)
00534     {
00535       result.insert( 0,  _entry->name);
00536       result.insert( 0,  "/" );
00537     }
00538     --p;
00539     index = p;
00540     if( index <= 0 ) break;
00541   }
00542   return result;
00543 }
00544 
00545 // given a fullname (e.g "/ObjectPool/_1020961869"), find the entry
00546 // if not found and create is false, return 0
00547 // if create is true, a new entry is returned
00548 DirEntry* DirTree::entry( const std::string& name, bool create )
00549 {
00550    if( !name.length() ) return (DirEntry*)0;
00551  
00552    // quick check for "/" (that's root)
00553    if( name == "/" ) return entry( 0 );
00554    
00555    // split the names, e.g  "/ObjectPool/_1020961869" will become:
00556    // "ObjectPool" and "_1020961869" 
00557    std::list<std::string> names;
00558    std::string::size_type start = 0, end = 0;
00559    if( name[0] == '/' ) start++;
00560    while( start < name.length() )
00561    {
00562      end = name.find_first_of( '/', start );
00563      if( end == std::string::npos ) end = name.length();
00564      names.push_back( name.substr( start, end-start ) );
00565      start = end+1;
00566    }
00567   
00568    // start from root 
00569    int index = 0 ;
00570 
00571    // trace one by one   
00572    std::list<std::string>::iterator it; 
00573 
00574    for( it = names.begin(); it != names.end(); ++it )
00575    {
00576      // find among the children of index
00577      std::vector<unsigned> chi = children( index );
00578      unsigned child = 0;
00579      for( unsigned i = 0; i < chi.size(); i++ )
00580      {
00581        DirEntry* ce = entry( chi[i] );
00582        if( ce ) 
00583        if( ce->valid && ( ce->name.length()>1 ) )
00584        if( ce->name == *it )
00585              child = chi[i];
00586      }
00587      
00588      // traverse to the child
00589      if( child > 0 ) index = child;
00590      else
00591      {
00592        // not found among children
00593        if( !create ) return (DirEntry*)0;
00594        
00595        // create a new entry
00596        unsigned parent = index;
00597        entries.push_back( DirEntry() );
00598        index = entryCount()-1;
00599        DirEntry* e = entry( index );
00600        e->valid = true;
00601        e->name = *it;
00602        e->dir = false;
00603        e->size = 0;
00604        e->start = 0;
00605        e->child = End;
00606        e->prev = End;
00607        e->next = entry(parent)->child;
00608        entry(parent)->child = index;
00609      }
00610    }
00611 
00612    return entry( index );
00613 }
00614 
00615 // helper function: recursively find siblings of index
00616 void dirtree_find_siblings( DirTree* dirtree, std::vector<unsigned>& result, 
00617   unsigned index )
00618 {
00619   DirEntry* e = dirtree->entry( index );
00620   if( !e ) return;
00621   if( !e->valid ) return;
00622 
00623   // prevent infinite loop  
00624   for( unsigned i = 0; i < result.size(); i++ )
00625     if( result[i] == index ) return;
00626 
00627   // add myself    
00628   result.push_back( index );
00629   
00630   // visit previous sibling, don't go infinitely
00631   unsigned prev = e->prev;
00632   if( ( prev > 0 ) && ( prev < dirtree->entryCount() ) )
00633   {
00634     for( unsigned i = 0; i < result.size(); i++ )
00635       if( result[i] == prev ) prev = 0;
00636     if( prev ) dirtree_find_siblings( dirtree, result, prev );
00637   }
00638     
00639   // visit next sibling, don't go infinitely
00640   unsigned next = e->next;
00641   if( ( next > 0 ) && ( next < dirtree->entryCount() ) )
00642   {
00643     for( unsigned i = 0; i < result.size(); i++ )
00644       if( result[i] == next ) next = 0;
00645     if( next ) dirtree_find_siblings( dirtree, result, next );
00646   }
00647 }
00648 
00649 std::vector<unsigned> DirTree::children( unsigned index )
00650 {
00651   std::vector<unsigned> result;
00652   
00653   DirEntry* e = entry( index );
00654   if( e ) if( e->valid && e->child < entryCount() )
00655     dirtree_find_siblings( this, result, e->child );
00656     
00657   return result;
00658 }
00659 
00660 void DirTree::load( unsigned char* buffer, unsigned size )
00661 {
00662   entries.clear();
00663   
00664   for( unsigned i = 0; i < size/128; i++ )
00665   {
00666     unsigned p = i * 128;
00667     
00668     // would be < 32 if first char in the name isn't printable
00669     unsigned prefix = 32;
00670     
00671     // parse name of this entry, which stored as Unicode 16-bit
00672     std::string name;
00673     int name_len = readU16( buffer + 0x40+p );
00674     if( name_len > 64 ) name_len = 64;
00675     for( int j=0; ( buffer[j+p]) && (j<name_len); j+= 2 )
00676       name.append( 1, buffer[j+p] );
00677       
00678     // first char isn't printable ? remove it...
00679     if( buffer[p] < 32 )
00680     { 
00681       prefix = buffer[0]; 
00682       name.erase( 0,1 ); 
00683     }
00684     
00685     // 2 = file (aka stream), 1 = directory (aka storage), 5 = root
00686     unsigned type = buffer[ 0x42 + p];
00687     
00688     DirEntry e;
00689     e.valid = true;
00690     e.name = name;
00691     e.start = readU32( buffer + 0x74+p );
00692     e.size = readU32( buffer + 0x78+p );
00693     e.prev = readU32( buffer + 0x44+p );
00694     e.next = readU32( buffer + 0x48+p );
00695     e.child = readU32( buffer + 0x4C+p );
00696     e.dir = ( type!=2 );
00697     
00698     // sanity checks
00699     if( (type != 2) && (type != 1 ) && (type != 5 ) ) e.valid = false;
00700     if( name_len < 1 ) e.valid = false;
00701     
00702     entries.push_back( e );
00703   }  
00704 }
00705 
00706 // return space required to save this dirtree
00707 unsigned DirTree::size()
00708 {
00709   return entryCount() * 128;
00710 }
00711 
00712 void DirTree::save( unsigned char* buffer )
00713 {
00714   memset( buffer, 0, size() );
00715   
00716   // root is fixed as "Root Entry"
00717   DirEntry* root = entry( 0 );
00718   std::string name = "Root Entry";
00719   for( unsigned j = 0; j < name.length(); j++ )
00720     buffer[ j*2 ] = name[j];
00721   writeU16( buffer + 0x40, name.length()*2 + 2 );    
00722   writeU32( buffer + 0x74, 0xffffffff );
00723   writeU32( buffer + 0x78, 0 );
00724   writeU32( buffer + 0x44, 0xffffffff );
00725   writeU32( buffer + 0x48, 0xffffffff );
00726   writeU32( buffer + 0x4c, root->child );
00727   buffer[ 0x42 ] = 5;
00728   buffer[ 0x43 ] = 1; 
00729 
00730   for( unsigned i = 1; i < entryCount(); i++ )
00731   {
00732     DirEntry* e = entry( i );
00733     if( !e ) continue;
00734     if( e->dir )
00735     {
00736       e->start = 0xffffffff;
00737       e->size = 0;
00738     }
00739     
00740     // max length for name is 32 chars
00741     std::string name = e->name;
00742     if( name.length() > 32 )
00743       name.erase( 32, name.length() );
00744       
00745     // write name as Unicode 16-bit
00746     for( unsigned j = 0; j < name.length(); j++ )
00747       buffer[ i*128 + j*2 ] = name[j];
00748 
00749     writeU16( buffer + i*128 + 0x40, name.length()*2 + 2 );    
00750     writeU32( buffer + i*128 + 0x74, e->start );
00751     writeU32( buffer + i*128 + 0x78, e->size );
00752     writeU32( buffer + i*128 + 0x44, e->prev );
00753     writeU32( buffer + i*128 + 0x48, e->next );
00754     writeU32( buffer + i*128 + 0x4c, e->child );
00755     buffer[ i*128 + 0x42 ] = e->dir ? 1 : 2;
00756     buffer[ i*128 + 0x43 ] = 1; // always black
00757   }  
00758 }
00759 
00760 void DirTree::debug()
00761 {
00762   for( unsigned i = 0; i < entryCount(); i++ )
00763   {
00764     DirEntry* e = entry( i );
00765     if( !e ) continue;
00766     std::cout << i << ": ";
00767     if( !e->valid ) std::cout << "INVALID ";
00768     std::cout << e->name << " ";
00769     if( e->dir ) std::cout << "(Dir) ";
00770     else std::cout << "(File) ";
00771     std::cout << e->size << " ";
00772     std::cout << "s:" << e->start << " ";
00773     std::cout << "(";
00774     if( e->child == End ) std::cout << "-"; else std::cout << e->child;
00775     std::cout << " ";
00776     if( e->prev == End ) std::cout << "-"; else std::cout << e->prev;
00777     std::cout << ":";
00778     if( e->next == End ) std::cout << "-"; else std::cout << e->next;
00779     std::cout << ")";    
00780     std::cout << std::endl;
00781   }
00782 }
00783 
00784 // =========== StorageIO ==========
00785 
00786 StorageIO::StorageIO( Storage* st, const char* fname )
00787 {
00788   storage = st;
00789   filename = fname;
00790   result = Storage::Ok;
00791   opened = false;
00792   
00793   header = new Header();
00794   dirtree = new DirTree();
00795   bbat = new AllocTable();
00796   sbat = new AllocTable();
00797   
00798   filesize = 0;
00799   bbat->blockSize = 1 << header->b_shift;
00800   sbat->blockSize = 1 << header->s_shift;
00801 }
00802 
00803 StorageIO::~StorageIO()
00804 {
00805   if( opened ) close();
00806   delete sbat;
00807   delete bbat;
00808   delete dirtree;
00809   delete header;
00810 }
00811 
00812 bool StorageIO::open()
00813 {
00814   // already opened ? close first
00815   if( opened ) close();
00816   
00817   load();
00818   
00819   return result == Storage::Ok;
00820 }
00821 
00822 void StorageIO::load()
00823 {
00824   unsigned char* buffer = 0;
00825   unsigned long buflen = 0;
00826   std::vector<unsigned long> blocks;
00827   
00828   // open the file, check for error
00829   result = Storage::OpenFailed;
00830   file.open( filename.c_str(), std::ios::binary | std::ios::in );
00831   if( !file.good() ) return;
00832   
00833   // find size of input file
00834   file.seekg( 0, std::ios::end );
00835   filesize = file.tellg();
00836 
00837   // load header
00838   buffer = new unsigned char[512];
00839   file.seekg( 0 ); 
00840   file.read( (char*)buffer, 512 );
00841   header->load( buffer );
00842   delete[] buffer;
00843 
00844   // check OLE magic id
00845   result = Storage::NotOLE;
00846   for( unsigned i=0; i<8; i++ )
00847     if( header->id[i] != pole_magic[i] )
00848       return;
00849   
00850   // sanity checks
00851   result = Storage::BadOLE;
00852   if( !header->valid() ) return;
00853   if( header->threshold != 4096 ) return;
00854 
00855   // important block size
00856   bbat->blockSize = 1 << header->b_shift;
00857   sbat->blockSize = 1 << header->s_shift;
00858   
00859   // find blocks allocated to store big bat
00860   // the first 109 blocks are in header, the rest in meta bat
00861   blocks.clear();
00862   blocks.resize( header->num_bat );
00863   for( unsigned i = 0; i < 109; i++ )
00864     if( i >= header->num_bat ) break;
00865     else blocks[i] = header->bb_blocks[i];
00866   if( (header->num_bat > 109) && (header->num_mbat > 0) )
00867   {
00868     unsigned char* buffer2 = new unsigned char[ bbat->blockSize ];
00869     unsigned k = 109;
00870     for( unsigned r = 0; r < header->num_mbat; r++ )
00871     {
00872       loadBigBlock( header->mbat_start+r, buffer2, bbat->blockSize );
00873       for( unsigned s=0; s < bbat->blockSize; s+=4 )
00874       {
00875         if( k >= header->num_bat ) break;
00876         else  blocks[k++] = readU32( buffer2 + s );
00877       }  
00878      }    
00879     delete[] buffer2;
00880   }
00881 
00882   // load big bat
00883   buflen = blocks.size()*bbat->blockSize;
00884   if( buflen > 0 )
00885   {
00886     buffer = new unsigned char[ buflen ];  
00887     loadBigBlocks( blocks, buffer, buflen );
00888     bbat->load( buffer, buflen );
00889     delete[] buffer;
00890   }  
00891 
00892   // load small bat
00893   blocks.clear();
00894   blocks = bbat->follow( header->sbat_start );
00895   buflen = blocks.size()*bbat->blockSize;
00896   if( buflen > 0 )
00897   {
00898     buffer = new unsigned char[ buflen ];  
00899     loadBigBlocks( blocks, buffer, buflen );
00900     sbat->load( buffer, buflen );
00901     delete[] buffer;
00902   }  
00903   
00904   // load directory tree
00905   blocks.clear();
00906   blocks = bbat->follow( header->dirent_start );
00907   buflen = blocks.size()*bbat->blockSize;
00908   buffer = new unsigned char[ buflen ];  
00909   loadBigBlocks( blocks, buffer, buflen );
00910   dirtree->load( buffer, buflen );
00911   unsigned sb_start = readU32( buffer + 0x74 );
00912   delete[] buffer;
00913   
00914   // fetch block chain as data for small-files
00915   sb_blocks = bbat->follow( sb_start ); // small files
00916   
00917   // for troubleshooting, just enable this block
00918 #if 0
00919   header->debug();
00920   sbat->debug();
00921   bbat->debug();
00922   dirtree->debug();
00923 #endif
00924   
00925   // so far so good
00926   result = Storage::Ok;
00927   opened = true;
00928 }
00929 
00930 void StorageIO::create()
00931 {
00932   // std::cout << "Creating " << filename << std::endl; 
00933   
00934   file.open( filename.c_str(), std::ios::out|std::ios::binary );
00935   if( !file.good() )
00936   {
00937     std::cerr << "Can't create " << filename << std::endl;
00938     result = Storage::OpenFailed;
00939     return;
00940   }
00941   
00942   // so far so good
00943   opened = true;
00944   result = Storage::Ok;
00945 }
00946 
00947 void StorageIO::flush()
00948 {
00949   /* Note on Microsoft implementation:
00950      - directory entries are stored in the last block(s)
00951      - BATs are as second to the last
00952      - Meta BATs are third to the last  
00953   */
00954 }
00955 
00956 void StorageIO::close()
00957 {
00958   if( !opened ) return;
00959   
00960   file.close(); 
00961   opened = false;
00962   
00963   std::list<Stream*>::iterator it;
00964   for( it = streams.begin(); it != streams.end(); ++it )
00965     delete *it;
00966 }
00967 
00968 StreamIO* StorageIO::streamIO( const std::string& name )
00969 {
00970   // sanity check
00971   if( !name.length() ) return (StreamIO*)0;
00972 
00973   // search in the entries
00974   DirEntry* entry = dirtree->entry( name );
00975   //if( entry) std::cout << "FOUND\n";
00976   if( !entry ) return (StreamIO*)0;
00977   //if( !entry->dir ) std::cout << "  NOT DIR\n";
00978   if( entry->dir ) return (StreamIO*)0;
00979 
00980   StreamIO* result = new StreamIO( this, entry );
00981   result->fullName = name;
00982   
00983   return result;
00984 }
00985 
00986 unsigned long StorageIO::loadBigBlocks( std::vector<unsigned long> blocks,
00987   unsigned char* data, unsigned long maxlen )
00988 {
00989   // sentinel
00990   if( !data ) return 0;
00991   if( !file.good() ) return 0;
00992   if( blocks.size() < 1 ) return 0;
00993   if( maxlen == 0 ) return 0;
00994 
00995   // read block one by one, seems fast enough
00996   unsigned long bytes = 0;
00997   for( unsigned long i=0; (i < blocks.size() ) & ( bytes<maxlen ); i++ )
00998   {
00999     unsigned long block = blocks[i];
01000     unsigned long pos =  bbat->blockSize * ( block+1 );
01001     unsigned long p = (bbat->blockSize < maxlen-bytes) ? bbat->blockSize : maxlen-bytes;
01002     if( pos + p > filesize ) p = filesize - pos;
01003     file.seekg( pos );
01004     file.read( (char*)data + bytes, p );
01005     bytes += p;
01006   }
01007 
01008   return bytes;
01009 }
01010 
01011 unsigned long StorageIO::loadBigBlock( unsigned long block,
01012   unsigned char* data, unsigned long maxlen )
01013 {
01014   // sentinel
01015   if( !data ) return 0;
01016   if( !file.good() ) return 0;
01017   
01018   // wraps call for loadBigBlocks
01019   std::vector<unsigned long> blocks;
01020   blocks.resize( 1 );
01021   blocks[ 0 ] = block;
01022   
01023   return loadBigBlocks( blocks, data, maxlen );
01024 }
01025 
01026 // return number of bytes which has been read
01027 unsigned long StorageIO::loadSmallBlocks( std::vector<unsigned long> blocks,
01028   unsigned char* data, unsigned long maxlen )
01029 {
01030   // sentinel
01031   if( !data ) return 0;
01032   if( !file.good() ) return 0;
01033   if( blocks.size() < 1 ) return 0;
01034   if( maxlen == 0 ) return 0;
01035 
01036   // our own local buffer
01037   unsigned char* buf = new unsigned char[ bbat->blockSize ];
01038 
01039   // read small block one by one
01040   unsigned long bytes = 0;
01041   for( unsigned long i=0; ( i<blocks.size() ) & ( bytes<maxlen ); i++ )
01042   {
01043     unsigned long block = blocks[i];
01044 
01045     // find where the small-block exactly is
01046     unsigned long pos = block * sbat->blockSize;
01047     unsigned long bbindex = pos / bbat->blockSize;
01048     if( bbindex >= sb_blocks.size() ) break;
01049 
01050     loadBigBlock( sb_blocks[ bbindex ], buf, bbat->blockSize );
01051 
01052     // copy the data
01053     unsigned offset = pos % bbat->blockSize;
01054     unsigned long p = (maxlen-bytes < bbat->blockSize-offset ) ? maxlen-bytes :  bbat->blockSize-offset;
01055     p = (sbat->blockSize<p ) ? sbat->blockSize : p;
01056     memcpy( data + bytes, buf + offset, p );
01057     bytes += p;
01058   }
01059   
01060   delete[] buf;
01061 
01062   return bytes;
01063 }
01064 
01065 unsigned long StorageIO::loadSmallBlock( unsigned long block,
01066   unsigned char* data, unsigned long maxlen )
01067 {
01068   // sentinel
01069   if( !data ) return 0;
01070   if( !file.good() ) return 0;
01071 
01072   // wraps call for loadSmallBlocks
01073   std::vector<unsigned long> blocks;
01074   blocks.resize( 1 );
01075   blocks.assign( 1, block );
01076 
01077   return loadSmallBlocks( blocks, data, maxlen );
01078 }
01079 
01080 // =========== StreamIO ==========
01081 
01082 StreamIO::StreamIO( StorageIO* s, DirEntry* e)
01083 {
01084   io = s;
01085   entry = e;
01086   eof = false;
01087   fail = false;
01088   
01089   m_pos = 0;
01090 
01091   if( entry->size >= io->header->threshold ) 
01092     blocks = io->bbat->follow( entry->start );
01093   else
01094     blocks = io->sbat->follow( entry->start );
01095 
01096   // prepare cache
01097   cache_pos = 0;
01098   cache_size = 4096; // optimal ?
01099   cache_data = new unsigned char[cache_size];
01100   updateCache();
01101 }
01102 
01103 // FIXME tell parent we're gone
01104 StreamIO::~StreamIO()
01105 {
01106   delete[] cache_data;  
01107 }
01108 
01109 void StreamIO::seek( unsigned long pos )
01110 {
01111   m_pos = pos;
01112 }
01113 
01114 unsigned long StreamIO::tell()
01115 {
01116   return m_pos;
01117 }
01118 
01119 int StreamIO::getch()
01120 {
01121   // past end-of-file ?
01122   if( m_pos > entry->size ) return -1;
01123 
01124   // need to update cache ?
01125   if( !cache_size || ( m_pos < cache_pos ) ||
01126     ( m_pos >= cache_pos + cache_size ) )
01127       updateCache();
01128 
01129   // something bad if we don't get good cache
01130   if( !cache_size ) return -1;
01131 
01132   int data = cache_data[m_pos - cache_pos];
01133   m_pos++;
01134 
01135   return data;
01136 }
01137 
01138 unsigned long StreamIO::read( unsigned long pos, unsigned char* data, unsigned long maxlen )
01139 {
01140   // sanity checks
01141   if( !data ) return 0;
01142   if( maxlen == 0 ) return 0;
01143 
01144   unsigned long totalbytes = 0;
01145   
01146   if ( entry->size < io->header->threshold )
01147   {
01148     // small file
01149     unsigned long index = pos / io->sbat->blockSize;
01150 
01151     if( index >= blocks.size() ) return 0;
01152 
01153     unsigned char* buf = new unsigned char[ io->sbat->blockSize ];
01154     unsigned long offset = pos % io->sbat->blockSize;
01155     while( totalbytes < maxlen )
01156     {
01157       if( index >= blocks.size() ) break;
01158       io->loadSmallBlock( blocks[index], buf, io->bbat->blockSize );
01159       unsigned long count = io->sbat->blockSize - offset;
01160       if( count > maxlen-totalbytes ) count = maxlen-totalbytes;
01161       memcpy( data+totalbytes, buf + offset, count );
01162       totalbytes += count;
01163       offset = 0;
01164       index++;
01165     }
01166     delete[] buf;
01167 
01168   }
01169   else
01170   {
01171     // big file
01172     unsigned long index = pos / io->bbat->blockSize;
01173     
01174     if( index >= blocks.size() ) return 0;
01175     
01176     unsigned char* buf = new unsigned char[ io->bbat->blockSize ];
01177     unsigned long offset = pos % io->bbat->blockSize;
01178     while( totalbytes < maxlen )
01179     {
01180       if( index >= blocks.size() ) break;
01181       io->loadBigBlock( blocks[index], buf, io->bbat->blockSize );
01182       unsigned long count = io->bbat->blockSize - offset;
01183       if( count > maxlen-totalbytes ) count = maxlen-totalbytes;
01184       memcpy( data+totalbytes, buf + offset, count );
01185       totalbytes += count;
01186       index++;
01187       offset = 0;
01188     }
01189     delete [] buf;
01190 
01191   }
01192 
01193   return totalbytes;
01194 }
01195 
01196 unsigned long StreamIO::read( unsigned char* data, unsigned long maxlen )
01197 {
01198   unsigned long bytes = read( tell(), data, maxlen );
01199   m_pos += bytes;
01200   return bytes;
01201 }
01202 
01203 void StreamIO::updateCache()
01204 {
01205   // sanity check
01206   if( !cache_data ) return;
01207 
01208   cache_pos = m_pos - ( m_pos % cache_size );
01209   unsigned long bytes = cache_size;
01210   if( cache_pos + bytes > entry->size ) bytes = entry->size - cache_pos;
01211   cache_size = read( cache_pos, cache_data, bytes );
01212 }
01213 
01214 
01215 // =========== Storage ==========
01216 
01217 Storage::Storage( const char* filename )
01218 {
01219   io = new StorageIO( this, filename );
01220 }
01221 
01222 Storage::~Storage()
01223 {
01224   delete io;
01225 }
01226 
01227 int Storage::result()
01228 {
01229   return io->result;
01230 }
01231 
01232 bool Storage::open()
01233 {
01234   return io->open();
01235 }
01236 
01237 void Storage::close()
01238 {
01239   io->close();
01240 }
01241 
01242 std::list<std::string> Storage::entries( const std::string& path )
01243 {
01244   std::list<std::string> result;
01245   DirTree* dt = io->dirtree;
01246   DirEntry* e = dt->entry( path, false );
01247   if( e  && e->dir )
01248   {
01249     unsigned parent = dt->indexOf( e );
01250     std::vector<unsigned> children = dt->children( parent );
01251     for( unsigned i = 0; i < children.size(); i++ )
01252       result.push_back( dt->entry( children[i] )->name );
01253   }
01254   
01255   return result;
01256 }
01257 
01258 bool Storage::isDirectory( const std::string& name )
01259 {
01260   DirEntry* e = io->dirtree->entry( name, false );
01261   return e ? e->dir : false;
01262 }
01263 
01264 // =========== Stream ==========
01265 
01266 Stream::Stream( Storage* storage, const std::string& name )
01267 {
01268   io = storage->io->streamIO( name );
01269 }
01270 
01271 // FIXME tell parent we're gone
01272 Stream::~Stream()
01273 {
01274   delete io;
01275 }
01276 
01277 std::string Stream::fullName()
01278 {
01279   return io ? io->fullName : std::string();
01280 }
01281 
01282 unsigned long Stream::tell()
01283 {
01284   return io ? io->tell() : 0;
01285 }
01286 
01287 void Stream::seek( unsigned long newpos )
01288 {
01289   if( io ) io->seek( newpos );
01290 }
01291 
01292 unsigned long Stream::size()
01293 {
01294   return io ? io->entry->size : 0;
01295 }
01296 
01297 int Stream::getch()
01298 {
01299   return io ? io->getch() : 0;
01300 }
01301 
01302 unsigned long Stream::read( unsigned char* data, unsigned long maxlen )
01303 {
01304   return io ? io->read( data, maxlen ) : 0;
01305 }
01306 
01307 bool Stream::eof()
01308 {
01309   return io ? io->eof : false;
01310 }
01311 
01312 bool Stream::fail()
01313 {
01314   return io ? io->fail : true;
01315 }
KDE Home | KDE Accessibility Home | Description of Access Keys