// LLVM API Documentation
00001 //===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by Reid Spencer and is distributed under the 00006 // University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements the llvm::Compressor class, an abstraction for memory 00011 // block compression. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "llvm/Config/config.h" 00016 #include "llvm/Support/Compressor.h" 00017 #include "llvm/ADT/StringExtras.h" 00018 #include <cassert> 00019 #include <string> 00020 #include <ostream> 00021 #include "bzip2/bzlib.h" 00022 using namespace llvm; 00023 00024 enum CompressionTypes { 00025 COMP_TYPE_NONE = '0', 00026 COMP_TYPE_BZIP2 = '2', 00027 }; 00028 00029 static int getdata(char*& buffer, size_t &size, 00030 llvm::Compressor::OutputDataCallback* cb, void* context) { 00031 buffer = 0; 00032 size = 0; 00033 int result = (*cb)(buffer, size, context); 00034 assert(buffer != 0 && "Invalid result from Compressor callback"); 00035 assert(size != 0 && "Invalid result from Compressor callback"); 00036 return result; 00037 } 00038 00039 static int getdata_uns(char*& buffer, unsigned &size, 00040 llvm::Compressor::OutputDataCallback* cb, void* context) 00041 { 00042 size_t SizeOut; 00043 int Res = getdata(buffer, SizeOut, cb, context); 00044 size = SizeOut; 00045 return Res; 00046 } 00047 00048 //===----------------------------------------------------------------------===// 00049 //=== NULLCOMP - a compression like set of routines that just copies data 00050 //=== without doing any compression. 
This is provided so that if the 00051 //=== configured environment doesn't have a compression library the 00052 //=== program can still work, albeit using more data/memory. 00053 //===----------------------------------------------------------------------===// 00054 00055 struct NULLCOMP_stream { 00056 // User provided fields 00057 char* next_in; 00058 size_t avail_in; 00059 char* next_out; 00060 size_t avail_out; 00061 00062 // Information fields 00063 size_t output_count; // Total count of output bytes 00064 }; 00065 00066 static void NULLCOMP_init(NULLCOMP_stream* s) { 00067 s->output_count = 0; 00068 } 00069 00070 static bool NULLCOMP_compress(NULLCOMP_stream* s) { 00071 assert(s && "Invalid NULLCOMP_stream"); 00072 assert(s->next_in != 0); 00073 assert(s->next_out != 0); 00074 assert(s->avail_in >= 1); 00075 assert(s->avail_out >= 1); 00076 00077 if (s->avail_out >= s->avail_in) { 00078 ::memcpy(s->next_out, s->next_in, s->avail_in); 00079 s->output_count += s->avail_in; 00080 s->avail_out -= s->avail_in; 00081 s->next_in += s->avail_in; 00082 s->avail_in = 0; 00083 return true; 00084 } else { 00085 ::memcpy(s->next_out, s->next_in, s->avail_out); 00086 s->output_count += s->avail_out; 00087 s->avail_in -= s->avail_out; 00088 s->next_in += s->avail_out; 00089 s->avail_out = 0; 00090 return false; 00091 } 00092 } 00093 00094 static bool NULLCOMP_decompress(NULLCOMP_stream* s) { 00095 assert(s && "Invalid NULLCOMP_stream"); 00096 assert(s->next_in != 0); 00097 assert(s->next_out != 0); 00098 assert(s->avail_in >= 1); 00099 assert(s->avail_out >= 1); 00100 00101 if (s->avail_out >= s->avail_in) { 00102 ::memcpy(s->next_out, s->next_in, s->avail_in); 00103 s->output_count += s->avail_in; 00104 s->avail_out -= s->avail_in; 00105 s->next_in += s->avail_in; 00106 s->avail_in = 0; 00107 return true; 00108 } else { 00109 ::memcpy(s->next_out, s->next_in, s->avail_out); 00110 s->output_count += s->avail_out; 00111 s->avail_in -= s->avail_out; 00112 s->next_in += 
s->avail_out; 00113 s->avail_out = 0; 00114 return false; 00115 } 00116 } 00117 00118 static void NULLCOMP_end(NULLCOMP_stream* strm) { 00119 } 00120 00121 namespace { 00122 00123 /// This structure is only used when a bytecode file is compressed. 00124 /// As bytecode is being decompressed, the memory buffer might need 00125 /// to be reallocated. The buffer allocation is handled in a callback 00126 /// and this structure is needed to retain information across calls 00127 /// to the callback. 00128 /// @brief An internal buffer object used for handling decompression 00129 struct BufferContext { 00130 char* buff; 00131 size_t size; 00132 BufferContext(size_t compressedSize) { 00133 // Null to indicate malloc of a new block 00134 buff = 0; 00135 00136 // Compute the initial length of the uncompression buffer. Note that this 00137 // is twice the length of the compressed buffer and will be doubled again 00138 // in the callback for an initial allocation of 4x compressedSize. This 00139 // calculation is based on the typical compression ratio of bzip2 on LLVM 00140 // bytecode files which typically ranges in the 50%-75% range. Since we 00141 // typically get at least 50%, doubling is insufficient. By using a 4x 00142 // multiplier on the first allocation, we minimize the impact of having to 00143 // copy the buffer on reallocation. 00144 size = compressedSize*2; 00145 } 00146 00147 /// trimTo - Reduce the size of the buffer down to the specified amount. This 00148 /// is useful after have read in the bytecode file to discard extra unused 00149 /// memory. 00150 /// 00151 void trimTo(size_t NewSize) { 00152 buff = (char*)::realloc(buff, NewSize); 00153 size = NewSize; 00154 } 00155 00156 /// This function handles allocation of the buffer used for decompression of 00157 /// compressed bytecode files. It is called by Compressor::decompress which is 00158 /// called by BytecodeReader::ParseBytecode. 
00159 static size_t callback(char*&buff, size_t &sz, void* ctxt){ 00160 // Case the context variable to our BufferContext 00161 BufferContext* bc = reinterpret_cast<BufferContext*>(ctxt); 00162 00163 // Compute the new, doubled, size of the block 00164 size_t new_size = bc->size * 2; 00165 00166 // Extend or allocate the block (realloc(0,n) == malloc(n)) 00167 char* new_buff = (char*) ::realloc(bc->buff, new_size); 00168 00169 // Figure out what to return to the Compressor. If this is the first call, 00170 // then bc->buff will be null. In this case we want to return the entire 00171 // buffer because there was no previous allocation. Otherwise, when the 00172 // buffer is reallocated, we save the new base pointer in the 00173 // BufferContext.buff field but return the address of only the extension, 00174 // mid-way through the buffer (since its size was doubled). Furthermore, 00175 // the sz result must be 1/2 the total size of the buffer. 00176 if (bc->buff == 0 ) { 00177 buff = bc->buff = new_buff; 00178 sz = new_size; 00179 } else { 00180 bc->buff = new_buff; 00181 buff = new_buff + bc->size; 00182 sz = bc->size; 00183 } 00184 00185 // Retain the size of the allocated block 00186 bc->size = new_size; 00187 00188 // Make sure we fail (return 1) if we didn't get any memory. 00189 return (bc->buff == 0 ? 1 : 0); 00190 } 00191 }; 00192 00193 } // end anonymous namespace 00194 00195 00196 namespace { 00197 00198 // This structure retains the context when compressing the bytecode file. The 00199 // WriteCompressedData function below uses it to keep track of the previously 00200 // filled chunk of memory (which it writes) and how many bytes have been 00201 // written. 
00202 struct WriterContext { 00203 // Initialize the context 00204 WriterContext(std::ostream*OS, size_t CS) 00205 : chunk(0), sz(0), written(0), compSize(CS), Out(OS) {} 00206 00207 // Make sure we clean up memory 00208 ~WriterContext() { 00209 if (chunk) 00210 delete [] chunk; 00211 } 00212 00213 // Write the chunk 00214 void write(size_t size = 0) { 00215 size_t write_size = (size == 0 ? sz : size); 00216 Out->write(chunk,write_size); 00217 written += write_size; 00218 delete [] chunk; 00219 chunk = 0; 00220 sz = 0; 00221 } 00222 00223 // This function is a callback used by the Compressor::compress function to 00224 // allocate memory for the compression buffer. This function fulfills that 00225 // responsibility but also writes the previous (now filled) buffer out to the 00226 // stream. 00227 static size_t callback(char*& buffer, size_t &size, void* context) { 00228 // Cast the context to the structure it must point to. 00229 WriterContext* ctxt = reinterpret_cast<WriterContext*>(context); 00230 00231 // If there's a previously allocated chunk, it must now be filled with 00232 // compressed data, so we write it out and deallocate it. 00233 if (ctxt->chunk != 0 && ctxt->sz > 0 ) { 00234 ctxt->write(); 00235 } 00236 00237 // Compute the size of the next chunk to allocate. We attempt to allocate 00238 // enough memory to handle the compression in a single memory allocation. In 00239 // general, the worst we do on compression of bytecode is about 50% so we 00240 // conservatively estimate compSize / 2 as the size needed for the 00241 // compression buffer. compSize is the size of the compressed data, provided 00242 // by WriteBytecodeToFile. 00243 size = ctxt->sz = ctxt->compSize / 2; 00244 00245 // Allocate the chunks 00246 buffer = ctxt->chunk = new char [size]; 00247 00248 // We must return 1 if the allocation failed so that the Compressor knows 00249 // not to use the buffer pointer. 00250 return (ctxt->chunk == 0 ? 
1 : 0); 00251 } 00252 00253 char* chunk; // pointer to the chunk of memory filled by compression 00254 size_t sz; // size of chunk 00255 size_t written; // aggregate total of bytes written in all chunks 00256 size_t compSize; // size of the uncompressed buffer 00257 std::ostream* Out; // The stream we write the data to. 00258 }; 00259 00260 } // end anonymous namespace 00261 00262 // Compress in one of three ways 00263 size_t Compressor::compress(const char* in, size_t size, 00264 OutputDataCallback* cb, void* context) { 00265 assert(in && "Can't compress null buffer"); 00266 assert(size && "Can't compress empty buffer"); 00267 assert(cb && "Can't compress without a callback function"); 00268 00269 size_t result = 0; 00270 00271 // For small files, we just don't bother compressing. bzip2 isn't very good 00272 // with tiny files and can actually make the file larger, so we just avoid 00273 // it altogether. 00274 if (size > 64*1024) { 00275 // Set up the bz_stream 00276 bz_stream bzdata; 00277 bzdata.bzalloc = 0; 00278 bzdata.bzfree = 0; 00279 bzdata.opaque = 0; 00280 bzdata.next_in = (char*)in; 00281 bzdata.avail_in = size; 00282 bzdata.next_out = 0; 00283 bzdata.avail_out = 0; 00284 switch ( BZ2_bzCompressInit(&bzdata, 5, 0, 100) ) { 00285 case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled"); 00286 case BZ_PARAM_ERROR: throw std::string("Compressor internal error"); 00287 case BZ_MEM_ERROR: throw std::string("Out of memory"); 00288 case BZ_OK: 00289 default: 00290 break; 00291 } 00292 00293 // Get a block of memory 00294 if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) { 00295 BZ2_bzCompressEnd(&bzdata); 00296 throw std::string("Can't allocate output buffer"); 00297 } 00298 00299 // Put compression code in first byte 00300 (*bzdata.next_out++) = COMP_TYPE_BZIP2; 00301 bzdata.avail_out--; 00302 00303 // Compress it 00304 int bzerr = BZ_FINISH_OK; 00305 while (BZ_FINISH_OK == (bzerr = BZ2_bzCompress(&bzdata, BZ_FINISH))) { 00306 
if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) { 00307 BZ2_bzCompressEnd(&bzdata); 00308 throw std::string("Can't allocate output buffer"); 00309 } 00310 } 00311 switch (bzerr) { 00312 case BZ_SEQUENCE_ERROR: 00313 case BZ_PARAM_ERROR: throw std::string("Param/Sequence error"); 00314 case BZ_FINISH_OK: 00315 case BZ_STREAM_END: break; 00316 default: throw std::string("Oops: ") + utostr(unsigned(bzerr)); 00317 } 00318 00319 // Finish 00320 result = bzdata.total_out_lo32 + 1; 00321 if (sizeof(size_t) == sizeof(uint64_t)) 00322 result |= static_cast<uint64_t>(bzdata.total_out_hi32) << 32; 00323 00324 BZ2_bzCompressEnd(&bzdata); 00325 } else { 00326 // Do null compression, for small files 00327 NULLCOMP_stream sdata; 00328 sdata.next_in = (char*)in; 00329 sdata.avail_in = size; 00330 NULLCOMP_init(&sdata); 00331 00332 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) { 00333 throw std::string("Can't allocate output buffer"); 00334 } 00335 00336 *(sdata.next_out++) = COMP_TYPE_NONE; 00337 sdata.avail_out--; 00338 00339 while (!NULLCOMP_compress(&sdata)) { 00340 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) { 00341 throw std::string("Can't allocate output buffer"); 00342 } 00343 } 00344 00345 result = sdata.output_count + 1; 00346 NULLCOMP_end(&sdata); 00347 } 00348 return result; 00349 } 00350 00351 size_t Compressor::compressToNewBuffer(const char* in, size_t size, char*&out) { 00352 BufferContext bc(size); 00353 size_t result = compress(in,size,BufferContext::callback,(void*)&bc); 00354 bc.trimTo(result); 00355 out = bc.buff; 00356 return result; 00357 } 00358 00359 size_t 00360 Compressor::compressToStream(const char*in, size_t size, std::ostream& out) { 00361 // Set up the context and writer 00362 WriterContext ctxt(&out, size / 2); 00363 00364 // Compress everything after the magic number (which we'll alter). 
00365 size_t zipSize = Compressor::compress(in,size, 00366 WriterContext::callback, (void*)&ctxt); 00367 00368 if (ctxt.chunk) { 00369 ctxt.write(zipSize - ctxt.written); 00370 } 00371 return zipSize; 00372 } 00373 00374 // Decompress in one of three ways 00375 size_t Compressor::decompress(const char *in, size_t size, 00376 OutputDataCallback* cb, void* context) { 00377 assert(in && "Can't decompress null buffer"); 00378 assert(size > 1 && "Can't decompress empty buffer"); 00379 assert(cb && "Can't decompress without a callback function"); 00380 00381 size_t result = 0; 00382 00383 switch (*in++) { 00384 case COMP_TYPE_BZIP2: { 00385 // Set up the bz_stream 00386 bz_stream bzdata; 00387 bzdata.bzalloc = 0; 00388 bzdata.bzfree = 0; 00389 bzdata.opaque = 0; 00390 bzdata.next_in = (char*)in; 00391 bzdata.avail_in = size - 1; 00392 bzdata.next_out = 0; 00393 bzdata.avail_out = 0; 00394 switch ( BZ2_bzDecompressInit(&bzdata, 0, 0) ) { 00395 case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled"); 00396 case BZ_PARAM_ERROR: throw std::string("Compressor internal error"); 00397 case BZ_MEM_ERROR: throw std::string("Out of memory"); 00398 case BZ_OK: 00399 default: 00400 break; 00401 } 00402 00403 // Get a block of memory 00404 if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) { 00405 BZ2_bzDecompressEnd(&bzdata); 00406 throw std::string("Can't allocate output buffer"); 00407 } 00408 00409 // Decompress it 00410 int bzerr = BZ_OK; 00411 while ( BZ_OK == (bzerr = BZ2_bzDecompress(&bzdata)) && 00412 bzdata.avail_in != 0 ) { 00413 if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) { 00414 BZ2_bzDecompressEnd(&bzdata); 00415 throw std::string("Can't allocate output buffer"); 00416 } 00417 } 00418 00419 switch (bzerr) { 00420 case BZ_PARAM_ERROR: throw std::string("Compressor internal error"); 00421 case BZ_MEM_ERROR: throw std::string("Out of memory"); 00422 case BZ_DATA_ERROR: throw std::string("Data integrity error"); 00423 
case BZ_DATA_ERROR_MAGIC:throw std::string("Data is not BZIP2"); 00424 case BZ_OK: throw std::string("Insufficient input for bzip2"); 00425 case BZ_STREAM_END: break; 00426 default: throw("Ooops"); 00427 } 00428 00429 00430 // Finish 00431 result = bzdata.total_out_lo32; 00432 if (sizeof(size_t) == sizeof(uint64_t)) 00433 result |= (static_cast<uint64_t>(bzdata.total_out_hi32) << 32); 00434 BZ2_bzDecompressEnd(&bzdata); 00435 break; 00436 } 00437 00438 case COMP_TYPE_NONE: { 00439 NULLCOMP_stream sdata; 00440 sdata.next_in = (char*)in; 00441 sdata.avail_in = size - 1; 00442 NULLCOMP_init(&sdata); 00443 00444 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) { 00445 throw std::string("Can't allocate output buffer"); 00446 } 00447 00448 while (!NULLCOMP_decompress(&sdata)) { 00449 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) { 00450 throw std::string("Can't allocate output buffer"); 00451 } 00452 } 00453 00454 result = sdata.output_count; 00455 NULLCOMP_end(&sdata); 00456 break; 00457 } 00458 00459 default: 00460 throw std::string("Unknown type of compressed data"); 00461 } 00462 00463 return result; 00464 } 00465 00466 size_t 00467 Compressor::decompressToNewBuffer(const char* in, size_t size, char*&out) { 00468 BufferContext bc(size); 00469 size_t result = decompress(in,size,BufferContext::callback,(void*)&bc); 00470 out = bc.buff; 00471 return result; 00472 } 00473 00474 size_t 00475 Compressor::decompressToStream(const char*in, size_t size, std::ostream& out){ 00476 // Set up the context and writer 00477 WriterContext ctxt(&out,size / 2); 00478 00479 // Decompress everything after the magic number (which we'll alter) 00480 size_t zipSize = Compressor::decompress(in,size, 00481 WriterContext::callback, (void*)&ctxt); 00482 00483 if (ctxt.chunk) { 00484 ctxt.write(zipSize - ctxt.written); 00485 } 00486 return zipSize; 00487 } 00488 00489 // vim: sw=2 ai