LLVM API Documentation
00001 //===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by Reid Spencer and is distributed under the 00006 // University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements the llvm::Compressor class, an abstraction for memory 00011 // block compression. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "llvm/Config/config.h" 00016 #include "llvm/Support/Compressor.h" 00017 #include "llvm/ADT/StringExtras.h" 00018 #include <cassert> 00019 #include <string> 00020 #include <ostream> 00021 #include "bzip2/bzlib.h" 00022 using namespace llvm; 00023 00024 enum CompressionTypes { 00025 COMP_TYPE_NONE = '0', 00026 COMP_TYPE_BZIP2 = '2' 00027 }; 00028 00029 static int getdata(char*& buffer, size_t &size, 00030 llvm::Compressor::OutputDataCallback* cb, void* context) { 00031 buffer = 0; 00032 size = 0; 00033 int result = (*cb)(buffer, size, context); 00034 assert(buffer != 0 && "Invalid result from Compressor callback"); 00035 assert(size != 0 && "Invalid result from Compressor callback"); 00036 return result; 00037 } 00038 00039 static int getdata_uns(char*& buffer, unsigned &size, 00040 llvm::Compressor::OutputDataCallback* cb, void* context) 00041 { 00042 size_t SizeOut; 00043 int Res = getdata(buffer, SizeOut, cb, context); 00044 size = SizeOut; 00045 return Res; 00046 } 00047 00048 //===----------------------------------------------------------------------===// 00049 //=== NULLCOMP - a compression like set of routines that just copies data 00050 //=== without doing any compression. This is provided so that if the 00051 //=== configured environment doesn't have a compression library the 00052 //=== program can still work, albeit using more data/memory. 00053 //===----------------------------------------------------------------------===// 00054 00055 struct NULLCOMP_stream { 00056 // User provided fields 00057 char* next_in; 00058 size_t avail_in; 00059 char* next_out; 00060 size_t avail_out; 00061 00062 // Information fields 00063 size_t output_count; // Total count of output bytes 00064 }; 00065 00066 static void NULLCOMP_init(NULLCOMP_stream* s) { 00067 s->output_count = 0; 00068 } 00069 00070 static bool NULLCOMP_compress(NULLCOMP_stream* s) { 00071 assert(s && "Invalid NULLCOMP_stream"); 00072 assert(s->next_in != 0); 00073 assert(s->next_out != 0); 00074 assert(s->avail_in >= 1); 00075 assert(s->avail_out >= 1); 00076 00077 if (s->avail_out >= s->avail_in) { 00078 ::memcpy(s->next_out, s->next_in, s->avail_in); 00079 s->output_count += s->avail_in; 00080 s->avail_out -= s->avail_in; 00081 s->next_in += s->avail_in; 00082 s->avail_in = 0; 00083 return true; 00084 } else { 00085 ::memcpy(s->next_out, s->next_in, s->avail_out); 00086 s->output_count += s->avail_out; 00087 s->avail_in -= s->avail_out; 00088 s->next_in += s->avail_out; 00089 s->avail_out = 0; 00090 return false; 00091 } 00092 } 00093 00094 static bool NULLCOMP_decompress(NULLCOMP_stream* s) { 00095 assert(s && "Invalid NULLCOMP_stream"); 00096 assert(s->next_in != 0); 00097 assert(s->next_out != 0); 00098 assert(s->avail_in >= 1); 00099 assert(s->avail_out >= 1); 00100 00101 if (s->avail_out >= s->avail_in) { 00102 ::memcpy(s->next_out, s->next_in, s->avail_in); 00103 s->output_count += s->avail_in; 00104 s->avail_out -= s->avail_in; 00105 s->next_in += s->avail_in; 00106 s->avail_in = 0; 00107 return true; 00108 } else { 00109 ::memcpy(s->next_out, s->next_in, s->avail_out); 00110 s->output_count += s->avail_out; 00111 s->avail_in -= s->avail_out; 00112 s->next_in += s->avail_out; 00113 s->avail_out = 0; 00114 return false; 00115 } 00116 } 00117 00118 static void NULLCOMP_end(NULLCOMP_stream* strm) { 00119 } 00120 00121 namespace { 00122 00123 /// This structure is only used when a bytecode file is compressed. 00124 /// As bytecode is being decompressed, the memory buffer might need 00125 /// to be reallocated. The buffer allocation is handled in a callback 00126 /// and this structure is needed to retain information across calls 00127 /// to the callback. 00128 /// @brief An internal buffer object used for handling decompression 00129 struct BufferContext { 00130 char* buff; 00131 size_t size; 00132 BufferContext(size_t compressedSize) { 00133 // Null to indicate malloc of a new block 00134 buff = 0; 00135 00136 // Compute the initial length of the uncompression buffer. Note that this 00137 // is twice the length of the compressed buffer and will be doubled again 00138 // in the callback for an initial allocation of 4x compressedSize. This 00139 // calculation is based on the typical compression ratio of bzip2 on LLVM 00140 // bytecode files which typically ranges in the 50%-75% range. Since we 00141 // typically get at least 50%, doubling is insufficient. By using a 4x 00142 // multiplier on the first allocation, we minimize the impact of having to 00143 // copy the buffer on reallocation. 00144 size = compressedSize*2; 00145 } 00146 00147 /// trimTo - Reduce the size of the buffer down to the specified amount. This 00148 /// is useful after have read in the bytecode file to discard extra unused 00149 /// memory. 00150 /// 00151 void trimTo(size_t NewSize) { 00152 buff = (char*)::realloc(buff, NewSize); 00153 size = NewSize; 00154 } 00155 00156 /// This function handles allocation of the buffer used for decompression of 00157 /// compressed bytecode files. It is called by Compressor::decompress which is 00158 /// called by BytecodeReader::ParseBytecode. 00159 static size_t callback(char*&buff, size_t &sz, void* ctxt){ 00160 // Case the context variable to our BufferContext 00161 BufferContext* bc = reinterpret_cast<BufferContext*>(ctxt); 00162 00163 // Compute the new, doubled, size of the block 00164 size_t new_size = bc->size * 2; 00165 00166 // Extend or allocate the block (realloc(0,n) == malloc(n)) 00167 char* new_buff = (char*) ::realloc(bc->buff, new_size); 00168 00169 // Figure out what to return to the Compressor. If this is the first call, 00170 // then bc->buff will be null. In this case we want to return the entire 00171 // buffer because there was no previous allocation. Otherwise, when the 00172 // buffer is reallocated, we save the new base pointer in the 00173 // BufferContext.buff field but return the address of only the extension, 00174 // mid-way through the buffer (since its size was doubled). Furthermore, 00175 // the sz result must be 1/2 the total size of the buffer. 00176 if (bc->buff == 0 ) { 00177 buff = bc->buff = new_buff; 00178 sz = new_size; 00179 } else { 00180 bc->buff = new_buff; 00181 buff = new_buff + bc->size; 00182 sz = bc->size; 00183 } 00184 00185 // Retain the size of the allocated block 00186 bc->size = new_size; 00187 00188 // Make sure we fail (return 1) if we didn't get any memory. 00189 return (bc->buff == 0 ? 1 : 0); 00190 } 00191 }; 00192 00193 } // end anonymous namespace 00194 00195 00196 namespace { 00197 00198 // This structure retains the context when compressing the bytecode file. The 00199 // WriteCompressedData function below uses it to keep track of the previously 00200 // filled chunk of memory (which it writes) and how many bytes have been 00201 // written. 00202 struct WriterContext { 00203 // Initialize the context 00204 WriterContext(std::ostream*OS, size_t CS) 00205 : chunk(0), sz(0), written(0), compSize(CS), Out(OS) {} 00206 00207 // Make sure we clean up memory 00208 ~WriterContext() { 00209 if (chunk) 00210 delete [] chunk; 00211 } 00212 00213 // Write the chunk 00214 void write(size_t size = 0) { 00215 size_t write_size = (size == 0 ? sz : size); 00216 Out->write(chunk,write_size); 00217 written += write_size; 00218 delete [] chunk; 00219 chunk = 0; 00220 sz = 0; 00221 } 00222 00223 // This function is a callback used by the Compressor::compress function to 00224 // allocate memory for the compression buffer. This function fulfills that 00225 // responsibility but also writes the previous (now filled) buffer out to the 00226 // stream. 00227 static size_t callback(char*& buffer, size_t &size, void* context) { 00228 // Cast the context to the structure it must point to. 00229 WriterContext* ctxt = reinterpret_cast<WriterContext*>(context); 00230 00231 // If there's a previously allocated chunk, it must now be filled with 00232 // compressed data, so we write it out and deallocate it. 00233 if (ctxt->chunk != 0 && ctxt->sz > 0 ) { 00234 ctxt->write(); 00235 } 00236 00237 // Compute the size of the next chunk to allocate. We attempt to allocate 00238 // enough memory to handle the compression in a single memory allocation. In 00239 // general, the worst we do on compression of bytecode is about 50% so we 00240 // conservatively estimate compSize / 2 as the size needed for the 00241 // compression buffer. compSize is the size of the compressed data, provided 00242 // by WriteBytecodeToFile. 00243 size = ctxt->sz = ctxt->compSize / 2; 00244 00245 // Allocate the chunks 00246 buffer = ctxt->chunk = new char [size]; 00247 00248 // We must return 1 if the allocation failed so that the Compressor knows 00249 // not to use the buffer pointer. 00250 return (ctxt->chunk == 0 ? 1 : 0); 00251 } 00252 00253 char* chunk; // pointer to the chunk of memory filled by compression 00254 size_t sz; // size of chunk 00255 size_t written; // aggregate total of bytes written in all chunks 00256 size_t compSize; // size of the uncompressed buffer 00257 std::ostream* Out; // The stream we write the data to. 00258 }; 00259 00260 } // end anonymous namespace 00261 00262 // Compress in one of three ways 00263 size_t Compressor::compress(const char* in, size_t size, 00264 OutputDataCallback* cb, void* context, 00265 std::string* error ) { 00266 assert(in && "Can't compress null buffer"); 00267 assert(size && "Can't compress empty buffer"); 00268 assert(cb && "Can't compress without a callback function"); 00269 00270 size_t result = 0; 00271 00272 // For small files, we just don't bother compressing. bzip2 isn't very good 00273 // with tiny files and can actually make the file larger, so we just avoid 00274 // it altogether. 00275 if (size > 64*1024) { 00276 // Set up the bz_stream 00277 bz_stream bzdata; 00278 bzdata.bzalloc = 0; 00279 bzdata.bzfree = 0; 00280 bzdata.opaque = 0; 00281 bzdata.next_in = (char*)in; 00282 bzdata.avail_in = size; 00283 bzdata.next_out = 0; 00284 bzdata.avail_out = 0; 00285 switch ( BZ2_bzCompressInit(&bzdata, 5, 0, 100) ) { 00286 case BZ_CONFIG_ERROR: 00287 if (error) 00288 *error = "bzip2 library mis-compiled"; 00289 return result; 00290 case BZ_PARAM_ERROR: 00291 if (error) 00292 *error = "Compressor internal error"; 00293 return result; 00294 case BZ_MEM_ERROR: 00295 if (error) 00296 *error = "Out of memory"; 00297 return result; 00298 case BZ_OK: 00299 default: 00300 break; 00301 } 00302 00303 // Get a block of memory 00304 if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) { 00305 BZ2_bzCompressEnd(&bzdata); 00306 if (error) 00307 *error = "Can't allocate output buffer"; 00308 return result; 00309 } 00310 00311 // Put compression code in first byte 00312 (*bzdata.next_out++) = COMP_TYPE_BZIP2; 00313 bzdata.avail_out--; 00314 00315 // Compress it 00316 int bzerr = BZ_FINISH_OK; 00317 while (BZ_FINISH_OK == (bzerr = BZ2_bzCompress(&bzdata, BZ_FINISH))) { 00318 if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) { 00319 BZ2_bzCompressEnd(&bzdata); 00320 if (error) 00321 *error = "Can't allocate output buffer"; 00322 return result; 00323 } 00324 } 00325 switch (bzerr) { 00326 case BZ_SEQUENCE_ERROR: 00327 case BZ_PARAM_ERROR: 00328 if (error) 00329 *error = "Param/Sequence error"; 00330 return result; 00331 case BZ_FINISH_OK: 00332 case BZ_STREAM_END: break; 00333 default: 00334 if (error) 00335 *error = "BZip2 Error: " + utostr(unsigned(bzerr)); 00336 return result; 00337 } 00338 00339 // Finish 00340 result = bzdata.total_out_lo32 + 1; 00341 if (sizeof(size_t) == sizeof(uint64_t)) 00342 result |= static_cast<uint64_t>(bzdata.total_out_hi32) << 32; 00343 00344 BZ2_bzCompressEnd(&bzdata); 00345 } else { 00346 // Do null compression, for small files 00347 NULLCOMP_stream sdata; 00348 sdata.next_in = (char*)in; 00349 sdata.avail_in = size; 00350 NULLCOMP_init(&sdata); 00351 00352 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) { 00353 if (error) 00354 *error = "Can't allocate output buffer"; 00355 return result; 00356 } 00357 00358 *(sdata.next_out++) = COMP_TYPE_NONE; 00359 sdata.avail_out--; 00360 00361 while (!NULLCOMP_compress(&sdata)) { 00362 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) { 00363 if (error) 00364 *error = "Can't allocate output buffer"; 00365 return result; 00366 } 00367 } 00368 00369 result = sdata.output_count + 1; 00370 NULLCOMP_end(&sdata); 00371 } 00372 return result; 00373 } 00374 00375 size_t Compressor::compressToNewBuffer(const char* in, size_t size, char*&out, 00376 std::string* error) { 00377 BufferContext bc(size); 00378 size_t result = compress(in,size,BufferContext::callback,(void*)&bc,error); 00379 bc.trimTo(result); 00380 out = bc.buff; 00381 return result; 00382 } 00383 00384 size_t 00385 Compressor::compressToStream(const char*in, size_t size, std::ostream& out, 00386 std::string* error) { 00387 // Set up the context and writer 00388 WriterContext ctxt(&out, size / 2); 00389 00390 // Compress everything after the magic number (which we'll alter). 00391 size_t zipSize = Compressor::compress(in,size, 00392 WriterContext::callback, (void*)&ctxt,error); 00393 00394 if (zipSize && ctxt.chunk) { 00395 ctxt.write(zipSize - ctxt.written); 00396 } 00397 return zipSize; 00398 } 00399 00400 // Decompress in one of three ways 00401 size_t Compressor::decompress(const char *in, size_t size, 00402 OutputDataCallback* cb, void* context, 00403 std::string* error) { 00404 assert(in && "Can't decompress null buffer"); 00405 assert(size > 1 && "Can't decompress empty buffer"); 00406 assert(cb && "Can't decompress without a callback function"); 00407 00408 size_t result = 0; 00409 00410 switch (*in++) { 00411 case COMP_TYPE_BZIP2: { 00412 // Set up the bz_stream 00413 bz_stream bzdata; 00414 bzdata.bzalloc = 0; 00415 bzdata.bzfree = 0; 00416 bzdata.opaque = 0; 00417 bzdata.next_in = (char*)in; 00418 bzdata.avail_in = size - 1; 00419 bzdata.next_out = 0; 00420 bzdata.avail_out = 0; 00421 switch ( BZ2_bzDecompressInit(&bzdata, 0, 0) ) { 00422 case BZ_CONFIG_ERROR: 00423 if (error) 00424 *error = "bzip2 library mis-compiled"; 00425 return result; 00426 case BZ_PARAM_ERROR: 00427 if (error) 00428 *error = "Compressor internal error"; 00429 return result; 00430 case BZ_MEM_ERROR: 00431 if (error) 00432 *error = "Out of memory"; 00433 return result; 00434 case BZ_OK: 00435 default: 00436 break; 00437 } 00438 00439 // Get a block of memory 00440 if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) { 00441 BZ2_bzDecompressEnd(&bzdata); 00442 if (error) 00443 *error = "Can't allocate output buffer"; 00444 return result; 00445 } 00446 00447 // Decompress it 00448 int bzerr = BZ_OK; 00449 while ( BZ_OK == (bzerr = BZ2_bzDecompress(&bzdata)) && 00450 bzdata.avail_in != 0 ) { 00451 if (0 != getdata_uns(bzdata.next_out, bzdata.avail_out,cb,context)) { 00452 BZ2_bzDecompressEnd(&bzdata); 00453 if (error) 00454 *error = "Can't allocate output buffer"; 00455 return result; 00456 } 00457 } 00458 00459 switch (bzerr) { 00460 BZ2_bzDecompressEnd(&bzdata); 00461 case BZ_PARAM_ERROR: 00462 if (error) 00463 *error = "Compressor internal error"; 00464 return result; 00465 case BZ_MEM_ERROR: 00466 BZ2_bzDecompressEnd(&bzdata); 00467 if (error) 00468 *error = "Out of memory"; 00469 return result; 00470 case BZ_DATA_ERROR: 00471 BZ2_bzDecompressEnd(&bzdata); 00472 if (error) 00473 *error = "Data integrity error"; 00474 return result; 00475 case BZ_DATA_ERROR_MAGIC: 00476 BZ2_bzDecompressEnd(&bzdata); 00477 if (error) 00478 *error = "Data is not BZIP2"; 00479 return result; 00480 case BZ_OK: 00481 BZ2_bzDecompressEnd(&bzdata); 00482 if (error) 00483 *error = "Insufficient input for bzip2"; 00484 return result; 00485 case BZ_STREAM_END: break; 00486 default: 00487 BZ2_bzDecompressEnd(&bzdata); 00488 if (error) 00489 *error = "Unknown result code from bzDecompress"; 00490 return result; 00491 } 00492 00493 // Finish 00494 result = bzdata.total_out_lo32; 00495 if (sizeof(size_t) == sizeof(uint64_t)) 00496 result |= (static_cast<uint64_t>(bzdata.total_out_hi32) << 32); 00497 BZ2_bzDecompressEnd(&bzdata); 00498 break; 00499 } 00500 00501 case COMP_TYPE_NONE: { 00502 NULLCOMP_stream sdata; 00503 sdata.next_in = (char*)in; 00504 sdata.avail_in = size - 1; 00505 NULLCOMP_init(&sdata); 00506 00507 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) { 00508 if (error) 00509 *error = "Can't allocate output buffer"; 00510 return result; 00511 } 00512 00513 while (!NULLCOMP_decompress(&sdata)) { 00514 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) { 00515 if (error) 00516 *error = "Can't allocate output buffer"; 00517 return result; 00518 } 00519 } 00520 00521 result = sdata.output_count; 00522 NULLCOMP_end(&sdata); 00523 break; 00524 } 00525 00526 default: 00527 if (error) 00528 *error = "Unknown type of compressed data"; 00529 return result; 00530 } 00531 00532 return result; 00533 } 00534 00535 size_t 00536 Compressor::decompressToNewBuffer(const char* in, size_t size, char*&out, 00537 std::string* error) { 00538 BufferContext bc(size); 00539 size_t result = decompress(in,size,BufferContext::callback,(void*)&bc,error); 00540 out = bc.buff; 00541 return result; 00542 } 00543 00544 size_t 00545 Compressor::decompressToStream(const char*in, size_t size, std::ostream& out, 00546 std::string* error) { 00547 // Set up the context and writer 00548 WriterContext ctxt(&out,size / 2); 00549 00550 // Decompress everything after the magic number (which we'll alter) 00551 size_t zipSize = Compressor::decompress(in,size, 00552 WriterContext::callback, (void*)&ctxt,error); 00553 00554 if (zipSize && ctxt.chunk) { 00555 ctxt.write(zipSize - ctxt.written); 00556 } 00557 return zipSize; 00558 }