LLVM API Documentation
00001 //===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by Reid Spencer and is distributed under the 00006 // University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements the llvm::Compressor class, an abstraction for memory 00011 // block compression. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "llvm/Config/config.h" 00016 #include "llvm/Support/Compressor.h" 00017 #include "llvm/ADT/StringExtras.h" 00018 #include <cassert> 00019 #include <string> 00020 #include "bzip2/bzlib.h" 00021 00022 namespace { 00023 00024 enum CompressionTypes { 00025 COMP_TYPE_NONE = '0', 00026 COMP_TYPE_BZIP2 = '2', 00027 }; 00028 00029 inline int getdata(char*& buffer, unsigned& size, 00030 llvm::Compressor::OutputDataCallback* cb, void* context) { 00031 buffer = 0; 00032 size = 0; 00033 int result = (*cb)(buffer, size, context); 00034 assert(buffer != 0 && "Invalid result from Compressor callback"); 00035 assert(size != 0 && "Invalid result from Compressor callback"); 00036 return result; 00037 } 00038 00039 //===----------------------------------------------------------------------===// 00040 //=== NULLCOMP - a compression like set of routines that just copies data 00041 //=== without doing any compression. This is provided so that if the 00042 //=== configured environment doesn't have a compression library the 00043 //=== program can still work, albeit using more data/memory. 
//===----------------------------------------------------------------------===//

/// State for the pass-through "compressor". The layout mirrors the fields of
/// a bz_stream that this file uses, so both code paths read alike.
struct NULLCOMP_stream {
  // User provided fields
  char* next_in;          ///< Next input byte to consume
  unsigned avail_in;      ///< Number of input bytes remaining at next_in
  char* next_out;         ///< Where the next output byte is stored
  unsigned avail_out;     ///< Free space remaining at next_out

  // Information fields
  uint64_t output_count;  ///< Total count of output bytes
};

/// Reset the running output total. The user-provided fields are left alone
/// because the caller fills them in itself.
void NULLCOMP_init(NULLCOMP_stream* s) {
  s->output_count = 0;
}

/// Move as many bytes as currently fit from the input window to the output
/// window and advance both windows past them.
///
/// An exhausted output window (avail_out == 0) is accepted: nothing is copied
/// and false is returned so that the caller supplies a fresh buffer. This
/// happens when the caller's one-byte type tag used up the last free byte.
///
/// @returns true once all input has been consumed, false when the output
///          window filled up first.
inline bool NULLCOMP_copy(NULLCOMP_stream* s) {
  assert(s && "Invalid NULLCOMP_stream");
  assert(s->next_in != 0);
  assert(s->next_out != 0);
  assert(s->avail_in >= 1);

  const bool everythingFits = s->avail_out >= s->avail_in;
  const unsigned count = everythingFits ? s->avail_in : s->avail_out;

  ::memcpy(s->next_out, s->next_in, count);
  s->output_count += count;
  s->next_in += count;
  s->avail_in -= count;
  // Keep the output window consistent as well, so a caller may keep writing
  // into the same buffer without recomputing the position.
  s->next_out += count;
  s->avail_out -= count;
  return everythingFits;
}

/// "Compress" by copying input to output.
/// @returns true when all input was consumed, false when more output space
///          is needed.
bool NULLCOMP_compress(NULLCOMP_stream* s) {
  return NULLCOMP_copy(s);
}

/// "Decompress" by copying input to output; the exact inverse of
/// NULLCOMP_compress, which is the same copy.
/// @returns true when all input was consumed, false when more output space
///          is needed.
bool NULLCOMP_decompress(NULLCOMP_stream* s) {
  return NULLCOMP_copy(s);
}

/// Counterpart of NULLCOMP_init. The stream owns no resources, so there is
/// nothing to release.
void NULLCOMP_end(NULLCOMP_stream*) {
}

/// This structure is only used when a bytecode
file is compressed. 00113 /// As bytecode is being decompressed, the memory buffer might need 00114 /// to be reallocated. The buffer allocation is handled in a callback 00115 /// and this structure is needed to retain information across calls 00116 /// to the callback. 00117 /// @brief An internal buffer object used for handling decompression 00118 struct BufferContext { 00119 char* buff; 00120 unsigned size; 00121 BufferContext(unsigned compressedSize ) { 00122 // Null to indicate malloc of a new block 00123 buff = 0; 00124 00125 // Compute the initial length of the uncompression buffer. Note that this 00126 // is twice the length of the compressed buffer and will be doubled again 00127 // in the callback for an initial allocation of 4x compressedSize. This 00128 // calculation is based on the typical compression ratio of bzip2 on LLVM 00129 // bytecode files which typically ranges in the 50%-75% range. Since we 00130 // tyipcally get at least 50%, doubling is insufficient. By using a 4x 00131 // multiplier on the first allocation, we minimize the impact of having to 00132 // copy the buffer on reallocation. 00133 size = compressedSize*2; 00134 } 00135 00136 /// This function handles allocation of the buffer used for decompression of 00137 /// compressed bytecode files. It is called by Compressor::decompress which is 00138 /// called by BytecodeReader::ParseBytecode. 00139 static unsigned callback(char*&buff, unsigned& sz, void* ctxt){ 00140 // Case the context variable to our BufferContext 00141 BufferContext* bc = reinterpret_cast<BufferContext*>(ctxt); 00142 00143 // Compute the new, doubled, size of the block 00144 unsigned new_size = bc->size * 2; 00145 00146 // Extend or allocate the block (realloc(0,n) == malloc(n)) 00147 char* new_buff = (char*) ::realloc(bc->buff, new_size); 00148 00149 // Figure out what to return to the Compressor. If this is the first call, 00150 // then bc->buff will be null. 
In this case we want to return the entire 00151 // buffer because there was no previous allocation. Otherwise, when the 00152 // buffer is reallocated, we save the new base pointer in the 00153 // BufferContext.buff field but return the address of only the extension, 00154 // mid-way through the buffer (since its size was doubled). Furthermore, 00155 // the sz result must be 1/2 the total size of the buffer. 00156 if (bc->buff == 0 ) { 00157 buff = bc->buff = new_buff; 00158 sz = new_size; 00159 } else { 00160 bc->buff = new_buff; 00161 buff = new_buff + bc->size; 00162 sz = bc->size; 00163 } 00164 00165 // Retain the size of the allocated block 00166 bc->size = new_size; 00167 00168 // Make sure we fail (return 1) if we didn't get any memory. 00169 return (bc->buff == 0 ? 1 : 0); 00170 } 00171 }; 00172 00173 // This structure retains the context when compressing the bytecode file. The 00174 // WriteCompressedData function below uses it to keep track of the previously 00175 // filled chunk of memory (which it writes) and how many bytes have been 00176 // written. 00177 struct WriterContext { 00178 // Initialize the context 00179 WriterContext(std::ostream*OS, unsigned CS) 00180 : chunk(0), sz(0), written(0), compSize(CS), Out(OS) {} 00181 00182 // Make sure we clean up memory 00183 ~WriterContext() { 00184 if (chunk) 00185 delete [] chunk; 00186 } 00187 00188 // Write the chunk 00189 void write(unsigned size = 0) { 00190 unsigned write_size = (size == 0 ? sz : size); 00191 Out->write(chunk,write_size); 00192 written += write_size; 00193 delete [] chunk; 00194 chunk = 0; 00195 sz = 0; 00196 } 00197 00198 // This function is a callback used by the Compressor::compress function to 00199 // allocate memory for the compression buffer. This function fulfills that 00200 // responsibility but also writes the previous (now filled) buffer out to the 00201 // stream. 
00202 static unsigned callback(char*& buffer, unsigned& size, void* context) { 00203 // Cast the context to the structure it must point to. 00204 WriterContext* ctxt = 00205 reinterpret_cast<WriterContext*>(context); 00206 00207 // If there's a previously allocated chunk, it must now be filled with 00208 // compressed data, so we write it out and deallocate it. 00209 if (ctxt->chunk != 0 && ctxt->sz > 0 ) { 00210 ctxt->write(); 00211 } 00212 00213 // Compute the size of the next chunk to allocate. We attempt to allocate 00214 // enough memory to handle the compression in a single memory allocation. In 00215 // general, the worst we do on compression of bytecode is about 50% so we 00216 // conservatively estimate compSize / 2 as the size needed for the 00217 // compression buffer. compSize is the size of the compressed data, provided 00218 // by WriteBytecodeToFile. 00219 size = ctxt->sz = ctxt->compSize / 2; 00220 00221 // Allocate the chunks 00222 buffer = ctxt->chunk = new char [size]; 00223 00224 // We must return 1 if the allocation failed so that the Compressor knows 00225 // not to use the buffer pointer. 00226 return (ctxt->chunk == 0 ? 1 : 0); 00227 } 00228 00229 char* chunk; // pointer to the chunk of memory filled by compression 00230 unsigned sz; // size of chunk 00231 unsigned written; // aggregate total of bytes written in all chunks 00232 unsigned compSize; // size of the uncompressed buffer 00233 std::ostream* Out; // The stream we write the data to. 00234 }; 00235 00236 } 00237 00238 namespace llvm { 00239 00240 // Compress in one of three ways 00241 uint64_t Compressor::compress(const char* in, unsigned size, 00242 OutputDataCallback* cb, void* context ) { 00243 assert(in && "Can't compress null buffer"); 00244 assert(size && "Can't compress empty buffer"); 00245 assert(cb && "Can't compress without a callback function"); 00246 00247 uint64_t result = 0; 00248 00249 // For small files, we just don't bother compressing. 
bzip2 isn't very good 00250 // with tiny files and can actually make the file larger, so we just avoid 00251 // it altogether. 00252 if (size > 64*1024) { 00253 // Set up the bz_stream 00254 bz_stream bzdata; 00255 bzdata.bzalloc = 0; 00256 bzdata.bzfree = 0; 00257 bzdata.opaque = 0; 00258 bzdata.next_in = (char*)in; 00259 bzdata.avail_in = size; 00260 bzdata.next_out = 0; 00261 bzdata.avail_out = 0; 00262 switch ( BZ2_bzCompressInit(&bzdata, 5, 0, 100) ) { 00263 case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled"); 00264 case BZ_PARAM_ERROR: throw std::string("Compressor internal error"); 00265 case BZ_MEM_ERROR: throw std::string("Out of memory"); 00266 case BZ_OK: 00267 default: 00268 break; 00269 } 00270 00271 // Get a block of memory 00272 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) { 00273 BZ2_bzCompressEnd(&bzdata); 00274 throw std::string("Can't allocate output buffer"); 00275 } 00276 00277 // Put compression code in first byte 00278 (*bzdata.next_out++) = COMP_TYPE_BZIP2; 00279 bzdata.avail_out--; 00280 00281 // Compress it 00282 int bzerr = BZ_FINISH_OK; 00283 while (BZ_FINISH_OK == (bzerr = BZ2_bzCompress(&bzdata, BZ_FINISH))) { 00284 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) { 00285 BZ2_bzCompressEnd(&bzdata); 00286 throw std::string("Can't allocate output buffer"); 00287 } 00288 } 00289 switch (bzerr) { 00290 case BZ_SEQUENCE_ERROR: 00291 case BZ_PARAM_ERROR: throw std::string("Param/Sequence error"); 00292 case BZ_FINISH_OK: 00293 case BZ_STREAM_END: break; 00294 default: throw std::string("Oops: ") + utostr(unsigned(bzerr)); 00295 } 00296 00297 // Finish 00298 result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) | 00299 bzdata.total_out_lo32 + 1; 00300 00301 BZ2_bzCompressEnd(&bzdata); 00302 } else { 00303 // Do null compression, for small files 00304 NULLCOMP_stream sdata; 00305 sdata.next_in = (char*)in; 00306 sdata.avail_in = size; 00307 NULLCOMP_init(&sdata); 00308 00309 if (0 != 
getdata(sdata.next_out, sdata.avail_out,cb,context)) { 00310 throw std::string("Can't allocate output buffer"); 00311 } 00312 00313 *(sdata.next_out++) = COMP_TYPE_NONE; 00314 sdata.avail_out--; 00315 00316 while (!NULLCOMP_compress(&sdata)) { 00317 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) { 00318 throw std::string("Can't allocate output buffer"); 00319 } 00320 } 00321 00322 result = sdata.output_count + 1; 00323 NULLCOMP_end(&sdata); 00324 } 00325 return result; 00326 } 00327 00328 uint64_t 00329 Compressor::compressToNewBuffer(const char* in, unsigned size, char*&out) { 00330 BufferContext bc(size); 00331 unsigned result = compress(in,size,BufferContext::callback,(void*)&bc); 00332 out = bc.buff; 00333 return result; 00334 } 00335 00336 uint64_t 00337 Compressor::compressToStream(const char*in, unsigned size, std::ostream& out) { 00338 // Set up the context and writer 00339 WriterContext ctxt(&out,size / 2); 00340 00341 // Compress everything after the magic number (which we'll alter) 00342 uint64_t zipSize = Compressor::compress(in,size, 00343 WriterContext::callback, (void*)&ctxt); 00344 00345 if (ctxt.chunk) { 00346 ctxt.write(zipSize - ctxt.written); 00347 } 00348 return zipSize; 00349 } 00350 00351 // Decompress in one of three ways 00352 uint64_t Compressor::decompress(const char *in, unsigned size, 00353 OutputDataCallback* cb, void* context) { 00354 assert(in && "Can't decompress null buffer"); 00355 assert(size > 1 && "Can't decompress empty buffer"); 00356 assert(cb && "Can't decompress without a callback function"); 00357 00358 uint64_t result = 0; 00359 00360 switch (*in++) { 00361 case COMP_TYPE_BZIP2: { 00362 // Set up the bz_stream 00363 bz_stream bzdata; 00364 bzdata.bzalloc = 0; 00365 bzdata.bzfree = 0; 00366 bzdata.opaque = 0; 00367 bzdata.next_in = (char*)in; 00368 bzdata.avail_in = size - 1; 00369 bzdata.next_out = 0; 00370 bzdata.avail_out = 0; 00371 switch ( BZ2_bzDecompressInit(&bzdata, 0, 0) ) { 00372 case 
BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled"); 00373 case BZ_PARAM_ERROR: throw std::string("Compressor internal error"); 00374 case BZ_MEM_ERROR: throw std::string("Out of memory"); 00375 case BZ_OK: 00376 default: 00377 break; 00378 } 00379 00380 // Get a block of memory 00381 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) { 00382 BZ2_bzDecompressEnd(&bzdata); 00383 throw std::string("Can't allocate output buffer"); 00384 } 00385 00386 // Decompress it 00387 int bzerr = BZ_OK; 00388 while (BZ_OK == (bzerr = BZ2_bzDecompress(&bzdata))) { 00389 if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb,context)) { 00390 BZ2_bzDecompressEnd(&bzdata); 00391 throw std::string("Can't allocate output buffer"); 00392 } 00393 } 00394 00395 switch (bzerr) { 00396 case BZ_PARAM_ERROR: throw std::string("Compressor internal error"); 00397 case BZ_MEM_ERROR: throw std::string("Out of memory"); 00398 case BZ_DATA_ERROR: throw std::string("Data integrity error"); 00399 case BZ_DATA_ERROR_MAGIC:throw std::string("Data is not BZIP2"); 00400 default: throw("Ooops"); 00401 case BZ_STREAM_END: 00402 break; 00403 } 00404 00405 // Finish 00406 result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) | 00407 bzdata.total_out_lo32; 00408 BZ2_bzDecompressEnd(&bzdata); 00409 break; 00410 } 00411 00412 case COMP_TYPE_NONE: { 00413 NULLCOMP_stream sdata; 00414 sdata.next_in = (char*)in; 00415 sdata.avail_in = size - 1; 00416 NULLCOMP_init(&sdata); 00417 00418 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) { 00419 throw std::string("Can't allocate output buffer"); 00420 } 00421 00422 while (!NULLCOMP_decompress(&sdata)) { 00423 if (0 != getdata(sdata.next_out, sdata.avail_out,cb,context)) { 00424 throw std::string("Can't allocate output buffer"); 00425 } 00426 } 00427 00428 result = sdata.output_count; 00429 NULLCOMP_end(&sdata); 00430 break; 00431 } 00432 00433 default: 00434 throw std::string("Unknown type of compressed data"); 00435 } 
00436 00437 return result; 00438 } 00439 00440 uint64_t 00441 Compressor::decompressToNewBuffer(const char* in, unsigned size, char*&out) { 00442 BufferContext bc(size); 00443 unsigned result = decompress(in,size,BufferContext::callback,(void*)&bc); 00444 out = bc.buff; 00445 return result; 00446 } 00447 00448 uint64_t 00449 Compressor::decompressToStream(const char*in, unsigned size, std::ostream& out){ 00450 // Set up the context and writer 00451 WriterContext ctxt(&out,size / 2); 00452 00453 // Compress everything after the magic number (which we'll alter) 00454 uint64_t zipSize = Compressor::decompress(in,size, 00455 WriterContext::callback, (void*)&ctxt); 00456 00457 if (ctxt.chunk) { 00458 ctxt.write(zipSize - ctxt.written); 00459 } 00460 return zipSize; 00461 } 00462 00463 } 00464 00465 // vim: sw=2 ai