LLVM API Documentation
00001 //===-- Writer.cpp - Library for writing LLVM bytecode files --------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This library implements the functionality defined in llvm/Bytecode/Writer.h 00011 // 00012 // Note that this file uses an unusual technique of outputting all the bytecode 00013 // to a vector of unsigned char, then copies the vector to an ostream. The 00014 // reason for this is that we must do "seeking" in the stream to do back- 00015 // patching, and some very important ostreams that we want to support (like 00016 // pipes) do not support seeking. :( :( :( 00017 // 00018 //===----------------------------------------------------------------------===// 00019 00020 #include "WriterInternals.h" 00021 #include "llvm/Bytecode/WriteBytecodePass.h" 00022 #include "llvm/CallingConv.h" 00023 #include "llvm/Constants.h" 00024 #include "llvm/DerivedTypes.h" 00025 #include "llvm/InlineAsm.h" 00026 #include "llvm/Instructions.h" 00027 #include "llvm/Module.h" 00028 #include "llvm/SymbolTable.h" 00029 #include "llvm/Support/GetElementPtrTypeIterator.h" 00030 #include "llvm/Support/Compressor.h" 00031 #include "llvm/Support/MathExtras.h" 00032 #include "llvm/ADT/STLExtras.h" 00033 #include "llvm/ADT/Statistic.h" 00034 #include <cstring> 00035 #include <algorithm> 00036 using namespace llvm; 00037 00038 /// This value needs to be incremented every time the bytecode format changes 00039 /// so that the reader can distinguish which format of the bytecode file has 00040 /// been written. 00041 /// @brief The bytecode version number 00042 const unsigned BCVersionNum = 5; 00043 00044 static RegisterPass<WriteBytecodePass> X("emitbytecode", "Bytecode Writer"); 00045 00046 static Statistic<> 00047 BytesWritten("bytecodewriter", "Number of bytecode bytes written"); 00048 00049 //===----------------------------------------------------------------------===// 00050 //=== Output Primitives ===// 00051 //===----------------------------------------------------------------------===// 00052 00053 // output - If a position is specified, it must be in the valid portion of the 00054 // string... note that this should be inlined always so only the relevant IF 00055 // body should be included. 00056 inline void BytecodeWriter::output(unsigned i, int pos) { 00057 if (pos == -1) { // Be endian clean, little endian is our friend 00058 Out.push_back((unsigned char)i); 00059 Out.push_back((unsigned char)(i >> 8)); 00060 Out.push_back((unsigned char)(i >> 16)); 00061 Out.push_back((unsigned char)(i >> 24)); 00062 } else { 00063 Out[pos ] = (unsigned char)i; 00064 Out[pos+1] = (unsigned char)(i >> 8); 00065 Out[pos+2] = (unsigned char)(i >> 16); 00066 Out[pos+3] = (unsigned char)(i >> 24); 00067 } 00068 } 00069 00070 inline void BytecodeWriter::output(int i) { 00071 output((unsigned)i); 00072 } 00073 00074 /// output_vbr - Output an unsigned value, by using the least number of bytes 00075 /// possible. This is useful because many of our "infinite" values are really 00076 /// very small most of the time; but can be large a few times. 00077 /// Data format used: If you read a byte with the high bit set, use the low 00078 /// seven bits as data and then read another byte. 00079 inline void BytecodeWriter::output_vbr(uint64_t i) { 00080 while (1) { 00081 if (i < 0x80) { // done? 00082 Out.push_back((unsigned char)i); // We know the high bit is clear... 00083 return; 00084 } 00085 00086 // Nope, we are bigger than a character, output the next 7 bits and set the 00087 // high bit to say that there is more coming... 00088 Out.push_back(0x80 | ((unsigned char)i & 0x7F)); 00089 i >>= 7; // Shift out 7 bits now... 00090 } 00091 } 00092 00093 inline void BytecodeWriter::output_vbr(unsigned i) { 00094 while (1) { 00095 if (i < 0x80) { // done? 00096 Out.push_back((unsigned char)i); // We know the high bit is clear... 00097 return; 00098 } 00099 00100 // Nope, we are bigger than a character, output the next 7 bits and set the 00101 // high bit to say that there is more coming... 00102 Out.push_back(0x80 | ((unsigned char)i & 0x7F)); 00103 i >>= 7; // Shift out 7 bits now... 00104 } 00105 } 00106 00107 inline void BytecodeWriter::output_typeid(unsigned i) { 00108 if (i <= 0x00FFFFFF) 00109 this->output_vbr(i); 00110 else { 00111 this->output_vbr(0x00FFFFFF); 00112 this->output_vbr(i); 00113 } 00114 } 00115 00116 inline void BytecodeWriter::output_vbr(int64_t i) { 00117 if (i < 0) 00118 output_vbr(((uint64_t)(-i) << 1) | 1); // Set low order sign bit... 00119 else 00120 output_vbr((uint64_t)i << 1); // Low order bit is clear. 00121 } 00122 00123 00124 inline void BytecodeWriter::output_vbr(int i) { 00125 if (i < 0) 00126 output_vbr(((unsigned)(-i) << 1) | 1); // Set low order sign bit... 00127 else 00128 output_vbr((unsigned)i << 1); // Low order bit is clear. 00129 } 00130 00131 inline void BytecodeWriter::output(const std::string &s) { 00132 unsigned Len = s.length(); 00133 output_vbr(Len ); // Strings may have an arbitrary length... 00134 Out.insert(Out.end(), s.begin(), s.end()); 00135 } 00136 00137 inline void BytecodeWriter::output_data(const void *Ptr, const void *End) { 00138 Out.insert(Out.end(), (const unsigned char*)Ptr, (const unsigned char*)End); 00139 } 00140 00141 inline void BytecodeWriter::output_float(float& FloatVal) { 00142 /// FIXME: This isn't optimal, it has size problems on some platforms 00143 /// where FP is not IEEE. 00144 uint32_t i = FloatToBits(FloatVal); 00145 Out.push_back( static_cast<unsigned char>( (i & 0xFF ))); 00146 Out.push_back( static_cast<unsigned char>( (i >> 8) & 0xFF)); 00147 Out.push_back( static_cast<unsigned char>( (i >> 16) & 0xFF)); 00148 Out.push_back( static_cast<unsigned char>( (i >> 24) & 0xFF)); 00149 } 00150 00151 inline void BytecodeWriter::output_double(double& DoubleVal) { 00152 /// FIXME: This isn't optimal, it has size problems on some platforms 00153 /// where FP is not IEEE. 00154 uint64_t i = DoubleToBits(DoubleVal); 00155 Out.push_back( static_cast<unsigned char>( (i & 0xFF ))); 00156 Out.push_back( static_cast<unsigned char>( (i >> 8) & 0xFF)); 00157 Out.push_back( static_cast<unsigned char>( (i >> 16) & 0xFF)); 00158 Out.push_back( static_cast<unsigned char>( (i >> 24) & 0xFF)); 00159 Out.push_back( static_cast<unsigned char>( (i >> 32) & 0xFF)); 00160 Out.push_back( static_cast<unsigned char>( (i >> 40) & 0xFF)); 00161 Out.push_back( static_cast<unsigned char>( (i >> 48) & 0xFF)); 00162 Out.push_back( static_cast<unsigned char>( (i >> 56) & 0xFF)); 00163 } 00164 00165 inline BytecodeBlock::BytecodeBlock(unsigned ID, BytecodeWriter &w, 00166 bool elideIfEmpty, bool hasLongFormat) 00167 : Id(ID), Writer(w), ElideIfEmpty(elideIfEmpty), HasLongFormat(hasLongFormat){ 00168 00169 if (HasLongFormat) { 00170 w.output(ID); 00171 w.output(0U); // For length in long format 00172 } else { 00173 w.output(0U); /// Place holder for ID and length for this block 00174 } 00175 Loc = w.size(); 00176 } 00177 00178 inline BytecodeBlock::~BytecodeBlock() { // Do backpatch when block goes out 00179 // of scope... 00180 if (Loc == Writer.size() && ElideIfEmpty) { 00181 // If the block is empty, and we are allowed to, do not emit the block at 00182 // all! 00183 Writer.resize(Writer.size()-(HasLongFormat?8:4)); 00184 return; 00185 } 00186 00187 if (HasLongFormat) 00188 Writer.output(unsigned(Writer.size()-Loc), int(Loc-4)); 00189 else 00190 Writer.output(unsigned(Writer.size()-Loc) << 5 | (Id & 0x1F), int(Loc-4)); 00191 } 00192 00193 //===----------------------------------------------------------------------===// 00194 //=== Constant Output ===// 00195 //===----------------------------------------------------------------------===// 00196 00197 void BytecodeWriter::outputType(const Type *T) { 00198 output_vbr((unsigned)T->getTypeID()); 00199 00200 // That's all there is to handling primitive types... 00201 if (T->isPrimitiveType()) { 00202 return; // We might do this if we alias a prim type: %x = type int 00203 } 00204 00205 switch (T->getTypeID()) { // Handle derived types now. 00206 case Type::FunctionTyID: { 00207 const FunctionType *MT = cast<FunctionType>(T); 00208 int Slot = Table.getSlot(MT->getReturnType()); 00209 assert(Slot != -1 && "Type used but not available!!"); 00210 output_typeid((unsigned)Slot); 00211 00212 // Output the number of arguments to function (+1 if varargs): 00213 output_vbr((unsigned)MT->getNumParams()+MT->isVarArg()); 00214 00215 // Output all of the arguments... 00216 FunctionType::param_iterator I = MT->param_begin(); 00217 for (; I != MT->param_end(); ++I) { 00218 Slot = Table.getSlot(*I); 00219 assert(Slot != -1 && "Type used but not available!!"); 00220 output_typeid((unsigned)Slot); 00221 } 00222 00223 // Terminate list with VoidTy if we are a varargs function... 00224 if (MT->isVarArg()) 00225 output_typeid((unsigned)Type::VoidTyID); 00226 break; 00227 } 00228 00229 case Type::ArrayTyID: { 00230 const ArrayType *AT = cast<ArrayType>(T); 00231 int Slot = Table.getSlot(AT->getElementType()); 00232 assert(Slot != -1 && "Type used but not available!!"); 00233 output_typeid((unsigned)Slot); 00234 output_vbr(AT->getNumElements()); 00235 break; 00236 } 00237 00238 case Type::PackedTyID: { 00239 const PackedType *PT = cast<PackedType>(T); 00240 int Slot = Table.getSlot(PT->getElementType()); 00241 assert(Slot != -1 && "Type used but not available!!"); 00242 output_typeid((unsigned)Slot); 00243 output_vbr(PT->getNumElements()); 00244 break; 00245 } 00246 00247 00248 case Type::StructTyID: { 00249 const StructType *ST = cast<StructType>(T); 00250 00251 // Output all of the element types... 00252 for (StructType::element_iterator I = ST->element_begin(), 00253 E = ST->element_end(); I != E; ++I) { 00254 int Slot = Table.getSlot(*I); 00255 assert(Slot != -1 && "Type used but not available!!"); 00256 output_typeid((unsigned)Slot); 00257 } 00258 00259 // Terminate list with VoidTy 00260 output_typeid((unsigned)Type::VoidTyID); 00261 break; 00262 } 00263 00264 case Type::PointerTyID: { 00265 const PointerType *PT = cast<PointerType>(T); 00266 int Slot = Table.getSlot(PT->getElementType()); 00267 assert(Slot != -1 && "Type used but not available!!"); 00268 output_typeid((unsigned)Slot); 00269 break; 00270 } 00271 00272 case Type::OpaqueTyID: 00273 // No need to emit anything, just the count of opaque types is enough. 00274 break; 00275 00276 default: 00277 std::cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" 00278 << " Type '" << T->getDescription() << "'\n"; 00279 break; 00280 } 00281 } 00282 00283 void BytecodeWriter::outputConstant(const Constant *CPV) { 00284 assert((CPV->getType()->isPrimitiveType() || !CPV->isNullValue()) && 00285 "Shouldn't output null constants!"); 00286 00287 // We must check for a ConstantExpr before switching by type because 00288 // a ConstantExpr can be of any type, and has no explicit value. 00289 // 00290 if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) { 00291 // FIXME: Encoding of constant exprs could be much more compact! 00292 assert(CE->getNumOperands() > 0 && "ConstantExpr with 0 operands"); 00293 assert(CE->getNumOperands() != 1 || CE->getOpcode() == Instruction::Cast); 00294 output_vbr(1+CE->getNumOperands()); // flags as an expr 00295 output_vbr(CE->getOpcode()); // flags as an expr 00296 00297 for (User::const_op_iterator OI = CE->op_begin(); OI != CE->op_end(); ++OI){ 00298 int Slot = Table.getSlot(*OI); 00299 assert(Slot != -1 && "Unknown constant used in ConstantExpr!!"); 00300 output_vbr((unsigned)Slot); 00301 Slot = Table.getSlot((*OI)->getType()); 00302 output_typeid((unsigned)Slot); 00303 } 00304 return; 00305 } else if (isa<UndefValue>(CPV)) { 00306 output_vbr(1U); // 1 -> UndefValue constant. 00307 return; 00308 } else { 00309 output_vbr(0U); // flag as not a ConstantExpr 00310 } 00311 00312 switch (CPV->getType()->getTypeID()) { 00313 case Type::BoolTyID: // Boolean Types 00314 if (cast<ConstantBool>(CPV)->getValue()) 00315 output_vbr(1U); 00316 else 00317 output_vbr(0U); 00318 break; 00319 00320 case Type::UByteTyID: // Unsigned integer types... 00321 case Type::UShortTyID: 00322 case Type::UIntTyID: 00323 case Type::ULongTyID: 00324 output_vbr(cast<ConstantUInt>(CPV)->getValue()); 00325 break; 00326 00327 case Type::SByteTyID: // Signed integer types... 00328 case Type::ShortTyID: 00329 case Type::IntTyID: 00330 case Type::LongTyID: 00331 output_vbr(cast<ConstantSInt>(CPV)->getValue()); 00332 break; 00333 00334 case Type::ArrayTyID: { 00335 const ConstantArray *CPA = cast<ConstantArray>(CPV); 00336 assert(!CPA->isString() && "Constant strings should be handled specially!"); 00337 00338 for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i) { 00339 int Slot = Table.getSlot(CPA->getOperand(i)); 00340 assert(Slot != -1 && "Constant used but not available!!"); 00341 output_vbr((unsigned)Slot); 00342 } 00343 break; 00344 } 00345 00346 case Type::PackedTyID: { 00347 const ConstantPacked *CP = cast<ConstantPacked>(CPV); 00348 00349 for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) { 00350 int Slot = Table.getSlot(CP->getOperand(i)); 00351 assert(Slot != -1 && "Constant used but not available!!"); 00352 output_vbr((unsigned)Slot); 00353 } 00354 break; 00355 } 00356 00357 case Type::StructTyID: { 00358 const ConstantStruct *CPS = cast<ConstantStruct>(CPV); 00359 00360 for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i) { 00361 int Slot = Table.getSlot(CPS->getOperand(i)); 00362 assert(Slot != -1 && "Constant used but not available!!"); 00363 output_vbr((unsigned)Slot); 00364 } 00365 break; 00366 } 00367 00368 case Type::PointerTyID: 00369 assert(0 && "No non-null, non-constant-expr constants allowed!"); 00370 abort(); 00371 00372 case Type::FloatTyID: { // Floating point types... 00373 float Tmp = (float)cast<ConstantFP>(CPV)->getValue(); 00374 output_float(Tmp); 00375 break; 00376 } 00377 case Type::DoubleTyID: { 00378 double Tmp = cast<ConstantFP>(CPV)->getValue(); 00379 output_double(Tmp); 00380 break; 00381 } 00382 00383 case Type::VoidTyID: 00384 case Type::LabelTyID: 00385 default: 00386 std::cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" 00387 << " type '" << *CPV->getType() << "'\n"; 00388 break; 00389 } 00390 return; 00391 } 00392 00393 /// outputInlineAsm - InlineAsm's get emitted to the constant pool, so they can 00394 /// be shared by multiple uses. 00395 void BytecodeWriter::outputInlineAsm(const InlineAsm *IA) { 00396 // Output a marker, so we know when we have one one parsing the constant pool. 00397 // Note that this encoding is 5 bytes: not very efficient for a marker. Since 00398 // unique inline asms are rare, this should hardly matter. 00399 output_vbr(~0U); 00400 00401 output(IA->getAsmString()); 00402 output(IA->getConstraintString()); 00403 output_vbr(unsigned(IA->hasSideEffects())); 00404 } 00405 00406 void BytecodeWriter::outputConstantStrings() { 00407 SlotCalculator::string_iterator I = Table.string_begin(); 00408 SlotCalculator::string_iterator E = Table.string_end(); 00409 if (I == E) return; // No strings to emit 00410 00411 // If we have != 0 strings to emit, output them now. Strings are emitted into 00412 // the 'void' type plane. 00413 output_vbr(unsigned(E-I)); 00414 output_typeid(Type::VoidTyID); 00415 00416 // Emit all of the strings. 00417 for (I = Table.string_begin(); I != E; ++I) { 00418 const ConstantArray *Str = *I; 00419 int Slot = Table.getSlot(Str->getType()); 00420 assert(Slot != -1 && "Constant string of unknown type?"); 00421 output_typeid((unsigned)Slot); 00422 00423 // Now that we emitted the type (which indicates the size of the string), 00424 // emit all of the characters. 00425 std::string Val = Str->getAsString(); 00426 output_data(Val.c_str(), Val.c_str()+Val.size()); 00427 } 00428 } 00429 00430 //===----------------------------------------------------------------------===// 00431 //=== Instruction Output ===// 00432 //===----------------------------------------------------------------------===// 00433 00434 // outputInstructionFormat0 - Output those weird instructions that have a large 00435 // number of operands or have large operands themselves. 00436 // 00437 // Format: [opcode] [type] [numargs] [arg0] [arg1] ... [arg<numargs-1>] 00438 // 00439 void BytecodeWriter::outputInstructionFormat0(const Instruction *I, 00440 unsigned Opcode, 00441 const SlotCalculator &Table, 00442 unsigned Type) { 00443 // Opcode must have top two bits clear... 00444 output_vbr(Opcode << 2); // Instruction Opcode ID 00445 output_typeid(Type); // Result type 00446 00447 unsigned NumArgs = I->getNumOperands(); 00448 output_vbr(NumArgs + (isa<CastInst>(I) || 00449 isa<VAArgInst>(I) || Opcode == 56 || Opcode == 58)); 00450 00451 if (!isa<GetElementPtrInst>(&I)) { 00452 for (unsigned i = 0; i < NumArgs; ++i) { 00453 int Slot = Table.getSlot(I->getOperand(i)); 00454 assert(Slot >= 0 && "No slot number for value!?!?"); 00455 output_vbr((unsigned)Slot); 00456 } 00457 00458 if (isa<CastInst>(I) || isa<VAArgInst>(I)) { 00459 int Slot = Table.getSlot(I->getType()); 00460 assert(Slot != -1 && "Cast return type unknown?"); 00461 output_typeid((unsigned)Slot); 00462 } else if (Opcode == 56) { // Invoke escape sequence 00463 output_vbr(cast<InvokeInst>(I)->getCallingConv()); 00464 } else if (Opcode == 58) { // Call escape sequence 00465 output_vbr((cast<CallInst>(I)->getCallingConv() << 1) | 00466 unsigned(cast<CallInst>(I)->isTailCall())); 00467 } 00468 } else { 00469 int Slot = Table.getSlot(I->getOperand(0)); 00470 assert(Slot >= 0 && "No slot number for value!?!?"); 00471 output_vbr(unsigned(Slot)); 00472 00473 // We need to encode the type of sequential type indices into their slot # 00474 unsigned Idx = 1; 00475 for (gep_type_iterator TI = gep_type_begin(I), E = gep_type_end(I); 00476 Idx != NumArgs; ++TI, ++Idx) { 00477 Slot = Table.getSlot(I->getOperand(Idx)); 00478 assert(Slot >= 0 && "No slot number for value!?!?"); 00479 00480 if (isa<SequentialType>(*TI)) { 00481 unsigned IdxId; 00482 switch (I->getOperand(Idx)->getType()->getTypeID()) { 00483 default: assert(0 && "Unknown index type!"); 00484 case Type::UIntTyID: IdxId = 0; break; 00485 case Type::IntTyID: IdxId = 1; break; 00486 case Type::ULongTyID: IdxId = 2; break; 00487 case Type::LongTyID: IdxId = 3; break; 00488 } 00489 Slot = (Slot << 2) | IdxId; 00490 } 00491 output_vbr(unsigned(Slot)); 00492 } 00493 } 00494 } 00495 00496 00497 // outputInstrVarArgsCall - Output the absurdly annoying varargs function calls. 00498 // This are more annoying than most because the signature of the call does not 00499 // tell us anything about the types of the arguments in the varargs portion. 00500 // Because of this, we encode (as type 0) all of the argument types explicitly 00501 // before the argument value. This really sucks, but you shouldn't be using 00502 // varargs functions in your code! *death to printf*! 00503 // 00504 // Format: [opcode] [type] [numargs] [arg0] [arg1] ... [arg<numargs-1>] 00505 // 00506 void BytecodeWriter::outputInstrVarArgsCall(const Instruction *I, 00507 unsigned Opcode, 00508 const SlotCalculator &Table, 00509 unsigned Type) { 00510 assert(isa<CallInst>(I) || isa<InvokeInst>(I)); 00511 // Opcode must have top two bits clear... 00512 output_vbr(Opcode << 2); // Instruction Opcode ID 00513 output_typeid(Type); // Result type (varargs type) 00514 00515 const PointerType *PTy = cast<PointerType>(I->getOperand(0)->getType()); 00516 const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); 00517 unsigned NumParams = FTy->getNumParams(); 00518 00519 unsigned NumFixedOperands; 00520 if (isa<CallInst>(I)) { 00521 // Output an operand for the callee and each fixed argument, then two for 00522 // each variable argument. 00523 NumFixedOperands = 1+NumParams; 00524 } else { 00525 assert(isa<InvokeInst>(I) && "Not call or invoke??"); 00526 // Output an operand for the callee and destinations, then two for each 00527 // variable argument. 00528 NumFixedOperands = 3+NumParams; 00529 } 00530 output_vbr(2 * I->getNumOperands()-NumFixedOperands); 00531 00532 // The type for the function has already been emitted in the type field of the 00533 // instruction. Just emit the slot # now. 00534 for (unsigned i = 0; i != NumFixedOperands; ++i) { 00535 int Slot = Table.getSlot(I->getOperand(i)); 00536 assert(Slot >= 0 && "No slot number for value!?!?"); 00537 output_vbr((unsigned)Slot); 00538 } 00539 00540 for (unsigned i = NumFixedOperands, e = I->getNumOperands(); i != e; ++i) { 00541 // Output Arg Type ID 00542 int Slot = Table.getSlot(I->getOperand(i)->getType()); 00543 assert(Slot >= 0 && "No slot number for value!?!?"); 00544 output_typeid((unsigned)Slot); 00545 00546 // Output arg ID itself 00547 Slot = Table.getSlot(I->getOperand(i)); 00548 assert(Slot >= 0 && "No slot number for value!?!?"); 00549 output_vbr((unsigned)Slot); 00550 } 00551 } 00552 00553 00554 // outputInstructionFormat1 - Output one operand instructions, knowing that no 00555 // operand index is >= 2^12. 00556 // 00557 inline void BytecodeWriter::outputInstructionFormat1(const Instruction *I, 00558 unsigned Opcode, 00559 unsigned *Slots, 00560 unsigned Type) { 00561 // bits Instruction format: 00562 // -------------------------- 00563 // 01-00: Opcode type, fixed to 1. 00564 // 07-02: Opcode 00565 // 19-08: Resulting type plane 00566 // 31-20: Operand #1 (if set to (2^12-1), then zero operands) 00567 // 00568 output(1 | (Opcode << 2) | (Type << 8) | (Slots[0] << 20)); 00569 } 00570 00571 00572 // outputInstructionFormat2 - Output two operand instructions, knowing that no 00573 // operand index is >= 2^8. 00574 // 00575 inline void BytecodeWriter::outputInstructionFormat2(const Instruction *I, 00576 unsigned Opcode, 00577 unsigned *Slots, 00578 unsigned Type) { 00579 // bits Instruction format: 00580 // -------------------------- 00581 // 01-00: Opcode type, fixed to 2. 00582 // 07-02: Opcode 00583 // 15-08: Resulting type plane 00584 // 23-16: Operand #1 00585 // 31-24: Operand #2 00586 // 00587 output(2 | (Opcode << 2) | (Type << 8) | (Slots[0] << 16) | (Slots[1] << 24)); 00588 } 00589 00590 00591 // outputInstructionFormat3 - Output three operand instructions, knowing that no 00592 // operand index is >= 2^6. 00593 // 00594 inline void BytecodeWriter::outputInstructionFormat3(const Instruction *I, 00595 unsigned Opcode, 00596 unsigned *Slots, 00597 unsigned Type) { 00598 // bits Instruction format: 00599 // -------------------------- 00600 // 01-00: Opcode type, fixed to 3. 00601 // 07-02: Opcode 00602 // 13-08: Resulting type plane 00603 // 19-14: Operand #1 00604 // 25-20: Operand #2 00605 // 31-26: Operand #3 00606 // 00607 output(3 | (Opcode << 2) | (Type << 8) | 00608 (Slots[0] << 14) | (Slots[1] << 20) | (Slots[2] << 26)); 00609 } 00610 00611 void BytecodeWriter::outputInstruction(const Instruction &I) { 00612 assert(I.getOpcode() < 56 && "Opcode too big???"); 00613 unsigned Opcode = I.getOpcode(); 00614 unsigned NumOperands = I.getNumOperands(); 00615 00616 // Encode 'tail call' as 61, 'volatile load' as 62, and 'volatile store' as 00617 // 63. 00618 if (const CallInst *CI = dyn_cast<CallInst>(&I)) { 00619 if (CI->getCallingConv() == CallingConv::C) { 00620 if (CI->isTailCall()) 00621 Opcode = 61; // CCC + Tail Call 00622 else 00623 ; // Opcode = Instruction::Call 00624 } else if (CI->getCallingConv() == CallingConv::Fast) { 00625 if (CI->isTailCall()) 00626 Opcode = 59; // FastCC + TailCall 00627 else 00628 Opcode = 60; // FastCC + Not Tail Call 00629 } else { 00630 Opcode = 58; // Call escape sequence. 00631 } 00632 } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) { 00633 if (II->getCallingConv() == CallingConv::Fast) 00634 Opcode = 57; // FastCC invoke. 00635 else if (II->getCallingConv() != CallingConv::C) 00636 Opcode = 56; // Invoke escape sequence. 00637 00638 } else if (isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile()) { 00639 Opcode = 62; 00640 } else if (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile()) { 00641 Opcode = 63; 00642 } 00643 00644 // Figure out which type to encode with the instruction. Typically we want 00645 // the type of the first parameter, as opposed to the type of the instruction 00646 // (for example, with setcc, we always know it returns bool, but the type of 00647 // the first param is actually interesting). But if we have no arguments 00648 // we take the type of the instruction itself. 00649 // 00650 const Type *Ty; 00651 switch (I.getOpcode()) { 00652 case Instruction::Select: 00653 case Instruction::Malloc: 00654 case Instruction::Alloca: 00655 Ty = I.getType(); // These ALWAYS want to encode the return type 00656 break; 00657 case Instruction::Store: 00658 Ty = I.getOperand(1)->getType(); // Encode the pointer type... 00659 assert(isa<PointerType>(Ty) && "Store to nonpointer type!?!?"); 00660 break; 00661 default: // Otherwise use the default behavior... 00662 Ty = NumOperands ? I.getOperand(0)->getType() : I.getType(); 00663 break; 00664 } 00665 00666 unsigned Type; 00667 int Slot = Table.getSlot(Ty); 00668 assert(Slot != -1 && "Type not available!!?!"); 00669 Type = (unsigned)Slot; 00670 00671 // Varargs calls and invokes are encoded entirely different from any other 00672 // instructions. 00673 if (const CallInst *CI = dyn_cast<CallInst>(&I)){ 00674 const PointerType *Ty =cast<PointerType>(CI->getCalledValue()->getType()); 00675 if (cast<FunctionType>(Ty->getElementType())->isVarArg()) { 00676 outputInstrVarArgsCall(CI, Opcode, Table, Type); 00677 return; 00678 } 00679 } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) { 00680 const PointerType *Ty =cast<PointerType>(II->getCalledValue()->getType()); 00681 if (cast<FunctionType>(Ty->getElementType())->isVarArg()) { 00682 outputInstrVarArgsCall(II, Opcode, Table, Type); 00683 return; 00684 } 00685 } 00686 00687 if (NumOperands <= 3) { 00688 // Make sure that we take the type number into consideration. We don't want 00689 // to overflow the field size for the instruction format we select. 00690 // 00691 unsigned MaxOpSlot = Type; 00692 unsigned Slots[3]; Slots[0] = (1 << 12)-1; // Marker to signify 0 operands 00693 00694 for (unsigned i = 0; i != NumOperands; ++i) { 00695 int slot = Table.getSlot(I.getOperand(i)); 00696 assert(slot != -1 && "Broken bytecode!"); 00697 if (unsigned(slot) > MaxOpSlot) MaxOpSlot = unsigned(slot); 00698 Slots[i] = unsigned(slot); 00699 } 00700 00701 // Handle the special cases for various instructions... 00702 if (isa<CastInst>(I) || isa<VAArgInst>(I)) { 00703 // Cast has to encode the destination type as the second argument in the 00704 // packet, or else we won't know what type to cast to! 00705 Slots[1] = Table.getSlot(I.getType()); 00706 assert(Slots[1] != ~0U && "Cast return type unknown?"); 00707 if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; 00708 NumOperands++; 00709 } else if (const AllocationInst *AI = dyn_cast<AllocationInst>(&I)) { 00710 assert(NumOperands == 1 && "Bogus allocation!"); 00711 if (AI->getAlignment()) { 00712 Slots[1] = Log2_32(AI->getAlignment())+1; 00713 if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; 00714 NumOperands = 2; 00715 } 00716 } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) { 00717 // We need to encode the type of sequential type indices into their slot # 00718 unsigned Idx = 1; 00719 for (gep_type_iterator I = gep_type_begin(GEP), E = gep_type_end(GEP); 00720 I != E; ++I, ++Idx) 00721 if (isa<SequentialType>(*I)) { 00722 unsigned IdxId; 00723 switch (GEP->getOperand(Idx)->getType()->getTypeID()) { 00724 default: assert(0 && "Unknown index type!"); 00725 case Type::UIntTyID: IdxId = 0; break; 00726 case Type::IntTyID: IdxId = 1; break; 00727 case Type::ULongTyID: IdxId = 2; break; 00728 case Type::LongTyID: IdxId = 3; break; 00729 } 00730 Slots[Idx] = (Slots[Idx] << 2) | IdxId; 00731 if (Slots[Idx] > MaxOpSlot) MaxOpSlot = Slots[Idx]; 00732 } 00733 } else if (Opcode == 58) { 00734 // If this is the escape sequence for call, emit the tailcall/cc info. 00735 const CallInst &CI = cast<CallInst>(I); 00736 ++NumOperands; 00737 if (NumOperands < 3) { 00738 Slots[NumOperands-1] = (CI.getCallingConv() << 1)|unsigned(CI.isTailCall()); 00739 if (Slots[NumOperands-1] > MaxOpSlot) 00740 MaxOpSlot = Slots[NumOperands-1]; 00741 } 00742 } else if (Opcode == 56) { 00743 // Invoke escape seq has at least 4 operands to encode. 00744 ++NumOperands; 00745 } 00746 00747 // Decide which instruction encoding to use. This is determined primarily 00748 // by the number of operands, and secondarily by whether or not the max 00749 // operand will fit into the instruction encoding. More operands == fewer 00750 // bits per operand. 00751 // 00752 switch (NumOperands) { 00753 case 0: 00754 case 1: 00755 if (MaxOpSlot < (1 << 12)-1) { // -1 because we use 4095 to indicate 0 ops 00756 outputInstructionFormat1(&I, Opcode, Slots, Type); 00757 return; 00758 } 00759 break; 00760 00761 case 2: 00762 if (MaxOpSlot < (1 << 8)) { 00763 outputInstructionFormat2(&I, Opcode, Slots, Type); 00764 return; 00765 } 00766 break; 00767 00768 case 3: 00769 if (MaxOpSlot < (1 << 6)) { 00770 outputInstructionFormat3(&I, Opcode, Slots, Type); 00771 return; 00772 } 00773 break; 00774 default: 00775 break; 00776 } 00777 } 00778 00779 // If we weren't handled before here, we either have a large number of 00780 // operands or a large operand index that we are referring to. 00781 outputInstructionFormat0(&I, Opcode, Table, Type); 00782 } 00783 00784 //===----------------------------------------------------------------------===// 00785 //=== Block Output ===// 00786 //===----------------------------------------------------------------------===// 00787 00788 BytecodeWriter::BytecodeWriter(std::vector<unsigned char> &o, const Module *M) 00789 : Out(o), Table(M) { 00790 00791 // Emit the signature... 00792 static const unsigned char *Sig = (const unsigned char*)"llvm"; 00793 output_data(Sig, Sig+4); 00794 00795 // Emit the top level CLASS block. 00796 BytecodeBlock ModuleBlock(BytecodeFormat::ModuleBlockID, *this, false, true); 00797 00798 bool isBigEndian = M->getEndianness() == Module::BigEndian; 00799 bool hasLongPointers = M->getPointerSize() == Module::Pointer64; 00800 bool hasNoEndianness = M->getEndianness() == Module::AnyEndianness; 00801 bool hasNoPointerSize = M->getPointerSize() == Module::AnyPointerSize; 00802 00803 // Output the version identifier and other information. 00804 unsigned Version = (BCVersionNum << 4) | 00805 (unsigned)isBigEndian | (hasLongPointers << 1) | 00806 (hasNoEndianness << 2) | 00807 (hasNoPointerSize << 3); 00808 output_vbr(Version); 00809 00810 // The Global type plane comes first 00811 { 00812 BytecodeBlock CPool(BytecodeFormat::GlobalTypePlaneBlockID, *this ); 00813 outputTypes(Type::FirstDerivedTyID); 00814 } 00815 00816 // The ModuleInfoBlock follows directly after the type information 00817 outputModuleInfoBlock(M); 00818 00819 // Output module level constants, used for global variable initializers 00820 outputConstants(false); 00821 00822 // Do the whole module now! Process each function at a time... 00823 for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) 00824 outputFunction(I); 00825 00826 // If needed, output the symbol table for the module... 00827 outputSymbolTable(M->getSymbolTable()); 00828 } 00829 00830 void BytecodeWriter::outputTypes(unsigned TypeNum) { 00831 // Write the type plane for types first because earlier planes (e.g. for a 00832 // primitive type like float) may have constants constructed using types 00833 // coming later (e.g., via getelementptr from a pointer type). The type 00834 // plane is needed before types can be fwd or bkwd referenced. 00835 const std::vector<const Type*>& Types = Table.getTypes(); 00836 assert(!Types.empty() && "No types at all?"); 00837 assert(TypeNum <= Types.size() && "Invalid TypeNo index"); 00838 00839 unsigned NumEntries = Types.size() - TypeNum; 00840 00841 // Output type header: [num entries] 00842 output_vbr(NumEntries); 00843 00844 for (unsigned i = TypeNum; i < TypeNum+NumEntries; ++i) 00845 outputType(Types[i]); 00846 } 00847 00848 // Helper function for outputConstants(). 00849 // Writes out all the constants in the plane Plane starting at entry StartNo. 00850 // 00851 void BytecodeWriter::outputConstantsInPlane(const std::vector<const Value*> 00852 &Plane, unsigned StartNo) { 00853 unsigned ValNo = StartNo; 00854 00855 // Scan through and ignore function arguments, global values, and constant 00856 // strings. 00857 for (; ValNo < Plane.size() && 00858 (isa<Argument>(Plane[ValNo]) || isa<GlobalValue>(Plane[ValNo]) || 00859 (isa<ConstantArray>(Plane[ValNo]) && 00860 cast<ConstantArray>(Plane[ValNo])->isString())); ValNo++) 00861 /*empty*/; 00862 00863 unsigned NC = ValNo; // Number of constants 00864 for (; NC < Plane.size() && (isa<Constant>(Plane[NC]) || 00865 isa<InlineAsm>(Plane[NC])); NC++) 00866 /*empty*/; 00867 NC -= ValNo; // Convert from index into count 00868 if (NC == 0) return; // Skip empty type planes... 00869 00870 // FIXME: Most slabs only have 1 or 2 entries! We should encode this much 00871 // more compactly. 00872 00873 // Output type header: [num entries][type id number] 00874 // 00875 output_vbr(NC); 00876 00877 // Output the Type ID Number... 00878 int Slot = Table.getSlot(Plane.front()->getType()); 00879 assert (Slot != -1 && "Type in constant pool but not in function!!"); 00880 output_typeid((unsigned)Slot); 00881 00882 for (unsigned i = ValNo; i < ValNo+NC; ++i) { 00883 const Value *V = Plane[i]; 00884 if (const Constant *C = dyn_cast<Constant>(V)) 00885 outputConstant(C); 00886 else 00887 outputInlineAsm(cast<InlineAsm>(V)); 00888 } 00889 } 00890 00891 static inline bool hasNullValue(const Type *Ty) { 00892 return Ty != Type::LabelTy && Ty != Type::VoidTy && !isa<OpaqueType>(Ty); 00893 } 00894 00895 void BytecodeWriter::outputConstants(bool isFunction) { 00896 BytecodeBlock CPool(BytecodeFormat::ConstantPoolBlockID, *this, 00897 true /* Elide block if empty */); 00898 00899 unsigned NumPlanes = Table.getNumPlanes(); 00900 00901 if (isFunction) 00902 // Output the type plane before any constants! 00903 outputTypes(Table.getModuleTypeLevel()); 00904 else 00905 // Output module-level string constants before any other constants. 00906 outputConstantStrings(); 00907 00908 for (unsigned pno = 0; pno != NumPlanes; pno++) { 00909 const std::vector<const Value*> &Plane = Table.getPlane(pno); 00910 if (!Plane.empty()) { // Skip empty type planes... 00911 unsigned ValNo = 0; 00912 if (isFunction) // Don't re-emit module constants 00913 ValNo += Table.getModuleLevel(pno); 00914 00915 if (hasNullValue(Plane[0]->getType())) { 00916 // Skip zero initializer 00917 if (ValNo == 0) 00918 ValNo = 1; 00919 } 00920 00921 // Write out constants in the plane 00922 outputConstantsInPlane(Plane, ValNo); 00923 } 00924 } 00925 } 00926 00927 static unsigned getEncodedLinkage(const GlobalValue *GV) { 00928 switch (GV->getLinkage()) { 00929 default: assert(0 && "Invalid linkage!"); 00930 case GlobalValue::ExternalLinkage: return 0; 00931 case GlobalValue::WeakLinkage: return 1; 00932 case GlobalValue::AppendingLinkage: return 2; 00933 case GlobalValue::InternalLinkage: return 3; 00934 case GlobalValue::LinkOnceLinkage: return 4; 00935 } 00936 } 00937 00938 void BytecodeWriter::outputModuleInfoBlock(const Module *M) { 00939 BytecodeBlock ModuleInfoBlock(BytecodeFormat::ModuleGlobalInfoBlockID, *this); 00940 00941 // Give numbers to sections as we encounter them. 00942 unsigned SectionIDCounter = 0; 00943 std::vector<std::string> SectionNames; 00944 std::map<std::string, unsigned> SectionID; 00945 00946 // Output the types for the global variables in the module... 00947 for (Module::const_global_iterator I = M->global_begin(), 00948 End = M->global_end(); I != End; ++I) { 00949 int Slot = Table.getSlot(I->getType()); 00950 assert(Slot != -1 && "Module global vars is broken!"); 00951 00952 assert((I->hasInitializer() || !I->hasInternalLinkage()) && 00953 "Global must have an initializer or have external linkage!"); 00954 00955 // Fields: bit0 = isConstant, bit1 = hasInitializer, bit2-4=Linkage, 00956 // bit5+ = Slot # for type. 00957 bool HasExtensionWord = (I->getAlignment() != 0) || I->hasSection(); 00958 00959 // If we need to use the extension byte, set linkage=3(internal) and 00960 // initializer = 0 (impossible!). 00961 if (!HasExtensionWord) { 00962 unsigned oSlot = ((unsigned)Slot << 5) | (getEncodedLinkage(I) << 2) | 00963 (I->hasInitializer() << 1) | (unsigned)I->isConstant(); 00964 output_vbr(oSlot); 00965 } else { 00966 unsigned oSlot = ((unsigned)Slot << 5) | (3 << 2) | 00967 (0 << 1) | (unsigned)I->isConstant(); 00968 output_vbr(oSlot); 00969 00970 // The extension word has this format: bit 0 = has initializer, bit 1-3 = 00971 // linkage, bit 4-8 = alignment (log2), bit 9 = has SectionID, 00972 // bits 10+ = future use. 00973 unsigned ExtWord = (unsigned)I->hasInitializer() | 00974 (getEncodedLinkage(I) << 1) | 00975 ((Log2_32(I->getAlignment())+1) << 4) | 00976 ((unsigned)I->hasSection() << 9); 00977 output_vbr(ExtWord); 00978 if (I->hasSection()) { 00979 // Give section names unique ID's. 00980 unsigned &Entry = SectionID[I->getSection()]; 00981 if (Entry == 0) { 00982 Entry = ++SectionIDCounter; 00983 SectionNames.push_back(I->getSection()); 00984 } 00985 output_vbr(Entry); 00986 } 00987 } 00988 00989 // If we have an initializer, output it now. 00990 if (I->hasInitializer()) { 00991 Slot = Table.getSlot((Value*)I->getInitializer()); 00992 assert(Slot != -1 && "No slot for global var initializer!"); 00993 output_vbr((unsigned)Slot); 00994 } 00995 } 00996 output_typeid((unsigned)Table.getSlot(Type::VoidTy)); 00997 00998 // Output the types of the functions in this module. 00999 for (Module::const_iterator I = M->begin(), End = M->end(); I != End; ++I) { 01000 int Slot = Table.getSlot(I->getType()); 01001 assert(Slot != -1 && "Module slot calculator is broken!"); 01002 assert(Slot >= Type::FirstDerivedTyID && "Derived type not in range!"); 01003 assert(((Slot << 6) >> 6) == Slot && "Slot # too big!"); 01004 unsigned CC = I->getCallingConv()+1; 01005 unsigned ID = (Slot << 5) | (CC & 15); 01006 01007 if (I->isExternal()) // If external, we don't have an FunctionInfo block. 01008 ID |= 1 << 4; 01009 01010 if (I->getAlignment() || I->hasSection() || (CC & ~15) != 0) 01011 ID |= 1 << 31; // Do we need an extension word? 01012 01013 output_vbr(ID); 01014 01015 if (ID & (1 << 31)) { 01016 // Extension byte: bits 0-4 = alignment, bits 5-9 = top nibble of calling 01017 // convention, bit 10 = hasSectionID. 01018 ID = (Log2_32(I->getAlignment())+1) | ((CC >> 4) << 5) | 01019 (I->hasSection() << 10); 01020 output_vbr(ID); 01021 01022 // Give section names unique ID's. 01023 if (I->hasSection()) { 01024 unsigned &Entry = SectionID[I->getSection()]; 01025 if (Entry == 0) { 01026 Entry = ++SectionIDCounter; 01027 SectionNames.push_back(I->getSection()); 01028 } 01029 output_vbr(Entry); 01030 } 01031 } 01032 } 01033 output_vbr((unsigned)Table.getSlot(Type::VoidTy) << 5); 01034 01035 // Emit the list of dependent libraries for the Module. 01036 Module::lib_iterator LI = M->lib_begin(); 01037 Module::lib_iterator LE = M->lib_end(); 01038 output_vbr(unsigned(LE - LI)); // Emit the number of dependent libraries. 01039 for (; LI != LE; ++LI) 01040 output(*LI); 01041 01042 // Output the target triple from the module 01043 output(M->getTargetTriple()); 01044 01045 // Emit the table of section names. 01046 output_vbr((unsigned)SectionNames.size()); 01047 for (unsigned i = 0, e = SectionNames.size(); i != e; ++i) 01048 output(SectionNames[i]); 01049 01050 // Output the inline asm string. 01051 output(M->getModuleInlineAsm()); 01052 } 01053 01054 void BytecodeWriter::outputInstructions(const Function *F) { 01055 BytecodeBlock ILBlock(BytecodeFormat::InstructionListBlockID, *this); 01056 for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) 01057 for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) 01058 outputInstruction(*I); 01059 } 01060 01061 void BytecodeWriter::outputFunction(const Function *F) { 01062 // If this is an external function, there is nothing else to emit! 01063 if (F->isExternal()) return; 01064 01065 BytecodeBlock FunctionBlock(BytecodeFormat::FunctionBlockID, *this); 01066 output_vbr(getEncodedLinkage(F)); 01067 01068 // Get slot information about the function... 01069 Table.incorporateFunction(F); 01070 01071 if (Table.getCompactionTable().empty()) { 01072 // Output information about the constants in the function if the compaction 01073 // table is not being used. 01074 outputConstants(true); 01075 } else { 01076 // Otherwise, emit the compaction table. 01077 outputCompactionTable(); 01078 } 01079 01080 // Output all of the instructions in the body of the function 01081 outputInstructions(F); 01082 01083 // If needed, output the symbol table for the function... 01084 outputSymbolTable(F->getSymbolTable()); 01085 01086 Table.purgeFunction(); 01087 } 01088 01089 void BytecodeWriter::outputCompactionTablePlane(unsigned PlaneNo, 01090 const std::vector<const Value*> &Plane, 01091 unsigned StartNo) { 01092 unsigned End = Table.getModuleLevel(PlaneNo); 01093 if (Plane.empty() || StartNo == End || End == 0) return; // Nothing to emit 01094 assert(StartNo < End && "Cannot emit negative range!"); 01095 assert(StartNo < Plane.size() && End <= Plane.size()); 01096 01097 // Do not emit the null initializer! 01098 ++StartNo; 01099 01100 // Figure out which encoding to use. By far the most common case we have is 01101 // to emit 0-2 entries in a compaction table plane. 01102 switch (End-StartNo) { 01103 case 0: // Avoid emitting two vbr's if possible. 01104 case 1: 01105 case 2: 01106 output_vbr((PlaneNo << 2) | End-StartNo); 01107 break; 01108 default: 01109 // Output the number of things. 01110 output_vbr((unsigned(End-StartNo) << 2) | 3); 01111 output_typeid(PlaneNo); // Emit the type plane this is 01112 break; 01113 } 01114 01115 for (unsigned i = StartNo; i != End; ++i) 01116 output_vbr(Table.getGlobalSlot(Plane[i])); 01117 } 01118 01119 void BytecodeWriter::outputCompactionTypes(unsigned StartNo) { 01120 // Get the compaction type table from the slot calculator 01121 const std::vector<const Type*> &CTypes = Table.getCompactionTypes(); 01122 01123 // The compaction types may have been uncompactified back to the 01124 // global types. If so, we just write an empty table 01125 if (CTypes.size() == 0 ) { 01126 output_vbr(0U); 01127 return; 01128 } 01129 01130 assert(CTypes.size() >= StartNo && "Invalid compaction types start index"); 01131 01132 // Determine how many types to write 01133 unsigned NumTypes = CTypes.size() - StartNo; 01134 01135 // Output the number of types. 01136 output_vbr(NumTypes); 01137 01138 for (unsigned i = StartNo; i < StartNo+NumTypes; ++i) 01139 output_typeid(Table.getGlobalSlot(CTypes[i])); 01140 } 01141 01142 void BytecodeWriter::outputCompactionTable() { 01143 // Avoid writing the compaction table at all if there is no content. 01144 if (Table.getCompactionTypes().size() >= Type::FirstDerivedTyID || 01145 (!Table.CompactionTableIsEmpty())) { 01146 BytecodeBlock CTB(BytecodeFormat::CompactionTableBlockID, *this, 01147 true/*ElideIfEmpty*/); 01148 const std::vector<std::vector<const Value*> > &CT = 01149 Table.getCompactionTable(); 01150 01151 // First things first, emit the type compaction table if there is one. 01152 outputCompactionTypes(Type::FirstDerivedTyID); 01153 01154 for (unsigned i = 0, e = CT.size(); i != e; ++i) 01155 outputCompactionTablePlane(i, CT[i], 0); 01156 } 01157 } 01158 01159 void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) { 01160 // Do not output the Bytecode block for an empty symbol table, it just wastes 01161 // space! 01162 if (MST.isEmpty()) return; 01163 01164 BytecodeBlock SymTabBlock(BytecodeFormat::SymbolTableBlockID, *this, 01165 true/*ElideIfEmpty*/); 01166 01167 // Write the number of types 01168 output_vbr(MST.num_types()); 01169 01170 // Write each of the types 01171 for (SymbolTable::type_const_iterator TI = MST.type_begin(), 01172 TE = MST.type_end(); TI != TE; ++TI ) { 01173 // Symtab entry:[def slot #][name] 01174 output_typeid((unsigned)Table.getSlot(TI->second)); 01175 output(TI->first); 01176 } 01177 01178 // Now do each of the type planes in order. 01179 for (SymbolTable::plane_const_iterator PI = MST.plane_begin(), 01180 PE = MST.plane_end(); PI != PE; ++PI) { 01181 SymbolTable::value_const_iterator I = MST.value_begin(PI->first); 01182 SymbolTable::value_const_iterator End = MST.value_end(PI->first); 01183 int Slot; 01184 01185 if (I == End) continue; // Don't mess with an absent type... 01186 01187 // Write the number of values in this plane 01188 output_vbr((unsigned)PI->second.size()); 01189 01190 // Write the slot number of the type for this plane 01191 Slot = Table.getSlot(PI->first); 01192 assert(Slot != -1 && "Type in symtab, but not in table!"); 01193 output_typeid((unsigned)Slot); 01194 01195 // Write each of the values in this plane 01196 for (; I != End; ++I) { 01197 // Symtab entry: [def slot #][name] 01198 Slot = Table.getSlot(I->second); 01199 assert(Slot != -1 && "Value in symtab but has no slot number!!"); 01200 output_vbr((unsigned)Slot); 01201 output(I->first); 01202 } 01203 } 01204 } 01205 01206 void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out, 01207 bool compress ) { 01208 assert(M && "You can't write a null module!!"); 01209 01210 // Create a vector of unsigned char for the bytecode output. We 01211 // reserve 256KBytes of space in the vector so that we avoid doing 01212 // lots of little allocations. 256KBytes is sufficient for a large 01213 // proportion of the bytecode files we will encounter. Larger files 01214 // will be automatically doubled in size as needed (std::vector 01215 // behavior). 01216 std::vector<unsigned char> Buffer; 01217 Buffer.reserve(256 * 1024); 01218 01219 // The BytecodeWriter populates Buffer for us. 01220 BytecodeWriter BCW(Buffer, M); 01221 01222 // Keep track of how much we've written 01223 BytesWritten += Buffer.size(); 01224 01225 // Determine start and end points of the Buffer 01226 const unsigned char *FirstByte = &Buffer.front(); 01227 01228 // If we're supposed to compress this mess ... 01229 if (compress) { 01230 01231 // We signal compression by using an alternate magic number for the 01232 // file. The compressed bytecode file's magic number is "llvc" instead 01233 // of "llvm". 01234 char compressed_magic[4]; 01235 compressed_magic[0] = 'l'; 01236 compressed_magic[1] = 'l'; 01237 compressed_magic[2] = 'v'; 01238 compressed_magic[3] = 'c'; 01239 01240 Out.write(compressed_magic,4); 01241 01242 // Compress everything after the magic number (which we altered) 01243 uint64_t zipSize = Compressor::compressToStream( 01244 (char*)(FirstByte+4), // Skip the magic number 01245 Buffer.size()-4, // Skip the magic number 01246 Out // Where to write compressed data 01247 ); 01248 01249 } else { 01250 01251 // We're not compressing, so just write the entire block. 01252 Out.write((char*)FirstByte, Buffer.size()); 01253 } 01254 01255 // make sure it hits disk now 01256 Out.flush(); 01257 } 01258