LLVM API Documentation
00001 //===-- Writer.cpp - Library for writing LLVM bytecode files --------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This library implements the functionality defined in llvm/Bytecode/Writer.h 00011 // 00012 // Note that this file uses an unusual technique of outputting all the bytecode 00013 // to a vector of unsigned char, then copies the vector to an ostream. The 00014 // reason for this is that we must do "seeking" in the stream to do back- 00015 // patching, and some very important ostreams that we want to support (like 00016 // pipes) do not support seeking. :( :( :( 00017 // 00018 //===----------------------------------------------------------------------===// 00019 00020 #include "WriterInternals.h" 00021 #include "llvm/Bytecode/WriteBytecodePass.h" 00022 #include "llvm/CallingConv.h" 00023 #include "llvm/Constants.h" 00024 #include "llvm/DerivedTypes.h" 00025 #include "llvm/InlineAsm.h" 00026 #include "llvm/Instructions.h" 00027 #include "llvm/Module.h" 00028 #include "llvm/SymbolTable.h" 00029 #include "llvm/Support/GetElementPtrTypeIterator.h" 00030 #include "llvm/Support/Compressor.h" 00031 #include "llvm/Support/MathExtras.h" 00032 #include "llvm/System/Program.h" 00033 #include "llvm/ADT/STLExtras.h" 00034 #include "llvm/ADT/Statistic.h" 00035 #include <cstring> 00036 #include <algorithm> 00037 using namespace llvm; 00038 00039 /// This value needs to be incremented every time the bytecode format changes 00040 /// so that the reader can distinguish which format of the bytecode file has 00041 /// been written. 00042 /// @brief The bytecode version number 00043 const unsigned BCVersionNum = 5; 00044 00045 static RegisterPass<WriteBytecodePass> X("emitbytecode", "Bytecode Writer"); 00046 00047 static Statistic<> 00048 BytesWritten("bytecodewriter", "Number of bytecode bytes written"); 00049 00050 //===----------------------------------------------------------------------===// 00051 //=== Output Primitives ===// 00052 //===----------------------------------------------------------------------===// 00053 00054 // output - If a position is specified, it must be in the valid portion of the 00055 // string... note that this should be inlined always so only the relevant IF 00056 // body should be included. 00057 inline void BytecodeWriter::output(unsigned i, int pos) { 00058 if (pos == -1) { // Be endian clean, little endian is our friend 00059 Out.push_back((unsigned char)i); 00060 Out.push_back((unsigned char)(i >> 8)); 00061 Out.push_back((unsigned char)(i >> 16)); 00062 Out.push_back((unsigned char)(i >> 24)); 00063 } else { 00064 Out[pos ] = (unsigned char)i; 00065 Out[pos+1] = (unsigned char)(i >> 8); 00066 Out[pos+2] = (unsigned char)(i >> 16); 00067 Out[pos+3] = (unsigned char)(i >> 24); 00068 } 00069 } 00070 00071 inline void BytecodeWriter::output(int i) { 00072 output((unsigned)i); 00073 } 00074 00075 /// output_vbr - Output an unsigned value, by using the least number of bytes 00076 /// possible. This is useful because many of our "infinite" values are really 00077 /// very small most of the time; but can be large a few times. 00078 /// Data format used: If you read a byte with the high bit set, use the low 00079 /// seven bits as data and then read another byte. 00080 inline void BytecodeWriter::output_vbr(uint64_t i) { 00081 while (1) { 00082 if (i < 0x80) { // done? 00083 Out.push_back((unsigned char)i); // We know the high bit is clear... 00084 return; 00085 } 00086 00087 // Nope, we are bigger than a character, output the next 7 bits and set the 00088 // high bit to say that there is more coming... 00089 Out.push_back(0x80 | ((unsigned char)i & 0x7F)); 00090 i >>= 7; // Shift out 7 bits now... 00091 } 00092 } 00093 00094 inline void BytecodeWriter::output_vbr(unsigned i) { 00095 while (1) { 00096 if (i < 0x80) { // done? 00097 Out.push_back((unsigned char)i); // We know the high bit is clear... 00098 return; 00099 } 00100 00101 // Nope, we are bigger than a character, output the next 7 bits and set the 00102 // high bit to say that there is more coming... 00103 Out.push_back(0x80 | ((unsigned char)i & 0x7F)); 00104 i >>= 7; // Shift out 7 bits now... 00105 } 00106 } 00107 00108 inline void BytecodeWriter::output_typeid(unsigned i) { 00109 if (i <= 0x00FFFFFF) 00110 this->output_vbr(i); 00111 else { 00112 this->output_vbr(0x00FFFFFF); 00113 this->output_vbr(i); 00114 } 00115 } 00116 00117 inline void BytecodeWriter::output_vbr(int64_t i) { 00118 if (i < 0) 00119 output_vbr(((uint64_t)(-i) << 1) | 1); // Set low order sign bit... 00120 else 00121 output_vbr((uint64_t)i << 1); // Low order bit is clear. 00122 } 00123 00124 00125 inline void BytecodeWriter::output_vbr(int i) { 00126 if (i < 0) 00127 output_vbr(((unsigned)(-i) << 1) | 1); // Set low order sign bit... 00128 else 00129 output_vbr((unsigned)i << 1); // Low order bit is clear. 00130 } 00131 00132 inline void BytecodeWriter::output(const std::string &s) { 00133 unsigned Len = s.length(); 00134 output_vbr(Len ); // Strings may have an arbitrary length... 00135 Out.insert(Out.end(), s.begin(), s.end()); 00136 } 00137 00138 inline void BytecodeWriter::output_data(const void *Ptr, const void *End) { 00139 Out.insert(Out.end(), (const unsigned char*)Ptr, (const unsigned char*)End); 00140 } 00141 00142 inline void BytecodeWriter::output_float(float& FloatVal) { 00143 /// FIXME: This isn't optimal, it has size problems on some platforms 00144 /// where FP is not IEEE. 00145 uint32_t i = FloatToBits(FloatVal); 00146 Out.push_back( static_cast<unsigned char>( (i & 0xFF ))); 00147 Out.push_back( static_cast<unsigned char>( (i >> 8) & 0xFF)); 00148 Out.push_back( static_cast<unsigned char>( (i >> 16) & 0xFF)); 00149 Out.push_back( static_cast<unsigned char>( (i >> 24) & 0xFF)); 00150 } 00151 00152 inline void BytecodeWriter::output_double(double& DoubleVal) { 00153 /// FIXME: This isn't optimal, it has size problems on some platforms 00154 /// where FP is not IEEE. 00155 uint64_t i = DoubleToBits(DoubleVal); 00156 Out.push_back( static_cast<unsigned char>( (i & 0xFF ))); 00157 Out.push_back( static_cast<unsigned char>( (i >> 8) & 0xFF)); 00158 Out.push_back( static_cast<unsigned char>( (i >> 16) & 0xFF)); 00159 Out.push_back( static_cast<unsigned char>( (i >> 24) & 0xFF)); 00160 Out.push_back( static_cast<unsigned char>( (i >> 32) & 0xFF)); 00161 Out.push_back( static_cast<unsigned char>( (i >> 40) & 0xFF)); 00162 Out.push_back( static_cast<unsigned char>( (i >> 48) & 0xFF)); 00163 Out.push_back( static_cast<unsigned char>( (i >> 56) & 0xFF)); 00164 } 00165 00166 inline BytecodeBlock::BytecodeBlock(unsigned ID, BytecodeWriter &w, 00167 bool elideIfEmpty, bool hasLongFormat) 00168 : Id(ID), Writer(w), ElideIfEmpty(elideIfEmpty), HasLongFormat(hasLongFormat){ 00169 00170 if (HasLongFormat) { 00171 w.output(ID); 00172 w.output(0U); // For length in long format 00173 } else { 00174 w.output(0U); /// Place holder for ID and length for this block 00175 } 00176 Loc = w.size(); 00177 } 00178 00179 inline BytecodeBlock::~BytecodeBlock() { // Do backpatch when block goes out 00180 // of scope... 00181 if (Loc == Writer.size() && ElideIfEmpty) { 00182 // If the block is empty, and we are allowed to, do not emit the block at 00183 // all! 00184 Writer.resize(Writer.size()-(HasLongFormat?8:4)); 00185 return; 00186 } 00187 00188 if (HasLongFormat) 00189 Writer.output(unsigned(Writer.size()-Loc), int(Loc-4)); 00190 else 00191 Writer.output(unsigned(Writer.size()-Loc) << 5 | (Id & 0x1F), int(Loc-4)); 00192 } 00193 00194 //===----------------------------------------------------------------------===// 00195 //=== Constant Output ===// 00196 //===----------------------------------------------------------------------===// 00197 00198 void BytecodeWriter::outputType(const Type *T) { 00199 output_vbr((unsigned)T->getTypeID()); 00200 00201 // That's all there is to handling primitive types... 00202 if (T->isPrimitiveType()) { 00203 return; // We might do this if we alias a prim type: %x = type int 00204 } 00205 00206 switch (T->getTypeID()) { // Handle derived types now. 00207 case Type::FunctionTyID: { 00208 const FunctionType *MT = cast<FunctionType>(T); 00209 int Slot = Table.getSlot(MT->getReturnType()); 00210 assert(Slot != -1 && "Type used but not available!!"); 00211 output_typeid((unsigned)Slot); 00212 00213 // Output the number of arguments to function (+1 if varargs): 00214 output_vbr((unsigned)MT->getNumParams()+MT->isVarArg()); 00215 00216 // Output all of the arguments... 00217 FunctionType::param_iterator I = MT->param_begin(); 00218 for (; I != MT->param_end(); ++I) { 00219 Slot = Table.getSlot(*I); 00220 assert(Slot != -1 && "Type used but not available!!"); 00221 output_typeid((unsigned)Slot); 00222 } 00223 00224 // Terminate list with VoidTy if we are a varargs function... 00225 if (MT->isVarArg()) 00226 output_typeid((unsigned)Type::VoidTyID); 00227 break; 00228 } 00229 00230 case Type::ArrayTyID: { 00231 const ArrayType *AT = cast<ArrayType>(T); 00232 int Slot = Table.getSlot(AT->getElementType()); 00233 assert(Slot != -1 && "Type used but not available!!"); 00234 output_typeid((unsigned)Slot); 00235 output_vbr(AT->getNumElements()); 00236 break; 00237 } 00238 00239 case Type::PackedTyID: { 00240 const PackedType *PT = cast<PackedType>(T); 00241 int Slot = Table.getSlot(PT->getElementType()); 00242 assert(Slot != -1 && "Type used but not available!!"); 00243 output_typeid((unsigned)Slot); 00244 output_vbr(PT->getNumElements()); 00245 break; 00246 } 00247 00248 00249 case Type::StructTyID: { 00250 const StructType *ST = cast<StructType>(T); 00251 00252 // Output all of the element types... 00253 for (StructType::element_iterator I = ST->element_begin(), 00254 E = ST->element_end(); I != E; ++I) { 00255 int Slot = Table.getSlot(*I); 00256 assert(Slot != -1 && "Type used but not available!!"); 00257 output_typeid((unsigned)Slot); 00258 } 00259 00260 // Terminate list with VoidTy 00261 output_typeid((unsigned)Type::VoidTyID); 00262 break; 00263 } 00264 00265 case Type::PointerTyID: { 00266 const PointerType *PT = cast<PointerType>(T); 00267 int Slot = Table.getSlot(PT->getElementType()); 00268 assert(Slot != -1 && "Type used but not available!!"); 00269 output_typeid((unsigned)Slot); 00270 break; 00271 } 00272 00273 case Type::OpaqueTyID: 00274 // No need to emit anything, just the count of opaque types is enough. 00275 break; 00276 00277 default: 00278 std::cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" 00279 << " Type '" << T->getDescription() << "'\n"; 00280 break; 00281 } 00282 } 00283 00284 void BytecodeWriter::outputConstant(const Constant *CPV) { 00285 assert((CPV->getType()->isPrimitiveType() || !CPV->isNullValue()) && 00286 "Shouldn't output null constants!"); 00287 00288 // We must check for a ConstantExpr before switching by type because 00289 // a ConstantExpr can be of any type, and has no explicit value. 00290 // 00291 if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) { 00292 // FIXME: Encoding of constant exprs could be much more compact! 00293 assert(CE->getNumOperands() > 0 && "ConstantExpr with 0 operands"); 00294 assert(CE->getNumOperands() != 1 || CE->getOpcode() == Instruction::Cast); 00295 output_vbr(1+CE->getNumOperands()); // flags as an expr 00296 output_vbr(CE->getOpcode()); // flags as an expr 00297 00298 for (User::const_op_iterator OI = CE->op_begin(); OI != CE->op_end(); ++OI){ 00299 int Slot = Table.getSlot(*OI); 00300 assert(Slot != -1 && "Unknown constant used in ConstantExpr!!"); 00301 output_vbr((unsigned)Slot); 00302 Slot = Table.getSlot((*OI)->getType()); 00303 output_typeid((unsigned)Slot); 00304 } 00305 return; 00306 } else if (isa<UndefValue>(CPV)) { 00307 output_vbr(1U); // 1 -> UndefValue constant. 00308 return; 00309 } else { 00310 output_vbr(0U); // flag as not a ConstantExpr 00311 } 00312 00313 switch (CPV->getType()->getTypeID()) { 00314 case Type::BoolTyID: // Boolean Types 00315 if (cast<ConstantBool>(CPV)->getValue()) 00316 output_vbr(1U); 00317 else 00318 output_vbr(0U); 00319 break; 00320 00321 case Type::UByteTyID: // Unsigned integer types... 00322 case Type::UShortTyID: 00323 case Type::UIntTyID: 00324 case Type::ULongTyID: 00325 output_vbr(cast<ConstantUInt>(CPV)->getValue()); 00326 break; 00327 00328 case Type::SByteTyID: // Signed integer types... 00329 case Type::ShortTyID: 00330 case Type::IntTyID: 00331 case Type::LongTyID: 00332 output_vbr(cast<ConstantSInt>(CPV)->getValue()); 00333 break; 00334 00335 case Type::ArrayTyID: { 00336 const ConstantArray *CPA = cast<ConstantArray>(CPV); 00337 assert(!CPA->isString() && "Constant strings should be handled specially!"); 00338 00339 for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i) { 00340 int Slot = Table.getSlot(CPA->getOperand(i)); 00341 assert(Slot != -1 && "Constant used but not available!!"); 00342 output_vbr((unsigned)Slot); 00343 } 00344 break; 00345 } 00346 00347 case Type::PackedTyID: { 00348 const ConstantPacked *CP = cast<ConstantPacked>(CPV); 00349 00350 for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) { 00351 int Slot = Table.getSlot(CP->getOperand(i)); 00352 assert(Slot != -1 && "Constant used but not available!!"); 00353 output_vbr((unsigned)Slot); 00354 } 00355 break; 00356 } 00357 00358 case Type::StructTyID: { 00359 const ConstantStruct *CPS = cast<ConstantStruct>(CPV); 00360 00361 for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i) { 00362 int Slot = Table.getSlot(CPS->getOperand(i)); 00363 assert(Slot != -1 && "Constant used but not available!!"); 00364 output_vbr((unsigned)Slot); 00365 } 00366 break; 00367 } 00368 00369 case Type::PointerTyID: 00370 assert(0 && "No non-null, non-constant-expr constants allowed!"); 00371 abort(); 00372 00373 case Type::FloatTyID: { // Floating point types... 00374 float Tmp = (float)cast<ConstantFP>(CPV)->getValue(); 00375 output_float(Tmp); 00376 break; 00377 } 00378 case Type::DoubleTyID: { 00379 double Tmp = cast<ConstantFP>(CPV)->getValue(); 00380 output_double(Tmp); 00381 break; 00382 } 00383 00384 case Type::VoidTyID: 00385 case Type::LabelTyID: 00386 default: 00387 std::cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" 00388 << " type '" << *CPV->getType() << "'\n"; 00389 break; 00390 } 00391 return; 00392 } 00393 00394 /// outputInlineAsm - InlineAsm's get emitted to the constant pool, so they can 00395 /// be shared by multiple uses. 00396 void BytecodeWriter::outputInlineAsm(const InlineAsm *IA) { 00397 // Output a marker, so we know when we have one one parsing the constant pool. 00398 // Note that this encoding is 5 bytes: not very efficient for a marker. Since 00399 // unique inline asms are rare, this should hardly matter. 00400 output_vbr(~0U); 00401 00402 output(IA->getAsmString()); 00403 output(IA->getConstraintString()); 00404 output_vbr(unsigned(IA->hasSideEffects())); 00405 } 00406 00407 void BytecodeWriter::outputConstantStrings() { 00408 SlotCalculator::string_iterator I = Table.string_begin(); 00409 SlotCalculator::string_iterator E = Table.string_end(); 00410 if (I == E) return; // No strings to emit 00411 00412 // If we have != 0 strings to emit, output them now. Strings are emitted into 00413 // the 'void' type plane. 00414 output_vbr(unsigned(E-I)); 00415 output_typeid(Type::VoidTyID); 00416 00417 // Emit all of the strings. 00418 for (I = Table.string_begin(); I != E; ++I) { 00419 const ConstantArray *Str = *I; 00420 int Slot = Table.getSlot(Str->getType()); 00421 assert(Slot != -1 && "Constant string of unknown type?"); 00422 output_typeid((unsigned)Slot); 00423 00424 // Now that we emitted the type (which indicates the size of the string), 00425 // emit all of the characters. 00426 std::string Val = Str->getAsString(); 00427 output_data(Val.c_str(), Val.c_str()+Val.size()); 00428 } 00429 } 00430 00431 //===----------------------------------------------------------------------===// 00432 //=== Instruction Output ===// 00433 //===----------------------------------------------------------------------===// 00434 00435 // outputInstructionFormat0 - Output those weird instructions that have a large 00436 // number of operands or have large operands themselves. 00437 // 00438 // Format: [opcode] [type] [numargs] [arg0] [arg1] ... [arg<numargs-1>] 00439 // 00440 void BytecodeWriter::outputInstructionFormat0(const Instruction *I, 00441 unsigned Opcode, 00442 const SlotCalculator &Table, 00443 unsigned Type) { 00444 // Opcode must have top two bits clear... 00445 output_vbr(Opcode << 2); // Instruction Opcode ID 00446 output_typeid(Type); // Result type 00447 00448 unsigned NumArgs = I->getNumOperands(); 00449 output_vbr(NumArgs + (isa<CastInst>(I) || 00450 isa<VAArgInst>(I) || Opcode == 56 || Opcode == 58)); 00451 00452 if (!isa<GetElementPtrInst>(&I)) { 00453 for (unsigned i = 0; i < NumArgs; ++i) { 00454 int Slot = Table.getSlot(I->getOperand(i)); 00455 assert(Slot >= 0 && "No slot number for value!?!?"); 00456 output_vbr((unsigned)Slot); 00457 } 00458 00459 if (isa<CastInst>(I) || isa<VAArgInst>(I)) { 00460 int Slot = Table.getSlot(I->getType()); 00461 assert(Slot != -1 && "Cast return type unknown?"); 00462 output_typeid((unsigned)Slot); 00463 } else if (Opcode == 56) { // Invoke escape sequence 00464 output_vbr(cast<InvokeInst>(I)->getCallingConv()); 00465 } else if (Opcode == 58) { // Call escape sequence 00466 output_vbr((cast<CallInst>(I)->getCallingConv() << 1) | 00467 unsigned(cast<CallInst>(I)->isTailCall())); 00468 } 00469 } else { 00470 int Slot = Table.getSlot(I->getOperand(0)); 00471 assert(Slot >= 0 && "No slot number for value!?!?"); 00472 output_vbr(unsigned(Slot)); 00473 00474 // We need to encode the type of sequential type indices into their slot # 00475 unsigned Idx = 1; 00476 for (gep_type_iterator TI = gep_type_begin(I), E = gep_type_end(I); 00477 Idx != NumArgs; ++TI, ++Idx) { 00478 Slot = Table.getSlot(I->getOperand(Idx)); 00479 assert(Slot >= 0 && "No slot number for value!?!?"); 00480 00481 if (isa<SequentialType>(*TI)) { 00482 unsigned IdxId; 00483 switch (I->getOperand(Idx)->getType()->getTypeID()) { 00484 default: assert(0 && "Unknown index type!"); 00485 case Type::UIntTyID: IdxId = 0; break; 00486 case Type::IntTyID: IdxId = 1; break; 00487 case Type::ULongTyID: IdxId = 2; break; 00488 case Type::LongTyID: IdxId = 3; break; 00489 } 00490 Slot = (Slot << 2) | IdxId; 00491 } 00492 output_vbr(unsigned(Slot)); 00493 } 00494 } 00495 } 00496 00497 00498 // outputInstrVarArgsCall - Output the absurdly annoying varargs function calls. 00499 // This are more annoying than most because the signature of the call does not 00500 // tell us anything about the types of the arguments in the varargs portion. 00501 // Because of this, we encode (as type 0) all of the argument types explicitly 00502 // before the argument value. This really sucks, but you shouldn't be using 00503 // varargs functions in your code! *death to printf*! 00504 // 00505 // Format: [opcode] [type] [numargs] [arg0] [arg1] ... [arg<numargs-1>] 00506 // 00507 void BytecodeWriter::outputInstrVarArgsCall(const Instruction *I, 00508 unsigned Opcode, 00509 const SlotCalculator &Table, 00510 unsigned Type) { 00511 assert(isa<CallInst>(I) || isa<InvokeInst>(I)); 00512 // Opcode must have top two bits clear... 00513 output_vbr(Opcode << 2); // Instruction Opcode ID 00514 output_typeid(Type); // Result type (varargs type) 00515 00516 const PointerType *PTy = cast<PointerType>(I->getOperand(0)->getType()); 00517 const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); 00518 unsigned NumParams = FTy->getNumParams(); 00519 00520 unsigned NumFixedOperands; 00521 if (isa<CallInst>(I)) { 00522 // Output an operand for the callee and each fixed argument, then two for 00523 // each variable argument. 00524 NumFixedOperands = 1+NumParams; 00525 } else { 00526 assert(isa<InvokeInst>(I) && "Not call or invoke??"); 00527 // Output an operand for the callee and destinations, then two for each 00528 // variable argument. 00529 NumFixedOperands = 3+NumParams; 00530 } 00531 output_vbr(2 * I->getNumOperands()-NumFixedOperands + 00532 unsigned(Opcode == 56 || Opcode == 58)); 00533 00534 // The type for the function has already been emitted in the type field of the 00535 // instruction. Just emit the slot # now. 00536 for (unsigned i = 0; i != NumFixedOperands; ++i) { 00537 int Slot = Table.getSlot(I->getOperand(i)); 00538 assert(Slot >= 0 && "No slot number for value!?!?"); 00539 output_vbr((unsigned)Slot); 00540 } 00541 00542 for (unsigned i = NumFixedOperands, e = I->getNumOperands(); i != e; ++i) { 00543 // Output Arg Type ID 00544 int Slot = Table.getSlot(I->getOperand(i)->getType()); 00545 assert(Slot >= 0 && "No slot number for value!?!?"); 00546 output_typeid((unsigned)Slot); 00547 00548 // Output arg ID itself 00549 Slot = Table.getSlot(I->getOperand(i)); 00550 assert(Slot >= 0 && "No slot number for value!?!?"); 00551 output_vbr((unsigned)Slot); 00552 } 00553 00554 // If this is the escape sequence for call, emit the tailcall/cc info. 00555 if (Opcode == 58) { 00556 const CallInst *CI = cast<CallInst>(I); 00557 output_vbr((CI->getCallingConv() << 1) | unsigned(CI->isTailCall())); 00558 } else if (Opcode == 56) { // Invoke escape sequence. 00559 output_vbr(cast<InvokeInst>(I)->getCallingConv()); 00560 } 00561 } 00562 00563 00564 // outputInstructionFormat1 - Output one operand instructions, knowing that no 00565 // operand index is >= 2^12. 00566 // 00567 inline void BytecodeWriter::outputInstructionFormat1(const Instruction *I, 00568 unsigned Opcode, 00569 unsigned *Slots, 00570 unsigned Type) { 00571 // bits Instruction format: 00572 // -------------------------- 00573 // 01-00: Opcode type, fixed to 1. 00574 // 07-02: Opcode 00575 // 19-08: Resulting type plane 00576 // 31-20: Operand #1 (if set to (2^12-1), then zero operands) 00577 // 00578 output(1 | (Opcode << 2) | (Type << 8) | (Slots[0] << 20)); 00579 } 00580 00581 00582 // outputInstructionFormat2 - Output two operand instructions, knowing that no 00583 // operand index is >= 2^8. 00584 // 00585 inline void BytecodeWriter::outputInstructionFormat2(const Instruction *I, 00586 unsigned Opcode, 00587 unsigned *Slots, 00588 unsigned Type) { 00589 // bits Instruction format: 00590 // -------------------------- 00591 // 01-00: Opcode type, fixed to 2. 00592 // 07-02: Opcode 00593 // 15-08: Resulting type plane 00594 // 23-16: Operand #1 00595 // 31-24: Operand #2 00596 // 00597 output(2 | (Opcode << 2) | (Type << 8) | (Slots[0] << 16) | (Slots[1] << 24)); 00598 } 00599 00600 00601 // outputInstructionFormat3 - Output three operand instructions, knowing that no 00602 // operand index is >= 2^6. 00603 // 00604 inline void BytecodeWriter::outputInstructionFormat3(const Instruction *I, 00605 unsigned Opcode, 00606 unsigned *Slots, 00607 unsigned Type) { 00608 // bits Instruction format: 00609 // -------------------------- 00610 // 01-00: Opcode type, fixed to 3. 00611 // 07-02: Opcode 00612 // 13-08: Resulting type plane 00613 // 19-14: Operand #1 00614 // 25-20: Operand #2 00615 // 31-26: Operand #3 00616 // 00617 output(3 | (Opcode << 2) | (Type << 8) | 00618 (Slots[0] << 14) | (Slots[1] << 20) | (Slots[2] << 26)); 00619 } 00620 00621 void BytecodeWriter::outputInstruction(const Instruction &I) { 00622 assert(I.getOpcode() < 56 && "Opcode too big???"); 00623 unsigned Opcode = I.getOpcode(); 00624 unsigned NumOperands = I.getNumOperands(); 00625 00626 // Encode 'tail call' as 61, 'volatile load' as 62, and 'volatile store' as 00627 // 63. 00628 if (const CallInst *CI = dyn_cast<CallInst>(&I)) { 00629 if (CI->getCallingConv() == CallingConv::C) { 00630 if (CI->isTailCall()) 00631 Opcode = 61; // CCC + Tail Call 00632 else 00633 ; // Opcode = Instruction::Call 00634 } else if (CI->getCallingConv() == CallingConv::Fast) { 00635 if (CI->isTailCall()) 00636 Opcode = 59; // FastCC + TailCall 00637 else 00638 Opcode = 60; // FastCC + Not Tail Call 00639 } else { 00640 Opcode = 58; // Call escape sequence. 00641 } 00642 } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) { 00643 if (II->getCallingConv() == CallingConv::Fast) 00644 Opcode = 57; // FastCC invoke. 00645 else if (II->getCallingConv() != CallingConv::C) 00646 Opcode = 56; // Invoke escape sequence. 00647 00648 } else if (isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile()) { 00649 Opcode = 62; 00650 } else if (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile()) { 00651 Opcode = 63; 00652 } 00653 00654 // Figure out which type to encode with the instruction. Typically we want 00655 // the type of the first parameter, as opposed to the type of the instruction 00656 // (for example, with setcc, we always know it returns bool, but the type of 00657 // the first param is actually interesting). But if we have no arguments 00658 // we take the type of the instruction itself. 00659 // 00660 const Type *Ty; 00661 switch (I.getOpcode()) { 00662 case Instruction::Select: 00663 case Instruction::Malloc: 00664 case Instruction::Alloca: 00665 Ty = I.getType(); // These ALWAYS want to encode the return type 00666 break; 00667 case Instruction::Store: 00668 Ty = I.getOperand(1)->getType(); // Encode the pointer type... 00669 assert(isa<PointerType>(Ty) && "Store to nonpointer type!?!?"); 00670 break; 00671 default: // Otherwise use the default behavior... 00672 Ty = NumOperands ? I.getOperand(0)->getType() : I.getType(); 00673 break; 00674 } 00675 00676 unsigned Type; 00677 int Slot = Table.getSlot(Ty); 00678 assert(Slot != -1 && "Type not available!!?!"); 00679 Type = (unsigned)Slot; 00680 00681 // Varargs calls and invokes are encoded entirely different from any other 00682 // instructions. 00683 if (const CallInst *CI = dyn_cast<CallInst>(&I)){ 00684 const PointerType *Ty =cast<PointerType>(CI->getCalledValue()->getType()); 00685 if (cast<FunctionType>(Ty->getElementType())->isVarArg()) { 00686 outputInstrVarArgsCall(CI, Opcode, Table, Type); 00687 return; 00688 } 00689 } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) { 00690 const PointerType *Ty =cast<PointerType>(II->getCalledValue()->getType()); 00691 if (cast<FunctionType>(Ty->getElementType())->isVarArg()) { 00692 outputInstrVarArgsCall(II, Opcode, Table, Type); 00693 return; 00694 } 00695 } 00696 00697 if (NumOperands <= 3) { 00698 // Make sure that we take the type number into consideration. We don't want 00699 // to overflow the field size for the instruction format we select. 00700 // 00701 unsigned MaxOpSlot = Type; 00702 unsigned Slots[3]; Slots[0] = (1 << 12)-1; // Marker to signify 0 operands 00703 00704 for (unsigned i = 0; i != NumOperands; ++i) { 00705 int slot = Table.getSlot(I.getOperand(i)); 00706 assert(slot != -1 && "Broken bytecode!"); 00707 if (unsigned(slot) > MaxOpSlot) MaxOpSlot = unsigned(slot); 00708 Slots[i] = unsigned(slot); 00709 } 00710 00711 // Handle the special cases for various instructions... 00712 if (isa<CastInst>(I) || isa<VAArgInst>(I)) { 00713 // Cast has to encode the destination type as the second argument in the 00714 // packet, or else we won't know what type to cast to! 00715 Slots[1] = Table.getSlot(I.getType()); 00716 assert(Slots[1] != ~0U && "Cast return type unknown?"); 00717 if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; 00718 NumOperands++; 00719 } else if (const AllocationInst *AI = dyn_cast<AllocationInst>(&I)) { 00720 assert(NumOperands == 1 && "Bogus allocation!"); 00721 if (AI->getAlignment()) { 00722 Slots[1] = Log2_32(AI->getAlignment())+1; 00723 if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; 00724 NumOperands = 2; 00725 } 00726 } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) { 00727 // We need to encode the type of sequential type indices into their slot # 00728 unsigned Idx = 1; 00729 for (gep_type_iterator I = gep_type_begin(GEP), E = gep_type_end(GEP); 00730 I != E; ++I, ++Idx) 00731 if (isa<SequentialType>(*I)) { 00732 unsigned IdxId; 00733 switch (GEP->getOperand(Idx)->getType()->getTypeID()) { 00734 default: assert(0 && "Unknown index type!"); 00735 case Type::UIntTyID: IdxId = 0; break; 00736 case Type::IntTyID: IdxId = 1; break; 00737 case Type::ULongTyID: IdxId = 2; break; 00738 case Type::LongTyID: IdxId = 3; break; 00739 } 00740 Slots[Idx] = (Slots[Idx] << 2) | IdxId; 00741 if (Slots[Idx] > MaxOpSlot) MaxOpSlot = Slots[Idx]; 00742 } 00743 } else if (Opcode == 58) { 00744 // If this is the escape sequence for call, emit the tailcall/cc info. 00745 const CallInst &CI = cast<CallInst>(I); 00746 ++NumOperands; 00747 if (NumOperands <= 3) { 00748 Slots[NumOperands-1] = 00749 (CI.getCallingConv() << 1)|unsigned(CI.isTailCall()); 00750 if (Slots[NumOperands-1] > MaxOpSlot) 00751 MaxOpSlot = Slots[NumOperands-1]; 00752 } 00753 } else if (Opcode == 56) { 00754 // Invoke escape seq has at least 4 operands to encode. 00755 ++NumOperands; 00756 } 00757 00758 // Decide which instruction encoding to use. This is determined primarily 00759 // by the number of operands, and secondarily by whether or not the max 00760 // operand will fit into the instruction encoding. More operands == fewer 00761 // bits per operand. 00762 // 00763 switch (NumOperands) { 00764 case 0: 00765 case 1: 00766 if (MaxOpSlot < (1 << 12)-1) { // -1 because we use 4095 to indicate 0 ops 00767 outputInstructionFormat1(&I, Opcode, Slots, Type); 00768 return; 00769 } 00770 break; 00771 00772 case 2: 00773 if (MaxOpSlot < (1 << 8)) { 00774 outputInstructionFormat2(&I, Opcode, Slots, Type); 00775 return; 00776 } 00777 break; 00778 00779 case 3: 00780 if (MaxOpSlot < (1 << 6)) { 00781 outputInstructionFormat3(&I, Opcode, Slots, Type); 00782 return; 00783 } 00784 break; 00785 default: 00786 break; 00787 } 00788 } 00789 00790 // If we weren't handled before here, we either have a large number of 00791 // operands or a large operand index that we are referring to. 00792 outputInstructionFormat0(&I, Opcode, Table, Type); 00793 } 00794 00795 //===----------------------------------------------------------------------===// 00796 //=== Block Output ===// 00797 //===----------------------------------------------------------------------===// 00798 00799 BytecodeWriter::BytecodeWriter(std::vector<unsigned char> &o, const Module *M) 00800 : Out(o), Table(M) { 00801 00802 // Emit the signature... 00803 static const unsigned char *Sig = (const unsigned char*)"llvm"; 00804 output_data(Sig, Sig+4); 00805 00806 // Emit the top level CLASS block. 00807 BytecodeBlock ModuleBlock(BytecodeFormat::ModuleBlockID, *this, false, true); 00808 00809 bool isBigEndian = M->getEndianness() == Module::BigEndian; 00810 bool hasLongPointers = M->getPointerSize() == Module::Pointer64; 00811 bool hasNoEndianness = M->getEndianness() == Module::AnyEndianness; 00812 bool hasNoPointerSize = M->getPointerSize() == Module::AnyPointerSize; 00813 00814 // Output the version identifier and other information. 00815 unsigned Version = (BCVersionNum << 4) | 00816 (unsigned)isBigEndian | (hasLongPointers << 1) | 00817 (hasNoEndianness << 2) | 00818 (hasNoPointerSize << 3); 00819 output_vbr(Version); 00820 00821 // The Global type plane comes first 00822 { 00823 BytecodeBlock CPool(BytecodeFormat::GlobalTypePlaneBlockID, *this ); 00824 outputTypes(Type::FirstDerivedTyID); 00825 } 00826 00827 // The ModuleInfoBlock follows directly after the type information 00828 outputModuleInfoBlock(M); 00829 00830 // Output module level constants, used for global variable initializers 00831 outputConstants(false); 00832 00833 // Do the whole module now! Process each function at a time... 00834 for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) 00835 outputFunction(I); 00836 00837 // If needed, output the symbol table for the module... 00838 outputSymbolTable(M->getSymbolTable()); 00839 } 00840 00841 void BytecodeWriter::outputTypes(unsigned TypeNum) { 00842 // Write the type plane for types first because earlier planes (e.g. for a 00843 // primitive type like float) may have constants constructed using types 00844 // coming later (e.g., via getelementptr from a pointer type). The type 00845 // plane is needed before types can be fwd or bkwd referenced. 00846 const std::vector<const Type*>& Types = Table.getTypes(); 00847 assert(!Types.empty() && "No types at all?"); 00848 assert(TypeNum <= Types.size() && "Invalid TypeNo index"); 00849 00850 unsigned NumEntries = Types.size() - TypeNum; 00851 00852 // Output type header: [num entries] 00853 output_vbr(NumEntries); 00854 00855 for (unsigned i = TypeNum; i < TypeNum+NumEntries; ++i) 00856 outputType(Types[i]); 00857 } 00858 00859 // Helper function for outputConstants(). 00860 // Writes out all the constants in the plane Plane starting at entry StartNo. 00861 // 00862 void BytecodeWriter::outputConstantsInPlane(const std::vector<const Value*> 00863 &Plane, unsigned StartNo) { 00864 unsigned ValNo = StartNo; 00865 00866 // Scan through and ignore function arguments, global values, and constant 00867 // strings. 00868 for (; ValNo < Plane.size() && 00869 (isa<Argument>(Plane[ValNo]) || isa<GlobalValue>(Plane[ValNo]) || 00870 (isa<ConstantArray>(Plane[ValNo]) && 00871 cast<ConstantArray>(Plane[ValNo])->isString())); ValNo++) 00872 /*empty*/; 00873 00874 unsigned NC = ValNo; // Number of constants 00875 for (; NC < Plane.size() && (isa<Constant>(Plane[NC]) || 00876 isa<InlineAsm>(Plane[NC])); NC++) 00877 /*empty*/; 00878 NC -= ValNo; // Convert from index into count 00879 if (NC == 0) return; // Skip empty type planes... 00880 00881 // FIXME: Most slabs only have 1 or 2 entries! We should encode this much 00882 // more compactly. 00883 00884 // Output type header: [num entries][type id number] 00885 // 00886 output_vbr(NC); 00887 00888 // Output the Type ID Number... 00889 int Slot = Table.getSlot(Plane.front()->getType()); 00890 assert (Slot != -1 && "Type in constant pool but not in function!!"); 00891 output_typeid((unsigned)Slot); 00892 00893 for (unsigned i = ValNo; i < ValNo+NC; ++i) { 00894 const Value *V = Plane[i]; 00895 if (const Constant *C = dyn_cast<Constant>(V)) 00896 outputConstant(C); 00897 else 00898 outputInlineAsm(cast<InlineAsm>(V)); 00899 } 00900 } 00901 00902 static inline bool hasNullValue(const Type *Ty) { 00903 return Ty != Type::LabelTy && Ty != Type::VoidTy && !isa<OpaqueType>(Ty); 00904 } 00905 00906 void BytecodeWriter::outputConstants(bool isFunction) { 00907 BytecodeBlock CPool(BytecodeFormat::ConstantPoolBlockID, *this, 00908 true /* Elide block if empty */); 00909 00910 unsigned NumPlanes = Table.getNumPlanes(); 00911 00912 if (isFunction) 00913 // Output the type plane before any constants! 00914 outputTypes(Table.getModuleTypeLevel()); 00915 else 00916 // Output module-level string constants before any other constants. 00917 outputConstantStrings(); 00918 00919 for (unsigned pno = 0; pno != NumPlanes; pno++) { 00920 const std::vector<const Value*> &Plane = Table.getPlane(pno); 00921 if (!Plane.empty()) { // Skip empty type planes... 00922 unsigned ValNo = 0; 00923 if (isFunction) // Don't re-emit module constants 00924 ValNo += Table.getModuleLevel(pno); 00925 00926 if (hasNullValue(Plane[0]->getType())) { 00927 // Skip zero initializer 00928 if (ValNo == 0) 00929 ValNo = 1; 00930 } 00931 00932 // Write out constants in the plane 00933 outputConstantsInPlane(Plane, ValNo); 00934 } 00935 } 00936 } 00937 00938 static unsigned getEncodedLinkage(const GlobalValue *GV) { 00939 switch (GV->getLinkage()) { 00940 default: assert(0 && "Invalid linkage!"); 00941 case GlobalValue::ExternalLinkage: return 0; 00942 case GlobalValue::WeakLinkage: return 1; 00943 case GlobalValue::AppendingLinkage: return 2; 00944 case GlobalValue::InternalLinkage: return 3; 00945 case GlobalValue::LinkOnceLinkage: return 4; 00946 } 00947 } 00948 00949 void BytecodeWriter::outputModuleInfoBlock(const Module *M) { 00950 BytecodeBlock ModuleInfoBlock(BytecodeFormat::ModuleGlobalInfoBlockID, *this); 00951 00952 // Give numbers to sections as we encounter them. 00953 unsigned SectionIDCounter = 0; 00954 std::vector<std::string> SectionNames; 00955 std::map<std::string, unsigned> SectionID; 00956 00957 // Output the types for the global variables in the module... 00958 for (Module::const_global_iterator I = M->global_begin(), 00959 End = M->global_end(); I != End; ++I) { 00960 int Slot = Table.getSlot(I->getType()); 00961 assert(Slot != -1 && "Module global vars is broken!"); 00962 00963 assert((I->hasInitializer() || !I->hasInternalLinkage()) && 00964 "Global must have an initializer or have external linkage!"); 00965 00966 // Fields: bit0 = isConstant, bit1 = hasInitializer, bit2-4=Linkage, 00967 // bit5+ = Slot # for type. 00968 bool HasExtensionWord = (I->getAlignment() != 0) || I->hasSection(); 00969 00970 // If we need to use the extension byte, set linkage=3(internal) and 00971 // initializer = 0 (impossible!). 00972 if (!HasExtensionWord) { 00973 unsigned oSlot = ((unsigned)Slot << 5) | (getEncodedLinkage(I) << 2) | 00974 (I->hasInitializer() << 1) | (unsigned)I->isConstant(); 00975 output_vbr(oSlot); 00976 } else { 00977 unsigned oSlot = ((unsigned)Slot << 5) | (3 << 2) | 00978 (0 << 1) | (unsigned)I->isConstant(); 00979 output_vbr(oSlot); 00980 00981 // The extension word has this format: bit 0 = has initializer, bit 1-3 = 00982 // linkage, bit 4-8 = alignment (log2), bit 9 = has SectionID, 00983 // bits 10+ = future use. 00984 unsigned ExtWord = (unsigned)I->hasInitializer() | 00985 (getEncodedLinkage(I) << 1) | 00986 ((Log2_32(I->getAlignment())+1) << 4) | 00987 ((unsigned)I->hasSection() << 9); 00988 output_vbr(ExtWord); 00989 if (I->hasSection()) { 00990 // Give section names unique ID's. 00991 unsigned &Entry = SectionID[I->getSection()]; 00992 if (Entry == 0) { 00993 Entry = ++SectionIDCounter; 00994 SectionNames.push_back(I->getSection()); 00995 } 00996 output_vbr(Entry); 00997 } 00998 } 00999 01000 // If we have an initializer, output it now. 01001 if (I->hasInitializer()) { 01002 Slot = Table.getSlot((Value*)I->getInitializer()); 01003 assert(Slot != -1 && "No slot for global var initializer!"); 01004 output_vbr((unsigned)Slot); 01005 } 01006 } 01007 output_typeid((unsigned)Table.getSlot(Type::VoidTy)); 01008 01009 // Output the types of the functions in this module. 01010 for (Module::const_iterator I = M->begin(), End = M->end(); I != End; ++I) { 01011 int Slot = Table.getSlot(I->getType()); 01012 assert(Slot != -1 && "Module slot calculator is broken!"); 01013 assert(Slot >= Type::FirstDerivedTyID && "Derived type not in range!"); 01014 assert(((Slot << 6) >> 6) == Slot && "Slot # too big!"); 01015 unsigned CC = I->getCallingConv()+1; 01016 unsigned ID = (Slot << 5) | (CC & 15); 01017 01018 if (I->isExternal()) // If external, we don't have an FunctionInfo block. 01019 ID |= 1 << 4; 01020 01021 if (I->getAlignment() || I->hasSection() || (CC & ~15) != 0) 01022 ID |= 1 << 31; // Do we need an extension word? 01023 01024 output_vbr(ID); 01025 01026 if (ID & (1 << 31)) { 01027 // Extension byte: bits 0-4 = alignment, bits 5-9 = top nibble of calling 01028 // convention, bit 10 = hasSectionID. 01029 ID = (Log2_32(I->getAlignment())+1) | ((CC >> 4) << 5) | 01030 (I->hasSection() << 10); 01031 output_vbr(ID); 01032 01033 // Give section names unique ID's. 01034 if (I->hasSection()) { 01035 unsigned &Entry = SectionID[I->getSection()]; 01036 if (Entry == 0) { 01037 Entry = ++SectionIDCounter; 01038 SectionNames.push_back(I->getSection()); 01039 } 01040 output_vbr(Entry); 01041 } 01042 } 01043 } 01044 output_vbr((unsigned)Table.getSlot(Type::VoidTy) << 5); 01045 01046 // Emit the list of dependent libraries for the Module. 01047 Module::lib_iterator LI = M->lib_begin(); 01048 Module::lib_iterator LE = M->lib_end(); 01049 output_vbr(unsigned(LE - LI)); // Emit the number of dependent libraries. 01050 for (; LI != LE; ++LI) 01051 output(*LI); 01052 01053 // Output the target triple from the module 01054 output(M->getTargetTriple()); 01055 01056 // Emit the table of section names. 01057 output_vbr((unsigned)SectionNames.size()); 01058 for (unsigned i = 0, e = SectionNames.size(); i != e; ++i) 01059 output(SectionNames[i]); 01060 01061 // Output the inline asm string. 01062 output(M->getModuleInlineAsm()); 01063 } 01064 01065 void BytecodeWriter::outputInstructions(const Function *F) { 01066 BytecodeBlock ILBlock(BytecodeFormat::InstructionListBlockID, *this); 01067 for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) 01068 for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) 01069 outputInstruction(*I); 01070 } 01071 01072 void BytecodeWriter::outputFunction(const Function *F) { 01073 // If this is an external function, there is nothing else to emit! 01074 if (F->isExternal()) return; 01075 01076 BytecodeBlock FunctionBlock(BytecodeFormat::FunctionBlockID, *this); 01077 output_vbr(getEncodedLinkage(F)); 01078 01079 // Get slot information about the function... 01080 Table.incorporateFunction(F); 01081 01082 if (Table.getCompactionTable().empty()) { 01083 // Output information about the constants in the function if the compaction 01084 // table is not being used. 01085 outputConstants(true); 01086 } else { 01087 // Otherwise, emit the compaction table. 01088 outputCompactionTable(); 01089 } 01090 01091 // Output all of the instructions in the body of the function 01092 outputInstructions(F); 01093 01094 // If needed, output the symbol table for the function... 01095 outputSymbolTable(F->getSymbolTable()); 01096 01097 Table.purgeFunction(); 01098 } 01099 01100 void BytecodeWriter::outputCompactionTablePlane(unsigned PlaneNo, 01101 const std::vector<const Value*> &Plane, 01102 unsigned StartNo) { 01103 unsigned End = Table.getModuleLevel(PlaneNo); 01104 if (Plane.empty() || StartNo == End || End == 0) return; // Nothing to emit 01105 assert(StartNo < End && "Cannot emit negative range!"); 01106 assert(StartNo < Plane.size() && End <= Plane.size()); 01107 01108 // Do not emit the null initializer! 01109 ++StartNo; 01110 01111 // Figure out which encoding to use. By far the most common case we have is 01112 // to emit 0-2 entries in a compaction table plane. 01113 switch (End-StartNo) { 01114 case 0: // Avoid emitting two vbr's if possible. 01115 case 1: 01116 case 2: 01117 output_vbr((PlaneNo << 2) | End-StartNo); 01118 break; 01119 default: 01120 // Output the number of things. 01121 output_vbr((unsigned(End-StartNo) << 2) | 3); 01122 output_typeid(PlaneNo); // Emit the type plane this is 01123 break; 01124 } 01125 01126 for (unsigned i = StartNo; i != End; ++i) 01127 output_vbr(Table.getGlobalSlot(Plane[i])); 01128 } 01129 01130 void BytecodeWriter::outputCompactionTypes(unsigned StartNo) { 01131 // Get the compaction type table from the slot calculator 01132 const std::vector<const Type*> &CTypes = Table.getCompactionTypes(); 01133 01134 // The compaction types may have been uncompactified back to the 01135 // global types. If so, we just write an empty table 01136 if (CTypes.size() == 0 ) { 01137 output_vbr(0U); 01138 return; 01139 } 01140 01141 assert(CTypes.size() >= StartNo && "Invalid compaction types start index"); 01142 01143 // Determine how many types to write 01144 unsigned NumTypes = CTypes.size() - StartNo; 01145 01146 // Output the number of types. 01147 output_vbr(NumTypes); 01148 01149 for (unsigned i = StartNo; i < StartNo+NumTypes; ++i) 01150 output_typeid(Table.getGlobalSlot(CTypes[i])); 01151 } 01152 01153 void BytecodeWriter::outputCompactionTable() { 01154 // Avoid writing the compaction table at all if there is no content. 01155 if (Table.getCompactionTypes().size() >= Type::FirstDerivedTyID || 01156 (!Table.CompactionTableIsEmpty())) { 01157 BytecodeBlock CTB(BytecodeFormat::CompactionTableBlockID, *this, 01158 true/*ElideIfEmpty*/); 01159 const std::vector<std::vector<const Value*> > &CT = 01160 Table.getCompactionTable(); 01161 01162 // First things first, emit the type compaction table if there is one. 01163 outputCompactionTypes(Type::FirstDerivedTyID); 01164 01165 for (unsigned i = 0, e = CT.size(); i != e; ++i) 01166 outputCompactionTablePlane(i, CT[i], 0); 01167 } 01168 } 01169 01170 void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) { 01171 // Do not output the Bytecode block for an empty symbol table, it just wastes 01172 // space! 01173 if (MST.isEmpty()) return; 01174 01175 BytecodeBlock SymTabBlock(BytecodeFormat::SymbolTableBlockID, *this, 01176 true/*ElideIfEmpty*/); 01177 01178 // Write the number of types 01179 output_vbr(MST.num_types()); 01180 01181 // Write each of the types 01182 for (SymbolTable::type_const_iterator TI = MST.type_begin(), 01183 TE = MST.type_end(); TI != TE; ++TI ) { 01184 // Symtab entry:[def slot #][name] 01185 output_typeid((unsigned)Table.getSlot(TI->second)); 01186 output(TI->first); 01187 } 01188 01189 // Now do each of the type planes in order. 01190 for (SymbolTable::plane_const_iterator PI = MST.plane_begin(), 01191 PE = MST.plane_end(); PI != PE; ++PI) { 01192 SymbolTable::value_const_iterator I = MST.value_begin(PI->first); 01193 SymbolTable::value_const_iterator End = MST.value_end(PI->first); 01194 int Slot; 01195 01196 if (I == End) continue; // Don't mess with an absent type... 01197 01198 // Write the number of values in this plane 01199 output_vbr((unsigned)PI->second.size()); 01200 01201 // Write the slot number of the type for this plane 01202 Slot = Table.getSlot(PI->first); 01203 assert(Slot != -1 && "Type in symtab, but not in table!"); 01204 output_typeid((unsigned)Slot); 01205 01206 // Write each of the values in this plane 01207 for (; I != End; ++I) { 01208 // Symtab entry: [def slot #][name] 01209 Slot = Table.getSlot(I->second); 01210 assert(Slot != -1 && "Value in symtab but has no slot number!!"); 01211 output_vbr((unsigned)Slot); 01212 output(I->first); 01213 } 01214 } 01215 } 01216 01217 void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out, 01218 bool compress ) { 01219 assert(M && "You can't write a null module!!"); 01220 01221 // Make sure that std::cout is put into binary mode for systems 01222 // that care. 01223 if (&Out == std::cout) 01224 sys::Program::ChangeStdoutToBinary(); 01225 01226 // Create a vector of unsigned char for the bytecode output. We 01227 // reserve 256KBytes of space in the vector so that we avoid doing 01228 // lots of little allocations. 256KBytes is sufficient for a large 01229 // proportion of the bytecode files we will encounter. Larger files 01230 // will be automatically doubled in size as needed (std::vector 01231 // behavior). 01232 std::vector<unsigned char> Buffer; 01233 Buffer.reserve(256 * 1024); 01234 01235 // The BytecodeWriter populates Buffer for us. 01236 BytecodeWriter BCW(Buffer, M); 01237 01238 // Keep track of how much we've written 01239 BytesWritten += Buffer.size(); 01240 01241 // Determine start and end points of the Buffer 01242 const unsigned char *FirstByte = &Buffer.front(); 01243 01244 // If we're supposed to compress this mess ... 01245 if (compress) { 01246 01247 // We signal compression by using an alternate magic number for the 01248 // file. The compressed bytecode file's magic number is "llvc" instead 01249 // of "llvm". 01250 char compressed_magic[4]; 01251 compressed_magic[0] = 'l'; 01252 compressed_magic[1] = 'l'; 01253 compressed_magic[2] = 'v'; 01254 compressed_magic[3] = 'c'; 01255 01256 Out.write(compressed_magic,4); 01257 01258 // Compress everything after the magic number (which we altered) 01259 uint64_t zipSize = Compressor::compressToStream( 01260 (char*)(FirstByte+4), // Skip the magic number 01261 Buffer.size()-4, // Skip the magic number 01262 Out // Where to write compressed data 01263 ); 01264 01265 } else { 01266 01267 // We're not compressing, so just write the entire block. 01268 Out.write((char*)FirstByte, Buffer.size()); 01269 } 01270 01271 // make sure it hits disk now 01272 Out.flush(); 01273 } 01274