LLVM API Documentation
00001 //===-- Writer.cpp - Library for writing LLVM bytecode files --------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This library implements the functionality defined in llvm/Bytecode/Writer.h 00011 // 00012 // Note that this file uses an unusual technique of outputting all the bytecode 00013 // to a vector of unsigned char, then copies the vector to an ostream. The 00014 // reason for this is that we must do "seeking" in the stream to do back- 00015 // patching, and some very important ostreams that we want to support (like 00016 // pipes) do not support seeking. :( :( :( 00017 // 00018 //===----------------------------------------------------------------------===// 00019 00020 #include "WriterInternals.h" 00021 #include "llvm/Bytecode/WriteBytecodePass.h" 00022 #include "llvm/Constants.h" 00023 #include "llvm/DerivedTypes.h" 00024 #include "llvm/Instructions.h" 00025 #include "llvm/Module.h" 00026 #include "llvm/SymbolTable.h" 00027 #include "llvm/Support/GetElementPtrTypeIterator.h" 00028 #include "llvm/Support/Compressor.h" 00029 #include "llvm/ADT/STLExtras.h" 00030 #include "llvm/ADT/Statistic.h" 00031 #include <cstring> 00032 #include <algorithm> 00033 using namespace llvm; 00034 00035 /// This value needs to be incremented every time the bytecode format changes 00036 /// so that the reader can distinguish which format of the bytecode file has 00037 /// been written. 00038 /// @brief The bytecode version number 00039 const unsigned BCVersionNum = 5; 00040 00041 static RegisterPass<WriteBytecodePass> X("emitbytecode", "Bytecode Writer"); 00042 00043 static Statistic<> 00044 BytesWritten("bytecodewriter", "Number of bytecode bytes written"); 00045 00046 //===----------------------------------------------------------------------===// 00047 //=== Output Primitives ===// 00048 //===----------------------------------------------------------------------===// 00049 00050 // output - If a position is specified, it must be in the valid portion of the 00051 // string... note that this should be inlined always so only the relevant IF 00052 // body should be included. 00053 inline void BytecodeWriter::output(unsigned i, int pos) { 00054 if (pos == -1) { // Be endian clean, little endian is our friend 00055 Out.push_back((unsigned char)i); 00056 Out.push_back((unsigned char)(i >> 8)); 00057 Out.push_back((unsigned char)(i >> 16)); 00058 Out.push_back((unsigned char)(i >> 24)); 00059 } else { 00060 Out[pos ] = (unsigned char)i; 00061 Out[pos+1] = (unsigned char)(i >> 8); 00062 Out[pos+2] = (unsigned char)(i >> 16); 00063 Out[pos+3] = (unsigned char)(i >> 24); 00064 } 00065 } 00066 00067 inline void BytecodeWriter::output(int i) { 00068 output((unsigned)i); 00069 } 00070 00071 /// output_vbr - Output an unsigned value, by using the least number of bytes 00072 /// possible. This is useful because many of our "infinite" values are really 00073 /// very small most of the time; but can be large a few times. 00074 /// Data format used: If you read a byte with the high bit set, use the low 00075 /// seven bits as data and then read another byte. 00076 inline void BytecodeWriter::output_vbr(uint64_t i) { 00077 while (1) { 00078 if (i < 0x80) { // done? 00079 Out.push_back((unsigned char)i); // We know the high bit is clear... 00080 return; 00081 } 00082 00083 // Nope, we are bigger than a character, output the next 7 bits and set the 00084 // high bit to say that there is more coming... 00085 Out.push_back(0x80 | ((unsigned char)i & 0x7F)); 00086 i >>= 7; // Shift out 7 bits now... 00087 } 00088 } 00089 00090 inline void BytecodeWriter::output_vbr(unsigned i) { 00091 while (1) { 00092 if (i < 0x80) { // done? 00093 Out.push_back((unsigned char)i); // We know the high bit is clear... 00094 return; 00095 } 00096 00097 // Nope, we are bigger than a character, output the next 7 bits and set the 00098 // high bit to say that there is more coming... 00099 Out.push_back(0x80 | ((unsigned char)i & 0x7F)); 00100 i >>= 7; // Shift out 7 bits now... 00101 } 00102 } 00103 00104 inline void BytecodeWriter::output_typeid(unsigned i) { 00105 if (i <= 0x00FFFFFF) 00106 this->output_vbr(i); 00107 else { 00108 this->output_vbr(0x00FFFFFF); 00109 this->output_vbr(i); 00110 } 00111 } 00112 00113 inline void BytecodeWriter::output_vbr(int64_t i) { 00114 if (i < 0) 00115 output_vbr(((uint64_t)(-i) << 1) | 1); // Set low order sign bit... 00116 else 00117 output_vbr((uint64_t)i << 1); // Low order bit is clear. 00118 } 00119 00120 00121 inline void BytecodeWriter::output_vbr(int i) { 00122 if (i < 0) 00123 output_vbr(((unsigned)(-i) << 1) | 1); // Set low order sign bit... 00124 else 00125 output_vbr((unsigned)i << 1); // Low order bit is clear. 00126 } 00127 00128 inline void BytecodeWriter::output(const std::string &s) { 00129 unsigned Len = s.length(); 00130 output_vbr(Len ); // Strings may have an arbitrary length... 00131 Out.insert(Out.end(), s.begin(), s.end()); 00132 } 00133 00134 inline void BytecodeWriter::output_data(const void *Ptr, const void *End) { 00135 Out.insert(Out.end(), (const unsigned char*)Ptr, (const unsigned char*)End); 00136 } 00137 00138 inline void BytecodeWriter::output_float(float& FloatVal) { 00139 /// FIXME: This isn't optimal, it has size problems on some platforms 00140 /// where FP is not IEEE. 00141 union { 00142 float f; 00143 uint32_t i; 00144 } FloatUnion; 00145 FloatUnion.f = FloatVal; 00146 Out.push_back( static_cast<unsigned char>( (FloatUnion.i & 0xFF ))); 00147 Out.push_back( static_cast<unsigned char>( (FloatUnion.i >> 8) & 0xFF)); 00148 Out.push_back( static_cast<unsigned char>( (FloatUnion.i >> 16) & 0xFF)); 00149 Out.push_back( static_cast<unsigned char>( (FloatUnion.i >> 24) & 0xFF)); 00150 } 00151 00152 inline void BytecodeWriter::output_double(double& DoubleVal) { 00153 /// FIXME: This isn't optimal, it has size problems on some platforms 00154 /// where FP is not IEEE. 00155 union { 00156 double d; 00157 uint64_t i; 00158 } DoubleUnion; 00159 DoubleUnion.d = DoubleVal; 00160 Out.push_back( static_cast<unsigned char>( (DoubleUnion.i & 0xFF ))); 00161 Out.push_back( static_cast<unsigned char>( (DoubleUnion.i >> 8) & 0xFF)); 00162 Out.push_back( static_cast<unsigned char>( (DoubleUnion.i >> 16) & 0xFF)); 00163 Out.push_back( static_cast<unsigned char>( (DoubleUnion.i >> 24) & 0xFF)); 00164 Out.push_back( static_cast<unsigned char>( (DoubleUnion.i >> 32) & 0xFF)); 00165 Out.push_back( static_cast<unsigned char>( (DoubleUnion.i >> 40) & 0xFF)); 00166 Out.push_back( static_cast<unsigned char>( (DoubleUnion.i >> 48) & 0xFF)); 00167 Out.push_back( static_cast<unsigned char>( (DoubleUnion.i >> 56) & 0xFF)); 00168 } 00169 00170 inline BytecodeBlock::BytecodeBlock(unsigned ID, BytecodeWriter& w, 00171 bool elideIfEmpty, bool hasLongFormat ) 00172 : Id(ID), Writer(w), ElideIfEmpty(elideIfEmpty), HasLongFormat(hasLongFormat){ 00173 00174 if (HasLongFormat) { 00175 w.output(ID); 00176 w.output(0U); // For length in long format 00177 } else { 00178 w.output(0U); /// Place holder for ID and length for this block 00179 } 00180 Loc = w.size(); 00181 } 00182 00183 inline BytecodeBlock::~BytecodeBlock() { // Do backpatch when block goes out 00184 // of scope... 00185 if (Loc == Writer.size() && ElideIfEmpty) { 00186 // If the block is empty, and we are allowed to, do not emit the block at 00187 // all! 00188 Writer.resize(Writer.size()-(HasLongFormat?8:4)); 00189 return; 00190 } 00191 00192 if (HasLongFormat) 00193 Writer.output(unsigned(Writer.size()-Loc), int(Loc-4)); 00194 else 00195 Writer.output(unsigned(Writer.size()-Loc) << 5 | (Id & 0x1F), int(Loc-4)); 00196 } 00197 00198 //===----------------------------------------------------------------------===// 00199 //=== Constant Output ===// 00200 //===----------------------------------------------------------------------===// 00201 00202 void BytecodeWriter::outputType(const Type *T) { 00203 output_vbr((unsigned)T->getTypeID()); 00204 00205 // That's all there is to handling primitive types... 00206 if (T->isPrimitiveType()) { 00207 return; // We might do this if we alias a prim type: %x = type int 00208 } 00209 00210 switch (T->getTypeID()) { // Handle derived types now. 00211 case Type::FunctionTyID: { 00212 const FunctionType *MT = cast<FunctionType>(T); 00213 int Slot = Table.getSlot(MT->getReturnType()); 00214 assert(Slot != -1 && "Type used but not available!!"); 00215 output_typeid((unsigned)Slot); 00216 00217 // Output the number of arguments to function (+1 if varargs): 00218 output_vbr((unsigned)MT->getNumParams()+MT->isVarArg()); 00219 00220 // Output all of the arguments... 00221 FunctionType::param_iterator I = MT->param_begin(); 00222 for (; I != MT->param_end(); ++I) { 00223 Slot = Table.getSlot(*I); 00224 assert(Slot != -1 && "Type used but not available!!"); 00225 output_typeid((unsigned)Slot); 00226 } 00227 00228 // Terminate list with VoidTy if we are a varargs function... 00229 if (MT->isVarArg()) 00230 output_typeid((unsigned)Type::VoidTyID); 00231 break; 00232 } 00233 00234 case Type::ArrayTyID: { 00235 const ArrayType *AT = cast<ArrayType>(T); 00236 int Slot = Table.getSlot(AT->getElementType()); 00237 assert(Slot != -1 && "Type used but not available!!"); 00238 output_typeid((unsigned)Slot); 00239 output_vbr(AT->getNumElements()); 00240 break; 00241 } 00242 00243 case Type::PackedTyID: { 00244 const PackedType *PT = cast<PackedType>(T); 00245 int Slot = Table.getSlot(PT->getElementType()); 00246 assert(Slot != -1 && "Type used but not available!!"); 00247 output_typeid((unsigned)Slot); 00248 output_vbr(PT->getNumElements()); 00249 break; 00250 } 00251 00252 00253 case Type::StructTyID: { 00254 const StructType *ST = cast<StructType>(T); 00255 00256 // Output all of the element types... 00257 for (StructType::element_iterator I = ST->element_begin(), 00258 E = ST->element_end(); I != E; ++I) { 00259 int Slot = Table.getSlot(*I); 00260 assert(Slot != -1 && "Type used but not available!!"); 00261 output_typeid((unsigned)Slot); 00262 } 00263 00264 // Terminate list with VoidTy 00265 output_typeid((unsigned)Type::VoidTyID); 00266 break; 00267 } 00268 00269 case Type::PointerTyID: { 00270 const PointerType *PT = cast<PointerType>(T); 00271 int Slot = Table.getSlot(PT->getElementType()); 00272 assert(Slot != -1 && "Type used but not available!!"); 00273 output_typeid((unsigned)Slot); 00274 break; 00275 } 00276 00277 case Type::OpaqueTyID: 00278 // No need to emit anything, just the count of opaque types is enough. 00279 break; 00280 00281 default: 00282 std::cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" 00283 << " Type '" << T->getDescription() << "'\n"; 00284 break; 00285 } 00286 } 00287 00288 void BytecodeWriter::outputConstant(const Constant *CPV) { 00289 assert((CPV->getType()->isPrimitiveType() || !CPV->isNullValue()) && 00290 "Shouldn't output null constants!"); 00291 00292 // We must check for a ConstantExpr before switching by type because 00293 // a ConstantExpr can be of any type, and has no explicit value. 00294 // 00295 if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) { 00296 // FIXME: Encoding of constant exprs could be much more compact! 00297 assert(CE->getNumOperands() > 0 && "ConstantExpr with 0 operands"); 00298 assert(CE->getNumOperands() != 1 || CE->getOpcode() == Instruction::Cast); 00299 output_vbr(1+CE->getNumOperands()); // flags as an expr 00300 output_vbr(CE->getOpcode()); // flags as an expr 00301 00302 for (User::const_op_iterator OI = CE->op_begin(); OI != CE->op_end(); ++OI){ 00303 int Slot = Table.getSlot(*OI); 00304 assert(Slot != -1 && "Unknown constant used in ConstantExpr!!"); 00305 output_vbr((unsigned)Slot); 00306 Slot = Table.getSlot((*OI)->getType()); 00307 output_typeid((unsigned)Slot); 00308 } 00309 return; 00310 } else if (isa<UndefValue>(CPV)) { 00311 output_vbr(1U); // 1 -> UndefValue constant. 00312 return; 00313 } else { 00314 output_vbr(0U); // flag as not a ConstantExpr 00315 } 00316 00317 switch (CPV->getType()->getTypeID()) { 00318 case Type::BoolTyID: // Boolean Types 00319 if (cast<ConstantBool>(CPV)->getValue()) 00320 output_vbr(1U); 00321 else 00322 output_vbr(0U); 00323 break; 00324 00325 case Type::UByteTyID: // Unsigned integer types... 00326 case Type::UShortTyID: 00327 case Type::UIntTyID: 00328 case Type::ULongTyID: 00329 output_vbr(cast<ConstantUInt>(CPV)->getValue()); 00330 break; 00331 00332 case Type::SByteTyID: // Signed integer types... 00333 case Type::ShortTyID: 00334 case Type::IntTyID: 00335 case Type::LongTyID: 00336 output_vbr(cast<ConstantSInt>(CPV)->getValue()); 00337 break; 00338 00339 case Type::ArrayTyID: { 00340 const ConstantArray *CPA = cast<ConstantArray>(CPV); 00341 assert(!CPA->isString() && "Constant strings should be handled specially!"); 00342 00343 for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i) { 00344 int Slot = Table.getSlot(CPA->getOperand(i)); 00345 assert(Slot != -1 && "Constant used but not available!!"); 00346 output_vbr((unsigned)Slot); 00347 } 00348 break; 00349 } 00350 00351 case Type::PackedTyID: { 00352 const ConstantPacked *CP = cast<ConstantPacked>(CPV); 00353 00354 for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) { 00355 int Slot = Table.getSlot(CP->getOperand(i)); 00356 assert(Slot != -1 && "Constant used but not available!!"); 00357 output_vbr((unsigned)Slot); 00358 } 00359 break; 00360 } 00361 00362 case Type::StructTyID: { 00363 const ConstantStruct *CPS = cast<ConstantStruct>(CPV); 00364 00365 for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i) { 00366 int Slot = Table.getSlot(CPS->getOperand(i)); 00367 assert(Slot != -1 && "Constant used but not available!!"); 00368 output_vbr((unsigned)Slot); 00369 } 00370 break; 00371 } 00372 00373 case Type::PointerTyID: 00374 assert(0 && "No non-null, non-constant-expr constants allowed!"); 00375 abort(); 00376 00377 case Type::FloatTyID: { // Floating point types... 00378 float Tmp = (float)cast<ConstantFP>(CPV)->getValue(); 00379 output_float(Tmp); 00380 break; 00381 } 00382 case Type::DoubleTyID: { 00383 double Tmp = cast<ConstantFP>(CPV)->getValue(); 00384 output_double(Tmp); 00385 break; 00386 } 00387 00388 case Type::VoidTyID: 00389 case Type::LabelTyID: 00390 default: 00391 std::cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" 00392 << " type '" << *CPV->getType() << "'\n"; 00393 break; 00394 } 00395 return; 00396 } 00397 00398 void BytecodeWriter::outputConstantStrings() { 00399 SlotCalculator::string_iterator I = Table.string_begin(); 00400 SlotCalculator::string_iterator E = Table.string_end(); 00401 if (I == E) return; // No strings to emit 00402 00403 // If we have != 0 strings to emit, output them now. Strings are emitted into 00404 // the 'void' type plane. 00405 output_vbr(unsigned(E-I)); 00406 output_typeid(Type::VoidTyID); 00407 00408 // Emit all of the strings. 00409 for (I = Table.string_begin(); I != E; ++I) { 00410 const ConstantArray *Str = *I; 00411 int Slot = Table.getSlot(Str->getType()); 00412 assert(Slot != -1 && "Constant string of unknown type?"); 00413 output_typeid((unsigned)Slot); 00414 00415 // Now that we emitted the type (which indicates the size of the string), 00416 // emit all of the characters. 00417 std::string Val = Str->getAsString(); 00418 output_data(Val.c_str(), Val.c_str()+Val.size()); 00419 } 00420 } 00421 00422 //===----------------------------------------------------------------------===// 00423 //=== Instruction Output ===// 00424 //===----------------------------------------------------------------------===// 00425 typedef unsigned char uchar; 00426 00427 // outputInstructionFormat0 - Output those wierd instructions that have a large 00428 // number of operands or have large operands themselves... 00429 // 00430 // Format: [opcode] [type] [numargs] [arg0] [arg1] ... [arg<numargs-1>] 00431 // 00432 void BytecodeWriter::outputInstructionFormat0(const Instruction *I, 00433 unsigned Opcode, 00434 const SlotCalculator &Table, 00435 unsigned Type) { 00436 // Opcode must have top two bits clear... 00437 output_vbr(Opcode << 2); // Instruction Opcode ID 00438 output_typeid(Type); // Result type 00439 00440 unsigned NumArgs = I->getNumOperands(); 00441 output_vbr(NumArgs + (isa<CastInst>(I) || isa<VANextInst>(I) || 00442 isa<VAArgInst>(I))); 00443 00444 if (!isa<GetElementPtrInst>(&I)) { 00445 for (unsigned i = 0; i < NumArgs; ++i) { 00446 int Slot = Table.getSlot(I->getOperand(i)); 00447 assert(Slot >= 0 && "No slot number for value!?!?"); 00448 output_vbr((unsigned)Slot); 00449 } 00450 00451 if (isa<CastInst>(I) || isa<VAArgInst>(I)) { 00452 int Slot = Table.getSlot(I->getType()); 00453 assert(Slot != -1 && "Cast return type unknown?"); 00454 output_typeid((unsigned)Slot); 00455 } else if (const VANextInst *VAI = dyn_cast<VANextInst>(I)) { 00456 int Slot = Table.getSlot(VAI->getArgType()); 00457 assert(Slot != -1 && "VarArg argument type unknown?"); 00458 output_typeid((unsigned)Slot); 00459 } 00460 00461 } else { 00462 int Slot = Table.getSlot(I->getOperand(0)); 00463 assert(Slot >= 0 && "No slot number for value!?!?"); 00464 output_vbr(unsigned(Slot)); 00465 00466 // We need to encode the type of sequential type indices into their slot # 00467 unsigned Idx = 1; 00468 for (gep_type_iterator TI = gep_type_begin(I), E = gep_type_end(I); 00469 Idx != NumArgs; ++TI, ++Idx) { 00470 Slot = Table.getSlot(I->getOperand(Idx)); 00471 assert(Slot >= 0 && "No slot number for value!?!?"); 00472 00473 if (isa<SequentialType>(*TI)) { 00474 unsigned IdxId; 00475 switch (I->getOperand(Idx)->getType()->getTypeID()) { 00476 default: assert(0 && "Unknown index type!"); 00477 case Type::UIntTyID: IdxId = 0; break; 00478 case Type::IntTyID: IdxId = 1; break; 00479 case Type::ULongTyID: IdxId = 2; break; 00480 case Type::LongTyID: IdxId = 3; break; 00481 } 00482 Slot = (Slot << 2) | IdxId; 00483 } 00484 output_vbr(unsigned(Slot)); 00485 } 00486 } 00487 } 00488 00489 00490 // outputInstrVarArgsCall - Output the absurdly annoying varargs function calls. 00491 // This are more annoying than most because the signature of the call does not 00492 // tell us anything about the types of the arguments in the varargs portion. 00493 // Because of this, we encode (as type 0) all of the argument types explicitly 00494 // before the argument value. This really sucks, but you shouldn't be using 00495 // varargs functions in your code! *death to printf*! 00496 // 00497 // Format: [opcode] [type] [numargs] [arg0] [arg1] ... [arg<numargs-1>] 00498 // 00499 void BytecodeWriter::outputInstrVarArgsCall(const Instruction *I, 00500 unsigned Opcode, 00501 const SlotCalculator &Table, 00502 unsigned Type) { 00503 assert(isa<CallInst>(I) || isa<InvokeInst>(I)); 00504 // Opcode must have top two bits clear... 00505 output_vbr(Opcode << 2); // Instruction Opcode ID 00506 output_typeid(Type); // Result type (varargs type) 00507 00508 const PointerType *PTy = cast<PointerType>(I->getOperand(0)->getType()); 00509 const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); 00510 unsigned NumParams = FTy->getNumParams(); 00511 00512 unsigned NumFixedOperands; 00513 if (isa<CallInst>(I)) { 00514 // Output an operand for the callee and each fixed argument, then two for 00515 // each variable argument. 00516 NumFixedOperands = 1+NumParams; 00517 } else { 00518 assert(isa<InvokeInst>(I) && "Not call or invoke??"); 00519 // Output an operand for the callee and destinations, then two for each 00520 // variable argument. 00521 NumFixedOperands = 3+NumParams; 00522 } 00523 output_vbr(2 * I->getNumOperands()-NumFixedOperands); 00524 00525 // The type for the function has already been emitted in the type field of the 00526 // instruction. Just emit the slot # now. 00527 for (unsigned i = 0; i != NumFixedOperands; ++i) { 00528 int Slot = Table.getSlot(I->getOperand(i)); 00529 assert(Slot >= 0 && "No slot number for value!?!?"); 00530 output_vbr((unsigned)Slot); 00531 } 00532 00533 for (unsigned i = NumFixedOperands, e = I->getNumOperands(); i != e; ++i) { 00534 // Output Arg Type ID 00535 int Slot = Table.getSlot(I->getOperand(i)->getType()); 00536 assert(Slot >= 0 && "No slot number for value!?!?"); 00537 output_typeid((unsigned)Slot); 00538 00539 // Output arg ID itself 00540 Slot = Table.getSlot(I->getOperand(i)); 00541 assert(Slot >= 0 && "No slot number for value!?!?"); 00542 output_vbr((unsigned)Slot); 00543 } 00544 } 00545 00546 00547 // outputInstructionFormat1 - Output one operand instructions, knowing that no 00548 // operand index is >= 2^12. 00549 // 00550 inline void BytecodeWriter::outputInstructionFormat1(const Instruction *I, 00551 unsigned Opcode, 00552 unsigned *Slots, 00553 unsigned Type) { 00554 // bits Instruction format: 00555 // -------------------------- 00556 // 01-00: Opcode type, fixed to 1. 00557 // 07-02: Opcode 00558 // 19-08: Resulting type plane 00559 // 31-20: Operand #1 (if set to (2^12-1), then zero operands) 00560 // 00561 output(1 | (Opcode << 2) | (Type << 8) | (Slots[0] << 20)); 00562 } 00563 00564 00565 // outputInstructionFormat2 - Output two operand instructions, knowing that no 00566 // operand index is >= 2^8. 00567 // 00568 inline void BytecodeWriter::outputInstructionFormat2(const Instruction *I, 00569 unsigned Opcode, 00570 unsigned *Slots, 00571 unsigned Type) { 00572 // bits Instruction format: 00573 // -------------------------- 00574 // 01-00: Opcode type, fixed to 2. 00575 // 07-02: Opcode 00576 // 15-08: Resulting type plane 00577 // 23-16: Operand #1 00578 // 31-24: Operand #2 00579 // 00580 output(2 | (Opcode << 2) | (Type << 8) | (Slots[0] << 16) | (Slots[1] << 24)); 00581 } 00582 00583 00584 // outputInstructionFormat3 - Output three operand instructions, knowing that no 00585 // operand index is >= 2^6. 00586 // 00587 inline void BytecodeWriter::outputInstructionFormat3(const Instruction *I, 00588 unsigned Opcode, 00589 unsigned *Slots, 00590 unsigned Type) { 00591 // bits Instruction format: 00592 // -------------------------- 00593 // 01-00: Opcode type, fixed to 3. 00594 // 07-02: Opcode 00595 // 13-08: Resulting type plane 00596 // 19-14: Operand #1 00597 // 25-20: Operand #2 00598 // 31-26: Operand #3 00599 // 00600 output(3 | (Opcode << 2) | (Type << 8) | 00601 (Slots[0] << 14) | (Slots[1] << 20) | (Slots[2] << 26)); 00602 } 00603 00604 void BytecodeWriter::outputInstruction(const Instruction &I) { 00605 assert(I.getOpcode() < 62 && "Opcode too big???"); 00606 unsigned Opcode = I.getOpcode(); 00607 unsigned NumOperands = I.getNumOperands(); 00608 00609 // Encode 'volatile load' as 62 and 'volatile store' as 63. 00610 if (isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile()) 00611 Opcode = 62; 00612 if (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile()) 00613 Opcode = 63; 00614 00615 // Figure out which type to encode with the instruction. Typically we want 00616 // the type of the first parameter, as opposed to the type of the instruction 00617 // (for example, with setcc, we always know it returns bool, but the type of 00618 // the first param is actually interesting). But if we have no arguments 00619 // we take the type of the instruction itself. 00620 // 00621 const Type *Ty; 00622 switch (I.getOpcode()) { 00623 case Instruction::Select: 00624 case Instruction::Malloc: 00625 case Instruction::Alloca: 00626 Ty = I.getType(); // These ALWAYS want to encode the return type 00627 break; 00628 case Instruction::Store: 00629 Ty = I.getOperand(1)->getType(); // Encode the pointer type... 00630 assert(isa<PointerType>(Ty) && "Store to nonpointer type!?!?"); 00631 break; 00632 default: // Otherwise use the default behavior... 00633 Ty = NumOperands ? I.getOperand(0)->getType() : I.getType(); 00634 break; 00635 } 00636 00637 unsigned Type; 00638 int Slot = Table.getSlot(Ty); 00639 assert(Slot != -1 && "Type not available!!?!"); 00640 Type = (unsigned)Slot; 00641 00642 // Varargs calls and invokes are encoded entirely different from any other 00643 // instructions. 00644 if (const CallInst *CI = dyn_cast<CallInst>(&I)){ 00645 const PointerType *Ty =cast<PointerType>(CI->getCalledValue()->getType()); 00646 if (cast<FunctionType>(Ty->getElementType())->isVarArg()) { 00647 outputInstrVarArgsCall(CI, Opcode, Table, Type); 00648 return; 00649 } 00650 } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) { 00651 const PointerType *Ty =cast<PointerType>(II->getCalledValue()->getType()); 00652 if (cast<FunctionType>(Ty->getElementType())->isVarArg()) { 00653 outputInstrVarArgsCall(II, Opcode, Table, Type); 00654 return; 00655 } 00656 } 00657 00658 if (NumOperands <= 3) { 00659 // Make sure that we take the type number into consideration. We don't want 00660 // to overflow the field size for the instruction format we select. 00661 // 00662 unsigned MaxOpSlot = Type; 00663 unsigned Slots[3]; Slots[0] = (1 << 12)-1; // Marker to signify 0 operands 00664 00665 for (unsigned i = 0; i != NumOperands; ++i) { 00666 int slot = Table.getSlot(I.getOperand(i)); 00667 assert(slot != -1 && "Broken bytecode!"); 00668 if (unsigned(slot) > MaxOpSlot) MaxOpSlot = unsigned(slot); 00669 Slots[i] = unsigned(slot); 00670 } 00671 00672 // Handle the special cases for various instructions... 00673 if (isa<CastInst>(I) || isa<VAArgInst>(I)) { 00674 // Cast has to encode the destination type as the second argument in the 00675 // packet, or else we won't know what type to cast to! 00676 Slots[1] = Table.getSlot(I.getType()); 00677 assert(Slots[1] != ~0U && "Cast return type unknown?"); 00678 if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; 00679 NumOperands++; 00680 } else if (const VANextInst *VANI = dyn_cast<VANextInst>(&I)) { 00681 Slots[1] = Table.getSlot(VANI->getArgType()); 00682 assert(Slots[1] != ~0U && "va_next return type unknown?"); 00683 if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; 00684 NumOperands++; 00685 } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) { 00686 // We need to encode the type of sequential type indices into their slot # 00687 unsigned Idx = 1; 00688 for (gep_type_iterator I = gep_type_begin(GEP), E = gep_type_end(GEP); 00689 I != E; ++I, ++Idx) 00690 if (isa<SequentialType>(*I)) { 00691 unsigned IdxId; 00692 switch (GEP->getOperand(Idx)->getType()->getTypeID()) { 00693 default: assert(0 && "Unknown index type!"); 00694 case Type::UIntTyID: IdxId = 0; break; 00695 case Type::IntTyID: IdxId = 1; break; 00696 case Type::ULongTyID: IdxId = 2; break; 00697 case Type::LongTyID: IdxId = 3; break; 00698 } 00699 Slots[Idx] = (Slots[Idx] << 2) | IdxId; 00700 if (Slots[Idx] > MaxOpSlot) MaxOpSlot = Slots[Idx]; 00701 } 00702 } 00703 00704 // Decide which instruction encoding to use. This is determined primarily 00705 // by the number of operands, and secondarily by whether or not the max 00706 // operand will fit into the instruction encoding. More operands == fewer 00707 // bits per operand. 00708 // 00709 switch (NumOperands) { 00710 case 0: 00711 case 1: 00712 if (MaxOpSlot < (1 << 12)-1) { // -1 because we use 4095 to indicate 0 ops 00713 outputInstructionFormat1(&I, Opcode, Slots, Type); 00714 return; 00715 } 00716 break; 00717 00718 case 2: 00719 if (MaxOpSlot < (1 << 8)) { 00720 outputInstructionFormat2(&I, Opcode, Slots, Type); 00721 return; 00722 } 00723 break; 00724 00725 case 3: 00726 if (MaxOpSlot < (1 << 6)) { 00727 outputInstructionFormat3(&I, Opcode, Slots, Type); 00728 return; 00729 } 00730 break; 00731 default: 00732 break; 00733 } 00734 } 00735 00736 // If we weren't handled before here, we either have a large number of 00737 // operands or a large operand index that we are referring to. 00738 outputInstructionFormat0(&I, Opcode, Table, Type); 00739 } 00740 00741 //===----------------------------------------------------------------------===// 00742 //=== Block Output ===// 00743 //===----------------------------------------------------------------------===// 00744 00745 BytecodeWriter::BytecodeWriter(std::vector<unsigned char> &o, const Module *M) 00746 : Out(o), Table(M) { 00747 00748 // Emit the signature... 00749 static const unsigned char *Sig = (const unsigned char*)"llvm"; 00750 output_data(Sig, Sig+4); 00751 00752 // Emit the top level CLASS block. 00753 BytecodeBlock ModuleBlock(BytecodeFormat::ModuleBlockID, *this, false, true); 00754 00755 bool isBigEndian = M->getEndianness() == Module::BigEndian; 00756 bool hasLongPointers = M->getPointerSize() == Module::Pointer64; 00757 bool hasNoEndianness = M->getEndianness() == Module::AnyEndianness; 00758 bool hasNoPointerSize = M->getPointerSize() == Module::AnyPointerSize; 00759 00760 // Output the version identifier and other information. 00761 unsigned Version = (BCVersionNum << 4) | 00762 (unsigned)isBigEndian | (hasLongPointers << 1) | 00763 (hasNoEndianness << 2) | 00764 (hasNoPointerSize << 3); 00765 output_vbr(Version); 00766 00767 // The Global type plane comes first 00768 { 00769 BytecodeBlock CPool(BytecodeFormat::GlobalTypePlaneBlockID, *this ); 00770 outputTypes(Type::FirstDerivedTyID); 00771 } 00772 00773 // The ModuleInfoBlock follows directly after the type information 00774 outputModuleInfoBlock(M); 00775 00776 // Output module level constants, used for global variable initializers 00777 outputConstants(false); 00778 00779 // Do the whole module now! Process each function at a time... 00780 for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) 00781 outputFunction(I); 00782 00783 // If needed, output the symbol table for the module... 00784 outputSymbolTable(M->getSymbolTable()); 00785 } 00786 00787 void BytecodeWriter::outputTypes(unsigned TypeNum) { 00788 // Write the type plane for types first because earlier planes (e.g. for a 00789 // primitive type like float) may have constants constructed using types 00790 // coming later (e.g., via getelementptr from a pointer type). The type 00791 // plane is needed before types can be fwd or bkwd referenced. 00792 const std::vector<const Type*>& Types = Table.getTypes(); 00793 assert(!Types.empty() && "No types at all?"); 00794 assert(TypeNum <= Types.size() && "Invalid TypeNo index"); 00795 00796 unsigned NumEntries = Types.size() - TypeNum; 00797 00798 // Output type header: [num entries] 00799 output_vbr(NumEntries); 00800 00801 for (unsigned i = TypeNum; i < TypeNum+NumEntries; ++i) 00802 outputType(Types[i]); 00803 } 00804 00805 // Helper function for outputConstants(). 00806 // Writes out all the constants in the plane Plane starting at entry StartNo. 00807 // 00808 void BytecodeWriter::outputConstantsInPlane(const std::vector<const Value*> 00809 &Plane, unsigned StartNo) { 00810 unsigned ValNo = StartNo; 00811 00812 // Scan through and ignore function arguments, global values, and constant 00813 // strings. 00814 for (; ValNo < Plane.size() && 00815 (isa<Argument>(Plane[ValNo]) || isa<GlobalValue>(Plane[ValNo]) || 00816 (isa<ConstantArray>(Plane[ValNo]) && 00817 cast<ConstantArray>(Plane[ValNo])->isString())); ValNo++) 00818 /*empty*/; 00819 00820 unsigned NC = ValNo; // Number of constants 00821 for (; NC < Plane.size() && (isa<Constant>(Plane[NC])); NC++) 00822 /*empty*/; 00823 NC -= ValNo; // Convert from index into count 00824 if (NC == 0) return; // Skip empty type planes... 00825 00826 // FIXME: Most slabs only have 1 or 2 entries! We should encode this much 00827 // more compactly. 00828 00829 // Output type header: [num entries][type id number] 00830 // 00831 output_vbr(NC); 00832 00833 // Output the Type ID Number... 00834 int Slot = Table.getSlot(Plane.front()->getType()); 00835 assert (Slot != -1 && "Type in constant pool but not in function!!"); 00836 output_typeid((unsigned)Slot); 00837 00838 for (unsigned i = ValNo; i < ValNo+NC; ++i) { 00839 const Value *V = Plane[i]; 00840 if (const Constant *C = dyn_cast<Constant>(V)) { 00841 outputConstant(C); 00842 } 00843 } 00844 } 00845 00846 static inline bool hasNullValue(unsigned TyID) { 00847 return TyID != Type::LabelTyID && TyID != Type::VoidTyID; 00848 } 00849 00850 void BytecodeWriter::outputConstants(bool isFunction) { 00851 BytecodeBlock CPool(BytecodeFormat::ConstantPoolBlockID, *this, 00852 true /* Elide block if empty */); 00853 00854 unsigned NumPlanes = Table.getNumPlanes(); 00855 00856 if (isFunction) 00857 // Output the type plane before any constants! 00858 outputTypes(Table.getModuleTypeLevel()); 00859 else 00860 // Output module-level string constants before any other constants. 00861 outputConstantStrings(); 00862 00863 for (unsigned pno = 0; pno != NumPlanes; pno++) { 00864 const std::vector<const Value*> &Plane = Table.getPlane(pno); 00865 if (!Plane.empty()) { // Skip empty type planes... 00866 unsigned ValNo = 0; 00867 if (isFunction) // Don't re-emit module constants 00868 ValNo += Table.getModuleLevel(pno); 00869 00870 if (hasNullValue(pno)) { 00871 // Skip zero initializer 00872 if (ValNo == 0) 00873 ValNo = 1; 00874 } 00875 00876 // Write out constants in the plane 00877 outputConstantsInPlane(Plane, ValNo); 00878 } 00879 } 00880 } 00881 00882 static unsigned getEncodedLinkage(const GlobalValue *GV) { 00883 switch (GV->getLinkage()) { 00884 default: assert(0 && "Invalid linkage!"); 00885 case GlobalValue::ExternalLinkage: return 0; 00886 case GlobalValue::WeakLinkage: return 1; 00887 case GlobalValue::AppendingLinkage: return 2; 00888 case GlobalValue::InternalLinkage: return 3; 00889 case GlobalValue::LinkOnceLinkage: return 4; 00890 } 00891 } 00892 00893 void BytecodeWriter::outputModuleInfoBlock(const Module *M) { 00894 BytecodeBlock ModuleInfoBlock(BytecodeFormat::ModuleGlobalInfoBlockID, *this); 00895 00896 // Output the types for the global variables in the module... 00897 for (Module::const_giterator I = M->gbegin(), End = M->gend(); I != End;++I) { 00898 int Slot = Table.getSlot(I->getType()); 00899 assert(Slot != -1 && "Module global vars is broken!"); 00900 00901 // Fields: bit0 = isConstant, bit1 = hasInitializer, bit2-4=Linkage, 00902 // bit5+ = Slot # for type 00903 unsigned oSlot = ((unsigned)Slot << 5) | (getEncodedLinkage(I) << 2) | 00904 (I->hasInitializer() << 1) | (unsigned)I->isConstant(); 00905 output_vbr(oSlot); 00906 00907 // If we have an initializer, output it now. 00908 if (I->hasInitializer()) { 00909 Slot = Table.getSlot((Value*)I->getInitializer()); 00910 assert(Slot != -1 && "No slot for global var initializer!"); 00911 output_vbr((unsigned)Slot); 00912 } 00913 } 00914 output_typeid((unsigned)Table.getSlot(Type::VoidTy)); 00915 00916 // Output the types of the functions in this module. 00917 for (Module::const_iterator I = M->begin(), End = M->end(); I != End; ++I) { 00918 int Slot = Table.getSlot(I->getType()); 00919 assert(Slot != -1 && "Module slot calculator is broken!"); 00920 assert(Slot >= Type::FirstDerivedTyID && "Derived type not in range!"); 00921 assert(((Slot << 5) >> 5) == Slot && "Slot # too big!"); 00922 unsigned ID = (Slot << 5) + 1; 00923 if (I->isExternal()) // If external, we don't have an FunctionInfo block. 00924 ID |= 1 << 4; 00925 output_vbr(ID); 00926 } 00927 output_vbr((unsigned)Table.getSlot(Type::VoidTy) << 5); 00928 00929 // Emit the list of dependent libraries for the Module. 00930 Module::lib_iterator LI = M->lib_begin(); 00931 Module::lib_iterator LE = M->lib_end(); 00932 output_vbr(unsigned(LE - LI)); // Emit the number of dependent libraries. 00933 for (; LI != LE; ++LI) 00934 output(*LI); 00935 00936 // Output the target triple from the module 00937 output(M->getTargetTriple()); 00938 } 00939 00940 void BytecodeWriter::outputInstructions(const Function *F) { 00941 BytecodeBlock ILBlock(BytecodeFormat::InstructionListBlockID, *this); 00942 for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) 00943 for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) 00944 outputInstruction(*I); 00945 } 00946 00947 void BytecodeWriter::outputFunction(const Function *F) { 00948 // If this is an external function, there is nothing else to emit! 00949 if (F->isExternal()) return; 00950 00951 BytecodeBlock FunctionBlock(BytecodeFormat::FunctionBlockID, *this); 00952 output_vbr(getEncodedLinkage(F)); 00953 00954 // Get slot information about the function... 00955 Table.incorporateFunction(F); 00956 00957 if (Table.getCompactionTable().empty()) { 00958 // Output information about the constants in the function if the compaction 00959 // table is not being used. 00960 outputConstants(true); 00961 } else { 00962 // Otherwise, emit the compaction table. 00963 outputCompactionTable(); 00964 } 00965 00966 // Output all of the instructions in the body of the function 00967 outputInstructions(F); 00968 00969 // If needed, output the symbol table for the function... 00970 outputSymbolTable(F->getSymbolTable()); 00971 00972 Table.purgeFunction(); 00973 } 00974 00975 void BytecodeWriter::outputCompactionTablePlane(unsigned PlaneNo, 00976 const std::vector<const Value*> &Plane, 00977 unsigned StartNo) { 00978 unsigned End = Table.getModuleLevel(PlaneNo); 00979 if (Plane.empty() || StartNo == End || End == 0) return; // Nothing to emit 00980 assert(StartNo < End && "Cannot emit negative range!"); 00981 assert(StartNo < Plane.size() && End <= Plane.size()); 00982 00983 // Do not emit the null initializer! 00984 ++StartNo; 00985 00986 // Figure out which encoding to use. By far the most common case we have is 00987 // to emit 0-2 entries in a compaction table plane. 00988 switch (End-StartNo) { 00989 case 0: // Avoid emitting two vbr's if possible. 00990 case 1: 00991 case 2: 00992 output_vbr((PlaneNo << 2) | End-StartNo); 00993 break; 00994 default: 00995 // Output the number of things. 00996 output_vbr((unsigned(End-StartNo) << 2) | 3); 00997 output_typeid(PlaneNo); // Emit the type plane this is 00998 break; 00999 } 01000 01001 for (unsigned i = StartNo; i != End; ++i) 01002 output_vbr(Table.getGlobalSlot(Plane[i])); 01003 } 01004 01005 void BytecodeWriter::outputCompactionTypes(unsigned StartNo) { 01006 // Get the compaction type table from the slot calculator 01007 const std::vector<const Type*> &CTypes = Table.getCompactionTypes(); 01008 01009 // The compaction types may have been uncompactified back to the 01010 // global types. If so, we just write an empty table 01011 if (CTypes.size() == 0 ) { 01012 output_vbr(0U); 01013 return; 01014 } 01015 01016 assert(CTypes.size() >= StartNo && "Invalid compaction types start index"); 01017 01018 // Determine how many types to write 01019 unsigned NumTypes = CTypes.size() - StartNo; 01020 01021 // Output the number of types. 01022 output_vbr(NumTypes); 01023 01024 for (unsigned i = StartNo; i < StartNo+NumTypes; ++i) 01025 output_typeid(Table.getGlobalSlot(CTypes[i])); 01026 } 01027 01028 void BytecodeWriter::outputCompactionTable() { 01029 // Avoid writing the compaction table at all if there is no content. 01030 if (Table.getCompactionTypes().size() >= Type::FirstDerivedTyID || 01031 (!Table.CompactionTableIsEmpty())) { 01032 BytecodeBlock CTB(BytecodeFormat::CompactionTableBlockID, *this, 01033 true/*ElideIfEmpty*/); 01034 const std::vector<std::vector<const Value*> > &CT = 01035 Table.getCompactionTable(); 01036 01037 // First things first, emit the type compaction table if there is one. 01038 outputCompactionTypes(Type::FirstDerivedTyID); 01039 01040 for (unsigned i = 0, e = CT.size(); i != e; ++i) 01041 outputCompactionTablePlane(i, CT[i], 0); 01042 } 01043 } 01044 01045 void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) { 01046 // Do not output the Bytecode block for an empty symbol table, it just wastes 01047 // space! 01048 if (MST.isEmpty()) return; 01049 01050 BytecodeBlock SymTabBlock(BytecodeFormat::SymbolTableBlockID, *this, 01051 true/*ElideIfEmpty*/); 01052 01053 // Write the number of types 01054 output_vbr(MST.num_types()); 01055 01056 // Write each of the types 01057 for (SymbolTable::type_const_iterator TI = MST.type_begin(), 01058 TE = MST.type_end(); TI != TE; ++TI ) { 01059 // Symtab entry:[def slot #][name] 01060 output_typeid((unsigned)Table.getSlot(TI->second)); 01061 output(TI->first); 01062 } 01063 01064 // Now do each of the type planes in order. 01065 for (SymbolTable::plane_const_iterator PI = MST.plane_begin(), 01066 PE = MST.plane_end(); PI != PE; ++PI) { 01067 SymbolTable::value_const_iterator I = MST.value_begin(PI->first); 01068 SymbolTable::value_const_iterator End = MST.value_end(PI->first); 01069 int Slot; 01070 01071 if (I == End) continue; // Don't mess with an absent type... 01072 01073 // Write the number of values in this plane 01074 output_vbr(MST.type_size(PI->first)); 01075 01076 // Write the slot number of the type for this plane 01077 Slot = Table.getSlot(PI->first); 01078 assert(Slot != -1 && "Type in symtab, but not in table!"); 01079 output_typeid((unsigned)Slot); 01080 01081 // Write each of the values in this plane 01082 for (; I != End; ++I) { 01083 // Symtab entry: [def slot #][name] 01084 Slot = Table.getSlot(I->second); 01085 assert(Slot != -1 && "Value in symtab but has no slot number!!"); 01086 output_vbr((unsigned)Slot); 01087 output(I->first); 01088 } 01089 } 01090 } 01091 01092 void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out, 01093 bool compress ) { 01094 assert(M && "You can't write a null module!!"); 01095 01096 // Create a vector of unsigned char for the bytecode output. We 01097 // reserve 256KBytes of space in the vector so that we avoid doing 01098 // lots of little allocations. 256KBytes is sufficient for a large 01099 // proportion of the bytecode files we will encounter. Larger files 01100 // will be automatically doubled in size as needed (std::vector 01101 // behavior). 01102 std::vector<unsigned char> Buffer; 01103 Buffer.reserve(256 * 1024); 01104 01105 // The BytecodeWriter populates Buffer for us. 01106 BytecodeWriter BCW(Buffer, M); 01107 01108 // Keep track of how much we've written 01109 BytesWritten += Buffer.size(); 01110 01111 // Determine start and end points of the Buffer 01112 const unsigned char *FirstByte = &Buffer.front(); 01113 01114 // If we're supposed to compress this mess ... 01115 if (compress) { 01116 01117 // We signal compression by using an alternate magic number for the 01118 // file. The compressed bytecode file's magic number is "llvc" instead 01119 // of "llvm". 01120 char compressed_magic[4]; 01121 compressed_magic[0] = 'l'; 01122 compressed_magic[1] = 'l'; 01123 compressed_magic[2] = 'v'; 01124 compressed_magic[3] = 'c'; 01125 01126 Out.write(compressed_magic,4); 01127 01128 // Compress everything after the magic number (which we altered) 01129 uint64_t zipSize = Compressor::compressToStream( 01130 (char*)(FirstByte+4), // Skip the magic number 01131 Buffer.size()-4, // Skip the magic number 01132 Out // Where to write compressed data 01133 ); 01134 01135 } else { 01136 01137 // We're not compressing, so just write the entire block. 01138 Out.write((char*)FirstByte, Buffer.size()); 01139 } 01140 01141 // make sure it hits disk now 01142 Out.flush(); 01143 } 01144 01145 // vim: sw=2 ai