LLVM API Documentation

Writer.cpp

Go to the documentation of this file.
00001 //===-- Writer.cpp - Library for writing LLVM bytecode files --------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This library implements the functionality defined in llvm/Bytecode/Writer.h
00011 //
00012 // Note that this file uses an unusual technique of outputting all the bytecode
00013 // to a vector of unsigned char, then copies the vector to an ostream.  The
00014 // reason for this is that we must do "seeking" in the stream to do back-
00015 // patching, and some very important ostreams that we want to support (like
00016 // pipes) do not support seeking.  :( :( :(
00017 //
00018 //===----------------------------------------------------------------------===//
00019 
00020 #include "WriterInternals.h"
00021 #include "llvm/Bytecode/WriteBytecodePass.h"
00022 #include "llvm/CallingConv.h"
00023 #include "llvm/Constants.h"
00024 #include "llvm/DerivedTypes.h"
00025 #include "llvm/InlineAsm.h"
00026 #include "llvm/Instructions.h"
00027 #include "llvm/Module.h"
00028 #include "llvm/SymbolTable.h"
00029 #include "llvm/Support/GetElementPtrTypeIterator.h"
00030 #include "llvm/Support/Compressor.h"
00031 #include "llvm/Support/MathExtras.h"
00032 #include "llvm/ADT/STLExtras.h"
00033 #include "llvm/ADT/Statistic.h"
00034 #include <cstring>
00035 #include <algorithm>
00036 using namespace llvm;
00037 
00038 /// This value needs to be incremented every time the bytecode format changes
00039 /// so that the reader can distinguish which format of the bytecode file has
00040 /// been written.
00041 /// @brief The bytecode version number
00042 const unsigned BCVersionNum = 5;
00043 
00044 static RegisterPass<WriteBytecodePass> X("emitbytecode", "Bytecode Writer");
00045 
00046 static Statistic<>
00047 BytesWritten("bytecodewriter", "Number of bytecode bytes written");
00048 
00049 //===----------------------------------------------------------------------===//
00050 //===                           Output Primitives                          ===//
00051 //===----------------------------------------------------------------------===//
00052 
00053 // output - If a position is specified, it must be in the valid portion of the
00054 // string... note that this should be inlined always so only the relevant IF
00055 // body should be included.
00056 inline void BytecodeWriter::output(unsigned i, int pos) {
00057   if (pos == -1) { // Be endian clean, little endian is our friend
00058     Out.push_back((unsigned char)i);
00059     Out.push_back((unsigned char)(i >> 8));
00060     Out.push_back((unsigned char)(i >> 16));
00061     Out.push_back((unsigned char)(i >> 24));
00062   } else {
00063     Out[pos  ] = (unsigned char)i;
00064     Out[pos+1] = (unsigned char)(i >> 8);
00065     Out[pos+2] = (unsigned char)(i >> 16);
00066     Out[pos+3] = (unsigned char)(i >> 24);
00067   }
00068 }
00069 
00070 inline void BytecodeWriter::output(int i) {
00071   output((unsigned)i);
00072 }
00073 
00074 /// output_vbr - Output an unsigned value, by using the least number of bytes
00075 /// possible.  This is useful because many of our "infinite" values are really
00076 /// very small most of the time; but can be large a few times.
00077 /// Data format used:  If you read a byte with the high bit set, use the low
00078 /// seven bits as data and then read another byte.
00079 inline void BytecodeWriter::output_vbr(uint64_t i) {
00080   while (1) {
00081     if (i < 0x80) { // done?
00082       Out.push_back((unsigned char)i);   // We know the high bit is clear...
00083       return;
00084     }
00085 
00086     // Nope, we are bigger than a character, output the next 7 bits and set the
00087     // high bit to say that there is more coming...
00088     Out.push_back(0x80 | ((unsigned char)i & 0x7F));
00089     i >>= 7;  // Shift out 7 bits now...
00090   }
00091 }
00092 
00093 inline void BytecodeWriter::output_vbr(unsigned i) {
00094   while (1) {
00095     if (i < 0x80) { // done?
00096       Out.push_back((unsigned char)i);   // We know the high bit is clear...
00097       return;
00098     }
00099 
00100     // Nope, we are bigger than a character, output the next 7 bits and set the
00101     // high bit to say that there is more coming...
00102     Out.push_back(0x80 | ((unsigned char)i & 0x7F));
00103     i >>= 7;  // Shift out 7 bits now...
00104   }
00105 }
00106 
00107 inline void BytecodeWriter::output_typeid(unsigned i) {
00108   if (i <= 0x00FFFFFF)
00109     this->output_vbr(i);
00110   else {
00111     this->output_vbr(0x00FFFFFF);
00112     this->output_vbr(i);
00113   }
00114 }
00115 
00116 inline void BytecodeWriter::output_vbr(int64_t i) {
00117   if (i < 0)
00118     output_vbr(((uint64_t)(-i) << 1) | 1); // Set low order sign bit...
00119   else
00120     output_vbr((uint64_t)i << 1);          // Low order bit is clear.
00121 }
00122 
00123 
00124 inline void BytecodeWriter::output_vbr(int i) {
00125   if (i < 0)
00126     output_vbr(((unsigned)(-i) << 1) | 1); // Set low order sign bit...
00127   else
00128     output_vbr((unsigned)i << 1);          // Low order bit is clear.
00129 }
00130 
00131 inline void BytecodeWriter::output(const std::string &s) {
00132   unsigned Len = s.length();
00133   output_vbr(Len );             // Strings may have an arbitrary length...
00134   Out.insert(Out.end(), s.begin(), s.end());
00135 }
00136 
00137 inline void BytecodeWriter::output_data(const void *Ptr, const void *End) {
00138   Out.insert(Out.end(), (const unsigned char*)Ptr, (const unsigned char*)End);
00139 }
00140 
00141 inline void BytecodeWriter::output_float(float& FloatVal) {
00142   /// FIXME: This isn't optimal, it has size problems on some platforms
00143   /// where FP is not IEEE.
00144   uint32_t i = FloatToBits(FloatVal);
00145   Out.push_back( static_cast<unsigned char>( (i & 0xFF )));
00146   Out.push_back( static_cast<unsigned char>( (i >> 8) & 0xFF));
00147   Out.push_back( static_cast<unsigned char>( (i >> 16) & 0xFF));
00148   Out.push_back( static_cast<unsigned char>( (i >> 24) & 0xFF));
00149 }
00150 
00151 inline void BytecodeWriter::output_double(double& DoubleVal) {
00152   /// FIXME: This isn't optimal, it has size problems on some platforms
00153   /// where FP is not IEEE.
00154   uint64_t i = DoubleToBits(DoubleVal);
00155   Out.push_back( static_cast<unsigned char>( (i & 0xFF )));
00156   Out.push_back( static_cast<unsigned char>( (i >> 8) & 0xFF));
00157   Out.push_back( static_cast<unsigned char>( (i >> 16) & 0xFF));
00158   Out.push_back( static_cast<unsigned char>( (i >> 24) & 0xFF));
00159   Out.push_back( static_cast<unsigned char>( (i >> 32) & 0xFF));
00160   Out.push_back( static_cast<unsigned char>( (i >> 40) & 0xFF));
00161   Out.push_back( static_cast<unsigned char>( (i >> 48) & 0xFF));
00162   Out.push_back( static_cast<unsigned char>( (i >> 56) & 0xFF));
00163 }
00164 
00165 inline BytecodeBlock::BytecodeBlock(unsigned ID, BytecodeWriter &w,
00166                                     bool elideIfEmpty, bool hasLongFormat)
00167   : Id(ID), Writer(w), ElideIfEmpty(elideIfEmpty), HasLongFormat(hasLongFormat){
00168 
00169   if (HasLongFormat) {
00170     w.output(ID);
00171     w.output(0U); // For length in long format
00172   } else {
00173     w.output(0U); /// Place holder for ID and length for this block
00174   }
00175   Loc = w.size();
00176 }
00177 
00178 inline BytecodeBlock::~BytecodeBlock() { // Do backpatch when block goes out
00179                                          // of scope...
00180   if (Loc == Writer.size() && ElideIfEmpty) {
00181     // If the block is empty, and we are allowed to, do not emit the block at
00182     // all!
00183     Writer.resize(Writer.size()-(HasLongFormat?8:4));
00184     return;
00185   }
00186 
00187   if (HasLongFormat)
00188     Writer.output(unsigned(Writer.size()-Loc), int(Loc-4));
00189   else
00190     Writer.output(unsigned(Writer.size()-Loc) << 5 | (Id & 0x1F), int(Loc-4));
00191 }
00192 
00193 //===----------------------------------------------------------------------===//
00194 //===                           Constant Output                            ===//
00195 //===----------------------------------------------------------------------===//
00196 
00197 void BytecodeWriter::outputType(const Type *T) {
00198   output_vbr((unsigned)T->getTypeID());
00199 
00200   // That's all there is to handling primitive types...
00201   if (T->isPrimitiveType()) {
00202     return;     // We might do this if we alias a prim type: %x = type int
00203   }
00204 
00205   switch (T->getTypeID()) {   // Handle derived types now.
00206   case Type::FunctionTyID: {
00207     const FunctionType *MT = cast<FunctionType>(T);
00208     int Slot = Table.getSlot(MT->getReturnType());
00209     assert(Slot != -1 && "Type used but not available!!");
00210     output_typeid((unsigned)Slot);
00211 
00212     // Output the number of arguments to function (+1 if varargs):
00213     output_vbr((unsigned)MT->getNumParams()+MT->isVarArg());
00214 
00215     // Output all of the arguments...
00216     FunctionType::param_iterator I = MT->param_begin();
00217     for (; I != MT->param_end(); ++I) {
00218       Slot = Table.getSlot(*I);
00219       assert(Slot != -1 && "Type used but not available!!");
00220       output_typeid((unsigned)Slot);
00221     }
00222 
00223     // Terminate list with VoidTy if we are a varargs function...
00224     if (MT->isVarArg())
00225       output_typeid((unsigned)Type::VoidTyID);
00226     break;
00227   }
00228 
00229   case Type::ArrayTyID: {
00230     const ArrayType *AT = cast<ArrayType>(T);
00231     int Slot = Table.getSlot(AT->getElementType());
00232     assert(Slot != -1 && "Type used but not available!!");
00233     output_typeid((unsigned)Slot);
00234     output_vbr(AT->getNumElements());
00235     break;
00236   }
00237 
00238  case Type::PackedTyID: {
00239     const PackedType *PT = cast<PackedType>(T);
00240     int Slot = Table.getSlot(PT->getElementType());
00241     assert(Slot != -1 && "Type used but not available!!");
00242     output_typeid((unsigned)Slot);
00243     output_vbr(PT->getNumElements());
00244     break;
00245   }
00246 
00247 
00248   case Type::StructTyID: {
00249     const StructType *ST = cast<StructType>(T);
00250 
00251     // Output all of the element types...
00252     for (StructType::element_iterator I = ST->element_begin(),
00253            E = ST->element_end(); I != E; ++I) {
00254       int Slot = Table.getSlot(*I);
00255       assert(Slot != -1 && "Type used but not available!!");
00256       output_typeid((unsigned)Slot);
00257     }
00258 
00259     // Terminate list with VoidTy
00260     output_typeid((unsigned)Type::VoidTyID);
00261     break;
00262   }
00263 
00264   case Type::PointerTyID: {
00265     const PointerType *PT = cast<PointerType>(T);
00266     int Slot = Table.getSlot(PT->getElementType());
00267     assert(Slot != -1 && "Type used but not available!!");
00268     output_typeid((unsigned)Slot);
00269     break;
00270   }
00271 
00272   case Type::OpaqueTyID:
00273     // No need to emit anything, just the count of opaque types is enough.
00274     break;
00275 
00276   default:
00277     std::cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize"
00278               << " Type '" << T->getDescription() << "'\n";
00279     break;
00280   }
00281 }
00282 
00283 void BytecodeWriter::outputConstant(const Constant *CPV) {
00284   assert((CPV->getType()->isPrimitiveType() || !CPV->isNullValue()) &&
00285          "Shouldn't output null constants!");
00286 
00287   // We must check for a ConstantExpr before switching by type because
00288   // a ConstantExpr can be of any type, and has no explicit value.
00289   //
00290   if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) {
00291     // FIXME: Encoding of constant exprs could be much more compact!
00292     assert(CE->getNumOperands() > 0 && "ConstantExpr with 0 operands");
00293     assert(CE->getNumOperands() != 1 || CE->getOpcode() == Instruction::Cast);
00294     output_vbr(1+CE->getNumOperands());   // flags as an expr
00295     output_vbr(CE->getOpcode());        // flags as an expr
00296 
00297     for (User::const_op_iterator OI = CE->op_begin(); OI != CE->op_end(); ++OI){
00298       int Slot = Table.getSlot(*OI);
00299       assert(Slot != -1 && "Unknown constant used in ConstantExpr!!");
00300       output_vbr((unsigned)Slot);
00301       Slot = Table.getSlot((*OI)->getType());
00302       output_typeid((unsigned)Slot);
00303     }
00304     return;
00305   } else if (isa<UndefValue>(CPV)) {
00306     output_vbr(1U);       // 1 -> UndefValue constant.
00307     return;
00308   } else {
00309     output_vbr(0U);       // flag as not a ConstantExpr
00310   }
00311 
00312   switch (CPV->getType()->getTypeID()) {
00313   case Type::BoolTyID:    // Boolean Types
00314     if (cast<ConstantBool>(CPV)->getValue())
00315       output_vbr(1U);
00316     else
00317       output_vbr(0U);
00318     break;
00319 
00320   case Type::UByteTyID:   // Unsigned integer types...
00321   case Type::UShortTyID:
00322   case Type::UIntTyID:
00323   case Type::ULongTyID:
00324     output_vbr(cast<ConstantUInt>(CPV)->getValue());
00325     break;
00326 
00327   case Type::SByteTyID:   // Signed integer types...
00328   case Type::ShortTyID:
00329   case Type::IntTyID:
00330   case Type::LongTyID:
00331     output_vbr(cast<ConstantSInt>(CPV)->getValue());
00332     break;
00333 
00334   case Type::ArrayTyID: {
00335     const ConstantArray *CPA = cast<ConstantArray>(CPV);
00336     assert(!CPA->isString() && "Constant strings should be handled specially!");
00337 
00338     for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i) {
00339       int Slot = Table.getSlot(CPA->getOperand(i));
00340       assert(Slot != -1 && "Constant used but not available!!");
00341       output_vbr((unsigned)Slot);
00342     }
00343     break;
00344   }
00345 
00346   case Type::PackedTyID: {
00347     const ConstantPacked *CP = cast<ConstantPacked>(CPV);
00348 
00349     for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) {
00350       int Slot = Table.getSlot(CP->getOperand(i));
00351       assert(Slot != -1 && "Constant used but not available!!");
00352       output_vbr((unsigned)Slot);
00353     }
00354     break;
00355   }
00356 
00357   case Type::StructTyID: {
00358     const ConstantStruct *CPS = cast<ConstantStruct>(CPV);
00359 
00360     for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i) {
00361       int Slot = Table.getSlot(CPS->getOperand(i));
00362       assert(Slot != -1 && "Constant used but not available!!");
00363       output_vbr((unsigned)Slot);
00364     }
00365     break;
00366   }
00367 
00368   case Type::PointerTyID:
00369     assert(0 && "No non-null, non-constant-expr constants allowed!");
00370     abort();
00371 
00372   case Type::FloatTyID: {   // Floating point types...
00373     float Tmp = (float)cast<ConstantFP>(CPV)->getValue();
00374     output_float(Tmp);
00375     break;
00376   }
00377   case Type::DoubleTyID: {
00378     double Tmp = cast<ConstantFP>(CPV)->getValue();
00379     output_double(Tmp);
00380     break;
00381   }
00382 
00383   case Type::VoidTyID:
00384   case Type::LabelTyID:
00385   default:
00386     std::cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize"
00387               << " type '" << *CPV->getType() << "'\n";
00388     break;
00389   }
00390   return;
00391 }
00392 
00393 /// outputInlineAsm - InlineAsm's get emitted to the constant pool, so they can
00394 /// be shared by multiple uses.
00395 void BytecodeWriter::outputInlineAsm(const InlineAsm *IA) {
00396   // Output a marker, so we know when we have one one parsing the constant pool.
00397   // Note that this encoding is 5 bytes: not very efficient for a marker.  Since
00398   // unique inline asms are rare, this should hardly matter.
00399   output_vbr(~0U);
00400   
00401   output(IA->getAsmString());
00402   output(IA->getConstraintString());
00403   output_vbr(unsigned(IA->hasSideEffects()));
00404 }
00405 
00406 void BytecodeWriter::outputConstantStrings() {
00407   SlotCalculator::string_iterator I = Table.string_begin();
00408   SlotCalculator::string_iterator E = Table.string_end();
00409   if (I == E) return;  // No strings to emit
00410 
00411   // If we have != 0 strings to emit, output them now.  Strings are emitted into
00412   // the 'void' type plane.
00413   output_vbr(unsigned(E-I));
00414   output_typeid(Type::VoidTyID);
00415 
00416   // Emit all of the strings.
00417   for (I = Table.string_begin(); I != E; ++I) {
00418     const ConstantArray *Str = *I;
00419     int Slot = Table.getSlot(Str->getType());
00420     assert(Slot != -1 && "Constant string of unknown type?");
00421     output_typeid((unsigned)Slot);
00422 
00423     // Now that we emitted the type (which indicates the size of the string),
00424     // emit all of the characters.
00425     std::string Val = Str->getAsString();
00426     output_data(Val.c_str(), Val.c_str()+Val.size());
00427   }
00428 }
00429 
00430 //===----------------------------------------------------------------------===//
00431 //===                           Instruction Output                         ===//
00432 //===----------------------------------------------------------------------===//
00433 
00434 // outputInstructionFormat0 - Output those weird instructions that have a large
00435 // number of operands or have large operands themselves.
00436 //
00437 // Format: [opcode] [type] [numargs] [arg0] [arg1] ... [arg<numargs-1>]
00438 //
00439 void BytecodeWriter::outputInstructionFormat0(const Instruction *I,
00440                                               unsigned Opcode,
00441                                               const SlotCalculator &Table,
00442                                               unsigned Type) {
00443   // Opcode must have top two bits clear...
00444   output_vbr(Opcode << 2);                  // Instruction Opcode ID
00445   output_typeid(Type);                      // Result type
00446 
00447   unsigned NumArgs = I->getNumOperands();
00448   output_vbr(NumArgs + (isa<CastInst>(I)  ||
00449                         isa<VAArgInst>(I) || Opcode == 56 || Opcode == 58));
00450 
00451   if (!isa<GetElementPtrInst>(&I)) {
00452     for (unsigned i = 0; i < NumArgs; ++i) {
00453       int Slot = Table.getSlot(I->getOperand(i));
00454       assert(Slot >= 0 && "No slot number for value!?!?");
00455       output_vbr((unsigned)Slot);
00456     }
00457 
00458     if (isa<CastInst>(I) || isa<VAArgInst>(I)) {
00459       int Slot = Table.getSlot(I->getType());
00460       assert(Slot != -1 && "Cast return type unknown?");
00461       output_typeid((unsigned)Slot);
00462     } else if (Opcode == 56) {  // Invoke escape sequence
00463       output_vbr(cast<InvokeInst>(I)->getCallingConv());
00464     } else if (Opcode == 58) {  // Call escape sequence
00465       output_vbr((cast<CallInst>(I)->getCallingConv() << 1) |
00466                  unsigned(cast<CallInst>(I)->isTailCall()));
00467     }
00468   } else {
00469     int Slot = Table.getSlot(I->getOperand(0));
00470     assert(Slot >= 0 && "No slot number for value!?!?");
00471     output_vbr(unsigned(Slot));
00472 
00473     // We need to encode the type of sequential type indices into their slot #
00474     unsigned Idx = 1;
00475     for (gep_type_iterator TI = gep_type_begin(I), E = gep_type_end(I);
00476          Idx != NumArgs; ++TI, ++Idx) {
00477       Slot = Table.getSlot(I->getOperand(Idx));
00478       assert(Slot >= 0 && "No slot number for value!?!?");
00479 
00480       if (isa<SequentialType>(*TI)) {
00481         unsigned IdxId;
00482         switch (I->getOperand(Idx)->getType()->getTypeID()) {
00483         default: assert(0 && "Unknown index type!");
00484         case Type::UIntTyID:  IdxId = 0; break;
00485         case Type::IntTyID:   IdxId = 1; break;
00486         case Type::ULongTyID: IdxId = 2; break;
00487         case Type::LongTyID:  IdxId = 3; break;
00488         }
00489         Slot = (Slot << 2) | IdxId;
00490       }
00491       output_vbr(unsigned(Slot));
00492     }
00493   }
00494 }
00495 
00496 
00497 // outputInstrVarArgsCall - Output the absurdly annoying varargs function calls.
00498 // This are more annoying than most because the signature of the call does not
00499 // tell us anything about the types of the arguments in the varargs portion.
00500 // Because of this, we encode (as type 0) all of the argument types explicitly
00501 // before the argument value.  This really sucks, but you shouldn't be using
00502 // varargs functions in your code! *death to printf*!
00503 //
00504 // Format: [opcode] [type] [numargs] [arg0] [arg1] ... [arg<numargs-1>]
00505 //
00506 void BytecodeWriter::outputInstrVarArgsCall(const Instruction *I,
00507                                             unsigned Opcode,
00508                                             const SlotCalculator &Table,
00509                                             unsigned Type) {
00510   assert(isa<CallInst>(I) || isa<InvokeInst>(I));
00511   // Opcode must have top two bits clear...
00512   output_vbr(Opcode << 2);                  // Instruction Opcode ID
00513   output_typeid(Type);                      // Result type (varargs type)
00514 
00515   const PointerType *PTy = cast<PointerType>(I->getOperand(0)->getType());
00516   const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
00517   unsigned NumParams = FTy->getNumParams();
00518 
00519   unsigned NumFixedOperands;
00520   if (isa<CallInst>(I)) {
00521     // Output an operand for the callee and each fixed argument, then two for
00522     // each variable argument.
00523     NumFixedOperands = 1+NumParams;
00524   } else {
00525     assert(isa<InvokeInst>(I) && "Not call or invoke??");
00526     // Output an operand for the callee and destinations, then two for each
00527     // variable argument.
00528     NumFixedOperands = 3+NumParams;
00529   }
00530   output_vbr(2 * I->getNumOperands()-NumFixedOperands);
00531 
00532   // The type for the function has already been emitted in the type field of the
00533   // instruction.  Just emit the slot # now.
00534   for (unsigned i = 0; i != NumFixedOperands; ++i) {
00535     int Slot = Table.getSlot(I->getOperand(i));
00536     assert(Slot >= 0 && "No slot number for value!?!?");
00537     output_vbr((unsigned)Slot);
00538   }
00539 
00540   for (unsigned i = NumFixedOperands, e = I->getNumOperands(); i != e; ++i) {
00541     // Output Arg Type ID
00542     int Slot = Table.getSlot(I->getOperand(i)->getType());
00543     assert(Slot >= 0 && "No slot number for value!?!?");
00544     output_typeid((unsigned)Slot);
00545 
00546     // Output arg ID itself
00547     Slot = Table.getSlot(I->getOperand(i));
00548     assert(Slot >= 0 && "No slot number for value!?!?");
00549     output_vbr((unsigned)Slot);
00550   }
00551 }
00552 
00553 
00554 // outputInstructionFormat1 - Output one operand instructions, knowing that no
00555 // operand index is >= 2^12.
00556 //
00557 inline void BytecodeWriter::outputInstructionFormat1(const Instruction *I,
00558                                                      unsigned Opcode,
00559                                                      unsigned *Slots,
00560                                                      unsigned Type) {
00561   // bits   Instruction format:
00562   // --------------------------
00563   // 01-00: Opcode type, fixed to 1.
00564   // 07-02: Opcode
00565   // 19-08: Resulting type plane
00566   // 31-20: Operand #1 (if set to (2^12-1), then zero operands)
00567   //
00568   output(1 | (Opcode << 2) | (Type << 8) | (Slots[0] << 20));
00569 }
00570 
00571 
00572 // outputInstructionFormat2 - Output two operand instructions, knowing that no
00573 // operand index is >= 2^8.
00574 //
00575 inline void BytecodeWriter::outputInstructionFormat2(const Instruction *I,
00576                                                      unsigned Opcode,
00577                                                      unsigned *Slots,
00578                                                      unsigned Type) {
00579   // bits   Instruction format:
00580   // --------------------------
00581   // 01-00: Opcode type, fixed to 2.
00582   // 07-02: Opcode
00583   // 15-08: Resulting type plane
00584   // 23-16: Operand #1
00585   // 31-24: Operand #2
00586   //
00587   output(2 | (Opcode << 2) | (Type << 8) | (Slots[0] << 16) | (Slots[1] << 24));
00588 }
00589 
00590 
00591 // outputInstructionFormat3 - Output three operand instructions, knowing that no
00592 // operand index is >= 2^6.
00593 //
00594 inline void BytecodeWriter::outputInstructionFormat3(const Instruction *I,
00595                                                      unsigned Opcode,
00596                                                      unsigned *Slots,
00597                                                      unsigned Type) {
00598   // bits   Instruction format:
00599   // --------------------------
00600   // 01-00: Opcode type, fixed to 3.
00601   // 07-02: Opcode
00602   // 13-08: Resulting type plane
00603   // 19-14: Operand #1
00604   // 25-20: Operand #2
00605   // 31-26: Operand #3
00606   //
00607   output(3 | (Opcode << 2) | (Type << 8) |
00608           (Slots[0] << 14) | (Slots[1] << 20) | (Slots[2] << 26));
00609 }
00610 
00611 void BytecodeWriter::outputInstruction(const Instruction &I) {
00612   assert(I.getOpcode() < 56 && "Opcode too big???");
00613   unsigned Opcode = I.getOpcode();
00614   unsigned NumOperands = I.getNumOperands();
00615 
00616   // Encode 'tail call' as 61, 'volatile load' as 62, and 'volatile store' as
00617   // 63.
00618   if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
00619     if (CI->getCallingConv() == CallingConv::C) {
00620       if (CI->isTailCall())
00621         Opcode = 61;   // CCC + Tail Call
00622       else
00623         ;     // Opcode = Instruction::Call
00624     } else if (CI->getCallingConv() == CallingConv::Fast) {
00625       if (CI->isTailCall())
00626         Opcode = 59;    // FastCC + TailCall
00627       else
00628         Opcode = 60;    // FastCC + Not Tail Call
00629     } else {
00630       Opcode = 58;      // Call escape sequence.
00631     }
00632   } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
00633     if (II->getCallingConv() == CallingConv::Fast)
00634       Opcode = 57;      // FastCC invoke.
00635     else if (II->getCallingConv() != CallingConv::C)
00636       Opcode = 56;      // Invoke escape sequence.
00637 
00638   } else if (isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile()) {
00639     Opcode = 62;
00640   } else if (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile()) {
00641     Opcode = 63;
00642   }
00643 
00644   // Figure out which type to encode with the instruction.  Typically we want
00645   // the type of the first parameter, as opposed to the type of the instruction
00646   // (for example, with setcc, we always know it returns bool, but the type of
00647   // the first param is actually interesting).  But if we have no arguments
00648   // we take the type of the instruction itself.
00649   //
00650   const Type *Ty;
00651   switch (I.getOpcode()) {
00652   case Instruction::Select:
00653   case Instruction::Malloc:
00654   case Instruction::Alloca:
00655     Ty = I.getType();  // These ALWAYS want to encode the return type
00656     break;
00657   case Instruction::Store:
00658     Ty = I.getOperand(1)->getType();  // Encode the pointer type...
00659     assert(isa<PointerType>(Ty) && "Store to nonpointer type!?!?");
00660     break;
00661   default:              // Otherwise use the default behavior...
00662     Ty = NumOperands ? I.getOperand(0)->getType() : I.getType();
00663     break;
00664   }
00665 
00666   unsigned Type;
00667   int Slot = Table.getSlot(Ty);
00668   assert(Slot != -1 && "Type not available!!?!");
00669   Type = (unsigned)Slot;
00670 
00671   // Varargs calls and invokes are encoded entirely different from any other
00672   // instructions.
00673   if (const CallInst *CI = dyn_cast<CallInst>(&I)){
00674     const PointerType *Ty =cast<PointerType>(CI->getCalledValue()->getType());
00675     if (cast<FunctionType>(Ty->getElementType())->isVarArg()) {
00676       outputInstrVarArgsCall(CI, Opcode, Table, Type);
00677       return;
00678     }
00679   } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
00680     const PointerType *Ty =cast<PointerType>(II->getCalledValue()->getType());
00681     if (cast<FunctionType>(Ty->getElementType())->isVarArg()) {
00682       outputInstrVarArgsCall(II, Opcode, Table, Type);
00683       return;
00684     }
00685   }
00686 
00687   if (NumOperands <= 3) {
00688     // Make sure that we take the type number into consideration.  We don't want
00689     // to overflow the field size for the instruction format we select.
00690     //
00691     unsigned MaxOpSlot = Type;
00692     unsigned Slots[3]; Slots[0] = (1 << 12)-1;   // Marker to signify 0 operands
00693 
00694     for (unsigned i = 0; i != NumOperands; ++i) {
00695       int slot = Table.getSlot(I.getOperand(i));
00696       assert(slot != -1 && "Broken bytecode!");
00697       if (unsigned(slot) > MaxOpSlot) MaxOpSlot = unsigned(slot);
00698       Slots[i] = unsigned(slot);
00699     }
00700 
00701     // Handle the special cases for various instructions...
00702     if (isa<CastInst>(I) || isa<VAArgInst>(I)) {
00703       // Cast has to encode the destination type as the second argument in the
00704       // packet, or else we won't know what type to cast to!
00705       Slots[1] = Table.getSlot(I.getType());
00706       assert(Slots[1] != ~0U && "Cast return type unknown?");
00707       if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1];
00708       NumOperands++;
00709     } else if (const AllocationInst *AI = dyn_cast<AllocationInst>(&I)) {
00710       assert(NumOperands == 1 && "Bogus allocation!");
00711       if (AI->getAlignment()) {
00712         Slots[1] = Log2_32(AI->getAlignment())+1;
00713         if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1];
00714         NumOperands = 2;
00715       }
00716     } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
00717       // We need to encode the type of sequential type indices into their slot #
00718       unsigned Idx = 1;
00719       for (gep_type_iterator I = gep_type_begin(GEP), E = gep_type_end(GEP);
00720            I != E; ++I, ++Idx)
00721         if (isa<SequentialType>(*I)) {
00722           unsigned IdxId;
00723           switch (GEP->getOperand(Idx)->getType()->getTypeID()) {
00724           default: assert(0 && "Unknown index type!");
00725           case Type::UIntTyID:  IdxId = 0; break;
00726           case Type::IntTyID:   IdxId = 1; break;
00727           case Type::ULongTyID: IdxId = 2; break;
00728           case Type::LongTyID:  IdxId = 3; break;
00729           }
00730           Slots[Idx] = (Slots[Idx] << 2) | IdxId;
00731           if (Slots[Idx] > MaxOpSlot) MaxOpSlot = Slots[Idx];
00732         }
00733     } else if (Opcode == 58) {
00734       // If this is the escape sequence for call, emit the tailcall/cc info.
00735       const CallInst &CI = cast<CallInst>(I);
00736       ++NumOperands;
00737       if (NumOperands < 3) {
00738         Slots[NumOperands-1] = (CI.getCallingConv() << 1)|unsigned(CI.isTailCall());
00739         if (Slots[NumOperands-1] > MaxOpSlot)
00740           MaxOpSlot = Slots[NumOperands-1];
00741       }
00742     } else if (Opcode == 56) {
00743       // Invoke escape seq has at least 4 operands to encode.
00744       ++NumOperands;
00745     }
00746 
00747     // Decide which instruction encoding to use.  This is determined primarily
00748     // by the number of operands, and secondarily by whether or not the max
00749     // operand will fit into the instruction encoding.  More operands == fewer
00750     // bits per operand.
00751     //
00752     switch (NumOperands) {
00753     case 0:
00754     case 1:
00755       if (MaxOpSlot < (1 << 12)-1) { // -1 because we use 4095 to indicate 0 ops
00756         outputInstructionFormat1(&I, Opcode, Slots, Type);
00757         return;
00758       }
00759       break;
00760 
00761     case 2:
00762       if (MaxOpSlot < (1 << 8)) {
00763         outputInstructionFormat2(&I, Opcode, Slots, Type);
00764         return;
00765       }
00766       break;
00767 
00768     case 3:
00769       if (MaxOpSlot < (1 << 6)) {
00770         outputInstructionFormat3(&I, Opcode, Slots, Type);
00771         return;
00772       }
00773       break;
00774     default:
00775       break;
00776     }
00777   }
00778 
00779   // If we weren't handled before here, we either have a large number of
00780   // operands or a large operand index that we are referring to.
00781   outputInstructionFormat0(&I, Opcode, Table, Type);
00782 }
00783 
00784 //===----------------------------------------------------------------------===//
00785 //===                              Block Output                            ===//
00786 //===----------------------------------------------------------------------===//
00787 
00788 BytecodeWriter::BytecodeWriter(std::vector<unsigned char> &o, const Module *M)
00789   : Out(o), Table(M) {
00790 
00791   // Emit the signature...
00792   static const unsigned char *Sig =  (const unsigned char*)"llvm";
00793   output_data(Sig, Sig+4);
00794 
00795   // Emit the top level CLASS block.
00796   BytecodeBlock ModuleBlock(BytecodeFormat::ModuleBlockID, *this, false, true);
00797 
00798   bool isBigEndian      = M->getEndianness() == Module::BigEndian;
00799   bool hasLongPointers  = M->getPointerSize() == Module::Pointer64;
00800   bool hasNoEndianness  = M->getEndianness() == Module::AnyEndianness;
00801   bool hasNoPointerSize = M->getPointerSize() == Module::AnyPointerSize;
00802 
00803   // Output the version identifier and other information.
00804   unsigned Version = (BCVersionNum << 4) |
00805                      (unsigned)isBigEndian | (hasLongPointers << 1) |
00806                      (hasNoEndianness << 2) |
00807                      (hasNoPointerSize << 3);
00808   output_vbr(Version);
00809 
00810   // The Global type plane comes first
00811   {
00812       BytecodeBlock CPool(BytecodeFormat::GlobalTypePlaneBlockID, *this );
00813       outputTypes(Type::FirstDerivedTyID);
00814   }
00815 
00816   // The ModuleInfoBlock follows directly after the type information
00817   outputModuleInfoBlock(M);
00818 
00819   // Output module level constants, used for global variable initializers
00820   outputConstants(false);
00821 
00822   // Do the whole module now! Process each function at a time...
00823   for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
00824     outputFunction(I);
00825 
00826   // If needed, output the symbol table for the module...
00827   outputSymbolTable(M->getSymbolTable());
00828 }
00829 
00830 void BytecodeWriter::outputTypes(unsigned TypeNum) {
00831   // Write the type plane for types first because earlier planes (e.g. for a
00832   // primitive type like float) may have constants constructed using types
00833   // coming later (e.g., via getelementptr from a pointer type).  The type
00834   // plane is needed before types can be fwd or bkwd referenced.
00835   const std::vector<const Type*>& Types = Table.getTypes();
00836   assert(!Types.empty() && "No types at all?");
00837   assert(TypeNum <= Types.size() && "Invalid TypeNo index");
00838 
00839   unsigned NumEntries = Types.size() - TypeNum;
00840 
00841   // Output type header: [num entries]
00842   output_vbr(NumEntries);
00843 
00844   for (unsigned i = TypeNum; i < TypeNum+NumEntries; ++i)
00845     outputType(Types[i]);
00846 }
00847 
00848 // Helper function for outputConstants().
00849 // Writes out all the constants in the plane Plane starting at entry StartNo.
00850 //
00851 void BytecodeWriter::outputConstantsInPlane(const std::vector<const Value*>
00852                                             &Plane, unsigned StartNo) {
00853   unsigned ValNo = StartNo;
00854 
00855   // Scan through and ignore function arguments, global values, and constant
00856   // strings.
00857   for (; ValNo < Plane.size() &&
00858          (isa<Argument>(Plane[ValNo]) || isa<GlobalValue>(Plane[ValNo]) ||
00859           (isa<ConstantArray>(Plane[ValNo]) &&
00860            cast<ConstantArray>(Plane[ValNo])->isString())); ValNo++)
00861     /*empty*/;
00862 
00863   unsigned NC = ValNo;              // Number of constants
00864   for (; NC < Plane.size() && (isa<Constant>(Plane[NC]) || 
00865                                isa<InlineAsm>(Plane[NC])); NC++)
00866     /*empty*/;
00867   NC -= ValNo;                      // Convert from index into count
00868   if (NC == 0) return;              // Skip empty type planes...
00869 
00870   // FIXME: Most slabs only have 1 or 2 entries!  We should encode this much
00871   // more compactly.
00872 
00873   // Output type header: [num entries][type id number]
00874   //
00875   output_vbr(NC);
00876 
00877   // Output the Type ID Number...
00878   int Slot = Table.getSlot(Plane.front()->getType());
00879   assert (Slot != -1 && "Type in constant pool but not in function!!");
00880   output_typeid((unsigned)Slot);
00881 
00882   for (unsigned i = ValNo; i < ValNo+NC; ++i) {
00883     const Value *V = Plane[i];
00884     if (const Constant *C = dyn_cast<Constant>(V))
00885       outputConstant(C);
00886     else
00887       outputInlineAsm(cast<InlineAsm>(V));
00888   }
00889 }
00890 
00891 static inline bool hasNullValue(const Type *Ty) {
00892   return Ty != Type::LabelTy && Ty != Type::VoidTy && !isa<OpaqueType>(Ty);
00893 }
00894 
00895 void BytecodeWriter::outputConstants(bool isFunction) {
00896   BytecodeBlock CPool(BytecodeFormat::ConstantPoolBlockID, *this,
00897                       true  /* Elide block if empty */);
00898 
00899   unsigned NumPlanes = Table.getNumPlanes();
00900 
00901   if (isFunction)
00902     // Output the type plane before any constants!
00903     outputTypes(Table.getModuleTypeLevel());
00904   else
00905     // Output module-level string constants before any other constants.
00906     outputConstantStrings();
00907 
00908   for (unsigned pno = 0; pno != NumPlanes; pno++) {
00909     const std::vector<const Value*> &Plane = Table.getPlane(pno);
00910     if (!Plane.empty()) {              // Skip empty type planes...
00911       unsigned ValNo = 0;
00912       if (isFunction)                  // Don't re-emit module constants
00913         ValNo += Table.getModuleLevel(pno);
00914 
00915       if (hasNullValue(Plane[0]->getType())) {
00916         // Skip zero initializer
00917         if (ValNo == 0)
00918           ValNo = 1;
00919       }
00920 
00921       // Write out constants in the plane
00922       outputConstantsInPlane(Plane, ValNo);
00923     }
00924   }
00925 }
00926 
00927 static unsigned getEncodedLinkage(const GlobalValue *GV) {
00928   switch (GV->getLinkage()) {
00929   default: assert(0 && "Invalid linkage!");
00930   case GlobalValue::ExternalLinkage:  return 0;
00931   case GlobalValue::WeakLinkage:      return 1;
00932   case GlobalValue::AppendingLinkage: return 2;
00933   case GlobalValue::InternalLinkage:  return 3;
00934   case GlobalValue::LinkOnceLinkage:  return 4;
00935   }
00936 }
00937 
00938 void BytecodeWriter::outputModuleInfoBlock(const Module *M) {
00939   BytecodeBlock ModuleInfoBlock(BytecodeFormat::ModuleGlobalInfoBlockID, *this);
00940 
00941   // Give numbers to sections as we encounter them.
00942   unsigned SectionIDCounter = 0;
00943   std::vector<std::string> SectionNames;
00944   std::map<std::string, unsigned> SectionID;
00945   
00946   // Output the types for the global variables in the module...
00947   for (Module::const_global_iterator I = M->global_begin(),
00948          End = M->global_end(); I != End; ++I) {
00949     int Slot = Table.getSlot(I->getType());
00950     assert(Slot != -1 && "Module global vars is broken!");
00951 
00952     assert((I->hasInitializer() || !I->hasInternalLinkage()) &&
00953            "Global must have an initializer or have external linkage!");
00954     
00955     // Fields: bit0 = isConstant, bit1 = hasInitializer, bit2-4=Linkage,
00956     // bit5+ = Slot # for type.
00957     bool HasExtensionWord = (I->getAlignment() != 0) || I->hasSection();
00958     
00959     // If we need to use the extension byte, set linkage=3(internal) and
00960     // initializer = 0 (impossible!).
00961     if (!HasExtensionWord) {
00962       unsigned oSlot = ((unsigned)Slot << 5) | (getEncodedLinkage(I) << 2) |
00963                         (I->hasInitializer() << 1) | (unsigned)I->isConstant();
00964       output_vbr(oSlot);
00965     } else {
00966       unsigned oSlot = ((unsigned)Slot << 5) | (3 << 2) |
00967                         (0 << 1) | (unsigned)I->isConstant();
00968       output_vbr(oSlot);
00969       
00970       // The extension word has this format: bit 0 = has initializer, bit 1-3 =
00971       // linkage, bit 4-8 = alignment (log2), bit 9 = has SectionID, 
00972       // bits 10+ = future use.
00973       unsigned ExtWord = (unsigned)I->hasInitializer() |
00974                          (getEncodedLinkage(I) << 1) |
00975                          ((Log2_32(I->getAlignment())+1) << 4) |
00976                          ((unsigned)I->hasSection() << 9);
00977       output_vbr(ExtWord);
00978       if (I->hasSection()) {
00979         // Give section names unique ID's.
00980         unsigned &Entry = SectionID[I->getSection()];
00981         if (Entry == 0) {
00982           Entry = ++SectionIDCounter;
00983           SectionNames.push_back(I->getSection());
00984         }
00985         output_vbr(Entry);
00986       }
00987     }
00988 
00989     // If we have an initializer, output it now.
00990     if (I->hasInitializer()) {
00991       Slot = Table.getSlot((Value*)I->getInitializer());
00992       assert(Slot != -1 && "No slot for global var initializer!");
00993       output_vbr((unsigned)Slot);
00994     }
00995   }
00996   output_typeid((unsigned)Table.getSlot(Type::VoidTy));
00997 
00998   // Output the types of the functions in this module.
00999   for (Module::const_iterator I = M->begin(), End = M->end(); I != End; ++I) {
01000     int Slot = Table.getSlot(I->getType());
01001     assert(Slot != -1 && "Module slot calculator is broken!");
01002     assert(Slot >= Type::FirstDerivedTyID && "Derived type not in range!");
01003     assert(((Slot << 6) >> 6) == Slot && "Slot # too big!");
01004     unsigned CC = I->getCallingConv()+1;
01005     unsigned ID = (Slot << 5) | (CC & 15);
01006 
01007     if (I->isExternal())   // If external, we don't have an FunctionInfo block.
01008       ID |= 1 << 4;
01009     
01010     if (I->getAlignment() || I->hasSection() || (CC & ~15) != 0)
01011       ID |= 1 << 31;       // Do we need an extension word?
01012     
01013     output_vbr(ID);
01014     
01015     if (ID & (1 << 31)) {
01016       // Extension byte: bits 0-4 = alignment, bits 5-9 = top nibble of calling
01017       // convention, bit 10 = hasSectionID.
01018       ID = (Log2_32(I->getAlignment())+1) | ((CC >> 4) << 5) | 
01019            (I->hasSection() << 10);
01020       output_vbr(ID);
01021       
01022       // Give section names unique ID's.
01023       if (I->hasSection()) {
01024         unsigned &Entry = SectionID[I->getSection()];
01025         if (Entry == 0) {
01026           Entry = ++SectionIDCounter;
01027           SectionNames.push_back(I->getSection());
01028         }
01029         output_vbr(Entry);
01030       }
01031     }
01032   }
01033   output_vbr((unsigned)Table.getSlot(Type::VoidTy) << 5);
01034 
01035   // Emit the list of dependent libraries for the Module.
01036   Module::lib_iterator LI = M->lib_begin();
01037   Module::lib_iterator LE = M->lib_end();
01038   output_vbr(unsigned(LE - LI));   // Emit the number of dependent libraries.
01039   for (; LI != LE; ++LI)
01040     output(*LI);
01041 
01042   // Output the target triple from the module
01043   output(M->getTargetTriple());
01044   
01045   // Emit the table of section names.
01046   output_vbr((unsigned)SectionNames.size());
01047   for (unsigned i = 0, e = SectionNames.size(); i != e; ++i)
01048     output(SectionNames[i]);
01049   
01050   // Output the inline asm string.
01051   output(M->getModuleInlineAsm());
01052 }
01053 
01054 void BytecodeWriter::outputInstructions(const Function *F) {
01055   BytecodeBlock ILBlock(BytecodeFormat::InstructionListBlockID, *this);
01056   for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
01057     for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I)
01058       outputInstruction(*I);
01059 }
01060 
01061 void BytecodeWriter::outputFunction(const Function *F) {
01062   // If this is an external function, there is nothing else to emit!
01063   if (F->isExternal()) return;
01064 
01065   BytecodeBlock FunctionBlock(BytecodeFormat::FunctionBlockID, *this);
01066   output_vbr(getEncodedLinkage(F));
01067 
01068   // Get slot information about the function...
01069   Table.incorporateFunction(F);
01070 
01071   if (Table.getCompactionTable().empty()) {
01072     // Output information about the constants in the function if the compaction
01073     // table is not being used.
01074     outputConstants(true);
01075   } else {
01076     // Otherwise, emit the compaction table.
01077     outputCompactionTable();
01078   }
01079 
01080   // Output all of the instructions in the body of the function
01081   outputInstructions(F);
01082 
01083   // If needed, output the symbol table for the function...
01084   outputSymbolTable(F->getSymbolTable());
01085 
01086   Table.purgeFunction();
01087 }
01088 
01089 void BytecodeWriter::outputCompactionTablePlane(unsigned PlaneNo,
01090                                          const std::vector<const Value*> &Plane,
01091                                                 unsigned StartNo) {
01092   unsigned End = Table.getModuleLevel(PlaneNo);
01093   if (Plane.empty() || StartNo == End || End == 0) return;   // Nothing to emit
01094   assert(StartNo < End && "Cannot emit negative range!");
01095   assert(StartNo < Plane.size() && End <= Plane.size());
01096 
01097   // Do not emit the null initializer!
01098   ++StartNo;
01099 
01100   // Figure out which encoding to use.  By far the most common case we have is
01101   // to emit 0-2 entries in a compaction table plane.
01102   switch (End-StartNo) {
01103   case 0:         // Avoid emitting two vbr's if possible.
01104   case 1:
01105   case 2:
01106     output_vbr((PlaneNo << 2) | End-StartNo);
01107     break;
01108   default:
01109     // Output the number of things.
01110     output_vbr((unsigned(End-StartNo) << 2) | 3);
01111     output_typeid(PlaneNo);                 // Emit the type plane this is
01112     break;
01113   }
01114 
01115   for (unsigned i = StartNo; i != End; ++i)
01116     output_vbr(Table.getGlobalSlot(Plane[i]));
01117 }
01118 
01119 void BytecodeWriter::outputCompactionTypes(unsigned StartNo) {
01120   // Get the compaction type table from the slot calculator
01121   const std::vector<const Type*> &CTypes = Table.getCompactionTypes();
01122 
01123   // The compaction types may have been uncompactified back to the
01124   // global types. If so, we just write an empty table
01125   if (CTypes.size() == 0 ) {
01126     output_vbr(0U);
01127     return;
01128   }
01129 
01130   assert(CTypes.size() >= StartNo && "Invalid compaction types start index");
01131 
01132   // Determine how many types to write
01133   unsigned NumTypes = CTypes.size() - StartNo;
01134 
01135   // Output the number of types.
01136   output_vbr(NumTypes);
01137 
01138   for (unsigned i = StartNo; i < StartNo+NumTypes; ++i)
01139     output_typeid(Table.getGlobalSlot(CTypes[i]));
01140 }
01141 
01142 void BytecodeWriter::outputCompactionTable() {
01143   // Avoid writing the compaction table at all if there is no content.
01144   if (Table.getCompactionTypes().size() >= Type::FirstDerivedTyID ||
01145       (!Table.CompactionTableIsEmpty())) {
01146     BytecodeBlock CTB(BytecodeFormat::CompactionTableBlockID, *this,
01147                       true/*ElideIfEmpty*/);
01148     const std::vector<std::vector<const Value*> > &CT =
01149       Table.getCompactionTable();
01150 
01151     // First things first, emit the type compaction table if there is one.
01152     outputCompactionTypes(Type::FirstDerivedTyID);
01153 
01154     for (unsigned i = 0, e = CT.size(); i != e; ++i)
01155       outputCompactionTablePlane(i, CT[i], 0);
01156   }
01157 }
01158 
01159 void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) {
01160   // Do not output the Bytecode block for an empty symbol table, it just wastes
01161   // space!
01162   if (MST.isEmpty()) return;
01163 
01164   BytecodeBlock SymTabBlock(BytecodeFormat::SymbolTableBlockID, *this,
01165                             true/*ElideIfEmpty*/);
01166 
01167   // Write the number of types
01168   output_vbr(MST.num_types());
01169 
01170   // Write each of the types
01171   for (SymbolTable::type_const_iterator TI = MST.type_begin(),
01172        TE = MST.type_end(); TI != TE; ++TI ) {
01173     // Symtab entry:[def slot #][name]
01174     output_typeid((unsigned)Table.getSlot(TI->second));
01175     output(TI->first);
01176   }
01177 
01178   // Now do each of the type planes in order.
01179   for (SymbolTable::plane_const_iterator PI = MST.plane_begin(),
01180        PE = MST.plane_end(); PI != PE;  ++PI) {
01181     SymbolTable::value_const_iterator I = MST.value_begin(PI->first);
01182     SymbolTable::value_const_iterator End = MST.value_end(PI->first);
01183     int Slot;
01184 
01185     if (I == End) continue;  // Don't mess with an absent type...
01186 
01187     // Write the number of values in this plane
01188     output_vbr((unsigned)PI->second.size());
01189 
01190     // Write the slot number of the type for this plane
01191     Slot = Table.getSlot(PI->first);
01192     assert(Slot != -1 && "Type in symtab, but not in table!");
01193     output_typeid((unsigned)Slot);
01194 
01195     // Write each of the values in this plane
01196     for (; I != End; ++I) {
01197       // Symtab entry: [def slot #][name]
01198       Slot = Table.getSlot(I->second);
01199       assert(Slot != -1 && "Value in symtab but has no slot number!!");
01200       output_vbr((unsigned)Slot);
01201       output(I->first);
01202     }
01203   }
01204 }
01205 
01206 void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out,
01207                                bool compress ) {
01208   assert(M && "You can't write a null module!!");
01209 
01210   // Create a vector of unsigned char for the bytecode output. We
01211   // reserve 256KBytes of space in the vector so that we avoid doing
01212   // lots of little allocations. 256KBytes is sufficient for a large
01213   // proportion of the bytecode files we will encounter. Larger files
01214   // will be automatically doubled in size as needed (std::vector
01215   // behavior).
01216   std::vector<unsigned char> Buffer;
01217   Buffer.reserve(256 * 1024);
01218 
01219   // The BytecodeWriter populates Buffer for us.
01220   BytecodeWriter BCW(Buffer, M);
01221 
01222   // Keep track of how much we've written
01223   BytesWritten += Buffer.size();
01224 
01225   // Determine start and end points of the Buffer
01226   const unsigned char *FirstByte = &Buffer.front();
01227 
01228   // If we're supposed to compress this mess ...
01229   if (compress) {
01230 
01231     // We signal compression by using an alternate magic number for the
01232     // file. The compressed bytecode file's magic number is "llvc" instead
01233     // of "llvm".
01234     char compressed_magic[4];
01235     compressed_magic[0] = 'l';
01236     compressed_magic[1] = 'l';
01237     compressed_magic[2] = 'v';
01238     compressed_magic[3] = 'c';
01239 
01240     Out.write(compressed_magic,4);
01241 
01242     // Compress everything after the magic number (which we altered)
01243     uint64_t zipSize = Compressor::compressToStream(
01244       (char*)(FirstByte+4),        // Skip the magic number
01245       Buffer.size()-4,             // Skip the magic number
01246       Out                          // Where to write compressed data
01247     );
01248 
01249   } else {
01250 
01251     // We're not compressing, so just write the entire block.
01252     Out.write((char*)FirstByte, Buffer.size());
01253   }
01254 
01255   // make sure it hits disk now
01256   Out.flush();
01257 }
01258