LLVM API Documentation

Reader.cpp

Go to the documentation of this file.
00001 //===- Reader.cpp - Code to read bytecode files ---------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This library implements the functionality defined in llvm/Bytecode/Reader.h
00011 //
00012 // Note that this library should be as fast as possible, reentrant, and
00013 // threadsafe!!
00014 //
00015 // TODO: Allow passing in an option to ignore the symbol table
00016 //
00017 //===----------------------------------------------------------------------===//
00018 
00019 #include "Reader.h"
00020 #include "llvm/Assembly/AutoUpgrade.h"
00021 #include "llvm/Bytecode/BytecodeHandler.h"
00022 #include "llvm/BasicBlock.h"
00023 #include "llvm/CallingConv.h"
00024 #include "llvm/Constants.h"
00025 #include "llvm/InlineAsm.h"
00026 #include "llvm/Instructions.h"
00027 #include "llvm/SymbolTable.h"
00028 #include "llvm/Bytecode/Format.h"
00029 #include "llvm/Config/alloca.h"
00030 #include "llvm/Support/GetElementPtrTypeIterator.h"
00031 #include "llvm/Support/Compressor.h"
00032 #include "llvm/Support/MathExtras.h"
00033 #include "llvm/ADT/StringExtras.h"
00034 #include <sstream>
00035 #include <algorithm>
00036 using namespace llvm;
00037 
00038 namespace {
00039   /// @brief A class for maintaining the slot number definition
00040   /// as a placeholder for the actual definition for forward constants defs.
00041   class ConstantPlaceHolder : public ConstantExpr {
00042     ConstantPlaceHolder();                       // DO NOT IMPLEMENT
00043     void operator=(const ConstantPlaceHolder &); // DO NOT IMPLEMENT
00044   public:
00045     Use Op;
00046     ConstantPlaceHolder(const Type *Ty)
00047       : ConstantExpr(Ty, Instruction::UserOp1, &Op, 1),
00048         Op(UndefValue::get(Type::IntTy), this) {
00049     }
00050   };
00051 }
00052 
00053 // Provide some details on error
00054 inline void BytecodeReader::error(std::string err) {
00055   err +=  " (Vers=" ;
00056   err += itostr(RevisionNum) ;
00057   err += ", Pos=" ;
00058   err += itostr(At-MemStart);
00059   err += ")";
00060   throw err;
00061 }
00062 
00063 //===----------------------------------------------------------------------===//
00064 // Bytecode Reading Methods
00065 //===----------------------------------------------------------------------===//
00066 
00067 /// Determine if the current block being read contains any more data.
00068 inline bool BytecodeReader::moreInBlock() {
00069   return At < BlockEnd;
00070 }
00071 
00072 /// Throw an error if we've read past the end of the current block
00073 inline void BytecodeReader::checkPastBlockEnd(const char * block_name) {
00074   if (At > BlockEnd)
00075     error(std::string("Attempt to read past the end of ") + block_name +
00076           " block.");
00077 }
00078 
00079 /// Align the buffer position to a 32 bit boundary
00080 inline void BytecodeReader::align32() {
00081   if (hasAlignment) {
00082     BufPtr Save = At;
00083     At = (const unsigned char *)((intptr_t)(At+3) & (~3UL));
00084     if (At > Save)
00085       if (Handler) Handler->handleAlignment(At - Save);
00086     if (At > BlockEnd)
00087       error("Ran out of data while aligning!");
00088   }
00089 }
00090 
00091 /// Read a whole unsigned integer
00092 inline unsigned BytecodeReader::read_uint() {
00093   if (At+4 > BlockEnd)
00094     error("Ran out of data reading uint!");
00095   At += 4;
00096   return At[-4] | (At[-3] << 8) | (At[-2] << 16) | (At[-1] << 24);
00097 }
00098 
00099 /// Read a variable-bit-rate encoded unsigned integer
00100 inline unsigned BytecodeReader::read_vbr_uint() {
00101   unsigned Shift = 0;
00102   unsigned Result = 0;
00103   BufPtr Save = At;
00104 
00105   do {
00106     if (At == BlockEnd)
00107       error("Ran out of data reading vbr_uint!");
00108     Result |= (unsigned)((*At++) & 0x7F) << Shift;
00109     Shift += 7;
00110   } while (At[-1] & 0x80);
00111   if (Handler) Handler->handleVBR32(At-Save);
00112   return Result;
00113 }
00114 
00115 /// Read a variable-bit-rate encoded unsigned 64-bit integer.
00116 inline uint64_t BytecodeReader::read_vbr_uint64() {
00117   unsigned Shift = 0;
00118   uint64_t Result = 0;
00119   BufPtr Save = At;
00120 
00121   do {
00122     if (At == BlockEnd)
00123       error("Ran out of data reading vbr_uint64!");
00124     Result |= (uint64_t)((*At++) & 0x7F) << Shift;
00125     Shift += 7;
00126   } while (At[-1] & 0x80);
00127   if (Handler) Handler->handleVBR64(At-Save);
00128   return Result;
00129 }
00130 
00131 /// Read a variable-bit-rate encoded signed 64-bit integer.
00132 inline int64_t BytecodeReader::read_vbr_int64() {
00133   uint64_t R = read_vbr_uint64();
00134   if (R & 1) {
00135     if (R != 1)
00136       return -(int64_t)(R >> 1);
00137     else   // There is no such thing as -0 with integers.  "-0" really means
00138            // 0x8000000000000000.
00139       return 1LL << 63;
00140   } else
00141     return  (int64_t)(R >> 1);
00142 }
00143 
00144 /// Read a pascal-style string (length followed by text)
00145 inline std::string BytecodeReader::read_str() {
00146   unsigned Size = read_vbr_uint();
00147   const unsigned char *OldAt = At;
00148   At += Size;
00149   if (At > BlockEnd)             // Size invalid?
00150     error("Ran out of data reading a string!");
00151   return std::string((char*)OldAt, Size);
00152 }
00153 
00154 /// Read an arbitrary block of data
00155 inline void BytecodeReader::read_data(void *Ptr, void *End) {
00156   unsigned char *Start = (unsigned char *)Ptr;
00157   unsigned Amount = (unsigned char *)End - Start;
00158   if (At+Amount > BlockEnd)
00159     error("Ran out of data!");
00160   std::copy(At, At+Amount, Start);
00161   At += Amount;
00162 }
00163 
00164 /// Read a float value in little-endian order
00165 inline void BytecodeReader::read_float(float& FloatVal) {
00166   /// FIXME: This isn't optimal, it has size problems on some platforms
00167   /// where FP is not IEEE.
00168   FloatVal = BitsToFloat(At[0] | (At[1] << 8) | (At[2] << 16) | (At[3] << 24));
00169   At+=sizeof(uint32_t);
00170 }
00171 
00172 /// Read a double value in little-endian order
00173 inline void BytecodeReader::read_double(double& DoubleVal) {
00174   /// FIXME: This isn't optimal, it has size problems on some platforms
00175   /// where FP is not IEEE.
00176   DoubleVal = BitsToDouble((uint64_t(At[0]) <<  0) | (uint64_t(At[1]) << 8) |
00177                            (uint64_t(At[2]) << 16) | (uint64_t(At[3]) << 24) |
00178                            (uint64_t(At[4]) << 32) | (uint64_t(At[5]) << 40) |
00179                            (uint64_t(At[6]) << 48) | (uint64_t(At[7]) << 56));
00180   At+=sizeof(uint64_t);
00181 }
00182 
00183 /// Read a block header and obtain its type and size
00184 inline void BytecodeReader::read_block(unsigned &Type, unsigned &Size) {
00185   if ( hasLongBlockHeaders ) {
00186     Type = read_uint();
00187     Size = read_uint();
00188     switch (Type) {
00189     case BytecodeFormat::Reserved_DoNotUse :
00190       error("Reserved_DoNotUse used as Module Type?");
00191       Type = BytecodeFormat::ModuleBlockID; break;
00192     case BytecodeFormat::Module:
00193       Type = BytecodeFormat::ModuleBlockID; break;
00194     case BytecodeFormat::Function:
00195       Type = BytecodeFormat::FunctionBlockID; break;
00196     case BytecodeFormat::ConstantPool:
00197       Type = BytecodeFormat::ConstantPoolBlockID; break;
00198     case BytecodeFormat::SymbolTable:
00199       Type = BytecodeFormat::SymbolTableBlockID; break;
00200     case BytecodeFormat::ModuleGlobalInfo:
00201       Type = BytecodeFormat::ModuleGlobalInfoBlockID; break;
00202     case BytecodeFormat::GlobalTypePlane:
00203       Type = BytecodeFormat::GlobalTypePlaneBlockID; break;
00204     case BytecodeFormat::InstructionList:
00205       Type = BytecodeFormat::InstructionListBlockID; break;
00206     case BytecodeFormat::CompactionTable:
00207       Type = BytecodeFormat::CompactionTableBlockID; break;
00208     case BytecodeFormat::BasicBlock:
00209       /// This block type isn't used after version 1.1. However, we have to
00210       /// still allow the value in case this is an old bc format file.
00211       /// We just let its value creep thru.
00212       break;
00213     default:
00214       error("Invalid block id found: " + utostr(Type));
00215       break;
00216     }
00217   } else {
00218     Size = read_uint();
00219     Type = Size & 0x1F; // mask low order five bits
00220     Size >>= 5; // get rid of five low order bits, leaving high 27
00221   }
00222   BlockStart = At;
00223   if (At + Size > BlockEnd)
00224     error("Attempt to size a block past end of memory");
00225   BlockEnd = At + Size;
00226   if (Handler) Handler->handleBlock(Type, BlockStart, Size);
00227 }
00228 
00229 
00230 /// In LLVM 1.2 and before, Types were derived from Value and so they were
00231 /// written as part of the type planes along with any other Value. In LLVM
00232 /// 1.3 this changed so that Type does not derive from Value. Consequently,
00233 /// the BytecodeReader's containers for Values can't contain Types because
00234 /// there's no inheritance relationship. This means that the "Type Type"
00235 /// plane is defunct along with the Type::TypeTyID TypeID. In LLVM 1.3
00236 /// whenever a bytecode construct must have both types and values together,
00237 /// the types are always read/written first and then the Values. Furthermore
00238 /// since Type::TypeTyID no longer exists, its value (12) now corresponds to
00239 /// Type::LabelTyID. In order to overcome this we must "sanitize" all the
00240 /// type TypeIDs we encounter. For LLVM 1.3 bytecode files, there's no change.
00241 /// For LLVM 1.2 and before, this function will decrement the type id by
00242 /// one to account for the missing Type::TypeTyID enumerator if the value is
00243 /// larger than 12 (Type::LabelTyID). If the value is exactly 12, then this
00244 /// function returns true, otherwise false. This helps detect situations
00245 /// where the pre 1.3 bytecode is indicating that what follows is a type.
00246 /// @returns true iff type id corresponds to pre 1.3 "type type"
00247 inline bool BytecodeReader::sanitizeTypeId(unsigned &TypeId) {
00248   if (hasTypeDerivedFromValue) { /// do nothing if 1.3 or later
00249     if (TypeId == Type::LabelTyID) {
00250       TypeId = Type::VoidTyID; // sanitize it
00251       return true; // indicate we got TypeTyID in pre 1.3 bytecode
00252     } else if (TypeId > Type::LabelTyID)
00253       --TypeId; // shift all planes down because type type plane is missing
00254   }
00255   return false;
00256 }
00257 
00258 /// Reads a vbr uint to read in a type id and does the necessary
00259 /// conversion on it by calling sanitizeTypeId.
00260 /// @returns true iff \p TypeId read corresponds to a pre 1.3 "type type"
00261 /// @see sanitizeTypeId
00262 inline bool BytecodeReader::read_typeid(unsigned &TypeId) {
00263   TypeId = read_vbr_uint();
00264   if ( !has32BitTypes )
00265     if ( TypeId == 0x00FFFFFF )
00266       TypeId = read_vbr_uint();
00267   return sanitizeTypeId(TypeId);
00268 }
00269 
00270 //===----------------------------------------------------------------------===//
00271 // IR Lookup Methods
00272 //===----------------------------------------------------------------------===//
00273 
00274 /// Determine if a type id has an implicit null value
00275 inline bool BytecodeReader::hasImplicitNull(unsigned TyID) {
00276   if (!hasExplicitPrimitiveZeros)
00277     return TyID != Type::LabelTyID && TyID != Type::VoidTyID;
00278   return TyID >= Type::FirstDerivedTyID;
00279 }
00280 
00281 /// Obtain a type given a typeid and account for things like compaction tables,
00282 /// function level vs module level, and the offsetting for the primitive types.
00283 const Type *BytecodeReader::getType(unsigned ID) {
00284   if (ID < Type::FirstDerivedTyID)
00285     if (const Type *T = Type::getPrimitiveType((Type::TypeID)ID))
00286       return T;   // Asked for a primitive type...
00287 
00288   // Otherwise, derived types need offset...
00289   ID -= Type::FirstDerivedTyID;
00290 
00291   if (!CompactionTypes.empty()) {
00292     if (ID >= CompactionTypes.size())
00293       error("Type ID out of range for compaction table!");
00294     return CompactionTypes[ID].first;
00295   }
00296 
00297   // Is it a module-level type?
00298   if (ID < ModuleTypes.size())
00299     return ModuleTypes[ID].get();
00300 
00301   // Nope, is it a function-level type?
00302   ID -= ModuleTypes.size();
00303   if (ID < FunctionTypes.size())
00304     return FunctionTypes[ID].get();
00305 
00306   error("Illegal type reference!");
00307   return Type::VoidTy;
00308 }
00309 
00310 /// Get a sanitized type id. This just makes sure that the \p ID
00311 /// is both sanitized and not the "type type" of pre-1.3 bytecode.
00312 /// @see sanitizeTypeId
00313 inline const Type* BytecodeReader::getSanitizedType(unsigned& ID) {
00314   if (sanitizeTypeId(ID))
00315     error("Invalid type id encountered");
00316   return getType(ID);
00317 }
00318 
00319 /// This method just saves some coding. It uses read_typeid to read
00320 /// in a sanitized type id, errors that its not the type type, and
00321 /// then calls getType to return the type value.
00322 inline const Type* BytecodeReader::readSanitizedType() {
00323   unsigned ID;
00324   if (read_typeid(ID))
00325     error("Invalid type id encountered");
00326   return getType(ID);
00327 }
00328 
00329 /// Get the slot number associated with a type accounting for primitive
00330 /// types, compaction tables, and function level vs module level.
00331 unsigned BytecodeReader::getTypeSlot(const Type *Ty) {
00332   if (Ty->isPrimitiveType())
00333     return Ty->getTypeID();
00334 
00335   // Scan the compaction table for the type if needed.
00336   if (!CompactionTypes.empty()) {
00337     for (unsigned i = 0, e = CompactionTypes.size(); i != e; ++i)
00338       if (CompactionTypes[i].first == Ty)
00339         return Type::FirstDerivedTyID + i;
00340 
00341     error("Couldn't find type specified in compaction table!");
00342   }
00343 
00344   // Check the function level types first...
00345   TypeListTy::iterator I = std::find(FunctionTypes.begin(),
00346                                      FunctionTypes.end(), Ty);
00347 
00348   if (I != FunctionTypes.end())
00349     return Type::FirstDerivedTyID + ModuleTypes.size() +
00350            (&*I - &FunctionTypes[0]);
00351 
00352   // If we don't have our cache yet, build it now.
00353   if (ModuleTypeIDCache.empty()) {
00354     unsigned N = 0;
00355     ModuleTypeIDCache.reserve(ModuleTypes.size());
00356     for (TypeListTy::iterator I = ModuleTypes.begin(), E = ModuleTypes.end();
00357          I != E; ++I, ++N)
00358       ModuleTypeIDCache.push_back(std::make_pair(*I, N));
00359     
00360     std::sort(ModuleTypeIDCache.begin(), ModuleTypeIDCache.end());
00361   }
00362   
00363   // Binary search the cache for the entry.
00364   std::vector<std::pair<const Type*, unsigned> >::iterator IT =
00365     std::lower_bound(ModuleTypeIDCache.begin(), ModuleTypeIDCache.end(),
00366                      std::make_pair(Ty, 0U));
00367   if (IT == ModuleTypeIDCache.end() || IT->first != Ty)
00368     error("Didn't find type in ModuleTypes.");
00369     
00370   return Type::FirstDerivedTyID + IT->second;
00371 }
00372 
00373 /// This is just like getType, but when a compaction table is in use, it is
00374 /// ignored.  It also ignores function level types.
00375 /// @see getType
00376 const Type *BytecodeReader::getGlobalTableType(unsigned Slot) {
00377   if (Slot < Type::FirstDerivedTyID) {
00378     const Type *Ty = Type::getPrimitiveType((Type::TypeID)Slot);
00379     if (!Ty)
00380       error("Not a primitive type ID?");
00381     return Ty;
00382   }
00383   Slot -= Type::FirstDerivedTyID;
00384   if (Slot >= ModuleTypes.size())
00385     error("Illegal compaction table type reference!");
00386   return ModuleTypes[Slot];
00387 }
00388 
00389 /// This is just like getTypeSlot, but when a compaction table is in use, it
00390 /// is ignored. It also ignores function level types.
00391 unsigned BytecodeReader::getGlobalTableTypeSlot(const Type *Ty) {
00392   if (Ty->isPrimitiveType())
00393     return Ty->getTypeID();
00394   
00395   // If we don't have our cache yet, build it now.
00396   if (ModuleTypeIDCache.empty()) {
00397     unsigned N = 0;
00398     ModuleTypeIDCache.reserve(ModuleTypes.size());
00399     for (TypeListTy::iterator I = ModuleTypes.begin(), E = ModuleTypes.end();
00400          I != E; ++I, ++N)
00401       ModuleTypeIDCache.push_back(std::make_pair(*I, N));
00402     
00403     std::sort(ModuleTypeIDCache.begin(), ModuleTypeIDCache.end());
00404   }
00405   
00406   // Binary search the cache for the entry.
00407   std::vector<std::pair<const Type*, unsigned> >::iterator IT =
00408     std::lower_bound(ModuleTypeIDCache.begin(), ModuleTypeIDCache.end(),
00409                      std::make_pair(Ty, 0U));
00410   if (IT == ModuleTypeIDCache.end() || IT->first != Ty)
00411     error("Didn't find type in ModuleTypes.");
00412   
00413   return Type::FirstDerivedTyID + IT->second;
00414 }
00415 
00416 /// Retrieve a value of a given type and slot number, possibly creating
00417 /// it if it doesn't already exist.
00418 Value * BytecodeReader::getValue(unsigned type, unsigned oNum, bool Create) {
00419   assert(type != Type::LabelTyID && "getValue() cannot get blocks!");
00420   unsigned Num = oNum;
00421 
00422   // If there is a compaction table active, it defines the low-level numbers.
00423   // If not, the module values define the low-level numbers.
00424   if (CompactionValues.size() > type && !CompactionValues[type].empty()) {
00425     if (Num < CompactionValues[type].size())
00426       return CompactionValues[type][Num];
00427     Num -= CompactionValues[type].size();
00428   } else {
00429     // By default, the global type id is the type id passed in
00430     unsigned GlobalTyID = type;
00431 
00432     // If the type plane was compactified, figure out the global type ID by
00433     // adding the derived type ids and the distance.
00434     if (!CompactionTypes.empty() && type >= Type::FirstDerivedTyID)
00435       GlobalTyID = CompactionTypes[type-Type::FirstDerivedTyID].second;
00436 
00437     if (hasImplicitNull(GlobalTyID)) {
00438       const Type *Ty = getType(type);
00439       if (!isa<OpaqueType>(Ty)) {
00440         if (Num == 0)
00441           return Constant::getNullValue(Ty);
00442         --Num;
00443       }
00444     }
00445 
00446     if (GlobalTyID < ModuleValues.size() && ModuleValues[GlobalTyID]) {
00447       if (Num < ModuleValues[GlobalTyID]->size())
00448         return ModuleValues[GlobalTyID]->getOperand(Num);
00449       Num -= ModuleValues[GlobalTyID]->size();
00450     }
00451   }
00452 
00453   if (FunctionValues.size() > type &&
00454       FunctionValues[type] &&
00455       Num < FunctionValues[type]->size())
00456     return FunctionValues[type]->getOperand(Num);
00457 
00458   if (!Create) return 0;  // Do not create a placeholder?
00459 
00460   // Did we already create a place holder?
00461   std::pair<unsigned,unsigned> KeyValue(type, oNum);
00462   ForwardReferenceMap::iterator I = ForwardReferences.lower_bound(KeyValue);
00463   if (I != ForwardReferences.end() && I->first == KeyValue)
00464     return I->second;   // We have already created this placeholder
00465 
00466   // If the type exists (it should)
00467   if (const Type* Ty = getType(type)) {
00468     // Create the place holder
00469     Value *Val = new Argument(Ty);
00470     ForwardReferences.insert(I, std::make_pair(KeyValue, Val));
00471     return Val;
00472   }
00473   throw "Can't create placeholder for value of type slot #" + utostr(type);
00474 }
00475 
00476 /// This is just like getValue, but when a compaction table is in use, it
00477 /// is ignored.  Also, no forward references or other fancy features are
00478 /// supported.
00479 Value* BytecodeReader::getGlobalTableValue(unsigned TyID, unsigned SlotNo) {
00480   if (SlotNo == 0)
00481     return Constant::getNullValue(getType(TyID));
00482 
00483   if (!CompactionTypes.empty() && TyID >= Type::FirstDerivedTyID) {
00484     TyID -= Type::FirstDerivedTyID;
00485     if (TyID >= CompactionTypes.size())
00486       error("Type ID out of range for compaction table!");
00487     TyID = CompactionTypes[TyID].second;
00488   }
00489 
00490   --SlotNo;
00491 
00492   if (TyID >= ModuleValues.size() || ModuleValues[TyID] == 0 ||
00493       SlotNo >= ModuleValues[TyID]->size()) {
00494     if (TyID >= ModuleValues.size() || ModuleValues[TyID] == 0)
00495       error("Corrupt compaction table entry!"
00496             + utostr(TyID) + ", " + utostr(SlotNo) + ": "
00497             + utostr(ModuleValues.size()));
00498     else
00499       error("Corrupt compaction table entry!"
00500             + utostr(TyID) + ", " + utostr(SlotNo) + ": "
00501             + utostr(ModuleValues.size()) + ", "
00502             + utohexstr(reinterpret_cast<uint64_t>(((void*)ModuleValues[TyID])))
00503             + ", "
00504             + utostr(ModuleValues[TyID]->size()));
00505   }
00506   return ModuleValues[TyID]->getOperand(SlotNo);
00507 }
00508 
00509 /// Just like getValue, except that it returns a null pointer
00510 /// only on error.  It always returns a constant (meaning that if the value is
00511 /// defined, but is not a constant, that is an error).  If the specified
00512 /// constant hasn't been parsed yet, a placeholder is defined and used.
00513 /// Later, after the real value is parsed, the placeholder is eliminated.
00514 Constant* BytecodeReader::getConstantValue(unsigned TypeSlot, unsigned Slot) {
00515   if (Value *V = getValue(TypeSlot, Slot, false))
00516     if (Constant *C = dyn_cast<Constant>(V))
00517       return C;   // If we already have the value parsed, just return it
00518     else
00519       error("Value for slot " + utostr(Slot) +
00520             " is expected to be a constant!");
00521 
00522   std::pair<unsigned, unsigned> Key(TypeSlot, Slot);
00523   ConstantRefsType::iterator I = ConstantFwdRefs.lower_bound(Key);
00524 
00525   if (I != ConstantFwdRefs.end() && I->first == Key) {
00526     return I->second;
00527   } else {
00528     // Create a placeholder for the constant reference and
00529     // keep track of the fact that we have a forward ref to recycle it
00530     Constant *C = new ConstantPlaceHolder(getType(TypeSlot));
00531 
00532     // Keep track of the fact that we have a forward ref to recycle it
00533     ConstantFwdRefs.insert(I, std::make_pair(Key, C));
00534     return C;
00535   }
00536 }
00537 
00538 //===----------------------------------------------------------------------===//
00539 // IR Construction Methods
00540 //===----------------------------------------------------------------------===//
00541 
00542 /// As values are created, they are inserted into the appropriate place
00543 /// with this method. The ValueTable argument must be one of ModuleValues
00544 /// or FunctionValues data members of this class.
00545 unsigned BytecodeReader::insertValue(Value *Val, unsigned type,
00546                                       ValueTable &ValueTab) {
00547   if (ValueTab.size() <= type)
00548     ValueTab.resize(type+1);
00549 
00550   if (!ValueTab[type]) ValueTab[type] = new ValueList();
00551 
00552   ValueTab[type]->push_back(Val);
00553 
00554   bool HasOffset = hasImplicitNull(type) && !isa<OpaqueType>(Val->getType());
00555   return ValueTab[type]->size()-1 + HasOffset;
00556 }
00557 
00558 /// Insert the arguments of a function as new values in the reader.
00559 void BytecodeReader::insertArguments(Function* F) {
00560   const FunctionType *FT = F->getFunctionType();
00561   Function::arg_iterator AI = F->arg_begin();
00562   for (FunctionType::param_iterator It = FT->param_begin();
00563        It != FT->param_end(); ++It, ++AI)
00564     insertValue(AI, getTypeSlot(AI->getType()), FunctionValues);
00565 }
00566 
00567 //===----------------------------------------------------------------------===//
00568 // Bytecode Parsing Methods
00569 //===----------------------------------------------------------------------===//
00570 
00571 /// This method parses a single instruction. The instruction is
00572 /// inserted at the end of the \p BB provided. The arguments of
00573 /// the instruction are provided in the \p Oprnds vector.
00574 void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds,
00575                                       BasicBlock* BB) {
00576   BufPtr SaveAt = At;
00577 
00578   // Clear instruction data
00579   Oprnds.clear();
00580   unsigned iType = 0;
00581   unsigned Opcode = 0;
00582   unsigned Op = read_uint();
00583 
00584   // bits   Instruction format:        Common to all formats
00585   // --------------------------
00586   // 01-00: Opcode type, fixed to 1.
00587   // 07-02: Opcode
00588   Opcode    = (Op >> 2) & 63;
00589   Oprnds.resize((Op >> 0) & 03);
00590 
00591   // Extract the operands
00592   switch (Oprnds.size()) {
00593   case 1:
00594     // bits   Instruction format:
00595     // --------------------------
00596     // 19-08: Resulting type plane
00597     // 31-20: Operand #1 (if set to (2^12-1), then zero operands)
00598     //
00599     iType   = (Op >>  8) & 4095;
00600     Oprnds[0] = (Op >> 20) & 4095;
00601     if (Oprnds[0] == 4095)    // Handle special encoding for 0 operands...
00602       Oprnds.resize(0);
00603     break;
00604   case 2:
00605     // bits   Instruction format:
00606     // --------------------------
00607     // 15-08: Resulting type plane
00608     // 23-16: Operand #1
00609     // 31-24: Operand #2
00610     //
00611     iType   = (Op >>  8) & 255;
00612     Oprnds[0] = (Op >> 16) & 255;
00613     Oprnds[1] = (Op >> 24) & 255;
00614     break;
00615   case 3:
00616     // bits   Instruction format:
00617     // --------------------------
00618     // 13-08: Resulting type plane
00619     // 19-14: Operand #1
00620     // 25-20: Operand #2
00621     // 31-26: Operand #3
00622     //
00623     iType   = (Op >>  8) & 63;
00624     Oprnds[0] = (Op >> 14) & 63;
00625     Oprnds[1] = (Op >> 20) & 63;
00626     Oprnds[2] = (Op >> 26) & 63;
00627     break;
00628   case 0:
00629     At -= 4;  // Hrm, try this again...
00630     Opcode = read_vbr_uint();
00631     Opcode >>= 2;
00632     iType = read_vbr_uint();
00633 
00634     unsigned NumOprnds = read_vbr_uint();
00635     Oprnds.resize(NumOprnds);
00636 
00637     if (NumOprnds == 0)
00638       error("Zero-argument instruction found; this is invalid.");
00639 
00640     for (unsigned i = 0; i != NumOprnds; ++i)
00641       Oprnds[i] = read_vbr_uint();
00642     align32();
00643     break;
00644   }
00645 
00646   const Type *InstTy = getSanitizedType(iType);
00647 
00648   // We have enough info to inform the handler now.
00649   if (Handler) Handler->handleInstruction(Opcode, InstTy, Oprnds, At-SaveAt);
00650 
00651   // Declare the resulting instruction we'll build.
00652   Instruction *Result = 0;
00653 
00654   // If this is a bytecode format that did not include the unreachable
00655   // instruction, bump up all opcodes numbers to make space.
00656   if (hasNoUnreachableInst) {
00657     if (Opcode >= Instruction::Unreachable &&
00658         Opcode < 62) {
00659       ++Opcode;
00660     }
00661   }
00662 
00663   // Handle binary operators
00664   if (Opcode >= Instruction::BinaryOpsBegin &&
00665       Opcode <  Instruction::BinaryOpsEnd  && Oprnds.size() == 2)
00666     Result = BinaryOperator::create((Instruction::BinaryOps)Opcode,
00667                                     getValue(iType, Oprnds[0]),
00668                                     getValue(iType, Oprnds[1]));
00669 
00670   bool isCall = false;
00671   switch (Opcode) {
00672   default:
00673     if (Result == 0)
00674       error("Illegal instruction read!");
00675     break;
00676   case Instruction::VAArg:
00677     Result = new VAArgInst(getValue(iType, Oprnds[0]),
00678                            getSanitizedType(Oprnds[1]));
00679     break;
00680   case 32: { //VANext_old
00681     const Type* ArgTy = getValue(iType, Oprnds[0])->getType();
00682     Function* NF = TheModule->getOrInsertFunction("llvm.va_copy", ArgTy, ArgTy,
00683                                                   (Type *)0);
00684 
00685     //b = vanext a, t ->
00686     //foo = alloca 1 of t
00687     //bar = vacopy a
00688     //store bar -> foo
00689     //tmp = vaarg foo, t
00690     //b = load foo
00691     AllocaInst* foo = new AllocaInst(ArgTy, 0, "vanext.fix");
00692     BB->getInstList().push_back(foo);
00693     CallInst* bar = new CallInst(NF, getValue(iType, Oprnds[0]));
00694     BB->getInstList().push_back(bar);
00695     BB->getInstList().push_back(new StoreInst(bar, foo));
00696     Instruction* tmp = new VAArgInst(foo, getSanitizedType(Oprnds[1]));
00697     BB->getInstList().push_back(tmp);
00698     Result = new LoadInst(foo);
00699     break;
00700   }
00701   case 33: { //VAArg_old
00702     const Type* ArgTy = getValue(iType, Oprnds[0])->getType();
00703     Function* NF = TheModule->getOrInsertFunction("llvm.va_copy", ArgTy, ArgTy,
00704                                                   (Type *)0);
00705 
00706     //b = vaarg a, t ->
00707     //foo = alloca 1 of t
00708     //bar = vacopy a
00709     //store bar -> foo
00710     //b = vaarg foo, t
00711     AllocaInst* foo = new AllocaInst(ArgTy, 0, "vaarg.fix");
00712     BB->getInstList().push_back(foo);
00713     CallInst* bar = new CallInst(NF, getValue(iType, Oprnds[0]));
00714     BB->getInstList().push_back(bar);
00715     BB->getInstList().push_back(new StoreInst(bar, foo));
00716     Result = new VAArgInst(foo, getSanitizedType(Oprnds[1]));
00717     break;
00718   }
00719   case Instruction::ExtractElement: {
00720     if (Oprnds.size() != 2)
00721       throw std::string("Invalid extractelement instruction!");
00722     Value *V1 = getValue(iType, Oprnds[0]);
00723     Value *V2 = getValue(Type::UIntTyID, Oprnds[1]);
00724     
00725     if (!ExtractElementInst::isValidOperands(V1, V2))
00726       throw std::string("Invalid extractelement instruction!");
00727 
00728     Result = new ExtractElementInst(V1, V2);
00729     break;
00730   }
00731   case Instruction::InsertElement: {
00732     const PackedType *PackedTy = dyn_cast<PackedType>(InstTy);
00733     if (!PackedTy || Oprnds.size() != 3)
00734       throw std::string("Invalid insertelement instruction!");
00735     
00736     Value *V1 = getValue(iType, Oprnds[0]);
00737     Value *V2 = getValue(getTypeSlot(PackedTy->getElementType()), Oprnds[1]);
00738     Value *V3 = getValue(Type::UIntTyID, Oprnds[2]);
00739       
00740     if (!InsertElementInst::isValidOperands(V1, V2, V3))
00741       throw std::string("Invalid insertelement instruction!");
00742     Result = new InsertElementInst(V1, V2, V3);
00743     break;
00744   }
00745   case Instruction::ShuffleVector: {
00746     const PackedType *PackedTy = dyn_cast<PackedType>(InstTy);
00747     if (!PackedTy || Oprnds.size() != 3)
00748       throw std::string("Invalid shufflevector instruction!");
00749     Value *V1 = getValue(iType, Oprnds[0]);
00750     Value *V2 = getValue(iType, Oprnds[1]);
00751     const PackedType *EltTy = 
00752       PackedType::get(Type::UIntTy, PackedTy->getNumElements());
00753     Value *V3 = getValue(getTypeSlot(EltTy), Oprnds[2]);
00754     if (!ShuffleVectorInst::isValidOperands(V1, V2, V3))
00755       throw std::string("Invalid shufflevector instruction!");
00756     Result = new ShuffleVectorInst(V1, V2, V3);
00757     break;
00758   }
00759   case Instruction::Cast:
00760     Result = new CastInst(getValue(iType, Oprnds[0]),
00761                           getSanitizedType(Oprnds[1]));
00762     break;
00763   case Instruction::Select:
00764     Result = new SelectInst(getValue(Type::BoolTyID, Oprnds[0]),
00765                             getValue(iType, Oprnds[1]),
00766                             getValue(iType, Oprnds[2]));
00767     break;
00768   case Instruction::PHI: {
00769     if (Oprnds.size() == 0 || (Oprnds.size() & 1))
00770       error("Invalid phi node encountered!");
00771 
00772     PHINode *PN = new PHINode(InstTy);
00773     PN->reserveOperandSpace(Oprnds.size());
00774     for (unsigned i = 0, e = Oprnds.size(); i != e; i += 2)
00775       PN->addIncoming(getValue(iType, Oprnds[i]), getBasicBlock(Oprnds[i+1]));
00776     Result = PN;
00777     break;
00778   }
00779 
00780   case Instruction::Shl:
00781   case Instruction::Shr:
00782     Result = new ShiftInst((Instruction::OtherOps)Opcode,
00783                            getValue(iType, Oprnds[0]),
00784                            getValue(Type::UByteTyID, Oprnds[1]));
00785     break;
00786   case Instruction::Ret:
00787     if (Oprnds.size() == 0)
00788       Result = new ReturnInst();
00789     else if (Oprnds.size() == 1)
00790       Result = new ReturnInst(getValue(iType, Oprnds[0]));
00791     else
00792       error("Unrecognized instruction!");
00793     break;
00794 
00795   case Instruction::Br:
00796     if (Oprnds.size() == 1)
00797       Result = new BranchInst(getBasicBlock(Oprnds[0]));
00798     else if (Oprnds.size() == 3)
00799       Result = new BranchInst(getBasicBlock(Oprnds[0]),
00800           getBasicBlock(Oprnds[1]), getValue(Type::BoolTyID , Oprnds[2]));
00801     else
00802       error("Invalid number of operands for a 'br' instruction!");
00803     break;
00804   case Instruction::Switch: {
00805     if (Oprnds.size() & 1)
00806       error("Switch statement with odd number of arguments!");
00807 
00808     SwitchInst *I = new SwitchInst(getValue(iType, Oprnds[0]),
00809                                    getBasicBlock(Oprnds[1]),
00810                                    Oprnds.size()/2-1);
00811     for (unsigned i = 2, e = Oprnds.size(); i != e; i += 2)
00812       I->addCase(cast<ConstantInt>(getValue(iType, Oprnds[i])),
00813                  getBasicBlock(Oprnds[i+1]));
00814     Result = I;
00815     break;
00816   }
00817 
00818   case 58:                   // Call with extra operand for calling conv
00819   case 59:                   // tail call, Fast CC
00820   case 60:                   // normal call, Fast CC
00821   case 61:                   // tail call, C Calling Conv
00822   case Instruction::Call: {  // Normal Call, C Calling Convention
00823     if (Oprnds.size() == 0)
00824       error("Invalid call instruction encountered!");
00825 
00826     Value *F = getValue(iType, Oprnds[0]);
00827 
00828     unsigned CallingConv = CallingConv::C;
00829     bool isTailCall = false;
00830 
00831     if (Opcode == 61 || Opcode == 59)
00832       isTailCall = true;
00833 
00834     // Check to make sure we have a pointer to function type
00835     const PointerType *PTy = dyn_cast<PointerType>(F->getType());
00836     if (PTy == 0) error("Call to non function pointer value!");
00837     const FunctionType *FTy = dyn_cast<FunctionType>(PTy->getElementType());
00838     if (FTy == 0) error("Call to non function pointer value!");
00839 
00840     std::vector<Value *> Params;
00841     if (!FTy->isVarArg()) {
00842       FunctionType::param_iterator It = FTy->param_begin();
00843 
00844       if (Opcode == 58) {
00845         isTailCall = Oprnds.back() & 1;
00846         CallingConv = Oprnds.back() >> 1;
00847         Oprnds.pop_back();
00848       } else if (Opcode == 59 || Opcode == 60)
00849         CallingConv = CallingConv::Fast;
00850 
00851       for (unsigned i = 1, e = Oprnds.size(); i != e; ++i) {
00852         if (It == FTy->param_end())
00853           error("Invalid call instruction!");
00854         Params.push_back(getValue(getTypeSlot(*It++), Oprnds[i]));
00855       }
00856       if (It != FTy->param_end())
00857         error("Invalid call instruction!");
00858     } else {
00859       Oprnds.erase(Oprnds.begin(), Oprnds.begin()+1);
00860 
00861       unsigned FirstVariableOperand;
00862       if (Oprnds.size() < FTy->getNumParams())
00863         error("Call instruction missing operands!");
00864 
00865       // Read all of the fixed arguments
00866       for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
00867         Params.push_back(getValue(getTypeSlot(FTy->getParamType(i)),Oprnds[i]));
00868 
00869       FirstVariableOperand = FTy->getNumParams();
00870 
00871       if ((Oprnds.size()-FirstVariableOperand) & 1)
00872         error("Invalid call instruction!");   // Must be pairs of type/value
00873 
00874       for (unsigned i = FirstVariableOperand, e = Oprnds.size();
00875            i != e; i += 2)
00876         Params.push_back(getValue(Oprnds[i], Oprnds[i+1]));
00877     }
00878 
00879     Result = new CallInst(F, Params);
00880     if (isTailCall) cast<CallInst>(Result)->setTailCall();
00881     if (CallingConv) cast<CallInst>(Result)->setCallingConv(CallingConv);
00882     break;
00883   }
00884   case 56:                     // Invoke with encoded CC
00885   case 57:                     // Invoke Fast CC
00886   case Instruction::Invoke: {  // Invoke C CC
00887     if (Oprnds.size() < 3)
00888       error("Invalid invoke instruction!");
00889     Value *F = getValue(iType, Oprnds[0]);
00890 
00891     // Check to make sure we have a pointer to function type
00892     const PointerType *PTy = dyn_cast<PointerType>(F->getType());
00893     if (PTy == 0)
00894       error("Invoke to non function pointer value!");
00895     const FunctionType *FTy = dyn_cast<FunctionType>(PTy->getElementType());
00896     if (FTy == 0)
00897       error("Invoke to non function pointer value!");
00898 
00899     std::vector<Value *> Params;
00900     BasicBlock *Normal, *Except;
00901     unsigned CallingConv = CallingConv::C;
00902 
00903     if (Opcode == 57)
00904       CallingConv = CallingConv::Fast;
00905     else if (Opcode == 56) {
00906       CallingConv = Oprnds.back();
00907       Oprnds.pop_back();
00908     }
00909 
00910     if (!FTy->isVarArg()) {
00911       Normal = getBasicBlock(Oprnds[1]);
00912       Except = getBasicBlock(Oprnds[2]);
00913 
00914       FunctionType::param_iterator It = FTy->param_begin();
00915       for (unsigned i = 3, e = Oprnds.size(); i != e; ++i) {
00916         if (It == FTy->param_end())
00917           error("Invalid invoke instruction!");
00918         Params.push_back(getValue(getTypeSlot(*It++), Oprnds[i]));
00919       }
00920       if (It != FTy->param_end())
00921         error("Invalid invoke instruction!");
00922     } else {
00923       Oprnds.erase(Oprnds.begin(), Oprnds.begin()+1);
00924 
00925       Normal = getBasicBlock(Oprnds[0]);
00926       Except = getBasicBlock(Oprnds[1]);
00927 
00928       unsigned FirstVariableArgument = FTy->getNumParams()+2;
00929       for (unsigned i = 2; i != FirstVariableArgument; ++i)
00930         Params.push_back(getValue(getTypeSlot(FTy->getParamType(i-2)),
00931                                   Oprnds[i]));
00932 
00933       if (Oprnds.size()-FirstVariableArgument & 1) // Must be type/value pairs
00934         error("Invalid invoke instruction!");
00935 
00936       for (unsigned i = FirstVariableArgument; i < Oprnds.size(); i += 2)
00937         Params.push_back(getValue(Oprnds[i], Oprnds[i+1]));
00938     }
00939 
00940     Result = new InvokeInst(F, Normal, Except, Params);
00941     if (CallingConv) cast<InvokeInst>(Result)->setCallingConv(CallingConv);
00942     break;
00943   }
00944   case Instruction::Malloc: {
00945     unsigned Align = 0;
00946     if (Oprnds.size() == 2)
00947       Align = (1 << Oprnds[1]) >> 1;
00948     else if (Oprnds.size() > 2)
00949       error("Invalid malloc instruction!");
00950     if (!isa<PointerType>(InstTy))
00951       error("Invalid malloc instruction!");
00952 
00953     Result = new MallocInst(cast<PointerType>(InstTy)->getElementType(),
00954                             getValue(Type::UIntTyID, Oprnds[0]), Align);
00955     break;
00956   }
00957 
00958   case Instruction::Alloca: {
00959     unsigned Align = 0;
00960     if (Oprnds.size() == 2)
00961       Align = (1 << Oprnds[1]) >> 1;
00962     else if (Oprnds.size() > 2)
00963       error("Invalid alloca instruction!");
00964     if (!isa<PointerType>(InstTy))
00965       error("Invalid alloca instruction!");
00966 
00967     Result = new AllocaInst(cast<PointerType>(InstTy)->getElementType(),
00968                             getValue(Type::UIntTyID, Oprnds[0]), Align);
00969     break;
00970   }
00971   case Instruction::Free:
00972     if (!isa<PointerType>(InstTy))
00973       error("Invalid free instruction!");
00974     Result = new FreeInst(getValue(iType, Oprnds[0]));
00975     break;
00976   case Instruction::GetElementPtr: {
00977     if (Oprnds.size() == 0 || !isa<PointerType>(InstTy))
00978       error("Invalid getelementptr instruction!");
00979 
00980     std::vector<Value*> Idx;
00981 
00982     const Type *NextTy = InstTy;
00983     for (unsigned i = 1, e = Oprnds.size(); i != e; ++i) {
00984       const CompositeType *TopTy = dyn_cast_or_null<CompositeType>(NextTy);
00985       if (!TopTy)
00986         error("Invalid getelementptr instruction!");
00987 
00988       unsigned ValIdx = Oprnds[i];
00989       unsigned IdxTy = 0;
00990       if (!hasRestrictedGEPTypes) {
00991         // Struct indices are always uints, sequential type indices can be any
00992         // of the 32 or 64-bit integer types.  The actual choice of type is
00993         // encoded in the low two bits of the slot number.
00994         if (isa<StructType>(TopTy))
00995           IdxTy = Type::UIntTyID;
00996         else {
00997           switch (ValIdx & 3) {
00998           default:
00999           case 0: IdxTy = Type::UIntTyID; break;
01000           case 1: IdxTy = Type::IntTyID; break;
01001           case 2: IdxTy = Type::ULongTyID; break;
01002           case 3: IdxTy = Type::LongTyID; break;
01003           }
01004           ValIdx >>= 2;
01005         }
01006       } else {
01007         IdxTy = isa<StructType>(TopTy) ? Type::UByteTyID : Type::LongTyID;
01008       }
01009 
01010       Idx.push_back(getValue(IdxTy, ValIdx));
01011 
01012       // Convert ubyte struct indices into uint struct indices.
01013       if (isa<StructType>(TopTy) && hasRestrictedGEPTypes)
01014         if (ConstantUInt *C = dyn_cast<ConstantUInt>(Idx.back()))
01015           Idx[Idx.size()-1] = ConstantExpr::getCast(C, Type::UIntTy);
01016 
01017       NextTy = GetElementPtrInst::getIndexedType(InstTy, Idx, true);
01018     }
01019 
01020     Result = new GetElementPtrInst(getValue(iType, Oprnds[0]), Idx);
01021     break;
01022   }
01023 
01024   case 62:   // volatile load
01025   case Instruction::Load:
01026     if (Oprnds.size() != 1 || !isa<PointerType>(InstTy))
01027       error("Invalid load instruction!");
01028     Result = new LoadInst(getValue(iType, Oprnds[0]), "", Opcode == 62);
01029     break;
01030 
01031   case 63:   // volatile store
01032   case Instruction::Store: {
01033     if (!isa<PointerType>(InstTy) || Oprnds.size() != 2)
01034       error("Invalid store instruction!");
01035 
01036     Value *Ptr = getValue(iType, Oprnds[1]);
01037     const Type *ValTy = cast<PointerType>(Ptr->getType())->getElementType();
01038     Result = new StoreInst(getValue(getTypeSlot(ValTy), Oprnds[0]), Ptr,
01039                            Opcode == 63);
01040     break;
01041   }
01042   case Instruction::Unwind:
01043     if (Oprnds.size() != 0) error("Invalid unwind instruction!");
01044     Result = new UnwindInst();
01045     break;
01046   case Instruction::Unreachable:
01047     if (Oprnds.size() != 0) error("Invalid unreachable instruction!");
01048     Result = new UnreachableInst();
01049     break;
01050   }  // end switch(Opcode)
01051 
01052   BB->getInstList().push_back(Result);
01053 
01054   unsigned TypeSlot;
01055   if (Result->getType() == InstTy)
01056     TypeSlot = iType;
01057   else
01058     TypeSlot = getTypeSlot(Result->getType());
01059 
01060   insertValue(Result, TypeSlot, FunctionValues);
01061 }
01062 
01063 /// Get a particular numbered basic block, which might be a forward reference.
01064 /// This works together with ParseBasicBlock to handle these forward references
01065 /// in a clean manner.  This function is used when constructing phi, br, switch,
01066 /// and other instructions that reference basic blocks. Blocks are numbered
01067 /// sequentially as they appear in the function.
01068 BasicBlock *BytecodeReader::getBasicBlock(unsigned ID) {
01069   // Make sure there is room in the table...
01070   if (ParsedBasicBlocks.size() <= ID) ParsedBasicBlocks.resize(ID+1);
01071 
01072   // First check to see if this is a backwards reference, i.e., ParseBasicBlock
01073   // has already created this block, or if the forward reference has already
01074   // been created.
01075   if (ParsedBasicBlocks[ID])
01076     return ParsedBasicBlocks[ID];
01077 
01078   // Otherwise, the basic block has not yet been created.  Do so and add it to
01079   // the ParsedBasicBlocks list.
01080   return ParsedBasicBlocks[ID] = new BasicBlock();
01081 }
01082 
01083 /// In LLVM 1.0 bytecode files, we used to output one basicblock at a time.
01084 /// This method reads in one of the basicblock packets. This method is not used
01085 /// for bytecode files after LLVM 1.0
01086 /// @returns The basic block constructed.
01087 BasicBlock *BytecodeReader::ParseBasicBlock(unsigned BlockNo) {
01088   if (Handler) Handler->handleBasicBlockBegin(BlockNo);
01089 
01090   BasicBlock *BB = 0;
01091 
01092   if (ParsedBasicBlocks.size() == BlockNo)
01093     ParsedBasicBlocks.push_back(BB = new BasicBlock());
01094   else if (ParsedBasicBlocks[BlockNo] == 0)
01095     BB = ParsedBasicBlocks[BlockNo] = new BasicBlock();
01096   else
01097     BB = ParsedBasicBlocks[BlockNo];
01098 
01099   std::vector<unsigned> Operands;
01100   while (moreInBlock())
01101     ParseInstruction(Operands, BB);
01102 
01103   if (Handler) Handler->handleBasicBlockEnd(BlockNo);
01104   return BB;
01105 }
01106 
01107 /// Parse all of the BasicBlock's & Instruction's in the body of a function.
01108 /// In post 1.0 bytecode files, we no longer emit basic block individually,
01109 /// in order to avoid per-basic-block overhead.
01110 /// @returns Rhe number of basic blocks encountered.
01111 unsigned BytecodeReader::ParseInstructionList(Function* F) {
01112   unsigned BlockNo = 0;
01113   std::vector<unsigned> Args;
01114 
01115   while (moreInBlock()) {
01116     if (Handler) Handler->handleBasicBlockBegin(BlockNo);
01117     BasicBlock *BB;
01118     if (ParsedBasicBlocks.size() == BlockNo)
01119       ParsedBasicBlocks.push_back(BB = new BasicBlock());
01120     else if (ParsedBasicBlocks[BlockNo] == 0)
01121       BB = ParsedBasicBlocks[BlockNo] = new BasicBlock();
01122     else
01123       BB = ParsedBasicBlocks[BlockNo];
01124     ++BlockNo;
01125     F->getBasicBlockList().push_back(BB);
01126 
01127     // Read instructions into this basic block until we get to a terminator
01128     while (moreInBlock() && !BB->getTerminator())
01129       ParseInstruction(Args, BB);
01130 
01131     if (!BB->getTerminator())
01132       error("Non-terminated basic block found!");
01133 
01134     if (Handler) Handler->handleBasicBlockEnd(BlockNo-1);
01135   }
01136 
01137   return BlockNo;
01138 }
01139 
01140 /// Parse a symbol table. This works for both module level and function
01141 /// level symbol tables.  For function level symbol tables, the CurrentFunction
01142 /// parameter must be non-zero and the ST parameter must correspond to
01143 /// CurrentFunction's symbol table. For Module level symbol tables, the
01144 /// CurrentFunction argument must be zero.
01145 void BytecodeReader::ParseSymbolTable(Function *CurrentFunction,
01146                                       SymbolTable *ST) {
01147   if (Handler) Handler->handleSymbolTableBegin(CurrentFunction,ST);
01148 
01149   // Allow efficient basic block lookup by number.
01150   std::vector<BasicBlock*> BBMap;
01151   if (CurrentFunction)
01152     for (Function::iterator I = CurrentFunction->begin(),
01153            E = CurrentFunction->end(); I != E; ++I)
01154       BBMap.push_back(I);
01155 
01156   /// In LLVM 1.3 we write types separately from values so
01157   /// The types are always first in the symbol table. This is
01158   /// because Type no longer derives from Value.
01159   if (!hasTypeDerivedFromValue) {
01160     // Symtab block header: [num entries]
01161     unsigned NumEntries = read_vbr_uint();
01162     for (unsigned i = 0; i < NumEntries; ++i) {
01163       // Symtab entry: [def slot #][name]
01164       unsigned slot = read_vbr_uint();
01165       std::string Name = read_str();
01166       const Type* T = getType(slot);
01167       ST->insert(Name, T);
01168     }
01169   }
01170 
01171   while (moreInBlock()) {
01172     // Symtab block header: [num entries][type id number]
01173     unsigned NumEntries = read_vbr_uint();
01174     unsigned Typ = 0;
01175     bool isTypeType = read_typeid(Typ);
01176     const Type *Ty = getType(Typ);
01177 
01178     for (unsigned i = 0; i != NumEntries; ++i) {
01179       // Symtab entry: [def slot #][name]
01180       unsigned slot = read_vbr_uint();
01181       std::string Name = read_str();
01182 
01183       // if we're reading a pre 1.3 bytecode file and the type plane
01184       // is the "type type", handle it here
01185       if (isTypeType) {
01186         const Type* T = getType(slot);
01187         if (T == 0)
01188           error("Failed type look-up for name '" + Name + "'");
01189         ST->insert(Name, T);
01190         continue; // code below must be short circuited
01191       } else {
01192         Value *V = 0;
01193         if (Typ == Type::LabelTyID) {
01194           if (slot < BBMap.size())
01195             V = BBMap[slot];
01196         } else {
01197           V = getValue(Typ, slot, false); // Find mapping...
01198         }
01199         if (V == 0)
01200           error("Failed value look-up for name '" + Name + "'");
01201         V->setName(Name);
01202       }
01203     }
01204   }
01205   checkPastBlockEnd("Symbol Table");
01206   if (Handler) Handler->handleSymbolTableEnd();
01207 }
01208 
01209 /// Read in the types portion of a compaction table.
01210 void BytecodeReader::ParseCompactionTypes(unsigned NumEntries) {
01211   for (unsigned i = 0; i != NumEntries; ++i) {
01212     unsigned TypeSlot = 0;
01213     if (read_typeid(TypeSlot))
01214       error("Invalid type in compaction table: type type");
01215     const Type *Typ = getGlobalTableType(TypeSlot);
01216     CompactionTypes.push_back(std::make_pair(Typ, TypeSlot));
01217     if (Handler) Handler->handleCompactionTableType(i, TypeSlot, Typ);
01218   }
01219 }
01220 
01221 /// Parse a compaction table.
01222 void BytecodeReader::ParseCompactionTable() {
01223 
01224   // Notify handler that we're beginning a compaction table.
01225   if (Handler) Handler->handleCompactionTableBegin();
01226 
01227   // In LLVM 1.3 Type no longer derives from Value. So,
01228   // we always write them first in the compaction table
01229   // because they can't occupy a "type plane" where the
01230   // Values reside.
01231   if (! hasTypeDerivedFromValue) {
01232     unsigned NumEntries = read_vbr_uint();
01233     ParseCompactionTypes(NumEntries);
01234   }
01235 
01236   // Compaction tables live in separate blocks so we have to loop
01237   // until we've read the whole thing.
01238   while (moreInBlock()) {
01239     // Read the number of Value* entries in the compaction table
01240     unsigned NumEntries = read_vbr_uint();
01241     unsigned Ty = 0;
01242     unsigned isTypeType = false;
01243 
01244     // Decode the type from value read in. Most compaction table
01245     // planes will have one or two entries in them. If that's the
01246     // case then the length is encoded in the bottom two bits and
01247     // the higher bits encode the type. This saves another VBR value.
01248     if ((NumEntries & 3) == 3) {
01249       // In this case, both low-order bits are set (value 3). This
01250       // is a signal that the typeid follows.
01251       NumEntries >>= 2;
01252       isTypeType = read_typeid(Ty);
01253     } else {
01254       // In this case, the low-order bits specify the number of entries
01255       // and the high order bits specify the type.
01256       Ty = NumEntries >> 2;
01257       isTypeType = sanitizeTypeId(Ty);
01258       NumEntries &= 3;
01259     }
01260 
01261     // if we're reading a pre 1.3 bytecode file and the type plane
01262     // is the "type type", handle it here
01263     if (isTypeType) {
01264       ParseCompactionTypes(NumEntries);
01265     } else {
01266       // Make sure we have enough room for the plane.
01267       if (Ty >= CompactionValues.size())
01268         CompactionValues.resize(Ty+1);
01269 
01270       // Make sure the plane is empty or we have some kind of error.
01271       if (!CompactionValues[Ty].empty())
01272         error("Compaction table plane contains multiple entries!");
01273 
01274       // Notify handler about the plane.
01275       if (Handler) Handler->handleCompactionTablePlane(Ty, NumEntries);
01276 
01277       // Push the implicit zero.
01278       CompactionValues[Ty].push_back(Constant::getNullValue(getType(Ty)));
01279 
01280       // Read in each of the entries, put them in the compaction table
01281       // and notify the handler that we have a new compaction table value.
01282       for (unsigned i = 0; i != NumEntries; ++i) {
01283         unsigned ValSlot = read_vbr_uint();
01284         Value *V = getGlobalTableValue(Ty, ValSlot);
01285         CompactionValues[Ty].push_back(V);
01286         if (Handler) Handler->handleCompactionTableValue(i, Ty, ValSlot);
01287       }
01288     }
01289   }
01290   // Notify handler that the compaction table is done.
01291   if (Handler) Handler->handleCompactionTableEnd();
01292 }
01293 
01294 // Parse a single type. The typeid is read in first. If its a primitive type
01295 // then nothing else needs to be read, we know how to instantiate it. If its
01296 // a derived type, then additional data is read to fill out the type
01297 // definition.
01298 const Type *BytecodeReader::ParseType() {
01299   unsigned PrimType = 0;
01300   if (read_typeid(PrimType))
01301     error("Invalid type (type type) in type constants!");
01302 
01303   const Type *Result = 0;
01304   if ((Result = Type::getPrimitiveType((Type::TypeID)PrimType)))
01305     return Result;
01306 
01307   switch (PrimType) {
01308   case Type::FunctionTyID: {
01309     const Type *RetType = readSanitizedType();
01310 
01311     unsigned NumParams = read_vbr_uint();
01312 
01313     std::vector<const Type*> Params;
01314     while (NumParams--)
01315       Params.push_back(readSanitizedType());
01316 
01317     bool isVarArg = Params.size() && Params.back() == Type::VoidTy;
01318     if (isVarArg) Params.pop_back();
01319 
01320     Result = FunctionType::get(RetType, Params, isVarArg);
01321     break;
01322   }
01323   case Type::ArrayTyID: {
01324     const Type *ElementType = readSanitizedType();
01325     unsigned NumElements = read_vbr_uint();
01326     Result =  ArrayType::get(ElementType, NumElements);
01327     break;
01328   }
01329   case Type::PackedTyID: {
01330     const Type *ElementType = readSanitizedType();
01331     unsigned NumElements = read_vbr_uint();
01332     Result =  PackedType::get(ElementType, NumElements);
01333     break;
01334   }
01335   case Type::StructTyID: {
01336     std::vector<const Type*> Elements;
01337     unsigned Typ = 0;
01338     if (read_typeid(Typ))
01339       error("Invalid element type (type type) for structure!");
01340 
01341     while (Typ) {         // List is terminated by void/0 typeid
01342       Elements.push_back(getType(Typ));
01343       if (read_typeid(Typ))
01344         error("Invalid element type (type type) for structure!");
01345     }
01346 
01347     Result = StructType::get(Elements);
01348     break;
01349   }
01350   case Type::PointerTyID: {
01351     Result = PointerType::get(readSanitizedType());
01352     break;
01353   }
01354 
01355   case Type::OpaqueTyID: {
01356     Result = OpaqueType::get();
01357     break;
01358   }
01359 
01360   default:
01361     error("Don't know how to deserialize primitive type " + utostr(PrimType));
01362     break;
01363   }
01364   if (Handler) Handler->handleType(Result);
01365   return Result;
01366 }
01367 
01368 // ParseTypes - We have to use this weird code to handle recursive
01369 // types.  We know that recursive types will only reference the current slab of
01370 // values in the type plane, but they can forward reference types before they
01371 // have been read.  For example, Type #0 might be '{ Ty#1 }' and Type #1 might
01372 // be 'Ty#0*'.  When reading Type #0, type number one doesn't exist.  To fix
01373 // this ugly problem, we pessimistically insert an opaque type for each type we
01374 // are about to read.  This means that forward references will resolve to
01375 // something and when we reread the type later, we can replace the opaque type
01376 // with a new resolved concrete type.
01377 //
01378 void BytecodeReader::ParseTypes(TypeListTy &Tab, unsigned NumEntries){
01379   assert(Tab.size() == 0 && "should not have read type constants in before!");
01380 
01381   // Insert a bunch of opaque types to be resolved later...
01382   Tab.reserve(NumEntries);
01383   for (unsigned i = 0; i != NumEntries; ++i)
01384     Tab.push_back(OpaqueType::get());
01385 
01386   if (Handler)
01387     Handler->handleTypeList(NumEntries);
01388 
01389   // If we are about to resolve types, make sure the type cache is clear.
01390   if (NumEntries)
01391     ModuleTypeIDCache.clear();
01392   
01393   // Loop through reading all of the types.  Forward types will make use of the
01394   // opaque types just inserted.
01395   //
01396   for (unsigned i = 0; i != NumEntries; ++i) {
01397     const Type* NewTy = ParseType();
01398     const Type* OldTy = Tab[i].get();
01399     if (NewTy == 0)
01400       error("Couldn't parse type!");
01401 
01402     // Don't directly push the new type on the Tab. Instead we want to replace
01403     // the opaque type we previously inserted with the new concrete value. This
01404     // approach helps with forward references to types. The refinement from the
01405     // abstract (opaque) type to the new type causes all uses of the abstract
01406     // type to use the concrete type (NewTy). This will also cause the opaque
01407     // type to be deleted.
01408     cast<DerivedType>(const_cast<Type*>(OldTy))->refineAbstractTypeTo(NewTy);
01409 
01410     // This should have replaced the old opaque type with the new type in the
01411     // value table... or with a preexisting type that was already in the system.
01412     // Let's just make sure it did.
01413     assert(Tab[i] != OldTy && "refineAbstractType didn't work!");
01414   }
01415 }
01416 
01417 /// Parse a single constant value
01418 Value *BytecodeReader::ParseConstantPoolValue(unsigned TypeID) {
01419   // We must check for a ConstantExpr before switching by type because
01420   // a ConstantExpr can be of any type, and has no explicit value.
01421   //
01422   // 0 if not expr; numArgs if is expr
01423   unsigned isExprNumArgs = read_vbr_uint();
01424 
01425   if (isExprNumArgs) {
01426     if (!hasNoUndefValue) {
01427       // 'undef' is encoded with 'exprnumargs' == 1.
01428       if (isExprNumArgs == 1)
01429         return UndefValue::get(getType(TypeID));
01430 
01431       // Inline asm is encoded with exprnumargs == ~0U.
01432       if (isExprNumArgs == ~0U) {
01433         std::string AsmStr = read_str();
01434         std::string ConstraintStr = read_str();
01435         unsigned Flags = read_vbr_uint();
01436         
01437         const PointerType *PTy = dyn_cast<PointerType>(getType(TypeID));
01438         const FunctionType *FTy = 
01439           PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
01440 
01441         if (!FTy || !InlineAsm::Verify(FTy, ConstraintStr))
01442           error("Invalid constraints for inline asm");
01443         if (Flags & ~1U)
01444           error("Invalid flags for inline asm");
01445         bool HasSideEffects = Flags & 1;
01446         return InlineAsm::get(FTy, AsmStr, ConstraintStr, HasSideEffects);
01447       }
01448       
01449       --isExprNumArgs;
01450     }
01451 
01452     // FIXME: Encoding of constant exprs could be much more compact!
01453     std::vector<Constant*> ArgVec;
01454     ArgVec.reserve(isExprNumArgs);
01455     unsigned Opcode = read_vbr_uint();
01456 
01457     // Bytecode files before LLVM 1.4 have a missing terminator inst.
01458     if (hasNoUnreachableInst) Opcode++;
01459 
01460     // Read the slot number and types of each of the arguments
01461     for (unsigned i = 0; i != isExprNumArgs; ++i) {
01462       unsigned ArgValSlot = read_vbr_uint();
01463       unsigned ArgTypeSlot = 0;
01464       if (read_typeid(ArgTypeSlot))
01465         error("Invalid argument type (type type) for constant value");
01466 
01467       // Get the arg value from its slot if it exists, otherwise a placeholder
01468       ArgVec.push_back(getConstantValue(ArgTypeSlot, ArgValSlot));
01469     }
01470 
01471     // Construct a ConstantExpr of the appropriate kind
01472     if (isExprNumArgs == 1) {           // All one-operand expressions
01473       if (Opcode != Instruction::Cast)
01474         error("Only cast instruction has one argument for ConstantExpr");
01475 
01476       Constant* Result = ConstantExpr::getCast(ArgVec[0], getType(TypeID));
01477       if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result);
01478       return Result;
01479     } else if (Opcode == Instruction::GetElementPtr) { // GetElementPtr
01480       std::vector<Constant*> IdxList(ArgVec.begin()+1, ArgVec.end());
01481 
01482       if (hasRestrictedGEPTypes) {
01483         const Type *BaseTy = ArgVec[0]->getType();
01484         generic_gep_type_iterator<std::vector<Constant*>::iterator>
01485           GTI = gep_type_begin(BaseTy, IdxList.begin(), IdxList.end()),
01486           E = gep_type_end(BaseTy, IdxList.begin(), IdxList.end());
01487         for (unsigned i = 0; GTI != E; ++GTI, ++i)
01488           if (isa<StructType>(*GTI)) {
01489             if (IdxList[i]->getType() != Type::UByteTy)
01490               error("Invalid index for getelementptr!");
01491             IdxList[i] = ConstantExpr::getCast(IdxList[i], Type::UIntTy);
01492           }
01493       }
01494 
01495       Constant* Result = ConstantExpr::getGetElementPtr(ArgVec[0], IdxList);
01496       if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result);
01497       return Result;
01498     } else if (Opcode == Instruction::Select) {
01499       if (ArgVec.size() != 3)
01500         error("Select instruction must have three arguments.");
01501       Constant* Result = ConstantExpr::getSelect(ArgVec[0], ArgVec[1],
01502                                                  ArgVec[2]);
01503       if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result);
01504       return Result;
01505     } else if (Opcode == Instruction::ExtractElement) {
01506       if (ArgVec.size() != 2 ||
01507           !ExtractElementInst::isValidOperands(ArgVec[0], ArgVec[1]))
01508         error("Invalid extractelement constand expr arguments");
01509       Constant* Result = ConstantExpr::getExtractElement(ArgVec[0], ArgVec[1]);
01510       if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result);
01511       return Result;
01512     } else if (Opcode == Instruction::InsertElement) {
01513       if (ArgVec.size() != 3 ||
01514           !InsertElementInst::isValidOperands(ArgVec[0], ArgVec[1], ArgVec[2]))
01515         error("Invalid insertelement constand expr arguments");
01516         
01517       Constant *Result = 
01518         ConstantExpr::getInsertElement(ArgVec[0], ArgVec[1], ArgVec[2]);
01519       if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result);
01520       return Result;
01521     } else if (Opcode == Instruction::ShuffleVector) {
01522       if (ArgVec.size() != 3 ||
01523           !ShuffleVectorInst::isValidOperands(ArgVec[0], ArgVec[1], ArgVec[2]))
01524         error("Invalid shufflevector constant expr arguments.");
01525       Constant *Result = 
01526         ConstantExpr::getShuffleVector(ArgVec[0], ArgVec[1], ArgVec[2]);
01527       if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result);
01528       return Result;
01529     } else {                            // All other 2-operand expressions
01530       Constant* Result = ConstantExpr::get(Opcode, ArgVec[0], ArgVec[1]);
01531       if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result);
01532       return Result;
01533     }
01534   }
01535 
01536   // Ok, not an ConstantExpr.  We now know how to read the given type...
01537   const Type *Ty = getType(TypeID);
01538   Constant *Result = 0;
01539   switch (Ty->getTypeID()) {
01540   case Type::BoolTyID: {
01541     unsigned Val = read_vbr_uint();
01542     if (Val != 0 && Val != 1)
01543       error("Invalid boolean value read.");
01544     Result = ConstantBool::get(Val == 1);
01545     if (Handler) Handler->handleConstantValue(Result);
01546     break;
01547   }
01548 
01549   case Type::UByteTyID:   // Unsigned integer types...
01550   case Type::UShortTyID:
01551   case Type::UIntTyID: {
01552     unsigned Val = read_vbr_uint();
01553     if (!ConstantUInt::isValueValidForType(Ty, Val))
01554       error("Invalid unsigned byte/short/int read.");
01555     Result = ConstantUInt::get(Ty, Val);
01556     if (Handler) Handler->handleConstantValue(Result);
01557     break;
01558   }
01559 
01560   case Type::ULongTyID:
01561     Result = ConstantUInt::get(Ty, read_vbr_uint64());
01562     if (Handler) Handler->handleConstantValue(Result);
01563     break;
01564     
01565   case Type::SByteTyID:   // Signed integer types...
01566   case Type::ShortTyID:
01567   case Type::IntTyID:
01568   case Type::LongTyID: {
01569     int64_t Val = read_vbr_int64();
01570     if (!ConstantSInt::isValueValidForType(Ty, Val))
01571       error("Invalid signed byte/short/int/long read.");
01572     Result = ConstantSInt::get(Ty, Val);
01573     if (Handler) Handler->handleConstantValue(Result);
01574     break;
01575   }
01576 
01577   case Type::FloatTyID: {
01578     float Val;
01579     read_float(Val);
01580     Result = ConstantFP::get(Ty, Val);
01581     if (Handler) Handler->handleConstantValue(Result);
01582     break;
01583   }
01584 
01585   case Type::DoubleTyID: {
01586     double Val;
01587     read_double(Val);
01588     Result = ConstantFP::get(Ty, Val);
01589     if (Handler) Handler->handleConstantValue(Result);
01590     break;
01591   }
01592 
01593   case Type::ArrayTyID: {
01594     const ArrayType *AT = cast<ArrayType>(Ty);
01595     unsigned NumElements = AT->getNumElements();
01596     unsigned TypeSlot = getTypeSlot(AT->getElementType());
01597     std::vector<Constant*> Elements;
01598     Elements.reserve(NumElements);
01599     while (NumElements--)     // Read all of the elements of the constant.
01600       Elements.push_back(getConstantValue(TypeSlot,
01601                                           read_vbr_uint()));
01602     Result = ConstantArray::get(AT, Elements);
01603     if (Handler) Handler->handleConstantArray(AT, Elements, TypeSlot, Result);
01604     break;
01605   }
01606 
01607   case Type::StructTyID: {
01608     const StructType *ST = cast<StructType>(Ty);
01609 
01610     std::vector<Constant *> Elements;
01611     Elements.reserve(ST->getNumElements());
01612     for (unsigned i = 0; i != ST->getNumElements(); ++i)
01613       Elements.push_back(getConstantValue(ST->getElementType(i),
01614                                           read_vbr_uint()));
01615 
01616     Result = ConstantStruct::get(ST, Elements);
01617     if (Handler) Handler->handleConstantStruct(ST, Elements, Result);
01618     break;
01619   }
01620 
01621   case Type::PackedTyID: {
01622     const PackedType *PT = cast<PackedType>(Ty);
01623     unsigned NumElements = PT->getNumElements();
01624     unsigned TypeSlot = getTypeSlot(PT->getElementType());
01625     std::vector<Constant*> Elements;
01626     Elements.reserve(NumElements);
01627     while (NumElements--)     // Read all of the elements of the constant.
01628       Elements.push_back(getConstantValue(TypeSlot,
01629                                           read_vbr_uint()));
01630     Result = ConstantPacked::get(PT, Elements);
01631     if (Handler) Handler->handleConstantPacked(PT, Elements, TypeSlot, Result);
01632     break;
01633   }
01634 
01635   case Type::PointerTyID: {  // ConstantPointerRef value (backwards compat).
01636     const PointerType *PT = cast<PointerType>(Ty);
01637     unsigned Slot = read_vbr_uint();
01638 
01639     // Check to see if we have already read this global variable...
01640     Value *Val = getValue(TypeID, Slot, false);
01641     if (Val) {
01642       GlobalValue *GV = dyn_cast<GlobalValue>(Val);
01643       if (!GV) error("GlobalValue not in ValueTable!");
01644       if (Handler) Handler->handleConstantPointer(PT, Slot, GV);
01645       return GV;
01646     } else {
01647       error("Forward references are not allowed here.");
01648     }
01649   }
01650 
01651   default:
01652     error("Don't know how to deserialize constant value of type '" +
01653                       Ty->getDescription());
01654     break;
01655   }
01656   
01657   // Check that we didn't read a null constant if they are implicit for this
01658   // type plane.  Do not do this check for constantexprs, as they may be folded
01659   // to a null value in a way that isn't predicted when a .bc file is initially
01660   // produced.
01661   assert((!isa<Constant>(Result) || !cast<Constant>(Result)->isNullValue()) ||
01662          !hasImplicitNull(TypeID) &&
01663          "Cannot read null values from bytecode!");
01664   return Result;
01665 }
01666 
01667 /// Resolve references for constants. This function resolves the forward
01668 /// referenced constants in the ConstantFwdRefs map. It uses the
01669 /// replaceAllUsesWith method of Value class to substitute the placeholder
01670 /// instance with the actual instance.
01671 void BytecodeReader::ResolveReferencesToConstant(Constant *NewV, unsigned Typ,
01672                                                  unsigned Slot) {
01673   ConstantRefsType::iterator I =
01674     ConstantFwdRefs.find(std::make_pair(Typ, Slot));
01675   if (I == ConstantFwdRefs.end()) return;   // Never forward referenced?
01676 
01677   Value *PH = I->second;   // Get the placeholder...
01678   PH->replaceAllUsesWith(NewV);
01679   delete PH;                               // Delete the old placeholder
01680   ConstantFwdRefs.erase(I);                // Remove the map entry for it
01681 }
01682 
01683 /// Parse the constant strings section.
01684 void BytecodeReader::ParseStringConstants(unsigned NumEntries, ValueTable &Tab){
01685   for (; NumEntries; --NumEntries) {
01686     unsigned Typ = 0;
01687     if (read_typeid(Typ))
01688       error("Invalid type (type type) for string constant");
01689     const Type *Ty = getType(Typ);
01690     if (!isa<ArrayType>(Ty))
01691       error("String constant data invalid!");
01692 
01693     const ArrayType *ATy = cast<ArrayType>(Ty);
01694     if (ATy->getElementType() != Type::SByteTy &&
01695         ATy->getElementType() != Type::UByteTy)
01696       error("String constant data invalid!");
01697 
01698     // Read character data.  The type tells us how long the string is.
01699     char *Data = reinterpret_cast<char *>(alloca(ATy->getNumElements()));
01700     read_data(Data, Data+ATy->getNumElements());
01701 
01702     std::vector<Constant*> Elements(ATy->getNumElements());
01703     if (ATy->getElementType() == Type::SByteTy)
01704       for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
01705         Elements[i] = ConstantSInt::get(Type::SByteTy, (signed char)Data[i]);
01706     else
01707       for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
01708         Elements[i] = ConstantUInt::get(Type::UByteTy, (unsigned char)Data[i]);
01709 
01710     // Create the constant, inserting it as needed.
01711     Constant *C = ConstantArray::get(ATy, Elements);
01712     unsigned Slot = insertValue(C, Typ, Tab);
01713     ResolveReferencesToConstant(C, Typ, Slot);
01714     if (Handler) Handler->handleConstantString(cast<ConstantArray>(C));
01715   }
01716 }
01717 
01718 /// Parse the constant pool.
01719 void BytecodeReader::ParseConstantPool(ValueTable &Tab,
01720                                        TypeListTy &TypeTab,
01721                                        bool isFunction) {
01722   if (Handler) Handler->handleGlobalConstantsBegin();
01723 
01724   /// In LLVM 1.3 Type does not derive from Value so the types
01725   /// do not occupy a plane. Consequently, we read the types
01726   /// first in the constant pool.
01727   if (isFunction && !hasTypeDerivedFromValue) {
01728     unsigned NumEntries = read_vbr_uint();
01729     ParseTypes(TypeTab, NumEntries);
01730   }
01731 
01732   while (moreInBlock()) {
01733     unsigned NumEntries = read_vbr_uint();
01734     unsigned Typ = 0;
01735     bool isTypeType = read_typeid(Typ);
01736 
01737     /// In LLVM 1.2 and before, Types were written to the
01738     /// bytecode file in the "Type Type" plane (#12).
01739     /// In 1.3 plane 12 is now the label plane.  Handle this here.
01740     if (isTypeType) {
01741       ParseTypes(TypeTab, NumEntries);
01742     } else if (Typ == Type::VoidTyID) {
01743       /// Use of Type::VoidTyID is a misnomer. It actually means
01744       /// that the following plane is constant strings
01745       assert(&Tab == &ModuleValues && "Cannot read strings in functions!");
01746       ParseStringConstants(NumEntries, Tab);
01747     } else {
01748       for (unsigned i = 0; i < NumEntries; ++i) {
01749         Value *V = ParseConstantPoolValue(Typ);
01750         assert(V && "ParseConstantPoolValue returned NULL!");
01751         unsigned Slot = insertValue(V, Typ, Tab);
01752 
01753         // If we are reading a function constant table, make sure that we adjust
01754         // the slot number to be the real global constant number.
01755         //
01756         if (&Tab != &ModuleValues && Typ < ModuleValues.size() &&
01757             ModuleValues[Typ])
01758           Slot += ModuleValues[Typ]->size();
01759         if (Constant *C = dyn_cast<Constant>(V))
01760           ResolveReferencesToConstant(C, Typ, Slot);
01761       }
01762     }
01763   }
01764 
01765   // After we have finished parsing the constant pool, we had better not have
01766   // any dangling references left.
01767   if (!ConstantFwdRefs.empty()) {
01768     ConstantRefsType::const_iterator I = ConstantFwdRefs.begin();
01769     Constant* missingConst = I->second;
01770     error(utostr(ConstantFwdRefs.size()) +
01771           " unresolved constant reference exist. First one is '" +
01772           missingConst->getName() + "' of type '" +
01773           missingConst->getType()->getDescription() + "'.");
01774   }
01775 
01776   checkPastBlockEnd("Constant Pool");
01777   if (Handler) Handler->handleGlobalConstantsEnd();
01778 }
01779 
01780 /// Parse the contents of a function. Note that this function can be
01781 /// called lazily by materializeFunction
01782 /// @see materializeFunction
01783 void BytecodeReader::ParseFunctionBody(Function* F) {
01784 
01785   unsigned FuncSize = BlockEnd - At;
01786   GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage;
01787 
01788   unsigned LinkageType = read_vbr_uint();
01789   switch (LinkageType) {
01790   case 0: Linkage = GlobalValue::ExternalLinkage; break;
01791   case 1: Linkage = GlobalValue::WeakLinkage; break;
01792   case 2: Linkage = GlobalValue::AppendingLinkage; break;
01793   case 3: Linkage = GlobalValue::InternalLinkage; break;
01794   case 4: Linkage = GlobalValue::LinkOnceLinkage; break;
01795   default:
01796     error("Invalid linkage type for Function.");
01797     Linkage = GlobalValue::InternalLinkage;
01798     break;
01799   }
01800 
01801   F->setLinkage(Linkage);
01802   if (Handler) Handler->handleFunctionBegin(F,FuncSize);
01803 
01804   // Keep track of how many basic blocks we have read in...
01805   unsigned BlockNum = 0;
01806   bool InsertedArguments = false;
01807 
01808   BufPtr MyEnd = BlockEnd;
01809   while (At < MyEnd) {
01810     unsigned Type, Size;
01811     BufPtr OldAt = At;
01812     read_block(Type, Size);
01813 
01814     switch (Type) {
01815     case BytecodeFormat::ConstantPoolBlockID:
01816       if (!InsertedArguments) {
01817         // Insert arguments into the value table before we parse the first basic
01818         // block in the function, but after we potentially read in the
01819         // compaction table.
01820         insertArguments(F);
01821         InsertedArguments = true;
01822       }
01823 
01824       ParseConstantPool(FunctionValues, FunctionTypes, true);
01825       break;
01826 
01827     case BytecodeFormat::CompactionTableBlockID:
01828       ParseCompactionTable();
01829       break;
01830 
01831     case BytecodeFormat::BasicBlock: {
01832       if (!InsertedArguments) {
01833         // Insert arguments into the value table before we parse the first basic
01834         // block in the function, but after we potentially read in the
01835         // compaction table.
01836         insertArguments(F);
01837         InsertedArguments = true;
01838       }
01839 
01840       BasicBlock *BB = ParseBasicBlock(BlockNum++);
01841       F->getBasicBlockList().push_back(BB);
01842       break;
01843     }
01844 
01845     case BytecodeFormat::InstructionListBlockID: {
01846       // Insert arguments into the value table before we parse the instruction
01847       // list for the function, but after we potentially read in the compaction
01848       // table.
01849       if (!InsertedArguments) {
01850         insertArguments(F);
01851         InsertedArguments = true;
01852       }
01853 
01854       if (BlockNum)
01855         error("Already parsed basic blocks!");
01856       BlockNum = ParseInstructionList(F);
01857       break;
01858     }
01859 
01860     case BytecodeFormat::SymbolTableBlockID:
01861       ParseSymbolTable(F, &F->getSymbolTable());
01862       break;
01863 
01864     default:
01865       At += Size;
01866       if (OldAt > At)
01867         error("Wrapped around reading bytecode.");
01868       break;
01869     }
01870     BlockEnd = MyEnd;
01871 
01872     // Malformed bc file if read past end of block.
01873     align32();
01874   }
01875 
01876   // Make sure there were no references to non-existant basic blocks.
01877   if (BlockNum != ParsedBasicBlocks.size())
01878     error("Illegal basic block operand reference");
01879 
01880   ParsedBasicBlocks.clear();
01881 
01882   // Resolve forward references.  Replace any uses of a forward reference value
01883   // with the real value.
01884   while (!ForwardReferences.empty()) {
01885     std::map<std::pair<unsigned,unsigned>, Value*>::iterator
01886       I = ForwardReferences.begin();
01887     Value *V = getValue(I->first.first, I->first.second, false);
01888     Value *PlaceHolder = I->second;
01889     PlaceHolder->replaceAllUsesWith(V);
01890     ForwardReferences.erase(I);
01891     delete PlaceHolder;
01892   }
01893 
01894   // If upgraded intrinsic functions were detected during reading of the 
01895   // module information, then we need to look for instructions that need to
01896   // be upgraded. This can't be done while the instructions are read in because
01897   // additional instructions inserted mess up the slot numbering.
01898   if (!upgradedFunctions.empty()) {
01899     for (Function::iterator BI = F->begin(), BE = F->end(); BI != BE; ++BI) 
01900       for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); 
01901            II != IE;)
01902         if (CallInst* CI = dyn_cast<CallInst>(II++)) {
01903           std::map<Function*,Function*>::iterator FI = 
01904             upgradedFunctions.find(CI->getCalledFunction());
01905           if (FI != upgradedFunctions.end())
01906             UpgradeIntrinsicCall(CI, FI->second);
01907         }
01908   }
01909 
01910   // Clear out function-level types...
01911   FunctionTypes.clear();
01912   CompactionTypes.clear();
01913   CompactionValues.clear();
01914   freeTable(FunctionValues);
01915 
01916   if (Handler) Handler->handleFunctionEnd(F);
01917 }
01918 
01919 /// This function parses LLVM functions lazily. It obtains the type of the
01920 /// function and records where the body of the function is in the bytecode
01921 /// buffer. The caller can then use the ParseNextFunction and
01922 /// ParseAllFunctionBodies to get handler events for the functions.
01923 void BytecodeReader::ParseFunctionLazily() {
01924   if (FunctionSignatureList.empty())
01925     error("FunctionSignatureList empty!");
01926 
01927   Function *Func = FunctionSignatureList.back();
01928   FunctionSignatureList.pop_back();
01929 
01930   // Save the information for future reading of the function
01931   LazyFunctionLoadMap[Func] = LazyFunctionInfo(BlockStart, BlockEnd);
01932 
01933   // This function has a body but it's not loaded so it appears `External'.
01934   // Mark it as a `Ghost' instead to notify the users that it has a body.
01935   Func->setLinkage(GlobalValue::GhostLinkage);
01936 
01937   // Pretend we've `parsed' this function
01938   At = BlockEnd;
01939 }
01940 
01941 /// The ParserFunction method lazily parses one function. Use this method to
01942 /// casue the parser to parse a specific function in the module. Note that
01943 /// this will remove the function from what is to be included by
01944 /// ParseAllFunctionBodies.
01945 /// @see ParseAllFunctionBodies
01946 /// @see ParseBytecode
01947 void BytecodeReader::ParseFunction(Function* Func) {
01948   // Find {start, end} pointers and slot in the map. If not there, we're done.
01949   LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.find(Func);
01950 
01951   // Make sure we found it
01952   if (Fi == LazyFunctionLoadMap.end()) {
01953     error("Unrecognized function of type " + Func->getType()->getDescription());
01954     return;
01955   }
01956 
01957   BlockStart = At = Fi->second.Buf;
01958   BlockEnd = Fi->second.EndBuf;
01959   assert(Fi->first == Func && "Found wrong function?");
01960 
01961   LazyFunctionLoadMap.erase(Fi);
01962 
01963   this->ParseFunctionBody(Func);
01964 }
01965 
01966 /// The ParseAllFunctionBodies method parses through all the previously
01967 /// unparsed functions in the bytecode file. If you want to completely parse
01968 /// a bytecode file, this method should be called after Parsebytecode because
01969 /// Parsebytecode only records the locations in the bytecode file of where
01970 /// the function definitions are located. This function uses that information
01971 /// to materialize the functions.
01972 /// @see ParseBytecode
01973 void BytecodeReader::ParseAllFunctionBodies() {
01974   LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.begin();
01975   LazyFunctionMap::iterator Fe = LazyFunctionLoadMap.end();
01976 
01977   while (Fi != Fe) {
01978     Function* Func = Fi->first;
01979     BlockStart = At = Fi->second.Buf;
01980     BlockEnd = Fi->second.EndBuf;
01981     ParseFunctionBody(Func);
01982     ++Fi;
01983   }
01984   LazyFunctionLoadMap.clear();
01985 
01986 }
01987 
01988 /// Parse the global type list
01989 void BytecodeReader::ParseGlobalTypes() {
01990   // Read the number of types
01991   unsigned NumEntries = read_vbr_uint();
01992 
01993   // Ignore the type plane identifier for types if the bc file is pre 1.3
01994   if (hasTypeDerivedFromValue)
01995     read_vbr_uint();
01996 
01997   ParseTypes(ModuleTypes, NumEntries);
01998 }
01999 
02000 /// Parse the Global info (types, global vars, constants)
02001 void BytecodeReader::ParseModuleGlobalInfo() {
02002 
02003   if (Handler) Handler->handleModuleGlobalsBegin();
02004 
02005   // SectionID - If a global has an explicit section specified, this map
02006   // remembers the ID until we can translate it into a string.
02007   std::map<GlobalValue*, unsigned> SectionID;
02008   
02009   // Read global variables...
02010   unsigned VarType = read_vbr_uint();
02011   while (VarType != Type::VoidTyID) { // List is terminated by Void
02012     // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 =
02013     // Linkage, bit4+ = slot#
02014     unsigned SlotNo = VarType >> 5;
02015     if (sanitizeTypeId(SlotNo))
02016       error("Invalid type (type type) for global var!");
02017     unsigned LinkageID = (VarType >> 2) & 7;
02018     bool isConstant = VarType & 1;
02019     bool hasInitializer = (VarType & 2) != 0;
02020     unsigned Alignment = 0;
02021     unsigned GlobalSectionID = 0;
02022     
02023     // An extension word is present when linkage = 3 (internal) and hasinit = 0.
02024     if (LinkageID == 3 && !hasInitializer) {
02025       unsigned ExtWord = read_vbr_uint();
02026       // The extension word has this format: bit 0 = has initializer, bit 1-3 =
02027       // linkage, bit 4-8 = alignment (log2), bits 10+ = future use.
02028       hasInitializer = ExtWord & 1;
02029       LinkageID = (ExtWord >> 1) & 7;
02030       Alignment = (1 << ((ExtWord >> 4) & 31)) >> 1;
02031       
02032       if (ExtWord & (1 << 9))  // Has a section ID.
02033         GlobalSectionID = read_vbr_uint();
02034     }
02035 
02036     GlobalValue::LinkageTypes Linkage;
02037     switch (LinkageID) {
02038     case 0: Linkage = GlobalValue::ExternalLinkage;  break;
02039     case 1: Linkage = GlobalValue::WeakLinkage;      break;
02040     case 2: Linkage = GlobalValue::AppendingLinkage; break;
02041     case 3: Linkage = GlobalValue::InternalLinkage;  break;
02042     case 4: Linkage = GlobalValue::LinkOnceLinkage;  break;
02043     default:
02044       error("Unknown linkage type: " + utostr(LinkageID));
02045       Linkage = GlobalValue::InternalLinkage;
02046       break;
02047     }
02048 
02049     const Type *Ty = getType(SlotNo);
02050     if (!Ty)
02051       error("Global has no type! SlotNo=" + utostr(SlotNo));
02052 
02053     if (!isa<PointerType>(Ty))
02054       error("Global not a pointer type! Ty= " + Ty->getDescription());
02055 
02056     const Type *ElTy = cast<PointerType>(Ty)->getElementType();
02057 
02058     // Create the global variable...
02059     GlobalVariable *GV = new GlobalVariable(ElTy, isConstant, Linkage,
02060                                             0, "", TheModule);
02061     GV->setAlignment(Alignment);
02062     insertValue(GV, SlotNo, ModuleValues);
02063 
02064     if (GlobalSectionID != 0)
02065       SectionID[GV] = GlobalSectionID;
02066 
02067     unsigned initSlot = 0;
02068     if (hasInitializer) {
02069       initSlot = read_vbr_uint();
02070       GlobalInits.push_back(std::make_pair(GV, initSlot));
02071     }
02072 
02073     // Notify handler about the global value.
02074     if (Handler)
02075       Handler->handleGlobalVariable(ElTy, isConstant, Linkage, SlotNo,initSlot);
02076 
02077     // Get next item
02078     VarType = read_vbr_uint();
02079   }
02080 
02081   // Read the function objects for all of the functions that are coming
02082   unsigned FnSignature = read_vbr_uint();
02083 
02084   if (hasNoFlagsForFunctions)
02085     FnSignature = (FnSignature << 5) + 1;
02086 
02087   // List is terminated by VoidTy.
02088   while (((FnSignature & (~0U >> 1)) >> 5) != Type::VoidTyID) {
02089     const Type *Ty = getType((FnSignature & (~0U >> 1)) >> 5);
02090     if (!isa<PointerType>(Ty) ||
02091         !isa<FunctionType>(cast<PointerType>(Ty)->getElementType())) {
02092       error("Function not a pointer to function type! Ty = " +
02093             Ty->getDescription());
02094     }
02095 
02096     // We create functions by passing the underlying FunctionType to create...
02097     const FunctionType* FTy =
02098       cast<FunctionType>(cast<PointerType>(Ty)->getElementType());
02099 
02100     // Insert the place holder.
02101     Function *Func = new Function(FTy, GlobalValue::ExternalLinkage,
02102                                   "", TheModule);
02103 
02104     insertValue(Func, (FnSignature & (~0U >> 1)) >> 5, ModuleValues);
02105 
02106     // Flags are not used yet.
02107     unsigned Flags = FnSignature & 31;
02108 
02109     // Save this for later so we know type of lazily instantiated functions.
02110     // Note that known-external functions do not have FunctionInfo blocks, so we
02111     // do not add them to the FunctionSignatureList.
02112     if ((Flags & (1 << 4)) == 0)
02113       FunctionSignatureList.push_back(Func);
02114 
02115     // Get the calling convention from the low bits.
02116     unsigned CC = Flags & 15;
02117     unsigned Alignment = 0;
02118     if (FnSignature & (1 << 31)) {  // Has extension word?
02119       unsigned ExtWord = read_vbr_uint();
02120       Alignment = (1 << (ExtWord & 31)) >> 1;
02121       CC |= ((ExtWord >> 5) & 15) << 4;
02122       
02123       if (ExtWord & (1 << 10))  // Has a section ID.
02124         SectionID[Func] = read_vbr_uint();
02125     }
02126     
02127     Func->setCallingConv(CC-1);
02128     Func->setAlignment(Alignment);
02129 
02130     if (Handler) Handler->handleFunctionDeclaration(Func);
02131 
02132     // Get the next function signature.
02133     FnSignature = read_vbr_uint();
02134     if (hasNoFlagsForFunctions)
02135       FnSignature = (FnSignature << 5) + 1;
02136   }
02137 
02138   // Now that the function signature list is set up, reverse it so that we can
02139   // remove elements efficiently from the back of the vector.
02140   std::reverse(FunctionSignatureList.begin(), FunctionSignatureList.end());
02141 
02142   /// SectionNames - This contains the list of section names encoded in the
02143   /// moduleinfoblock.  Functions and globals with an explicit section index
02144   /// into this to get their section name.
02145   std::vector<std::string> SectionNames;
02146   
02147   if (hasInconsistentModuleGlobalInfo) {
02148     align32();
02149   } else if (!hasNoDependentLibraries) {
02150     // If this bytecode format has dependent library information in it, read in
02151     // the number of dependent library items that follow.
02152     unsigned num_dep_libs = read_vbr_uint();
02153     std::string dep_lib;
02154     while (num_dep_libs--) {
02155       dep_lib = read_str();
02156       TheModule->addLibrary(dep_lib);
02157       if (Handler)
02158         Handler->handleDependentLibrary(dep_lib);
02159     }
02160 
02161     // Read target triple and place into the module.
02162     std::string triple = read_str();
02163     TheModule->setTargetTriple(triple);
02164     if (Handler)
02165       Handler->handleTargetTriple(triple);
02166     
02167     if (!hasAlignment && At != BlockEnd) {
02168       // If the file has section info in it, read the section names now.
02169       unsigned NumSections = read_vbr_uint();
02170       while (NumSections--)
02171         SectionNames.push_back(read_str());
02172     }
02173     
02174     // If the file has module-level inline asm, read it now.
02175     if (!hasAlignment && At != BlockEnd)
02176       TheModule->setModuleInlineAsm(read_str());
02177   }
02178 
02179   // If any globals are in specified sections, assign them now.
02180   for (std::map<GlobalValue*, unsigned>::iterator I = SectionID.begin(), E =
02181        SectionID.end(); I != E; ++I)
02182     if (I->second) {
02183       if (I->second > SectionID.size())
02184         error("SectionID out of range for global!");
02185       I->first->setSection(SectionNames[I->second-1]);
02186     }
02187 
02188   // This is for future proofing... in the future extra fields may be added that
02189   // we don't understand, so we transparently ignore them.
02190   //
02191   At = BlockEnd;
02192 
02193   if (Handler) Handler->handleModuleGlobalsEnd();
02194 }
02195 
02196 /// Parse the version information and decode it by setting flags on the
02197 /// Reader that enable backward compatibility of the reader.
02198 void BytecodeReader::ParseVersionInfo() {
02199   unsigned Version = read_vbr_uint();
02200 
02201   // Unpack version number: low four bits are for flags, top bits = version
02202   Module::Endianness  Endianness;
02203   Module::PointerSize PointerSize;
02204   Endianness  = (Version & 1) ? Module::BigEndian : Module::LittleEndian;
02205   PointerSize = (Version & 2) ? Module::Pointer64 : Module::Pointer32;
02206 
02207   bool hasNoEndianness = Version & 4;
02208   bool hasNoPointerSize = Version & 8;
02209 
02210   RevisionNum = Version >> 4;
02211 
02212   // Default values for the current bytecode version
02213   hasInconsistentModuleGlobalInfo = false;
02214   hasExplicitPrimitiveZeros = false;
02215   hasRestrictedGEPTypes = false;
02216   hasTypeDerivedFromValue = false;
02217   hasLongBlockHeaders = false;
02218   has32BitTypes = false;
02219   hasNoDependentLibraries = false;
02220   hasAlignment = false;
02221   hasNoUndefValue = false;
02222   hasNoFlagsForFunctions = false;
02223   hasNoUnreachableInst = false;
02224 
02225   switch (RevisionNum) {
02226   case 0:               //  LLVM 1.0, 1.1 (Released)
02227     // Base LLVM 1.0 bytecode format.
02228     hasInconsistentModuleGlobalInfo = true;
02229     hasExplicitPrimitiveZeros = true;
02230 
02231     // FALL THROUGH
02232 
02233   case 1:               // LLVM 1.2 (Released)
02234     // LLVM 1.2 added explicit support for emitting strings efficiently.
02235 
02236     // Also, it fixed the problem where the size of the ModuleGlobalInfo block
02237     // included the size for the alignment at the end, where the rest of the
02238     // blocks did not.
02239 
02240     // LLVM 1.2 and before required that GEP indices be ubyte constants for
02241     // structures and longs for sequential types.
02242     hasRestrictedGEPTypes = true;
02243 
02244     // LLVM 1.2 and before had the Type class derive from Value class. This
02245     // changed in release 1.3 and consequently LLVM 1.3 bytecode files are
02246     // written differently because Types can no longer be part of the
02247     // type planes for Values.
02248     hasTypeDerivedFromValue = true;
02249 
02250     // FALL THROUGH
02251 
02252   case 2:                // 1.2.5 (Not Released)
02253 
02254     // LLVM 1.2 and earlier had two-word block headers. This is a bit wasteful,
02255     // especially for small files where the 8 bytes per block is a large
02256     // fraction of the total block size. In LLVM 1.3, the block type and length
02257     // are compressed into a single 32-bit unsigned integer. 27 bits for length,
02258     // 5 bits for block type.
02259     hasLongBlockHeaders = true;
02260 
02261     // LLVM 1.2 and earlier wrote type slot numbers as vbr_uint32. In LLVM 1.3
02262     // this has been reduced to vbr_uint24. It shouldn't make much difference
02263     // since we haven't run into a module with > 24 million types, but for
02264     // safety the 24-bit restriction has been enforced in 1.3 to free some bits
02265     // in various places and to ensure consistency.
02266     has32BitTypes = true;
02267 
02268     // LLVM 1.2 and earlier did not provide a target triple nor a list of
02269     // libraries on which the bytecode is dependent. LLVM 1.3 provides these
02270     // features, for use in future versions of LLVM.
02271     hasNoDependentLibraries = true;
02272 
02273     // FALL THROUGH
02274 
02275   case 3:               // LLVM 1.3 (Released)
02276     // LLVM 1.3 and earlier caused alignment bytes to be written on some block
02277     // boundaries and at the end of some strings. In extreme cases (e.g. lots
02278     // of GEP references to a constant array), this can increase the file size
02279     // by 30% or more. In version 1.4 alignment is done away with completely.
02280     hasAlignment = true;
02281 
02282     // FALL THROUGH
02283 
02284   case 4:               // 1.3.1 (Not Released)
02285     // In version 4, we did not support the 'undef' constant.
02286     hasNoUndefValue = true;
02287 
02288     // In version 4 and earlier, we did not include space for flags for
02289     // functions in the module info block.
02290     hasNoFlagsForFunctions = true;
02291 
02292     // In version 4 and earlier, we did not include the 'unreachable'
02293     // instruction in the opcode numbering in the bytecode file.
02294     hasNoUnreachableInst = true;
02295     break;
02296 
02297     // NO FALL THROUGH: case 4 ends with the break above.
02298 
02299   case 5:               // 1.4 (Released)
02300     break;
02301 
02302   default:
02303     error("Unknown bytecode version number: " + itostr(RevisionNum));
02304   }
02305 
02306   if (hasNoEndianness) Endianness  = Module::AnyEndianness;
02307   if (hasNoPointerSize) PointerSize = Module::AnyPointerSize;
02308 
02309   TheModule->setEndianness(Endianness);
02310   TheModule->setPointerSize(PointerSize);
02311 
02312   if (Handler) Handler->handleVersionInfo(RevisionNum, Endianness, PointerSize);
02313 }
02314 
02315 /// Parse a whole module.
02316 void BytecodeReader::ParseModule() {
02317   unsigned Type, Size;
02318 
02319   FunctionSignatureList.clear(); // Just in case...
02320 
02321   // Read into instance variables...
02322   ParseVersionInfo();
02323   align32();
02324 
02325   bool SeenModuleGlobalInfo = false;
02326   bool SeenGlobalTypePlane = false;
02327   BufPtr MyEnd = BlockEnd;
02328   while (At < MyEnd) {
02329     BufPtr OldAt = At;
02330     read_block(Type, Size);
02331 
02332     switch (Type) {
02333 
02334     case BytecodeFormat::GlobalTypePlaneBlockID:
02335       if (SeenGlobalTypePlane)
02336         error("Two GlobalTypePlane Blocks Encountered!");
02337 
02338       if (Size > 0)
02339         ParseGlobalTypes();
02340       SeenGlobalTypePlane = true;
02341       break;
02342 
02343     case BytecodeFormat::ModuleGlobalInfoBlockID:
02344       if (SeenModuleGlobalInfo)
02345         error("Two ModuleGlobalInfo Blocks Encountered!");
02346       ParseModuleGlobalInfo();
02347       SeenModuleGlobalInfo = true;
02348       break;
02349 
02350     case BytecodeFormat::ConstantPoolBlockID:
02351       ParseConstantPool(ModuleValues, ModuleTypes,false);
02352       break;
02353 
02354     case BytecodeFormat::FunctionBlockID:
02355       ParseFunctionLazily();
02356       break;
02357 
02358     case BytecodeFormat::SymbolTableBlockID:
02359       ParseSymbolTable(0, &TheModule->getSymbolTable());
02360       break;
02361 
02362     default:
02363       At += Size;
02364       if (OldAt > At) {
02365         error("Unexpected Block of Type #" + utostr(Type) + " encountered!");
02366       }
02367       break;
02368     }
02369     BlockEnd = MyEnd;
02370     align32();
02371   }
02372 
02373   // After the module constant pool has been read, we can safely initialize
02374   // global variables...
02375   while (!GlobalInits.empty()) {
02376     GlobalVariable *GV = GlobalInits.back().first;
02377     unsigned Slot = GlobalInits.back().second;
02378     GlobalInits.pop_back();
02379 
02380     // Look up the initializer value...
02381     // FIXME: Preserve this type ID!
02382 
02383     const llvm::PointerType* GVType = GV->getType();
02384     unsigned TypeSlot = getTypeSlot(GVType->getElementType());
02385     if (Constant *CV = getConstantValue(TypeSlot, Slot)) {
02386       if (GV->hasInitializer())
02387         error("Global *already* has an initializer?!");
02388       if (Handler) Handler->handleGlobalInitializer(GV,CV);
02389       GV->setInitializer(CV);
02390     } else
02391       error("Cannot find initializer value.");
02392   }
02393 
02394   if (!ConstantFwdRefs.empty())
02395     error("Use of undefined constants in a module");
02396 
02397   /// Make sure we pulled them all out. If we didn't then there's a declaration
02398   /// but a missing body. That's not allowed.
02399   if (!FunctionSignatureList.empty())
02400     error("Function declared, but bytecode stream ended before definition");
02401 }
02402 
02403 /// This function completely parses a bytecode buffer given by the \p Buf
02404 /// and \p Length parameters.
02405 void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
02406                                    const std::string &ModuleID) {
02407 
02408   try {
02409     RevisionNum = 0;
02410     At = MemStart = BlockStart = Buf;
02411     MemEnd = BlockEnd = Buf + Length;
02412 
02413     // Create the module
02414     TheModule = new Module(ModuleID);
02415 
02416     if (Handler) Handler->handleStart(TheModule, Length);
02417 
02418     // Read the four bytes of the signature.
02419     unsigned Sig = read_uint();
02420 
02421     // If this is a compressed file
02422     if (Sig == ('l' | ('l' << 8) | ('v' << 16) | ('c' << 24))) {
02423 
02424       // Invoke the decompression of the bytecode. Note that we have to skip the
02425       // file's magic number which is not part of the compressed block. Hence,
02426       // the Buf+4 and Length-4. The result goes into decompressedBlock, a data
02427       // member for retention until BytecodeReader is destructed.
02428       unsigned decompressedLength = Compressor::decompressToNewBuffer(
02429           (char*)Buf+4,Length-4,decompressedBlock);
02430 
02431       // We must adjust the buffer pointers used by the bytecode reader to point
02432       // into the new decompressed block. After decompression, the
02433       // decompressedBlock will point to a contiguous memory area that has
02434       // the decompressed data.
02435       At = MemStart = BlockStart = Buf = (BufPtr) decompressedBlock;
02436       MemEnd = BlockEnd = Buf + decompressedLength;
02437 
02438     // else if this isn't a regular (uncompressed) bytecode file, then its
02439     // and error, generate that now.
02440     } else if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) {
02441       error("Invalid bytecode signature: " + utohexstr(Sig));
02442     }
02443 
02444     // Tell the handler we're starting a module
02445     if (Handler) Handler->handleModuleBegin(ModuleID);
02446 
02447     // Get the module block and size and verify. This is handled specially
02448     // because the module block/size is always written in long format. Other
02449     // blocks are written in short format so the read_block method is used.
02450     unsigned Type, Size;
02451     Type = read_uint();
02452     Size = read_uint();
02453     if (Type != BytecodeFormat::ModuleBlockID) {
02454       error("Expected Module Block! Type:" + utostr(Type) + ", Size:"
02455             + utostr(Size));
02456     }
02457 
02458     // It looks like the darwin ranlib program is broken, and adds trailing
02459     // garbage to the end of some bytecode files.  This hack allows the bc
02460     // reader to ignore trailing garbage on bytecode files.
02461     if (At + Size < MemEnd)
02462       MemEnd = BlockEnd = At+Size;
02463 
02464     if (At + Size != MemEnd)
02465       error("Invalid Top Level Block Length! Type:" + utostr(Type)
02466             + ", Size:" + utostr(Size));
02467 
02468     // Parse the module contents
02469     this->ParseModule();
02470 
02471     // Check for missing functions
02472     if (hasFunctions())
02473       error("Function expected, but bytecode stream ended!");
02474 
02475     // Look for intrinsic functions to upgrade, upgrade them, and save the
02476     // mapping from old function to new for use later when instructions are
02477     // converted.
02478     for (Module::iterator FI = TheModule->begin(), FE = TheModule->end();
02479          FI != FE; ++FI)
02480       if (Function* newF = UpgradeIntrinsicFunction(FI)) {
02481         upgradedFunctions.insert(std::make_pair(FI, newF));
02482         FI->setName("");
02483       }
02484 
02485     // Tell the handler we're done with the module
02486     if (Handler)
02487       Handler->handleModuleEnd(ModuleID);
02488 
02489     // Tell the handler we're finished the parse
02490     if (Handler) Handler->handleFinish();
02491 
02492   } catch (std::string& errstr) {
02493     if (Handler) Handler->handleError(errstr);
02494     freeState();
02495     delete TheModule;
02496     TheModule = 0;
02497     if (decompressedBlock != 0 ) {
02498       ::free(decompressedBlock);
02499       decompressedBlock = 0;
02500     }
02501     throw;
02502   } catch (...) {
02503     std::string msg("Unknown Exception Occurred");
02504     if (Handler) Handler->handleError(msg);
02505     freeState();
02506     delete TheModule;
02507     TheModule = 0;
02508     if (decompressedBlock != 0) {
02509       ::free(decompressedBlock);
02510       decompressedBlock = 0;
02511     }
02512     throw msg;
02513   }
02514 }
02515 
02516 //===----------------------------------------------------------------------===//
02517 //=== Default Implementations of Handler Methods
02518 //===----------------------------------------------------------------------===//
02519 /// Out-of-line definition of the BytecodeHandler destructor (empty body).
02520 BytecodeHandler::~BytecodeHandler() {}
02521