LLVM API Documentation

Reader.cpp

Go to the documentation of this file.
00001 //===- Reader.cpp - Code to read bytecode files ---------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This library implements the functionality defined in llvm/Bytecode/Reader.h
00011 //
00012 // Note that this library should be as fast as possible, reentrant, and
00013 // threadsafe!!
00014 //
00015 // TODO: Allow passing in an option to ignore the symbol table
00016 //
00017 //===----------------------------------------------------------------------===//
00018 
00019 #include "Reader.h"
00020 #include "llvm/Assembly/AutoUpgrade.h"
00021 #include "llvm/Bytecode/BytecodeHandler.h"
00022 #include "llvm/BasicBlock.h"
00023 #include "llvm/CallingConv.h"
00024 #include "llvm/Constants.h"
00025 #include "llvm/InlineAsm.h"
00026 #include "llvm/Instructions.h"
00027 #include "llvm/SymbolTable.h"
00028 #include "llvm/Bytecode/Format.h"
00029 #include "llvm/Config/alloca.h"
00030 #include "llvm/Support/GetElementPtrTypeIterator.h"
00031 #include "llvm/Support/Compressor.h"
00032 #include "llvm/Support/MathExtras.h"
00033 #include "llvm/ADT/StringExtras.h"
00034 #include <sstream>
00035 #include <algorithm>
00036 using namespace llvm;
00037 
00038 namespace {
00039   /// @brief A class for maintaining the slot number definition
00040   /// as a placeholder for the actual definition for forward constants defs.
00041   class ConstantPlaceHolder : public ConstantExpr {
00042     ConstantPlaceHolder();                       // DO NOT IMPLEMENT
00043     void operator=(const ConstantPlaceHolder &); // DO NOT IMPLEMENT
00044   public:
00045     Use Op;
00046     ConstantPlaceHolder(const Type *Ty)
00047       : ConstantExpr(Ty, Instruction::UserOp1, &Op, 1),
00048         Op(UndefValue::get(Type::IntTy), this) {
00049     }
00050   };
00051 }
00052 
00053 // Provide some details on error
00054 inline void BytecodeReader::error(std::string err) {
00055   err +=  " (Vers=" ;
00056   err += itostr(RevisionNum) ;
00057   err += ", Pos=" ;
00058   err += itostr(At-MemStart);
00059   err += ")";
00060   throw err;
00061 }
00062 
00063 //===----------------------------------------------------------------------===//
00064 // Bytecode Reading Methods
00065 //===----------------------------------------------------------------------===//
00066 
00067 /// Determine if the current block being read contains any more data.
00068 inline bool BytecodeReader::moreInBlock() {
00069   return At < BlockEnd;
00070 }
00071 
00072 /// Throw an error if we've read past the end of the current block
00073 inline void BytecodeReader::checkPastBlockEnd(const char * block_name) {
00074   if (At > BlockEnd)
00075     error(std::string("Attempt to read past the end of ") + block_name +
00076           " block.");
00077 }
00078 
00079 /// Align the buffer position to a 32 bit boundary
00080 inline void BytecodeReader::align32() {
00081   if (hasAlignment) {
00082     BufPtr Save = At;
00083     At = (const unsigned char *)((intptr_t)(At+3) & (~3UL));
00084     if (At > Save)
00085       if (Handler) Handler->handleAlignment(At - Save);
00086     if (At > BlockEnd)
00087       error("Ran out of data while aligning!");
00088   }
00089 }
00090 
00091 /// Read a whole unsigned integer
00092 inline unsigned BytecodeReader::read_uint() {
00093   if (At+4 > BlockEnd)
00094     error("Ran out of data reading uint!");
00095   At += 4;
00096   return At[-4] | (At[-3] << 8) | (At[-2] << 16) | (At[-1] << 24);
00097 }
00098 
00099 /// Read a variable-bit-rate encoded unsigned integer
00100 inline unsigned BytecodeReader::read_vbr_uint() {
00101   unsigned Shift = 0;
00102   unsigned Result = 0;
00103   BufPtr Save = At;
00104 
00105   do {
00106     if (At == BlockEnd)
00107       error("Ran out of data reading vbr_uint!");
00108     Result |= (unsigned)((*At++) & 0x7F) << Shift;
00109     Shift += 7;
00110   } while (At[-1] & 0x80);
00111   if (Handler) Handler->handleVBR32(At-Save);
00112   return Result;
00113 }
00114 
00115 /// Read a variable-bit-rate encoded unsigned 64-bit integer.
00116 inline uint64_t BytecodeReader::read_vbr_uint64() {
00117   unsigned Shift = 0;
00118   uint64_t Result = 0;
00119   BufPtr Save = At;
00120 
00121   do {
00122     if (At == BlockEnd)
00123       error("Ran out of data reading vbr_uint64!");
00124     Result |= (uint64_t)((*At++) & 0x7F) << Shift;
00125     Shift += 7;
00126   } while (At[-1] & 0x80);
00127   if (Handler) Handler->handleVBR64(At-Save);
00128   return Result;
00129 }
00130 
00131 /// Read a variable-bit-rate encoded signed 64-bit integer.
00132 inline int64_t BytecodeReader::read_vbr_int64() {
00133   uint64_t R = read_vbr_uint64();
00134   if (R & 1) {
00135     if (R != 1)
00136       return -(int64_t)(R >> 1);
00137     else   // There is no such thing as -0 with integers.  "-0" really means
00138            // 0x8000000000000000.
00139       return 1LL << 63;
00140   } else
00141     return  (int64_t)(R >> 1);
00142 }
00143 
00144 /// Read a pascal-style string (length followed by text)
00145 inline std::string BytecodeReader::read_str() {
00146   unsigned Size = read_vbr_uint();
00147   const unsigned char *OldAt = At;
00148   At += Size;
00149   if (At > BlockEnd)             // Size invalid?
00150     error("Ran out of data reading a string!");
00151   return std::string((char*)OldAt, Size);
00152 }
00153 
00154 /// Read an arbitrary block of data
00155 inline void BytecodeReader::read_data(void *Ptr, void *End) {
00156   unsigned char *Start = (unsigned char *)Ptr;
00157   unsigned Amount = (unsigned char *)End - Start;
00158   if (At+Amount > BlockEnd)
00159     error("Ran out of data!");
00160   std::copy(At, At+Amount, Start);
00161   At += Amount;
00162 }
00163 
00164 /// Read a float value in little-endian order
00165 inline void BytecodeReader::read_float(float& FloatVal) {
00166   /// FIXME: This isn't optimal, it has size problems on some platforms
00167   /// where FP is not IEEE.
00168   FloatVal = BitsToFloat(At[0] | (At[1] << 8) | (At[2] << 16) | (At[3] << 24));
00169   At+=sizeof(uint32_t);
00170 }
00171 
00172 /// Read a double value in little-endian order
00173 inline void BytecodeReader::read_double(double& DoubleVal) {
00174   /// FIXME: This isn't optimal, it has size problems on some platforms
00175   /// where FP is not IEEE.
00176   DoubleVal = BitsToDouble((uint64_t(At[0]) <<  0) | (uint64_t(At[1]) << 8) |
00177                            (uint64_t(At[2]) << 16) | (uint64_t(At[3]) << 24) |
00178                            (uint64_t(At[4]) << 32) | (uint64_t(At[5]) << 40) |
00179                            (uint64_t(At[6]) << 48) | (uint64_t(At[7]) << 56));
00180   At+=sizeof(uint64_t);
00181 }
00182 
00183 /// Read a block header and obtain its type and size
00184 inline void BytecodeReader::read_block(unsigned &Type, unsigned &Size) {
00185   if ( hasLongBlockHeaders ) {
00186     Type = read_uint();
00187     Size = read_uint();
00188     switch (Type) {
00189     case BytecodeFormat::Reserved_DoNotUse :
00190       error("Reserved_DoNotUse used as Module Type?");
00191       Type = BytecodeFormat::ModuleBlockID; break;
00192     case BytecodeFormat::Module:
00193       Type = BytecodeFormat::ModuleBlockID; break;
00194     case BytecodeFormat::Function:
00195       Type = BytecodeFormat::FunctionBlockID; break;
00196     case BytecodeFormat::ConstantPool:
00197       Type = BytecodeFormat::ConstantPoolBlockID; break;
00198     case BytecodeFormat::SymbolTable:
00199       Type = BytecodeFormat::SymbolTableBlockID; break;
00200     case BytecodeFormat::ModuleGlobalInfo:
00201       Type = BytecodeFormat::ModuleGlobalInfoBlockID; break;
00202     case BytecodeFormat::GlobalTypePlane:
00203       Type = BytecodeFormat::GlobalTypePlaneBlockID; break;
00204     case BytecodeFormat::InstructionList:
00205       Type = BytecodeFormat::InstructionListBlockID; break;
00206     case BytecodeFormat::CompactionTable:
00207       Type = BytecodeFormat::CompactionTableBlockID; break;
00208     case BytecodeFormat::BasicBlock:
00209       /// This block type isn't used after version 1.1. However, we have to
00210       /// still allow the value in case this is an old bc format file.
00211       /// We just let its value creep thru.
00212       break;
00213     default:
00214       error("Invalid block id found: " + utostr(Type));
00215       break;
00216     }
00217   } else {
00218     Size = read_uint();
00219     Type = Size & 0x1F; // mask low order five bits
00220     Size >>= 5; // get rid of five low order bits, leaving high 27
00221   }
00222   BlockStart = At;
00223   if (At + Size > BlockEnd)
00224     error("Attempt to size a block past end of memory");
00225   BlockEnd = At + Size;
00226   if (Handler) Handler->handleBlock(Type, BlockStart, Size);
00227 }
00228 
00229 
00230 /// In LLVM 1.2 and before, Types were derived from Value and so they were
00231 /// written as part of the type planes along with any other Value. In LLVM
00232 /// 1.3 this changed so that Type does not derive from Value. Consequently,
00233 /// the BytecodeReader's containers for Values can't contain Types because
00234 /// there's no inheritance relationship. This means that the "Type Type"
00235 /// plane is defunct along with the Type::TypeTyID TypeID. In LLVM 1.3
00236 /// whenever a bytecode construct must have both types and values together,
00237 /// the types are always read/written first and then the Values. Furthermore
00238 /// since Type::TypeTyID no longer exists, its value (12) now corresponds to
00239 /// Type::LabelTyID. In order to overcome this we must "sanitize" all the
00240 /// type TypeIDs we encounter. For LLVM 1.3 bytecode files, there's no change.
00241 /// For LLVM 1.2 and before, this function will decrement the type id by
00242 /// one to account for the missing Type::TypeTyID enumerator if the value is
00243 /// larger than 12 (Type::LabelTyID). If the value is exactly 12, then this
00244 /// function returns true, otherwise false. This helps detect situations
00245 /// where the pre 1.3 bytecode is indicating that what follows is a type.
00246 /// @returns true iff type id corresponds to pre 1.3 "type type"
00247 inline bool BytecodeReader::sanitizeTypeId(unsigned &TypeId) {
00248   if (hasTypeDerivedFromValue) { /// do nothing if 1.3 or later
00249     if (TypeId == Type::LabelTyID) {
00250       TypeId = Type::VoidTyID; // sanitize it
00251       return true; // indicate we got TypeTyID in pre 1.3 bytecode
00252     } else if (TypeId > Type::LabelTyID)
00253       --TypeId; // shift all planes down because type type plane is missing
00254   }
00255   return false;
00256 }
00257 
00258 /// Reads a vbr uint to read in a type id and does the necessary
00259 /// conversion on it by calling sanitizeTypeId.
00260 /// @returns true iff \p TypeId read corresponds to a pre 1.3 "type type"
00261 /// @see sanitizeTypeId
00262 inline bool BytecodeReader::read_typeid(unsigned &TypeId) {
00263   TypeId = read_vbr_uint();
00264   if ( !has32BitTypes )
00265     if ( TypeId == 0x00FFFFFF )
00266       TypeId = read_vbr_uint();
00267   return sanitizeTypeId(TypeId);
00268 }
00269 
00270 //===----------------------------------------------------------------------===//
00271 // IR Lookup Methods
00272 //===----------------------------------------------------------------------===//
00273 
00274 /// Determine if a type id has an implicit null value
00275 inline bool BytecodeReader::hasImplicitNull(unsigned TyID) {
00276   if (!hasExplicitPrimitiveZeros)
00277     return TyID != Type::LabelTyID && TyID != Type::VoidTyID;
00278   return TyID >= Type::FirstDerivedTyID;
00279 }
00280 
00281 /// Obtain a type given a typeid and account for things like compaction tables,
00282 /// function level vs module level, and the offsetting for the primitive types.
00283 const Type *BytecodeReader::getType(unsigned ID) {
00284   if (ID < Type::FirstDerivedTyID)
00285     if (const Type *T = Type::getPrimitiveType((Type::TypeID)ID))
00286       return T;   // Asked for a primitive type...
00287 
00288   // Otherwise, derived types need offset...
00289   ID -= Type::FirstDerivedTyID;
00290 
00291   if (!CompactionTypes.empty()) {
00292     if (ID >= CompactionTypes.size())
00293       error("Type ID out of range for compaction table!");
00294     return CompactionTypes[ID].first;
00295   }
00296 
00297   // Is it a module-level type?
00298   if (ID < ModuleTypes.size())
00299     return ModuleTypes[ID].get();
00300 
00301   // Nope, is it a function-level type?
00302   ID -= ModuleTypes.size();
00303   if (ID < FunctionTypes.size())
00304     return FunctionTypes[ID].get();
00305 
00306   error("Illegal type reference!");
00307   return Type::VoidTy;
00308 }
00309 
00310 /// Get a sanitized type id. This just makes sure that the \p ID
00311 /// is both sanitized and not the "type type" of pre-1.3 bytecode.
00312 /// @see sanitizeTypeId
00313 inline const Type* BytecodeReader::getSanitizedType(unsigned& ID) {
00314   if (sanitizeTypeId(ID))
00315     error("Invalid type id encountered");
00316   return getType(ID);
00317 }
00318 
00319 /// This method just saves some coding. It uses read_typeid to read
00320 /// in a sanitized type id, errors that its not the type type, and
00321 /// then calls getType to return the type value.
00322 inline const Type* BytecodeReader::readSanitizedType() {
00323   unsigned ID;
00324   if (read_typeid(ID))
00325     error("Invalid type id encountered");
00326   return getType(ID);
00327 }
00328 
00329 /// Get the slot number associated with a type accounting for primitive
00330 /// types, compaction tables, and function level vs module level.
00331 unsigned BytecodeReader::getTypeSlot(const Type *Ty) {
00332   if (Ty->isPrimitiveType())
00333     return Ty->getTypeID();
00334 
00335   // Scan the compaction table for the type if needed.
00336   if (!CompactionTypes.empty()) {
00337     for (unsigned i = 0, e = CompactionTypes.size(); i != e; ++i)
00338       if (CompactionTypes[i].first == Ty)
00339         return Type::FirstDerivedTyID + i;
00340 
00341     error("Couldn't find type specified in compaction table!");
00342   }
00343 
00344   // Check the function level types first...
00345   TypeListTy::iterator I = std::find(FunctionTypes.begin(),
00346                                      FunctionTypes.end(), Ty);
00347 
00348   if (I != FunctionTypes.end())
00349     return Type::FirstDerivedTyID + ModuleTypes.size() +
00350            (&*I - &FunctionTypes[0]);
00351 
00352   // If we don't have our cache yet, build it now.
00353   if (ModuleTypeIDCache.empty()) {
00354     unsigned N = 0;
00355     ModuleTypeIDCache.reserve(ModuleTypes.size());
00356     for (TypeListTy::iterator I = ModuleTypes.begin(), E = ModuleTypes.end();
00357          I != E; ++I, ++N)
00358       ModuleTypeIDCache.push_back(std::make_pair(*I, N));
00359     
00360     std::sort(ModuleTypeIDCache.begin(), ModuleTypeIDCache.end());
00361   }
00362   
00363   // Binary search the cache for the entry.
00364   std::vector<std::pair<const Type*, unsigned> >::iterator IT =
00365     std::lower_bound(ModuleTypeIDCache.begin(), ModuleTypeIDCache.end(),
00366                      std::make_pair(Ty, 0U));
00367   if (IT == ModuleTypeIDCache.end() || IT->first != Ty)
00368     error("Didn't find type in ModuleTypes.");
00369     
00370   return Type::FirstDerivedTyID + IT->second;
00371 }
00372 
00373 /// This is just like getType, but when a compaction table is in use, it is
00374 /// ignored.  It also ignores function level types.
00375 /// @see getType
00376 const Type *BytecodeReader::getGlobalTableType(unsigned Slot) {
00377   if (Slot < Type::FirstDerivedTyID) {
00378     const Type *Ty = Type::getPrimitiveType((Type::TypeID)Slot);
00379     if (!Ty)
00380       error("Not a primitive type ID?");
00381     return Ty;
00382   }
00383   Slot -= Type::FirstDerivedTyID;
00384   if (Slot >= ModuleTypes.size())
00385     error("Illegal compaction table type reference!");
00386   return ModuleTypes[Slot];
00387 }
00388 
00389 /// This is just like getTypeSlot, but when a compaction table is in use, it
00390 /// is ignored. It also ignores function level types.
00391 unsigned BytecodeReader::getGlobalTableTypeSlot(const Type *Ty) {
00392   if (Ty->isPrimitiveType())
00393     return Ty->getTypeID();
00394   
00395   // If we don't have our cache yet, build it now.
00396   if (ModuleTypeIDCache.empty()) {
00397     unsigned N = 0;
00398     ModuleTypeIDCache.reserve(ModuleTypes.size());
00399     for (TypeListTy::iterator I = ModuleTypes.begin(), E = ModuleTypes.end();
00400          I != E; ++I, ++N)
00401       ModuleTypeIDCache.push_back(std::make_pair(*I, N));
00402     
00403     std::sort(ModuleTypeIDCache.begin(), ModuleTypeIDCache.end());
00404   }
00405   
00406   // Binary search the cache for the entry.
00407   std::vector<std::pair<const Type*, unsigned> >::iterator IT =
00408     std::lower_bound(ModuleTypeIDCache.begin(), ModuleTypeIDCache.end(),
00409                      std::make_pair(Ty, 0U));
00410   if (IT == ModuleTypeIDCache.end() || IT->first != Ty)
00411     error("Didn't find type in ModuleTypes.");
00412   
00413   return Type::FirstDerivedTyID + IT->second;
00414 }
00415 
00416 /// Retrieve a value of a given type and slot number, possibly creating
00417 /// it if it doesn't already exist.
00418 Value * BytecodeReader::getValue(unsigned type, unsigned oNum, bool Create) {
00419   assert(type != Type::LabelTyID && "getValue() cannot get blocks!");
00420   unsigned Num = oNum;
00421 
00422   // If there is a compaction table active, it defines the low-level numbers.
00423   // If not, the module values define the low-level numbers.
00424   if (CompactionValues.size() > type && !CompactionValues[type].empty()) {
00425     if (Num < CompactionValues[type].size())
00426       return CompactionValues[type][Num];
00427     Num -= CompactionValues[type].size();
00428   } else {
00429     // By default, the global type id is the type id passed in
00430     unsigned GlobalTyID = type;
00431 
00432     // If the type plane was compactified, figure out the global type ID by
00433     // adding the derived type ids and the distance.
00434     if (!CompactionTypes.empty() && type >= Type::FirstDerivedTyID)
00435       GlobalTyID = CompactionTypes[type-Type::FirstDerivedTyID].second;
00436 
00437     if (hasImplicitNull(GlobalTyID)) {
00438       const Type *Ty = getType(type);
00439       if (!isa<OpaqueType>(Ty)) {
00440         if (Num == 0)
00441           return Constant::getNullValue(Ty);
00442         --Num;
00443       }
00444     }
00445 
00446     if (GlobalTyID < ModuleValues.size() && ModuleValues[GlobalTyID]) {
00447       if (Num < ModuleValues[GlobalTyID]->size())
00448         return ModuleValues[GlobalTyID]->getOperand(Num);
00449       Num -= ModuleValues[GlobalTyID]->size();
00450     }
00451   }
00452 
00453   if (FunctionValues.size() > type &&
00454       FunctionValues[type] &&
00455       Num < FunctionValues[type]->size())
00456     return FunctionValues[type]->getOperand(Num);
00457 
00458   if (!Create) return 0;  // Do not create a placeholder?
00459 
00460   // Did we already create a place holder?
00461   std::pair<unsigned,unsigned> KeyValue(type, oNum);
00462   ForwardReferenceMap::iterator I = ForwardReferences.lower_bound(KeyValue);
00463   if (I != ForwardReferences.end() && I->first == KeyValue)
00464     return I->second;   // We have already created this placeholder
00465 
00466   // If the type exists (it should)
00467   if (const Type* Ty = getType(type)) {
00468     // Create the place holder
00469     Value *Val = new Argument(Ty);
00470     ForwardReferences.insert(I, std::make_pair(KeyValue, Val));
00471     return Val;
00472   }
00473   throw "Can't create placeholder for value of type slot #" + utostr(type);
00474 }
00475 
00476 /// This is just like getValue, but when a compaction table is in use, it
00477 /// is ignored.  Also, no forward references or other fancy features are
00478 /// supported.
00479 Value* BytecodeReader::getGlobalTableValue(unsigned TyID, unsigned SlotNo) {
00480   if (SlotNo == 0)
00481     return Constant::getNullValue(getType(TyID));
00482 
00483   if (!CompactionTypes.empty() && TyID >= Type::FirstDerivedTyID) {
00484     TyID -= Type::FirstDerivedTyID;
00485     if (TyID >= CompactionTypes.size())
00486       error("Type ID out of range for compaction table!");
00487     TyID = CompactionTypes[TyID].second;
00488   }
00489 
00490   --SlotNo;
00491 
00492   if (TyID >= ModuleValues.size() || ModuleValues[TyID] == 0 ||
00493       SlotNo >= ModuleValues[TyID]->size()) {
00494     if (TyID >= ModuleValues.size() || ModuleValues[TyID] == 0)
00495       error("Corrupt compaction table entry!"
00496             + utostr(TyID) + ", " + utostr(SlotNo) + ": "
00497             + utostr(ModuleValues.size()));
00498     else
00499       error("Corrupt compaction table entry!"
00500             + utostr(TyID) + ", " + utostr(SlotNo) + ": "
00501             + utostr(ModuleValues.size()) + ", "
00502             + utohexstr(reinterpret_cast<uint64_t>(((void*)ModuleValues[TyID])))
00503             + ", "
00504             + utostr(ModuleValues[TyID]->size()));
00505   }
00506   return ModuleValues[TyID]->getOperand(SlotNo);
00507 }
00508 
00509 /// Just like getValue, except that it returns a null pointer
00510 /// only on error.  It always returns a constant (meaning that if the value is
00511 /// defined, but is not a constant, that is an error).  If the specified
00512 /// constant hasn't been parsed yet, a placeholder is defined and used.
00513 /// Later, after the real value is parsed, the placeholder is eliminated.
00514 Constant* BytecodeReader::getConstantValue(unsigned TypeSlot, unsigned Slot) {
00515   if (Value *V = getValue(TypeSlot, Slot, false))
00516     if (Constant *C = dyn_cast<Constant>(V))
00517       return C;   // If we already have the value parsed, just return it
00518     else
00519       error("Value for slot " + utostr(Slot) +
00520             " is expected to be a constant!");
00521 
00522   std::pair<unsigned, unsigned> Key(TypeSlot, Slot);
00523   ConstantRefsType::iterator I = ConstantFwdRefs.lower_bound(Key);
00524 
00525   if (I != ConstantFwdRefs.end() && I->first == Key) {
00526     return I->second;
00527   } else {
00528     // Create a placeholder for the constant reference and
00529     // keep track of the fact that we have a forward ref to recycle it
00530     Constant *C = new ConstantPlaceHolder(getType(TypeSlot));
00531 
00532     // Keep track of the fact that we have a forward ref to recycle it
00533     ConstantFwdRefs.insert(I, std::make_pair(Key, C));
00534     return C;
00535   }
00536 }
00537 
00538 //===----------------------------------------------------------------------===//
00539 // IR Construction Methods
00540 //===----------------------------------------------------------------------===//
00541 
00542 /// As values are created, they are inserted into the appropriate place
00543 /// with this method. The ValueTable argument must be one of ModuleValues
00544 /// or FunctionValues data members of this class.
00545 unsigned BytecodeReader::insertValue(Value *Val, unsigned type,
00546                                       ValueTable &ValueTab) {
00547   if (ValueTab.size() <= type)
00548     ValueTab.resize(type+1);
00549 
00550   if (!ValueTab[type]) ValueTab[type] = new ValueList();
00551 
00552   ValueTab[type]->push_back(Val);
00553 
00554   bool HasOffset = hasImplicitNull(type) && !isa<OpaqueType>(Val->getType());
00555   return ValueTab[type]->size()-1 + HasOffset;
00556 }
00557 
00558 /// Insert the arguments of a function as new values in the reader.
00559 void BytecodeReader::insertArguments(Function* F) {
00560   const FunctionType *FT = F->getFunctionType();
00561   Function::arg_iterator AI = F->arg_begin();
00562   for (FunctionType::param_iterator It = FT->param_begin();
00563        It != FT->param_end(); ++It, ++AI)
00564     insertValue(AI, getTypeSlot(AI->getType()), FunctionValues);
00565 }
00566 
00567 //===----------------------------------------------------------------------===//
00568 // Bytecode Parsing Methods
00569 //===----------------------------------------------------------------------===//
00570 
00571 /// This method parses a single instruction. The instruction is
00572 /// inserted at the end of the \p BB provided. The arguments of
00573 /// the instruction are provided in the \p Oprnds vector.
00574 void BytecodeReader::ParseInstruction(std::vector<unsigned> &Oprnds,
00575                                       BasicBlock* BB) {
00576   BufPtr SaveAt = At;
00577 
00578   // Clear instruction data
00579   Oprnds.clear();
00580   unsigned iType = 0;
00581   unsigned Opcode = 0;
00582   unsigned Op = read_uint();
00583 
00584   // bits   Instruction format:        Common to all formats
00585   // --------------------------
00586   // 01-00: Opcode type, fixed to 1.
00587   // 07-02: Opcode
00588   Opcode    = (Op >> 2) & 63;
00589   Oprnds.resize((Op >> 0) & 03);
00590 
00591   // Extract the operands
00592   switch (Oprnds.size()) {
00593   case 1:
00594     // bits   Instruction format:
00595     // --------------------------
00596     // 19-08: Resulting type plane
00597     // 31-20: Operand #1 (if set to (2^12-1), then zero operands)
00598     //
00599     iType   = (Op >>  8) & 4095;
00600     Oprnds[0] = (Op >> 20) & 4095;
00601     if (Oprnds[0] == 4095)    // Handle special encoding for 0 operands...
00602       Oprnds.resize(0);
00603     break;
00604   case 2:
00605     // bits   Instruction format:
00606     // --------------------------
00607     // 15-08: Resulting type plane
00608     // 23-16: Operand #1
00609     // 31-24: Operand #2
00610     //
00611     iType   = (Op >>  8) & 255;
00612     Oprnds[0] = (Op >> 16) & 255;
00613     Oprnds[1] = (Op >> 24) & 255;
00614     break;
00615   case 3:
00616     // bits   Instruction format:
00617     // --------------------------
00618     // 13-08: Resulting type plane
00619     // 19-14: Operand #1
00620     // 25-20: Operand #2
00621     // 31-26: Operand #3
00622     //
00623     iType   = (Op >>  8) & 63;
00624     Oprnds[0] = (Op >> 14) & 63;
00625     Oprnds[1] = (Op >> 20) & 63;
00626     Oprnds[2] = (Op >> 26) & 63;
00627     break;
00628   case 0:
00629     At -= 4;  // Hrm, try this again...
00630     Opcode = read_vbr_uint();
00631     Opcode >>= 2;
00632     iType = read_vbr_uint();
00633 
00634     unsigned NumOprnds = read_vbr_uint();
00635     Oprnds.resize(NumOprnds);
00636 
00637     if (NumOprnds == 0)
00638       error("Zero-argument instruction found; this is invalid.");
00639 
00640     for (unsigned i = 0; i != NumOprnds; ++i)
00641       Oprnds[i] = read_vbr_uint();
00642     align32();
00643     break;
00644   }
00645 
00646   const Type *InstTy = getSanitizedType(iType);
00647 
00648   // We have enough info to inform the handler now.
00649   if (Handler) Handler->handleInstruction(Opcode, InstTy, Oprnds, At-SaveAt);
00650 
00651   // Declare the resulting instruction we'll build.
00652   Instruction *Result = 0;
00653 
00654   // If this is a bytecode format that did not include the unreachable
00655   // instruction, bump up all opcodes numbers to make space.
00656   if (hasNoUnreachableInst) {
00657     if (Opcode >= Instruction::Unreachable &&
00658         Opcode < 62) {
00659       ++Opcode;
00660     }
00661   }
00662 
00663   // Handle binary operators
00664   if (Opcode >= Instruction::BinaryOpsBegin &&
00665       Opcode <  Instruction::BinaryOpsEnd  && Oprnds.size() == 2)
00666     Result = BinaryOperator::create((Instruction::BinaryOps)Opcode,
00667                                     getValue(iType, Oprnds[0]),
00668                                     getValue(iType, Oprnds[1]));
00669 
00670   bool isCall = false;
00671   switch (Opcode) {
00672   default:
00673     if (Result == 0)
00674       error("Illegal instruction read!");
00675     break;
00676   case Instruction::VAArg:
00677     Result = new VAArgInst(getValue(iType, Oprnds[0]),
00678                            getSanitizedType(Oprnds[1]));
00679     break;
00680   case 32: { //VANext_old
00681     const Type* ArgTy = getValue(iType, Oprnds[0])->getType();
00682     Function* NF = TheModule->getOrInsertFunction("llvm.va_copy", ArgTy, ArgTy,
00683                                                   (Type *)0);
00684 
00685     //b = vanext a, t ->
00686     //foo = alloca 1 of t
00687     //bar = vacopy a
00688     //store bar -> foo
00689     //tmp = vaarg foo, t
00690     //b = load foo
00691     AllocaInst* foo = new AllocaInst(ArgTy, 0, "vanext.fix");
00692     BB->getInstList().push_back(foo);
00693     CallInst* bar = new CallInst(NF, getValue(iType, Oprnds[0]));
00694     BB->getInstList().push_back(bar);
00695     BB->getInstList().push_back(new StoreInst(bar, foo));
00696     Instruction* tmp = new VAArgInst(foo, getSanitizedType(Oprnds[1]));
00697     BB->getInstList().push_back(tmp);
00698     Result = new LoadInst(foo);
00699     break;
00700   }
00701   case 33: { //VAArg_old
00702     const Type* ArgTy = getValue(iType, Oprnds[0])->getType();
00703     Function* NF = TheModule->getOrInsertFunction("llvm.va_copy", ArgTy, ArgTy,
00704                                                   (Type *)0);
00705 
00706     //b = vaarg a, t ->
00707     //foo = alloca 1 of t
00708     //bar = vacopy a
00709     //store bar -> foo
00710     //b = vaarg foo, t
00711     AllocaInst* foo = new AllocaInst(ArgTy, 0, "vaarg.fix");
00712     BB->getInstList().push_back(foo);
00713     CallInst* bar = new CallInst(NF, getValue(iType, Oprnds[0]));
00714     BB->getInstList().push_back(bar);
00715     BB->getInstList().push_back(new StoreInst(bar, foo));
00716     Result = new VAArgInst(foo, getSanitizedType(Oprnds[1]));
00717     break;
00718   }
00719   case Instruction::ExtractElement: {
00720     if (Oprnds.size() != 2)
00721       throw std::string("Invalid extractelement instruction!");
00722     Value *V1 = getValue(iType, Oprnds[0]);
00723     Value *V2 = getValue(Type::UIntTyID, Oprnds[1]);
00724     
00725     if (!ExtractElementInst::isValidOperands(V1, V2))
00726       throw std::string("Invalid extractelement instruction!");
00727 
00728     Result = new ExtractElementInst(V1, V2);
00729     break;
00730   }
00731   case Instruction::InsertElement: {
00732     const PackedType *PackedTy = dyn_cast<PackedType>(InstTy);
00733     if (!PackedTy || Oprnds.size() != 3)
00734       throw std::string("Invalid insertelement instruction!");
00735     
00736     Value *V1 = getValue(iType, Oprnds[0]);
00737     Value *V2 = getValue(getTypeSlot(PackedTy->getElementType()), Oprnds[1]);
00738     Value *V3 = getValue(Type::UIntTyID, Oprnds[2]);
00739       
00740     if (!InsertElementInst::isValidOperands(V1, V2, V3))
00741       throw std::string("Invalid insertelement instruction!");
00742     Result = new InsertElementInst(V1, V2, V3);
00743     break;
00744   }
00745   case Instruction::ShuffleVector: {
00746     const PackedType *PackedTy = dyn_cast<PackedType>(InstTy);
00747     if (!PackedTy || Oprnds.size() != 3)
00748       throw std::string("Invalid shufflevector instruction!");
00749     Value *V1 = getValue(iType, Oprnds[0]);
00750     Value *V2 = getValue(iType, Oprnds[1]);
00751     const PackedType *EltTy = 
00752       PackedType::get(Type::UIntTy, PackedTy->getNumElements());
00753     Value *V3 = getValue(getTypeSlot(EltTy), Oprnds[2]);
00754     if (!ShuffleVectorInst::isValidOperands(V1, V2, V3))
00755       throw std::string("Invalid shufflevector instruction!");
00756     Result = new ShuffleVectorInst(V1, V2, V3);
00757     break;
00758   }
00759   case Instruction::Cast:
00760     Result = new CastInst(getValue(iType, Oprnds[0]),
00761                           getSanitizedType(Oprnds[1]));
00762     break;
00763   case Instruction::Select:
00764     Result = new SelectInst(getValue(Type::BoolTyID, Oprnds[0]),
00765                             getValue(iType, Oprnds[1]),
00766                             getValue(iType, Oprnds[2]));
00767     break;
00768   case Instruction::PHI: {
00769     if (Oprnds.size() == 0 || (Oprnds.size() & 1))
00770       error("Invalid phi node encountered!");
00771 
00772     PHINode *PN = new PHINode(InstTy);
00773     PN->reserveOperandSpace(Oprnds.size());
00774     for (unsigned i = 0, e = Oprnds.size(); i != e; i += 2)
00775       PN->addIncoming(getValue(iType, Oprnds[i]), getBasicBlock(Oprnds[i+1]));
00776     Result = PN;
00777     break;
00778   }
00779 
00780   case Instruction::Shl:
00781   case Instruction::Shr:
00782     Result = new ShiftInst((Instruction::OtherOps)Opcode,
00783                            getValue(iType, Oprnds[0]),
00784                            getValue(Type::UByteTyID, Oprnds[1]));
00785     break;
00786   case Instruction::Ret:
00787     if (Oprnds.size() == 0)
00788       Result = new ReturnInst();
00789     else if (Oprnds.size() == 1)
00790       Result = new ReturnInst(getValue(iType, Oprnds[0]));
00791     else
00792       error("Unrecognized instruction!");
00793     break;
00794 
00795   case Instruction::Br:
00796     if (Oprnds.size() == 1)
00797       Result = new BranchInst(getBasicBlock(Oprnds[0]));
00798     else if (Oprnds.size() == 3)
00799       Result = new BranchInst(getBasicBlock(Oprnds[0]),
00800           getBasicBlock(Oprnds[1]), getValue(Type::BoolTyID , Oprnds[2]));
00801     else
00802       error("Invalid number of operands for a 'br' instruction!");
00803     break;
00804   case Instruction::Switch: {
00805     if (Oprnds.size() & 1)
00806       error("Switch statement with odd number of arguments!");
00807 
00808     SwitchInst *I = new SwitchInst(getValue(iType, Oprnds[0]),
00809                                    getBasicBlock(Oprnds[1]),
00810                                    Oprnds.size()/2-1);
00811     for (unsigned i = 2, e = Oprnds.size(); i != e; i += 2)
00812       I->addCase(cast<ConstantInt>(getValue(iType, Oprnds[i])),
00813                  getBasicBlock(Oprnds[i+1]));
00814     Result = I;
00815     break;
00816   }
00817 
00818   case 58:                   // Call with extra operand for calling conv
00819   case 59:                   // tail call, Fast CC
00820   case 60:                   // normal call, Fast CC
00821   case 61:                   // tail call, C Calling Conv
00822   case Instruction::Call: {  // Normal Call, C Calling Convention
00823     if (Oprnds.size() == 0)
00824       error("Invalid call instruction encountered!");
00825 
00826     Value *F = getValue(iType, Oprnds[0]);
00827 
00828     unsigned CallingConv = CallingConv::C;
00829     bool isTailCall = false;
00830 
00831     if (Opcode == 61 || Opcode == 59)
00832       isTailCall = true;
00833     
00834     if (Opcode == 58) {
00835       isTailCall = Oprnds.back() & 1;
00836       CallingConv = Oprnds.back() >> 1;
00837       Oprnds.pop_back();
00838     } else if (Opcode == 59 || Opcode == 60) {
00839       CallingConv = CallingConv::Fast;
00840     }
00841     
00842     // Check to make sure we have a pointer to function type
00843     const PointerType *PTy = dyn_cast<PointerType>(F->getType());
00844     if (PTy == 0) error("Call to non function pointer value!");
00845     const FunctionType *FTy = dyn_cast<FunctionType>(PTy->getElementType());
00846     if (FTy == 0) error("Call to non function pointer value!");
00847 
00848     std::vector<Value *> Params;
00849     if (!FTy->isVarArg()) {
00850       FunctionType::param_iterator It = FTy->param_begin();
00851 
00852       for (unsigned i = 1, e = Oprnds.size(); i != e; ++i) {
00853         if (It == FTy->param_end())
00854           error("Invalid call instruction!");
00855         Params.push_back(getValue(getTypeSlot(*It++), Oprnds[i]));
00856       }
00857       if (It != FTy->param_end())
00858         error("Invalid call instruction!");
00859     } else {
00860       Oprnds.erase(Oprnds.begin(), Oprnds.begin()+1);
00861 
00862       unsigned FirstVariableOperand;
00863       if (Oprnds.size() < FTy->getNumParams())
00864         error("Call instruction missing operands!");
00865 
00866       // Read all of the fixed arguments
00867       for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
00868         Params.push_back(getValue(getTypeSlot(FTy->getParamType(i)),Oprnds[i]));
00869 
00870       FirstVariableOperand = FTy->getNumParams();
00871 
00872       if ((Oprnds.size()-FirstVariableOperand) & 1)
00873         error("Invalid call instruction!");   // Must be pairs of type/value
00874 
00875       for (unsigned i = FirstVariableOperand, e = Oprnds.size();
00876            i != e; i += 2)
00877         Params.push_back(getValue(Oprnds[i], Oprnds[i+1]));
00878     }
00879 
00880     Result = new CallInst(F, Params);
00881     if (isTailCall) cast<CallInst>(Result)->setTailCall();
00882     if (CallingConv) cast<CallInst>(Result)->setCallingConv(CallingConv);
00883     break;
00884   }
00885   case 56:                     // Invoke with encoded CC
00886   case 57:                     // Invoke Fast CC
00887   case Instruction::Invoke: {  // Invoke C CC
00888     if (Oprnds.size() < 3)
00889       error("Invalid invoke instruction!");
00890     Value *F = getValue(iType, Oprnds[0]);
00891 
00892     // Check to make sure we have a pointer to function type
00893     const PointerType *PTy = dyn_cast<PointerType>(F->getType());
00894     if (PTy == 0)
00895       error("Invoke to non function pointer value!");
00896     const FunctionType *FTy = dyn_cast<FunctionType>(PTy->getElementType());
00897     if (FTy == 0)
00898       error("Invoke to non function pointer value!");
00899 
00900     std::vector<Value *> Params;
00901     BasicBlock *Normal, *Except;
00902     unsigned CallingConv = CallingConv::C;
00903 
00904     if (Opcode == 57)
00905       CallingConv = CallingConv::Fast;
00906     else if (Opcode == 56) {
00907       CallingConv = Oprnds.back();
00908       Oprnds.pop_back();
00909     }
00910 
00911     if (!FTy->isVarArg()) {
00912       Normal = getBasicBlock(Oprnds[1]);
00913       Except = getBasicBlock(Oprnds[2]);
00914 
00915       FunctionType::param_iterator It = FTy->param_begin();
00916       for (unsigned i = 3, e = Oprnds.size(); i != e; ++i) {
00917         if (It == FTy->param_end())
00918           error("Invalid invoke instruction!");
00919         Params.push_back(getValue(getTypeSlot(*It++), Oprnds[i]));
00920       }
00921       if (It != FTy->param_end())
00922         error("Invalid invoke instruction!");
00923     } else {
00924       Oprnds.erase(Oprnds.begin(), Oprnds.begin()+1);
00925 
00926       Normal = getBasicBlock(Oprnds[0]);
00927       Except = getBasicBlock(Oprnds[1]);
00928 
00929       unsigned FirstVariableArgument = FTy->getNumParams()+2;
00930       for (unsigned i = 2; i != FirstVariableArgument; ++i)
00931         Params.push_back(getValue(getTypeSlot(FTy->getParamType(i-2)),
00932                                   Oprnds[i]));
00933 
00934       if (Oprnds.size()-FirstVariableArgument & 1) // Must be type/value pairs
00935         error("Invalid invoke instruction!");
00936 
00937       for (unsigned i = FirstVariableArgument; i < Oprnds.size(); i += 2)
00938         Params.push_back(getValue(Oprnds[i], Oprnds[i+1]));
00939     }
00940 
00941     Result = new InvokeInst(F, Normal, Except, Params);
00942     if (CallingConv) cast<InvokeInst>(Result)->setCallingConv(CallingConv);
00943     break;
00944   }
00945   case Instruction::Malloc: {
00946     unsigned Align = 0;
00947     if (Oprnds.size() == 2)
00948       Align = (1 << Oprnds[1]) >> 1;
00949     else if (Oprnds.size() > 2)
00950       error("Invalid malloc instruction!");
00951     if (!isa<PointerType>(InstTy))
00952       error("Invalid malloc instruction!");
00953 
00954     Result = new MallocInst(cast<PointerType>(InstTy)->getElementType(),
00955                             getValue(Type::UIntTyID, Oprnds[0]), Align);
00956     break;
00957   }
00958 
00959   case Instruction::Alloca: {
00960     unsigned Align = 0;
00961     if (Oprnds.size() == 2)
00962       Align = (1 << Oprnds[1]) >> 1;
00963     else if (Oprnds.size() > 2)
00964       error("Invalid alloca instruction!");
00965     if (!isa<PointerType>(InstTy))
00966       error("Invalid alloca instruction!");
00967 
00968     Result = new AllocaInst(cast<PointerType>(InstTy)->getElementType(),
00969                             getValue(Type::UIntTyID, Oprnds[0]), Align);
00970     break;
00971   }
00972   case Instruction::Free:
00973     if (!isa<PointerType>(InstTy))
00974       error("Invalid free instruction!");
00975     Result = new FreeInst(getValue(iType, Oprnds[0]));
00976     break;
00977   case Instruction::GetElementPtr: {
00978     if (Oprnds.size() == 0 || !isa<PointerType>(InstTy))
00979       error("Invalid getelementptr instruction!");
00980 
00981     std::vector<Value*> Idx;
00982 
00983     const Type *NextTy = InstTy;
00984     for (unsigned i = 1, e = Oprnds.size(); i != e; ++i) {
00985       const CompositeType *TopTy = dyn_cast_or_null<CompositeType>(NextTy);
00986       if (!TopTy)
00987         error("Invalid getelementptr instruction!");
00988 
00989       unsigned ValIdx = Oprnds[i];
00990       unsigned IdxTy = 0;
00991       if (!hasRestrictedGEPTypes) {
00992         // Struct indices are always uints, sequential type indices can be any
00993         // of the 32 or 64-bit integer types.  The actual choice of type is
00994         // encoded in the low two bits of the slot number.
00995         if (isa<StructType>(TopTy))
00996           IdxTy = Type::UIntTyID;
00997         else {
00998           switch (ValIdx & 3) {
00999           default:
01000           case 0: IdxTy = Type::UIntTyID; break;
01001           case 1: IdxTy = Type::IntTyID; break;
01002           case 2: IdxTy = Type::ULongTyID; break;
01003           case 3: IdxTy = Type::LongTyID; break;
01004           }
01005           ValIdx >>= 2;
01006         }
01007       } else {
01008         IdxTy = isa<StructType>(TopTy) ? Type::UByteTyID : Type::LongTyID;
01009       }
01010 
01011       Idx.push_back(getValue(IdxTy, ValIdx));
01012 
01013       // Convert ubyte struct indices into uint struct indices.
01014       if (isa<StructType>(TopTy) && hasRestrictedGEPTypes)
01015         if (ConstantUInt *C = dyn_cast<ConstantUInt>(Idx.back()))
01016           Idx[Idx.size()-1] = ConstantExpr::getCast(C, Type::UIntTy);
01017 
01018       NextTy = GetElementPtrInst::getIndexedType(InstTy, Idx, true);
01019     }
01020 
01021     Result = new GetElementPtrInst(getValue(iType, Oprnds[0]), Idx);
01022     break;
01023   }
01024 
01025   case 62:   // volatile load
01026   case Instruction::Load:
01027     if (Oprnds.size() != 1 || !isa<PointerType>(InstTy))
01028       error("Invalid load instruction!");
01029     Result = new LoadInst(getValue(iType, Oprnds[0]), "", Opcode == 62);
01030     break;
01031 
01032   case 63:   // volatile store
01033   case Instruction::Store: {
01034     if (!isa<PointerType>(InstTy) || Oprnds.size() != 2)
01035       error("Invalid store instruction!");
01036 
01037     Value *Ptr = getValue(iType, Oprnds[1]);
01038     const Type *ValTy = cast<PointerType>(Ptr->getType())->getElementType();
01039     Result = new StoreInst(getValue(getTypeSlot(ValTy), Oprnds[0]), Ptr,
01040                            Opcode == 63);
01041     break;
01042   }
01043   case Instruction::Unwind:
01044     if (Oprnds.size() != 0) error("Invalid unwind instruction!");
01045     Result = new UnwindInst();
01046     break;
01047   case Instruction::Unreachable:
01048     if (Oprnds.size() != 0) error("Invalid unreachable instruction!");
01049     Result = new UnreachableInst();
01050     break;
01051   }  // end switch(Opcode)
01052 
01053   BB->getInstList().push_back(Result);
01054 
01055   unsigned TypeSlot;
01056   if (Result->getType() == InstTy)
01057     TypeSlot = iType;
01058   else
01059     TypeSlot = getTypeSlot(Result->getType());
01060 
01061   insertValue(Result, TypeSlot, FunctionValues);
01062 }
01063 
01064 /// Get a particular numbered basic block, which might be a forward reference.
01065 /// This works together with ParseBasicBlock to handle these forward references
01066 /// in a clean manner.  This function is used when constructing phi, br, switch,
01067 /// and other instructions that reference basic blocks. Blocks are numbered
01068 /// sequentially as they appear in the function.
      /// @param ID The number of the basic block being referenced.
      /// @returns The block stored for ID in ParsedBasicBlocks; an empty block is
      /// created and stored first if that slot is still null.
01069 BasicBlock *BytecodeReader::getBasicBlock(unsigned ID) {
01070   // Make sure there is room in the table...
        // NOTE(review): the table is grown to ID+1 entries with no upper bound on
        // ID visible here; presumably new slots come back null so the check below
        // treats them as "not created yet" -- confirm against the declaration of
        // ParsedBasicBlocks.
01071   if (ParsedBasicBlocks.size() <= ID) ParsedBasicBlocks.resize(ID+1);
01072 
01073   // First check to see if this is a backwards reference, i.e., ParseBasicBlock
01074   // has already created this block, or if the forward reference has already
01075   // been created.
01076   if (ParsedBasicBlocks[ID])
01077     return ParsedBasicBlocks[ID];
01078 
01079   // Otherwise, the basic block has not yet been created.  Do so and add it to
01080   // the ParsedBasicBlocks list.
        // The block is only recorded in the table here; it is not appended to a
        // function's block list by this routine.
01081   return ParsedBasicBlocks[ID] = new BasicBlock();
01082 }
01083 
01084 /// In LLVM 1.0 bytecode files, we used to output one basicblock at a time.
01085 /// This method reads in one of the basicblock packets. This method is not used
01086 /// for bytecode files after LLVM 1.0
      /// @param BlockNo The number of the block being read; it selects the slot in
      /// ParsedBasicBlocks and is reported to the handler.
01087 /// @returns The basic block constructed.
01088 BasicBlock *BytecodeReader::ParseBasicBlock(unsigned BlockNo) {
01089   if (Handler) Handler->handleBasicBlockBegin(BlockNo);
01090 
01091   BasicBlock *BB = 0;
01092 
        // Three cases: the table ends exactly at BlockNo (append a new block), the
        // slot exists but is still null (create the block in place), or the slot
        // was already filled, e.g. by a forward reference through getBasicBlock
        // (reuse that block).
        // NOTE(review): if ParsedBasicBlocks.size() < BlockNo the index below is
        // out of range; this appears to assume blocks are parsed in increasing
        // order -- confirm with the caller.
01093   if (ParsedBasicBlocks.size() == BlockNo)
01094     ParsedBasicBlocks.push_back(BB = new BasicBlock());
01095   else if (ParsedBasicBlocks[BlockNo] == 0)
01096     BB = ParsedBasicBlocks[BlockNo] = new BasicBlock();
01097   else
01098     BB = ParsedBasicBlocks[BlockNo];
01099 
        // Everything remaining in the current block is parsed as instructions
        // into BB; the same operand vector is passed to every call.
01100   std::vector<unsigned> Operands;
01101   while (moreInBlock())
01102     ParseInstruction(Operands, BB);
01103 
01104   if (Handler) Handler->handleBasicBlockEnd(BlockNo);
01105   return BB;
01106 }
01107 
01108 /// Parse all of the BasicBlock's & Instruction's in the body of a function.
01109 /// In post 1.0 bytecode files, we no longer emit basic block individually,
01110 /// in order to avoid per-basic-block overhead.
      /// @param F The function whose basic block list receives the parsed blocks.
01111 /// @returns The number of basic blocks encountered.
01112 unsigned BytecodeReader::ParseInstructionList(Function* F) {
01113   unsigned BlockNo = 0;
01114   std::vector<unsigned> Args;
01115 
        // Each iteration of this loop reads one basic block.
01116   while (moreInBlock()) {
01117     if (Handler) Handler->handleBasicBlockBegin(BlockNo);
          // Fetch the block for this number: append a new one if the table ends
          // here, create it in place if the slot is still null, otherwise reuse
          // the block already stored there (e.g. by getBasicBlock).
01118     BasicBlock *BB;
01119     if (ParsedBasicBlocks.size() == BlockNo)
01120       ParsedBasicBlocks.push_back(BB = new BasicBlock());
01121     else if (ParsedBasicBlocks[BlockNo] == 0)
01122       BB = ParsedBasicBlocks[BlockNo] = new BasicBlock();
01123     else
01124       BB = ParsedBasicBlocks[BlockNo];
01125     ++BlockNo;
01126     F->getBasicBlockList().push_back(BB);
01127 
01128     // Read instructions into this basic block until we get to a terminator
01129     while (moreInBlock() && !BB->getTerminator())
01130       ParseInstruction(Args, BB);
01131 
          // The inner loop also stops when the input runs out, so a block may end
          // here without a terminator; that is reported as an error.
01132     if (!BB->getTerminator())
01133       error("Non-terminated basic block found!");
01134 
          // BlockNo was already advanced above, hence the -1 to name this block.
01135     if (Handler) Handler->handleBasicBlockEnd(BlockNo-1);
01136   }
01137 
01138   return BlockNo;
01139 }
01140 
01141 /// Parse a symbol table. This works for both module level and function
01142 /// level symbol tables.  For function level symbol tables, the CurrentFunction
01143 /// parameter must be non-zero and the ST parameter must correspond to
01144 /// CurrentFunction's symbol table. For Module level symbol tables, the
01145 /// CurrentFunction argument must be zero.
      /// @param CurrentFunction The function being read, or zero at module level.
      /// @param ST The symbol table that receives the type names read here.
01146 void BytecodeReader::ParseSymbolTable(Function *CurrentFunction,
01147                                       SymbolTable *ST) {
01148   if (Handler) Handler->handleSymbolTableBegin(CurrentFunction,ST);
01149 
01150   // Allow efficient basic block lookup by number.
        // BBMap[i] is the i-th block in CurrentFunction's iteration order; it
        // stays empty when CurrentFunction is zero.
01151   std::vector<BasicBlock*> BBMap;
01152   if (CurrentFunction)
01153     for (Function::iterator I = CurrentFunction->begin(),
01154            E = CurrentFunction->end(); I != E; ++I)
01155       BBMap.push_back(I);
01156 
01157   /// In LLVM 1.3 we write types separately from values, so
01158   /// the types are always first in the symbol table. This is
01159   /// because Type no longer derives from Value.
01160   if (!hasTypeDerivedFromValue) {
01161     // Symtab block header: [num entries]
01162     unsigned NumEntries = read_vbr_uint();
01163     for (unsigned i = 0; i < NumEntries; ++i) {
01164       // Symtab entry: [def slot #][name]
01165       unsigned slot = read_vbr_uint();
01166       std::string Name = read_str();
            // NOTE(review): unlike the pre-1.3 path further down, the result of
            // getType() is not checked for null before being inserted -- confirm
            // whether getType() can return null here.
01167       const Type* T = getType(slot);
01168       ST->insert(Name, T);
01169     }
01170   }
01171 
        // The rest of the block is a sequence of planes, one per type id, each
        // holding NumEntries [slot][name] pairs.
01172   while (moreInBlock()) {
01173     // Symtab block header: [num entries][type id number]
01174     unsigned NumEntries = read_vbr_uint();
01175     unsigned Typ = 0;
01176     bool isTypeType = read_typeid(Typ);
01177     const Type *Ty = getType(Typ);
01178 
01179     for (unsigned i = 0; i != NumEntries; ++i) {
01180       // Symtab entry: [def slot #][name]
01181       unsigned slot = read_vbr_uint();
01182       std::string Name = read_str();
01183 
01184       // if we're reading a pre 1.3 bytecode file and the type plane
01185       // is the "type type", handle it here
01186       if (isTypeType) {
01187         const Type* T = getType(slot);
01188         if (T == 0)
01189           error("Failed type look-up for name '" + Name + "'");
01190         ST->insert(Name, T);
01191         continue; // code below must be short circuited
01192       } else {
01193         Value *V = 0;
              // Labels are resolved through BBMap; an out-of-range slot leaves V
              // null and is reported below.
01194         if (Typ == Type::LabelTyID) {
01195           if (slot < BBMap.size())
01196             V = BBMap[slot];
01197         } else {
                // NOTE(review): the meaning of the third argument (false) is not
                // visible here; presumably it asks getValue not to create a
                // placeholder for a missing value -- confirm in Reader.h.
01198           V = getValue(Typ, slot, false); // Find mapping...
01199         }
              // The setName() call relies on error() not returning when V is
              // null -- presumably error() throws; confirm.
01200         if (V == 0)
01201           error("Failed value look-up for name '" + Name + "'");
01202         V->setName(Name);
01203       }
01204     }
01205   }
01206   checkPastBlockEnd("Symbol Table");
01207   if (Handler) Handler->handleSymbolTableEnd();
01208 }
01209 
01210 /// Read in the types portion of a compaction table.
      /// Reads NumEntries type ids; each one is looked up with getGlobalTableType
      /// and appended to CompactionTypes as a (type, slot) pair.
      /// @param NumEntries The number of type ids to read.
01211 void BytecodeReader::ParseCompactionTypes(unsigned NumEntries) {
01212   for (unsigned i = 0; i != NumEntries; ++i) {
01213     unsigned TypeSlot = 0;
          // A true result from read_typeid is reported as the "type type", which
          // is rejected as an entry here.
01214     if (read_typeid(TypeSlot))
01215       error("Invalid type in compaction table: type type");
01216     const Type *Typ = getGlobalTableType(TypeSlot);
01217     CompactionTypes.push_back(std::make_pair(Typ, TypeSlot));
01218     if (Handler) Handler->handleCompactionTableType(i, TypeSlot, Typ);
01219   }
01220 }
01221 
01222 /// Parse a compaction table.
      /// Fills CompactionTypes (through ParseCompactionTypes) and one
      /// CompactionValues plane per type id, reading until the current block is
      /// exhausted, and reports each step to the handler when one is installed.
01223 void BytecodeReader::ParseCompactionTable() {
01224 
01225   // Notify handler that we're beginning a compaction table.
01226   if (Handler) Handler->handleCompactionTableBegin();
01227 
01228   // In LLVM 1.3 Type no longer derives from Value. So,
01229   // we always write them first in the compaction table
01230   // because they can't occupy a "type plane" where the
01231   // Values reside.
01232   if (! hasTypeDerivedFromValue) {
01233     unsigned NumEntries = read_vbr_uint();
01234     ParseCompactionTypes(NumEntries);
01235   }
01236 
01237   // Compaction tables live in separate blocks so we have to loop
01238   // until we've read the whole thing.
01239   while (moreInBlock()) {
01240     // Read the number of Value* entries in the compaction table
01241     unsigned NumEntries = read_vbr_uint();
01242     unsigned Ty = 0;
          // NOTE(review): isTypeType only ever holds the results of read_typeid /
          // sanitizeTypeId and is used as a condition below; it is declared
          // unsigned here although ParseSymbolTable keeps the same result in a
          // bool.
01243     unsigned isTypeType = false;
01244 
01245     // Decode the type from value read in. Most compaction table
01246     // planes will have one or two entries in them. If that's the
01247     // case then the length is encoded in the bottom two bits and
01248     // the higher bits encode the type. This saves another VBR value.
01249     if ((NumEntries & 3) == 3) {
01250       // In this case, both low-order bits are set (value 3). This
01251       // is a signal that the typeid follows.
01252       NumEntries >>= 2;
01253       isTypeType = read_typeid(Ty);
01254     } else {
01255       // In this case, the low-order bits specify the number of entries
01256       // and the high order bits specify the type.
            // The low two bits are 0, 1 or 2 on this path (3 is handled above).
01257       Ty = NumEntries >> 2;
01258       isTypeType = sanitizeTypeId(Ty);
01259       NumEntries &= 3;
01260     }
01261 
01262     // if we're reading a pre 1.3 bytecode file and the type plane
01263     // is the "type type", handle it here
01264     if (isTypeType) {
01265       ParseCompactionTypes(NumEntries);
01266     } else {
01267       // Make sure we have enough room for the plane.
01268       if (Ty >= CompactionValues.size())
01269         CompactionValues.resize(Ty+1);
01270 
01271       // Make sure the plane is empty or we have some kind of error.
            // A non-empty plane means this type id already appeared earlier in
            // the table.
01272       if (!CompactionValues[Ty].empty())
01273         error("Compaction table plane contains multiple entries!");
01274 
01275       // Notify handler about the plane.
01276       if (Handler) Handler->handleCompactionTablePlane(Ty, NumEntries);
01277 
01278       // Push the implicit zero.
            // The first element of every plane is the null value of the plane's
            // type, so the entries read below start at index 1.
01279       CompactionValues[Ty].push_back(Constant::getNullValue(getType(Ty)));
01280 
01281       // Read in each of the entries, put them in the compaction table
01282       // and notify the handler that we have a new compaction table value.
01283       for (unsigned i = 0; i != NumEntries; ++i) {
01284         unsigned ValSlot = read_vbr_uint();
01285         Value *V = getGlobalTableValue(Ty, ValSlot);
01286         CompactionValues[Ty].push_back(V);
01287         if (Handler) Handler->handleCompactionTableValue(i, Ty, ValSlot);
01288       }
01289     }
01290   }
01291   // Notify handler that the compaction table is done.
01292   if (Handler) Handler->handleCompactionTableEnd();
01293 }
01294 
01295 // Parse a single type. The typeid is read in first. If it's a primitive type
01296 // then nothing else needs to be read, we know how to instantiate it. If it's
01297 // a derived type, then additional data is read to fill out the type
01298 // definition.
      // Returns the parsed type. Primitive types are returned directly and are
      // not reported to the handler; every other successfully parsed type is
      // passed to Handler->handleType() first.
01299 const Type *BytecodeReader::ParseType() {
01300   unsigned PrimType = 0;
01301   if (read_typeid(PrimType))
01302     error("Invalid type (type type) in type constants!");
01303 
01304   const Type *Result = 0;
        // A non-null result means PrimType names a primitive type; return it
        // without reading anything further.
01305   if ((Result = Type::getPrimitiveType((Type::TypeID)PrimType)))
01306     return Result;
01307 
01308   switch (PrimType) {
01309   case Type::FunctionTyID: {
          // Layout read here: [return type][num params][param type]...
01310     const Type *RetType = readSanitizedType();
01311 
01312     unsigned NumParams = read_vbr_uint();
01313 
01314     std::vector<const Type*> Params;
01315     while (NumParams--)
01316       Params.push_back(readSanitizedType());
01317 
          // A trailing void parameter marks the function as vararg; it is a
          // marker only and is dropped from the parameter list.
01318     bool isVarArg = Params.size() && Params.back() == Type::VoidTy;
01319     if (isVarArg) Params.pop_back();
01320 
01321     Result = FunctionType::get(RetType, Params, isVarArg);
01322     break;
01323   }
01324   case Type::ArrayTyID: {
          // Layout read here: [element type][num elements]
01325     const Type *ElementType = readSanitizedType();
01326     unsigned NumElements = read_vbr_uint();
01327     Result =  ArrayType::get(ElementType, NumElements);
01328     break;
01329   }
01330   case Type::PackedTyID: {
          // Layout read here: [element type][num elements]
01331     const Type *ElementType = readSanitizedType();
01332     unsigned NumElements = read_vbr_uint();
01333     Result =  PackedType::get(ElementType, NumElements);
01334     break;
01335   }
01336   case Type::StructTyID: {
          // Element type ids are read one at a time until a zero id is seen.
01337     std::vector<const Type*> Elements;
01338     unsigned Typ = 0;
01339     if (read_typeid(Typ))
01340       error("Invalid element type (type type) for structure!");
01341 
01342     while (Typ) {         // List is terminated by void/0 typeid
01343       Elements.push_back(getType(Typ));
01344       if (read_typeid(Typ))
01345         error("Invalid element type (type type) for structure!");
01346     }
01347 
01348     Result = StructType::get(Elements);
01349     break;
01350   }
01351   case Type::PointerTyID: {
01352     Result = PointerType::get(readSanitizedType());
01353     break;
01354   }
01355 
01356   case Type::OpaqueTyID: {
01357     Result = OpaqueType::get();
01358     break;
01359   }
01360 
01361   default:
          // NOTE(review): Result is still null on this path, so the code below
          // relies on error() not returning -- presumably it throws; confirm.
01362     error("Don't know how to deserialize primitive type " + utostr(PrimType));
01363     break;
01364   }
01365   if (Handler) Handler->handleType(Result);
01366   return Result;
01367 }
01368 
01369 // ParseTypes - We have to use this weird code to handle recursive
01370 // types.  We know that recursive types will only reference the current slab of
01371 // values in the type plane, but they can forward reference types before they
01372 // have been read.  For example, Type #0 might be '{ Ty#1 }' and Type #1 might
01373 // be 'Ty#0*'.  When reading Type #0, type number one doesn't exist.  To fix
01374 // this ugly problem, we pessimistically insert an opaque type for each type we
01375 // are about to read.  This means that forward references will resolve to
01376 // something and when we reread the type later, we can replace the opaque type
01377 // with a new resolved concrete type.
01378 //
      // Tab is the type list to fill and must be empty on entry (asserted below);
      // NumEntries is the number of types to read into it.
01379 void BytecodeReader::ParseTypes(TypeListTy &Tab, unsigned NumEntries){
01380   assert(Tab.size() == 0 && "should not have read type constants in before!");
01381 
01382   // Insert a bunch of opaque types to be resolved later...
01383   Tab.reserve(NumEntries);
01384   for (unsigned i = 0; i != NumEntries; ++i)
01385     Tab.push_back(OpaqueType::get());
01386 
01387   if (Handler)
01388     Handler->handleTypeList(NumEntries);
01389 
01390   // If we are about to resolve types, make sure the type cache is clear.
01391   if (NumEntries)
01392     ModuleTypeIDCache.clear();
01393   
01394   // Loop through reading all of the types.  Forward types will make use of the
01395   // opaque types just inserted.
01396   //
01397   for (unsigned i = 0; i != NumEntries; ++i) {
          // OldTy is the opaque placeholder inserted above for slot i.
01398     const Type* NewTy = ParseType();
01399     const Type* OldTy = Tab[i].get();
01400     if (NewTy == 0)
01401       error("Couldn't parse type!");
01402 
01403     // Don't directly push the new type on the Tab. Instead we want to replace
01404     // the opaque type we previously inserted with the new concrete value. This
01405     // approach helps with forward references to types. The refinement from the
01406     // abstract (opaque) type to the new type causes all uses of the abstract
01407     // type to use the concrete type (NewTy). This will also cause the opaque
01408     // type to be deleted.
01409     cast<DerivedType>(const_cast<Type*>(OldTy))->refineAbstractTypeTo(NewTy);
01410 
01411     // This should have replaced the old opaque type with the new type in the
01412     // value table... or with a preexisting type that was already in the system.
01413     // Let's just make sure it did.
          // Only the pointer value of OldTy is compared here; it is not
          // dereferenced after the refinement.
01414     assert(Tab[i] != OldTy && "refineAbstractType didn't work!");
01415   }
01416 }
01417 
01418 /// Parse a single constant value
01419 Value *BytecodeReader::ParseConstantPoolValue(unsigned TypeID) {
01420   // We must check for a ConstantExpr before switching by type because
01421   // a ConstantExpr can be of any type, and has no explicit value.
01422   //
01423   // 0 if not expr; numArgs if is expr
01424   unsigned isExprNumArgs = read_vbr_uint();
01425 
01426   if (isExprNumArgs) {
01427     if (!hasNoUndefValue) {
01428       // 'undef' is encoded with 'exprnumargs' == 1.
01429       if (isExprNumArgs == 1)
01430         return UndefValue::get(getType(TypeID));
01431 
01432       // Inline asm is encoded with exprnumargs == ~0U.
01433       if (isExprNumArgs == ~0U) {
01434         std::string AsmStr = read_str();
01435         std::string ConstraintStr = read_str();
01436         unsigned Flags = read_vbr_uint();
01437         
01438         const PointerType *PTy = dyn_cast<PointerType>(getType(TypeID));
01439         const FunctionType *FTy = 
01440           PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
01441 
01442         if (!FTy || !InlineAsm::Verify(FTy, ConstraintStr))
01443           error("Invalid constraints for inline asm");
01444         if (Flags & ~1U)
01445           error("Invalid flags for inline asm");
01446         bool HasSideEffects = Flags & 1;
01447         return InlineAsm::get(FTy, AsmStr, ConstraintStr, HasSideEffects);
01448       }
01449       
01450       --isExprNumArgs;
01451     }
01452 
01453     // FIXME: Encoding of constant exprs could be much more compact!
01454     std::vector<Constant*> ArgVec;
01455     ArgVec.reserve(isExprNumArgs);
01456     unsigned Opcode = read_vbr_uint();
01457 
01458     // Bytecode files before LLVM 1.4 need have a missing terminator inst.
01459     if (hasNoUnreachableInst) Opcode++;
01460 
01461     // Read the slot number and types of each of the arguments
01462     for (unsigned i = 0; i != isExprNumArgs; ++i) {
01463       unsigned ArgValSlot = read_vbr_uint();
01464       unsigned ArgTypeSlot = 0;
01465       if (read_typeid(ArgTypeSlot))
01466         error("Invalid argument type (type type) for constant value");
01467 
01468       // Get the arg value from its slot if it exists, otherwise a placeholder
01469       ArgVec.push_back(getConstantValue(ArgTypeSlot, ArgValSlot));
01470     }
01471 
01472     // Construct a ConstantExpr of the appropriate kind
01473     if (isExprNumArgs == 1) {           // All one-operand expressions
01474       if (Opcode != Instruction::Cast)
01475         error("Only cast instruction has one argument for ConstantExpr");
01476 
01477       Constant* Result = ConstantExpr::getCast(ArgVec[0], getType(TypeID));
01478       if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result);
01479       return Result;
01480     } else if (Opcode == Instruction::GetElementPtr) { // GetElementPtr
01481       std::vector<Constant*> IdxList(ArgVec.begin()+1, ArgVec.end());
01482 
01483       if (hasRestrictedGEPTypes) {
01484         const Type *BaseTy = ArgVec[0]->getType();
01485         generic_gep_type_iterator<std::vector<Constant*>::iterator>
01486           GTI = gep_type_begin(BaseTy, IdxList.begin(), IdxList.end()),
01487           E = gep_type_end(BaseTy, IdxList.begin(), IdxList.end());
01488         for (unsigned i = 0; GTI != E; ++GTI, ++i)
01489           if (isa<StructType>(*GTI)) {
01490             if (IdxList[i]->getType() != Type::UByteTy)
01491               error("Invalid index for getelementptr!");
01492             IdxList[i] = ConstantExpr::getCast(IdxList[i], Type::UIntTy);
01493           }
01494       }
01495 
01496       Constant* Result = ConstantExpr::getGetElementPtr(ArgVec[0], IdxList);
01497       if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result);
01498       return Result;
01499     } else if (Opcode == Instruction::Select) {
01500       if (ArgVec.size() != 3)
01501         error("Select instruction must have three arguments.");
01502       Constant* Result = ConstantExpr::getSelect(ArgVec[0], ArgVec[1],
01503                                                  ArgVec[2]);
01504       if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result);
01505       return Result;
01506     } else if (Opcode == Instruction::ExtractElement) {
01507       if (ArgVec.size() != 2 ||
01508           !ExtractElementInst::isValidOperands(ArgVec[0], ArgVec[1]))
01509         error("Invalid extractelement constand expr arguments");
01510       Constant* Result = ConstantExpr::getExtractElement(ArgVec[0], ArgVec[1]);
01511       if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result);
01512       return Result;
01513     } else if (Opcode == Instruction::InsertElement) {
01514       if (ArgVec.size() != 3 ||
01515           !InsertElementInst::isValidOperands(ArgVec[0], ArgVec[1], ArgVec[2]))
01516         error("Invalid insertelement constand expr arguments");
01517         
01518       Constant *Result = 
01519         ConstantExpr::getInsertElement(ArgVec[0], ArgVec[1], ArgVec[2]);
01520       if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result);
01521       return Result;
01522     } else if (Opcode == Instruction::ShuffleVector) {
01523       if (ArgVec.size() != 3 ||
01524           !ShuffleVectorInst::isValidOperands(ArgVec[0], ArgVec[1], ArgVec[2]))
01525         error("Invalid shufflevector constant expr arguments.");
01526       Constant *Result = 
01527         ConstantExpr::getShuffleVector(ArgVec[0], ArgVec[1], ArgVec[2]);
01528       if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result);
01529       return Result;
01530     } else {                            // All other 2-operand expressions
01531       Constant* Result = ConstantExpr::get(Opcode, ArgVec[0], ArgVec[1]);
01532       if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result);
01533       return Result;
01534     }
01535   }
01536 
01537   // Ok, not an ConstantExpr.  We now know how to read the given type...
01538   const Type *Ty = getType(TypeID);
01539   Constant *Result = 0;
01540   switch (Ty->getTypeID()) {
01541   case Type::BoolTyID: {
01542     unsigned Val = read_vbr_uint();
01543     if (Val != 0 && Val != 1)
01544       error("Invalid boolean value read.");
01545     Result = ConstantBool::get(Val == 1);
01546     if (Handler) Handler->handleConstantValue(Result);
01547     break;
01548   }
01549 
01550   case Type::UByteTyID:   // Unsigned integer types...
01551   case Type::UShortTyID:
01552   case Type::UIntTyID: {
01553     unsigned Val = read_vbr_uint();
01554     if (!ConstantUInt::isValueValidForType(Ty, Val))
01555       error("Invalid unsigned byte/short/int read.");
01556     Result = ConstantUInt::get(Ty, Val);
01557     if (Handler) Handler->handleConstantValue(Result);
01558     break;
01559   }
01560 
01561   case Type::ULongTyID:
01562     Result = ConstantUInt::get(Ty, read_vbr_uint64());
01563     if (Handler) Handler->handleConstantValue(Result);
01564     break;
01565     
01566   case Type::SByteTyID:   // Signed integer types...
01567   case Type::ShortTyID:
01568   case Type::IntTyID:
01569   case Type::LongTyID: {
01570     int64_t Val = read_vbr_int64();
01571     if (!ConstantSInt::isValueValidForType(Ty, Val))
01572       error("Invalid signed byte/short/int/long read.");
01573     Result = ConstantSInt::get(Ty, Val);
01574     if (Handler) Handler->handleConstantValue(Result);
01575     break;
01576   }
01577 
01578   case Type::FloatTyID: {
01579     float Val;
01580     read_float(Val);
01581     Result = ConstantFP::get(Ty, Val);
01582     if (Handler) Handler->handleConstantValue(Result);
01583     break;
01584   }
01585 
01586   case Type::DoubleTyID: {
01587     double Val;
01588     read_double(Val);
01589     Result = ConstantFP::get(Ty, Val);
01590     if (Handler) Handler->handleConstantValue(Result);
01591     break;
01592   }
01593 
01594   case Type::ArrayTyID: {
01595     const ArrayType *AT = cast<ArrayType>(Ty);
01596     unsigned NumElements = AT->getNumElements();
01597     unsigned TypeSlot = getTypeSlot(AT->getElementType());
01598     std::vector<Constant*> Elements;
01599     Elements.reserve(NumElements);
01600     while (NumElements--)     // Read all of the elements of the constant.
01601       Elements.push_back(getConstantValue(TypeSlot,
01602                                           read_vbr_uint()));
01603     Result = ConstantArray::get(AT, Elements);
01604     if (Handler) Handler->handleConstantArray(AT, Elements, TypeSlot, Result);
01605     break;
01606   }
01607 
01608   case Type::StructTyID: {
01609     const StructType *ST = cast<StructType>(Ty);
01610 
01611     std::vector<Constant *> Elements;
01612     Elements.reserve(ST->getNumElements());
01613     for (unsigned i = 0; i != ST->getNumElements(); ++i)
01614       Elements.push_back(getConstantValue(ST->getElementType(i),
01615                                           read_vbr_uint()));
01616 
01617     Result = ConstantStruct::get(ST, Elements);
01618     if (Handler) Handler->handleConstantStruct(ST, Elements, Result);
01619     break;
01620   }
01621 
01622   case Type::PackedTyID: {
01623     const PackedType *PT = cast<PackedType>(Ty);
01624     unsigned NumElements = PT->getNumElements();
01625     unsigned TypeSlot = getTypeSlot(PT->getElementType());
01626     std::vector<Constant*> Elements;
01627     Elements.reserve(NumElements);
01628     while (NumElements--)     // Read all of the elements of the constant.
01629       Elements.push_back(getConstantValue(TypeSlot,
01630                                           read_vbr_uint()));
01631     Result = ConstantPacked::get(PT, Elements);
01632     if (Handler) Handler->handleConstantPacked(PT, Elements, TypeSlot, Result);
01633     break;
01634   }
01635 
01636   case Type::PointerTyID: {  // ConstantPointerRef value (backwards compat).
01637     const PointerType *PT = cast<PointerType>(Ty);
01638     unsigned Slot = read_vbr_uint();
01639 
01640     // Check to see if we have already read this global variable...
01641     Value *Val = getValue(TypeID, Slot, false);
01642     if (Val) {
01643       GlobalValue *GV = dyn_cast<GlobalValue>(Val);
01644       if (!GV) error("GlobalValue not in ValueTable!");
01645       if (Handler) Handler->handleConstantPointer(PT, Slot, GV);
01646       return GV;
01647     } else {
01648       error("Forward references are not allowed here.");
01649     }
01650   }
01651 
01652   default:
01653     error("Don't know how to deserialize constant value of type '" +
01654                       Ty->getDescription());
01655     break;
01656   }
01657   
01658   // Check that we didn't read a null constant if they are implicit for this
01659   // type plane.  Do not do this check for constantexprs, as they may be folded
01660   // to a null value in a way that isn't predicted when a .bc file is initially
01661   // produced.
01662   assert((!isa<Constant>(Result) || !cast<Constant>(Result)->isNullValue()) ||
01663          !hasImplicitNull(TypeID) &&
01664          "Cannot read null values from bytecode!");
01665   return Result;
01666 }
01667 
01668 /// Resolve references for constants. This function resolves the forward
01669 /// referenced constants in the ConstantFwdRefs map. It uses the
01670 /// replaceAllUsesWith method of Value class to substitute the placeholder
01671 /// instance with the actual instance.
01672 void BytecodeReader::ResolveReferencesToConstant(Constant *NewV, unsigned Typ,
01673                                                  unsigned Slot) {
01674   ConstantRefsType::iterator I =
01675     ConstantFwdRefs.find(std::make_pair(Typ, Slot));
01676   if (I == ConstantFwdRefs.end()) return;   // Never forward referenced?
01677 
01678   Value *PH = I->second;   // Get the placeholder...
01679   PH->replaceAllUsesWith(NewV);
01680   delete PH;                               // Delete the old placeholder
01681   ConstantFwdRefs.erase(I);                // Remove the map entry for it
01682 }
01683 
01684 /// Parse the constant strings section.
01685 void BytecodeReader::ParseStringConstants(unsigned NumEntries, ValueTable &Tab){
01686   for (; NumEntries; --NumEntries) {
01687     unsigned Typ = 0;
01688     if (read_typeid(Typ))
01689       error("Invalid type (type type) for string constant");
01690     const Type *Ty = getType(Typ);
01691     if (!isa<ArrayType>(Ty))
01692       error("String constant data invalid!");
01693 
01694     const ArrayType *ATy = cast<ArrayType>(Ty);
01695     if (ATy->getElementType() != Type::SByteTy &&
01696         ATy->getElementType() != Type::UByteTy)
01697       error("String constant data invalid!");
01698 
01699     // Read character data.  The type tells us how long the string is.
01700     char *Data = reinterpret_cast<char *>(alloca(ATy->getNumElements()));
01701     read_data(Data, Data+ATy->getNumElements());
01702 
01703     std::vector<Constant*> Elements(ATy->getNumElements());
01704     if (ATy->getElementType() == Type::SByteTy)
01705       for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
01706         Elements[i] = ConstantSInt::get(Type::SByteTy, (signed char)Data[i]);
01707     else
01708       for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
01709         Elements[i] = ConstantUInt::get(Type::UByteTy, (unsigned char)Data[i]);
01710 
01711     // Create the constant, inserting it as needed.
01712     Constant *C = ConstantArray::get(ATy, Elements);
01713     unsigned Slot = insertValue(C, Typ, Tab);
01714     ResolveReferencesToConstant(C, Typ, Slot);
01715     if (Handler) Handler->handleConstantString(cast<ConstantArray>(C));
01716   }
01717 }
01718 
01719 /// Parse the constant pool.
01720 void BytecodeReader::ParseConstantPool(ValueTable &Tab,
01721                                        TypeListTy &TypeTab,
01722                                        bool isFunction) {
01723   if (Handler) Handler->handleGlobalConstantsBegin();
01724 
01725   /// In LLVM 1.3 Type does not derive from Value so the types
01726   /// do not occupy a plane. Consequently, we read the types
01727   /// first in the constant pool.
01728   if (isFunction && !hasTypeDerivedFromValue) {
01729     unsigned NumEntries = read_vbr_uint();
01730     ParseTypes(TypeTab, NumEntries);
01731   }
01732 
01733   while (moreInBlock()) {
01734     unsigned NumEntries = read_vbr_uint();
01735     unsigned Typ = 0;
01736     bool isTypeType = read_typeid(Typ);
01737 
01738     /// In LLVM 1.2 and before, Types were written to the
01739     /// bytecode file in the "Type Type" plane (#12).
01740     /// In 1.3 plane 12 is now the label plane.  Handle this here.
01741     if (isTypeType) {
01742       ParseTypes(TypeTab, NumEntries);
01743     } else if (Typ == Type::VoidTyID) {
01744       /// Use of Type::VoidTyID is a misnomer. It actually means
01745       /// that the following plane is constant strings
01746       assert(&Tab == &ModuleValues && "Cannot read strings in functions!");
01747       ParseStringConstants(NumEntries, Tab);
01748     } else {
01749       for (unsigned i = 0; i < NumEntries; ++i) {
01750         Value *V = ParseConstantPoolValue(Typ);
01751         assert(V && "ParseConstantPoolValue returned NULL!");
01752         unsigned Slot = insertValue(V, Typ, Tab);
01753 
01754         // If we are reading a function constant table, make sure that we adjust
01755         // the slot number to be the real global constant number.
01756         //
01757         if (&Tab != &ModuleValues && Typ < ModuleValues.size() &&
01758             ModuleValues[Typ])
01759           Slot += ModuleValues[Typ]->size();
01760         if (Constant *C = dyn_cast<Constant>(V))
01761           ResolveReferencesToConstant(C, Typ, Slot);
01762       }
01763     }
01764   }
01765 
01766   // After we have finished parsing the constant pool, we had better not have
01767   // any dangling references left.
01768   if (!ConstantFwdRefs.empty()) {
01769     ConstantRefsType::const_iterator I = ConstantFwdRefs.begin();
01770     Constant* missingConst = I->second;
01771     error(utostr(ConstantFwdRefs.size()) +
01772           " unresolved constant reference exist. First one is '" +
01773           missingConst->getName() + "' of type '" +
01774           missingConst->getType()->getDescription() + "'.");
01775   }
01776 
01777   checkPastBlockEnd("Constant Pool");
01778   if (Handler) Handler->handleGlobalConstantsEnd();
01779 }
01780 
01781 /// Parse the contents of a function. Note that this function can be
01782 /// called lazily by materializeFunction
01783 /// @see materializeFunction
01784 void BytecodeReader::ParseFunctionBody(Function* F) {
01785 
01786   unsigned FuncSize = BlockEnd - At;
01787   GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage;
01788 
01789   unsigned LinkageType = read_vbr_uint();
01790   switch (LinkageType) {
01791   case 0: Linkage = GlobalValue::ExternalLinkage; break;
01792   case 1: Linkage = GlobalValue::WeakLinkage; break;
01793   case 2: Linkage = GlobalValue::AppendingLinkage; break;
01794   case 3: Linkage = GlobalValue::InternalLinkage; break;
01795   case 4: Linkage = GlobalValue::LinkOnceLinkage; break;
01796   default:
01797     error("Invalid linkage type for Function.");
01798     Linkage = GlobalValue::InternalLinkage;
01799     break;
01800   }
01801 
01802   F->setLinkage(Linkage);
01803   if (Handler) Handler->handleFunctionBegin(F,FuncSize);
01804 
01805   // Keep track of how many basic blocks we have read in...
01806   unsigned BlockNum = 0;
01807   bool InsertedArguments = false;
01808 
01809   BufPtr MyEnd = BlockEnd;
01810   while (At < MyEnd) {
01811     unsigned Type, Size;
01812     BufPtr OldAt = At;
01813     read_block(Type, Size);
01814 
01815     switch (Type) {
01816     case BytecodeFormat::ConstantPoolBlockID:
01817       if (!InsertedArguments) {
01818         // Insert arguments into the value table before we parse the first basic
01819         // block in the function, but after we potentially read in the
01820         // compaction table.
01821         insertArguments(F);
01822         InsertedArguments = true;
01823       }
01824 
01825       ParseConstantPool(FunctionValues, FunctionTypes, true);
01826       break;
01827 
01828     case BytecodeFormat::CompactionTableBlockID:
01829       ParseCompactionTable();
01830       break;
01831 
01832     case BytecodeFormat::BasicBlock: {
01833       if (!InsertedArguments) {
01834         // Insert arguments into the value table before we parse the first basic
01835         // block in the function, but after we potentially read in the
01836         // compaction table.
01837         insertArguments(F);
01838         InsertedArguments = true;
01839       }
01840 
01841       BasicBlock *BB = ParseBasicBlock(BlockNum++);
01842       F->getBasicBlockList().push_back(BB);
01843       break;
01844     }
01845 
01846     case BytecodeFormat::InstructionListBlockID: {
01847       // Insert arguments into the value table before we parse the instruction
01848       // list for the function, but after we potentially read in the compaction
01849       // table.
01850       if (!InsertedArguments) {
01851         insertArguments(F);
01852         InsertedArguments = true;
01853       }
01854 
01855       if (BlockNum)
01856         error("Already parsed basic blocks!");
01857       BlockNum = ParseInstructionList(F);
01858       break;
01859     }
01860 
01861     case BytecodeFormat::SymbolTableBlockID:
01862       ParseSymbolTable(F, &F->getSymbolTable());
01863       break;
01864 
01865     default:
01866       At += Size;
01867       if (OldAt > At)
01868         error("Wrapped around reading bytecode.");
01869       break;
01870     }
01871     BlockEnd = MyEnd;
01872 
01873     // Malformed bc file if read past end of block.
01874     align32();
01875   }
01876 
01877   // Make sure there were no references to non-existant basic blocks.
01878   if (BlockNum != ParsedBasicBlocks.size())
01879     error("Illegal basic block operand reference");
01880 
01881   ParsedBasicBlocks.clear();
01882 
01883   // Resolve forward references.  Replace any uses of a forward reference value
01884   // with the real value.
01885   while (!ForwardReferences.empty()) {
01886     std::map<std::pair<unsigned,unsigned>, Value*>::iterator
01887       I = ForwardReferences.begin();
01888     Value *V = getValue(I->first.first, I->first.second, false);
01889     Value *PlaceHolder = I->second;
01890     PlaceHolder->replaceAllUsesWith(V);
01891     ForwardReferences.erase(I);
01892     delete PlaceHolder;
01893   }
01894 
01895   // If upgraded intrinsic functions were detected during reading of the 
01896   // module information, then we need to look for instructions that need to
01897   // be upgraded. This can't be done while the instructions are read in because
01898   // additional instructions inserted mess up the slot numbering.
01899   if (!upgradedFunctions.empty()) {
01900     for (Function::iterator BI = F->begin(), BE = F->end(); BI != BE; ++BI) 
01901       for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); 
01902            II != IE;)
01903         if (CallInst* CI = dyn_cast<CallInst>(II++)) {
01904           std::map<Function*,Function*>::iterator FI = 
01905             upgradedFunctions.find(CI->getCalledFunction());
01906           if (FI != upgradedFunctions.end())
01907             UpgradeIntrinsicCall(CI, FI->second);
01908         }
01909   }
01910 
01911   // Clear out function-level types...
01912   FunctionTypes.clear();
01913   CompactionTypes.clear();
01914   CompactionValues.clear();
01915   freeTable(FunctionValues);
01916 
01917   if (Handler) Handler->handleFunctionEnd(F);
01918 }
01919 
01920 /// This function parses LLVM functions lazily. It obtains the type of the
01921 /// function and records where the body of the function is in the bytecode
01922 /// buffer. The caller can then use the ParseNextFunction and
01923 /// ParseAllFunctionBodies to get handler events for the functions.
01924 void BytecodeReader::ParseFunctionLazily() {
01925   if (FunctionSignatureList.empty())
01926     error("FunctionSignatureList empty!");
01927 
01928   Function *Func = FunctionSignatureList.back();
01929   FunctionSignatureList.pop_back();
01930 
01931   // Save the information for future reading of the function
01932   LazyFunctionLoadMap[Func] = LazyFunctionInfo(BlockStart, BlockEnd);
01933 
01934   // This function has a body but it's not loaded so it appears `External'.
01935   // Mark it as a `Ghost' instead to notify the users that it has a body.
01936   Func->setLinkage(GlobalValue::GhostLinkage);
01937 
01938   // Pretend we've `parsed' this function
01939   At = BlockEnd;
01940 }
01941 
01942 /// The ParserFunction method lazily parses one function. Use this method to
01943 /// casue the parser to parse a specific function in the module. Note that
01944 /// this will remove the function from what is to be included by
01945 /// ParseAllFunctionBodies.
01946 /// @see ParseAllFunctionBodies
01947 /// @see ParseBytecode
01948 void BytecodeReader::ParseFunction(Function* Func) {
01949   // Find {start, end} pointers and slot in the map. If not there, we're done.
01950   LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.find(Func);
01951 
01952   // Make sure we found it
01953   if (Fi == LazyFunctionLoadMap.end()) {
01954     error("Unrecognized function of type " + Func->getType()->getDescription());
01955     return;
01956   }
01957 
01958   BlockStart = At = Fi->second.Buf;
01959   BlockEnd = Fi->second.EndBuf;
01960   assert(Fi->first == Func && "Found wrong function?");
01961 
01962   LazyFunctionLoadMap.erase(Fi);
01963 
01964   this->ParseFunctionBody(Func);
01965 }
01966 
01967 /// The ParseAllFunctionBodies method parses through all the previously
01968 /// unparsed functions in the bytecode file. If you want to completely parse
01969 /// a bytecode file, this method should be called after Parsebytecode because
01970 /// Parsebytecode only records the locations in the bytecode file of where
01971 /// the function definitions are located. This function uses that information
01972 /// to materialize the functions.
01973 /// @see ParseBytecode
01974 void BytecodeReader::ParseAllFunctionBodies() {
01975   LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.begin();
01976   LazyFunctionMap::iterator Fe = LazyFunctionLoadMap.end();
01977 
01978   while (Fi != Fe) {
01979     Function* Func = Fi->first;
01980     BlockStart = At = Fi->second.Buf;
01981     BlockEnd = Fi->second.EndBuf;
01982     ParseFunctionBody(Func);
01983     ++Fi;
01984   }
01985   LazyFunctionLoadMap.clear();
01986 
01987 }
01988 
01989 /// Parse the global type list
01990 void BytecodeReader::ParseGlobalTypes() {
01991   // Read the number of types
01992   unsigned NumEntries = read_vbr_uint();
01993 
01994   // Ignore the type plane identifier for types if the bc file is pre 1.3
01995   if (hasTypeDerivedFromValue)
01996     read_vbr_uint();
01997 
01998   ParseTypes(ModuleTypes, NumEntries);
01999 }
02000 
02001 /// Parse the Global info (types, global vars, constants)
02002 void BytecodeReader::ParseModuleGlobalInfo() {
02003 
02004   if (Handler) Handler->handleModuleGlobalsBegin();
02005 
02006   // SectionID - If a global has an explicit section specified, this map
02007   // remembers the ID until we can translate it into a string.
02008   std::map<GlobalValue*, unsigned> SectionID;
02009   
02010   // Read global variables...
02011   unsigned VarType = read_vbr_uint();
02012   while (VarType != Type::VoidTyID) { // List is terminated by Void
02013     // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 =
02014     // Linkage, bit4+ = slot#
02015     unsigned SlotNo = VarType >> 5;
02016     if (sanitizeTypeId(SlotNo))
02017       error("Invalid type (type type) for global var!");
02018     unsigned LinkageID = (VarType >> 2) & 7;
02019     bool isConstant = VarType & 1;
02020     bool hasInitializer = (VarType & 2) != 0;
02021     unsigned Alignment = 0;
02022     unsigned GlobalSectionID = 0;
02023     
02024     // An extension word is present when linkage = 3 (internal) and hasinit = 0.
02025     if (LinkageID == 3 && !hasInitializer) {
02026       unsigned ExtWord = read_vbr_uint();
02027       // The extension word has this format: bit 0 = has initializer, bit 1-3 =
02028       // linkage, bit 4-8 = alignment (log2), bits 10+ = future use.
02029       hasInitializer = ExtWord & 1;
02030       LinkageID = (ExtWord >> 1) & 7;
02031       Alignment = (1 << ((ExtWord >> 4) & 31)) >> 1;
02032       
02033       if (ExtWord & (1 << 9))  // Has a section ID.
02034         GlobalSectionID = read_vbr_uint();
02035     }
02036 
02037     GlobalValue::LinkageTypes Linkage;
02038     switch (LinkageID) {
02039     case 0: Linkage = GlobalValue::ExternalLinkage;  break;
02040     case 1: Linkage = GlobalValue::WeakLinkage;      break;
02041     case 2: Linkage = GlobalValue::AppendingLinkage; break;
02042     case 3: Linkage = GlobalValue::InternalLinkage;  break;
02043     case 4: Linkage = GlobalValue::LinkOnceLinkage;  break;
02044     default:
02045       error("Unknown linkage type: " + utostr(LinkageID));
02046       Linkage = GlobalValue::InternalLinkage;
02047       break;
02048     }
02049 
02050     const Type *Ty = getType(SlotNo);
02051     if (!Ty)
02052       error("Global has no type! SlotNo=" + utostr(SlotNo));
02053 
02054     if (!isa<PointerType>(Ty))
02055       error("Global not a pointer type! Ty= " + Ty->getDescription());
02056 
02057     const Type *ElTy = cast<PointerType>(Ty)->getElementType();
02058 
02059     // Create the global variable...
02060     GlobalVariable *GV = new GlobalVariable(ElTy, isConstant, Linkage,
02061                                             0, "", TheModule);
02062     GV->setAlignment(Alignment);
02063     insertValue(GV, SlotNo, ModuleValues);
02064 
02065     if (GlobalSectionID != 0)
02066       SectionID[GV] = GlobalSectionID;
02067 
02068     unsigned initSlot = 0;
02069     if (hasInitializer) {
02070       initSlot = read_vbr_uint();
02071       GlobalInits.push_back(std::make_pair(GV, initSlot));
02072     }
02073 
02074     // Notify handler about the global value.
02075     if (Handler)
02076       Handler->handleGlobalVariable(ElTy, isConstant, Linkage, SlotNo,initSlot);
02077 
02078     // Get next item
02079     VarType = read_vbr_uint();
02080   }
02081 
02082   // Read the function objects for all of the functions that are coming
02083   unsigned FnSignature = read_vbr_uint();
02084 
02085   if (hasNoFlagsForFunctions)
02086     FnSignature = (FnSignature << 5) + 1;
02087 
02088   // List is terminated by VoidTy.
02089   while (((FnSignature & (~0U >> 1)) >> 5) != Type::VoidTyID) {
02090     const Type *Ty = getType((FnSignature & (~0U >> 1)) >> 5);
02091     if (!isa<PointerType>(Ty) ||
02092         !isa<FunctionType>(cast<PointerType>(Ty)->getElementType())) {
02093       error("Function not a pointer to function type! Ty = " +
02094             Ty->getDescription());
02095     }
02096 
02097     // We create functions by passing the underlying FunctionType to create...
02098     const FunctionType* FTy =
02099       cast<FunctionType>(cast<PointerType>(Ty)->getElementType());
02100 
02101     // Insert the place holder.
02102     Function *Func = new Function(FTy, GlobalValue::ExternalLinkage,
02103                                   "", TheModule);
02104 
02105     insertValue(Func, (FnSignature & (~0U >> 1)) >> 5, ModuleValues);
02106 
02107     // Flags are not used yet.
02108     unsigned Flags = FnSignature & 31;
02109 
02110     // Save this for later so we know type of lazily instantiated functions.
02111     // Note that known-external functions do not have FunctionInfo blocks, so we
02112     // do not add them to the FunctionSignatureList.
02113     if ((Flags & (1 << 4)) == 0)
02114       FunctionSignatureList.push_back(Func);
02115 
02116     // Get the calling convention from the low bits.
02117     unsigned CC = Flags & 15;
02118     unsigned Alignment = 0;
02119     if (FnSignature & (1 << 31)) {  // Has extension word?
02120       unsigned ExtWord = read_vbr_uint();
02121       Alignment = (1 << (ExtWord & 31)) >> 1;
02122       CC |= ((ExtWord >> 5) & 15) << 4;
02123       
02124       if (ExtWord & (1 << 10))  // Has a section ID.
02125         SectionID[Func] = read_vbr_uint();
02126     }
02127     
02128     Func->setCallingConv(CC-1);
02129     Func->setAlignment(Alignment);
02130 
02131     if (Handler) Handler->handleFunctionDeclaration(Func);
02132 
02133     // Get the next function signature.
02134     FnSignature = read_vbr_uint();
02135     if (hasNoFlagsForFunctions)
02136       FnSignature = (FnSignature << 5) + 1;
02137   }
02138 
02139   // Now that the function signature list is set up, reverse it so that we can
02140   // remove elements efficiently from the back of the vector.
02141   std::reverse(FunctionSignatureList.begin(), FunctionSignatureList.end());
02142 
02143   /// SectionNames - This contains the list of section names encoded in the
02144   /// moduleinfoblock.  Functions and globals with an explicit section index
02145   /// into this to get their section name.
02146   std::vector<std::string> SectionNames;
02147   
02148   if (hasInconsistentModuleGlobalInfo) {
02149     align32();
02150   } else if (!hasNoDependentLibraries) {
02151     // If this bytecode format has dependent library information in it, read in
02152     // the number of dependent library items that follow.
02153     unsigned num_dep_libs = read_vbr_uint();
02154     std::string dep_lib;
02155     while (num_dep_libs--) {
02156       dep_lib = read_str();
02157       TheModule->addLibrary(dep_lib);
02158       if (Handler)
02159         Handler->handleDependentLibrary(dep_lib);
02160     }
02161 
02162     // Read target triple and place into the module.
02163     std::string triple = read_str();
02164     TheModule->setTargetTriple(triple);
02165     if (Handler)
02166       Handler->handleTargetTriple(triple);
02167     
02168     if (!hasAlignment && At != BlockEnd) {
02169       // If the file has section info in it, read the section names now.
02170       unsigned NumSections = read_vbr_uint();
02171       while (NumSections--)
02172         SectionNames.push_back(read_str());
02173     }
02174     
02175     // If the file has module-level inline asm, read it now.
02176     if (!hasAlignment && At != BlockEnd)
02177       TheModule->setModuleInlineAsm(read_str());
02178   }
02179 
02180   // If any globals are in specified sections, assign them now.
02181   for (std::map<GlobalValue*, unsigned>::iterator I = SectionID.begin(), E =
02182        SectionID.end(); I != E; ++I)
02183     if (I->second) {
02184       if (I->second > SectionID.size())
02185         error("SectionID out of range for global!");
02186       I->first->setSection(SectionNames[I->second-1]);
02187     }
02188 
02189   // This is for future proofing... in the future extra fields may be added that
02190   // we don't understand, so we transparently ignore them.
02191   //
02192   At = BlockEnd;
02193 
02194   if (Handler) Handler->handleModuleGlobalsEnd();
02195 }
02196 
02197 /// Parse the version information and decode it by setting flags on the
02198 /// Reader that enable backward compatibility of the reader.
02199 void BytecodeReader::ParseVersionInfo() {
02200   unsigned Version = read_vbr_uint();
02201 
02202   // Unpack version number: low four bits are for flags, top bits = version
02203   Module::Endianness  Endianness;
02204   Module::PointerSize PointerSize;
02205   Endianness  = (Version & 1) ? Module::BigEndian : Module::LittleEndian;
02206   PointerSize = (Version & 2) ? Module::Pointer64 : Module::Pointer32;
02207 
02208   bool hasNoEndianness = Version & 4;
02209   bool hasNoPointerSize = Version & 8;
02210 
02211   RevisionNum = Version >> 4;
02212 
02213   // Default values for the current bytecode version
02214   hasInconsistentModuleGlobalInfo = false;
02215   hasExplicitPrimitiveZeros = false;
02216   hasRestrictedGEPTypes = false;
02217   hasTypeDerivedFromValue = false;
02218   hasLongBlockHeaders = false;
02219   has32BitTypes = false;
02220   hasNoDependentLibraries = false;
02221   hasAlignment = false;
02222   hasNoUndefValue = false;
02223   hasNoFlagsForFunctions = false;
02224   hasNoUnreachableInst = false;
02225 
02226   switch (RevisionNum) {
02227   case 0:               //  LLVM 1.0, 1.1 (Released)
02228     // Base LLVM 1.0 bytecode format.
02229     hasInconsistentModuleGlobalInfo = true;
02230     hasExplicitPrimitiveZeros = true;
02231 
02232     // FALL THROUGH
02233 
02234   case 1:               // LLVM 1.2 (Released)
02235     // LLVM 1.2 added explicit support for emitting strings efficiently.
02236 
02237     // Also, it fixed the problem where the size of the ModuleGlobalInfo block
02238     // included the size for the alignment at the end, where the rest of the
02239     // blocks did not.
02240 
02241     // LLVM 1.2 and before required that GEP indices be ubyte constants for
02242     // structures and longs for sequential types.
02243     hasRestrictedGEPTypes = true;
02244 
02245     // LLVM 1.2 and before had the Type class derive from Value class. This
02246     // changed in release 1.3 and consequently LLVM 1.3 bytecode files are
02247     // written differently because Types can no longer be part of the
02248     // type planes for Values.
02249     hasTypeDerivedFromValue = true;
02250 
02251     // FALL THROUGH
02252 
02253   case 2:                // 1.2.5 (Not Released)
02254 
02255     // LLVM 1.2 and earlier had two-word block headers. This is a bit wasteful,
02256     // especially for small files where the 8 bytes per block is a large
02257     // fraction of the total block size. In LLVM 1.3, the block type and length
02258     // are compressed into a single 32-bit unsigned integer. 27 bits for length,
02259     // 5 bits for block type.
02260     hasLongBlockHeaders = true;
02261 
02262     // LLVM 1.2 and earlier wrote type slot numbers as vbr_uint32. In LLVM 1.3
02263     // this has been reduced to vbr_uint24. It shouldn't make much difference
02264     // since we haven't run into a module with > 24 million types, but for
02265     // safety the 24-bit restriction has been enforced in 1.3 to free some bits
02266     // in various places and to ensure consistency.
02267     has32BitTypes = true;
02268 
02269     // LLVM 1.2 and earlier did not provide a target triple nor a list of
02270     // libraries on which the bytecode is dependent. LLVM 1.3 provides these
02271     // features, for use in future versions of LLVM.
02272     hasNoDependentLibraries = true;
02273 
02274     // FALL THROUGH
02275 
02276   case 3:               // LLVM 1.3 (Released)
02277     // LLVM 1.3 and earlier caused alignment bytes to be written on some block
02278     // boundaries and at the end of some strings. In extreme cases (e.g. lots
02279     // of GEP references to a constant array), this can increase the file size
02280     // by 30% or more. In version 1.4 alignment is done away with completely.
02281     hasAlignment = true;
02282 
02283     // FALL THROUGH
02284 
02285   case 4:               // 1.3.1 (Not Released)
02286     // In version 4, we did not support the 'undef' constant.
02287     hasNoUndefValue = true;
02288 
02289     // In version 4 and earlier, we did not include space for flags for
02290     // functions in the module info block.
02291     hasNoFlagsForFunctions = true;
02292 
02293     // In version 4 and earlier, we did not include the 'unreachable'
02294     // instruction in the opcode numbering in the bytecode file.
02295     hasNoUnreachableInst = true;
02296     break;
02297 
02298     // FALL THROUGH
02299 
02300   case 5:               // 1.4 (Released)
02301     break;
02302 
02303   default:
02304     error("Unknown bytecode version number: " + itostr(RevisionNum));
02305   }
02306 
02307   if (hasNoEndianness) Endianness  = Module::AnyEndianness;
02308   if (hasNoPointerSize) PointerSize = Module::AnyPointerSize;
02309 
02310   TheModule->setEndianness(Endianness);
02311   TheModule->setPointerSize(PointerSize);
02312 
02313   if (Handler) Handler->handleVersionInfo(RevisionNum, Endianness, PointerSize);
02314 }
02315 
02316 /// Parse a whole module.
02317 void BytecodeReader::ParseModule() {
02318   unsigned Type, Size;
02319 
02320   FunctionSignatureList.clear(); // Just in case...
02321 
02322   // Read into instance variables...
02323   ParseVersionInfo();
02324   align32();
02325 
02326   bool SeenModuleGlobalInfo = false;
02327   bool SeenGlobalTypePlane = false;
02328   BufPtr MyEnd = BlockEnd;
02329   while (At < MyEnd) {
02330     BufPtr OldAt = At;
02331     read_block(Type, Size);
02332 
02333     switch (Type) {
02334 
02335     case BytecodeFormat::GlobalTypePlaneBlockID:
02336       if (SeenGlobalTypePlane)
02337         error("Two GlobalTypePlane Blocks Encountered!");
02338 
02339       if (Size > 0)
02340         ParseGlobalTypes();
02341       SeenGlobalTypePlane = true;
02342       break;
02343 
02344     case BytecodeFormat::ModuleGlobalInfoBlockID:
02345       if (SeenModuleGlobalInfo)
02346         error("Two ModuleGlobalInfo Blocks Encountered!");
02347       ParseModuleGlobalInfo();
02348       SeenModuleGlobalInfo = true;
02349       break;
02350 
02351     case BytecodeFormat::ConstantPoolBlockID:
02352       ParseConstantPool(ModuleValues, ModuleTypes,false);
02353       break;
02354 
02355     case BytecodeFormat::FunctionBlockID:
02356       ParseFunctionLazily();
02357       break;
02358 
02359     case BytecodeFormat::SymbolTableBlockID:
02360       ParseSymbolTable(0, &TheModule->getSymbolTable());
02361       break;
02362 
02363     default:
02364       At += Size;
02365       if (OldAt > At) {
02366         error("Unexpected Block of Type #" + utostr(Type) + " encountered!");
02367       }
02368       break;
02369     }
02370     BlockEnd = MyEnd;
02371     align32();
02372   }
02373 
02374   // After the module constant pool has been read, we can safely initialize
02375   // global variables...
02376   while (!GlobalInits.empty()) {
02377     GlobalVariable *GV = GlobalInits.back().first;
02378     unsigned Slot = GlobalInits.back().second;
02379     GlobalInits.pop_back();
02380 
02381     // Look up the initializer value...
02382     // FIXME: Preserve this type ID!
02383 
02384     const llvm::PointerType* GVType = GV->getType();
02385     unsigned TypeSlot = getTypeSlot(GVType->getElementType());
02386     if (Constant *CV = getConstantValue(TypeSlot, Slot)) {
02387       if (GV->hasInitializer())
02388         error("Global *already* has an initializer?!");
02389       if (Handler) Handler->handleGlobalInitializer(GV,CV);
02390       GV->setInitializer(CV);
02391     } else
02392       error("Cannot find initializer value.");
02393   }
02394 
02395   if (!ConstantFwdRefs.empty())
02396     error("Use of undefined constants in a module");
02397 
02398   /// Make sure we pulled them all out. If we didn't then there's a declaration
02399   /// but a missing body. That's not allowed.
02400   if (!FunctionSignatureList.empty())
02401     error("Function declared, but bytecode stream ended before definition");
02402 }
02403 
02404 /// This function completely parses a bytecode buffer given by the \p Buf
02405 /// and \p Length parameters.
02406 void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length,
02407                                    const std::string &ModuleID) {
02408 
02409   try {
02410     RevisionNum = 0;
02411     At = MemStart = BlockStart = Buf;
02412     MemEnd = BlockEnd = Buf + Length;
02413 
02414     // Create the module
02415     TheModule = new Module(ModuleID);
02416 
02417     if (Handler) Handler->handleStart(TheModule, Length);
02418 
02419     // Read the four bytes of the signature.
02420     unsigned Sig = read_uint();
02421 
02422     // If this is a compressed file
02423     if (Sig == ('l' | ('l' << 8) | ('v' << 16) | ('c' << 24))) {
02424 
02425       // Invoke the decompression of the bytecode. Note that we have to skip the
02426       // file's magic number which is not part of the compressed block. Hence,
02427       // the Buf+4 and Length-4. The result goes into decompressedBlock, a data
02428       // member for retention until BytecodeReader is destructed.
02429       unsigned decompressedLength = Compressor::decompressToNewBuffer(
02430           (char*)Buf+4,Length-4,decompressedBlock);
02431 
02432       // We must adjust the buffer pointers used by the bytecode reader to point
02433       // into the new decompressed block. After decompression, the
02434       // decompressedBlock will point to a contiguous memory area that has
02435       // the decompressed data.
02436       At = MemStart = BlockStart = Buf = (BufPtr) decompressedBlock;
02437       MemEnd = BlockEnd = Buf + decompressedLength;
02438 
02439     // else if this isn't a regular (uncompressed) bytecode file, then its
02440     // and error, generate that now.
02441     } else if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) {
02442       error("Invalid bytecode signature: " + utohexstr(Sig));
02443     }
02444 
02445     // Tell the handler we're starting a module
02446     if (Handler) Handler->handleModuleBegin(ModuleID);
02447 
02448     // Get the module block and size and verify. This is handled specially
02449     // because the module block/size is always written in long format. Other
02450     // blocks are written in short format so the read_block method is used.
02451     unsigned Type, Size;
02452     Type = read_uint();
02453     Size = read_uint();
02454     if (Type != BytecodeFormat::ModuleBlockID) {
02455       error("Expected Module Block! Type:" + utostr(Type) + ", Size:"
02456             + utostr(Size));
02457     }
02458 
02459     // It looks like the darwin ranlib program is broken, and adds trailing
02460     // garbage to the end of some bytecode files.  This hack allows the bc
02461     // reader to ignore trailing garbage on bytecode files.
02462     if (At + Size < MemEnd)
02463       MemEnd = BlockEnd = At+Size;
02464 
02465     if (At + Size != MemEnd)
02466       error("Invalid Top Level Block Length! Type:" + utostr(Type)
02467             + ", Size:" + utostr(Size));
02468 
02469     // Parse the module contents
02470     this->ParseModule();
02471 
02472     // Check for missing functions
02473     if (hasFunctions())
02474       error("Function expected, but bytecode stream ended!");
02475 
02476     // Look for intrinsic functions to upgrade, upgrade them, and save the
02477     // mapping from old function to new for use later when instructions are
02478     // converted.
02479     for (Module::iterator FI = TheModule->begin(), FE = TheModule->end();
02480          FI != FE; ++FI)
02481       if (Function* newF = UpgradeIntrinsicFunction(FI)) {
02482         upgradedFunctions.insert(std::make_pair(FI, newF));
02483         FI->setName("");
02484       }
02485 
02486     // Tell the handler we're done with the module
02487     if (Handler)
02488       Handler->handleModuleEnd(ModuleID);
02489 
02490     // Tell the handler we're finished the parse
02491     if (Handler) Handler->handleFinish();
02492 
02493   } catch (std::string& errstr) {
02494     if (Handler) Handler->handleError(errstr);
02495     freeState();
02496     delete TheModule;
02497     TheModule = 0;
02498     if (decompressedBlock != 0 ) {
02499       ::free(decompressedBlock);
02500       decompressedBlock = 0;
02501     }
02502     throw;
02503   } catch (...) {
02504     std::string msg("Unknown Exception Occurred");
02505     if (Handler) Handler->handleError(msg);
02506     freeState();
02507     delete TheModule;
02508     TheModule = 0;
02509     if (decompressedBlock != 0) {
02510       ::free(decompressedBlock);
02511       decompressedBlock = 0;
02512     }
02513     throw msg;
02514   }
02515 }
02516 
02517 //===----------------------------------------------------------------------===//
02518 //=== Default Implementations of Handler Methods
02519 //===----------------------------------------------------------------------===//
02520 
02521 BytecodeHandler::~BytecodeHandler() {}
02522