LLVM API Documentation

Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

X86ISelSimple.cpp

Go to the documentation of this file.
00001 //===-- X86ISelSimple.cpp - A simple instruction selector for x86 ---------===//
00002 // 
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 // 
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file defines a simple peephole instruction selector for the x86 target
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "X86.h"
00015 #include "X86InstrBuilder.h"
00016 #include "X86InstrInfo.h"
00017 #include "llvm/Constants.h"
00018 #include "llvm/DerivedTypes.h"
00019 #include "llvm/Function.h"
00020 #include "llvm/Instructions.h"
00021 #include "llvm/Pass.h"
00022 #include "llvm/CodeGen/IntrinsicLowering.h"
00023 #include "llvm/CodeGen/MachineConstantPool.h"
00024 #include "llvm/CodeGen/MachineFrameInfo.h"
00025 #include "llvm/CodeGen/MachineFunction.h"
00026 #include "llvm/CodeGen/SSARegMap.h"
00027 #include "llvm/Target/MRegisterInfo.h"
00028 #include "llvm/Target/TargetMachine.h"
00029 #include "llvm/Support/GetElementPtrTypeIterator.h"
00030 #include "llvm/Support/InstVisitor.h"
00031 #include "llvm/ADT/Statistic.h"
00032 using namespace llvm;
00033 
00034 namespace {
00035   Statistic<>
00036   NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added");
00037 
00038   /// TypeClass - Used by the X86 backend to group LLVM types by their basic X86
00039   /// Representation.
00040   ///
00041   enum TypeClass {
00042     cByte, cShort, cInt, cFP, cLong
00043   };
00044 }
00045 
00046 /// getClass - Turn a primitive type into a "class" number which is based on the
00047 /// size of the type, and whether or not it is floating point.
00048 ///
00049 static inline TypeClass getClass(const Type *Ty) {
00050   switch (Ty->getTypeID()) {
00051   case Type::SByteTyID:
00052   case Type::UByteTyID:   return cByte;      // Byte operands are class #0
00053   case Type::ShortTyID:
00054   case Type::UShortTyID:  return cShort;     // Short operands are class #1
00055   case Type::IntTyID:
00056   case Type::UIntTyID:
00057   case Type::PointerTyID: return cInt;       // Int's and pointers are class #2
00058 
00059   case Type::FloatTyID:
00060   case Type::DoubleTyID:  return cFP;        // Floating Point is #3
00061 
00062   case Type::LongTyID:
00063   case Type::ULongTyID:   return cLong;      // Longs are class #4
00064   default:
00065     assert(0 && "Invalid type to getClass!");
00066     return cByte;  // not reached
00067   }
00068 }
00069 
00070 // getClassB - Just like getClass, but treat boolean values as bytes.
00071 static inline TypeClass getClassB(const Type *Ty) {
00072   if (Ty == Type::BoolTy) return cByte;
00073   return getClass(Ty);
00074 }
00075 
namespace {
  /// X86ISel - A simple peephole instruction selector.  Visits each LLVM
  /// instruction in a function and emits fixed X86 machine code for it.
  struct X86ISel : public FunctionPass, InstVisitor<X86ISel> {
    TargetMachine &TM;
    MachineFunction *F;                 // The function we are compiling into
    MachineBasicBlock *BB;              // The current MBB we are compiling
    int VarArgsFrameIndex;              // FrameIndex for start of varargs area
    int ReturnAddressIndex;             // FrameIndex for the return address

    std::map<Value*, unsigned> RegMap;  // Mapping between Val's and SSA Regs

    // MBBMap - Mapping between LLVM BB -> Machine BB
    std::map<const BasicBlock*, MachineBasicBlock*> MBBMap;

    // AllocaMap - Mapping from fixed sized alloca instructions to the
    // FrameIndex for the alloca.
    std::map<AllocaInst*, unsigned> AllocaMap;

    X86ISel(TargetMachine &tm) : TM(tm), F(0), BB(0) {}

    /// runOnFunction - Top level implementation of instruction selection for
    /// the entire function.  Pipeline: lower unknown intrinsics, create the
    /// MachineFunction and its basic blocks, load the incoming arguments,
    /// select all non-PHI instructions, then PHIs, then FP_REG_KILLs.
    ///
    bool runOnFunction(Function &Fn) {
      // First pass over the function, lower any unknown intrinsic functions
      // with the IntrinsicLowering class.
      LowerUnknownIntrinsicFunctionCalls(Fn);

      F = &MachineFunction::construct(&Fn, TM);

      // Create all of the machine basic blocks for the function...
      for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
        F->getBasicBlockList().push_back(MBBMap[I] = new MachineBasicBlock(I));

      BB = &F->front();

      // Set up a frame object for the return address.  This is used by the
      // llvm.returnaddress & llvm.frameaddress intrinsics.
      ReturnAddressIndex = F->getFrameInfo()->CreateFixedObject(4, -4);

      // Copy incoming arguments off of the stack...
      LoadArgumentsToVirtualRegs(Fn);

      // Instruction select everything except PHI nodes
      visit(Fn);

      // Select the PHI nodes
      SelectPHINodes();

      // Insert the FP_REG_KILL instructions into blocks that need them.
      InsertFPRegKills();

      // Drop per-function state so the pass can be rerun on another function.
      RegMap.clear();
      MBBMap.clear();
      AllocaMap.clear();
      F = 0;
      // We always build a machine code representation for the function
      return true;
    }

    virtual const char *getPassName() const {
      return "X86 Simple Instruction Selection";
    }

    /// visitBasicBlock - This method is called when we are visiting a new basic
    /// block.  This simply creates a new MachineBasicBlock to emit code into
    /// and adds it to the current MachineFunction.  Subsequent visit* for
    /// instructions will be invoked for all instructions in the basic block.
    ///
    void visitBasicBlock(BasicBlock &LLVM_BB) {
      BB = MBBMap[&LLVM_BB];
    }

    /// LowerUnknownIntrinsicFunctionCalls - This performs a prepass over the
    /// function, lowering any calls to unknown intrinsic functions into the
    /// equivalent LLVM code.
    ///
    void LowerUnknownIntrinsicFunctionCalls(Function &F);

    /// LoadArgumentsToVirtualRegs - Load all of the arguments to this function
    /// from the stack into virtual registers.
    ///
    void LoadArgumentsToVirtualRegs(Function &F);

    /// SelectPHINodes - Insert machine code to generate phis.  This is tricky
    /// because we have to generate our sources into the source basic blocks,
    /// not the current one.
    ///
    void SelectPHINodes();

    /// InsertFPRegKills - Insert FP_REG_KILL instructions into basic blocks
    /// that need them.  This only occurs due to the floating point stackifier
    /// not being aggressive enough to handle arbitrary global stackification.
    ///
    void InsertFPRegKills();

    // Visitation methods for various instructions.  These methods simply emit
    // fixed X86 code for each instruction.
    //

    // Control flow operators
    void visitReturnInst(ReturnInst &RI);
    void visitBranchInst(BranchInst &BI);
    void visitUnreachableInst(UnreachableInst &UI) {}

    // ValueRecord - Describes a value either as an existing LLVM Value or as
    // a bare (register, type) pair when no Value is available.
    struct ValueRecord {
      Value *Val;
      unsigned Reg;
      const Type *Ty;
      ValueRecord(unsigned R, const Type *T) : Val(0), Reg(R), Ty(T) {}
      ValueRecord(Value *V) : Val(V), Reg(0), Ty(V->getType()) {}
    };
    void doCall(const ValueRecord &Ret, MachineInstr *CallMI,
                const std::vector<ValueRecord> &Args);
    void visitCallInst(CallInst &I);
    void visitIntrinsicCall(Intrinsic::ID ID, CallInst &I);

    // Arithmetic operators.  The OpcodeClass numbering used below
    // (Add=0, Sub=1, And=2, Or=3, Xor=4) must match what
    // visitSimpleBinary/emitSimpleBinaryOperation expect.
    void visitSimpleBinary(BinaryOperator &B, unsigned OpcodeClass);
    void visitAdd(BinaryOperator &B) { visitSimpleBinary(B, 0); }
    void visitSub(BinaryOperator &B) { visitSimpleBinary(B, 1); }
    void visitMul(BinaryOperator &B);

    void visitDiv(BinaryOperator &B) { visitDivRem(B); }
    void visitRem(BinaryOperator &B) { visitDivRem(B); }
    void visitDivRem(BinaryOperator &B);

    // Bitwise operators
    void visitAnd(BinaryOperator &B) { visitSimpleBinary(B, 2); }
    void visitOr (BinaryOperator &B) { visitSimpleBinary(B, 3); }
    void visitXor(BinaryOperator &B) { visitSimpleBinary(B, 4); }

    // Comparison operators...
    void visitSetCondInst(SetCondInst &I);
    unsigned EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
                            MachineBasicBlock *MBB,
                            MachineBasicBlock::iterator MBBI);
    void visitSelectInst(SelectInst &SI);


    // Memory Instructions
    void visitLoadInst(LoadInst &I);
    void visitStoreInst(StoreInst &I);
    void visitGetElementPtrInst(GetElementPtrInst &I);
    void visitAllocaInst(AllocaInst &I);
    void visitMallocInst(MallocInst &I);
    void visitFreeInst(FreeInst &I);

    // Other operators
    void visitShiftInst(ShiftInst &I);
    void visitPHINode(PHINode &I) {}      // PHI nodes handled by second pass
    void visitCastInst(CastInst &I);
    void visitVANextInst(VANextInst &I);
    void visitVAArgInst(VAArgInst &I);

    // visitInstruction - Fallback for anything without a handler above; this
    // selector cannot continue, so bail out loudly.
    void visitInstruction(Instruction &I) {
      std::cerr << "Cannot instruction select: " << I;
      abort();
    }

    /// promote32 - Make a value 32-bits wide, and put it somewhere.
    ///
    void promote32(unsigned targetReg, const ValueRecord &VR);

    /// getAddressingMode - Get the addressing mode to use to address the
    /// specified value.  The returned value should be used with addFullAddress.
    void getAddressingMode(Value *Addr, X86AddressMode &AM);


    /// getGEPIndex - This is used to fold GEP instructions into X86 addressing
    /// expressions.
    void getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
                     std::vector<Value*> &GEPOps,
                     std::vector<const Type*> &GEPTypes,
                     X86AddressMode &AM);

    /// isGEPFoldable - Return true if the specified GEP can be completely
    /// folded into the addressing mode of a load/store or lea instruction.
    bool isGEPFoldable(MachineBasicBlock *MBB,
                       Value *Src, User::op_iterator IdxBegin,
                       User::op_iterator IdxEnd, X86AddressMode &AM);

    /// emitGEPOperation - Common code shared between visitGetElementPtrInst and
    /// constant expression GEP support.
    ///
    void emitGEPOperation(MachineBasicBlock *BB, MachineBasicBlock::iterator IP,
                          Value *Src, User::op_iterator IdxBegin,
                          User::op_iterator IdxEnd, unsigned TargetReg);

    /// emitCastOperation - Common code shared between visitCastInst and
    /// constant expression cast support.
    ///
    void emitCastOperation(MachineBasicBlock *BB,MachineBasicBlock::iterator IP,
                           Value *Src, const Type *DestTy, unsigned TargetReg);

    /// emitSimpleBinaryOperation - Common code shared between visitSimpleBinary
    /// and constant expression support.
    ///
    void emitSimpleBinaryOperation(MachineBasicBlock *BB,
                                   MachineBasicBlock::iterator IP,
                                   Value *Op0, Value *Op1,
                                   unsigned OperatorClass, unsigned TargetReg);

    /// emitBinaryFPOperation - This method handles emission of floating point
    /// Add (0), Sub (1), Mul (2), and Div (3) operations.
    void emitBinaryFPOperation(MachineBasicBlock *BB,
                               MachineBasicBlock::iterator IP,
                               Value *Op0, Value *Op1,
                               unsigned OperatorClass, unsigned TargetReg);

    void emitMultiply(MachineBasicBlock *BB, MachineBasicBlock::iterator IP,
                      Value *Op0, Value *Op1, unsigned TargetReg);

    void doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
                    unsigned DestReg, const Type *DestTy,
                    unsigned Op0Reg, unsigned Op1Reg);
    void doMultiplyConst(MachineBasicBlock *MBB, 
                         MachineBasicBlock::iterator MBBI,
                         unsigned DestReg, const Type *DestTy,
                         unsigned Op0Reg, unsigned Op1Val);

    void emitDivRemOperation(MachineBasicBlock *BB,
                             MachineBasicBlock::iterator IP,
                             Value *Op0, Value *Op1, bool isDiv,
                             unsigned TargetReg);

    /// emitSetCCOperation - Common code shared between visitSetCondInst and
    /// constant expression support.
    ///
    void emitSetCCOperation(MachineBasicBlock *BB,
                            MachineBasicBlock::iterator IP,
                            Value *Op0, Value *Op1, unsigned Opcode,
                            unsigned TargetReg);

    /// emitShiftOperation - Common code shared between visitShiftInst and
    /// constant expression support.
    ///
    void emitShiftOperation(MachineBasicBlock *MBB,
                            MachineBasicBlock::iterator IP,
                            Value *Op, Value *ShiftAmount, bool isLeftShift,
                            const Type *ResultTy, unsigned DestReg);

    // Emit code for a 'SHLD DestReg, Op0, Op1, Amt' operation, where Amt is a
    // constant.
    void doSHLDConst(MachineBasicBlock *MBB, 
                     MachineBasicBlock::iterator MBBI,
                     unsigned DestReg, unsigned Op0Reg, unsigned Op1Reg,
                     unsigned Op1Val);

    /// emitSelectOperation - Common code shared between visitSelectInst and the
    /// constant expression support.
    void emitSelectOperation(MachineBasicBlock *MBB,
                             MachineBasicBlock::iterator IP,
                             Value *Cond, Value *TrueVal, Value *FalseVal,
                             unsigned DestReg);

    /// copyConstantToRegister - Output the instructions required to put the
    /// specified constant into the specified register.
    ///
    void copyConstantToRegister(MachineBasicBlock *MBB,
                                MachineBasicBlock::iterator MBBI,
                                Constant *C, unsigned Reg);

    void emitUCOMr(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
                   unsigned LHS, unsigned RHS);

    /// makeAnotherReg - This method returns the next register number we haven't
    /// yet used.
    ///
    /// Long values are handled somewhat specially.  They are always allocated
    /// as pairs of 32 bit integer values.  The register number returned is the
    /// lower 32 bits of the long value, and the regNum+1 is the upper 32 bits
    /// of the long value.
    ///
    unsigned makeAnotherReg(const Type *Ty) {
      assert(dynamic_cast<const X86RegisterInfo*>(TM.getRegisterInfo()) &&
             "Current target doesn't have X86 reg info??");
      const X86RegisterInfo *MRI =
        static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
      if (Ty == Type::LongTy || Ty == Type::ULongTy) {
        const TargetRegisterClass *RC = MRI->getRegClassForType(Type::IntTy);
        // Create the lower part
        F->getSSARegMap()->createVirtualRegister(RC);
        // Create the upper part.  Virtual registers are numbered
        // consecutively, so the second register's number minus one is the
        // first (low) register of the pair.
        return F->getSSARegMap()->createVirtualRegister(RC)-1;
      }

      // Add the mapping of regnumber => reg class to MachineFunction
      const TargetRegisterClass *RC = MRI->getRegClassForType(Ty);
      return F->getSSARegMap()->createVirtualRegister(RC);
    }

    /// getReg - This method turns an LLVM value into a register number.
    ///
    unsigned getReg(Value &V) { return getReg(&V); }  // Allow references
    unsigned getReg(Value *V) {
      // Just append to the end of the current bb.
      MachineBasicBlock::iterator It = BB->end();
      return getReg(V, BB, It);
    }
    unsigned getReg(Value *V, MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator IPt);

    /// getFixedSizedAllocaFI - Return the frame index for a fixed sized alloca
    /// that is to be statically allocated with the initial stack frame
    /// adjustment.
    unsigned getFixedSizedAllocaFI(AllocaInst *AI);
  };
}
00384 
00385 /// dyn_castFixedAlloca - If the specified value is a fixed size alloca
00386 /// instruction in the entry block, return it.  Otherwise, return a null
00387 /// pointer.
00388 static AllocaInst *dyn_castFixedAlloca(Value *V) {
00389   if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
00390     BasicBlock *BB = AI->getParent();
00391     if (isa<ConstantUInt>(AI->getArraySize()) && BB ==&BB->getParent()->front())
00392       return AI;
00393   }
00394   return 0;
00395 }
00396 
/// getReg - This method turns an LLVM value into a register number.
/// Constants are materialized at the given insertion point; no-op casts are
/// folded away; fixed-sized entry-block allocas become an LEA of their frame
/// slot; everything else gets a stable virtual register from RegMap.
///
unsigned X86ISel::getReg(Value *V, MachineBasicBlock *MBB,
                         MachineBasicBlock::iterator IPt) {
  // If this operand is a constant, emit the code to copy the constant into
  // the register here...
  if (Constant *C = dyn_cast<Constant>(V)) {
    unsigned Reg = makeAnotherReg(V->getType());
    copyConstantToRegister(MBB, IPt, C, Reg);
    return Reg;
  } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
    // Do not emit noop casts at all, unless it's a double -> float cast.
    // A cast between types of the same register class needs no code, so we
    // just reuse the operand's register.  double -> float is the exception:
    // both are class cFP, but the cast still has to be emitted because it
    // narrows the value.
    if (getClassB(CI->getType()) == getClassB(CI->getOperand(0)->getType()) &&
        (CI->getType() != Type::FloatTy || 
         CI->getOperand(0)->getType() != Type::DoubleTy))
      return getReg(CI->getOperand(0), MBB, IPt);
  } else if (AllocaInst *AI = dyn_castFixedAlloca(V)) {
    // If the alloca address couldn't be folded into the instruction addressing,
    // emit an explicit LEA as appropriate.
    unsigned Reg = makeAnotherReg(V->getType());
    unsigned FI = getFixedSizedAllocaFI(AI);
    addFrameReference(BuildMI(*MBB, IPt, X86::LEA32r, 4, Reg), FI);
    return Reg;
  }

  // Fall through: look the value up in RegMap, assigning a fresh virtual
  // register the first time this value is seen.
  unsigned &Reg = RegMap[V];
  if (Reg == 0) {
    Reg = makeAnotherReg(V->getType());
    RegMap[V] = Reg;  // Redundant store: Reg already aliases the map entry.
  }

  return Reg;
}
00430 
00431 /// getFixedSizedAllocaFI - Return the frame index for a fixed sized alloca
00432 /// that is to be statically allocated with the initial stack frame
00433 /// adjustment.
00434 unsigned X86ISel::getFixedSizedAllocaFI(AllocaInst *AI) {
00435   // Already computed this?
00436   std::map<AllocaInst*, unsigned>::iterator I = AllocaMap.lower_bound(AI);
00437   if (I != AllocaMap.end() && I->first == AI) return I->second;
00438 
00439   const Type *Ty = AI->getAllocatedType();
00440   ConstantUInt *CUI = cast<ConstantUInt>(AI->getArraySize());
00441   unsigned TySize = TM.getTargetData().getTypeSize(Ty);
00442   TySize *= CUI->getValue();   // Get total allocated size...
00443   unsigned Alignment = TM.getTargetData().getTypeAlignment(Ty);
00444       
00445   // Create a new stack object using the frame manager...
00446   int FrameIdx = F->getFrameInfo()->CreateStackObject(TySize, Alignment);
00447   AllocaMap.insert(I, std::make_pair(AI, FrameIdx));
00448   return FrameIdx;
00449 }
00450 
00451 
00452 /// copyConstantToRegister - Output the instructions required to put the
00453 /// specified constant into the specified register.
00454 ///
00455 void X86ISel::copyConstantToRegister(MachineBasicBlock *MBB,
00456                                      MachineBasicBlock::iterator IP,
00457                                      Constant *C, unsigned R) {
00458   if (isa<UndefValue>(C)) {
00459     switch (getClassB(C->getType())) {
00460     case cFP:
00461       // FIXME: SHOULD TEACH STACKIFIER ABOUT UNDEF VALUES!
00462       BuildMI(*MBB, IP, X86::FLD0, 0, R);
00463       return;
00464     case cLong:
00465       BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, R+1);
00466       // FALL THROUGH
00467     default:
00468       BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, R);
00469       return;
00470     }
00471   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
00472     unsigned Class = 0;
00473     switch (CE->getOpcode()) {
00474     case Instruction::GetElementPtr:
00475       emitGEPOperation(MBB, IP, CE->getOperand(0),
00476                        CE->op_begin()+1, CE->op_end(), R);
00477       return;
00478     case Instruction::Cast:
00479       emitCastOperation(MBB, IP, CE->getOperand(0), CE->getType(), R);
00480       return;
00481 
00482     case Instruction::Xor: ++Class; // FALL THROUGH
00483     case Instruction::Or:  ++Class; // FALL THROUGH
00484     case Instruction::And: ++Class; // FALL THROUGH
00485     case Instruction::Sub: ++Class; // FALL THROUGH
00486     case Instruction::Add:
00487       emitSimpleBinaryOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
00488                                 Class, R);
00489       return;
00490 
00491     case Instruction::Mul:
00492       emitMultiply(MBB, IP, CE->getOperand(0), CE->getOperand(1), R);
00493       return;
00494 
00495     case Instruction::Div:
00496     case Instruction::Rem:
00497       emitDivRemOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
00498                           CE->getOpcode() == Instruction::Div, R);
00499       return;
00500 
00501     case Instruction::SetNE:
00502     case Instruction::SetEQ:
00503     case Instruction::SetLT:
00504     case Instruction::SetGT:
00505     case Instruction::SetLE:
00506     case Instruction::SetGE:
00507       emitSetCCOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
00508                          CE->getOpcode(), R);
00509       return;
00510 
00511     case Instruction::Shl:
00512     case Instruction::Shr:
00513       emitShiftOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
00514                          CE->getOpcode() == Instruction::Shl, CE->getType(), R);
00515       return;
00516 
00517     case Instruction::Select:
00518       emitSelectOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1),
00519                           CE->getOperand(2), R);
00520       return;
00521 
00522     default:
00523       std::cerr << "Offending expr: " << *C << "\n";
00524       assert(0 && "Constant expression not yet handled!\n");
00525     }
00526   }
00527 
00528   if (C->getType()->isIntegral()) {
00529     unsigned Class = getClassB(C->getType());
00530 
00531     if (Class == cLong) {
00532       // Copy the value into the register pair.
00533       uint64_t Val = cast<ConstantInt>(C)->getRawValue();
00534       BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addImm(Val & 0xFFFFFFFF);
00535       BuildMI(*MBB, IP, X86::MOV32ri, 1, R+1).addImm(Val >> 32);
00536       return;
00537     }
00538 
00539     assert(Class <= cInt && "Type not handled yet!");
00540 
00541     static const unsigned IntegralOpcodeTab[] = {
00542       X86::MOV8ri, X86::MOV16ri, X86::MOV32ri
00543     };
00544 
00545     if (C->getType() == Type::BoolTy) {
00546       BuildMI(*MBB, IP, X86::MOV8ri, 1, R).addImm(C == ConstantBool::True);
00547     } else {
00548       ConstantInt *CI = cast<ConstantInt>(C);
00549       BuildMI(*MBB, IP, IntegralOpcodeTab[Class],1,R).addImm(CI->getRawValue());
00550     }
00551   } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
00552     if (CFP->isExactlyValue(+0.0))
00553       BuildMI(*MBB, IP, X86::FLD0, 0, R);
00554     else if (CFP->isExactlyValue(+1.0))
00555       BuildMI(*MBB, IP, X86::FLD1, 0, R);
00556     else {
00557       // Otherwise we need to spill the constant to memory...
00558       MachineConstantPool *CP = F->getConstantPool();
00559       unsigned CPI = CP->getConstantPoolIndex(CFP);
00560       const Type *Ty = CFP->getType();
00561 
00562       assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
00563       unsigned LoadOpcode = Ty == Type::FloatTy ? X86::FLD32m : X86::FLD64m;
00564       addConstantPoolReference(BuildMI(*MBB, IP, LoadOpcode, 4, R), CPI);
00565     }
00566 
00567   } else if (isa<ConstantPointerNull>(C)) {
00568     // Copy zero (null pointer) to the register.
00569     BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addImm(0);
00570   } else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
00571     BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addGlobalAddress(GV);
00572   } else {
00573     std::cerr << "Offending constant: " << *C << "\n";
00574     assert(0 && "Type not handled yet!");
00575   }
00576 }
00577 
/// LoadArgumentsToVirtualRegs - Load all of the arguments to this function from
/// the stack into virtual registers.
///
void X86ISel::LoadArgumentsToVirtualRegs(Function &Fn) {
  // Emit instructions to load the arguments...  On entry to a function on the
  // X86, the stack frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is four bytes in size
  //    ... 
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  MachineFrameInfo *MFI = F->getFrameInfo();

  for (Function::aiterator I = Fn.abegin(), E = Fn.aend(); I != E; ++I) {
    // Dead (unused) arguments get no load code, but ArgOffset must still
    // advance so later arguments find their correct stack slots.
    bool ArgLive = !I->use_empty();
    unsigned Reg = ArgLive ? getReg(*I) : 0;
    int FI;          // Frame object index

    switch (getClassB(I->getType())) {
    case cByte:
      if (ArgLive) {
        FI = MFI->CreateFixedObject(1, ArgOffset);
        addFrameReference(BuildMI(BB, X86::MOV8rm, 4, Reg), FI);
      }
      break;
    case cShort:
      if (ArgLive) {
        FI = MFI->CreateFixedObject(2, ArgOffset);
        addFrameReference(BuildMI(BB, X86::MOV16rm, 4, Reg), FI);
      }
      break;
    case cInt:
      if (ArgLive) {
        FI = MFI->CreateFixedObject(4, ArgOffset);
        addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI);
      }
      break;
    case cLong:
      // A long is two 32-bit loads into the register pair (Reg, Reg+1).
      if (ArgLive) {
        FI = MFI->CreateFixedObject(8, ArgOffset);
        addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI);
        addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg+1), FI, 4);
      }
      ArgOffset += 4;   // longs require 4 additional bytes
      break;
    case cFP:
      if (ArgLive) {
        unsigned Opcode;
        if (I->getType() == Type::FloatTy) {
          Opcode = X86::FLD32m;
          FI = MFI->CreateFixedObject(4, ArgOffset);
        } else {
          Opcode = X86::FLD64m;
          FI = MFI->CreateFixedObject(8, ArgOffset);
        }
        addFrameReference(BuildMI(BB, Opcode, 4, Reg), FI);
      }
      if (I->getType() == Type::DoubleTy)
        ArgOffset += 4;   // doubles require 4 additional bytes
      break;
    default:
      assert(0 && "Unhandled argument type!");
    }
    ArgOffset += 4;  // Each argument takes at least 4 bytes on the stack...
  }

  // If the function takes variable number of arguments, add a frame offset for
  // the start of the first vararg value... this is used to expand
  // llvm.va_start.
  if (Fn.getFunctionType()->isVarArg())
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
}
00652 
00653 
/// SelectPHINodes - Insert machine code to generate phis.  This is tricky
/// because we have to generate our sources into the source basic blocks, not
/// the current one.
///
void X86ISel::SelectPHINodes() {
  // NOTE(review): TII appears unused within this function as visible here.
  const TargetInstrInfo &TII = *TM.getInstrInfo();
  const Function &LF = *F->getFunction();  // The LLVM function...
  for (Function::const_iterator I = LF.begin(), E = LF.end(); I != E; ++I) {
    const BasicBlock *BB = I;
    MachineBasicBlock &MBB = *MBBMap[I];

    // Loop over all of the PHI nodes in the LLVM basic block...
    MachineBasicBlock::iterator PHIInsertPoint = MBB.begin();
    for (BasicBlock::const_iterator I = BB->begin(); isa<PHINode>(I); ++I) {
      PHINode *PN = const_cast<PHINode*>(dyn_cast<PHINode>(I));

      // Create a new machine instr PHI node, and insert it.
      unsigned PHIReg = getReg(*PN);
      MachineInstr *PhiMI = BuildMI(MBB, PHIInsertPoint,
                                    X86::PHI, PN->getNumOperands(), PHIReg);

      // Long values live in a register pair, so they need a second PHI for
      // the high half (PHIReg+1).
      MachineInstr *LongPhiMI = 0;
      if (PN->getType() == Type::LongTy || PN->getType() == Type::ULongTy)
        LongPhiMI = BuildMI(MBB, PHIInsertPoint,
                            X86::PHI, PN->getNumOperands(), PHIReg+1);

      // PHIValues - Map of blocks to incoming virtual registers.  We use this
      // so that we only initialize one incoming value for a particular block,
      // even if the block has multiple entries in the PHI node.
      //
      std::map<MachineBasicBlock*, unsigned> PHIValues;

      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
        MachineBasicBlock *PredMBB = MBBMap[PN->getIncomingBlock(i)];
        unsigned ValReg;
        std::map<MachineBasicBlock*, unsigned>::iterator EntryIt =
          PHIValues.lower_bound(PredMBB);

        if (EntryIt != PHIValues.end() && EntryIt->first == PredMBB) {
          // We already inserted an initialization of the register for this
          // predecessor.  Recycle it.
          ValReg = EntryIt->second;

        } else {        
          // Get the incoming value into a virtual register.
          //
          Value *Val = PN->getIncomingValue(i);

          // If this is a constant or GlobalValue, we may have to insert code
          // into the basic block to compute it into a virtual register.
          if ((isa<Constant>(Val) && !isa<ConstantExpr>(Val))) {
            // Simple constants get emitted at the end of the basic block,
            // before any terminator instructions.  We "know" that the code to
            // move a constant into a register will never clobber any flags.
            ValReg = getReg(Val, PredMBB, PredMBB->getFirstTerminator());
          } else {
            // Because we don't want to clobber any values which might be in
            // physical registers with the computation of this constant (which
            // might be arbitrarily complex if it is a constant expression),
            // just insert the computation at the top of the basic block.
            MachineBasicBlock::iterator PI = PredMBB->begin();
            
            // Skip over any PHI nodes though!
            while (PI != PredMBB->end() && PI->getOpcode() == X86::PHI)
              ++PI;
            
            ValReg = getReg(Val, PredMBB, PI);
          }

          // Remember that we inserted a value for this PHI for this predecessor
          PHIValues.insert(EntryIt, std::make_pair(PredMBB, ValReg));
        }

        PhiMI->addRegOperand(ValReg);
        PhiMI->addMachineBasicBlockOperand(PredMBB);
        if (LongPhiMI) {
          // High half of the pair uses ValReg+1 from the same predecessor.
          LongPhiMI->addRegOperand(ValReg+1);
          LongPhiMI->addMachineBasicBlockOperand(PredMBB);
        }
      }

      // Now that we emitted all of the incoming values for the PHI node, make
      // sure to reposition the InsertPoint after the PHI that we just added.
      // This is needed because we might have inserted a constant into this
      // block, right after the PHI's which is before the old insert point!
      PHIInsertPoint = LongPhiMI ? LongPhiMI : PhiMI;
      ++PHIInsertPoint;
    }
  }
}
00744 
/// RequiresFPRegKill - The floating point stackifier pass cannot insert
/// compensation code on critical edges.  As such, it requires that we kill all
/// FP registers on the exit from any blocks that either ARE critical edges, or
/// branch to a block that has incoming critical edges.
///
/// Note that this kill instruction will eventually be eliminated when
/// restrictions in the stackifier are relaxed.
///
/// NOTE: the precise CFG analysis below is currently disabled (#if 0); the
/// function conservatively answers "true" for every block, so a kill is
/// emitted whenever the caller sees any FP register activity at all.
///
static bool RequiresFPRegKill(const MachineBasicBlock *MBB) {
#if 0
  const BasicBlock *BB = MBB->getBasicBlock ();
  for (succ_const_iterator SI = succ_begin(BB), E = succ_end(BB); SI!=E; ++SI) {
    const BasicBlock *Succ = *SI;
    pred_const_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
    ++PI;  // Every block has at least one predecessor; skip past it.
    if (PI != PE) {             // If it has exactly one, this isn't crit edge
      // If this block has more than one predecessor, check all of the
      // predecessors to see if they have multiple successors.  If so, then the
      // block we are analyzing needs an FPRegKill.
      for (PI = pred_begin(Succ); PI != PE; ++PI) {
        const BasicBlock *Pred = *PI;
        succ_const_iterator SI2 = succ_begin(Pred);
        ++SI2;  // There must be at least one successor of this block.
        if (SI2 != succ_end(Pred))
          return true;   // Yes, we must insert the kill on this edge.
      }
    }
  }
  // If we got this far, there is no need to insert the kill instruction.
  return false;
#else
  // Conservative answer: always kill.  See the note in the header comment.
  return true;
#endif
}
00779 
00780 // InsertFPRegKills - Insert FP_REG_KILL instructions into basic blocks that
00781 // need them.  This only occurs due to the floating point stackifier not being
00782 // aggressive enough to handle arbitrary global stackification.
00783 //
00784 // Currently we insert an FP_REG_KILL instruction into each block that uses or
00785 // defines a floating point virtual register.
00786 //
00787 // When the global register allocators (like linear scan) finally update live
00788 // variable analysis, we can keep floating point values in registers across
00789 // portions of the CFG that do not involve critical edges.  This will be a big
00790 // win, but we are waiting on the global allocators before we can do this.
00791 //
00792 // With a bit of work, the floating point stackifier pass can be enhanced to
00793 // break critical edges as needed (to make a place to put compensation code),
00794 // but this will require some infrastructure improvements as well.
00795 //
00796 void X86ISel::InsertFPRegKills() {
00797   SSARegMap &RegMap = *F->getSSARegMap();
00798 
00799   for (MachineFunction::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
00800     for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I!=E; ++I)
00801       for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
00802       MachineOperand& MO = I->getOperand(i);
00803         if (MO.isRegister() && MO.getReg()) {
00804           unsigned Reg = MO.getReg();
00805           if (MRegisterInfo::isVirtualRegister(Reg)) {
00806             unsigned RegSize = RegMap.getRegClass(Reg)->getSize();
00807             if (RegSize == 10 || RegSize == 8)
00808               goto UsesFPReg;
00809           }
00810         }
00811       }
00812     // If we haven't found an FP register use or def in this basic block, check
00813     // to see if any of our successors has an FP PHI node, which will cause a
00814     // copy to be inserted into this block.
00815     for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
00816          SE = BB->succ_end(); SI != SE; ++SI) {
00817       MachineBasicBlock *SBB = *SI;
00818       for (MachineBasicBlock::iterator I = SBB->begin();
00819            I != SBB->end() && I->getOpcode() == X86::PHI; ++I) {
00820         const TargetRegisterClass *RC =
00821           RegMap.getRegClass(I->getOperand(0).getReg());
00822         if (RC->getSize() == 10 || RC->getSize() == 8)
00823           goto UsesFPReg;
00824       }
00825     }
00826     continue;
00827   UsesFPReg:
00828     // Okay, this block uses an FP register.  If the block has successors (ie,
00829     // it's not an unwind/return), insert the FP_REG_KILL instruction.
00830     if (BB->succ_size () && RequiresFPRegKill(BB)) {
00831       BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0);
00832       ++NumFPKill;
00833     }
00834   }
00835 }
00836 
00837 
00838 void X86ISel::getAddressingMode(Value *Addr, X86AddressMode &AM) {
00839   AM.BaseType = X86AddressMode::RegBase;
00840   AM.Base.Reg = 0; AM.Scale = 1; AM.IndexReg = 0; AM.Disp = 0;
00841   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) {
00842     if (isGEPFoldable(BB, GEP->getOperand(0), GEP->op_begin()+1, GEP->op_end(),
00843                        AM))
00844       return;
00845   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
00846     if (CE->getOpcode() == Instruction::GetElementPtr)
00847       if (isGEPFoldable(BB, CE->getOperand(0), CE->op_begin()+1, CE->op_end(),
00848                         AM))
00849         return;
00850   } else if (AllocaInst *AI = dyn_castFixedAlloca(Addr)) {
00851     AM.BaseType = X86AddressMode::FrameIndexBase;
00852     AM.Base.FrameIndex = getFixedSizedAllocaFI(AI);
00853     return;
00854   } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
00855     AM.GV = GV;
00856     return;
00857   }
00858 
00859   // If it's not foldable, reset addr mode.
00860   AM.BaseType = X86AddressMode::RegBase;
00861   AM.Base.Reg = getReg(Addr);
00862   AM.Scale = 1; AM.IndexReg = 0; AM.Disp = 0;
00863 }
00864 
00865 // canFoldSetCCIntoBranchOrSelect - Return the setcc instruction if we can fold
00866 // it into the conditional branch or select instruction which is the only user
00867 // of the cc instruction.  This is the case if the conditional branch is the
00868 // only user of the setcc.  We also don't handle long arguments below, so we 
00869 // reject them here as well.
00870 //
00871 static SetCondInst *canFoldSetCCIntoBranchOrSelect(Value *V) {
00872   if (SetCondInst *SCI = dyn_cast<SetCondInst>(V))
00873     if (SCI->hasOneUse()) {
00874       Instruction *User = cast<Instruction>(SCI->use_back());
00875       if ((isa<BranchInst>(User) || isa<SelectInst>(User)) &&
00876           (getClassB(SCI->getOperand(0)->getType()) != cLong ||
00877            SCI->getOpcode() == Instruction::SetEQ ||
00878            SCI->getOpcode() == Instruction::SetNE) &&
00879           (isa<BranchInst>(User) || User->getOperand(0) == V))
00880         return SCI;
00881     }
00882   return 0;
00883 }
00884 
00885 // Return a fixed numbering for setcc instructions which does not depend on the
00886 // order of the opcodes.
00887 //
00888 static unsigned getSetCCNumber(unsigned Opcode) {
00889   switch(Opcode) {
00890   default: assert(0 && "Unknown setcc instruction!");
00891   case Instruction::SetEQ: return 0;
00892   case Instruction::SetNE: return 1;
00893   case Instruction::SetLT: return 2;
00894   case Instruction::SetGE: return 3;
00895   case Instruction::SetGT: return 4;
00896   case Instruction::SetLE: return 5;
00897   }
00898 }
00899 
// SetCCOpcodeTab - Maps a setcc number (from getSetCCNumber) to the x86 SETcc
// opcode to emit.  Indexed as [isSigned][setcc number]:
//   row 0 = unsigned comparisons, row 1 = signed comparisons.
// Slots 6 and 7 (sets/setns) are only populated in the signed row; they are
// selected when EmitComparison remaps a signed compare-against-zero to a
// plain sign test (returning 6 or 7).
//
// LLVM  -> X86 signed  X86 unsigned
// -----    ----------  ------------
// seteq -> sete        sete
// setne -> setne       setne
// setlt -> setl        setb
// setge -> setge       setae
// setgt -> setg        seta
// setle -> setle       setbe
// ----
//          sets                       // Used by comparison with 0 optimization
//          setns
static const unsigned SetCCOpcodeTab[2][8] = {
  { X86::SETEr, X86::SETNEr, X86::SETBr, X86::SETAEr, X86::SETAr, X86::SETBEr,
    0, 0 },
  { X86::SETEr, X86::SETNEr, X86::SETLr, X86::SETGEr, X86::SETGr, X86::SETLEr,
    X86::SETSr, X86::SETNSr },
};
00917 
00918 /// emitUCOMr - In the future when we support processors before the P6, this
00919 /// wraps the logic for emitting an FUCOMr vs FUCOMIr.
00920 void X86ISel::emitUCOMr(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
00921                         unsigned LHS, unsigned RHS) {
00922   if (0) { // for processors prior to the P6
00923     BuildMI(*MBB, IP, X86::FUCOMr, 2).addReg(LHS).addReg(RHS);
00924     BuildMI(*MBB, IP, X86::FNSTSW8r, 0);
00925     BuildMI(*MBB, IP, X86::SAHF, 1);
00926   } else {
00927     BuildMI(*MBB, IP, X86::FUCOMIr, 2).addReg(LHS).addReg(RHS);
00928   }
00929 }
00930 
// EmitComparison - This function emits a comparison of the two operands,
// returning the extended setcc code to use.
//
// The returned value is an index into SetCCOpcodeTab.  It is usually the
// OpNum that was passed in, but signed comparisons against zero may be
// remapped to a sign test: 2 (setlt) -> 6 (sets) and 3 (setge) -> 7 (setns).
//
// NOTE: for cLong ordered comparisons (OpNum >= 2), no setcc is possible;
// the boolean result is left in the BL register instead, and callers
// (emitSetCCOperation, emitSelectOperation) know to read it from there.
unsigned X86ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
                                 MachineBasicBlock *MBB,
                                 MachineBasicBlock::iterator IP) {
  // The arguments are already supposed to be of the same type.
  const Type *CompTy = Op0->getType();
  unsigned Class = getClassB(CompTy);

  // Special case handling of: cmp R, i
  if (isa<ConstantPointerNull>(Op1)) {
    // Comparing a pointer against null.
    unsigned Op0r = getReg(Op0, MBB, IP);
    if (OpNum < 2)    // seteq/setne -> test
      BuildMI(*MBB, IP, X86::TEST32rr, 2).addReg(Op0r).addReg(Op0r);
    else
      BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(0);
    return OpNum;

  } else if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
    if (Class == cByte || Class == cShort || Class == cInt) {
      unsigned Op1v = CI->getRawValue();

      // Mask off any upper bits of the constant, if there are any...
      Op1v &= (1ULL << (8 << Class)) - 1;

      // If this is a comparison against zero, emit more efficient code.  We
      // can't handle unsigned comparisons against zero unless they are == or
      // !=.  These should have been strength reduced already anyway.
      if (Op1v == 0 && (CompTy->isSigned() || OpNum < 2)) {

        // If this is a comparison against zero and the LHS is an and of a
        // register with a constant, use the test to do the and.
        if (Instruction *Op0I = dyn_cast<Instruction>(Op0))
          if (Op0I->getOpcode() == Instruction::And && Op0->hasOneUse() &&
              isa<ConstantInt>(Op0I->getOperand(1))) {
            static const unsigned TESTTab[] = {
              X86::TEST8ri, X86::TEST16ri, X86::TEST32ri
            };
            
            // Emit test X, i
            unsigned LHS = getReg(Op0I->getOperand(0), MBB, IP);
            unsigned Imm =
              cast<ConstantInt>(Op0I->getOperand(1))->getRawValue();
            BuildMI(*MBB, IP, TESTTab[Class], 2).addReg(LHS).addImm(Imm);
            
            // Remap signed < 0 / >= 0 to a sign test (see SetCCOpcodeTab).
            if (OpNum == 2) return 6;   // Map jl -> js
            if (OpNum == 3) return 7;   // Map jg -> jns
            return OpNum;
          }

        // Plain compare-with-zero: test the register against itself.
        unsigned Op0r = getReg(Op0, MBB, IP);
        static const unsigned TESTTab[] = {
          X86::TEST8rr, X86::TEST16rr, X86::TEST32rr
        };
        BuildMI(*MBB, IP, TESTTab[Class], 2).addReg(Op0r).addReg(Op0r);

        if (OpNum == 2) return 6;   // Map jl -> js
        if (OpNum == 3) return 7;   // Map jg -> jns
        return OpNum;
      }

      // General register-vs-immediate compare.
      static const unsigned CMPTab[] = {
        X86::CMP8ri, X86::CMP16ri, X86::CMP32ri
      };

      unsigned Op0r = getReg(Op0, MBB, IP);
      BuildMI(*MBB, IP, CMPTab[Class], 2).addReg(Op0r).addImm(Op1v);
      return OpNum;
    } else {
      // 64-bit integer vs constant.  Longs live in a register pair:
      // Op0r = low 32 bits, Op0r+1 = high 32 bits.
      unsigned Op0r = getReg(Op0, MBB, IP);
      assert(Class == cLong && "Unknown integer class!");
      unsigned LowCst = CI->getRawValue();
      unsigned HiCst = CI->getRawValue() >> 32;
      if (OpNum < 2) {    // seteq, setne
        // XOR each half with its constant (skipping zero halves, where the
        // register itself already holds the difference), then OR the halves
        // together; the result is zero iff the values are equal, so the
        // caller's sete/setne reads the flags from the OR.
        unsigned LoTmp = Op0r;
        if (LowCst != 0) {
          LoTmp = makeAnotherReg(Type::IntTy);
          BuildMI(*MBB, IP, X86::XOR32ri, 2, LoTmp).addReg(Op0r).addImm(LowCst);
        }
        unsigned HiTmp = Op0r+1;
        if (HiCst != 0) {
          HiTmp = makeAnotherReg(Type::IntTy);
          BuildMI(*MBB, IP, X86::XOR32ri, 2,HiTmp).addReg(Op0r+1).addImm(HiCst);
        }
        unsigned FinalTmp = makeAnotherReg(Type::IntTy);
        BuildMI(*MBB, IP, X86::OR32rr, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
        return OpNum;
      } else {
        // Emit a sequence of code which compares the high and low parts once
        // each, then uses a conditional move to handle the overflow case.  For
        // example, a setlt for long would generate code like this:
        //
        // AL = lo(op1) < lo(op2)   // Always unsigned comparison
        // BL = hi(op1) < hi(op2)   // Signedness depends on operands
        // dest = hi(op1) == hi(op2) ? BL : AL;
        //

        // FIXME: This would be much better if we had hierarchical register
        // classes!  Until then, hardcode registers so that we can deal with
        // their aliases (because we don't have conditional byte moves).
        //
        BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(LowCst);
        BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL);
        BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r+1).addImm(HiCst);
        BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0,X86::BL);
        BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH);
        BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH);
        BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX)
          .addReg(X86::AX);
        // NOTE: visitSetCondInst knows that the value is dumped into the BL
        // register at this point for long values...
        return OpNum;
      }
    }
  }

  unsigned Op0r = getReg(Op0, MBB, IP);

  // Special case handling of comparison against +/- 0.0
  if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op1))
    if (CFP->isExactlyValue(+0.0) || CFP->isExactlyValue(-0.0)) {
      // FTST compares ST(0) against zero; copy the FP status word into
      // EFLAGS with FNSTSW + SAHF so the setcc can read it.
      BuildMI(*MBB, IP, X86::FTST, 1).addReg(Op0r);
      BuildMI(*MBB, IP, X86::FNSTSW8r, 0);
      BuildMI(*MBB, IP, X86::SAHF, 1);
      return OpNum;
    }

  unsigned Op1r = getReg(Op1, MBB, IP);
  switch (Class) {
  default: assert(0 && "Unknown type class!");
    // Emit: cmp <var1>, <var2> (do the comparison).  We can
    // compare 8-bit with 8-bit, 16-bit with 16-bit, 32-bit with
    // 32-bit.
  case cByte:
    BuildMI(*MBB, IP, X86::CMP8rr, 2).addReg(Op0r).addReg(Op1r);
    break;
  case cShort:
    BuildMI(*MBB, IP, X86::CMP16rr, 2).addReg(Op0r).addReg(Op1r);
    break;
  case cInt:
    BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r).addReg(Op1r);
    break;
  case cFP:
    emitUCOMr(MBB, IP, Op0r, Op1r);
    break;

  case cLong:
    if (OpNum < 2) {    // seteq, setne
      // XOR the halves pairwise and OR the results: zero iff equal.  The
      // caller's sete/setne reads the flags set by the final OR.
      unsigned LoTmp = makeAnotherReg(Type::IntTy);
      unsigned HiTmp = makeAnotherReg(Type::IntTy);
      unsigned FinalTmp = makeAnotherReg(Type::IntTy);
      BuildMI(*MBB, IP, X86::XOR32rr, 2, LoTmp).addReg(Op0r).addReg(Op1r);
      BuildMI(*MBB, IP, X86::XOR32rr, 2, HiTmp).addReg(Op0r+1).addReg(Op1r+1);
      BuildMI(*MBB, IP, X86::OR32rr,  2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
      break;  // Allow the sete or setne to be generated from flags set by OR
    } else {
      // Emit a sequence of code which compares the high and low parts once
      // each, then uses a conditional move to handle the overflow case.  For
      // example, a setlt for long would generate code like this:
      //
      // AL = lo(op1) < lo(op2)   // Signedness depends on operands
      // BL = hi(op1) < hi(op2)   // Always unsigned comparison
      // dest = hi(op1) == hi(op2) ? BL : AL;
      //

      // FIXME: This would be much better if we had hierarchical register
      // classes!  Until then, hardcode registers so that we can deal with their
      // aliases (because we don't have conditional byte moves).
      //
      BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r).addReg(Op1r);
      BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL);
      BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r+1).addReg(Op1r+1);
      BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0, X86::BL);
      BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH);
      BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH);
      BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX)
                                                   .addReg(X86::AX);
      // NOTE: visitSetCondInst knows that the value is dumped into the BL
      // register at this point for long values...
      return OpNum;
    }
  }
  return OpNum;
}
01115 
01116 /// SetCC instructions - Here we just emit boilerplate code to set a byte-sized
01117 /// register, then move it to wherever the result should be. 
01118 ///
01119 void X86ISel::visitSetCondInst(SetCondInst &I) {
01120   if (canFoldSetCCIntoBranchOrSelect(&I))
01121     return;  // Fold this into a branch or select.
01122 
01123   unsigned DestReg = getReg(I);
01124   MachineBasicBlock::iterator MII = BB->end();
01125   emitSetCCOperation(BB, MII, I.getOperand(0), I.getOperand(1), I.getOpcode(),
01126                      DestReg);
01127 }
01128 
01129 /// emitSetCCOperation - Common code shared between visitSetCondInst and
01130 /// constant expression support.
01131 ///
01132 void X86ISel::emitSetCCOperation(MachineBasicBlock *MBB,
01133                                  MachineBasicBlock::iterator IP,
01134                                  Value *Op0, Value *Op1, unsigned Opcode,
01135                                  unsigned TargetReg) {
01136   unsigned OpNum = getSetCCNumber(Opcode);
01137   OpNum = EmitComparison(OpNum, Op0, Op1, MBB, IP);
01138 
01139   const Type *CompTy = Op0->getType();
01140   unsigned CompClass = getClassB(CompTy);
01141   bool isSigned = CompTy->isSigned() && CompClass != cFP;
01142 
01143   if (CompClass != cLong || OpNum < 2) {
01144     // Handle normal comparisons with a setcc instruction...
01145     BuildMI(*MBB, IP, SetCCOpcodeTab[isSigned][OpNum], 0, TargetReg);
01146   } else {
01147     // Handle long comparisons by copying the value which is already in BL into
01148     // the register we want...
01149     BuildMI(*MBB, IP, X86::MOV8rr, 1, TargetReg).addReg(X86::BL);
01150   }
01151 }
01152 
01153 void X86ISel::visitSelectInst(SelectInst &SI) {
01154   unsigned DestReg = getReg(SI);
01155   MachineBasicBlock::iterator MII = BB->end();
01156   emitSelectOperation(BB, MII, SI.getCondition(), SI.getTrueValue(),
01157                       SI.getFalseValue(), DestReg);
01158 }
01159  
/// emitSelect - Common code shared between visitSelectInst and the constant
/// expression support.
///
/// Emits a CMOV-based select of TrueVal/FalseVal into DestReg at IP.  When
/// the condition is a foldable setcc, the comparison is emitted directly and
/// its condition code drives the cmov; otherwise the boolean condition is
/// tested explicitly.  Byte selects are done in 16-bit registers (there are
/// no 8-bit cmovs) and truncated back; longs cmov both register-pair halves.
void X86ISel::emitSelectOperation(MachineBasicBlock *MBB,
                                  MachineBasicBlock::iterator IP,
                                  Value *Cond, Value *TrueVal, Value *FalseVal,
                                  unsigned DestReg) {
  unsigned SelectClass = getClassB(TrueVal->getType());
  
  // We don't support 8-bit conditional moves.  If we have incoming constants,
  // transform them into 16-bit constants to avoid having a run-time conversion.
  if (SelectClass == cByte) {
    if (Constant *T = dyn_cast<Constant>(TrueVal))
      TrueVal = ConstantExpr::getCast(T, Type::ShortTy);
    if (Constant *F = dyn_cast<Constant>(FalseVal))
      FalseVal = ConstantExpr::getCast(F, Type::ShortTy);
  }

  unsigned TrueReg  = getReg(TrueVal, MBB, IP);
  unsigned FalseReg = getReg(FalseVal, MBB, IP);
  if (TrueReg == FalseReg) {
    // Both arms are the same register: no cmov needed, just copy.
    static const unsigned Opcode[] = {
      X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV, X86::MOV32rr
    };
    BuildMI(*MBB, IP, Opcode[SelectClass], 1, DestReg).addReg(TrueReg);
    if (SelectClass == cLong)
      BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(TrueReg+1);
    return;
  }

  unsigned Opcode;
  if (SetCondInst *SCI = canFoldSetCCIntoBranchOrSelect(Cond)) {
    // We successfully folded the setcc into the select instruction.
    
    unsigned OpNum = getSetCCNumber(SCI->getOpcode());
    OpNum = EmitComparison(OpNum, SCI->getOperand(0), SCI->getOperand(1), MBB,
                           IP);

    const Type *CompTy = SCI->getOperand(0)->getType();
    bool isSigned = CompTy->isSigned() && getClassB(CompTy) != cFP;
  
    // The cmov condition is the INVERSE of the setcc: the cmov overwrites
    // DestReg (initialized with the true value) with the false value when
    // the condition FAILS.
    //
    // LLVM  -> X86 signed  X86 unsigned
    // -----    ----------  ------------
    // seteq -> cmovNE      cmovNE
    // setne -> cmovE       cmovE
    // setlt -> cmovGE      cmovAE
    // setge -> cmovL       cmovB
    // setgt -> cmovLE      cmovBE
    // setle -> cmovG       cmovA
    // ----
    //          cmovNS              // Used by comparison with 0 optimization
    //          cmovS
    
    switch (SelectClass) {
    default: assert(0 && "Unknown value class!");
    case cFP: {
      // Annoyingly, we don't have a full set of floating point conditional
      // moves.  :(
      static const unsigned OpcodeTab[2][8] = {
        { X86::FCMOVNE, X86::FCMOVE, X86::FCMOVAE, X86::FCMOVB,
          X86::FCMOVBE, X86::FCMOVA, 0, 0 },
        { X86::FCMOVNE, X86::FCMOVE, 0, 0, 0, 0, 0, 0 },
      };
      Opcode = OpcodeTab[isSigned][OpNum];

      // If opcode == 0, we hit a case that we don't support.  Output a setcc
      // and compare the result against zero.
      if (Opcode == 0) {
        unsigned CompClass = getClassB(CompTy);
        unsigned CondReg;
        if (CompClass != cLong || OpNum < 2) {
          CondReg = makeAnotherReg(Type::BoolTy);
          // Handle normal comparisons with a setcc instruction...
          BuildMI(*MBB, IP, SetCCOpcodeTab[isSigned][OpNum], 0, CondReg);
        } else {
          // Long comparisons end up in the BL register.
          CondReg = X86::BL;
        }
        
        BuildMI(*MBB, IP, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg);
        Opcode = X86::FCMOVE;
      }
      break;
    }
    case cByte:
    case cShort: {
      static const unsigned OpcodeTab[2][8] = {
        { X86::CMOVNE16rr, X86::CMOVE16rr, X86::CMOVAE16rr, X86::CMOVB16rr,
          X86::CMOVBE16rr, X86::CMOVA16rr, 0, 0 },
        { X86::CMOVNE16rr, X86::CMOVE16rr, X86::CMOVGE16rr, X86::CMOVL16rr,
          X86::CMOVLE16rr, X86::CMOVG16rr, X86::CMOVNS16rr, X86::CMOVS16rr },
      };
      Opcode = OpcodeTab[isSigned][OpNum];
      break;
    }
    case cInt:
    case cLong: {
      static const unsigned OpcodeTab[2][8] = {
        { X86::CMOVNE32rr, X86::CMOVE32rr, X86::CMOVAE32rr, X86::CMOVB32rr,
          X86::CMOVBE32rr, X86::CMOVA32rr, 0, 0 },
        { X86::CMOVNE32rr, X86::CMOVE32rr, X86::CMOVGE32rr, X86::CMOVL32rr,
          X86::CMOVLE32rr, X86::CMOVG32rr, X86::CMOVNS32rr, X86::CMOVS32rr },
      };
      Opcode = OpcodeTab[isSigned][OpNum];
      break;
    }
    }
  } else {
    // Get the value being branched on, and use it to set the condition codes.
    unsigned CondReg = getReg(Cond, MBB, IP);
    BuildMI(*MBB, IP, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg);
    switch (SelectClass) {
    default: assert(0 && "Unknown value class!");
    case cFP:    Opcode = X86::FCMOVE; break;
    case cByte:
    case cShort: Opcode = X86::CMOVE16rr; break;
    case cInt:
    case cLong:  Opcode = X86::CMOVE32rr; break;
    }
  }

  unsigned RealDestReg = DestReg;


  // Annoyingly enough, X86 doesn't HAVE 8-bit conditional moves.  Because of
  // this, we have to promote the incoming values to 16 bits, perform a 16-bit
  // cmove, then truncate the result.
  if (SelectClass == cByte) {
    DestReg = makeAnotherReg(Type::ShortTy);
    if (getClassB(TrueVal->getType()) == cByte) {
      // Promote the true value, by storing it into AL, and reading from AX.
      BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::AL).addReg(TrueReg);
      BuildMI(*MBB, IP, X86::MOV8ri, 1, X86::AH).addImm(0);
      TrueReg = makeAnotherReg(Type::ShortTy);
      BuildMI(*MBB, IP, X86::MOV16rr, 1, TrueReg).addReg(X86::AX);
    }
    if (getClassB(FalseVal->getType()) == cByte) {
      // Promote the false value, by storing it into CL, and reading from CX.
      BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::CL).addReg(FalseReg);
      BuildMI(*MBB, IP, X86::MOV8ri, 1, X86::CH).addImm(0);
      FalseReg = makeAnotherReg(Type::ShortTy);
      BuildMI(*MBB, IP, X86::MOV16rr, 1, FalseReg).addReg(X86::CX);
    }
  }

  BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(TrueReg).addReg(FalseReg);

  switch (SelectClass) {
  case cByte:
    // We did the computation with 16-bit registers.  Truncate back to our
    // result by copying into AX then copying out AL.
    BuildMI(*MBB, IP, X86::MOV16rr, 1, X86::AX).addReg(DestReg);
    BuildMI(*MBB, IP, X86::MOV8rr, 1, RealDestReg).addReg(X86::AL);
    break;
  case cLong:
    // Move the upper half of the value as well.
    BuildMI(*MBB, IP, Opcode, 2,DestReg+1).addReg(TrueReg+1).addReg(FalseReg+1);
    break;
  }
}
01319 
01320 
01321 
01322 /// promote32 - Emit instructions to turn a narrow operand into a 32-bit-wide
01323 /// operand, in the specified target register.
01324 ///
01325 void X86ISel::promote32(unsigned targetReg, const ValueRecord &VR) {
01326   bool isUnsigned = VR.Ty->isUnsigned() || VR.Ty == Type::BoolTy;
01327 
01328   Value *Val = VR.Val;
01329   const Type *Ty = VR.Ty;
01330   if (Val) {
01331     if (Constant *C = dyn_cast<Constant>(Val)) {
01332       Val = ConstantExpr::getCast(C, Type::IntTy);
01333       Ty = Type::IntTy;
01334     }
01335 
01336     // If this is a simple constant, just emit a MOVri directly to avoid the
01337     // copy.
01338     if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
01339       int TheVal = CI->getRawValue() & 0xFFFFFFFF;
01340       BuildMI(BB, X86::MOV32ri, 1, targetReg).addImm(TheVal);
01341       return;
01342     }
01343   }
01344 
01345   // Make sure we have the register number for this value...
01346   unsigned Reg = Val ? getReg(Val) : VR.Reg;
01347 
01348   switch (getClassB(Ty)) {
01349   case cByte:
01350     // Extend value into target register (8->32)
01351     if (isUnsigned)
01352       BuildMI(BB, X86::MOVZX32rr8, 1, targetReg).addReg(Reg);
01353     else
01354       BuildMI(BB, X86::MOVSX32rr8, 1, targetReg).addReg(Reg);
01355     break;
01356   case cShort:
01357     // Extend value into target register (16->32)
01358     if (isUnsigned)
01359       BuildMI(BB, X86::MOVZX32rr16, 1, targetReg).addReg(Reg);
01360     else
01361       BuildMI(BB, X86::MOVSX32rr16, 1, targetReg).addReg(Reg);
01362     break;
01363   case cInt:
01364     // Move value into target register (32->32)
01365     BuildMI(BB, X86::MOV32rr, 1, targetReg).addReg(Reg);
01366     break;
01367   default:
01368     assert(0 && "Unpromotable operand class in promote32");
01369   }
01370 }
01371 
/// 'ret' instruction - Here we are interested in meeting the x86 ABI.  As such,
/// we have the following possibilities:
///
///   ret void: No return value, simply emit a 'ret' instruction
///   ret sbyte, ubyte : Extend value into EAX and return
///   ret short, ushort: Extend value into EAX and return
///   ret int, uint    : Move value into EAX and return
///   ret pointer      : Move value into EAX and return
///   ret long, ulong  : Move value into EAX/EDX and return
///   ret float/double : Top of FP stack
///
void X86ISel::visitReturnInst(ReturnInst &I) {
  if (I.getNumOperands() == 0) {
    BuildMI(BB, X86::RET, 0); // Just emit a 'ret' instruction
    return;
  }

  Value *RetVal = I.getOperand(0);
  switch (getClassB(RetVal->getType())) {
  case cByte:   // integral return values: extend or move into EAX and return
  case cShort:
  case cInt:
    // promote32 sign/zero-extends (or moves) the value into the full EAX.
    promote32(X86::EAX, ValueRecord(RetVal));
    // Declare that EAX is live on exit.  ESP is listed too so the stack
    // pointer is treated as live across the return sequence — NOTE(review):
    // confirm this is the intent of including ESP in the IMPLICIT_USE.
    BuildMI(BB, X86::IMPLICIT_USE, 2).addReg(X86::EAX).addReg(X86::ESP);
    break;
  case cFP: {                  // Floats & Doubles: Return in ST(0)
    unsigned RetReg = getReg(RetVal);
    BuildMI(BB, X86::FpSETRESULT, 1).addReg(RetReg);
    // Declare that top-of-stack is live on exit
    BuildMI(BB, X86::IMPLICIT_USE, 2).addReg(X86::ST0).addReg(X86::ESP);
    break;
  }
  case cLong: {
    // Longs occupy a virtual register pair: RetReg holds the low 32 bits and
    // RetReg+1 the high 32 bits (the same convention doCall uses when it
    // scavenges a long result out of EDX:EAX).
    unsigned RetReg = getReg(RetVal);
    BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(RetReg);
    BuildMI(BB, X86::MOV32rr, 1, X86::EDX).addReg(RetReg+1);
    // Declare that EAX & EDX are live on exit
    BuildMI(BB, X86::IMPLICIT_USE, 3).addReg(X86::EAX).addReg(X86::EDX)
      .addReg(X86::ESP);
    break;
  }
  default:
    // Unknown operand class: defer to the generic visitor.
    visitInstruction(I);
  }
  // Emit a 'ret' instruction
  BuildMI(BB, X86::RET, 0);
}
01420 
01421 // getBlockAfter - Return the basic block which occurs lexically after the
01422 // specified one.
01423 static inline BasicBlock *getBlockAfter(BasicBlock *BB) {
01424   Function::iterator I = BB; ++I;  // Get iterator to next block
01425   return I != BB->getParent()->end() ? &*I : 0;
01426 }
01427 
/// visitBranchInst - Handle conditional and unconditional branches here.  Note
/// that since code layout is frozen at this point, that if we are trying to
/// jump to a block that is the immediate successor of the current block, we can
/// just make a fall-through (but we don't currently).
///
void X86ISel::visitBranchInst(BranchInst &BI) {
  // Update machine-CFG edges
  BB->addSuccessor (MBBMap[BI.getSuccessor(0)]);
  if (BI.isConditional())
    BB->addSuccessor (MBBMap[BI.getSuccessor(1)]);

  BasicBlock *NextBB = getBlockAfter(BI.getParent());  // BB after current one

  if (!BI.isConditional()) {  // Unconditional branch?
    // Emit a JMP only if the target is not the lexically-next block; the
    // fall-through handles the common case for free.
    if (BI.getSuccessor(0) != NextBB)
      BuildMI(BB, X86::JMP, 1).addMBB(MBBMap[BI.getSuccessor(0)]);
    return;
  }

  // See if we can fold the setcc into the branch itself...
  SetCondInst *SCI = canFoldSetCCIntoBranchOrSelect(BI.getCondition());
  if (SCI == 0) {
    // Nope, cannot fold setcc into this branch.  Emit a branch on a condition
    // computed some other way...  TEST reg,reg sets ZF iff the boolean is 0.
    unsigned condReg = getReg(BI.getCondition());
    BuildMI(BB, X86::TEST8rr, 2).addReg(condReg).addReg(condReg);
    if (BI.getSuccessor(1) == NextBB) {
      // False edge falls through; branch to the true block if needed.
      if (BI.getSuccessor(0) != NextBB)
        BuildMI(BB, X86::JNE, 1).addMBB(MBBMap[BI.getSuccessor(0)]);
    } else {
      // Branch to the false block on zero, then fall through or jump to the
      // true block.
      BuildMI(BB, X86::JE, 1).addMBB(MBBMap[BI.getSuccessor(1)]);
      
      if (BI.getSuccessor(0) != NextBB)
        BuildMI(BB, X86::JMP, 1).addMBB(MBBMap[BI.getSuccessor(0)]);
    }
    return;
  }

  // The setcc folds: emit the comparison right here and branch on its flags.
  unsigned OpNum = getSetCCNumber(SCI->getOpcode());
  MachineBasicBlock::iterator MII = BB->end();
  OpNum = EmitComparison(OpNum, SCI->getOperand(0), SCI->getOperand(1), BB,MII);

  const Type *CompTy = SCI->getOperand(0)->getType();
  bool isSigned = CompTy->isSigned() && getClassB(CompTy) != cFP;
  

  // LLVM  -> X86 signed  X86 unsigned
  // -----    ----------  ------------
  // seteq -> je          je
  // setne -> jne         jne
  // setlt -> jl          jb
  // setge -> jge         jae
  // setgt -> jg          ja
  // setle -> jle         jbe
  // ----
  //          js                  // Used by comparison with 0 optimization
  //          jns

  // Indexed by [isSigned][OpNum].  Entries 6/7 (JS/JNS) exist only in the
  // signed row; EmitComparison's compare-with-zero optimization is what can
  // return those opcode numbers.
  static const unsigned OpcodeTab[2][8] = {
    { X86::JE, X86::JNE, X86::JB, X86::JAE, X86::JA, X86::JBE, 0, 0 },
    { X86::JE, X86::JNE, X86::JL, X86::JGE, X86::JG, X86::JLE,
      X86::JS, X86::JNS },
  };
  
  if (BI.getSuccessor(0) != NextBB) {
    BuildMI(BB, OpcodeTab[isSigned][OpNum], 1)
      .addMBB(MBBMap[BI.getSuccessor(0)]);
    if (BI.getSuccessor(1) != NextBB)
      BuildMI(BB, X86::JMP, 1).addMBB(MBBMap[BI.getSuccessor(1)]);
  } else {
    // True edge falls through.  Change to the inverse condition and branch to
    // the false block instead; the table pairs each condition with its
    // inverse at OpNum^1 (JE/JNE, JL/JGE, JG/JLE, JS/JNS).
    if (BI.getSuccessor(1) != NextBB) {
      OpNum ^= 1;
      BuildMI(BB, OpcodeTab[isSigned][OpNum], 1)
        .addMBB(MBBMap[BI.getSuccessor(1)]);
    }
  }
}
01506 
01507 
/// doCall - This emits an abstract call instruction, setting up the arguments
/// and the return value as appropriate.  For the actual function call itself,
/// it inserts the specified CallMI instruction into the stream.
///
/// Ret    - describes where the callee's result should land; Ret.Ty ==
///          VoidTy means no result is expected.
/// CallMI - the already-constructed CALL MachineInstr; it is inserted between
///          the ADJCALLSTACKDOWN/ADJCALLSTACKUP bracket emitted here.
/// Args   - the actual arguments, stored to the outgoing-argument area in
///          x86 C calling convention layout.
///
void X86ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI,
                     const std::vector<ValueRecord> &Args) {
  // Count how many bytes are to be pushed on the stack...
  // Every argument occupies at least one 4-byte slot; longs and doubles
  // occupy two.
  unsigned NumBytes = 0;

  if (!Args.empty()) {
    for (unsigned i = 0, e = Args.size(); i != e; ++i)
      switch (getClassB(Args[i].Ty)) {
      case cByte: case cShort: case cInt:
        NumBytes += 4; break;
      case cLong:
        NumBytes += 8; break;
      case cFP:
        NumBytes += Args[i].Ty == Type::FloatTy ? 4 : 8;
        break;
      default: assert(0 && "Unknown class!");
      }

    // Adjust the stack pointer for the new arguments...
    BuildMI(BB, X86::ADJCALLSTACKDOWN, 1).addImm(NumBytes);

    // Store arguments at increasing offsets from ESP.  The first argument
    // lands at the lowest address, which is the same layout the ABI's
    // right-to-left pushes would produce.
    unsigned ArgOffset = 0;
    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      unsigned ArgReg;
      switch (getClassB(Args[i].Ty)) {
      case cByte:
        // Boolean constants can be stored directly as a 0/1 immediate.
        // NOTE(review): relies on ConstantBool::True being a singleton so
        // pointer identity distinguishes true from false.
        if (Args[i].Val && isa<ConstantBool>(Args[i].Val)) {
          addRegOffset(BuildMI(BB, X86::MOV32mi, 5), X86::ESP, ArgOffset)
            .addImm(Args[i].Val == ConstantBool::True);
          break;
        }
        // FALL THROUGH
      case cShort:
        if (Args[i].Val && isa<ConstantInt>(Args[i].Val)) {
          // Zero/Sign extend constant, then stuff into memory.
          ConstantInt *Val = cast<ConstantInt>(Args[i].Val);
          Val = cast<ConstantInt>(ConstantExpr::getCast(Val, Type::IntTy));
          addRegOffset(BuildMI(BB, X86::MOV32mi, 5), X86::ESP, ArgOffset)
            .addImm(Val->getRawValue() & 0xFFFFFFFF);
        } else {
          // Promote arg to 32 bits wide into a temporary register...
          ArgReg = makeAnotherReg(Type::UIntTy);
          promote32(ArgReg, Args[i]);
          addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
                       X86::ESP, ArgOffset).addReg(ArgReg);
        }
        break;
      case cInt:
        if (Args[i].Val && isa<ConstantInt>(Args[i].Val)) {
          // Store the 32-bit constant directly as an immediate.
          unsigned Val = cast<ConstantInt>(Args[i].Val)->getRawValue();
          addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
                       X86::ESP, ArgOffset).addImm(Val);
        } else if (Args[i].Val && isa<ConstantPointerNull>(Args[i].Val)) {
          // Null pointers are just a zero immediate.
          addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
                       X86::ESP, ArgOffset).addImm(0);
        } else {
          ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
          addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
                       X86::ESP, ArgOffset).addReg(ArgReg);
        }
        break;
      case cLong:
        if (Args[i].Val && isa<ConstantInt>(Args[i].Val)) {
          // Split the 64-bit constant into two 32-bit immediate stores.
          uint64_t Val = cast<ConstantInt>(Args[i].Val)->getRawValue();
          addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
                       X86::ESP, ArgOffset).addImm(Val & ~0U);
          addRegOffset(BuildMI(BB, X86::MOV32mi, 5),
                       X86::ESP, ArgOffset+4).addImm(Val >> 32ULL);
        } else {
          // Longs live in a register pair: ArgReg = low word, ArgReg+1 = high.
          ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
          addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
                       X86::ESP, ArgOffset).addReg(ArgReg);
          addRegOffset(BuildMI(BB, X86::MOV32mr, 5),
                       X86::ESP, ArgOffset+4).addReg(ArgReg+1);
        }
        ArgOffset += 4;        // 8 byte entry, not 4.
        break;
        
      case cFP:
        ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg;
        if (Args[i].Ty == Type::FloatTy) {
          addRegOffset(BuildMI(BB, X86::FST32m, 5),
                       X86::ESP, ArgOffset).addReg(ArgReg);
        } else {
          assert(Args[i].Ty == Type::DoubleTy && "Unknown FP type!");
          addRegOffset(BuildMI(BB, X86::FST64m, 5),
                       X86::ESP, ArgOffset).addReg(ArgReg);
          ArgOffset += 4;       // 8 byte entry, not 4.
        }
        break;

      default: assert(0 && "Unknown class!");
      }
      ArgOffset += 4;          // Advance to the next 4-byte slot.
    }
  } else {
    // No arguments: still emit the bracket so the stack bookkeeping matches.
    BuildMI(BB, X86::ADJCALLSTACKDOWN, 1).addImm(0);
  }

  BB->push_back(CallMI);

  BuildMI(BB, X86::ADJCALLSTACKUP, 1).addImm(NumBytes);

  // If there is a return value, scavenge the result from the location the call
  // leaves it in...
  //
  if (Ret.Ty != Type::VoidTy) {
    unsigned DestClass = getClassB(Ret.Ty);
    switch (DestClass) {
    case cByte:
    case cShort:
    case cInt: {
      // Integral results are in %eax, or the appropriate portion
      // thereof.
      static const unsigned regRegMove[] = {
        X86::MOV8rr, X86::MOV16rr, X86::MOV32rr
      };
      static const unsigned AReg[] = { X86::AL, X86::AX, X86::EAX };
      BuildMI(BB, regRegMove[DestClass], 1, Ret.Reg).addReg(AReg[DestClass]);
      break;
    }
    case cFP:     // Floating-point return values live in %ST(0)
      BuildMI(BB, X86::FpGETRESULT, 1, Ret.Reg);
      break;
    case cLong:   // Long values are left in EDX:EAX
      BuildMI(BB, X86::MOV32rr, 1, Ret.Reg).addReg(X86::EAX);
      BuildMI(BB, X86::MOV32rr, 1, Ret.Reg+1).addReg(X86::EDX);
      break;
    default: assert(0 && "Unknown class!");
    }
  }
}
01645 
01646 
01647 /// visitCallInst - Push args on stack and do a procedure call instruction.
01648 void X86ISel::visitCallInst(CallInst &CI) {
01649   MachineInstr *TheCall;
01650   if (Function *F = CI.getCalledFunction()) {
01651     // Is it an intrinsic function call?
01652     if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) {
01653       visitIntrinsicCall(ID, CI);   // Special intrinsics are not handled here
01654       return;
01655     }
01656 
01657     // Emit a CALL instruction with PC-relative displacement.
01658     TheCall = BuildMI(X86::CALLpcrel32, 1).addGlobalAddress(F, true);
01659   } else {  // Emit an indirect call...
01660     unsigned Reg = getReg(CI.getCalledValue());
01661     TheCall = BuildMI(X86::CALL32r, 1).addReg(Reg);
01662   }
01663 
01664   std::vector<ValueRecord> Args;
01665   for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i)
01666     Args.push_back(ValueRecord(CI.getOperand(i)));
01667 
01668   unsigned DestReg = CI.getType() != Type::VoidTy ? getReg(CI) : 0;
01669   doCall(ValueRecord(DestReg, CI.getType()), TheCall, Args);
01670 }         
01671 
01672 /// LowerUnknownIntrinsicFunctionCalls - This performs a prepass over the
01673 /// function, lowering any calls to unknown intrinsic functions into the
01674 /// equivalent LLVM code.
01675 ///
01676 void X86ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
01677   for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
01678     for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
01679       if (CallInst *CI = dyn_cast<CallInst>(I++))
01680         if (Function *F = CI->getCalledFunction())
01681           switch (F->getIntrinsicID()) {
01682           case Intrinsic::not_intrinsic:
01683           case Intrinsic::vastart:
01684           case Intrinsic::vacopy:
01685           case Intrinsic::vaend:
01686           case Intrinsic::returnaddress:
01687           case Intrinsic::frameaddress:
01688           case Intrinsic::memcpy:
01689           case Intrinsic::memset:
01690           case Intrinsic::isunordered:
01691           case Intrinsic::readport:
01692           case Intrinsic::writeport:
01693             // We directly implement these intrinsics
01694             break;
01695           case Intrinsic::readio: {
01696             // On X86, memory operations are in-order.  Lower this intrinsic
01697             // into a volatile load.
01698             Instruction *Before = CI->getPrev();
01699             LoadInst * LI = new LoadInst(CI->getOperand(1), "", true, CI);
01700             CI->replaceAllUsesWith(LI);
01701             BB->getInstList().erase(CI);
01702             break;
01703           }
01704           case Intrinsic::writeio: {
01705             // On X86, memory operations are in-order.  Lower this intrinsic
01706             // into a volatile store.
01707             Instruction *Before = CI->getPrev();
01708             StoreInst *LI = new StoreInst(CI->getOperand(1),
01709                                           CI->getOperand(2), true, CI);
01710             CI->replaceAllUsesWith(LI);
01711             BB->getInstList().erase(CI);
01712             break;
01713           }
01714           default:
01715             // All other intrinsic calls we must lower.
01716             Instruction *Before = CI->getPrev();
01717             TM.getIntrinsicLowering().LowerIntrinsicCall(CI);
01718             if (Before) {        // Move iterator to instruction after call
01719               I = Before; ++I;
01720             } else {
01721               I = BB->begin();
01722             }
01723           }
01724 }
01725 
01726 void X86ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
01727   unsigned TmpReg1, TmpReg2;
01728   switch (ID) {
01729   case Intrinsic::vastart:
01730     // Get the address of the first vararg value...
01731     TmpReg1 = getReg(CI);
01732     addFrameReference(BuildMI(BB, X86::LEA32r, 5, TmpReg1), VarArgsFrameIndex);
01733     return;
01734 
01735   case Intrinsic::vacopy:
01736     TmpReg1 = getReg(CI);
01737     TmpReg2 = getReg(CI.getOperand(1));
01738     BuildMI(BB, X86::MOV32rr, 1, TmpReg1).addReg(TmpReg2);
01739     return;
01740   case Intrinsic::vaend: return;   // Noop on X86
01741 
01742   case Intrinsic::returnaddress:
01743   case Intrinsic::frameaddress:
01744     TmpReg1 = getReg(CI);
01745     if (cast<Constant>(CI.getOperand(1))->isNullValue()) {
01746       if (ID == Intrinsic::returnaddress) {
01747         // Just load the return address
01748         addFrameReference(BuildMI(BB, X86::MOV32rm, 4, TmpReg1),
01749                           ReturnAddressIndex);
01750       } else {
01751         addFrameReference(BuildMI(BB, X86::LEA32r, 4, TmpReg1),
01752                           ReturnAddressIndex, -4);
01753       }
01754     } else {
01755       // Values other than zero are not implemented yet.
01756       BuildMI(BB, X86::MOV32ri, 1, TmpReg1).addImm(0);
01757     }
01758     return;
01759 
01760   case Intrinsic::isunordered:
01761     TmpReg1 = getReg(CI.getOperand(1));
01762     TmpReg2 = getReg(CI.getOperand(2));
01763     emitUCOMr(BB, BB->end(), TmpReg2, TmpReg1);
01764     TmpReg2 = getReg(CI);
01765     BuildMI(BB, X86::SETPr, 0, TmpReg2);
01766     return;
01767 
01768   case Intrinsic::memcpy: {
01769     assert(CI.getNumOperands() == 5 && "Illegal llvm.memcpy call!");
01770     unsigned Align = 1;
01771     if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) {
01772       Align = AlignC->getRawValue();
01773       if (Align == 0) Align = 1;
01774     }
01775 
01776     // Turn the byte code into # iterations
01777     unsigned CountReg;
01778     unsigned Opcode;
01779     switch (Align & 3) {
01780     case 2:   // WORD aligned
01781       if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
01782         CountReg = getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/2));
01783       } else {
01784         CountReg = makeAnotherReg(Type::IntTy);
01785         unsigned ByteReg = getReg(CI.getOperand(3));
01786         BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(1);
01787       }
01788       Opcode = X86::REP_MOVSW;
01789       break;
01790     case 0:   // DWORD aligned
01791       if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
01792         CountReg = getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/4));
01793       } else {
01794         CountReg = makeAnotherReg(Type::IntTy);
01795         unsigned ByteReg = getReg(CI.getOperand(3));
01796         BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(2);
01797       }
01798       Opcode = X86::REP_MOVSD;
01799       break;
01800     default:  // BYTE aligned
01801       CountReg = getReg(CI.getOperand(3));
01802       Opcode = X86::REP_MOVSB;
01803       break;
01804     }
01805 
01806     // No matter what the alignment is, we put the source in ESI, the
01807     // destination in EDI, and the count in ECX.
01808     TmpReg1 = getReg(CI.getOperand(1));
01809     TmpReg2 = getReg(CI.getOperand(2));
01810     BuildMI(BB, X86::MOV32rr, 1, X86::ECX).addReg(CountReg);
01811     BuildMI(BB, X86::MOV32rr, 1, X86::EDI).addReg(TmpReg1);
01812     BuildMI(BB, X86::MOV32rr, 1, X86::ESI).addReg(TmpReg2);
01813     BuildMI(BB, Opcode, 0);
01814     return;
01815   }
01816   case Intrinsic::memset: {
01817     assert(CI.getNumOperands() == 5 && "Illegal llvm.memset call!");
01818     unsigned Align = 1;
01819     if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) {
01820       Align = AlignC->getRawValue();
01821       if (Align == 0) Align = 1;
01822     }
01823 
01824     // Turn the byte code into # iterations
01825     unsigned CountReg;
01826     unsigned Opcode;
01827     if (ConstantInt *ValC = dyn_cast<ConstantInt>(CI.getOperand(2))) {
01828       unsigned Val = ValC->getRawValue() & 255;
01829 
01830       // If the value is a constant, then we can potentially use larger copies.
01831       switch (Align & 3) {
01832       case 2:   // WORD aligned
01833         if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
01834           CountReg =getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/2));
01835         } else {
01836           CountReg = makeAnotherReg(Type::IntTy);
01837           unsigned ByteReg = getReg(CI.getOperand(3));
01838           BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(1);
01839         }
01840         BuildMI(BB, X86::MOV16ri, 1, X86::AX).addImm((Val << 8) | Val);
01841         Opcode = X86::REP_STOSW;
01842         break;
01843       case 0:   // DWORD aligned
01844         if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
01845           CountReg =getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/4));
01846         } else {
01847           CountReg = makeAnotherReg(Type::IntTy);
01848           unsigned ByteReg = getReg(CI.getOperand(3));
01849           BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(2);
01850         }
01851         Val = (Val << 8) | Val;
01852         BuildMI(BB, X86::MOV32ri, 1, X86::EAX).addImm((Val << 16) | Val);
01853         Opcode = X86::REP_STOSD;
01854         break;
01855       default:  // BYTE aligned
01856         CountReg = getReg(CI.getOperand(3));
01857         BuildMI(BB, X86::MOV8ri, 1, X86::AL).addImm(Val);
01858         Opcode = X86::REP_STOSB;
01859         break;
01860       }
01861     } else {
01862       // If it's not a constant value we are storing, just fall back.  We could
01863       // try to be clever to form 16 bit and 32 bit values, but we don't yet.
01864       unsigned ValReg = getReg(CI.getOperand(2));
01865       BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(ValReg);
01866       CountReg = getReg(CI.getOperand(3));
01867       Opcode = X86::REP_STOSB;
01868     }
01869 
01870     // No matter what the alignment is, we put the source in ESI, the
01871     // destination in EDI, and the count in ECX.
01872     TmpReg1 = getReg(CI.getOperand(1));
01873     //TmpReg2 = getReg(CI.getOperand(2));
01874     BuildMI(BB, X86::MOV32rr, 1, X86::ECX).addReg(CountReg);
01875     BuildMI(BB, X86::MOV32rr, 1, X86::EDI).addReg(TmpReg1);
01876     BuildMI(BB, Opcode, 0);
01877     return;
01878   }
01879 
01880   case Intrinsic::readport: {
01881     // First, determine that the size of the operand falls within the acceptable
01882     // range for this architecture.
01883     //
01884     if (getClassB(CI.getOperand(1)->getType()) != cShort) {
01885       std::cerr << "llvm.readport: Address size is not 16 bits\n";
01886       exit(1);
01887     }
01888 
01889     // Now, move the I/O port address into the DX register and use the IN
01890     // instruction to get the input data.
01891     //
01892     unsigned Class = getClass(CI.getCalledFunction()->getReturnType());
01893     unsigned DestReg = getReg(CI);
01894 
01895     // If the port is a single-byte constant, use the immediate form.
01896     if (ConstantInt *C = dyn_cast<ConstantInt>(CI.getOperand(1)))
01897       if ((C->getRawValue() & 255) == C->getRawValue()) {
01898         switch (Class) {
01899         case cByte:
01900           BuildMI(BB, X86::IN8ri, 1).addImm((unsigned char)C->getRawValue());
01901           BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AL);
01902           return;
01903         case cShort:
01904           BuildMI(BB, X86::IN16ri, 1).addImm((unsigned char)C->getRawValue());
01905           BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AX);
01906           return;
01907         case cInt:
01908           BuildMI(BB, X86::IN32ri, 1).addImm((unsigned char)C->getRawValue());
01909           BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::EAX);
01910           return;
01911         }
01912       }
01913 
01914     unsigned Reg = getReg(CI.getOperand(1));
01915     BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(Reg);
01916     switch (Class) {
01917     case cByte:
01918       BuildMI(BB, X86::IN8rr, 0);
01919       BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AL);
01920       break;
01921     case cShort:
01922       BuildMI(BB, X86::IN16rr, 0);
01923       BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AX);
01924       break;
01925     case cInt:
01926       BuildMI(BB, X86::IN32rr, 0);
01927       BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::EAX);
01928       break;
01929     default:
01930       std::cerr << "Cannot do input on this data type";
01931       exit (1);
01932     }
01933     return;
01934   }
01935 
01936   case Intrinsic::writeport: {
01937     // First, determine that the size of the operand falls within the
01938     // acceptable range for this architecture.
01939     if (getClass(CI.getOperand(2)->getType()) != cShort) {
01940       std::cerr << "llvm.writeport: Address size is not 16 bits\n";
01941       exit(1);
01942     }
01943 
01944     unsigned Class = getClassB(CI.getOperand(1)->getType());
01945     unsigned ValReg = getReg(CI.getOperand(1));
01946     switch (Class) {
01947     case cByte:
01948       BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(ValReg);
01949       break;
01950     case cShort:
01951       BuildMI(BB, X86::MOV16rr, 1, X86::AX).addReg(ValReg);
01952       break;
01953     case cInt:
01954       BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(ValReg);
01955       break;
01956     default:
01957       std::cerr << "llvm.writeport: invalid data type for X86 target";
01958       exit(1);
01959     }
01960 
01961 
01962     // If the port is a single-byte constant, use the immediate form.
01963     if (ConstantInt *C = dyn_cast<ConstantInt>(CI.getOperand(2)))
01964       if ((C->getRawValue() & 255) == C->getRawValue()) {
01965         static const unsigned O[] = { X86::OUT8ir, X86::OUT16ir, X86::OUT32ir };
01966         BuildMI(BB, O[Class], 1).addImm((unsigned char)C->getRawValue());
01967         return;
01968       }
01969 
01970     // Otherwise, move the I/O port address into the DX register and the value
01971     // to write into the AL/AX/EAX register.
01972     static const unsigned Opc[] = { X86::OUT8rr, X86::OUT16rr, X86::OUT32rr };
01973     unsigned Reg = getReg(CI.getOperand(2));
01974     BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(Reg);
01975     BuildMI(BB, Opc[Class], 0);
01976     return;
01977   }
01978     
01979   default: assert(0 && "Error: unknown intrinsics should have been lowered!");
01980   }
01981 }
01982 
01983 static bool isSafeToFoldLoadIntoInstruction(LoadInst &LI, Instruction &User) {
01984   if (LI.getParent() != User.getParent())
01985     return false;
01986   BasicBlock::iterator It = &LI;
01987   // Check all of the instructions between the load and the user.  We should
01988   // really use alias analysis here, but for now we just do something simple.
01989   for (++It; It != BasicBlock::iterator(&User); ++It) {
01990     switch (It->getOpcode()) {
01991     case Instruction::Free:
01992     case Instruction::Store:
01993     case Instruction::Call:
01994     case Instruction::Invoke:
01995       return false;
01996     case Instruction::Load:
01997       if (cast<LoadInst>(It)->isVolatile() && LI.isVolatile())
01998         return false;
01999       break;
02000     }
02001   }
02002   return true;
02003 }
02004 
02005 /// visitSimpleBinary - Implement simple binary operators for integral types...
02006 /// OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or, 4 for
02007 /// Xor.
02008 ///
02009 void X86ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) {
02010   unsigned DestReg = getReg(B);
02011   MachineBasicBlock::iterator MI = BB->end();
02012   Value *Op0 = B.getOperand(0), *Op1 = B.getOperand(1);
02013   unsigned Class = getClassB(B.getType());
02014 
02015   // If this is AND X, C, and it is only used by a setcc instruction, it will
02016   // be folded.  There is no need to emit this instruction.
02017   if (B.hasOneUse() && OperatorClass == 2 && isa<ConstantInt>(Op1))
02018     if (Class == cByte || Class == cShort || Class == cInt) {
02019       Instruction *Use = cast<Instruction>(B.use_back());
02020       if (isa<SetCondInst>(Use) &&
02021           Use->getOperand(1) == Constant::getNullValue(B.getType())) {
02022         switch (getSetCCNumber(Use->getOpcode())) {
02023         case 0:
02024         case 1:
02025           return;
02026         default:
02027           if (B.getType()->isSigned()) return;
02028         }
02029       }
02030     }
02031 
02032   // Special case: op Reg, load [mem]
02033   if (isa<LoadInst>(Op0) && !isa<LoadInst>(Op1) && Class != cLong &&
02034       Op0->hasOneUse() && 
02035       isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op0), B))
02036     if (!B.swapOperands())
02037       std::swap(Op0, Op1);  // Make sure any loads are in the RHS.
02038 
02039   if (isa<LoadInst>(Op1) && Class != cLong && Op1->hasOneUse() &&
02040       isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op1), B)) {
02041 
02042     unsigned Opcode;
02043     if (Class != cFP) {
02044       static const unsigned OpcodeTab[][3] = {
02045         // Arithmetic operators
02046         { X86::ADD8rm, X86::ADD16rm, X86::ADD32rm },  // ADD
02047         { X86::SUB8rm, X86::SUB16rm, X86::SUB32rm },  // SUB
02048         
02049         // Bitwise operators
02050         { X86::AND8rm, X86::AND16rm, X86::AND32rm },  // AND
02051         { X86:: OR8rm, X86:: OR16rm, X86:: OR32rm },  // OR
02052         { X86::XOR8rm, X86::XOR16rm, X86::XOR32rm },  // XOR
02053       };
02054       Opcode = OpcodeTab[OperatorClass][Class];
02055     } else {
02056       static const unsigned OpcodeTab[][2] = {
02057         { X86::FADD32m, X86::FADD64m },  // ADD
02058         { X86::FSUB32m, X86::FSUB64m },  // SUB
02059       };
02060       const Type *Ty = Op0->getType();
02061       assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
02062       Opcode = OpcodeTab[OperatorClass][Ty == Type::DoubleTy];
02063     }
02064 
02065     unsigned Op0r = getReg(Op0);
02066     if (AllocaInst *AI =
02067         dyn_castFixedAlloca(cast<LoadInst>(Op1)->getOperand(0))) {
02068       unsigned FI = getFixedSizedAllocaFI(AI);
02069       addFrameReference(BuildMI(BB, Opcode, 5, DestReg).addReg(Op0r), FI);
02070 
02071     } else {
02072       X86AddressMode AM;
02073       getAddressingMode(cast<LoadInst>(Op1)->getOperand(0), AM);
02074       
02075       addFullAddress(BuildMI(BB, Opcode, 5, DestReg).addReg(Op0r), AM);
02076     }
02077     return;
02078   }
02079 
02080   // If this is a floating point subtract, check to see if we can fold the first
02081   // operand in.
02082   if (Class == cFP && OperatorClass == 1 &&
02083       isa<LoadInst>(Op0) && 
02084       isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op0), B)) {
02085     const Type *Ty = Op0->getType();
02086     assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
02087     unsigned Opcode = Ty == Type::FloatTy ? X86::FSUBR32m : X86::FSUBR64m;
02088 
02089     unsigned Op1r = getReg(Op1);
02090     if (AllocaInst *AI =
02091         dyn_castFixedAlloca(cast<LoadInst>(Op0)->getOperand(0))) {
02092       unsigned FI = getFixedSizedAllocaFI(AI);
02093       addFrameReference(BuildMI(BB, Opcode, 5, DestReg).addReg(Op1r), FI);
02094     } else {
02095       X86AddressMode AM;
02096       getAddressingMode(cast<LoadInst>(Op0)->getOperand(0), AM);
02097       
02098       addFullAddress(BuildMI(BB, Opcode, 5, DestReg).addReg(Op1r), AM);
02099     }
02100     return;
02101   }
02102 
02103   emitSimpleBinaryOperation(BB, MI, Op0, Op1, OperatorClass, DestReg);
02104 }
02105 
02106 
02107 /// emitBinaryFPOperation - This method handles emission of floating point
02108 /// Add (0), Sub (1), Mul (2), and Div (3) operations.
02109 void X86ISel::emitBinaryFPOperation(MachineBasicBlock *BB,
02110                                     MachineBasicBlock::iterator IP,
02111                                     Value *Op0, Value *Op1,
02112                                     unsigned OperatorClass, unsigned DestReg) {
02113   // Special case: op Reg, <const fp>
02114   if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1))
02115     if (!Op1C->isExactlyValue(+0.0) && !Op1C->isExactlyValue(+1.0)) {
02116       // Create a constant pool entry for this constant.
02117       MachineConstantPool *CP = F->getConstantPool();
02118       unsigned CPI = CP->getConstantPoolIndex(Op1C);
02119       const Type *Ty = Op1->getType();
02120 
02121       static const unsigned OpcodeTab[][4] = {
02122         { X86::FADD32m, X86::FSUB32m, X86::FMUL32m, X86::FDIV32m },   // Float
02123         { X86::FADD64m, X86::FSUB64m, X86::FMUL64m, X86::FDIV64m },   // Double
02124       };
02125 
02126       assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!");
02127       unsigned Opcode = OpcodeTab[Ty != Type::FloatTy][OperatorClass];
02128       unsigned Op0r = getReg(Op0, BB, IP);
02129       addConstantPoolReference(BuildMI(*BB, IP, Opcode, 5,
02130                                        DestReg).addReg(Op0r), CPI);
02131       return;
02132     }
02133   
02134   // Special case: R1 = op <const fp>, R2
02135   if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op0))
02136     if (CFP->isExactlyValue(-0.0) && OperatorClass == 1) {
02137       // -0.0 - X === -X
02138       unsigned op1Reg = getReg(Op1, BB, IP);
02139       BuildMI(*BB, IP, X86::FCHS, 1, DestReg).addReg(op1Reg);
02140       return;
02141     } else if (!CFP->isExactlyValue(+0.0) && !CFP->isExactlyValue(+1.0)) {
02142       // R1 = op CST, R2  -->  R1 = opr R2, CST
02143 
02144       // Create a constant pool entry for this constant.
02145       MachineConstantPool *CP = F->getConstantPool();
02146       unsigned CPI = CP->getConstantPoolIndex(CFP);
02147       const Type *Ty = CFP->getType();
02148 
02149       static const unsigned OpcodeTab[][4] = {
02150         { X86::FADD32m, X86::FSUBR32m, X86::FMUL32m, X86::FDIVR32m }, // Float
02151         { X86::FADD64m, X86::FSUBR64m, X86::FMUL64m, X86::FDIVR64m }, // Double
02152       };
02153       
02154       assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!");
02155       unsigned Opcode = OpcodeTab[Ty != Type::FloatTy][OperatorClass];
02156       unsigned Op1r = getReg(Op1, BB, IP);
02157       addConstantPoolReference(BuildMI(*BB, IP, Opcode, 5,
02158                                        DestReg).addReg(Op1r), CPI);
02159       return;
02160     }
02161 
02162   // General case.
02163   static const unsigned OpcodeTab[4] = {
02164     X86::FpADD, X86::FpSUB, X86::FpMUL, X86::FpDIV
02165   };
02166 
02167   unsigned Opcode = OpcodeTab[OperatorClass];
02168   unsigned Op0r = getReg(Op0, BB, IP);
02169   unsigned Op1r = getReg(Op1, BB, IP);
02170   BuildMI(*BB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r);
02171 }
02172 
/// emitSimpleBinaryOperation - Implement simple binary operators for integral
/// types...  OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for
/// Or, 4 for Xor.  This is the code shared between visitSimpleBinary and
/// constant expression support.  FP operands are forwarded to
/// emitBinaryFPOperation; longs are lowered as pairs of 32-bit operations
/// (virtual register R holds the low half, R+1 the high half).
///
void X86ISel::emitSimpleBinaryOperation(MachineBasicBlock *MBB,
                                        MachineBasicBlock::iterator IP,
                                        Value *Op0, Value *Op1,
                                        unsigned OperatorClass, 
                                        unsigned DestReg) {
  unsigned Class = getClassB(Op0->getType());

  // Floating point only supports add (0) and sub (1) through this path.
  if (Class == cFP) {
    assert(OperatorClass < 2 && "No logical ops for FP!");
    emitBinaryFPOperation(MBB, IP, Op0, Op1, OperatorClass, DestReg);
    return;
  }

  // Special case: "sub <constant int>, X".
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0))
    if (OperatorClass == 1) {
      static unsigned const NEGTab[] = {
        X86::NEG8r, X86::NEG16r, X86::NEG32r, 0, X86::NEG32r
      };

      // sub 0, X -> neg X
      if (CI->isNullValue()) {
        unsigned op1Reg = getReg(Op1, MBB, IP);
        BuildMI(*MBB, IP, NEGTab[Class], 1, DestReg).addReg(op1Reg);
      
        if (Class == cLong) {
          // 64-bit negation needs the borrow from the low half:
          // We just emitted: Dl = neg Sl
          // Now emit       : T  = addc Sh, 0
          //                : Dh = neg T
          unsigned T = makeAnotherReg(Type::IntTy);
          BuildMI(*MBB, IP, X86::ADC32ri, 2, T).addReg(op1Reg+1).addImm(0);
          BuildMI(*MBB, IP, X86::NEG32r, 1, DestReg+1).addReg(T);
        }
        return;
      } else if (Op1->hasOneUse() && Class != cLong) {
        // sub C, X -> tmp = neg X; DestReg = add tmp, C.  This is better
        // than copying C into a temporary register, because of register
        // pressure (tmp and destreg can share a register).
        static unsigned const ADDRITab[] = { 
          X86::ADD8ri, X86::ADD16ri, X86::ADD32ri, 0, X86::ADD32ri
        };
        unsigned op1Reg = getReg(Op1, MBB, IP);
        unsigned Tmp = makeAnotherReg(Op0->getType());
        BuildMI(*MBB, IP, NEGTab[Class], 1, Tmp).addReg(op1Reg);
        BuildMI(*MBB, IP, ADDRITab[Class], 2,
                DestReg).addReg(Tmp).addImm(CI->getRawValue());
        return;
      }
    }

  // Special case: op Reg, <const int>
  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    unsigned Op0r = getReg(Op0, MBB, IP);

    // xor X, -1 -> not X
    if (OperatorClass == 4 && Op1C->isAllOnesValue()) {
      static unsigned const NOTTab[] = {
        X86::NOT8r, X86::NOT16r, X86::NOT32r, 0, X86::NOT32r
      };
      BuildMI(*MBB, IP, NOTTab[Class], 1, DestReg).addReg(Op0r);
      if (Class == cLong)  // Invert the top part too
        BuildMI(*MBB, IP, X86::NOT32r, 1, DestReg+1).addReg(Op0r+1);
      return;
    }

    // add X, -1 -> dec X
    if (OperatorClass == 0 && Op1C->isAllOnesValue() && Class != cLong) {
      // Note that we can't use dec for 64-bit decrements, because it does not
      // set the carry flag!
      static unsigned const DECTab[] = { X86::DEC8r, X86::DEC16r, X86::DEC32r };
      BuildMI(*MBB, IP, DECTab[Class], 1, DestReg).addReg(Op0r);
      return;
    }

    // add X, 1 -> inc X
    if (OperatorClass == 0 && Op1C->equalsInt(1) && Class != cLong) {
      // Note that we can't use inc for 64-bit increments, because it does not
      // set the carry flag!
      static unsigned const INCTab[] = { X86::INC8r, X86::INC16r, X86::INC32r };
      BuildMI(*MBB, IP, INCTab[Class], 1, DestReg).addReg(Op0r);
      return;
    }
  
    // Reg-immediate opcodes, indexed by [OperatorClass][Class].  The cLong
    // column holds the 32-bit opcode used for the low half of a long.
    static const unsigned OpcodeTab[][5] = {
      // Arithmetic operators
      { X86::ADD8ri, X86::ADD16ri, X86::ADD32ri, 0, X86::ADD32ri },  // ADD
      { X86::SUB8ri, X86::SUB16ri, X86::SUB32ri, 0, X86::SUB32ri },  // SUB
    
      // Bitwise operators
      { X86::AND8ri, X86::AND16ri, X86::AND32ri, 0, X86::AND32ri },  // AND
      { X86:: OR8ri, X86:: OR16ri, X86:: OR32ri, 0, X86::OR32ri  },  // OR
      { X86::XOR8ri, X86::XOR16ri, X86::XOR32ri, 0, X86::XOR32ri },  // XOR
    };
  
    unsigned Opcode = OpcodeTab[OperatorClass][Class];
    unsigned Op1l = cast<ConstantInt>(Op1C)->getRawValue();

    if (Class != cLong) {
      BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l);
      return;
    }
    
    // If this is a long value and the high or low bits have a special
    // property, emit some special cases.
    unsigned Op1h = cast<ConstantInt>(Op1C)->getRawValue() >> 32LL;
    
    // If the constant is zero in the low 32-bits, just copy the low part
    // across and apply the normal 32-bit operation to the high parts.  There
    // will be no carry or borrow into the top.
    if (Op1l == 0) {
      if (OperatorClass != 2) // All but and...
        BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0r);
      else
        // and X, 0 -> 0 for the low half.
        BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
      BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg+1)
        .addReg(Op0r+1).addImm(Op1h);
      return;
    }
    
    // If this is a logical operation and the top 32-bits are zero, just
    // operate on the lower 32.
    if (Op1h == 0 && OperatorClass > 1) {
      BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg)
        .addReg(Op0r).addImm(Op1l);
      if (OperatorClass != 2)  // All but and
        BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(Op0r+1);
      else
        // and X, 0 -> 0 for the high half.
        BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
      return;
    }
    
    // TODO: We could handle lots of other special cases here, such as AND'ing
    // with 0xFFFFFFFF00000000 -> noop, etc.
    
    // Otherwise, code generate the full operation with a constant.  The high
    // half uses the carry/borrow-propagating opcode for add/sub.
    static const unsigned TopTab[] = {
      X86::ADC32ri, X86::SBB32ri, X86::AND32ri, X86::OR32ri, X86::XOR32ri
    };
    
    BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l);
    BuildMI(*MBB, IP, TopTab[OperatorClass], 2, DestReg+1)
      .addReg(Op0r+1).addImm(Op1h);
    return;
  }

  // Finally, handle the general case now.
  static const unsigned OpcodeTab[][5] = {
    // Arithmetic operators
    { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr, 0, X86::ADD32rr },  // ADD
    { X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, 0, X86::SUB32rr },  // SUB
      
    // Bitwise operators
    { X86::AND8rr, X86::AND16rr, X86::AND32rr, 0, X86::AND32rr },  // AND
    { X86:: OR8rr, X86:: OR16rr, X86:: OR32rr, 0, X86:: OR32rr },  // OR
    { X86::XOR8rr, X86::XOR16rr, X86::XOR32rr, 0, X86::XOR32rr },  // XOR
  };
    
  unsigned Opcode = OpcodeTab[OperatorClass][Class];
  unsigned Op0r = getReg(Op0, MBB, IP);
  unsigned Op1r = getReg(Op1, MBB, IP);
  BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r);
    
  if (Class == cLong) {        // Handle the upper 32 bits of long values...
    static const unsigned TopTab[] = {
      X86::ADC32rr, X86::SBB32rr, X86::AND32rr, X86::OR32rr, X86::XOR32rr
    };
    BuildMI(*MBB, IP, TopTab[OperatorClass], 2,
            DestReg+1).addReg(Op0r+1).addReg(Op1r+1);
  }
}
02349 
02350 /// doMultiply - Emit appropriate instructions to multiply together the
02351 /// registers op0Reg and op1Reg, and put the result in DestReg.  The type of the
02352 /// result should be given as DestTy.
02353 ///
02354 void X86ISel::doMultiply(MachineBasicBlock *MBB,
02355                          MachineBasicBlock::iterator MBBI,
02356                          unsigned DestReg, const Type *DestTy,
02357                          unsigned op0Reg, unsigned op1Reg) {
02358   unsigned Class = getClass(DestTy);
02359   switch (Class) {
02360   case cInt:
02361   case cShort:
02362     BuildMI(*MBB, MBBI, Class == cInt ? X86::IMUL32rr:X86::IMUL16rr, 2, DestReg)
02363       .addReg(op0Reg).addReg(op1Reg);
02364     return;
02365   case cByte:
02366     // Must use the MUL instruction, which forces use of AL...
02367     BuildMI(*MBB, MBBI, X86::MOV8rr, 1, X86::AL).addReg(op0Reg);
02368     BuildMI(*MBB, MBBI, X86::MUL8r, 1).addReg(op1Reg);
02369     BuildMI(*MBB, MBBI, X86::MOV8rr, 1, DestReg).addReg(X86::AL);
02370     return;
02371   default:
02372   case cLong: assert(0 && "doMultiply cannot operate on LONG values!");
02373   }
02374 }
02375 
// ExactLog2 - This function solves for (Val == 1 << (N-1)) and returns N.  It
// returns zero when the input is not exactly a power of two.
static unsigned ExactLog2(unsigned Val) {
  // Zero, or more than one bit set, is not a power of two.
  if (Val == 0 || (Val & (Val-1)) != 0)
    return 0;
  // Locate the single set bit; its position (1-based) is the answer.
  unsigned N = 1;
  for (; Val != 1; Val >>= 1)
    ++N;
  return N;
}
02387 
02388 
/// doMultiplyConst - This function is specialized to efficiently codegen an 8,
/// 16, or 32-bit integer multiply by a constant.
///
/// Strength-reduces where it can: neg/add for tiny constants, LEA for
/// x3/x5/x9 (32-bit only), shifts for powers of two, then IMULrri, and
/// finally a full register multiply.  Note ConstRHS is an unsigned that may
/// carry a negative value; the negative case labels rely on the usual
/// modulo-2^32 conversion.
void X86ISel::doMultiplyConst(MachineBasicBlock *MBB,
                              MachineBasicBlock::iterator IP,
                              unsigned DestReg, const Type *DestTy,
                              unsigned op0Reg, unsigned ConstRHS) {
  static const unsigned MOVrrTab[] = {X86::MOV8rr, X86::MOV16rr, X86::MOV32rr};
  static const unsigned MOVriTab[] = {X86::MOV8ri, X86::MOV16ri, X86::MOV32ri};
  static const unsigned ADDrrTab[] = {X86::ADD8rr, X86::ADD16rr, X86::ADD32rr};
  static const unsigned NEGrTab[]  = {X86::NEG8r , X86::NEG16r , X86::NEG32r };

  unsigned Class = getClass(DestTy);
  unsigned TmpReg;

  // Handle special cases here.
  switch (ConstRHS) {
  case -2:
    // X * -2  ==>  t = -X; t + t
    TmpReg = makeAnotherReg(DestTy);
    BuildMI(*MBB, IP, NEGrTab[Class], 1, TmpReg).addReg(op0Reg);
    BuildMI(*MBB, IP, ADDrrTab[Class], 1,DestReg).addReg(TmpReg).addReg(TmpReg);
    return;
  case -1:
    // X * -1  ==>  -X
    BuildMI(*MBB, IP, NEGrTab[Class], 1, DestReg).addReg(op0Reg);
    return;
  case 0:
    // X * 0  ==>  0
    BuildMI(*MBB, IP, MOVriTab[Class], 1, DestReg).addImm(0);
    return;
  case 1:
    // X * 1  ==>  X
    BuildMI(*MBB, IP, MOVrrTab[Class], 1, DestReg).addReg(op0Reg);
    return;
  case 2:
    // X * 2  ==>  X + X
    BuildMI(*MBB, IP, ADDrrTab[Class], 1,DestReg).addReg(op0Reg).addReg(op0Reg);
    return;
  case 3:
  case 5:
  case 9:
    if (Class == cInt) {
      // X*3/5/9  ==>  lea (X, X*2/4/8); only the 32-bit LEA applies here.
      X86AddressMode AM;
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = op0Reg;
      AM.Scale = ConstRHS-1;
      AM.IndexReg = op0Reg;
      AM.Disp = 0;
      addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 5, DestReg), AM);
      return;
    }
    // FALL THROUGH (non-cInt classes drop to the generic code below).
  case -3:
  case -5:
  case -9:
    if (Class == cInt) {
      // Same LEA trick on the magnitude, followed by a negate.
      TmpReg = makeAnotherReg(DestTy);
      X86AddressMode AM;
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = op0Reg;
      AM.Scale = -ConstRHS-1;
      AM.IndexReg = op0Reg;
      AM.Disp = 0;
      addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 5, TmpReg), AM);
      BuildMI(*MBB, IP, NEGrTab[Class], 1, DestReg).addReg(TmpReg);
      return;
    }
    // FALL THROUGH (non-cInt classes drop to the generic code below).
  }

  // If the element size is exactly a power of 2, use a shift to get it.
  // ExactLog2 returns log2(x)+1, hence the Shift-1 immediates below.
  if (unsigned Shift = ExactLog2(ConstRHS)) {
    switch (Class) {
    default: assert(0 && "Unknown class for this function!");
    case cByte:
      BuildMI(*MBB, IP, X86::SHL8ri,2, DestReg).addReg(op0Reg).addImm(Shift-1);
      return;
    case cShort:
      BuildMI(*MBB, IP, X86::SHL16ri,2, DestReg).addReg(op0Reg).addImm(Shift-1);
      return;
    case cInt:
      BuildMI(*MBB, IP, X86::SHL32ri,2, DestReg).addReg(op0Reg).addImm(Shift-1);
      return;
    }
  }

  // If the element size is a negative power of 2, use a shift/neg to get it.
  if (unsigned Shift = ExactLog2(-ConstRHS)) {
    TmpReg = makeAnotherReg(DestTy);
    BuildMI(*MBB, IP, NEGrTab[Class], 1, TmpReg).addReg(op0Reg);
    switch (Class) {
    default: assert(0 && "Unknown class for this function!");
    case cByte:
      BuildMI(*MBB, IP, X86::SHL8ri,2, DestReg).addReg(TmpReg).addImm(Shift-1);
      return;
    case cShort:
      BuildMI(*MBB, IP, X86::SHL16ri,2, DestReg).addReg(TmpReg).addImm(Shift-1);
      return;
    case cInt:
      BuildMI(*MBB, IP, X86::SHL32ri,2, DestReg).addReg(TmpReg).addImm(Shift-1);
      return;
    }
  }
  
  // 16/32-bit targets have a reg-imm multiply form (no 8-bit equivalent).
  if (Class == cShort) {
    BuildMI(*MBB, IP, X86::IMUL16rri,2,DestReg).addReg(op0Reg).addImm(ConstRHS);
    return;
  } else if (Class == cInt) {
    BuildMI(*MBB, IP, X86::IMUL32rri,2,DestReg).addReg(op0Reg).addImm(ConstRHS);
    return;
  }

  // Most general case, emit a normal multiply...
  TmpReg = makeAnotherReg(DestTy);
  BuildMI(*MBB, IP, MOVriTab[Class], 1, TmpReg).addImm(ConstRHS);
  
  // Emit a MUL to multiply the register holding the index by
  // elementSize, putting the result in OffsetReg.
  doMultiply(MBB, IP, DestReg, DestTy, op0Reg, TmpReg);
}
02502 
02503 /// visitMul - Multiplies are not simple binary operators because they must deal
02504 /// with the EAX register explicitly.
02505 ///
02506 void X86ISel::visitMul(BinaryOperator &I) {
02507   unsigned ResultReg = getReg(I);
02508 
02509   Value *Op0 = I.getOperand(0);
02510   Value *Op1 = I.getOperand(1);
02511 
02512   // Fold loads into floating point multiplies.
02513   if (getClass(Op0->getType()) == cFP) {
02514     if (isa<LoadInst>(Op0) && !isa<LoadInst>(Op1))
02515       if (!I.swapOperands())
02516         std::swap(Op0, Op1);  // Make sure any loads are in the RHS.
02517     if (LoadInst *LI = dyn_cast<LoadInst>(Op1))
02518       if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
02519         const Type *Ty = Op0->getType();
02520         assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!");
02521         unsigned Opcode = Ty == Type::FloatTy ? X86::FMUL32m : X86::FMUL64m;
02522         
02523         unsigned Op0r = getReg(Op0);
02524         if (AllocaInst *AI = dyn_castFixedAlloca(LI->getOperand(0))) {
02525           unsigned FI = getFixedSizedAllocaFI(AI);
02526           addFrameReference(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op0r), FI);
02527         } else {
02528           X86AddressMode AM;
02529           getAddressingMode(LI->getOperand(0), AM);
02530           
02531           addFullAddress(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op0r), AM);
02532         }
02533         return;
02534       }
02535   }
02536 
02537   MachineBasicBlock::iterator IP = BB->end();
02538   emitMultiply(BB, IP, Op0, Op1, ResultReg);
02539 }
02540 
/// emitMultiply - Emit code to multiply Op0 by Op1, leaving the product in
/// DestReg.  Scalars go through doMultiply/doMultiplyConst, FP through
/// emitBinaryFPOperation, and longs are decomposed into 32-bit multiplies
/// (virtual register R is the low half of a long value; R+1 the high half).
void X86ISel::emitMultiply(MachineBasicBlock *MBB, 
                           MachineBasicBlock::iterator IP,
                           Value *Op0, Value *Op1, unsigned DestReg) {
  MachineBasicBlock &BB = *MBB;
  TypeClass Class = getClass(Op0->getType());

  // Simple scalar multiply?
  unsigned Op0Reg  = getReg(Op0, &BB, IP);
  switch (Class) {
  case cByte:
  case cShort:
  case cInt:
    if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
      unsigned Val = (unsigned)CI->getRawValue(); // Isn't a 64-bit constant
      doMultiplyConst(&BB, IP, DestReg, Op0->getType(), Op0Reg, Val);
    } else {
      unsigned Op1Reg  = getReg(Op1, &BB, IP);
      doMultiply(&BB, IP, DestReg, Op1->getType(), Op0Reg, Op1Reg);
    }
    return;
  case cFP:
    emitBinaryFPOperation(MBB, IP, Op0, Op1, 2, DestReg);  // 2 == multiply
    return;
  case cLong:
    break;
  }

  // Long value.  We have to do things the hard way...
  // Writing A = (AH,AL) and B = (BH,BL), a 64x64->64 multiply is:
  //   low  = lo(AL*BL)
  //   high = hi(AL*BL) + lo(AH*BL) + lo(AL*BH)
  // (the AH*BH term only affects bits above 64 and is discarded).
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
    unsigned CLow = CI->getRawValue();
    unsigned CHi  = CI->getRawValue() >> 32;
    
    if (CLow == 0) {
      // If the low part of the constant is all zeros, things are simple:
      // the low result is 0 and the high result is just AL * CHi.
      BuildMI(BB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
      doMultiplyConst(&BB, IP, DestReg+1, Type::UIntTy, Op0Reg, CHi);
      return;
    }
    
    // Multiply the two low parts... capturing carry into EDX
    unsigned OverflowReg = 0;
    if (CLow == 1) {
      // AL*1 needs no multiply, and produces no bits above 32.
      BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0Reg);
    } else {
      // MUL32r implicitly multiplies by EAX and writes EDX:EAX.
      unsigned Op1RegL = makeAnotherReg(Type::UIntTy);
      OverflowReg = makeAnotherReg(Type::UIntTy);
      BuildMI(BB, IP, X86::MOV32ri, 1, Op1RegL).addImm(CLow);
      BuildMI(BB, IP, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
      BuildMI(BB, IP, X86::MUL32r, 1).addReg(Op1RegL);  // AL*BL
      
      BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(X86::EAX);   // AL*BL
      BuildMI(BB, IP, X86::MOV32rr, 1,
              OverflowReg).addReg(X86::EDX);                    // AL*BL >> 32
    }
    
    unsigned AHBLReg = makeAnotherReg(Type::UIntTy);   // AH*BL
    doMultiplyConst(&BB, IP, AHBLReg, Type::UIntTy, Op0Reg+1, CLow);
    
    unsigned AHBLplusOverflowReg;
    if (OverflowReg) {
      AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
      BuildMI(BB, IP, X86::ADD32rr, 2,                // AH*BL+(AL*BL >> 32)
              AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
    } else {
      // No overflow term was generated (CLow == 1), so nothing to add.
      AHBLplusOverflowReg = AHBLReg;
    }
    
    if (CHi == 0) {
      // No AL*BH term; the high half is complete.
      BuildMI(BB, IP, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg);
    } else {
      unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
      doMultiplyConst(&BB, IP, ALBHReg, Type::UIntTy, Op0Reg, CHi);
      
      BuildMI(BB, IP, X86::ADD32rr, 2,      // AL*BH + AH*BL + (AL*BL >> 32)
              DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
    }
    return;
  }

  // General 64x64 multiply

  unsigned Op1Reg  = getReg(Op1, &BB, IP);
  // Multiply the two low parts... capturing carry into EDX
  BuildMI(BB, IP, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
  BuildMI(BB, IP, X86::MUL32r, 1).addReg(Op1Reg);  // AL*BL
  
  unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
  BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(X86::EAX);     // AL*BL
  BuildMI(BB, IP, X86::MOV32rr, 1,
          OverflowReg).addReg(X86::EDX); // AL*BL >> 32
  
  // The cross terms only need their low 32 bits, so plain IMUL suffices.
  unsigned AHBLReg = makeAnotherReg(Type::UIntTy);   // AH*BL
  BuildMI(BB, IP, X86::IMUL32rr, 2,
          AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
  
  unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
  BuildMI(BB, IP, X86::ADD32rr, 2,                // AH*BL+(AL*BL >> 32)
          AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
  
  unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
  BuildMI(BB, IP, X86::IMUL32rr, 2,
          ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
  
  BuildMI(BB, IP, X86::ADD32rr, 2,      // AL*BH + AH*BL + (AL*BL >> 32)
          DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
}
02647 
02648 
02649 /// visitDivRem - Handle division and remainder instructions... these
02650 /// instruction both require the same instructions to be generated, they just
02651 /// select the result from a different register.  Note that both of these
02652 /// instructions work differently for signed and unsigned operands.
02653 ///
02654 void X86ISel::visitDivRem(BinaryOperator &I) {
02655   unsigned ResultReg = getReg(I);
02656   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
02657 
02658   // Fold loads into floating point divides.
02659   if (getClass(Op0->getType()) == cFP) {
02660     if (LoadInst *LI = dyn_cast<LoadInst>(Op1))
02661       if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
02662         const Type *Ty = Op0->getType();
02663         assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!");
02664         unsigned Opcode = Ty == Type::FloatTy ? X86::FDIV32m : X86::FDIV64m;
02665         
02666         unsigned Op0r = getReg(Op0);
02667         if (AllocaInst *AI = dyn_castFixedAlloca(LI->getOperand(0))) {
02668           unsigned FI = getFixedSizedAllocaFI(AI);
02669           addFrameReference(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op0r), FI);
02670         } else {
02671           X86AddressMode AM;
02672           getAddressingMode(LI->getOperand(0), AM);
02673           
02674           addFullAddress(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op0r), AM);
02675         }
02676         return;
02677       }
02678 
02679     if (LoadInst *LI = dyn_cast<LoadInst>(Op0))
02680       if (isSafeToFoldLoadIntoInstruction(*LI, I)) {
02681         const Type *Ty = Op0->getType();
02682         assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!");
02683         unsigned Opcode = Ty == Type::FloatTy ? X86::FDIVR32m : X86::FDIVR64m;
02684         
02685         unsigned Op1r = getReg(Op1);
02686         if (AllocaInst *AI = dyn_castFixedAlloca(LI->getOperand(0))) {
02687           unsigned FI = getFixedSizedAllocaFI(AI);
02688           addFrameReference(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op1r), FI);
02689         } else {
02690           X86AddressMode AM;
02691           getAddressingMode(LI->getOperand(0), AM);
02692           addFullAddress(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op1r), AM);
02693         }
02694         return;
02695       }
02696   }
02697 
02698 
02699   MachineBasicBlock::iterator IP = BB->end();
02700   emitDivRemOperation(BB, IP, Op0, Op1,
02701                       I.getOpcode() == Instruction::Div, ResultReg);
02702 }
02703 
02704 void X86ISel::emitDivRemOperation(MachineBasicBlock *BB,
02705                                   MachineBasicBlock::iterator IP,
02706                                   Value *Op0, Value *Op1, bool isDiv,
02707                                   unsigned ResultReg) {
02708   const Type *Ty = Op0->getType();
02709   unsigned Class = getClass(Ty);
02710   switch (Class) {
02711   case cFP:              // Floating point divide
02712     if (isDiv) {
02713       emitBinaryFPOperation(BB, IP, Op0, Op1, 3, ResultReg);
02714       return;
02715     } else {               // Floating point remainder...
02716       unsigned Op0Reg = getReg(Op0, BB, IP);
02717       unsigned Op1Reg = getReg(Op1, BB, IP);
02718       MachineInstr *TheCall =
02719         BuildMI(X86::CALLpcrel32, 1).addExternalSymbol("fmod", true);
02720       std::vector<ValueRecord> Args;
02721       Args.push_back(ValueRecord(Op0Reg, Type::DoubleTy));
02722       Args.push_back(ValueRecord(Op1Reg, Type::DoubleTy));
02723       doCall(ValueRecord(ResultReg, Type::DoubleTy), TheCall, Args);
02724     }
02725     return;
02726   case cLong: {
02727     static const char *FnName[] =
02728       { "__moddi3", "__divdi3", "__umoddi3", "__udivdi3" };
02729     unsigned Op0Reg = getReg(Op0, BB, IP);
02730     unsigned Op1Reg = getReg(Op1, BB, IP);
02731     unsigned NameIdx = Ty->isUnsigned()*2 + isDiv;
02732     MachineInstr *TheCall =
02733       BuildMI(X86::CALLpcrel32, 1).addExternalSymbol(FnName[NameIdx], true);
02734 
02735     std::vector<ValueRecord> Args;
02736     Args.push_back(ValueRecord(Op0Reg, Type::LongTy));
02737     Args.push_back(ValueRecord(Op1Reg, Type::LongTy));
02738     doCall(ValueRecord(ResultReg, Type::LongTy), TheCall, Args);
02739     return;
02740   }
02741   case cByte: case cShort: case cInt:
02742     break;          // Small integrals, handled below...
02743   default: assert(0 && "Unknown class!");
02744   }
02745 
02746   static const unsigned MovOpcode[]={ X86::MOV8rr, X86::MOV16rr, X86::MOV32rr };
02747   static const unsigned NEGOpcode[]={ X86::NEG8r,  X86::NEG16r,  X86::NEG32r };
02748   static const unsigned SAROpcode[]={ X86::SAR8ri, X86::SAR16ri, X86::SAR32ri };
02749   static const unsigned SHROpcode[]={ X86::SHR8ri, X86::SHR16ri, X86::SHR32ri };
02750   static const unsigned ADDOpcode[]={ X86::ADD8rr, X86::ADD16rr, X86::ADD32rr };
02751 
02752   // Special case signed division by power of 2.
02753   if (ConstantSInt *CI = dyn_cast<ConstantSInt>(Op1))
02754     if (isDiv) {
02755       assert(Class != cLong && "This doesn't handle 64-bit divides!");
02756       int V = CI->getValue();
02757 
02758       if (V == 1) {       // X /s 1 => X
02759         unsigned Op0Reg = getReg(Op0, BB, IP);
02760         BuildMI(*BB, IP, MovOpcode[Class], 1, ResultReg).addReg(Op0Reg);
02761         return;
02762       }
02763 
02764       if (V == -1) {      // X /s -1 => -X
02765         unsigned Op0Reg = getReg(Op0, BB, IP);
02766         BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(Op0Reg);
02767         return;
02768       }
02769 
02770       if (V == 2 || V == -2) {      // X /s 2
02771         static const unsigned CMPOpcode[] = {
02772           X86::CMP8ri, X86::CMP16ri, X86::CMP32ri
02773         };
02774         static const unsigned SBBOpcode[] = {
02775           X86::SBB8ri, X86::SBB16ri, X86::SBB32ri
02776         };
02777         unsigned Op0Reg = getReg(Op0, BB, IP);
02778         unsigned SignBit = 1 << (CI->getType()->getPrimitiveSize()*8-1);
02779         BuildMI(*BB, IP, CMPOpcode[Class], 2).addReg(Op0Reg).addImm(SignBit);
02780 
02781         unsigned TmpReg = makeAnotherReg(Op0->getType());
02782         BuildMI(*BB, IP, SBBOpcode[Class], 2, TmpReg).addReg(Op0Reg).addImm(-1);
02783 
02784         unsigned TmpReg2 = V == 2 ? ResultReg : makeAnotherReg(Op0->getType());
02785         BuildMI(*BB, IP, SAROpcode[Class], 2, TmpReg2).addReg(TmpReg).addImm(1);
02786         if (V == -2) {
02787           BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(TmpReg2);
02788         }
02789         return;
02790       }
02791 
02792       bool isNeg = false;
02793       if (V < 0) {         // Not a positive power of 2?
02794         V = -V;
02795         isNeg = true;      // Maybe it's a negative power of 2.
02796       }
02797       if (unsigned Log = ExactLog2(V)) {
02798         --Log;
02799         unsigned Op0Reg = getReg(Op0, BB, IP);
02800         unsigned TmpReg = makeAnotherReg(Op0->getType());
02801         BuildMI(*BB, IP, SAROpcode[Class], 2, TmpReg)
02802           .addReg(Op0Reg).addImm(Log-1);
02803         unsigned TmpReg2 = makeAnotherReg(Op0->getType());
02804         BuildMI(*BB, IP, SHROpcode[Class], 2, TmpReg2)
02805           .addReg(TmpReg).addImm(32-Log);
02806         unsigned TmpReg3 = makeAnotherReg(Op0->getType());
02807         BuildMI(*BB, IP, ADDOpcode[Class], 2, TmpReg3)
02808           .addReg(Op0Reg).addReg(TmpReg2);
02809 
02810         unsigned TmpReg4 = isNeg ? makeAnotherReg(Op0->getType()) : ResultReg;
02811         BuildMI(*BB, IP, SAROpcode[Class], 2, TmpReg4)
02812           .addReg(TmpReg3).addImm(Log);
02813         if (isNeg)
02814           BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(TmpReg4);
02815         return;
02816       }
02817     } else {    // X % C
02818       assert(Class != cLong && "This doesn't handle 64-bit remainder!");
02819       int V = CI->getValue();
02820 
02821       if (V == 2 || V == -2) {       // X % 2, X % -2
02822         static const unsigned SExtOpcode[] = { X86::CBW, X86::CWD, X86::CDQ };
02823         static const unsigned BaseReg[]    = { X86::AL , X86::AX , X86::EAX };
02824         static const unsigned SExtReg[]    = { X86::AH , X86::DX , X86::EDX };
02825         static const unsigned ANDOpcode[]  = {
02826           X86::AND8ri, X86::AND16ri, X86::AND32ri
02827         };
02828         static const unsigned XOROpcode[]  = {
02829           X86::XOR8rr, X86::XOR16rr, X86::XOR32rr
02830         };
02831         static const unsigned SUBOpcode[]  = {
02832           X86::SUB8rr, X86::SUB16rr, X86::SUB32rr
02833         };
02834 
02835         // Sign extend result into reg of -1 or 0.
02836         unsigned Op0Reg = getReg(Op0, BB, IP);
02837         BuildMI(*BB, IP, MovOpcode[Class], 1, BaseReg[Class]).addReg(Op0Reg);
02838         BuildMI(*BB, IP, SExtOpcode[Class], 0);
02839         unsigned TmpReg0 = makeAnotherReg(Op0->getType());
02840         BuildMI(*BB, IP, MovOpcode[Class], 1, TmpReg0).addReg(SExtReg[Class]);
02841 
02842         unsigned TmpReg1 = makeAnotherReg(Op0->getType());
02843         BuildMI(*BB, IP, ANDOpcode[Class], 2, TmpReg1).addReg(Op0Reg).addImm(1);
02844         
02845         unsigned TmpReg2 = makeAnotherReg(Op0->getType());
02846         BuildMI(*BB, IP, XOROpcode[Class], 2,
02847                 TmpReg2).addReg(TmpReg1).addReg(TmpReg0);
02848         BuildMI(*BB, IP, SUBOpcode[Class], 2,
02849                 ResultReg).addReg(TmpReg2).addReg(TmpReg0);
02850         return;
02851       }
02852     }
02853 
02854   static const unsigned Regs[]     ={ X86::AL    , X86::AX     , X86::EAX     };
02855   static const unsigned ClrOpcode[]={ X86::MOV8ri, X86::MOV16ri, X86::MOV32ri };
02856   static const unsigned ExtRegs[]  ={ X86::AH    , X86::DX     , X86::EDX     };
02857 
02858   static const unsigned DivOpcode[][4] = {
02859     { X86::DIV8r , X86::DIV16r , X86::DIV32r , 0 },  // Unsigned division
02860     { X86::IDIV8r, X86::IDIV16r, X86::IDIV32r, 0 },  // Signed division
02861   };
02862 
02863   unsigned Reg    = Regs[Class];
02864   unsigned ExtReg = ExtRegs[Class];
02865 
02866   // Put the first operand into one of the A registers...
02867   unsigned Op0Reg = getReg(Op0, BB, IP);
02868   unsigned Op1Reg = getReg(Op1, BB, IP);
02869   BuildMI(*BB, IP, MovOpcode[Class], 1, Reg).addReg(Op0Reg);
02870 
02871   if (Ty->isSigned()) {
02872     // Emit a sign extension instruction...
02873     unsigned ShiftResult = makeAnotherReg(Op0->getType());
02874     BuildMI(*BB, IP, SAROpcode[Class], 2,ShiftResult).addReg(Op0Reg).addImm(31);
02875     BuildMI(*BB, IP, MovOpcode[Class], 1, ExtReg).addReg(ShiftResult);
02876 
02877     // Emit the appropriate divide or remainder instruction...
02878     BuildMI(*BB, IP, DivOpcode[1][Class], 1).addReg(Op1Reg);
02879   } else {
02880     // If unsigned, emit a zeroing instruction... (reg = 0)
02881     BuildMI(*BB, IP, ClrOpcode[Class], 2, ExtReg).addImm(0);
02882 
02883     // Emit the appropriate divide or remainder instruction...
02884     BuildMI(*BB, IP, DivOpcode[0][Class], 1).addReg(Op1Reg);
02885   }
02886 
02887   // Figure out which register we want to pick the result out of...
02888   unsigned DestReg = isDiv ? Reg : ExtReg;
02889   
02890   // Put the result into the destination register...
02891   BuildMI(*BB, IP, MovOpcode[Class], 1, ResultReg).addReg(DestReg);
02892 }
02893 
02894 
02895 /// Shift instructions: 'shl', 'sar', 'shr' - Some special cases here
02896 /// for constant immediate shift values, and for constant immediate
02897 /// shift values equal to 1. Even the general case is sort of special,
02898 /// because the shift amount has to be in CL, not just any old register.
02899 ///
02900 void X86ISel::visitShiftInst(ShiftInst &I) {
02901   MachineBasicBlock::iterator IP = BB->end ();
02902   emitShiftOperation (BB, IP, I.getOperand (0), I.getOperand (1),
02903                       I.getOpcode () == Instruction::Shl, I.getType (),
02904                       getReg (I));
02905 }
02906 
/// Emit code for a 'SHLD DestReg, Op0, Op1, Amt' operation, where Amt is a
/// constant.  Computes DestReg = (Op0 << Amt) | (Op1 >> (32-Amt)), i.e. the
/// high word of a 64-bit left shift.  Op0Reg/Op1Reg are not modified.
void X86ISel::doSHLDConst(MachineBasicBlock *MBB, 
                          MachineBasicBlock::iterator IP,
                          unsigned DestReg, unsigned Op0Reg, unsigned Op1Reg,
                          unsigned Amt) {
  // SHLD is a very inefficient operation on every processor, try to do
  // something simpler for common values of 'Amt'.
  if (Amt == 0) {
    // Shift by zero degenerates to a plain register copy of Op0.
    BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0Reg);
  } else if (Amt == 1) {
    // Shift by one: the ADD doubles Op1 and leaves Op1's top bit in CF; the
    // ADC then doubles Op0 and shifts that carry in at the bottom.  Tmp's
    // value is dead -- the ADD exists only to set the carry flag, so the ADC
    // must immediately follow it (no flag-clobbering instruction between).
    unsigned Tmp = makeAnotherReg(Type::UIntTy);
    BuildMI(*MBB, IP, X86::ADD32rr, 2, Tmp).addReg(Op1Reg).addReg(Op1Reg);
    BuildMI(*MBB, IP, X86::ADC32rr, 2, DestReg).addReg(Op0Reg).addReg(Op0Reg);
  } else if (Amt == 2 || Amt == 3) {
    // On the P4 and Athlon it is cheaper to replace shld ..., 2|3 with a
    // shift/lea pair.  NOTE: This should not be done on the P6 family!
    // Tmp = Op1 >> (32-Amt) extracts the bits shifted out of Op1; the LEA
    // then computes Tmp + Op0*(1<<Amt) in a single instruction using its
    // scaled-index addressing form.
    unsigned Tmp = makeAnotherReg(Type::UIntTy);
    BuildMI(*MBB, IP, X86::SHR32ri, 2, Tmp).addReg(Op1Reg).addImm(32-Amt);
    X86AddressMode AM;
    AM.BaseType = X86AddressMode::RegBase;
    AM.Base.Reg = Tmp;
    AM.Scale = 1 << Amt;
    AM.IndexReg = Op0Reg;
    AM.Disp = 0;
    addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 4, DestReg), AM);
  } else {
    // General case: emit a real SHLD instruction.
    // NOTE(review): the comment below claims two-shifts-plus-OR would be
    // cheaper on the P4, yet the code emits a real SHLD anyway -- presumably
    // the expansion was never implemented; confirm before relying on it.
    // NOTE: It is always cheaper on the P4 to emit SHLD as two shifts and an OR
    // than it is to emit a real SHLD.

    BuildMI(*MBB, IP, X86::SHLD32rri8, 3, 
            DestReg).addReg(Op0Reg).addReg(Op1Reg).addImm(Amt);
  }
}
02941 
02942 /// emitShiftOperation - Common code shared between visitShiftInst and
02943 /// constant expression support.
02944 void X86ISel::emitShiftOperation(MachineBasicBlock *MBB,
02945                                  MachineBasicBlock::iterator IP,
02946                                  Value *Op, Value *ShiftAmount, 
02947                                  bool isLeftShift, const Type *ResultTy, 
02948                                  unsigned DestReg) {
02949   unsigned SrcReg = getReg (Op, MBB, IP);
02950   bool isSigned = ResultTy->isSigned ();
02951   unsigned Class = getClass (ResultTy);
02952 
02953   static const unsigned ConstantOperand[][3] = {
02954     { X86::SHR8ri, X86::SHR16ri, X86::SHR32ri },  // SHR
02955     { X86::SAR8ri, X86::SAR16ri, X86::SAR32ri },  // SAR
02956     { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri },  // SHL
02957     { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri },  // SAL = SHL
02958   };
02959 
02960   static const unsigned NonConstantOperand[][3] = {
02961     { X86::SHR8rCL, X86::SHR16rCL, X86::SHR32rCL },  // SHR
02962     { X86::SAR8rCL, X86::SAR16rCL, X86::SAR32rCL },  // SAR
02963     { X86::SHL8rCL, X86::SHL16rCL, X86::SHL32rCL },  // SHL
02964     { X86::SHL8rCL, X86::SHL16rCL, X86::SHL32rCL },  // SAL = SHL
02965   };
02966 
02967   // Longs, as usual, are handled specially.
02968   if (Class == cLong) {
02969     if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(ShiftAmount)) {
02970       unsigned Amount = CUI->getValue();
02971       if (Amount == 1 && isLeftShift) {   // X << 1 == X+X
02972         BuildMI(*MBB, IP, X86::ADD32rr, 2,
02973                 DestReg).addReg(SrcReg).addReg(SrcReg);
02974         BuildMI(*MBB, IP, X86::ADC32rr, 2,
02975                 DestReg+1).addReg(SrcReg+1).addReg(SrcReg+1);
02976       } else if (Amount < 32) {
02977         const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned];
02978         if (isLeftShift) {
02979           doSHLDConst(MBB, IP, DestReg+1, SrcReg+1, SrcReg, Amount);
02980           BuildMI(*MBB, IP, Opc[2], 2, DestReg).addReg(SrcReg).addImm(Amount);
02981         } else {
02982           BuildMI(*MBB, IP, X86::SHRD32rri8, 3,
02983                   DestReg).addReg(SrcReg  ).addReg(SrcReg+1).addImm(Amount);
02984           BuildMI(*MBB, IP, Opc[2],2,DestReg+1).addReg(SrcReg+1).addImm(Amount);
02985         }
02986       } else if (Amount == 32) {
02987         if (isLeftShift) {
02988           BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(SrcReg);
02989           BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
02990         } else {
02991           BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg+1);
02992           if (!isSigned) {
02993             BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
02994           } else {
02995             BuildMI(*MBB, IP, X86::SAR32ri, 2,
02996                     DestReg+1).addReg(SrcReg).addImm(31);
02997           }
02998         }
02999       } else {                 // Shifting more than 32 bits
03000         Amount -= 32;
03001         if (isLeftShift) {
03002           BuildMI(*MBB, IP, X86::SHL32ri, 2,
03003                   DestReg + 1).addReg(SrcReg).addImm(Amount);
03004           BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0);
03005         } else {
03006           BuildMI(*MBB, IP, isSigned ? X86::SAR32ri : X86::SHR32ri, 2,
03007                   DestReg).addReg(SrcReg+1).addImm(Amount);
03008           BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
03009         }
03010       }
03011     } else {
03012       unsigned TmpReg = makeAnotherReg(Type::IntTy);
03013       if (!isLeftShift && isSigned) {
03014         // If this is a SHR of a Long, then we need to do funny sign extension
03015         // stuff.  TmpReg gets the value to use as the high-part if we are
03016         // shifting more than 32 bits.
03017         BuildMI(*MBB, IP, X86::SAR32ri, 2, TmpReg).addReg(SrcReg).addImm(31);
03018       } else {
03019         // Other shifts use a fixed zero value if the shift is more than 32
03020         // bits.
03021         BuildMI(*MBB, IP, X86::MOV32ri, 1, TmpReg).addImm(0);
03022       }
03023 
03024       // Initialize CL with the shift amount...
03025       unsigned ShiftAmountReg = getReg(ShiftAmount, MBB, IP);
03026       BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::CL).addReg(ShiftAmountReg);
03027 
03028       unsigned TmpReg2 = makeAnotherReg(Type::IntTy);
03029       unsigned TmpReg3 = makeAnotherReg(Type::IntTy);
03030       if (isLeftShift) {
03031         // TmpReg2 = shld inHi, inLo
03032         BuildMI(*MBB, IP, X86::SHLD32rrCL,2,TmpReg2).addReg(SrcReg+1)
03033                                                     .addReg(SrcReg);
03034         // TmpReg3 = shl  inLo, CL
03035         BuildMI(*MBB, IP, X86::SHL32rCL, 1, TmpReg3).addReg(SrcReg);
03036 
03037         // Set the flags to indicate whether the shift was by more than 32 bits.
03038         BuildMI(*MBB, IP, X86::TEST8ri, 2).addReg(X86::CL).addImm(32);
03039 
03040         // DestHi = (>32) ? TmpReg3 : TmpReg2;
03041         BuildMI(*MBB, IP, X86::CMOVNE32rr, 2, 
03042                 DestReg+1).addReg(TmpReg2).addReg(TmpReg3);
03043         // DestLo = (>32) ? TmpReg : TmpReg3;
03044         BuildMI(*MBB, IP, X86::CMOVNE32rr, 2,
03045             DestReg).addReg(TmpReg3).addReg(TmpReg);
03046       } else {
03047         // TmpReg2 = shrd inLo, inHi
03048         BuildMI(*MBB, IP, X86::SHRD32rrCL,2,TmpReg2).addReg(SrcReg)
03049                                                     .addReg(SrcReg+1);
03050         // TmpReg3 = s[ah]r  inHi, CL
03051         BuildMI(*MBB, IP, isSigned ? X86::SAR32rCL : X86::SHR32rCL, 1, TmpReg3)
03052                        .addReg(SrcReg+1);
03053 
03054         // Set the flags to indicate whether the shift was by more than 32 bits.
03055         BuildMI(*MBB, IP, X86::TEST8ri, 2).addReg(X86::CL).addImm(32);
03056 
03057         // DestLo = (>32) ? TmpReg3 : TmpReg2;
03058         BuildMI(*MBB, IP, X86::CMOVNE32rr, 2, 
03059                 DestReg).addReg(TmpReg2).addReg(TmpReg3);
03060 
03061         // DestHi = (>32) ? TmpReg : TmpReg3;
03062         BuildMI(*MBB, IP, X86::CMOVNE32rr, 2, 
03063                 DestReg+1).addReg(TmpReg3).addReg(TmpReg);
03064       }
03065     }
03066     return;
03067   }
03068 
03069   if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(ShiftAmount)) {
03070     // The shift amount is constant, guaranteed to be a ubyte. Get its value.
03071     assert(CUI->getType() == Type::UByteTy && "Shift amount not a ubyte?");
03072 
03073     if (CUI->getValue() == 1 && isLeftShift) {    // X << 1 -> X+X
03074       static const int AddOpC[] = { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr };
03075       BuildMI(*MBB, IP, AddOpC[Class], 2,DestReg).addReg(SrcReg).addReg(SrcReg);
03076     } else {
03077       const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned];
03078       BuildMI(*MBB, IP, Opc[Class], 2,
03079               DestReg).addReg(SrcReg).addImm(CUI->getValue());
03080     }
03081   } else {                  // The shift amount is non-constant.
03082     unsigned ShiftAmountReg = getReg (ShiftAmount, MBB, IP);
03083     BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::CL).addReg(ShiftAmountReg);
03084 
03085     const unsigned *Opc = NonConstantOperand[isLeftShift*2+isSigned];
03086     BuildMI(*MBB, IP, Opc[Class], 1, DestReg).addReg(SrcReg);
03087   }
03088 }
03089 
03090 
/// visitLoadInst - Implement LLVM load instructions in terms of the x86 'mov'
/// instruction.  The load and store instructions are the only place where we
/// need to worry about the memory layout of the target machine.
///
void X86ISel::visitLoadInst(LoadInst &I) {
  // Check to see if this load instruction is going to be folded into a binary
  // instruction, like add.  If so, we don't want to emit it.  Wouldn't a real
  // pattern matching instruction selector be nice?
  unsigned Class = getClassB(I.getType());
  if (I.hasOneUse()) {
    Instruction *User = cast<Instruction>(I.use_back());
    switch (User->getOpcode()) {
    case Instruction::Cast:
      // If this is a cast from a signed-integer type to a floating point type,
      // fold the cast here: emit an FILD straight from the load's address,
      // eliminating both the integer load and the separate cast.
      if (getClassB(User->getType()) == cFP &&
          (I.getType() == Type::ShortTy || I.getType() == Type::IntTy ||
           I.getType() == Type::LongTy)) {
        unsigned DestReg = getReg(User);
        // Indexed by the loaded value's class; bytes have no FILD form and
        // FP values are never FILDed, hence the zero placeholders.
        static const unsigned Opcode[] = {
          0/*BYTE*/, X86::FILD16m, X86::FILD32m, 0/*FP*/, X86::FILD64m
        };

        if (AllocaInst *AI = dyn_castFixedAlloca(I.getOperand(0))) {
          // Loading from a fixed-size alloca: address via a frame index.
          unsigned FI = getFixedSizedAllocaFI(AI);
          addFrameReference(BuildMI(BB, Opcode[Class], 4, DestReg), FI);
        } else {
          X86AddressMode AM;
          getAddressingMode(I.getOperand(0), AM);
          addFullAddress(BuildMI(BB, Opcode[Class], 4, DestReg), AM);
        }
        return;
      } else {
        User = 0;   // Not a foldable int->FP cast; emit the load normally.
      }
      break;

    // These integer/FP binops may be able to fold the load as a memory
    // operand; 64-bit values can't be folded (they need a register pair).
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
      if (Class == cLong) User = 0;
      break;
    case Instruction::Mul:
    case Instruction::Div:
      if (Class != cFP) User = 0;
      break;  // Folding only implemented for floating point.
    default: User = 0; break;
    }

    if (User) {
      // Okay, we found a user.  If the load is the first operand and there is
      // no second operand load, reverse the operand ordering.  Note that this
      // can fail for a subtract (ie, no change will be made).
      bool Swapped = false;
      if (!isa<LoadInst>(User->getOperand(1)))
        Swapped = !cast<BinaryOperator>(User)->swapOperands();
      
      // Okay, now that everything is set up, if this load is used by the second
      // operand, and if there are no instructions that invalidate the load
      // before the binary operator, eliminate the load.
      if (User->getOperand(1) == &I &&
          isSafeToFoldLoadIntoInstruction(I, *User))
        return;   // Eliminate the load!

      // If this is a floating point sub or div, we won't be able to swap the
      // operands, but we will still be able to eliminate the load.
      if (Class == cFP && User->getOperand(0) == &I &&
          !isa<LoadInst>(User->getOperand(1)) &&
          (User->getOpcode() == Instruction::Sub ||
           User->getOpcode() == Instruction::Div) &&
          isSafeToFoldLoadIntoInstruction(I, *User))
        return;  // Eliminate the load!

      // If we swapped the operands to the instruction, but couldn't fold the
      // load anyway, swap them back.  We don't want to break add X, int 
      // folding.
      if (Swapped) cast<BinaryOperator>(User)->swapOperands();
    }
  }

  // No folding was possible: emit an explicit load.  Indexed by class; the
  // cLong entry is the low-word opcode (the pair is loaded below), and
  // doubles override the cFP entry with the 64-bit FP load.
  static const unsigned Opcodes[] = {
    X86::MOV8rm, X86::MOV16rm, X86::MOV32rm, X86::FLD32m, X86::MOV32rm
  };
  unsigned Opcode = Opcodes[Class];
  if (I.getType() == Type::DoubleTy) Opcode = X86::FLD64m;

  unsigned DestReg = getReg(I);

  if (AllocaInst *AI = dyn_castFixedAlloca(I.getOperand(0))) {
    unsigned FI = getFixedSizedAllocaFI(AI);
    if (Class == cLong) {
      // 64-bit loads fill the register pair: low word first, high word at
      // offset +4.
      addFrameReference(BuildMI(BB, X86::MOV32rm, 4, DestReg), FI);
      addFrameReference(BuildMI(BB, X86::MOV32rm, 4, DestReg+1), FI, 4);
    } else {
      addFrameReference(BuildMI(BB, Opcode, 4, DestReg), FI);
    }
  } else {
    X86AddressMode AM;
    getAddressingMode(I.getOperand(0), AM);
    
    if (Class == cLong) {
      addFullAddress(BuildMI(BB, X86::MOV32rm, 4, DestReg), AM);
      AM.Disp += 4;
      addFullAddress(BuildMI(BB, X86::MOV32rm, 4, DestReg+1), AM);
    } else {
      addFullAddress(BuildMI(BB, Opcode, 4, DestReg), AM);
    }
  }
}
03202 
/// visitStoreInst - Implement LLVM store instructions in terms of the x86 'mov'
/// instruction.
///
void X86ISel::visitStoreInst(StoreInst &I) {
  X86AddressMode AM;
  getAddressingMode(I.getOperand(1), AM);   // operand 1 is the pointer

  const Type *ValTy = I.getOperand(0)->getType();
  unsigned Class = getClassB(ValTy);

  if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(0))) {
    // Integer constants store with immediate-operand moves: no register load.
    uint64_t Val = CI->getRawValue();
    if (Class == cLong) {
      // 64-bit store as two 32-bit immediate stores: low word at AM.Disp,
      // high word at AM.Disp+4 (matching the register-pair layout used by
      // the load code above).
      addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm(Val & ~0U);
      AM.Disp += 4;
      addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm(Val>>32);
    } else {
      static const unsigned Opcodes[] = {
        X86::MOV8mi, X86::MOV16mi, X86::MOV32mi
      };
      unsigned Opcode = Opcodes[Class];
      addFullAddress(BuildMI(BB, Opcode, 5), AM).addImm(Val);
    }
  } else if (isa<ConstantPointerNull>(I.getOperand(0))) {
    // Null pointers store as a 32-bit zero immediate.
    addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm(0);
  } else if (ConstantBool *CB = dyn_cast<ConstantBool>(I.getOperand(0))) {
    addFullAddress(BuildMI(BB, X86::MOV8mi, 5), AM).addImm(CB->getValue());
  } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) {
    // Store constant FP values with integer instructions to avoid having to
    // load the constants from the constant pool then do a store.
    if (CFP->getType() == Type::FloatTy) {
      // Reinterpret the float's bit pattern as a 32-bit integer immediate.
      union {
        unsigned I;
        float    F;
      } V;
      V.F = CFP->getValue();
      addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm(V.I);
    } else {
      // Doubles store as two 32-bit immediate halves of the bit pattern.
      union {
        uint64_t I;
        double   F;
      } V;
      V.F = CFP->getValue();
      addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm((unsigned)V.I);
      AM.Disp += 4;
      addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm(
                                                          unsigned(V.I >> 32));
    }
    
  } else if (Class == cLong) {
    // Non-constant 64-bit value: store both halves of the register pair
    // (lo = ValReg at Disp, hi = ValReg+1 at Disp+4).
    unsigned ValReg = getReg(I.getOperand(0));
    addFullAddress(BuildMI(BB, X86::MOV32mr, 5), AM).addReg(ValReg);
    AM.Disp += 4;
    addFullAddress(BuildMI(BB, X86::MOV32mr, 5), AM).addReg(ValReg+1);
  } else {
    // FIXME: stop emitting these two instructions:
    //    movl $global,%eax
    //    movl %eax,(%ebx)
    // when one instruction will suffice.  That includes when the global
    // has an offset applied to it.
    unsigned ValReg = getReg(I.getOperand(0));
    // Indexed by class; the cFP entry is the 32-bit FP store, overridden
    // below for doubles.  cLong never reaches here (handled above).
    static const unsigned Opcodes[] = {
      X86::MOV8mr, X86::MOV16mr, X86::MOV32mr, X86::FST32m
    };
    unsigned Opcode = Opcodes[Class];
    if (ValTy == Type::DoubleTy) Opcode = X86::FST64m;

    addFullAddress(BuildMI(BB, Opcode, 1+4), AM).addReg(ValReg);
  }
}
03273 
03274 
03275 /// visitCastInst - Here we have various kinds of copying with or without sign
03276 /// extension going on.
03277 ///
03278 void X86ISel::visitCastInst(CastInst &CI) {
03279   Value *Op = CI.getOperand(0);
03280 
03281   unsigned SrcClass = getClassB(Op->getType());
03282   unsigned DestClass = getClassB(CI.getType());
03283   // Noop casts are not emitted: getReg will return the source operand as the
03284   // register to use for any uses of the noop cast.
03285   if (DestClass == SrcClass) {
03286     // The only detail in this plan is that casts from double -> float are 
03287     // truncating operations that we have to codegen through memory (despite
03288     // the fact that the source/dest registers are the same class).
03289     if (CI.getType() != Type::FloatTy || Op->getType() != Type::DoubleTy)
03290       return;
03291   }
03292 
03293   // If this is a cast from a 32-bit integer to a Long type, and the only uses
03294   // of the case are GEP instructions, then the cast does not need to be
03295   // generated explicitly, it will be folded into the GEP.
03296   if (DestClass == cLong && SrcClass == cInt) {
03297     bool AllUsesAreGEPs = true;
03298     for (Value::use_iterator I = CI.use_begin(), E = CI.use_end(); I != E; ++I)
03299       if (!isa<GetElementPtrInst>(*I)) {
03300         AllUsesAreGEPs = false;
03301         break;
03302       }        
03303 
03304     // No need to codegen this cast if all users are getelementptr instrs...
03305     if (AllUsesAreGEPs) return;
03306   }
03307 
03308   // If this cast converts a load from a short,int, or long integer to a FP
03309   // value, we will have folded this cast away.
03310   if (DestClass == cFP && isa<LoadInst>(Op) && Op->hasOneUse() &&
03311       (Op->getType() == Type::ShortTy || Op->getType() == Type::IntTy ||
03312        Op->getType() == Type::LongTy))
03313     return;
03314 
03315 
03316   unsigned DestReg = getReg(CI);
03317   MachineBasicBlock::iterator MI = BB->end();
03318   emitCastOperation(BB, MI, Op, CI.getType(), DestReg);
03319 }
03320 
03321 /// emitCastOperation - Common code shared between visitCastInst and constant
03322 /// expression cast support.
03323 ///
03324 void X86ISel::emitCastOperation(MachineBasicBlock *BB,
03325                                 MachineBasicBlock::iterator IP,
03326                                 Value *Src, const Type *DestTy,
03327                                 unsigned DestReg) {
03328   const Type *SrcTy = Src->getType();
03329   unsigned SrcClass = getClassB(SrcTy);
03330   unsigned DestClass = getClassB(DestTy);
03331   unsigned SrcReg = getReg(Src, BB, IP);
03332 
03333   // Implement casts to bool by using compare on the operand followed by set if
03334   // not zero on the result.
03335   if (DestTy == Type::BoolTy) {
03336     switch (SrcClass) {
03337     case cByte:
03338       BuildMI(*BB, IP, X86::TEST8rr, 2).addReg(SrcReg).addReg(SrcReg);
03339       break;
03340     case cShort:
03341       BuildMI(*BB, IP, X86::TEST16rr, 2).addReg(SrcReg).addReg(SrcReg);
03342       break;
03343     case cInt:
03344       BuildMI(*BB, IP, X86::TEST32rr, 2).addReg(SrcReg).addReg(SrcReg);
03345       break;
03346     case cLong: {
03347       unsigned TmpReg = makeAnotherReg(Type::IntTy);
03348       BuildMI(*BB, IP, X86::OR32rr, 2, TmpReg).addReg(SrcReg).addReg(SrcReg+1);
03349       break;
03350     }
03351     case cFP:
03352       BuildMI(*BB, IP, X86::FTST, 1).addReg(SrcReg);
03353       BuildMI(*BB, IP, X86::FNSTSW8r, 0);
03354       BuildMI(*BB, IP, X86::SAHF, 1);
03355       break;
03356     }
03357 
03358     // If the zero flag is not set, then the value is true, set the byte to
03359     // true.
03360     BuildMI(*BB, IP, X86::SETNEr, 1, DestReg);
03361     return;
03362   }
03363 
03364   static const unsigned RegRegMove[] = {
03365     X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV, X86::MOV32rr
03366   };
03367 
03368   // Implement casts between values of the same type class (as determined by
03369   // getClass) by using a register-to-register move.
03370   if (SrcClass == DestClass) {
03371     if (SrcClass <= cInt || (SrcClass == cFP && SrcTy == DestTy)) {
03372       BuildMI(*BB, IP, RegRegMove[SrcClass], 1, DestReg).addReg(SrcReg);
03373     } else if (SrcClass == cFP) {
03374       if (SrcTy == Type::FloatTy) {  // double -> float
03375         assert(DestTy == Type::DoubleTy && "Unknown cFP member!");
03376         BuildMI(*BB, IP, X86::FpMOV, 1, DestReg).addReg(SrcReg);
03377       } else {                       // float -> double
03378         assert(SrcTy == Type::DoubleTy && DestTy == Type::FloatTy &&
03379                "Unknown cFP member!");
03380         // Truncate from double to float by storing to memory as short, then
03381         // reading it back.
03382         unsigned FltAlign = TM.getTargetData().getFloatAlignment();
03383         int FrameIdx = F->getFrameInfo()->CreateStackObject(4, FltAlign);
03384         addFrameReference(BuildMI(*BB, IP, X86::FST32m, 5), FrameIdx).addReg(SrcReg);
03385         addFrameReference(BuildMI(*BB, IP, X86::FLD32m, 5, DestReg), FrameIdx);
03386       }
03387     } else if (SrcClass == cLong) {
03388       BuildMI(*BB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg);
03389       BuildMI(*BB, IP, X86::MOV32rr, 1, DestReg+1).addReg(SrcReg+1);
03390     } else {
03391       assert(0 && "Cannot handle this type of cast instruction!");
03392       abort();
03393     }
03394     return;
03395   }
03396 
03397   // Handle cast of SMALLER int to LARGER int using a move with sign extension
03398   // or zero extension, depending on whether the source type was signed.
03399   if (SrcClass <= cInt && (DestClass <= cInt || DestClass == cLong) &&
03400       SrcClass < DestClass) {
03401     bool isLong = DestClass == cLong;
03402     if (isLong) DestClass = cInt;
03403 
03404     static const unsigned Opc[][4] = {
03405       { X86::MOVSX16rr8, X86::MOVSX32rr8, X86::MOVSX32rr16, X86::MOV32rr }, // s
03406       { X86::MOVZX16rr8, X86::MOVZX32rr8, X86::MOVZX32rr16, X86::MOV32rr }  // u
03407     };
03408     
03409     bool isUnsigned = SrcTy->isUnsigned() || SrcTy == Type::BoolTy;
03410     BuildMI(*BB, IP, Opc[isUnsigned][SrcClass + DestClass - 1], 1,
03411         DestReg).addReg(SrcReg);
03412 
03413     if (isLong) {  // Handle upper 32 bits as appropriate...
03414       if (isUnsigned)     // Zero out top bits...
03415         BuildMI(*BB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0);
03416       else                // Sign extend bottom half...
03417         BuildMI(*BB, IP, X86::SAR32ri, 2, DestReg+1).addReg(DestReg).addImm(31);
03418     }
03419     return;
03420   }
03421 
03422   // Special case long -> int ...
03423   if (SrcClass == cLong && DestClass == cInt) {
03424     BuildMI(*BB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg);
03425     return;
03426   }
03427   
03428   // Handle cast of LARGER int to SMALLER int using a move to EAX followed by a
03429   // move out of AX or AL.
03430   if ((SrcClass <= cInt || SrcClass == cLong) && DestClass <= cInt
03431       && SrcClass > DestClass) {
03432     static const unsigned AReg[] = { X86::AL, X86::AX, X86::EAX, 0, X86::EAX };
03433     BuildMI(*BB, IP, RegRegMove[SrcClass], 1, AReg[SrcClass]).addReg(SrcReg);
03434     BuildMI(*BB, IP, RegRegMove[DestClass], 1, DestReg).addReg(AReg[DestClass]);
03435     return;
03436   }
03437 
03438   // Handle casts from integer to floating point now...
03439   if (DestClass == cFP) {
03440     // Promote the integer to a type supported by FLD.  We do this because there
03441     // are no unsigned FLD instructions, so we must promote an unsigned value to
03442     // a larger signed value, then use FLD on the larger value.
03443     //
03444     const Type *PromoteType = 0;
03445     unsigned PromoteOpcode = 0;
03446     unsigned RealDestReg = DestReg;
03447     switch (SrcTy->getTypeID()) {
03448     case Type::BoolTyID:
03449     case Type::SByteTyID:
03450       // We don't have the facilities for directly loading byte sized data from
03451       // memory (even signed).  Promote it to 16 bits.
03452       PromoteType = Type::ShortTy;
03453       PromoteOpcode = X86::MOVSX16rr8;
03454       break;
03455     case Type::UByteTyID:
03456       PromoteType = Type::ShortTy;
03457       PromoteOpcode = X86::MOVZX16rr8;
03458       break;
03459     case Type::UShortTyID:
03460       PromoteType = Type::IntTy;
03461       PromoteOpcode = X86::MOVZX32rr16;
03462       break;
03463     case Type::ULongTyID:
03464     case Type::UIntTyID:
03465       // Don't fild into the read destination.
03466       DestReg = makeAnotherReg(Type::DoubleTy);
03467       break;
03468     default:  // No promotion needed...
03469       break;
03470     }
03471     
03472     if (PromoteType) {
03473       unsigned TmpReg = makeAnotherReg(PromoteType);
03474       BuildMI(*BB, IP, PromoteOpcode, 1, TmpReg).addReg(SrcReg);
03475       SrcTy = PromoteType;
03476       SrcClass = getClass(PromoteType);
03477       SrcReg = TmpReg;
03478     }
03479 
03480     // Spill the integer to memory and reload it from there...
03481     int FrameIdx =
03482       F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData());
03483 
03484     if (SrcClass == cLong) {
03485       addFrameReference(BuildMI(*BB, IP, X86::MOV32mr, 5),
03486                         FrameIdx).addReg(SrcReg);
03487       addFrameReference(BuildMI(*BB, IP, X86::MOV32mr, 5),
03488                         FrameIdx, 4).addReg(SrcReg+1);
03489     } else {
03490       static const unsigned Op1[] = { X86::MOV8mr, X86::MOV16mr, X86::MOV32mr };
03491       addFrameReference(BuildMI(*BB, IP, Op1[SrcClass], 5),
03492                         FrameIdx).addReg(SrcReg);
03493     }
03494 
03495     static const unsigned Op2[] =
03496       { 0/*byte*/, X86::FILD16m, X86::FILD32m, 0/*FP*/, X86::FILD64m };
03497     addFrameReference(BuildMI(*BB, IP, Op2[SrcClass], 5, DestReg), FrameIdx);
03498 
03499     if (SrcTy == Type::UIntTy) {
03500       // If this is a cast from uint -> double, we need to be careful about if
03501       // the "sign" bit is set.  If so, we don't want to make a negative number,
03502       // we want to make a positive number.  Emit code to add an offset if the
03503       // sign bit is set.
03504 
03505       // Compute whether the sign bit is set by shifting the reg right 31 bits.
03506       unsigned IsNeg = makeAnotherReg(Type::IntTy);
03507       BuildMI(BB, X86::SHR32ri, 2, IsNeg).addReg(SrcReg).addImm(31);
03508 
03509       // Create a CP value that has the offset in one word and 0 in the other.
03510       static ConstantInt *TheOffset = ConstantUInt::get(Type::ULongTy,
03511                                                         0x4f80000000000000ULL);
03512       unsigned CPI = F->getConstantPool()->getConstantPoolIndex(TheOffset);
03513       BuildMI(BB, X86::FADD32m, 5, RealDestReg).addReg(DestReg)
03514         .addConstantPoolIndex(CPI).addZImm(4).addReg(IsNeg).addSImm(0);
03515 
03516     } else if (SrcTy == Type::ULongTy) {
03517       // We need special handling for unsigned 64-bit integer sources.  If the
03518       // input number has the "sign bit" set, then we loaded it incorrectly as a
03519       // negative 64-bit number.  In this case, add an offset value.
03520 
03521       // Emit a test instruction to see if the dynamic input value was signed.
03522       BuildMI(*BB, IP, X86::TEST32rr, 2).addReg(SrcReg+1).addReg(SrcReg+1);
03523 
03524       // If the sign bit is set, get a pointer to an offset, otherwise get a
03525       // pointer to a zero.
03526       MachineConstantPool *CP = F->getConstantPool();
03527       unsigned Zero = makeAnotherReg(Type::IntTy);
03528       Constant *Null = Constant::getNullValue(Type::UIntTy);
03529       addConstantPoolReference(BuildMI(*BB, IP, X86::LEA32r, 5, Zero), 
03530                                CP->getConstantPoolIndex(Null));
03531       unsigned Offset = makeAnotherReg(Type::IntTy);
03532       Constant *OffsetCst = ConstantUInt::get(Type::UIntTy, 0x5f800000);
03533                                              
03534       addConstantPoolReference(BuildMI(*BB, IP, X86::LEA32r, 5, Offset),
03535                                CP->getConstantPoolIndex(OffsetCst));
03536       unsigned Addr = makeAnotherReg(Type::IntTy);
03537       BuildMI(*BB, IP, X86::CMOVS32rr, 2, Addr).addReg(Zero).addReg(Offset);
03538 
03539       // Load the constant for an add.  FIXME: this could make an 'fadd' that
03540       // reads directly from memory, but we don't support these yet.
03541       unsigned ConstReg = makeAnotherReg(Type::DoubleTy);
03542       addDirectMem(BuildMI(*BB, IP, X86::FLD32m, 4, ConstReg), Addr);
03543 
03544       BuildMI(*BB, IP, X86::FpADD, 2, RealDestReg)
03545                 .addReg(ConstReg).addReg(DestReg);
03546     }
03547 
03548     return;
03549   }
03550 
03551   // Handle casts from floating point to integer now...
03552   if (SrcClass == cFP) {
03553     // Change the floating point control register to use "round towards zero"
03554     // mode when truncating to an integer value.
03555     //
03556     int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
03557     addFrameReference(BuildMI(*BB, IP, X86::FNSTCW16m, 4), CWFrameIdx);
03558 
03559     // Load the old value of the high byte of the control word...
03560     unsigned HighPartOfCW = makeAnotherReg(Type::UByteTy);
03561     addFrameReference(BuildMI(*BB, IP, X86::MOV8rm, 4, HighPartOfCW),
03562                       CWFrameIdx, 1);
03563 
03564     // Set the high part to be round to zero...
03565     addFrameReference(BuildMI(*BB, IP, X86::MOV8mi, 5),
03566                       CWFrameIdx, 1).addImm(12);
03567 
03568     // Reload the modified control word now...
03569     addFrameReference(BuildMI(*BB, IP, X86::FLDCW16m, 4), CWFrameIdx);
03570     
03571     // Restore the memory image of control word to original value
03572     addFrameReference(BuildMI(*BB, IP, X86::MOV8mr, 5),
03573                       CWFrameIdx, 1).addReg(HighPartOfCW);
03574 
03575     // We don't have the facilities for directly storing byte sized data to
03576     // memory.  Promote it to 16 bits.  We also must promote unsigned values to
03577     // larger classes because we only have signed FP stores.
03578     unsigned StoreClass  = DestClass;
03579     const Type *StoreTy  = DestTy;
03580     if (StoreClass == cByte || DestTy->isUnsigned())
03581       switch (StoreClass) {
03582       case cByte:  StoreTy = Type::ShortTy; StoreClass = cShort; break;
03583       case cShort: StoreTy = Type::IntTy;   StoreClass = cInt;   break;
03584       case cInt:   StoreTy = Type::LongTy;  StoreClass = cLong;  break;
03585       // The following treatment of cLong may not be perfectly right,
03586       // but it survives chains of casts of the form
03587       // double->ulong->double.
03588       case cLong:  StoreTy = Type::LongTy;  StoreClass = cLong;  break;
03589       default: assert(0 && "Unknown store class!");
03590       }
03591 
03592     // Spill the integer to memory and reload it from there...
03593     int FrameIdx =
03594       F->getFrameInfo()->CreateStackObject(StoreTy, TM.getTargetData());
03595 
03596     static const unsigned Op1[] =
03597       { 0, X86::FIST16m, X86::FIST32m, 0, X86::FISTP64m };
03598     addFrameReference(BuildMI(*BB, IP, Op1[StoreClass], 5),
03599                       FrameIdx).addReg(SrcReg);
03600 
03601     if (DestClass == cLong) {
03602       addFrameReference(BuildMI(*BB, IP, X86::MOV32rm, 4, DestReg), FrameIdx);
03603       addFrameReference(BuildMI(*BB, IP, X86::MOV32rm, 4, DestReg+1),
03604                         FrameIdx, 4);
03605     } else {
03606       static const unsigned Op2[] = { X86::MOV8rm, X86::MOV16rm, X86::MOV32rm };
03607       addFrameReference(BuildMI(*BB, IP, Op2[DestClass], 4, DestReg), FrameIdx);
03608     }
03609 
03610     // Reload the original control word now...
03611     addFrameReference(BuildMI(*BB, IP, X86::FLDCW16m, 4), CWFrameIdx);
03612     return;
03613   }
03614 
03615   // Anything we haven't handled already, we can't (yet) handle at all.
03616   assert(0 && "Unhandled cast instruction!");
03617   abort();
03618 }
03619 
03620 /// visitVANextInst - Implement the va_next instruction...
03621 ///
03622 void X86ISel::visitVANextInst(VANextInst &I) {
03623   unsigned VAList = getReg(I.getOperand(0));
03624   unsigned DestReg = getReg(I);
03625 
03626   unsigned Size;
03627   switch (I.getArgType()->getTypeID()) {
03628   default:
03629     std::cerr << I;
03630     assert(0 && "Error: bad type for va_next instruction!");
03631     return;
03632   case Type::PointerTyID:
03633   case Type::UIntTyID:
03634   case Type::IntTyID:
03635     Size = 4;
03636     break;
03637   case Type::ULongTyID:
03638   case Type::LongTyID:
03639   case Type::DoubleTyID:
03640     Size = 8;
03641     break;
03642   }
03643 
03644   // Increment the VAList pointer...
03645   BuildMI(BB, X86::ADD32ri, 2, DestReg).addReg(VAList).addImm(Size);
03646 }
03647 
03648 void X86ISel::visitVAArgInst(VAArgInst &I) {
03649   unsigned VAList = getReg(I.getOperand(0));
03650   unsigned DestReg = getReg(I);
03651 
03652   switch (I.getType()->getTypeID()) {
03653   default:
03654     std::cerr << I;
03655     assert(0 && "Error: bad type for va_next instruction!");
03656     return;
03657   case Type::PointerTyID:
03658   case Type::UIntTyID:
03659   case Type::IntTyID:
03660     addDirectMem(BuildMI(BB, X86::MOV32rm, 4, DestReg), VAList);
03661     break;
03662   case Type::ULongTyID:
03663   case Type::LongTyID:
03664     addDirectMem(BuildMI(BB, X86::MOV32rm, 4, DestReg), VAList);
03665     addRegOffset(BuildMI(BB, X86::MOV32rm, 4, DestReg+1), VAList, 4);
03666     break;
03667   case Type::DoubleTyID:
03668     addDirectMem(BuildMI(BB, X86::FLD64m, 4, DestReg), VAList);
03669     break;
03670   }
03671 }
03672 
03673 /// visitGetElementPtrInst - instruction-select GEP instructions
03674 ///
03675 void X86ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
03676   // If this GEP instruction will be folded into all of its users, we don't need
03677   // to explicitly calculate it!
03678   X86AddressMode AM;
03679   if (isGEPFoldable(0, I.getOperand(0), I.op_begin()+1, I.op_end(), AM)) {
03680     // Check all of the users of the instruction to see if they are loads and
03681     // stores.
03682     bool AllWillFold = true;
03683     for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI)
03684       if (cast<Instruction>(*UI)->getOpcode() != Instruction::Load)
03685         if (cast<Instruction>(*UI)->getOpcode() != Instruction::Store ||
03686             cast<Instruction>(*UI)->getOperand(0) == &I) {
03687           AllWillFold = false;
03688           break;
03689         }
03690 
03691     // If the instruction is foldable, and will be folded into all users, don't
03692     // emit it!
03693     if (AllWillFold) return;
03694   }
03695 
03696   unsigned outputReg = getReg(I);
03697   emitGEPOperation(BB, BB->end(), I.getOperand(0),
03698                    I.op_begin()+1, I.op_end(), outputReg);
03699 }
03700 
/// getGEPIndex - Inspect the getelementptr operands specified with GEPOps and
/// GEPTypes (the derived types being stepped through at each level).  On return
/// from this function, if some indexes of the instruction are representable as
/// an X86 lea instruction, the machine operands are put into the AM addressing
/// mode and the consumed indexes are popped from the GEPOps/GEPTypes lists.
/// If this returns an addressing mode that only partially consumes the input,
/// the BaseReg input of the addressing mode must be left free for chaining.
///
/// Note that there is one fewer entry in GEPTypes than there is in GEPOps.
///
/// If MBB is null, no code is emitted: register fields that would require
/// emitted code are set to the dummy value 1 so that callers (isGEPFoldable)
/// can test foldability without generating anything.
///
void X86ISel::getGEPIndex(MachineBasicBlock *MBB,
                          MachineBasicBlock::iterator IP,
                          std::vector<Value*> &GEPOps,
                          std::vector<const Type*> &GEPTypes,
                          X86AddressMode &AM) {
  const TargetData &TD = TM.getTargetData();

  // Clear out the state we are working with...
  AM.BaseType = X86AddressMode::RegBase;
  AM.Base.Reg = 0;   // No base register
  AM.Scale = 1;      // Unit scale
  AM.IndexReg = 0;   // No index register
  AM.Disp = 0;       // No displacement

  // While there are GEP indexes that can be folded into the current address,
  // keep processing them.  Indexes are consumed from the back (innermost
  // level first).
  while (!GEPTypes.empty()) {
    if (const StructType *StTy = dyn_cast<StructType>(GEPTypes.back())) {
      // It's a struct access.  CUI is the index into the structure,
      // which names the field. This index must have unsigned type.
      const ConstantUInt *CUI = cast<ConstantUInt>(GEPOps.back());

      // Use the TargetData structure to pick out what the layout of the
      // structure is in memory.  Since the structure index must be constant, we
      // can get its value and use it to find the right byte offset from the
      // StructLayout class's list of structure member offsets.
      AM.Disp += TD.getStructLayout(StTy)->MemberOffsets[CUI->getValue()];
      GEPOps.pop_back();        // Consume a GEP operand
      GEPTypes.pop_back();
    } else {
      // It's an array or pointer access: [ArraySize x ElementType].
      const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back());
      Value *idx = GEPOps.back();

      // idx is the index into the array.  Unlike with structure
      // indices, we may not know its actual value at code-generation
      // time.

      // If idx is a constant, fold it into the offset.
      unsigned TypeSize = TD.getTypeSize(SqTy->getElementType());
      if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(idx)) {
        AM.Disp += TypeSize*CSI->getValue();
      } else if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(idx)) {
        AM.Disp += TypeSize*CUI->getValue();
      } else {
        // If the index reg is already taken, we can't handle this index.
        if (AM.IndexReg) return;

        // If this is a size that we can handle, then add the index as a
        // scaled index register.
        switch (TypeSize) {
        case 1: case 2: case 4: case 8:
          // These are all acceptable scales on X86.
          AM.Scale = TypeSize;
          break;
        default:
          // Otherwise, we can't handle this scale
          return;
        }

        // Look through a cast of a 32-bit int/uint: the scaled index can use
        // the narrower source value's register directly.
        if (CastInst *CI = dyn_cast<CastInst>(idx))
          if (CI->getOperand(0)->getType() == Type::IntTy ||
              CI->getOperand(0)->getType() == Type::UIntTy)
            idx = CI->getOperand(0);

        // MBB == 0 means "foldability probe only": use dummy register 1.
        AM.IndexReg = MBB ? getReg(idx, MBB, IP) : 1;
      }

      GEPOps.pop_back();        // Consume a GEP operand
      GEPTypes.pop_back();
    }
  }

  // GEPTypes is empty, which means we have a single operand left.  Set it as
  // the base register.
  //
  assert(AM.Base.Reg == 0);

  // A fixed-size alloca becomes a frame-index base...
  if (AllocaInst *AI = dyn_castFixedAlloca(GEPOps.back())) {
    AM.BaseType = X86AddressMode::FrameIndexBase;
    AM.Base.FrameIndex = getFixedSizedAllocaFI(AI);
    GEPOps.pop_back();
    return;
  }

  // ...a global variable becomes a GV-relative address...
  if (GlobalValue *GV = dyn_cast<GlobalValue>(GEPOps.back())) {
    AM.GV = GV;
    GEPOps.pop_back();
    return;
  }

  // ...and anything else is materialized into a base register.  Exactly one
  // operand remains at this point, so GEPOps[0] is the same as GEPOps.back().
  AM.Base.Reg = MBB ? getReg(GEPOps[0], MBB, IP) : 1;
  GEPOps.pop_back();        // Consume the last GEP operand
}
03805 
03806 
03807 /// isGEPFoldable - Return true if the specified GEP can be completely
03808 /// folded into the addressing mode of a load/store or lea instruction.
03809 bool X86ISel::isGEPFoldable(MachineBasicBlock *MBB,
03810                             Value *Src, User::op_iterator IdxBegin,
03811                             User::op_iterator IdxEnd, X86AddressMode &AM) {
03812 
03813   std::vector<Value*> GEPOps;
03814   GEPOps.resize(IdxEnd-IdxBegin+1);
03815   GEPOps[0] = Src;
03816   std::copy(IdxBegin, IdxEnd, GEPOps.begin()+1);
03817   
03818   std::vector<const Type*>
03819     GEPTypes(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
03820              gep_type_end(Src->getType(), IdxBegin, IdxEnd));
03821 
03822   MachineBasicBlock::iterator IP;
03823   if (MBB) IP = MBB->end();
03824   getGEPIndex(MBB, IP, GEPOps, GEPTypes, AM);
03825 
03826   // We can fold it away iff the getGEPIndex call eliminated all operands.
03827   return GEPOps.empty();
03828 }
03829 
/// emitGEPOperation - Emit code computing the address denoted by a GEP into
/// TargetReg.  As many indexes as possible are folded into LEA instructions
/// via getGEPIndex; remaining variable indexes get explicit multiply/add
/// sequences.
///
/// NOTE: code is generated back-to-front.  Each loop iteration emits the
/// *final* operation into TargetReg, allocates a fresh virtual register for
/// the not-yet-computed earlier portion of the address, then backs IP up so
/// the next iteration inserts before what was just emitted.
///
void X86ISel::emitGEPOperation(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator IP,
                               Value *Src, User::op_iterator IdxBegin,
                               User::op_iterator IdxEnd, unsigned TargetReg) {
  const TargetData &TD = TM.getTargetData();

  // If this is a getelementptr null, with all constant integer indices, just
  // replace it with TargetReg = <constant-folded byte offset>.
  if (isa<ConstantPointerNull>(Src)) {
    User::op_iterator I = IdxBegin;
    for (; I != IdxEnd; ++I)
      if (!isa<ConstantInt>(*I))
        break;
    if (I == IdxEnd) {   // All constant indices
      unsigned Offset = TD.getIndexedOffset(Src->getType(),
                                         std::vector<Value*>(IdxBegin, IdxEnd));
      BuildMI(*MBB, IP, X86::MOV32ri, 1, TargetReg).addImm(Offset);
      return;
    }
  }

  // GEPOps holds the source pointer followed by all of the indexes.
  std::vector<Value*> GEPOps;
  GEPOps.resize(IdxEnd-IdxBegin+1);
  GEPOps[0] = Src;
  std::copy(IdxBegin, IdxEnd, GEPOps.begin()+1);

  // One entry per index: the type being stepped through at that level.
  std::vector<const Type*> GEPTypes;
  GEPTypes.assign(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
                  gep_type_end(Src->getType(), IdxBegin, IdxEnd));

  // Keep emitting instructions until we consume the entire GEP instruction.
  while (!GEPOps.empty()) {
    unsigned OldSize = GEPOps.size();
    X86AddressMode AM;
    getGEPIndex(MBB, IP, GEPOps, GEPTypes, AM);

    if (GEPOps.size() != OldSize) {
      // getGEPIndex consumed some of the input.  Build an LEA instruction here.
      unsigned NextTarget = 0;
      if (!GEPOps.empty()) {
        assert(AM.Base.Reg == 0 &&
           "getGEPIndex should have left the base register open for chaining!");
        // Base of this LEA is the (not yet emitted) rest of the GEP.
        NextTarget = AM.Base.Reg = makeAnotherReg(Type::UIntTy);
      }

      if (AM.BaseType == X86AddressMode::RegBase &&
          AM.IndexReg == 0 && AM.Disp == 0 && !AM.GV)
        // Degenerate mode (pure register base): a copy beats an LEA.
        BuildMI(*MBB, IP, X86::MOV32rr, 1, TargetReg).addReg(AM.Base.Reg);
      else if (AM.BaseType == X86AddressMode::RegBase && AM.Base.Reg == 0 &&
               AM.IndexReg == 0 && AM.Disp == 0)
        // Bare global address: materialize with a move-immediate.
        BuildMI(*MBB, IP, X86::MOV32ri, 1, TargetReg).addGlobalAddress(AM.GV);
      else
        addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 5, TargetReg), AM);
      --IP;                 // Insert the next instruction before this one.
      TargetReg = NextTarget;
    } else if (GEPTypes.empty()) {
      // The getGEPIndex operation didn't want to build an LEA.  Check to see if
      // all operands are consumed but the base pointer.  If so, just load it
      // into the register.
      if (GlobalValue *GV = dyn_cast<GlobalValue>(GEPOps[0])) {
        BuildMI(*MBB, IP, X86::MOV32ri, 1, TargetReg).addGlobalAddress(GV);
      } else {
        unsigned BaseReg = getReg(GEPOps[0], MBB, IP);
        BuildMI(*MBB, IP, X86::MOV32rr, 1, TargetReg).addReg(BaseReg);
      }
      break;                // we are now done

    } else {
      // It's an array or pointer access: [ArraySize x ElementType].
      const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back());
      Value *idx = GEPOps.back();
      GEPOps.pop_back();        // Consume a GEP operand
      GEPTypes.pop_back();

      // Many GEP instructions use a [cast (int/uint) to LongTy] as their
      // operand on X86.  Handle this case directly now...
      if (CastInst *CI = dyn_cast<CastInst>(idx))
        if (CI->getOperand(0)->getType() == Type::IntTy ||
            CI->getOperand(0)->getType() == Type::UIntTy)
          idx = CI->getOperand(0);

      // We want to add BaseReg to (idxReg * sizeof ElementType). First, we
      // must find the size of the pointed-to type (Not coincidentally, the next
      // type is the type of the elements in the array).
      const Type *ElTy = SqTy->getElementType();
      unsigned elementSize = TD.getTypeSize(ElTy);

      // If idxReg is a constant, we don't need to perform the multiply!
      if (ConstantInt *CSI = dyn_cast<ConstantInt>(idx)) {
        if (!CSI->isNullValue()) {
          unsigned Offset = elementSize*CSI->getRawValue();
          // Reg is defined later, by instructions inserted before this ADD.
          unsigned Reg = makeAnotherReg(Type::UIntTy);
          BuildMI(*MBB, IP, X86::ADD32ri, 2, TargetReg)
                                .addReg(Reg).addImm(Offset);
          --IP;            // Insert the next instruction before this one.
          TargetReg = Reg; // Codegen the rest of the GEP into this
        }
      } else if (elementSize == 1) {
        // If the element size is 1, we don't have to multiply, just add
        unsigned idxReg = getReg(idx, MBB, IP);
        unsigned Reg = makeAnotherReg(Type::UIntTy);
        BuildMI(*MBB, IP, X86::ADD32rr, 2,TargetReg).addReg(Reg).addReg(idxReg);
        --IP;            // Insert the next instruction before this one.
        TargetReg = Reg; // Codegen the rest of the GEP into this
      } else {
        unsigned idxReg = getReg(idx, MBB, IP);
        unsigned OffsetReg = makeAnotherReg(Type::UIntTy);

        // Make sure we can back the iterator up to point to the first
        // instruction emitted (doMultiplyConst may emit several).
        MachineBasicBlock::iterator BeforeIt = IP;
        if (IP == MBB->begin())
          BeforeIt = MBB->end();  // Sentinel: IP was at the block head.
        else
          --BeforeIt;
        doMultiplyConst(MBB, IP, OffsetReg, Type::IntTy, idxReg, elementSize);

        // Emit an ADD to add OffsetReg to the basePtr.
        unsigned Reg = makeAnotherReg(Type::UIntTy);
        BuildMI(*MBB, IP, X86::ADD32rr, 2, TargetReg)
                          .addReg(Reg).addReg(OffsetReg);

        // Step to the first instruction of the multiply.
        if (BeforeIt == MBB->end())
          IP = MBB->begin();
        else
          IP = ++BeforeIt;

        TargetReg = Reg; // Codegen the rest of the GEP into this
      }
    }
  }
}
03963 
03964 /// visitAllocaInst - If this is a fixed size alloca, allocate space from the
03965 /// frame manager, otherwise do it the hard way.
03966 ///
03967 void X86ISel::visitAllocaInst(AllocaInst &I) {
03968   // If this is a fixed size alloca in the entry block for the function, we
03969   // statically stack allocate the space, so we don't need to do anything here.
03970   //
03971   if (dyn_castFixedAlloca(&I)) return;
03972   
03973   // Find the data size of the alloca inst's getAllocatedType.
03974   const Type *Ty = I.getAllocatedType();
03975   unsigned TySize = TM.getTargetData().getTypeSize(Ty);
03976 
03977   // Create a register to hold the temporary result of multiplying the type size
03978   // constant by the variable amount.
03979   unsigned TotalSizeReg = makeAnotherReg(Type::UIntTy);
03980   unsigned SrcReg1 = getReg(I.getArraySize());
03981   
03982   // TotalSizeReg = mul <numelements>, <TypeSize>
03983   MachineBasicBlock::iterator MBBI = BB->end();
03984   doMultiplyConst(BB, MBBI, TotalSizeReg, Type::UIntTy, SrcReg1, TySize);
03985 
03986   // AddedSize = add <TotalSizeReg>, 15
03987   unsigned AddedSizeReg = makeAnotherReg(Type::UIntTy);
03988   BuildMI(BB, X86::ADD32ri, 2, AddedSizeReg).addReg(TotalSizeReg).addImm(15);
03989 
03990   // AlignedSize = and <AddedSize>, ~15
03991   unsigned AlignedSize = makeAnotherReg(Type::UIntTy);
03992   BuildMI(BB, X86::AND32ri, 2, AlignedSize).addReg(AddedSizeReg).addImm(~15);
03993   
03994   // Subtract size from stack pointer, thereby allocating some space.
03995   BuildMI(BB, X86::SUB32rr, 2, X86::ESP).addReg(X86::ESP).addReg(AlignedSize);
03996 
03997   // Put a pointer to the space into the result register, by copying
03998   // the stack pointer.
03999   BuildMI(BB, X86::MOV32rr, 1, getReg(I)).addReg(X86::ESP);
04000 
04001   // Inform the Frame Information that we have just allocated a variable-sized
04002   // object.
04003   F->getFrameInfo()->CreateVariableSizedObject();
04004 }
04005 
04006 /// visitMallocInst - Malloc instructions are code generated into direct calls
04007 /// to the library malloc.
04008 ///
04009 void X86ISel::visitMallocInst(MallocInst &I) {
04010   unsigned AllocSize = TM.getTargetData().getTypeSize(I.getAllocatedType());
04011   unsigned Arg;
04012 
04013   if (ConstantUInt *C = dyn_cast<ConstantUInt>(I.getOperand(0))) {
04014     Arg = getReg(ConstantUInt::get(Type::UIntTy, C->getValue() * AllocSize));
04015   } else {
04016     Arg = makeAnotherReg(Type::UIntTy);
04017     unsigned Op0Reg = getReg(I.getOperand(0));
04018     MachineBasicBlock::iterator MBBI = BB->end();
04019     doMultiplyConst(BB, MBBI, Arg, Type::UIntTy, Op0Reg, AllocSize);
04020   }
04021 
04022   std::vector<ValueRecord> Args;
04023   Args.push_back(ValueRecord(Arg, Type::UIntTy));
04024   MachineInstr *TheCall = BuildMI(X86::CALLpcrel32,
04025                                   1).addExternalSymbol("malloc", true);
04026   doCall(ValueRecord(getReg(I), I.getType()), TheCall, Args);
04027 }
04028 
04029 
04030 /// visitFreeInst - Free instructions are code gen'd to call the free libc
04031 /// function.
04032 ///
04033 void X86ISel::visitFreeInst(FreeInst &I) {
04034   std::vector<ValueRecord> Args;
04035   Args.push_back(ValueRecord(I.getOperand(0)));
04036   MachineInstr *TheCall = BuildMI(X86::CALLpcrel32,
04037                                   1).addExternalSymbol("free", true);
04038   doCall(ValueRecord(0, Type::VoidTy), TheCall, Args);
04039 }
04040    
/// createX86SimpleInstructionSelector - This pass converts an LLVM function
/// into a machine code representation in a very simple peep-hole fashion.  The
/// generated code sucks but the implementation is nice and simple.
///
FunctionPass *llvm::createX86SimpleInstructionSelector(TargetMachine &TM) {
  return new X86ISel(TM);
}