LLVM API Documentation
00001 //===-- X86ISelSimple.cpp - A simple instruction selector for x86 ---------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file defines a simple peephole instruction selector for the x86 target 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "X86.h" 00015 #include "X86InstrBuilder.h" 00016 #include "X86InstrInfo.h" 00017 #include "llvm/Constants.h" 00018 #include "llvm/DerivedTypes.h" 00019 #include "llvm/Function.h" 00020 #include "llvm/Instructions.h" 00021 #include "llvm/Pass.h" 00022 #include "llvm/CodeGen/IntrinsicLowering.h" 00023 #include "llvm/CodeGen/MachineConstantPool.h" 00024 #include "llvm/CodeGen/MachineFrameInfo.h" 00025 #include "llvm/CodeGen/MachineFunction.h" 00026 #include "llvm/CodeGen/SSARegMap.h" 00027 #include "llvm/Target/MRegisterInfo.h" 00028 #include "llvm/Target/TargetMachine.h" 00029 #include "llvm/Support/GetElementPtrTypeIterator.h" 00030 #include "llvm/Support/InstVisitor.h" 00031 #include "llvm/ADT/Statistic.h" 00032 using namespace llvm; 00033 00034 namespace { 00035 Statistic<> 00036 NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added"); 00037 00038 /// TypeClass - Used by the X86 backend to group LLVM types by their basic X86 00039 /// Representation. 00040 /// 00041 enum TypeClass { 00042 cByte, cShort, cInt, cFP, cLong 00043 }; 00044 } 00045 00046 /// getClass - Turn a primitive type into a "class" number which is based on the 00047 /// size of the type, and whether or not it is floating point. 00048 /// 00049 static inline TypeClass getClass(const Type *Ty) { 00050 switch (Ty->getTypeID()) { 00051 case Type::SByteTyID: 00052 case Type::UByteTyID: return cByte; // Byte operands are class #0 00053 case Type::ShortTyID: 00054 case Type::UShortTyID: return cShort; // Short operands are class #1 00055 case Type::IntTyID: 00056 case Type::UIntTyID: 00057 case Type::PointerTyID: return cInt; // Int's and pointers are class #2 00058 00059 case Type::FloatTyID: 00060 case Type::DoubleTyID: return cFP; // Floating Point is #3 00061 00062 case Type::LongTyID: 00063 case Type::ULongTyID: return cLong; // Longs are class #4 00064 default: 00065 assert(0 && "Invalid type to getClass!"); 00066 return cByte; // not reached 00067 } 00068 } 00069 00070 // getClassB - Just like getClass, but treat boolean values as bytes. 00071 static inline TypeClass getClassB(const Type *Ty) { 00072 if (Ty == Type::BoolTy) return cByte; 00073 return getClass(Ty); 00074 } 00075 00076 namespace { 00077 struct X86ISel : public FunctionPass, InstVisitor<X86ISel> { 00078 TargetMachine &TM; 00079 MachineFunction *F; // The function we are compiling into 00080 MachineBasicBlock *BB; // The current MBB we are compiling 00081 int VarArgsFrameIndex; // FrameIndex for start of varargs area 00082 int ReturnAddressIndex; // FrameIndex for the return address 00083 00084 std::map<Value*, unsigned> RegMap; // Mapping between Val's and SSA Regs 00085 00086 // MBBMap - Mapping between LLVM BB -> Machine BB 00087 std::map<const BasicBlock*, MachineBasicBlock*> MBBMap; 00088 00089 // AllocaMap - Mapping from fixed sized alloca instructions to the 00090 // FrameIndex for the alloca. 00091 std::map<AllocaInst*, unsigned> AllocaMap; 00092 00093 X86ISel(TargetMachine &tm) : TM(tm), F(0), BB(0) {} 00094 00095 /// runOnFunction - Top level implementation of instruction selection for 00096 /// the entire function. 00097 /// 00098 bool runOnFunction(Function &Fn) { 00099 // First pass over the function, lower any unknown intrinsic functions 00100 // with the IntrinsicLowering class. 00101 LowerUnknownIntrinsicFunctionCalls(Fn); 00102 00103 F = &MachineFunction::construct(&Fn, TM); 00104 00105 // Create all of the machine basic blocks for the function... 00106 for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) 00107 F->getBasicBlockList().push_back(MBBMap[I] = new MachineBasicBlock(I)); 00108 00109 BB = &F->front(); 00110 00111 // Set up a frame object for the return address. This is used by the 00112 // llvm.returnaddress & llvm.frameaddress intrinisics. 00113 ReturnAddressIndex = F->getFrameInfo()->CreateFixedObject(4, -4); 00114 00115 // Copy incoming arguments off of the stack... 00116 LoadArgumentsToVirtualRegs(Fn); 00117 00118 // Instruction select everything except PHI nodes 00119 visit(Fn); 00120 00121 // Select the PHI nodes 00122 SelectPHINodes(); 00123 00124 // Insert the FP_REG_KILL instructions into blocks that need them. 00125 InsertFPRegKills(); 00126 00127 RegMap.clear(); 00128 MBBMap.clear(); 00129 AllocaMap.clear(); 00130 F = 0; 00131 // We always build a machine code representation for the function 00132 return true; 00133 } 00134 00135 virtual const char *getPassName() const { 00136 return "X86 Simple Instruction Selection"; 00137 } 00138 00139 /// visitBasicBlock - This method is called when we are visiting a new basic 00140 /// block. This simply creates a new MachineBasicBlock to emit code into 00141 /// and adds it to the current MachineFunction. Subsequent visit* for 00142 /// instructions will be invoked for all instructions in the basic block. 00143 /// 00144 void visitBasicBlock(BasicBlock &LLVM_BB) { 00145 BB = MBBMap[&LLVM_BB]; 00146 } 00147 00148 /// LowerUnknownIntrinsicFunctionCalls - This performs a prepass over the 00149 /// function, lowering any calls to unknown intrinsic functions into the 00150 /// equivalent LLVM code. 00151 /// 00152 void LowerUnknownIntrinsicFunctionCalls(Function &F); 00153 00154 /// LoadArgumentsToVirtualRegs - Load all of the arguments to this function 00155 /// from the stack into virtual registers. 00156 /// 00157 void LoadArgumentsToVirtualRegs(Function &F); 00158 00159 /// SelectPHINodes - Insert machine code to generate phis. This is tricky 00160 /// because we have to generate our sources into the source basic blocks, 00161 /// not the current one. 00162 /// 00163 void SelectPHINodes(); 00164 00165 /// InsertFPRegKills - Insert FP_REG_KILL instructions into basic blocks 00166 /// that need them. This only occurs due to the floating point stackifier 00167 /// not being aggressive enough to handle arbitrary global stackification. 00168 /// 00169 void InsertFPRegKills(); 00170 00171 // Visitation methods for various instructions. These methods simply emit 00172 // fixed X86 code for each instruction. 00173 // 00174 00175 // Control flow operators 00176 void visitReturnInst(ReturnInst &RI); 00177 void visitBranchInst(BranchInst &BI); 00178 void visitUnreachableInst(UnreachableInst &UI) {} 00179 00180 struct ValueRecord { 00181 Value *Val; 00182 unsigned Reg; 00183 const Type *Ty; 00184 ValueRecord(unsigned R, const Type *T) : Val(0), Reg(R), Ty(T) {} 00185 ValueRecord(Value *V) : Val(V), Reg(0), Ty(V->getType()) {} 00186 }; 00187 void doCall(const ValueRecord &Ret, MachineInstr *CallMI, 00188 const std::vector<ValueRecord> &Args); 00189 void visitCallInst(CallInst &I); 00190 void visitIntrinsicCall(Intrinsic::ID ID, CallInst &I); 00191 00192 // Arithmetic operators 00193 void visitSimpleBinary(BinaryOperator &B, unsigned OpcodeClass); 00194 void visitAdd(BinaryOperator &B) { visitSimpleBinary(B, 0); } 00195 void visitSub(BinaryOperator &B) { visitSimpleBinary(B, 1); } 00196 void visitMul(BinaryOperator &B); 00197 00198 void visitDiv(BinaryOperator &B) { visitDivRem(B); } 00199 void visitRem(BinaryOperator &B) { visitDivRem(B); } 00200 void visitDivRem(BinaryOperator &B); 00201 00202 // Bitwise operators 00203 void visitAnd(BinaryOperator &B) { visitSimpleBinary(B, 2); } 00204 void visitOr (BinaryOperator &B) { visitSimpleBinary(B, 3); } 00205 void visitXor(BinaryOperator &B) { visitSimpleBinary(B, 4); } 00206 00207 // Comparison operators... 00208 void visitSetCondInst(SetCondInst &I); 00209 unsigned EmitComparison(unsigned OpNum, Value *Op0, Value *Op1, 00210 MachineBasicBlock *MBB, 00211 MachineBasicBlock::iterator MBBI); 00212 void visitSelectInst(SelectInst &SI); 00213 00214 00215 // Memory Instructions 00216 void visitLoadInst(LoadInst &I); 00217 void visitStoreInst(StoreInst &I); 00218 void visitGetElementPtrInst(GetElementPtrInst &I); 00219 void visitAllocaInst(AllocaInst &I); 00220 void visitMallocInst(MallocInst &I); 00221 void visitFreeInst(FreeInst &I); 00222 00223 // Other operators 00224 void visitShiftInst(ShiftInst &I); 00225 void visitPHINode(PHINode &I) {} // PHI nodes handled by second pass 00226 void visitCastInst(CastInst &I); 00227 void visitVANextInst(VANextInst &I); 00228 void visitVAArgInst(VAArgInst &I); 00229 00230 void visitInstruction(Instruction &I) { 00231 std::cerr << "Cannot instruction select: " << I; 00232 abort(); 00233 } 00234 00235 /// promote32 - Make a value 32-bits wide, and put it somewhere. 00236 /// 00237 void promote32(unsigned targetReg, const ValueRecord &VR); 00238 00239 /// getAddressingMode - Get the addressing mode to use to address the 00240 /// specified value. The returned value should be used with addFullAddress. 00241 void getAddressingMode(Value *Addr, X86AddressMode &AM); 00242 00243 00244 /// getGEPIndex - This is used to fold GEP instructions into X86 addressing 00245 /// expressions. 00246 void getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP, 00247 std::vector<Value*> &GEPOps, 00248 std::vector<const Type*> &GEPTypes, 00249 X86AddressMode &AM); 00250 00251 /// isGEPFoldable - Return true if the specified GEP can be completely 00252 /// folded into the addressing mode of a load/store or lea instruction. 00253 bool isGEPFoldable(MachineBasicBlock *MBB, 00254 Value *Src, User::op_iterator IdxBegin, 00255 User::op_iterator IdxEnd, X86AddressMode &AM); 00256 00257 /// emitGEPOperation - Common code shared between visitGetElementPtrInst and 00258 /// constant expression GEP support. 00259 /// 00260 void emitGEPOperation(MachineBasicBlock *BB, MachineBasicBlock::iterator IP, 00261 Value *Src, User::op_iterator IdxBegin, 00262 User::op_iterator IdxEnd, unsigned TargetReg); 00263 00264 /// emitCastOperation - Common code shared between visitCastInst and 00265 /// constant expression cast support. 00266 /// 00267 void emitCastOperation(MachineBasicBlock *BB,MachineBasicBlock::iterator IP, 00268 Value *Src, const Type *DestTy, unsigned TargetReg); 00269 00270 /// emitSimpleBinaryOperation - Common code shared between visitSimpleBinary 00271 /// and constant expression support. 00272 /// 00273 void emitSimpleBinaryOperation(MachineBasicBlock *BB, 00274 MachineBasicBlock::iterator IP, 00275 Value *Op0, Value *Op1, 00276 unsigned OperatorClass, unsigned TargetReg); 00277 00278 /// emitBinaryFPOperation - This method handles emission of floating point 00279 /// Add (0), Sub (1), Mul (2), and Div (3) operations. 00280 void emitBinaryFPOperation(MachineBasicBlock *BB, 00281 MachineBasicBlock::iterator IP, 00282 Value *Op0, Value *Op1, 00283 unsigned OperatorClass, unsigned TargetReg); 00284 00285 void emitMultiply(MachineBasicBlock *BB, MachineBasicBlock::iterator IP, 00286 Value *Op0, Value *Op1, unsigned TargetReg); 00287 00288 void doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI, 00289 unsigned DestReg, const Type *DestTy, 00290 unsigned Op0Reg, unsigned Op1Reg); 00291 void doMultiplyConst(MachineBasicBlock *MBB, 00292 MachineBasicBlock::iterator MBBI, 00293 unsigned DestReg, const Type *DestTy, 00294 unsigned Op0Reg, unsigned Op1Val); 00295 00296 void emitDivRemOperation(MachineBasicBlock *BB, 00297 MachineBasicBlock::iterator IP, 00298 Value *Op0, Value *Op1, bool isDiv, 00299 unsigned TargetReg); 00300 00301 /// emitSetCCOperation - Common code shared between visitSetCondInst and 00302 /// constant expression support. 00303 /// 00304 void emitSetCCOperation(MachineBasicBlock *BB, 00305 MachineBasicBlock::iterator IP, 00306 Value *Op0, Value *Op1, unsigned Opcode, 00307 unsigned TargetReg); 00308 00309 /// emitShiftOperation - Common code shared between visitShiftInst and 00310 /// constant expression support. 00311 /// 00312 void emitShiftOperation(MachineBasicBlock *MBB, 00313 MachineBasicBlock::iterator IP, 00314 Value *Op, Value *ShiftAmount, bool isLeftShift, 00315 const Type *ResultTy, unsigned DestReg); 00316 00317 // Emit code for a 'SHLD DestReg, Op0, Op1, Amt' operation, where Amt is a 00318 // constant. 00319 void doSHLDConst(MachineBasicBlock *MBB, 00320 MachineBasicBlock::iterator MBBI, 00321 unsigned DestReg, unsigned Op0Reg, unsigned Op1Reg, 00322 unsigned Op1Val); 00323 00324 /// emitSelectOperation - Common code shared between visitSelectInst and the 00325 /// constant expression support. 00326 void emitSelectOperation(MachineBasicBlock *MBB, 00327 MachineBasicBlock::iterator IP, 00328 Value *Cond, Value *TrueVal, Value *FalseVal, 00329 unsigned DestReg); 00330 00331 /// copyConstantToRegister - Output the instructions required to put the 00332 /// specified constant into the specified register. 00333 /// 00334 void copyConstantToRegister(MachineBasicBlock *MBB, 00335 MachineBasicBlock::iterator MBBI, 00336 Constant *C, unsigned Reg); 00337 00338 void emitUCOMr(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI, 00339 unsigned LHS, unsigned RHS); 00340 00341 /// makeAnotherReg - This method returns the next register number we haven't 00342 /// yet used. 00343 /// 00344 /// Long values are handled somewhat specially. They are always allocated 00345 /// as pairs of 32 bit integer values. The register number returned is the 00346 /// lower 32 bits of the long value, and the regNum+1 is the upper 32 bits 00347 /// of the long value. 00348 /// 00349 unsigned makeAnotherReg(const Type *Ty) { 00350 assert(dynamic_cast<const X86RegisterInfo*>(TM.getRegisterInfo()) && 00351 "Current target doesn't have X86 reg info??"); 00352 const X86RegisterInfo *MRI = 00353 static_cast<const X86RegisterInfo*>(TM.getRegisterInfo()); 00354 if (Ty == Type::LongTy || Ty == Type::ULongTy) { 00355 const TargetRegisterClass *RC = MRI->getRegClassForType(Type::IntTy); 00356 // Create the lower part 00357 F->getSSARegMap()->createVirtualRegister(RC); 00358 // Create the upper part. 00359 return F->getSSARegMap()->createVirtualRegister(RC)-1; 00360 } 00361 00362 // Add the mapping of regnumber => reg class to MachineFunction 00363 const TargetRegisterClass *RC = MRI->getRegClassForType(Ty); 00364 return F->getSSARegMap()->createVirtualRegister(RC); 00365 } 00366 00367 /// getReg - This method turns an LLVM value into a register number. 00368 /// 00369 unsigned getReg(Value &V) { return getReg(&V); } // Allow references 00370 unsigned getReg(Value *V) { 00371 // Just append to the end of the current bb. 00372 MachineBasicBlock::iterator It = BB->end(); 00373 return getReg(V, BB, It); 00374 } 00375 unsigned getReg(Value *V, MachineBasicBlock *MBB, 00376 MachineBasicBlock::iterator IPt); 00377 00378 /// getFixedSizedAllocaFI - Return the frame index for a fixed sized alloca 00379 /// that is to be statically allocated with the initial stack frame 00380 /// adjustment. 00381 unsigned getFixedSizedAllocaFI(AllocaInst *AI); 00382 }; 00383 } 00384 00385 /// dyn_castFixedAlloca - If the specified value is a fixed size alloca 00386 /// instruction in the entry block, return it. Otherwise, return a null 00387 /// pointer. 00388 static AllocaInst *dyn_castFixedAlloca(Value *V) { 00389 if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { 00390 BasicBlock *BB = AI->getParent(); 00391 if (isa<ConstantUInt>(AI->getArraySize()) && BB ==&BB->getParent()->front()) 00392 return AI; 00393 } 00394 return 0; 00395 } 00396 00397 /// getReg - This method turns an LLVM value into a register number. 00398 /// 00399 unsigned X86ISel::getReg(Value *V, MachineBasicBlock *MBB, 00400 MachineBasicBlock::iterator IPt) { 00401 // If this operand is a constant, emit the code to copy the constant into 00402 // the register here... 00403 if (Constant *C = dyn_cast<Constant>(V)) { 00404 unsigned Reg = makeAnotherReg(V->getType()); 00405 copyConstantToRegister(MBB, IPt, C, Reg); 00406 return Reg; 00407 } else if (CastInst *CI = dyn_cast<CastInst>(V)) { 00408 // Do not emit noop casts at all, unless it's a double -> float cast. 00409 if (getClassB(CI->getType()) == getClassB(CI->getOperand(0)->getType()) && 00410 (CI->getType() != Type::FloatTy || 00411 CI->getOperand(0)->getType() != Type::DoubleTy)) 00412 return getReg(CI->getOperand(0), MBB, IPt); 00413 } else if (AllocaInst *AI = dyn_castFixedAlloca(V)) { 00414 // If the alloca address couldn't be folded into the instruction addressing, 00415 // emit an explicit LEA as appropriate. 00416 unsigned Reg = makeAnotherReg(V->getType()); 00417 unsigned FI = getFixedSizedAllocaFI(AI); 00418 addFrameReference(BuildMI(*MBB, IPt, X86::LEA32r, 4, Reg), FI); 00419 return Reg; 00420 } 00421 00422 unsigned &Reg = RegMap[V]; 00423 if (Reg == 0) { 00424 Reg = makeAnotherReg(V->getType()); 00425 RegMap[V] = Reg; 00426 } 00427 00428 return Reg; 00429 } 00430 00431 /// getFixedSizedAllocaFI - Return the frame index for a fixed sized alloca 00432 /// that is to be statically allocated with the initial stack frame 00433 /// adjustment. 00434 unsigned X86ISel::getFixedSizedAllocaFI(AllocaInst *AI) { 00435 // Already computed this? 00436 std::map<AllocaInst*, unsigned>::iterator I = AllocaMap.lower_bound(AI); 00437 if (I != AllocaMap.end() && I->first == AI) return I->second; 00438 00439 const Type *Ty = AI->getAllocatedType(); 00440 ConstantUInt *CUI = cast<ConstantUInt>(AI->getArraySize()); 00441 unsigned TySize = TM.getTargetData().getTypeSize(Ty); 00442 TySize *= CUI->getValue(); // Get total allocated size... 00443 unsigned Alignment = TM.getTargetData().getTypeAlignment(Ty); 00444 00445 // Create a new stack object using the frame manager... 00446 int FrameIdx = F->getFrameInfo()->CreateStackObject(TySize, Alignment); 00447 AllocaMap.insert(I, std::make_pair(AI, FrameIdx)); 00448 return FrameIdx; 00449 } 00450 00451 00452 /// copyConstantToRegister - Output the instructions required to put the 00453 /// specified constant into the specified register. 00454 /// 00455 void X86ISel::copyConstantToRegister(MachineBasicBlock *MBB, 00456 MachineBasicBlock::iterator IP, 00457 Constant *C, unsigned R) { 00458 if (isa<UndefValue>(C)) { 00459 switch (getClassB(C->getType())) { 00460 case cFP: 00461 // FIXME: SHOULD TEACH STACKIFIER ABOUT UNDEF VALUES! 00462 BuildMI(*MBB, IP, X86::FLD0, 0, R); 00463 return; 00464 case cLong: 00465 BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, R+1); 00466 // FALL THROUGH 00467 default: 00468 BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, R); 00469 return; 00470 } 00471 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { 00472 unsigned Class = 0; 00473 switch (CE->getOpcode()) { 00474 case Instruction::GetElementPtr: 00475 emitGEPOperation(MBB, IP, CE->getOperand(0), 00476 CE->op_begin()+1, CE->op_end(), R); 00477 return; 00478 case Instruction::Cast: 00479 emitCastOperation(MBB, IP, CE->getOperand(0), CE->getType(), R); 00480 return; 00481 00482 case Instruction::Xor: ++Class; // FALL THROUGH 00483 case Instruction::Or: ++Class; // FALL THROUGH 00484 case Instruction::And: ++Class; // FALL THROUGH 00485 case Instruction::Sub: ++Class; // FALL THROUGH 00486 case Instruction::Add: 00487 emitSimpleBinaryOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1), 00488 Class, R); 00489 return; 00490 00491 case Instruction::Mul: 00492 emitMultiply(MBB, IP, CE->getOperand(0), CE->getOperand(1), R); 00493 return; 00494 00495 case Instruction::Div: 00496 case Instruction::Rem: 00497 emitDivRemOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1), 00498 CE->getOpcode() == Instruction::Div, R); 00499 return; 00500 00501 case Instruction::SetNE: 00502 case Instruction::SetEQ: 00503 case Instruction::SetLT: 00504 case Instruction::SetGT: 00505 case Instruction::SetLE: 00506 case Instruction::SetGE: 00507 emitSetCCOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1), 00508 CE->getOpcode(), R); 00509 return; 00510 00511 case Instruction::Shl: 00512 case Instruction::Shr: 00513 emitShiftOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1), 00514 CE->getOpcode() == Instruction::Shl, CE->getType(), R); 00515 return; 00516 00517 case Instruction::Select: 00518 emitSelectOperation(MBB, IP, CE->getOperand(0), CE->getOperand(1), 00519 CE->getOperand(2), R); 00520 return; 00521 00522 default: 00523 std::cerr << "Offending expr: " << *C << "\n"; 00524 assert(0 && "Constant expression not yet handled!\n"); 00525 } 00526 } 00527 00528 if (C->getType()->isIntegral()) { 00529 unsigned Class = getClassB(C->getType()); 00530 00531 if (Class == cLong) { 00532 // Copy the value into the register pair. 00533 uint64_t Val = cast<ConstantInt>(C)->getRawValue(); 00534 BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addImm(Val & 0xFFFFFFFF); 00535 BuildMI(*MBB, IP, X86::MOV32ri, 1, R+1).addImm(Val >> 32); 00536 return; 00537 } 00538 00539 assert(Class <= cInt && "Type not handled yet!"); 00540 00541 static const unsigned IntegralOpcodeTab[] = { 00542 X86::MOV8ri, X86::MOV16ri, X86::MOV32ri 00543 }; 00544 00545 if (C->getType() == Type::BoolTy) { 00546 BuildMI(*MBB, IP, X86::MOV8ri, 1, R).addImm(C == ConstantBool::True); 00547 } else { 00548 ConstantInt *CI = cast<ConstantInt>(C); 00549 BuildMI(*MBB, IP, IntegralOpcodeTab[Class],1,R).addImm(CI->getRawValue()); 00550 } 00551 } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { 00552 if (CFP->isExactlyValue(+0.0)) 00553 BuildMI(*MBB, IP, X86::FLD0, 0, R); 00554 else if (CFP->isExactlyValue(+1.0)) 00555 BuildMI(*MBB, IP, X86::FLD1, 0, R); 00556 else { 00557 // Otherwise we need to spill the constant to memory... 00558 MachineConstantPool *CP = F->getConstantPool(); 00559 unsigned CPI = CP->getConstantPoolIndex(CFP); 00560 const Type *Ty = CFP->getType(); 00561 00562 assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!"); 00563 unsigned LoadOpcode = Ty == Type::FloatTy ? X86::FLD32m : X86::FLD64m; 00564 addConstantPoolReference(BuildMI(*MBB, IP, LoadOpcode, 4, R), CPI); 00565 } 00566 00567 } else if (isa<ConstantPointerNull>(C)) { 00568 // Copy zero (null pointer) to the register. 00569 BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addImm(0); 00570 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) { 00571 BuildMI(*MBB, IP, X86::MOV32ri, 1, R).addGlobalAddress(GV); 00572 } else { 00573 std::cerr << "Offending constant: " << *C << "\n"; 00574 assert(0 && "Type not handled yet!"); 00575 } 00576 } 00577 00578 /// LoadArgumentsToVirtualRegs - Load all of the arguments to this function from 00579 /// the stack into virtual registers. 00580 /// 00581 void X86ISel::LoadArgumentsToVirtualRegs(Function &Fn) { 00582 // Emit instructions to load the arguments... On entry to a function on the 00583 // X86, the stack frame looks like this: 00584 // 00585 // [ESP] -- return address 00586 // [ESP + 4] -- first argument (leftmost lexically) 00587 // [ESP + 8] -- second argument, if first argument is four bytes in size 00588 // ... 00589 // 00590 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 00591 MachineFrameInfo *MFI = F->getFrameInfo(); 00592 00593 for (Function::aiterator I = Fn.abegin(), E = Fn.aend(); I != E; ++I) { 00594 bool ArgLive = !I->use_empty(); 00595 unsigned Reg = ArgLive ? getReg(*I) : 0; 00596 int FI; // Frame object index 00597 00598 switch (getClassB(I->getType())) { 00599 case cByte: 00600 if (ArgLive) { 00601 FI = MFI->CreateFixedObject(1, ArgOffset); 00602 addFrameReference(BuildMI(BB, X86::MOV8rm, 4, Reg), FI); 00603 } 00604 break; 00605 case cShort: 00606 if (ArgLive) { 00607 FI = MFI->CreateFixedObject(2, ArgOffset); 00608 addFrameReference(BuildMI(BB, X86::MOV16rm, 4, Reg), FI); 00609 } 00610 break; 00611 case cInt: 00612 if (ArgLive) { 00613 FI = MFI->CreateFixedObject(4, ArgOffset); 00614 addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI); 00615 } 00616 break; 00617 case cLong: 00618 if (ArgLive) { 00619 FI = MFI->CreateFixedObject(8, ArgOffset); 00620 addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg), FI); 00621 addFrameReference(BuildMI(BB, X86::MOV32rm, 4, Reg+1), FI, 4); 00622 } 00623 ArgOffset += 4; // longs require 4 additional bytes 00624 break; 00625 case cFP: 00626 if (ArgLive) { 00627 unsigned Opcode; 00628 if (I->getType() == Type::FloatTy) { 00629 Opcode = X86::FLD32m; 00630 FI = MFI->CreateFixedObject(4, ArgOffset); 00631 } else { 00632 Opcode = X86::FLD64m; 00633 FI = MFI->CreateFixedObject(8, ArgOffset); 00634 } 00635 addFrameReference(BuildMI(BB, Opcode, 4, Reg), FI); 00636 } 00637 if (I->getType() == Type::DoubleTy) 00638 ArgOffset += 4; // doubles require 4 additional bytes 00639 break; 00640 default: 00641 assert(0 && "Unhandled argument type!"); 00642 } 00643 ArgOffset += 4; // Each argument takes at least 4 bytes on the stack... 00644 } 00645 00646 // If the function takes variable number of arguments, add a frame offset for 00647 // the start of the first vararg value... this is used to expand 00648 // llvm.va_start. 00649 if (Fn.getFunctionType()->isVarArg()) 00650 VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset); 00651 } 00652 00653 00654 /// SelectPHINodes - Insert machine code to generate phis. This is tricky 00655 /// because we have to generate our sources into the source basic blocks, not 00656 /// the current one. 00657 /// 00658 void X86ISel::SelectPHINodes() { 00659 const TargetInstrInfo &TII = *TM.getInstrInfo(); 00660 const Function &LF = *F->getFunction(); // The LLVM function... 00661 for (Function::const_iterator I = LF.begin(), E = LF.end(); I != E; ++I) { 00662 const BasicBlock *BB = I; 00663 MachineBasicBlock &MBB = *MBBMap[I]; 00664 00665 // Loop over all of the PHI nodes in the LLVM basic block... 00666 MachineBasicBlock::iterator PHIInsertPoint = MBB.begin(); 00667 for (BasicBlock::const_iterator I = BB->begin(); isa<PHINode>(I); ++I) { 00668 PHINode *PN = const_cast<PHINode*>(dyn_cast<PHINode>(I)); 00669 00670 // Create a new machine instr PHI node, and insert it. 00671 unsigned PHIReg = getReg(*PN); 00672 MachineInstr *PhiMI = BuildMI(MBB, PHIInsertPoint, 00673 X86::PHI, PN->getNumOperands(), PHIReg); 00674 00675 MachineInstr *LongPhiMI = 0; 00676 if (PN->getType() == Type::LongTy || PN->getType() == Type::ULongTy) 00677 LongPhiMI = BuildMI(MBB, PHIInsertPoint, 00678 X86::PHI, PN->getNumOperands(), PHIReg+1); 00679 00680 // PHIValues - Map of blocks to incoming virtual registers. We use this 00681 // so that we only initialize one incoming value for a particular block, 00682 // even if the block has multiple entries in the PHI node. 00683 // 00684 std::map<MachineBasicBlock*, unsigned> PHIValues; 00685 00686 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 00687 MachineBasicBlock *PredMBB = MBBMap[PN->getIncomingBlock(i)]; 00688 unsigned ValReg; 00689 std::map<MachineBasicBlock*, unsigned>::iterator EntryIt = 00690 PHIValues.lower_bound(PredMBB); 00691 00692 if (EntryIt != PHIValues.end() && EntryIt->first == PredMBB) { 00693 // We already inserted an initialization of the register for this 00694 // predecessor. Recycle it. 00695 ValReg = EntryIt->second; 00696 00697 } else { 00698 // Get the incoming value into a virtual register. 00699 // 00700 Value *Val = PN->getIncomingValue(i); 00701 00702 // If this is a constant or GlobalValue, we may have to insert code 00703 // into the basic block to compute it into a virtual register. 00704 if ((isa<Constant>(Val) && !isa<ConstantExpr>(Val))) { 00705 // Simple constants get emitted at the end of the basic block, 00706 // before any terminator instructions. We "know" that the code to 00707 // move a constant into a register will never clobber any flags. 00708 ValReg = getReg(Val, PredMBB, PredMBB->getFirstTerminator()); 00709 } else { 00710 // Because we don't want to clobber any values which might be in 00711 // physical registers with the computation of this constant (which 00712 // might be arbitrarily complex if it is a constant expression), 00713 // just insert the computation at the top of the basic block. 00714 MachineBasicBlock::iterator PI = PredMBB->begin(); 00715 00716 // Skip over any PHI nodes though! 00717 while (PI != PredMBB->end() && PI->getOpcode() == X86::PHI) 00718 ++PI; 00719 00720 ValReg = getReg(Val, PredMBB, PI); 00721 } 00722 00723 // Remember that we inserted a value for this PHI for this predecessor 00724 PHIValues.insert(EntryIt, std::make_pair(PredMBB, ValReg)); 00725 } 00726 00727 PhiMI->addRegOperand(ValReg); 00728 PhiMI->addMachineBasicBlockOperand(PredMBB); 00729 if (LongPhiMI) { 00730 LongPhiMI->addRegOperand(ValReg+1); 00731 LongPhiMI->addMachineBasicBlockOperand(PredMBB); 00732 } 00733 } 00734 00735 // Now that we emitted all of the incoming values for the PHI node, make 00736 // sure to reposition the InsertPoint after the PHI that we just added. 00737 // This is needed because we might have inserted a constant into this 00738 // block, right after the PHI's which is before the old insert point! 00739 PHIInsertPoint = LongPhiMI ? LongPhiMI : PhiMI; 00740 ++PHIInsertPoint; 00741 } 00742 } 00743 } 00744 00745 /// RequiresFPRegKill - The floating point stackifier pass cannot insert 00746 /// compensation code on critical edges. As such, it requires that we kill all 00747 /// FP registers on the exit from any blocks that either ARE critical edges, or 00748 /// branch to a block that has incoming critical edges. 00749 /// 00750 /// Note that this kill instruction will eventually be eliminated when 00751 /// restrictions in the stackifier are relaxed. 00752 /// 00753 static bool RequiresFPRegKill(const MachineBasicBlock *MBB) { 00754 #if 0 00755 const BasicBlock *BB = MBB->getBasicBlock (); 00756 for (succ_const_iterator SI = succ_begin(BB), E = succ_end(BB); SI!=E; ++SI) { 00757 const BasicBlock *Succ = *SI; 00758 pred_const_iterator PI = pred_begin(Succ), PE = pred_end(Succ); 00759 ++PI; // Block have at least one predecessory 00760 if (PI != PE) { // If it has exactly one, this isn't crit edge 00761 // If this block has more than one predecessor, check all of the 00762 // predecessors to see if they have multiple successors. If so, then the 00763 // block we are analyzing needs an FPRegKill. 00764 for (PI = pred_begin(Succ); PI != PE; ++PI) { 00765 const BasicBlock *Pred = *PI; 00766 succ_const_iterator SI2 = succ_begin(Pred); 00767 ++SI2; // There must be at least one successor of this block. 00768 if (SI2 != succ_end(Pred)) 00769 return true; // Yes, we must insert the kill on this edge. 00770 } 00771 } 00772 } 00773 // If we got this far, there is no need to insert the kill instruction. 00774 return false; 00775 #else 00776 return true; 00777 #endif 00778 } 00779 00780 // InsertFPRegKills - Insert FP_REG_KILL instructions into basic blocks that 00781 // need them. This only occurs due to the floating point stackifier not being 00782 // aggressive enough to handle arbitrary global stackification. 00783 // 00784 // Currently we insert an FP_REG_KILL instruction into each block that uses or 00785 // defines a floating point virtual register. 00786 // 00787 // When the global register allocators (like linear scan) finally update live 00788 // variable analysis, we can keep floating point values in registers across 00789 // portions of the CFG that do not involve critical edges. This will be a big 00790 // win, but we are waiting on the global allocators before we can do this. 00791 // 00792 // With a bit of work, the floating point stackifier pass can be enhanced to 00793 // break critical edges as needed (to make a place to put compensation code), 00794 // but this will require some infrastructure improvements as well. 00795 // 00796 void X86ISel::InsertFPRegKills() { 00797 SSARegMap &RegMap = *F->getSSARegMap(); 00798 00799 for (MachineFunction::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { 00800 for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I!=E; ++I) 00801 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { 00802 MachineOperand& MO = I->getOperand(i); 00803 if (MO.isRegister() && MO.getReg()) { 00804 unsigned Reg = MO.getReg(); 00805 if (MRegisterInfo::isVirtualRegister(Reg)) { 00806 unsigned RegSize = RegMap.getRegClass(Reg)->getSize(); 00807 if (RegSize == 10 || RegSize == 8) 00808 goto UsesFPReg; 00809 } 00810 } 00811 } 00812 // If we haven't found an FP register use or def in this basic block, check 00813 // to see if any of our successors has an FP PHI node, which will cause a 00814 // copy to be inserted into this block. 00815 for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), 00816 SE = BB->succ_end(); SI != SE; ++SI) { 00817 MachineBasicBlock *SBB = *SI; 00818 for (MachineBasicBlock::iterator I = SBB->begin(); 00819 I != SBB->end() && I->getOpcode() == X86::PHI; ++I) { 00820 const TargetRegisterClass *RC = 00821 RegMap.getRegClass(I->getOperand(0).getReg()); 00822 if (RC->getSize() == 10 || RC->getSize() == 8) 00823 goto UsesFPReg; 00824 } 00825 } 00826 continue; 00827 UsesFPReg: 00828 // Okay, this block uses an FP register. If the block has successors (ie, 00829 // it's not an unwind/return), insert the FP_REG_KILL instruction. 00830 if (BB->succ_size () && RequiresFPRegKill(BB)) { 00831 BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0); 00832 ++NumFPKill; 00833 } 00834 } 00835 } 00836 00837 00838 void X86ISel::getAddressingMode(Value *Addr, X86AddressMode &AM) { 00839 AM.BaseType = X86AddressMode::RegBase; 00840 AM.Base.Reg = 0; AM.Scale = 1; AM.IndexReg = 0; AM.Disp = 0; 00841 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) { 00842 if (isGEPFoldable(BB, GEP->getOperand(0), GEP->op_begin()+1, GEP->op_end(), 00843 AM)) 00844 return; 00845 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) { 00846 if (CE->getOpcode() == Instruction::GetElementPtr) 00847 if (isGEPFoldable(BB, CE->getOperand(0), CE->op_begin()+1, CE->op_end(), 00848 AM)) 00849 return; 00850 } else if (AllocaInst *AI = dyn_castFixedAlloca(Addr)) { 00851 AM.BaseType = X86AddressMode::FrameIndexBase; 00852 AM.Base.FrameIndex = getFixedSizedAllocaFI(AI); 00853 return; 00854 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { 00855 AM.GV = GV; 00856 return; 00857 } 00858 00859 // If it's not foldable, reset addr mode. 00860 AM.BaseType = X86AddressMode::RegBase; 00861 AM.Base.Reg = getReg(Addr); 00862 AM.Scale = 1; AM.IndexReg = 0; AM.Disp = 0; 00863 } 00864 00865 // canFoldSetCCIntoBranchOrSelect - Return the setcc instruction if we can fold 00866 // it into the conditional branch or select instruction which is the only user 00867 // of the cc instruction. This is the case if the conditional branch is the 00868 // only user of the setcc. We also don't handle long arguments below, so we 00869 // reject them here as well. 00870 // 00871 static SetCondInst *canFoldSetCCIntoBranchOrSelect(Value *V) { 00872 if (SetCondInst *SCI = dyn_cast<SetCondInst>(V)) 00873 if (SCI->hasOneUse()) { 00874 Instruction *User = cast<Instruction>(SCI->use_back()); 00875 if ((isa<BranchInst>(User) || isa<SelectInst>(User)) && 00876 (getClassB(SCI->getOperand(0)->getType()) != cLong || 00877 SCI->getOpcode() == Instruction::SetEQ || 00878 SCI->getOpcode() == Instruction::SetNE) && 00879 (isa<BranchInst>(User) || User->getOperand(0) == V)) 00880 return SCI; 00881 } 00882 return 0; 00883 } 00884 00885 // Return a fixed numbering for setcc instructions which does not depend on the 00886 // order of the opcodes. 00887 // 00888 static unsigned getSetCCNumber(unsigned Opcode) { 00889 switch(Opcode) { 00890 default: assert(0 && "Unknown setcc instruction!"); 00891 case Instruction::SetEQ: return 0; 00892 case Instruction::SetNE: return 1; 00893 case Instruction::SetLT: return 2; 00894 case Instruction::SetGE: return 3; 00895 case Instruction::SetGT: return 4; 00896 case Instruction::SetLE: return 5; 00897 } 00898 } 00899 00900 // LLVM -> X86 signed X86 unsigned 00901 // ----- ---------- ------------ 00902 // seteq -> sete sete 00903 // setne -> setne setne 00904 // setlt -> setl setb 00905 // setge -> setge setae 00906 // setgt -> setg seta 00907 // setle -> setle setbe 00908 // ---- 00909 // sets // Used by comparison with 0 optimization 00910 // setns 00911 static const unsigned SetCCOpcodeTab[2][8] = { 00912 { X86::SETEr, X86::SETNEr, X86::SETBr, X86::SETAEr, X86::SETAr, X86::SETBEr, 00913 0, 0 }, 00914 { X86::SETEr, X86::SETNEr, X86::SETLr, X86::SETGEr, X86::SETGr, X86::SETLEr, 00915 X86::SETSr, X86::SETNSr }, 00916 }; 00917 00918 /// emitUCOMr - In the future when we support processors before the P6, this 00919 /// wraps the logic for emitting an FUCOMr vs FUCOMIr. 00920 void X86ISel::emitUCOMr(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP, 00921 unsigned LHS, unsigned RHS) { 00922 if (0) { // for processors prior to the P6 00923 BuildMI(*MBB, IP, X86::FUCOMr, 2).addReg(LHS).addReg(RHS); 00924 BuildMI(*MBB, IP, X86::FNSTSW8r, 0); 00925 BuildMI(*MBB, IP, X86::SAHF, 1); 00926 } else { 00927 BuildMI(*MBB, IP, X86::FUCOMIr, 2).addReg(LHS).addReg(RHS); 00928 } 00929 } 00930 00931 // EmitComparison - This function emits a comparison of the two operands, 00932 // returning the extended setcc code to use. 00933 unsigned X86ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1, 00934 MachineBasicBlock *MBB, 00935 MachineBasicBlock::iterator IP) { 00936 // The arguments are already supposed to be of the same type. 00937 const Type *CompTy = Op0->getType(); 00938 unsigned Class = getClassB(CompTy); 00939 00940 // Special case handling of: cmp R, i 00941 if (isa<ConstantPointerNull>(Op1)) { 00942 unsigned Op0r = getReg(Op0, MBB, IP); 00943 if (OpNum < 2) // seteq/setne -> test 00944 BuildMI(*MBB, IP, X86::TEST32rr, 2).addReg(Op0r).addReg(Op0r); 00945 else 00946 BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(0); 00947 return OpNum; 00948 00949 } else if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { 00950 if (Class == cByte || Class == cShort || Class == cInt) { 00951 unsigned Op1v = CI->getRawValue(); 00952 00953 // Mask off any upper bits of the constant, if there are any... 00954 Op1v &= (1ULL << (8 << Class)) - 1; 00955 00956 // If this is a comparison against zero, emit more efficient code. We 00957 // can't handle unsigned comparisons against zero unless they are == or 00958 // !=. These should have been strength reduced already anyway. 00959 if (Op1v == 0 && (CompTy->isSigned() || OpNum < 2)) { 00960 00961 // If this is a comparison against zero and the LHS is an and of a 00962 // register with a constant, use the test to do the and. 00963 if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) 00964 if (Op0I->getOpcode() == Instruction::And && Op0->hasOneUse() && 00965 isa<ConstantInt>(Op0I->getOperand(1))) { 00966 static const unsigned TESTTab[] = { 00967 X86::TEST8ri, X86::TEST16ri, X86::TEST32ri 00968 }; 00969 00970 // Emit test X, i 00971 unsigned LHS = getReg(Op0I->getOperand(0), MBB, IP); 00972 unsigned Imm = 00973 cast<ConstantInt>(Op0I->getOperand(1))->getRawValue(); 00974 BuildMI(*MBB, IP, TESTTab[Class], 2).addReg(LHS).addImm(Imm); 00975 00976 if (OpNum == 2) return 6; // Map jl -> js 00977 if (OpNum == 3) return 7; // Map jg -> jns 00978 return OpNum; 00979 } 00980 00981 unsigned Op0r = getReg(Op0, MBB, IP); 00982 static const unsigned TESTTab[] = { 00983 X86::TEST8rr, X86::TEST16rr, X86::TEST32rr 00984 }; 00985 BuildMI(*MBB, IP, TESTTab[Class], 2).addReg(Op0r).addReg(Op0r); 00986 00987 if (OpNum == 2) return 6; // Map jl -> js 00988 if (OpNum == 3) return 7; // Map jg -> jns 00989 return OpNum; 00990 } 00991 00992 static const unsigned CMPTab[] = { 00993 X86::CMP8ri, X86::CMP16ri, X86::CMP32ri 00994 }; 00995 00996 unsigned Op0r = getReg(Op0, MBB, IP); 00997 BuildMI(*MBB, IP, CMPTab[Class], 2).addReg(Op0r).addImm(Op1v); 00998 return OpNum; 00999 } else { 01000 unsigned Op0r = getReg(Op0, MBB, IP); 01001 assert(Class == cLong && "Unknown integer class!"); 01002 unsigned LowCst = CI->getRawValue(); 01003 unsigned HiCst = CI->getRawValue() >> 32; 01004 if (OpNum < 2) { // seteq, setne 01005 unsigned LoTmp = Op0r; 01006 if (LowCst != 0) { 01007 LoTmp = makeAnotherReg(Type::IntTy); 01008 BuildMI(*MBB, IP, X86::XOR32ri, 2, LoTmp).addReg(Op0r).addImm(LowCst); 01009 } 01010 unsigned HiTmp = Op0r+1; 01011 if (HiCst != 0) { 01012 HiTmp = makeAnotherReg(Type::IntTy); 01013 BuildMI(*MBB, IP, X86::XOR32ri, 2,HiTmp).addReg(Op0r+1).addImm(HiCst); 01014 } 01015 unsigned FinalTmp = makeAnotherReg(Type::IntTy); 01016 BuildMI(*MBB, IP, X86::OR32rr, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp); 01017 return OpNum; 01018 } else { 01019 // Emit a sequence of code which compares the high and low parts once 01020 // each, then uses a conditional move to handle the overflow case. For 01021 // example, a setlt for long would generate code like this: 01022 // 01023 // AL = lo(op1) < lo(op2) // Always unsigned comparison 01024 // BL = hi(op1) < hi(op2) // Signedness depends on operands 01025 // dest = hi(op1) == hi(op2) ? BL : AL; 01026 // 01027 01028 // FIXME: This would be much better if we had hierarchical register 01029 // classes! Until then, hardcode registers so that we can deal with 01030 // their aliases (because we don't have conditional byte moves). 01031 // 01032 BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(LowCst); 01033 BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL); 01034 BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r+1).addImm(HiCst); 01035 BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0,X86::BL); 01036 BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH); 01037 BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH); 01038 BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX) 01039 .addReg(X86::AX); 01040 // NOTE: visitSetCondInst knows that the value is dumped into the BL 01041 // register at this point for long values... 01042 return OpNum; 01043 } 01044 } 01045 } 01046 01047 unsigned Op0r = getReg(Op0, MBB, IP); 01048 01049 // Special case handling of comparison against +/- 0.0 01050 if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op1)) 01051 if (CFP->isExactlyValue(+0.0) || CFP->isExactlyValue(-0.0)) { 01052 BuildMI(*MBB, IP, X86::FTST, 1).addReg(Op0r); 01053 BuildMI(*MBB, IP, X86::FNSTSW8r, 0); 01054 BuildMI(*MBB, IP, X86::SAHF, 1); 01055 return OpNum; 01056 } 01057 01058 unsigned Op1r = getReg(Op1, MBB, IP); 01059 switch (Class) { 01060 default: assert(0 && "Unknown type class!"); 01061 // Emit: cmp <var1>, <var2> (do the comparison). We can 01062 // compare 8-bit with 8-bit, 16-bit with 16-bit, 32-bit with 01063 // 32-bit. 01064 case cByte: 01065 BuildMI(*MBB, IP, X86::CMP8rr, 2).addReg(Op0r).addReg(Op1r); 01066 break; 01067 case cShort: 01068 BuildMI(*MBB, IP, X86::CMP16rr, 2).addReg(Op0r).addReg(Op1r); 01069 break; 01070 case cInt: 01071 BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r).addReg(Op1r); 01072 break; 01073 case cFP: 01074 emitUCOMr(MBB, IP, Op0r, Op1r); 01075 break; 01076 01077 case cLong: 01078 if (OpNum < 2) { // seteq, setne 01079 unsigned LoTmp = makeAnotherReg(Type::IntTy); 01080 unsigned HiTmp = makeAnotherReg(Type::IntTy); 01081 unsigned FinalTmp = makeAnotherReg(Type::IntTy); 01082 BuildMI(*MBB, IP, X86::XOR32rr, 2, LoTmp).addReg(Op0r).addReg(Op1r); 01083 BuildMI(*MBB, IP, X86::XOR32rr, 2, HiTmp).addReg(Op0r+1).addReg(Op1r+1); 01084 BuildMI(*MBB, IP, X86::OR32rr, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp); 01085 break; // Allow the sete or setne to be generated from flags set by OR 01086 } else { 01087 // Emit a sequence of code which compares the high and low parts once 01088 // each, then uses a conditional move to handle the overflow case. For 01089 // example, a setlt for long would generate code like this: 01090 // 01091 // AL = lo(op1) < lo(op2) // Signedness depends on operands 01092 // BL = hi(op1) < hi(op2) // Always unsigned comparison 01093 // dest = hi(op1) == hi(op2) ? BL : AL; 01094 // 01095 01096 // FIXME: This would be much better if we had hierarchical register 01097 // classes! Until then, hardcode registers so that we can deal with their 01098 // aliases (because we don't have conditional byte moves). 01099 // 01100 BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r).addReg(Op1r); 01101 BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL); 01102 BuildMI(*MBB, IP, X86::CMP32rr, 2).addReg(Op0r+1).addReg(Op1r+1); 01103 BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0, X86::BL); 01104 BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH); 01105 BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH); 01106 BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX) 01107 .addReg(X86::AX); 01108 // NOTE: visitSetCondInst knows that the value is dumped into the BL 01109 // register at this point for long values... 01110 return OpNum; 01111 } 01112 } 01113 return OpNum; 01114 } 01115 01116 /// SetCC instructions - Here we just emit boilerplate code to set a byte-sized 01117 /// register, then move it to wherever the result should be. 01118 /// 01119 void X86ISel::visitSetCondInst(SetCondInst &I) { 01120 if (canFoldSetCCIntoBranchOrSelect(&I)) 01121 return; // Fold this into a branch or select. 01122 01123 unsigned DestReg = getReg(I); 01124 MachineBasicBlock::iterator MII = BB->end(); 01125 emitSetCCOperation(BB, MII, I.getOperand(0), I.getOperand(1), I.getOpcode(), 01126 DestReg); 01127 } 01128 01129 /// emitSetCCOperation - Common code shared between visitSetCondInst and 01130 /// constant expression support. 01131 /// 01132 void X86ISel::emitSetCCOperation(MachineBasicBlock *MBB, 01133 MachineBasicBlock::iterator IP, 01134 Value *Op0, Value *Op1, unsigned Opcode, 01135 unsigned TargetReg) { 01136 unsigned OpNum = getSetCCNumber(Opcode); 01137 OpNum = EmitComparison(OpNum, Op0, Op1, MBB, IP); 01138 01139 const Type *CompTy = Op0->getType(); 01140 unsigned CompClass = getClassB(CompTy); 01141 bool isSigned = CompTy->isSigned() && CompClass != cFP; 01142 01143 if (CompClass != cLong || OpNum < 2) { 01144 // Handle normal comparisons with a setcc instruction... 01145 BuildMI(*MBB, IP, SetCCOpcodeTab[isSigned][OpNum], 0, TargetReg); 01146 } else { 01147 // Handle long comparisons by copying the value which is already in BL into 01148 // the register we want... 01149 BuildMI(*MBB, IP, X86::MOV8rr, 1, TargetReg).addReg(X86::BL); 01150 } 01151 } 01152 01153 void X86ISel::visitSelectInst(SelectInst &SI) { 01154 unsigned DestReg = getReg(SI); 01155 MachineBasicBlock::iterator MII = BB->end(); 01156 emitSelectOperation(BB, MII, SI.getCondition(), SI.getTrueValue(), 01157 SI.getFalseValue(), DestReg); 01158 } 01159 01160 /// emitSelect - Common code shared between visitSelectInst and the constant 01161 /// expression support. 01162 void X86ISel::emitSelectOperation(MachineBasicBlock *MBB, 01163 MachineBasicBlock::iterator IP, 01164 Value *Cond, Value *TrueVal, Value *FalseVal, 01165 unsigned DestReg) { 01166 unsigned SelectClass = getClassB(TrueVal->getType()); 01167 01168 // We don't support 8-bit conditional moves. If we have incoming constants, 01169 // transform them into 16-bit constants to avoid having a run-time conversion. 01170 if (SelectClass == cByte) { 01171 if (Constant *T = dyn_cast<Constant>(TrueVal)) 01172 TrueVal = ConstantExpr::getCast(T, Type::ShortTy); 01173 if (Constant *F = dyn_cast<Constant>(FalseVal)) 01174 FalseVal = ConstantExpr::getCast(F, Type::ShortTy); 01175 } 01176 01177 unsigned TrueReg = getReg(TrueVal, MBB, IP); 01178 unsigned FalseReg = getReg(FalseVal, MBB, IP); 01179 if (TrueReg == FalseReg) { 01180 static const unsigned Opcode[] = { 01181 X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV, X86::MOV32rr 01182 }; 01183 BuildMI(*MBB, IP, Opcode[SelectClass], 1, DestReg).addReg(TrueReg); 01184 if (SelectClass == cLong) 01185 BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(TrueReg+1); 01186 return; 01187 } 01188 01189 unsigned Opcode; 01190 if (SetCondInst *SCI = canFoldSetCCIntoBranchOrSelect(Cond)) { 01191 // We successfully folded the setcc into the select instruction. 01192 01193 unsigned OpNum = getSetCCNumber(SCI->getOpcode()); 01194 OpNum = EmitComparison(OpNum, SCI->getOperand(0), SCI->getOperand(1), MBB, 01195 IP); 01196 01197 const Type *CompTy = SCI->getOperand(0)->getType(); 01198 bool isSigned = CompTy->isSigned() && getClassB(CompTy) != cFP; 01199 01200 // LLVM -> X86 signed X86 unsigned 01201 // ----- ---------- ------------ 01202 // seteq -> cmovNE cmovNE 01203 // setne -> cmovE cmovE 01204 // setlt -> cmovGE cmovAE 01205 // setge -> cmovL cmovB 01206 // setgt -> cmovLE cmovBE 01207 // setle -> cmovG cmovA 01208 // ---- 01209 // cmovNS // Used by comparison with 0 optimization 01210 // cmovS 01211 01212 switch (SelectClass) { 01213 default: assert(0 && "Unknown value class!"); 01214 case cFP: { 01215 // Annoyingly, we don't have a full set of floating point conditional 01216 // moves. :( 01217 static const unsigned OpcodeTab[2][8] = { 01218 { X86::FCMOVNE, X86::FCMOVE, X86::FCMOVAE, X86::FCMOVB, 01219 X86::FCMOVBE, X86::FCMOVA, 0, 0 }, 01220 { X86::FCMOVNE, X86::FCMOVE, 0, 0, 0, 0, 0, 0 }, 01221 }; 01222 Opcode = OpcodeTab[isSigned][OpNum]; 01223 01224 // If opcode == 0, we hit a case that we don't support. Output a setcc 01225 // and compare the result against zero. 01226 if (Opcode == 0) { 01227 unsigned CompClass = getClassB(CompTy); 01228 unsigned CondReg; 01229 if (CompClass != cLong || OpNum < 2) { 01230 CondReg = makeAnotherReg(Type::BoolTy); 01231 // Handle normal comparisons with a setcc instruction... 01232 BuildMI(*MBB, IP, SetCCOpcodeTab[isSigned][OpNum], 0, CondReg); 01233 } else { 01234 // Long comparisons end up in the BL register. 01235 CondReg = X86::BL; 01236 } 01237 01238 BuildMI(*MBB, IP, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg); 01239 Opcode = X86::FCMOVE; 01240 } 01241 break; 01242 } 01243 case cByte: 01244 case cShort: { 01245 static const unsigned OpcodeTab[2][8] = { 01246 { X86::CMOVNE16rr, X86::CMOVE16rr, X86::CMOVAE16rr, X86::CMOVB16rr, 01247 X86::CMOVBE16rr, X86::CMOVA16rr, 0, 0 }, 01248 { X86::CMOVNE16rr, X86::CMOVE16rr, X86::CMOVGE16rr, X86::CMOVL16rr, 01249 X86::CMOVLE16rr, X86::CMOVG16rr, X86::CMOVNS16rr, X86::CMOVS16rr }, 01250 }; 01251 Opcode = OpcodeTab[isSigned][OpNum]; 01252 break; 01253 } 01254 case cInt: 01255 case cLong: { 01256 static const unsigned OpcodeTab[2][8] = { 01257 { X86::CMOVNE32rr, X86::CMOVE32rr, X86::CMOVAE32rr, X86::CMOVB32rr, 01258 X86::CMOVBE32rr, X86::CMOVA32rr, 0, 0 }, 01259 { X86::CMOVNE32rr, X86::CMOVE32rr, X86::CMOVGE32rr, X86::CMOVL32rr, 01260 X86::CMOVLE32rr, X86::CMOVG32rr, X86::CMOVNS32rr, X86::CMOVS32rr }, 01261 }; 01262 Opcode = OpcodeTab[isSigned][OpNum]; 01263 break; 01264 } 01265 } 01266 } else { 01267 // Get the value being branched on, and use it to set the condition codes. 01268 unsigned CondReg = getReg(Cond, MBB, IP); 01269 BuildMI(*MBB, IP, X86::TEST8rr, 2).addReg(CondReg).addReg(CondReg); 01270 switch (SelectClass) { 01271 default: assert(0 && "Unknown value class!"); 01272 case cFP: Opcode = X86::FCMOVE; break; 01273 case cByte: 01274 case cShort: Opcode = X86::CMOVE16rr; break; 01275 case cInt: 01276 case cLong: Opcode = X86::CMOVE32rr; break; 01277 } 01278 } 01279 01280 unsigned RealDestReg = DestReg; 01281 01282 01283 // Annoyingly enough, X86 doesn't HAVE 8-bit conditional moves. Because of 01284 // this, we have to promote the incoming values to 16 bits, perform a 16-bit 01285 // cmove, then truncate the result. 01286 if (SelectClass == cByte) { 01287 DestReg = makeAnotherReg(Type::ShortTy); 01288 if (getClassB(TrueVal->getType()) == cByte) { 01289 // Promote the true value, by storing it into AL, and reading from AX. 01290 BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::AL).addReg(TrueReg); 01291 BuildMI(*MBB, IP, X86::MOV8ri, 1, X86::AH).addImm(0); 01292 TrueReg = makeAnotherReg(Type::ShortTy); 01293 BuildMI(*MBB, IP, X86::MOV16rr, 1, TrueReg).addReg(X86::AX); 01294 } 01295 if (getClassB(FalseVal->getType()) == cByte) { 01296 // Promote the true value, by storing it into CL, and reading from CX. 01297 BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::CL).addReg(FalseReg); 01298 BuildMI(*MBB, IP, X86::MOV8ri, 1, X86::CH).addImm(0); 01299 FalseReg = makeAnotherReg(Type::ShortTy); 01300 BuildMI(*MBB, IP, X86::MOV16rr, 1, FalseReg).addReg(X86::CX); 01301 } 01302 } 01303 01304 BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(TrueReg).addReg(FalseReg); 01305 01306 switch (SelectClass) { 01307 case cByte: 01308 // We did the computation with 16-bit registers. Truncate back to our 01309 // result by copying into AX then copying out AL. 01310 BuildMI(*MBB, IP, X86::MOV16rr, 1, X86::AX).addReg(DestReg); 01311 BuildMI(*MBB, IP, X86::MOV8rr, 1, RealDestReg).addReg(X86::AL); 01312 break; 01313 case cLong: 01314 // Move the upper half of the value as well. 01315 BuildMI(*MBB, IP, Opcode, 2,DestReg+1).addReg(TrueReg+1).addReg(FalseReg+1); 01316 break; 01317 } 01318 } 01319 01320 01321 01322 /// promote32 - Emit instructions to turn a narrow operand into a 32-bit-wide 01323 /// operand, in the specified target register. 01324 /// 01325 void X86ISel::promote32(unsigned targetReg, const ValueRecord &VR) { 01326 bool isUnsigned = VR.Ty->isUnsigned() || VR.Ty == Type::BoolTy; 01327 01328 Value *Val = VR.Val; 01329 const Type *Ty = VR.Ty; 01330 if (Val) { 01331 if (Constant *C = dyn_cast<Constant>(Val)) { 01332 Val = ConstantExpr::getCast(C, Type::IntTy); 01333 Ty = Type::IntTy; 01334 } 01335 01336 // If this is a simple constant, just emit a MOVri directly to avoid the 01337 // copy. 01338 if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { 01339 int TheVal = CI->getRawValue() & 0xFFFFFFFF; 01340 BuildMI(BB, X86::MOV32ri, 1, targetReg).addImm(TheVal); 01341 return; 01342 } 01343 } 01344 01345 // Make sure we have the register number for this value... 01346 unsigned Reg = Val ? getReg(Val) : VR.Reg; 01347 01348 switch (getClassB(Ty)) { 01349 case cByte: 01350 // Extend value into target register (8->32) 01351 if (isUnsigned) 01352 BuildMI(BB, X86::MOVZX32rr8, 1, targetReg).addReg(Reg); 01353 else 01354 BuildMI(BB, X86::MOVSX32rr8, 1, targetReg).addReg(Reg); 01355 break; 01356 case cShort: 01357 // Extend value into target register (16->32) 01358 if (isUnsigned) 01359 BuildMI(BB, X86::MOVZX32rr16, 1, targetReg).addReg(Reg); 01360 else 01361 BuildMI(BB, X86::MOVSX32rr16, 1, targetReg).addReg(Reg); 01362 break; 01363 case cInt: 01364 // Move value into target register (32->32) 01365 BuildMI(BB, X86::MOV32rr, 1, targetReg).addReg(Reg); 01366 break; 01367 default: 01368 assert(0 && "Unpromotable operand class in promote32"); 01369 } 01370 } 01371 01372 /// 'ret' instruction - Here we are interested in meeting the x86 ABI. As such, 01373 /// we have the following possibilities: 01374 /// 01375 /// ret void: No return value, simply emit a 'ret' instruction 01376 /// ret sbyte, ubyte : Extend value into EAX and return 01377 /// ret short, ushort: Extend value into EAX and return 01378 /// ret int, uint : Move value into EAX and return 01379 /// ret pointer : Move value into EAX and return 01380 /// ret long, ulong : Move value into EAX/EDX and return 01381 /// ret float/double : Top of FP stack 01382 /// 01383 void X86ISel::visitReturnInst(ReturnInst &I) { 01384 if (I.getNumOperands() == 0) { 01385 BuildMI(BB, X86::RET, 0); // Just emit a 'ret' instruction 01386 return; 01387 } 01388 01389 Value *RetVal = I.getOperand(0); 01390 switch (getClassB(RetVal->getType())) { 01391 case cByte: // integral return values: extend or move into EAX and return 01392 case cShort: 01393 case cInt: 01394 promote32(X86::EAX, ValueRecord(RetVal)); 01395 // Declare that EAX is live on exit 01396 BuildMI(BB, X86::IMPLICIT_USE, 2).addReg(X86::EAX).addReg(X86::ESP); 01397 break; 01398 case cFP: { // Floats & Doubles: Return in ST(0) 01399 unsigned RetReg = getReg(RetVal); 01400 BuildMI(BB, X86::FpSETRESULT, 1).addReg(RetReg); 01401 // Declare that top-of-stack is live on exit 01402 BuildMI(BB, X86::IMPLICIT_USE, 2).addReg(X86::ST0).addReg(X86::ESP); 01403 break; 01404 } 01405 case cLong: { 01406 unsigned RetReg = getReg(RetVal); 01407 BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(RetReg); 01408 BuildMI(BB, X86::MOV32rr, 1, X86::EDX).addReg(RetReg+1); 01409 // Declare that EAX & EDX are live on exit 01410 BuildMI(BB, X86::IMPLICIT_USE, 3).addReg(X86::EAX).addReg(X86::EDX) 01411 .addReg(X86::ESP); 01412 break; 01413 } 01414 default: 01415 visitInstruction(I); 01416 } 01417 // Emit a 'ret' instruction 01418 BuildMI(BB, X86::RET, 0); 01419 } 01420 01421 // getBlockAfter - Return the basic block which occurs lexically after the 01422 // specified one. 01423 static inline BasicBlock *getBlockAfter(BasicBlock *BB) { 01424 Function::iterator I = BB; ++I; // Get iterator to next block 01425 return I != BB->getParent()->end() ? &*I : 0; 01426 } 01427 01428 /// visitBranchInst - Handle conditional and unconditional branches here. Note 01429 /// that since code layout is frozen at this point, that if we are trying to 01430 /// jump to a block that is the immediate successor of the current block, we can 01431 /// just make a fall-through (but we don't currently). 01432 /// 01433 void X86ISel::visitBranchInst(BranchInst &BI) { 01434 // Update machine-CFG edges 01435 BB->addSuccessor (MBBMap[BI.getSuccessor(0)]); 01436 if (BI.isConditional()) 01437 BB->addSuccessor (MBBMap[BI.getSuccessor(1)]); 01438 01439 BasicBlock *NextBB = getBlockAfter(BI.getParent()); // BB after current one 01440 01441 if (!BI.isConditional()) { // Unconditional branch? 01442 if (BI.getSuccessor(0) != NextBB) 01443 BuildMI(BB, X86::JMP, 1).addMBB(MBBMap[BI.getSuccessor(0)]); 01444 return; 01445 } 01446 01447 // See if we can fold the setcc into the branch itself... 01448 SetCondInst *SCI = canFoldSetCCIntoBranchOrSelect(BI.getCondition()); 01449 if (SCI == 0) { 01450 // Nope, cannot fold setcc into this branch. Emit a branch on a condition 01451 // computed some other way... 01452 unsigned condReg = getReg(BI.getCondition()); 01453 BuildMI(BB, X86::TEST8rr, 2).addReg(condReg).addReg(condReg); 01454 if (BI.getSuccessor(1) == NextBB) { 01455 if (BI.getSuccessor(0) != NextBB) 01456 BuildMI(BB, X86::JNE, 1).addMBB(MBBMap[BI.getSuccessor(0)]); 01457 } else { 01458 BuildMI(BB, X86::JE, 1).addMBB(MBBMap[BI.getSuccessor(1)]); 01459 01460 if (BI.getSuccessor(0) != NextBB) 01461 BuildMI(BB, X86::JMP, 1).addMBB(MBBMap[BI.getSuccessor(0)]); 01462 } 01463 return; 01464 } 01465 01466 unsigned OpNum = getSetCCNumber(SCI->getOpcode()); 01467 MachineBasicBlock::iterator MII = BB->end(); 01468 OpNum = EmitComparison(OpNum, SCI->getOperand(0), SCI->getOperand(1), BB,MII); 01469 01470 const Type *CompTy = SCI->getOperand(0)->getType(); 01471 bool isSigned = CompTy->isSigned() && getClassB(CompTy) != cFP; 01472 01473 01474 // LLVM -> X86 signed X86 unsigned 01475 // ----- ---------- ------------ 01476 // seteq -> je je 01477 // setne -> jne jne 01478 // setlt -> jl jb 01479 // setge -> jge jae 01480 // setgt -> jg ja 01481 // setle -> jle jbe 01482 // ---- 01483 // js // Used by comparison with 0 optimization 01484 // jns 01485 01486 static const unsigned OpcodeTab[2][8] = { 01487 { X86::JE, X86::JNE, X86::JB, X86::JAE, X86::JA, X86::JBE, 0, 0 }, 01488 { X86::JE, X86::JNE, X86::JL, X86::JGE, X86::JG, X86::JLE, 01489 X86::JS, X86::JNS }, 01490 }; 01491 01492 if (BI.getSuccessor(0) != NextBB) { 01493 BuildMI(BB, OpcodeTab[isSigned][OpNum], 1) 01494 .addMBB(MBBMap[BI.getSuccessor(0)]); 01495 if (BI.getSuccessor(1) != NextBB) 01496 BuildMI(BB, X86::JMP, 1).addMBB(MBBMap[BI.getSuccessor(1)]); 01497 } else { 01498 // Change to the inverse condition... 01499 if (BI.getSuccessor(1) != NextBB) { 01500 OpNum ^= 1; 01501 BuildMI(BB, OpcodeTab[isSigned][OpNum], 1) 01502 .addMBB(MBBMap[BI.getSuccessor(1)]); 01503 } 01504 } 01505 } 01506 01507 01508 /// doCall - This emits an abstract call instruction, setting up the arguments 01509 /// and the return value as appropriate. For the actual function call itself, 01510 /// it inserts the specified CallMI instruction into the stream. 01511 /// 01512 void X86ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI, 01513 const std::vector<ValueRecord> &Args) { 01514 // Count how many bytes are to be pushed on the stack... 01515 unsigned NumBytes = 0; 01516 01517 if (!Args.empty()) { 01518 for (unsigned i = 0, e = Args.size(); i != e; ++i) 01519 switch (getClassB(Args[i].Ty)) { 01520 case cByte: case cShort: case cInt: 01521 NumBytes += 4; break; 01522 case cLong: 01523 NumBytes += 8; break; 01524 case cFP: 01525 NumBytes += Args[i].Ty == Type::FloatTy ? 4 : 8; 01526 break; 01527 default: assert(0 && "Unknown class!"); 01528 } 01529 01530 // Adjust the stack pointer for the new arguments... 01531 BuildMI(BB, X86::ADJCALLSTACKDOWN, 1).addImm(NumBytes); 01532 01533 // Arguments go on the stack in reverse order, as specified by the ABI. 01534 unsigned ArgOffset = 0; 01535 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 01536 unsigned ArgReg; 01537 switch (getClassB(Args[i].Ty)) { 01538 case cByte: 01539 if (Args[i].Val && isa<ConstantBool>(Args[i].Val)) { 01540 addRegOffset(BuildMI(BB, X86::MOV32mi, 5), X86::ESP, ArgOffset) 01541 .addImm(Args[i].Val == ConstantBool::True); 01542 break; 01543 } 01544 // FALL THROUGH 01545 case cShort: 01546 if (Args[i].Val && isa<ConstantInt>(Args[i].Val)) { 01547 // Zero/Sign extend constant, then stuff into memory. 01548 ConstantInt *Val = cast<ConstantInt>(Args[i].Val); 01549 Val = cast<ConstantInt>(ConstantExpr::getCast(Val, Type::IntTy)); 01550 addRegOffset(BuildMI(BB, X86::MOV32mi, 5), X86::ESP, ArgOffset) 01551 .addImm(Val->getRawValue() & 0xFFFFFFFF); 01552 } else { 01553 // Promote arg to 32 bits wide into a temporary register... 01554 ArgReg = makeAnotherReg(Type::UIntTy); 01555 promote32(ArgReg, Args[i]); 01556 addRegOffset(BuildMI(BB, X86::MOV32mr, 5), 01557 X86::ESP, ArgOffset).addReg(ArgReg); 01558 } 01559 break; 01560 case cInt: 01561 if (Args[i].Val && isa<ConstantInt>(Args[i].Val)) { 01562 unsigned Val = cast<ConstantInt>(Args[i].Val)->getRawValue(); 01563 addRegOffset(BuildMI(BB, X86::MOV32mi, 5), 01564 X86::ESP, ArgOffset).addImm(Val); 01565 } else if (Args[i].Val && isa<ConstantPointerNull>(Args[i].Val)) { 01566 addRegOffset(BuildMI(BB, X86::MOV32mi, 5), 01567 X86::ESP, ArgOffset).addImm(0); 01568 } else { 01569 ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg; 01570 addRegOffset(BuildMI(BB, X86::MOV32mr, 5), 01571 X86::ESP, ArgOffset).addReg(ArgReg); 01572 } 01573 break; 01574 case cLong: 01575 if (Args[i].Val && isa<ConstantInt>(Args[i].Val)) { 01576 uint64_t Val = cast<ConstantInt>(Args[i].Val)->getRawValue(); 01577 addRegOffset(BuildMI(BB, X86::MOV32mi, 5), 01578 X86::ESP, ArgOffset).addImm(Val & ~0U); 01579 addRegOffset(BuildMI(BB, X86::MOV32mi, 5), 01580 X86::ESP, ArgOffset+4).addImm(Val >> 32ULL); 01581 } else { 01582 ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg; 01583 addRegOffset(BuildMI(BB, X86::MOV32mr, 5), 01584 X86::ESP, ArgOffset).addReg(ArgReg); 01585 addRegOffset(BuildMI(BB, X86::MOV32mr, 5), 01586 X86::ESP, ArgOffset+4).addReg(ArgReg+1); 01587 } 01588 ArgOffset += 4; // 8 byte entry, not 4. 01589 break; 01590 01591 case cFP: 01592 ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg; 01593 if (Args[i].Ty == Type::FloatTy) { 01594 addRegOffset(BuildMI(BB, X86::FST32m, 5), 01595 X86::ESP, ArgOffset).addReg(ArgReg); 01596 } else { 01597 assert(Args[i].Ty == Type::DoubleTy && "Unknown FP type!"); 01598 addRegOffset(BuildMI(BB, X86::FST64m, 5), 01599 X86::ESP, ArgOffset).addReg(ArgReg); 01600 ArgOffset += 4; // 8 byte entry, not 4. 01601 } 01602 break; 01603 01604 default: assert(0 && "Unknown class!"); 01605 } 01606 ArgOffset += 4; 01607 } 01608 } else { 01609 BuildMI(BB, X86::ADJCALLSTACKDOWN, 1).addImm(0); 01610 } 01611 01612 BB->push_back(CallMI); 01613 01614 BuildMI(BB, X86::ADJCALLSTACKUP, 1).addImm(NumBytes); 01615 01616 // If there is a return value, scavenge the result from the location the call 01617 // leaves it in... 01618 // 01619 if (Ret.Ty != Type::VoidTy) { 01620 unsigned DestClass = getClassB(Ret.Ty); 01621 switch (DestClass) { 01622 case cByte: 01623 case cShort: 01624 case cInt: { 01625 // Integral results are in %eax, or the appropriate portion 01626 // thereof. 01627 static const unsigned regRegMove[] = { 01628 X86::MOV8rr, X86::MOV16rr, X86::MOV32rr 01629 }; 01630 static const unsigned AReg[] = { X86::AL, X86::AX, X86::EAX }; 01631 BuildMI(BB, regRegMove[DestClass], 1, Ret.Reg).addReg(AReg[DestClass]); 01632 break; 01633 } 01634 case cFP: // Floating-point return values live in %ST(0) 01635 BuildMI(BB, X86::FpGETRESULT, 1, Ret.Reg); 01636 break; 01637 case cLong: // Long values are left in EDX:EAX 01638 BuildMI(BB, X86::MOV32rr, 1, Ret.Reg).addReg(X86::EAX); 01639 BuildMI(BB, X86::MOV32rr, 1, Ret.Reg+1).addReg(X86::EDX); 01640 break; 01641 default: assert(0 && "Unknown class!"); 01642 } 01643 } 01644 } 01645 01646 01647 /// visitCallInst - Push args on stack and do a procedure call instruction. 01648 void X86ISel::visitCallInst(CallInst &CI) { 01649 MachineInstr *TheCall; 01650 if (Function *F = CI.getCalledFunction()) { 01651 // Is it an intrinsic function call? 01652 if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) { 01653 visitIntrinsicCall(ID, CI); // Special intrinsics are not handled here 01654 return; 01655 } 01656 01657 // Emit a CALL instruction with PC-relative displacement. 01658 TheCall = BuildMI(X86::CALLpcrel32, 1).addGlobalAddress(F, true); 01659 } else { // Emit an indirect call... 01660 unsigned Reg = getReg(CI.getCalledValue()); 01661 TheCall = BuildMI(X86::CALL32r, 1).addReg(Reg); 01662 } 01663 01664 std::vector<ValueRecord> Args; 01665 for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i) 01666 Args.push_back(ValueRecord(CI.getOperand(i))); 01667 01668 unsigned DestReg = CI.getType() != Type::VoidTy ? getReg(CI) : 0; 01669 doCall(ValueRecord(DestReg, CI.getType()), TheCall, Args); 01670 } 01671 01672 /// LowerUnknownIntrinsicFunctionCalls - This performs a prepass over the 01673 /// function, lowering any calls to unknown intrinsic functions into the 01674 /// equivalent LLVM code. 01675 /// 01676 void X86ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) { 01677 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) 01678 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) 01679 if (CallInst *CI = dyn_cast<CallInst>(I++)) 01680 if (Function *F = CI->getCalledFunction()) 01681 switch (F->getIntrinsicID()) { 01682 case Intrinsic::not_intrinsic: 01683 case Intrinsic::vastart: 01684 case Intrinsic::vacopy: 01685 case Intrinsic::vaend: 01686 case Intrinsic::returnaddress: 01687 case Intrinsic::frameaddress: 01688 case Intrinsic::memcpy: 01689 case Intrinsic::memset: 01690 case Intrinsic::isunordered: 01691 case Intrinsic::readport: 01692 case Intrinsic::writeport: 01693 // We directly implement these intrinsics 01694 break; 01695 case Intrinsic::readio: { 01696 // On X86, memory operations are in-order. Lower this intrinsic 01697 // into a volatile load. 01698 Instruction *Before = CI->getPrev(); 01699 LoadInst * LI = new LoadInst(CI->getOperand(1), "", true, CI); 01700 CI->replaceAllUsesWith(LI); 01701 BB->getInstList().erase(CI); 01702 break; 01703 } 01704 case Intrinsic::writeio: { 01705 // On X86, memory operations are in-order. Lower this intrinsic 01706 // into a volatile store. 01707 Instruction *Before = CI->getPrev(); 01708 StoreInst *LI = new StoreInst(CI->getOperand(1), 01709 CI->getOperand(2), true, CI); 01710 CI->replaceAllUsesWith(LI); 01711 BB->getInstList().erase(CI); 01712 break; 01713 } 01714 default: 01715 // All other intrinsic calls we must lower. 01716 Instruction *Before = CI->getPrev(); 01717 TM.getIntrinsicLowering().LowerIntrinsicCall(CI); 01718 if (Before) { // Move iterator to instruction after call 01719 I = Before; ++I; 01720 } else { 01721 I = BB->begin(); 01722 } 01723 } 01724 } 01725 01726 void X86ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) { 01727 unsigned TmpReg1, TmpReg2; 01728 switch (ID) { 01729 case Intrinsic::vastart: 01730 // Get the address of the first vararg value... 01731 TmpReg1 = getReg(CI); 01732 addFrameReference(BuildMI(BB, X86::LEA32r, 5, TmpReg1), VarArgsFrameIndex); 01733 return; 01734 01735 case Intrinsic::vacopy: 01736 TmpReg1 = getReg(CI); 01737 TmpReg2 = getReg(CI.getOperand(1)); 01738 BuildMI(BB, X86::MOV32rr, 1, TmpReg1).addReg(TmpReg2); 01739 return; 01740 case Intrinsic::vaend: return; // Noop on X86 01741 01742 case Intrinsic::returnaddress: 01743 case Intrinsic::frameaddress: 01744 TmpReg1 = getReg(CI); 01745 if (cast<Constant>(CI.getOperand(1))->isNullValue()) { 01746 if (ID == Intrinsic::returnaddress) { 01747 // Just load the return address 01748 addFrameReference(BuildMI(BB, X86::MOV32rm, 4, TmpReg1), 01749 ReturnAddressIndex); 01750 } else { 01751 addFrameReference(BuildMI(BB, X86::LEA32r, 4, TmpReg1), 01752 ReturnAddressIndex, -4); 01753 } 01754 } else { 01755 // Values other than zero are not implemented yet. 01756 BuildMI(BB, X86::MOV32ri, 1, TmpReg1).addImm(0); 01757 } 01758 return; 01759 01760 case Intrinsic::isunordered: 01761 TmpReg1 = getReg(CI.getOperand(1)); 01762 TmpReg2 = getReg(CI.getOperand(2)); 01763 emitUCOMr(BB, BB->end(), TmpReg2, TmpReg1); 01764 TmpReg2 = getReg(CI); 01765 BuildMI(BB, X86::SETPr, 0, TmpReg2); 01766 return; 01767 01768 case Intrinsic::memcpy: { 01769 assert(CI.getNumOperands() == 5 && "Illegal llvm.memcpy call!"); 01770 unsigned Align = 1; 01771 if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) { 01772 Align = AlignC->getRawValue(); 01773 if (Align == 0) Align = 1; 01774 } 01775 01776 // Turn the byte code into # iterations 01777 unsigned CountReg; 01778 unsigned Opcode; 01779 switch (Align & 3) { 01780 case 2: // WORD aligned 01781 if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) { 01782 CountReg = getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/2)); 01783 } else { 01784 CountReg = makeAnotherReg(Type::IntTy); 01785 unsigned ByteReg = getReg(CI.getOperand(3)); 01786 BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(1); 01787 } 01788 Opcode = X86::REP_MOVSW; 01789 break; 01790 case 0: // DWORD aligned 01791 if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) { 01792 CountReg = getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/4)); 01793 } else { 01794 CountReg = makeAnotherReg(Type::IntTy); 01795 unsigned ByteReg = getReg(CI.getOperand(3)); 01796 BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(2); 01797 } 01798 Opcode = X86::REP_MOVSD; 01799 break; 01800 default: // BYTE aligned 01801 CountReg = getReg(CI.getOperand(3)); 01802 Opcode = X86::REP_MOVSB; 01803 break; 01804 } 01805 01806 // No matter what the alignment is, we put the source in ESI, the 01807 // destination in EDI, and the count in ECX. 01808 TmpReg1 = getReg(CI.getOperand(1)); 01809 TmpReg2 = getReg(CI.getOperand(2)); 01810 BuildMI(BB, X86::MOV32rr, 1, X86::ECX).addReg(CountReg); 01811 BuildMI(BB, X86::MOV32rr, 1, X86::EDI).addReg(TmpReg1); 01812 BuildMI(BB, X86::MOV32rr, 1, X86::ESI).addReg(TmpReg2); 01813 BuildMI(BB, Opcode, 0); 01814 return; 01815 } 01816 case Intrinsic::memset: { 01817 assert(CI.getNumOperands() == 5 && "Illegal llvm.memset call!"); 01818 unsigned Align = 1; 01819 if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) { 01820 Align = AlignC->getRawValue(); 01821 if (Align == 0) Align = 1; 01822 } 01823 01824 // Turn the byte code into # iterations 01825 unsigned CountReg; 01826 unsigned Opcode; 01827 if (ConstantInt *ValC = dyn_cast<ConstantInt>(CI.getOperand(2))) { 01828 unsigned Val = ValC->getRawValue() & 255; 01829 01830 // If the value is a constant, then we can potentially use larger copies. 01831 switch (Align & 3) { 01832 case 2: // WORD aligned 01833 if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) { 01834 CountReg =getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/2)); 01835 } else { 01836 CountReg = makeAnotherReg(Type::IntTy); 01837 unsigned ByteReg = getReg(CI.getOperand(3)); 01838 BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(1); 01839 } 01840 BuildMI(BB, X86::MOV16ri, 1, X86::AX).addImm((Val << 8) | Val); 01841 Opcode = X86::REP_STOSW; 01842 break; 01843 case 0: // DWORD aligned 01844 if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) { 01845 CountReg =getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/4)); 01846 } else { 01847 CountReg = makeAnotherReg(Type::IntTy); 01848 unsigned ByteReg = getReg(CI.getOperand(3)); 01849 BuildMI(BB, X86::SHR32ri, 2, CountReg).addReg(ByteReg).addImm(2); 01850 } 01851 Val = (Val << 8) | Val; 01852 BuildMI(BB, X86::MOV32ri, 1, X86::EAX).addImm((Val << 16) | Val); 01853 Opcode = X86::REP_STOSD; 01854 break; 01855 default: // BYTE aligned 01856 CountReg = getReg(CI.getOperand(3)); 01857 BuildMI(BB, X86::MOV8ri, 1, X86::AL).addImm(Val); 01858 Opcode = X86::REP_STOSB; 01859 break; 01860 } 01861 } else { 01862 // If it's not a constant value we are storing, just fall back. We could 01863 // try to be clever to form 16 bit and 32 bit values, but we don't yet. 01864 unsigned ValReg = getReg(CI.getOperand(2)); 01865 BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(ValReg); 01866 CountReg = getReg(CI.getOperand(3)); 01867 Opcode = X86::REP_STOSB; 01868 } 01869 01870 // No matter what the alignment is, we put the source in ESI, the 01871 // destination in EDI, and the count in ECX. 01872 TmpReg1 = getReg(CI.getOperand(1)); 01873 //TmpReg2 = getReg(CI.getOperand(2)); 01874 BuildMI(BB, X86::MOV32rr, 1, X86::ECX).addReg(CountReg); 01875 BuildMI(BB, X86::MOV32rr, 1, X86::EDI).addReg(TmpReg1); 01876 BuildMI(BB, Opcode, 0); 01877 return; 01878 } 01879 01880 case Intrinsic::readport: { 01881 // First, determine that the size of the operand falls within the acceptable 01882 // range for this architecture. 01883 // 01884 if (getClassB(CI.getOperand(1)->getType()) != cShort) { 01885 std::cerr << "llvm.readport: Address size is not 16 bits\n"; 01886 exit(1); 01887 } 01888 01889 // Now, move the I/O port address into the DX register and use the IN 01890 // instruction to get the input data. 01891 // 01892 unsigned Class = getClass(CI.getCalledFunction()->getReturnType()); 01893 unsigned DestReg = getReg(CI); 01894 01895 // If the port is a single-byte constant, use the immediate form. 01896 if (ConstantInt *C = dyn_cast<ConstantInt>(CI.getOperand(1))) 01897 if ((C->getRawValue() & 255) == C->getRawValue()) { 01898 switch (Class) { 01899 case cByte: 01900 BuildMI(BB, X86::IN8ri, 1).addImm((unsigned char)C->getRawValue()); 01901 BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AL); 01902 return; 01903 case cShort: 01904 BuildMI(BB, X86::IN16ri, 1).addImm((unsigned char)C->getRawValue()); 01905 BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AX); 01906 return; 01907 case cInt: 01908 BuildMI(BB, X86::IN32ri, 1).addImm((unsigned char)C->getRawValue()); 01909 BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::EAX); 01910 return; 01911 } 01912 } 01913 01914 unsigned Reg = getReg(CI.getOperand(1)); 01915 BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(Reg); 01916 switch (Class) { 01917 case cByte: 01918 BuildMI(BB, X86::IN8rr, 0); 01919 BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AL); 01920 break; 01921 case cShort: 01922 BuildMI(BB, X86::IN16rr, 0); 01923 BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::AX); 01924 break; 01925 case cInt: 01926 BuildMI(BB, X86::IN32rr, 0); 01927 BuildMI(BB, X86::MOV8rr, 1, DestReg).addReg(X86::EAX); 01928 break; 01929 default: 01930 std::cerr << "Cannot do input on this data type"; 01931 exit (1); 01932 } 01933 return; 01934 } 01935 01936 case Intrinsic::writeport: { 01937 // First, determine that the size of the operand falls within the 01938 // acceptable range for this architecture. 01939 if (getClass(CI.getOperand(2)->getType()) != cShort) { 01940 std::cerr << "llvm.writeport: Address size is not 16 bits\n"; 01941 exit(1); 01942 } 01943 01944 unsigned Class = getClassB(CI.getOperand(1)->getType()); 01945 unsigned ValReg = getReg(CI.getOperand(1)); 01946 switch (Class) { 01947 case cByte: 01948 BuildMI(BB, X86::MOV8rr, 1, X86::AL).addReg(ValReg); 01949 break; 01950 case cShort: 01951 BuildMI(BB, X86::MOV16rr, 1, X86::AX).addReg(ValReg); 01952 break; 01953 case cInt: 01954 BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(ValReg); 01955 break; 01956 default: 01957 std::cerr << "llvm.writeport: invalid data type for X86 target"; 01958 exit(1); 01959 } 01960 01961 01962 // If the port is a single-byte constant, use the immediate form. 01963 if (ConstantInt *C = dyn_cast<ConstantInt>(CI.getOperand(2))) 01964 if ((C->getRawValue() & 255) == C->getRawValue()) { 01965 static const unsigned O[] = { X86::OUT8ir, X86::OUT16ir, X86::OUT32ir }; 01966 BuildMI(BB, O[Class], 1).addImm((unsigned char)C->getRawValue()); 01967 return; 01968 } 01969 01970 // Otherwise, move the I/O port address into the DX register and the value 01971 // to write into the AL/AX/EAX register. 01972 static const unsigned Opc[] = { X86::OUT8rr, X86::OUT16rr, X86::OUT32rr }; 01973 unsigned Reg = getReg(CI.getOperand(2)); 01974 BuildMI(BB, X86::MOV16rr, 1, X86::DX).addReg(Reg); 01975 BuildMI(BB, Opc[Class], 0); 01976 return; 01977 } 01978 01979 default: assert(0 && "Error: unknown intrinsics should have been lowered!"); 01980 } 01981 } 01982 01983 static bool isSafeToFoldLoadIntoInstruction(LoadInst &LI, Instruction &User) { 01984 if (LI.getParent() != User.getParent()) 01985 return false; 01986 BasicBlock::iterator It = &LI; 01987 // Check all of the instructions between the load and the user. We should 01988 // really use alias analysis here, but for now we just do something simple. 01989 for (++It; It != BasicBlock::iterator(&User); ++It) { 01990 switch (It->getOpcode()) { 01991 case Instruction::Free: 01992 case Instruction::Store: 01993 case Instruction::Call: 01994 case Instruction::Invoke: 01995 return false; 01996 case Instruction::Load: 01997 if (cast<LoadInst>(It)->isVolatile() && LI.isVolatile()) 01998 return false; 01999 break; 02000 } 02001 } 02002 return true; 02003 } 02004 02005 /// visitSimpleBinary - Implement simple binary operators for integral types... 02006 /// OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or, 4 for 02007 /// Xor. 02008 /// 02009 void X86ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) { 02010 unsigned DestReg = getReg(B); 02011 MachineBasicBlock::iterator MI = BB->end(); 02012 Value *Op0 = B.getOperand(0), *Op1 = B.getOperand(1); 02013 unsigned Class = getClassB(B.getType()); 02014 02015 // If this is AND X, C, and it is only used by a setcc instruction, it will 02016 // be folded. There is no need to emit this instruction. 02017 if (B.hasOneUse() && OperatorClass == 2 && isa<ConstantInt>(Op1)) 02018 if (Class == cByte || Class == cShort || Class == cInt) { 02019 Instruction *Use = cast<Instruction>(B.use_back()); 02020 if (isa<SetCondInst>(Use) && 02021 Use->getOperand(1) == Constant::getNullValue(B.getType())) { 02022 switch (getSetCCNumber(Use->getOpcode())) { 02023 case 0: 02024 case 1: 02025 return; 02026 default: 02027 if (B.getType()->isSigned()) return; 02028 } 02029 } 02030 } 02031 02032 // Special case: op Reg, load [mem] 02033 if (isa<LoadInst>(Op0) && !isa<LoadInst>(Op1) && Class != cLong && 02034 Op0->hasOneUse() && 02035 isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op0), B)) 02036 if (!B.swapOperands()) 02037 std::swap(Op0, Op1); // Make sure any loads are in the RHS. 02038 02039 if (isa<LoadInst>(Op1) && Class != cLong && Op1->hasOneUse() && 02040 isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op1), B)) { 02041 02042 unsigned Opcode; 02043 if (Class != cFP) { 02044 static const unsigned OpcodeTab[][3] = { 02045 // Arithmetic operators 02046 { X86::ADD8rm, X86::ADD16rm, X86::ADD32rm }, // ADD 02047 { X86::SUB8rm, X86::SUB16rm, X86::SUB32rm }, // SUB 02048 02049 // Bitwise operators 02050 { X86::AND8rm, X86::AND16rm, X86::AND32rm }, // AND 02051 { X86:: OR8rm, X86:: OR16rm, X86:: OR32rm }, // OR 02052 { X86::XOR8rm, X86::XOR16rm, X86::XOR32rm }, // XOR 02053 }; 02054 Opcode = OpcodeTab[OperatorClass][Class]; 02055 } else { 02056 static const unsigned OpcodeTab[][2] = { 02057 { X86::FADD32m, X86::FADD64m }, // ADD 02058 { X86::FSUB32m, X86::FSUB64m }, // SUB 02059 }; 02060 const Type *Ty = Op0->getType(); 02061 assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!"); 02062 Opcode = OpcodeTab[OperatorClass][Ty == Type::DoubleTy]; 02063 } 02064 02065 unsigned Op0r = getReg(Op0); 02066 if (AllocaInst *AI = 02067 dyn_castFixedAlloca(cast<LoadInst>(Op1)->getOperand(0))) { 02068 unsigned FI = getFixedSizedAllocaFI(AI); 02069 addFrameReference(BuildMI(BB, Opcode, 5, DestReg).addReg(Op0r), FI); 02070 02071 } else { 02072 X86AddressMode AM; 02073 getAddressingMode(cast<LoadInst>(Op1)->getOperand(0), AM); 02074 02075 addFullAddress(BuildMI(BB, Opcode, 5, DestReg).addReg(Op0r), AM); 02076 } 02077 return; 02078 } 02079 02080 // If this is a floating point subtract, check to see if we can fold the first 02081 // operand in. 02082 if (Class == cFP && OperatorClass == 1 && 02083 isa<LoadInst>(Op0) && 02084 isSafeToFoldLoadIntoInstruction(*cast<LoadInst>(Op0), B)) { 02085 const Type *Ty = Op0->getType(); 02086 assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!"); 02087 unsigned Opcode = Ty == Type::FloatTy ? X86::FSUBR32m : X86::FSUBR64m; 02088 02089 unsigned Op1r = getReg(Op1); 02090 if (AllocaInst *AI = 02091 dyn_castFixedAlloca(cast<LoadInst>(Op0)->getOperand(0))) { 02092 unsigned FI = getFixedSizedAllocaFI(AI); 02093 addFrameReference(BuildMI(BB, Opcode, 5, DestReg).addReg(Op1r), FI); 02094 } else { 02095 X86AddressMode AM; 02096 getAddressingMode(cast<LoadInst>(Op0)->getOperand(0), AM); 02097 02098 addFullAddress(BuildMI(BB, Opcode, 5, DestReg).addReg(Op1r), AM); 02099 } 02100 return; 02101 } 02102 02103 emitSimpleBinaryOperation(BB, MI, Op0, Op1, OperatorClass, DestReg); 02104 } 02105 02106 02107 /// emitBinaryFPOperation - This method handles emission of floating point 02108 /// Add (0), Sub (1), Mul (2), and Div (3) operations. 02109 void X86ISel::emitBinaryFPOperation(MachineBasicBlock *BB, 02110 MachineBasicBlock::iterator IP, 02111 Value *Op0, Value *Op1, 02112 unsigned OperatorClass, unsigned DestReg) { 02113 // Special case: op Reg, <const fp> 02114 if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) 02115 if (!Op1C->isExactlyValue(+0.0) && !Op1C->isExactlyValue(+1.0)) { 02116 // Create a constant pool entry for this constant. 02117 MachineConstantPool *CP = F->getConstantPool(); 02118 unsigned CPI = CP->getConstantPoolIndex(Op1C); 02119 const Type *Ty = Op1->getType(); 02120 02121 static const unsigned OpcodeTab[][4] = { 02122 { X86::FADD32m, X86::FSUB32m, X86::FMUL32m, X86::FDIV32m }, // Float 02123 { X86::FADD64m, X86::FSUB64m, X86::FMUL64m, X86::FDIV64m }, // Double 02124 }; 02125 02126 assert(Ty == Type::FloatTy || Ty == Type::DoubleTy && "Unknown FP type!"); 02127 unsigned Opcode = OpcodeTab[Ty != Type::FloatTy][OperatorClass]; 02128 unsigned Op0r = getReg(Op0, BB, IP); 02129 addConstantPoolReference(BuildMI(*BB, IP, Opcode, 5, 02130 DestReg).addReg(Op0r), CPI); 02131 return; 02132 } 02133 02134 // Special case: R1 = op <const fp>, R2 02135 if (ConstantFP *CFP = dyn_cast<ConstantFP>(Op0)) 02136 if (CFP->isExactlyValue(-0.0) && OperatorClass == 1) { 02137 // -0.0 - X === -X 02138 unsigned op1Reg = getReg(Op1, BB, IP); 02139 BuildMI(*BB, IP, X86::FCHS, 1, DestReg).addReg(op1Reg); 02140 return; 02141 } else if (!CFP->isExactlyValue(+0.0) && !CFP->isExactlyValue(+1.0)) { 02142 // R1 = op CST, R2 --> R1 = opr R2, CST 02143 02144 // Create a constant pool entry for this constant. 02145 MachineConstantPool *CP = F->getConstantPool(); 02146 unsigned CPI = CP->getConstantPoolIndex(CFP); 02147 const Type *Ty = CFP->getType(); 02148 02149 static const unsigned OpcodeTab[][4] = { 02150 { X86::FADD32m, X86::FSUBR32m, X86::FMUL32m, X86::FDIVR32m }, // Float 02151 { X86::FADD64m, X86::FSUBR64m, X86::FMUL64m, X86::FDIVR64m }, // Double 02152 }; 02153 02154 assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!"); 02155 unsigned Opcode = OpcodeTab[Ty != Type::FloatTy][OperatorClass]; 02156 unsigned Op1r = getReg(Op1, BB, IP); 02157 addConstantPoolReference(BuildMI(*BB, IP, Opcode, 5, 02158 DestReg).addReg(Op1r), CPI); 02159 return; 02160 } 02161 02162 // General case. 02163 static const unsigned OpcodeTab[4] = { 02164 X86::FpADD, X86::FpSUB, X86::FpMUL, X86::FpDIV 02165 }; 02166 02167 unsigned Opcode = OpcodeTab[OperatorClass]; 02168 unsigned Op0r = getReg(Op0, BB, IP); 02169 unsigned Op1r = getReg(Op1, BB, IP); 02170 BuildMI(*BB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r); 02171 } 02172 02173 /// emitSimpleBinaryOperation - Implement simple binary operators for integral 02174 /// types... OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for 02175 /// Or, 4 for Xor. 02176 /// 02177 /// emitSimpleBinaryOperation - Common code shared between visitSimpleBinary 02178 /// and constant expression support. 02179 /// 02180 void X86ISel::emitSimpleBinaryOperation(MachineBasicBlock *MBB, 02181 MachineBasicBlock::iterator IP, 02182 Value *Op0, Value *Op1, 02183 unsigned OperatorClass, 02184 unsigned DestReg) { 02185 unsigned Class = getClassB(Op0->getType()); 02186 02187 if (Class == cFP) { 02188 assert(OperatorClass < 2 && "No logical ops for FP!"); 02189 emitBinaryFPOperation(MBB, IP, Op0, Op1, OperatorClass, DestReg); 02190 return; 02191 } 02192 02193 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0)) 02194 if (OperatorClass == 1) { 02195 static unsigned const NEGTab[] = { 02196 X86::NEG8r, X86::NEG16r, X86::NEG32r, 0, X86::NEG32r 02197 }; 02198 02199 // sub 0, X -> neg X 02200 if (CI->isNullValue()) { 02201 unsigned op1Reg = getReg(Op1, MBB, IP); 02202 BuildMI(*MBB, IP, NEGTab[Class], 1, DestReg).addReg(op1Reg); 02203 02204 if (Class == cLong) { 02205 // We just emitted: Dl = neg Sl 02206 // Now emit : T = addc Sh, 0 02207 // : Dh = neg T 02208 unsigned T = makeAnotherReg(Type::IntTy); 02209 BuildMI(*MBB, IP, X86::ADC32ri, 2, T).addReg(op1Reg+1).addImm(0); 02210 BuildMI(*MBB, IP, X86::NEG32r, 1, DestReg+1).addReg(T); 02211 } 02212 return; 02213 } else if (Op1->hasOneUse() && Class != cLong) { 02214 // sub C, X -> tmp = neg X; DestReg = add tmp, C. This is better 02215 // than copying C into a temporary register, because of register 02216 // pressure (tmp and destreg can share a register. 02217 static unsigned const ADDRITab[] = { 02218 X86::ADD8ri, X86::ADD16ri, X86::ADD32ri, 0, X86::ADD32ri 02219 }; 02220 unsigned op1Reg = getReg(Op1, MBB, IP); 02221 unsigned Tmp = makeAnotherReg(Op0->getType()); 02222 BuildMI(*MBB, IP, NEGTab[Class], 1, Tmp).addReg(op1Reg); 02223 BuildMI(*MBB, IP, ADDRITab[Class], 2, 02224 DestReg).addReg(Tmp).addImm(CI->getRawValue()); 02225 return; 02226 } 02227 } 02228 02229 // Special case: op Reg, <const int> 02230 if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { 02231 unsigned Op0r = getReg(Op0, MBB, IP); 02232 02233 // xor X, -1 -> not X 02234 if (OperatorClass == 4 && Op1C->isAllOnesValue()) { 02235 static unsigned const NOTTab[] = { 02236 X86::NOT8r, X86::NOT16r, X86::NOT32r, 0, X86::NOT32r 02237 }; 02238 BuildMI(*MBB, IP, NOTTab[Class], 1, DestReg).addReg(Op0r); 02239 if (Class == cLong) // Invert the top part too 02240 BuildMI(*MBB, IP, X86::NOT32r, 1, DestReg+1).addReg(Op0r+1); 02241 return; 02242 } 02243 02244 // add X, -1 -> dec X 02245 if (OperatorClass == 0 && Op1C->isAllOnesValue() && Class != cLong) { 02246 // Note that we can't use dec for 64-bit decrements, because it does not 02247 // set the carry flag! 02248 static unsigned const DECTab[] = { X86::DEC8r, X86::DEC16r, X86::DEC32r }; 02249 BuildMI(*MBB, IP, DECTab[Class], 1, DestReg).addReg(Op0r); 02250 return; 02251 } 02252 02253 // add X, 1 -> inc X 02254 if (OperatorClass == 0 && Op1C->equalsInt(1) && Class != cLong) { 02255 // Note that we can't use inc for 64-bit increments, because it does not 02256 // set the carry flag! 02257 static unsigned const INCTab[] = { X86::INC8r, X86::INC16r, X86::INC32r }; 02258 BuildMI(*MBB, IP, INCTab[Class], 1, DestReg).addReg(Op0r); 02259 return; 02260 } 02261 02262 static const unsigned OpcodeTab[][5] = { 02263 // Arithmetic operators 02264 { X86::ADD8ri, X86::ADD16ri, X86::ADD32ri, 0, X86::ADD32ri }, // ADD 02265 { X86::SUB8ri, X86::SUB16ri, X86::SUB32ri, 0, X86::SUB32ri }, // SUB 02266 02267 // Bitwise operators 02268 { X86::AND8ri, X86::AND16ri, X86::AND32ri, 0, X86::AND32ri }, // AND 02269 { X86:: OR8ri, X86:: OR16ri, X86:: OR32ri, 0, X86::OR32ri }, // OR 02270 { X86::XOR8ri, X86::XOR16ri, X86::XOR32ri, 0, X86::XOR32ri }, // XOR 02271 }; 02272 02273 unsigned Opcode = OpcodeTab[OperatorClass][Class]; 02274 unsigned Op1l = cast<ConstantInt>(Op1C)->getRawValue(); 02275 02276 if (Class != cLong) { 02277 BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l); 02278 return; 02279 } 02280 02281 // If this is a long value and the high or low bits have a special 02282 // property, emit some special cases. 02283 unsigned Op1h = cast<ConstantInt>(Op1C)->getRawValue() >> 32LL; 02284 02285 // If the constant is zero in the low 32-bits, just copy the low part 02286 // across and apply the normal 32-bit operation to the high parts. There 02287 // will be no carry or borrow into the top. 02288 if (Op1l == 0) { 02289 if (OperatorClass != 2) // All but and... 02290 BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0r); 02291 else 02292 BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0); 02293 BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg+1) 02294 .addReg(Op0r+1).addImm(Op1h); 02295 return; 02296 } 02297 02298 // If this is a logical operation and the top 32-bits are zero, just 02299 // operate on the lower 32. 02300 if (Op1h == 0 && OperatorClass > 1) { 02301 BuildMI(*MBB, IP, OpcodeTab[OperatorClass][cLong], 2, DestReg) 02302 .addReg(Op0r).addImm(Op1l); 02303 if (OperatorClass != 2) // All but and 02304 BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(Op0r+1); 02305 else 02306 BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0); 02307 return; 02308 } 02309 02310 // TODO: We could handle lots of other special cases here, such as AND'ing 02311 // with 0xFFFFFFFF00000000 -> noop, etc. 02312 02313 // Otherwise, code generate the full operation with a constant. 02314 static const unsigned TopTab[] = { 02315 X86::ADC32ri, X86::SBB32ri, X86::AND32ri, X86::OR32ri, X86::XOR32ri 02316 }; 02317 02318 BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addImm(Op1l); 02319 BuildMI(*MBB, IP, TopTab[OperatorClass], 2, DestReg+1) 02320 .addReg(Op0r+1).addImm(Op1h); 02321 return; 02322 } 02323 02324 // Finally, handle the general case now. 02325 static const unsigned OpcodeTab[][5] = { 02326 // Arithmetic operators 02327 { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr, 0, X86::ADD32rr }, // ADD 02328 { X86::SUB8rr, X86::SUB16rr, X86::SUB32rr, 0, X86::SUB32rr }, // SUB 02329 02330 // Bitwise operators 02331 { X86::AND8rr, X86::AND16rr, X86::AND32rr, 0, X86::AND32rr }, // AND 02332 { X86:: OR8rr, X86:: OR16rr, X86:: OR32rr, 0, X86:: OR32rr }, // OR 02333 { X86::XOR8rr, X86::XOR16rr, X86::XOR32rr, 0, X86::XOR32rr }, // XOR 02334 }; 02335 02336 unsigned Opcode = OpcodeTab[OperatorClass][Class]; 02337 unsigned Op0r = getReg(Op0, MBB, IP); 02338 unsigned Op1r = getReg(Op1, MBB, IP); 02339 BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r); 02340 02341 if (Class == cLong) { // Handle the upper 32 bits of long values... 02342 static const unsigned TopTab[] = { 02343 X86::ADC32rr, X86::SBB32rr, X86::AND32rr, X86::OR32rr, X86::XOR32rr 02344 }; 02345 BuildMI(*MBB, IP, TopTab[OperatorClass], 2, 02346 DestReg+1).addReg(Op0r+1).addReg(Op1r+1); 02347 } 02348 } 02349 02350 /// doMultiply - Emit appropriate instructions to multiply together the 02351 /// registers op0Reg and op1Reg, and put the result in DestReg. The type of the 02352 /// result should be given as DestTy. 02353 /// 02354 void X86ISel::doMultiply(MachineBasicBlock *MBB, 02355 MachineBasicBlock::iterator MBBI, 02356 unsigned DestReg, const Type *DestTy, 02357 unsigned op0Reg, unsigned op1Reg) { 02358 unsigned Class = getClass(DestTy); 02359 switch (Class) { 02360 case cInt: 02361 case cShort: 02362 BuildMI(*MBB, MBBI, Class == cInt ? X86::IMUL32rr:X86::IMUL16rr, 2, DestReg) 02363 .addReg(op0Reg).addReg(op1Reg); 02364 return; 02365 case cByte: 02366 // Must use the MUL instruction, which forces use of AL... 02367 BuildMI(*MBB, MBBI, X86::MOV8rr, 1, X86::AL).addReg(op0Reg); 02368 BuildMI(*MBB, MBBI, X86::MUL8r, 1).addReg(op1Reg); 02369 BuildMI(*MBB, MBBI, X86::MOV8rr, 1, DestReg).addReg(X86::AL); 02370 return; 02371 default: 02372 case cLong: assert(0 && "doMultiply cannot operate on LONG values!"); 02373 } 02374 } 02375 02376 // ExactLog2 - This function solves for (Val == 1 << (N-1)) and returns N. It 02377 // returns zero when the input is not exactly a power of two. 02378 static unsigned ExactLog2(unsigned Val) { 02379 if (Val == 0 || (Val & (Val-1))) return 0; 02380 unsigned Count = 0; 02381 while (Val != 1) { 02382 Val >>= 1; 02383 ++Count; 02384 } 02385 return Count+1; 02386 } 02387 02388 02389 /// doMultiplyConst - This function is specialized to efficiently codegen an 8, 02390 /// 16, or 32-bit integer multiply by a constant. 02391 void X86ISel::doMultiplyConst(MachineBasicBlock *MBB, 02392 MachineBasicBlock::iterator IP, 02393 unsigned DestReg, const Type *DestTy, 02394 unsigned op0Reg, unsigned ConstRHS) { 02395 static const unsigned MOVrrTab[] = {X86::MOV8rr, X86::MOV16rr, X86::MOV32rr}; 02396 static const unsigned MOVriTab[] = {X86::MOV8ri, X86::MOV16ri, X86::MOV32ri}; 02397 static const unsigned ADDrrTab[] = {X86::ADD8rr, X86::ADD16rr, X86::ADD32rr}; 02398 static const unsigned NEGrTab[] = {X86::NEG8r , X86::NEG16r , X86::NEG32r }; 02399 02400 unsigned Class = getClass(DestTy); 02401 unsigned TmpReg; 02402 02403 // Handle special cases here. 02404 switch (ConstRHS) { 02405 case -2: 02406 TmpReg = makeAnotherReg(DestTy); 02407 BuildMI(*MBB, IP, NEGrTab[Class], 1, TmpReg).addReg(op0Reg); 02408 BuildMI(*MBB, IP, ADDrrTab[Class], 1,DestReg).addReg(TmpReg).addReg(TmpReg); 02409 return; 02410 case -1: 02411 BuildMI(*MBB, IP, NEGrTab[Class], 1, DestReg).addReg(op0Reg); 02412 return; 02413 case 0: 02414 BuildMI(*MBB, IP, MOVriTab[Class], 1, DestReg).addImm(0); 02415 return; 02416 case 1: 02417 BuildMI(*MBB, IP, MOVrrTab[Class], 1, DestReg).addReg(op0Reg); 02418 return; 02419 case 2: 02420 BuildMI(*MBB, IP, ADDrrTab[Class], 1,DestReg).addReg(op0Reg).addReg(op0Reg); 02421 return; 02422 case 3: 02423 case 5: 02424 case 9: 02425 if (Class == cInt) { 02426 X86AddressMode AM; 02427 AM.BaseType = X86AddressMode::RegBase; 02428 AM.Base.Reg = op0Reg; 02429 AM.Scale = ConstRHS-1; 02430 AM.IndexReg = op0Reg; 02431 AM.Disp = 0; 02432 addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 5, DestReg), AM); 02433 return; 02434 } 02435 case -3: 02436 case -5: 02437 case -9: 02438 if (Class == cInt) { 02439 TmpReg = makeAnotherReg(DestTy); 02440 X86AddressMode AM; 02441 AM.BaseType = X86AddressMode::RegBase; 02442 AM.Base.Reg = op0Reg; 02443 AM.Scale = -ConstRHS-1; 02444 AM.IndexReg = op0Reg; 02445 AM.Disp = 0; 02446 addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 5, TmpReg), AM); 02447 BuildMI(*MBB, IP, NEGrTab[Class], 1, DestReg).addReg(TmpReg); 02448 return; 02449 } 02450 } 02451 02452 // If the element size is exactly a power of 2, use a shift to get it. 02453 if (unsigned Shift = ExactLog2(ConstRHS)) { 02454 switch (Class) { 02455 default: assert(0 && "Unknown class for this function!"); 02456 case cByte: 02457 BuildMI(*MBB, IP, X86::SHL8ri,2, DestReg).addReg(op0Reg).addImm(Shift-1); 02458 return; 02459 case cShort: 02460 BuildMI(*MBB, IP, X86::SHL16ri,2, DestReg).addReg(op0Reg).addImm(Shift-1); 02461 return; 02462 case cInt: 02463 BuildMI(*MBB, IP, X86::SHL32ri,2, DestReg).addReg(op0Reg).addImm(Shift-1); 02464 return; 02465 } 02466 } 02467 02468 // If the element size is a negative power of 2, use a shift/neg to get it. 02469 if (unsigned Shift = ExactLog2(-ConstRHS)) { 02470 TmpReg = makeAnotherReg(DestTy); 02471 BuildMI(*MBB, IP, NEGrTab[Class], 1, TmpReg).addReg(op0Reg); 02472 switch (Class) { 02473 default: assert(0 && "Unknown class for this function!"); 02474 case cByte: 02475 BuildMI(*MBB, IP, X86::SHL8ri,2, DestReg).addReg(TmpReg).addImm(Shift-1); 02476 return; 02477 case cShort: 02478 BuildMI(*MBB, IP, X86::SHL16ri,2, DestReg).addReg(TmpReg).addImm(Shift-1); 02479 return; 02480 case cInt: 02481 BuildMI(*MBB, IP, X86::SHL32ri,2, DestReg).addReg(TmpReg).addImm(Shift-1); 02482 return; 02483 } 02484 } 02485 02486 if (Class == cShort) { 02487 BuildMI(*MBB, IP, X86::IMUL16rri,2,DestReg).addReg(op0Reg).addImm(ConstRHS); 02488 return; 02489 } else if (Class == cInt) { 02490 BuildMI(*MBB, IP, X86::IMUL32rri,2,DestReg).addReg(op0Reg).addImm(ConstRHS); 02491 return; 02492 } 02493 02494 // Most general case, emit a normal multiply... 02495 TmpReg = makeAnotherReg(DestTy); 02496 BuildMI(*MBB, IP, MOVriTab[Class], 1, TmpReg).addImm(ConstRHS); 02497 02498 // Emit a MUL to multiply the register holding the index by 02499 // elementSize, putting the result in OffsetReg. 02500 doMultiply(MBB, IP, DestReg, DestTy, op0Reg, TmpReg); 02501 } 02502 02503 /// visitMul - Multiplies are not simple binary operators because they must deal 02504 /// with the EAX register explicitly. 02505 /// 02506 void X86ISel::visitMul(BinaryOperator &I) { 02507 unsigned ResultReg = getReg(I); 02508 02509 Value *Op0 = I.getOperand(0); 02510 Value *Op1 = I.getOperand(1); 02511 02512 // Fold loads into floating point multiplies. 02513 if (getClass(Op0->getType()) == cFP) { 02514 if (isa<LoadInst>(Op0) && !isa<LoadInst>(Op1)) 02515 if (!I.swapOperands()) 02516 std::swap(Op0, Op1); // Make sure any loads are in the RHS. 02517 if (LoadInst *LI = dyn_cast<LoadInst>(Op1)) 02518 if (isSafeToFoldLoadIntoInstruction(*LI, I)) { 02519 const Type *Ty = Op0->getType(); 02520 assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!"); 02521 unsigned Opcode = Ty == Type::FloatTy ? X86::FMUL32m : X86::FMUL64m; 02522 02523 unsigned Op0r = getReg(Op0); 02524 if (AllocaInst *AI = dyn_castFixedAlloca(LI->getOperand(0))) { 02525 unsigned FI = getFixedSizedAllocaFI(AI); 02526 addFrameReference(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op0r), FI); 02527 } else { 02528 X86AddressMode AM; 02529 getAddressingMode(LI->getOperand(0), AM); 02530 02531 addFullAddress(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op0r), AM); 02532 } 02533 return; 02534 } 02535 } 02536 02537 MachineBasicBlock::iterator IP = BB->end(); 02538 emitMultiply(BB, IP, Op0, Op1, ResultReg); 02539 } 02540 02541 void X86ISel::emitMultiply(MachineBasicBlock *MBB, 02542 MachineBasicBlock::iterator IP, 02543 Value *Op0, Value *Op1, unsigned DestReg) { 02544 MachineBasicBlock &BB = *MBB; 02545 TypeClass Class = getClass(Op0->getType()); 02546 02547 // Simple scalar multiply? 02548 unsigned Op0Reg = getReg(Op0, &BB, IP); 02549 switch (Class) { 02550 case cByte: 02551 case cShort: 02552 case cInt: 02553 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { 02554 unsigned Val = (unsigned)CI->getRawValue(); // Isn't a 64-bit constant 02555 doMultiplyConst(&BB, IP, DestReg, Op0->getType(), Op0Reg, Val); 02556 } else { 02557 unsigned Op1Reg = getReg(Op1, &BB, IP); 02558 doMultiply(&BB, IP, DestReg, Op1->getType(), Op0Reg, Op1Reg); 02559 } 02560 return; 02561 case cFP: 02562 emitBinaryFPOperation(MBB, IP, Op0, Op1, 2, DestReg); 02563 return; 02564 case cLong: 02565 break; 02566 } 02567 02568 // Long value. We have to do things the hard way... 02569 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { 02570 unsigned CLow = CI->getRawValue(); 02571 unsigned CHi = CI->getRawValue() >> 32; 02572 02573 if (CLow == 0) { 02574 // If the low part of the constant is all zeros, things are simple. 02575 BuildMI(BB, IP, X86::MOV32ri, 1, DestReg).addImm(0); 02576 doMultiplyConst(&BB, IP, DestReg+1, Type::UIntTy, Op0Reg, CHi); 02577 return; 02578 } 02579 02580 // Multiply the two low parts... capturing carry into EDX 02581 unsigned OverflowReg = 0; 02582 if (CLow == 1) { 02583 BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0Reg); 02584 } else { 02585 unsigned Op1RegL = makeAnotherReg(Type::UIntTy); 02586 OverflowReg = makeAnotherReg(Type::UIntTy); 02587 BuildMI(BB, IP, X86::MOV32ri, 1, Op1RegL).addImm(CLow); 02588 BuildMI(BB, IP, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg); 02589 BuildMI(BB, IP, X86::MUL32r, 1).addReg(Op1RegL); // AL*BL 02590 02591 BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL 02592 BuildMI(BB, IP, X86::MOV32rr, 1, 02593 OverflowReg).addReg(X86::EDX); // AL*BL >> 32 02594 } 02595 02596 unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL 02597 doMultiplyConst(&BB, IP, AHBLReg, Type::UIntTy, Op0Reg+1, CLow); 02598 02599 unsigned AHBLplusOverflowReg; 02600 if (OverflowReg) { 02601 AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy); 02602 BuildMI(BB, IP, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32) 02603 AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg); 02604 } else { 02605 AHBLplusOverflowReg = AHBLReg; 02606 } 02607 02608 if (CHi == 0) { 02609 BuildMI(BB, IP, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg); 02610 } else { 02611 unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH 02612 doMultiplyConst(&BB, IP, ALBHReg, Type::UIntTy, Op0Reg, CHi); 02613 02614 BuildMI(BB, IP, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32) 02615 DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg); 02616 } 02617 return; 02618 } 02619 02620 // General 64x64 multiply 02621 02622 unsigned Op1Reg = getReg(Op1, &BB, IP); 02623 // Multiply the two low parts... capturing carry into EDX 02624 BuildMI(BB, IP, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg); 02625 BuildMI(BB, IP, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL 02626 02627 unsigned OverflowReg = makeAnotherReg(Type::UIntTy); 02628 BuildMI(BB, IP, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL 02629 BuildMI(BB, IP, X86::MOV32rr, 1, 02630 OverflowReg).addReg(X86::EDX); // AL*BL >> 32 02631 02632 unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL 02633 BuildMI(BB, IP, X86::IMUL32rr, 2, 02634 AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg); 02635 02636 unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy); 02637 BuildMI(BB, IP, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32) 02638 AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg); 02639 02640 unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH 02641 BuildMI(BB, IP, X86::IMUL32rr, 2, 02642 ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1); 02643 02644 BuildMI(BB, IP, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32) 02645 DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg); 02646 } 02647 02648 02649 /// visitDivRem - Handle division and remainder instructions... these 02650 /// instruction both require the same instructions to be generated, they just 02651 /// select the result from a different register. Note that both of these 02652 /// instructions work differently for signed and unsigned operands. 02653 /// 02654 void X86ISel::visitDivRem(BinaryOperator &I) { 02655 unsigned ResultReg = getReg(I); 02656 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 02657 02658 // Fold loads into floating point divides. 02659 if (getClass(Op0->getType()) == cFP) { 02660 if (LoadInst *LI = dyn_cast<LoadInst>(Op1)) 02661 if (isSafeToFoldLoadIntoInstruction(*LI, I)) { 02662 const Type *Ty = Op0->getType(); 02663 assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!"); 02664 unsigned Opcode = Ty == Type::FloatTy ? X86::FDIV32m : X86::FDIV64m; 02665 02666 unsigned Op0r = getReg(Op0); 02667 if (AllocaInst *AI = dyn_castFixedAlloca(LI->getOperand(0))) { 02668 unsigned FI = getFixedSizedAllocaFI(AI); 02669 addFrameReference(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op0r), FI); 02670 } else { 02671 X86AddressMode AM; 02672 getAddressingMode(LI->getOperand(0), AM); 02673 02674 addFullAddress(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op0r), AM); 02675 } 02676 return; 02677 } 02678 02679 if (LoadInst *LI = dyn_cast<LoadInst>(Op0)) 02680 if (isSafeToFoldLoadIntoInstruction(*LI, I)) { 02681 const Type *Ty = Op0->getType(); 02682 assert(Ty == Type::FloatTy||Ty == Type::DoubleTy && "Unknown FP type!"); 02683 unsigned Opcode = Ty == Type::FloatTy ? X86::FDIVR32m : X86::FDIVR64m; 02684 02685 unsigned Op1r = getReg(Op1); 02686 if (AllocaInst *AI = dyn_castFixedAlloca(LI->getOperand(0))) { 02687 unsigned FI = getFixedSizedAllocaFI(AI); 02688 addFrameReference(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op1r), FI); 02689 } else { 02690 X86AddressMode AM; 02691 getAddressingMode(LI->getOperand(0), AM); 02692 addFullAddress(BuildMI(BB, Opcode, 5, ResultReg).addReg(Op1r), AM); 02693 } 02694 return; 02695 } 02696 } 02697 02698 02699 MachineBasicBlock::iterator IP = BB->end(); 02700 emitDivRemOperation(BB, IP, Op0, Op1, 02701 I.getOpcode() == Instruction::Div, ResultReg); 02702 } 02703 02704 void X86ISel::emitDivRemOperation(MachineBasicBlock *BB, 02705 MachineBasicBlock::iterator IP, 02706 Value *Op0, Value *Op1, bool isDiv, 02707 unsigned ResultReg) { 02708 const Type *Ty = Op0->getType(); 02709 unsigned Class = getClass(Ty); 02710 switch (Class) { 02711 case cFP: // Floating point divide 02712 if (isDiv) { 02713 emitBinaryFPOperation(BB, IP, Op0, Op1, 3, ResultReg); 02714 return; 02715 } else { // Floating point remainder... 02716 unsigned Op0Reg = getReg(Op0, BB, IP); 02717 unsigned Op1Reg = getReg(Op1, BB, IP); 02718 MachineInstr *TheCall = 02719 BuildMI(X86::CALLpcrel32, 1).addExternalSymbol("fmod", true); 02720 std::vector<ValueRecord> Args; 02721 Args.push_back(ValueRecord(Op0Reg, Type::DoubleTy)); 02722 Args.push_back(ValueRecord(Op1Reg, Type::DoubleTy)); 02723 doCall(ValueRecord(ResultReg, Type::DoubleTy), TheCall, Args); 02724 } 02725 return; 02726 case cLong: { 02727 static const char *FnName[] = 02728 { "__moddi3", "__divdi3", "__umoddi3", "__udivdi3" }; 02729 unsigned Op0Reg = getReg(Op0, BB, IP); 02730 unsigned Op1Reg = getReg(Op1, BB, IP); 02731 unsigned NameIdx = Ty->isUnsigned()*2 + isDiv; 02732 MachineInstr *TheCall = 02733 BuildMI(X86::CALLpcrel32, 1).addExternalSymbol(FnName[NameIdx], true); 02734 02735 std::vector<ValueRecord> Args; 02736 Args.push_back(ValueRecord(Op0Reg, Type::LongTy)); 02737 Args.push_back(ValueRecord(Op1Reg, Type::LongTy)); 02738 doCall(ValueRecord(ResultReg, Type::LongTy), TheCall, Args); 02739 return; 02740 } 02741 case cByte: case cShort: case cInt: 02742 break; // Small integrals, handled below... 02743 default: assert(0 && "Unknown class!"); 02744 } 02745 02746 static const unsigned MovOpcode[]={ X86::MOV8rr, X86::MOV16rr, X86::MOV32rr }; 02747 static const unsigned NEGOpcode[]={ X86::NEG8r, X86::NEG16r, X86::NEG32r }; 02748 static const unsigned SAROpcode[]={ X86::SAR8ri, X86::SAR16ri, X86::SAR32ri }; 02749 static const unsigned SHROpcode[]={ X86::SHR8ri, X86::SHR16ri, X86::SHR32ri }; 02750 static const unsigned ADDOpcode[]={ X86::ADD8rr, X86::ADD16rr, X86::ADD32rr }; 02751 02752 // Special case signed division by power of 2. 02753 if (ConstantSInt *CI = dyn_cast<ConstantSInt>(Op1)) 02754 if (isDiv) { 02755 assert(Class != cLong && "This doesn't handle 64-bit divides!"); 02756 int V = CI->getValue(); 02757 02758 if (V == 1) { // X /s 1 => X 02759 unsigned Op0Reg = getReg(Op0, BB, IP); 02760 BuildMI(*BB, IP, MovOpcode[Class], 1, ResultReg).addReg(Op0Reg); 02761 return; 02762 } 02763 02764 if (V == -1) { // X /s -1 => -X 02765 unsigned Op0Reg = getReg(Op0, BB, IP); 02766 BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(Op0Reg); 02767 return; 02768 } 02769 02770 if (V == 2 || V == -2) { // X /s 2 02771 static const unsigned CMPOpcode[] = { 02772 X86::CMP8ri, X86::CMP16ri, X86::CMP32ri 02773 }; 02774 static const unsigned SBBOpcode[] = { 02775 X86::SBB8ri, X86::SBB16ri, X86::SBB32ri 02776 }; 02777 unsigned Op0Reg = getReg(Op0, BB, IP); 02778 unsigned SignBit = 1 << (CI->getType()->getPrimitiveSize()*8-1); 02779 BuildMI(*BB, IP, CMPOpcode[Class], 2).addReg(Op0Reg).addImm(SignBit); 02780 02781 unsigned TmpReg = makeAnotherReg(Op0->getType()); 02782 BuildMI(*BB, IP, SBBOpcode[Class], 2, TmpReg).addReg(Op0Reg).addImm(-1); 02783 02784 unsigned TmpReg2 = V == 2 ? ResultReg : makeAnotherReg(Op0->getType()); 02785 BuildMI(*BB, IP, SAROpcode[Class], 2, TmpReg2).addReg(TmpReg).addImm(1); 02786 if (V == -2) { 02787 BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(TmpReg2); 02788 } 02789 return; 02790 } 02791 02792 bool isNeg = false; 02793 if (V < 0) { // Not a positive power of 2? 02794 V = -V; 02795 isNeg = true; // Maybe it's a negative power of 2. 02796 } 02797 if (unsigned Log = ExactLog2(V)) { 02798 --Log; 02799 unsigned Op0Reg = getReg(Op0, BB, IP); 02800 unsigned TmpReg = makeAnotherReg(Op0->getType()); 02801 BuildMI(*BB, IP, SAROpcode[Class], 2, TmpReg) 02802 .addReg(Op0Reg).addImm(Log-1); 02803 unsigned TmpReg2 = makeAnotherReg(Op0->getType()); 02804 BuildMI(*BB, IP, SHROpcode[Class], 2, TmpReg2) 02805 .addReg(TmpReg).addImm(32-Log); 02806 unsigned TmpReg3 = makeAnotherReg(Op0->getType()); 02807 BuildMI(*BB, IP, ADDOpcode[Class], 2, TmpReg3) 02808 .addReg(Op0Reg).addReg(TmpReg2); 02809 02810 unsigned TmpReg4 = isNeg ? makeAnotherReg(Op0->getType()) : ResultReg; 02811 BuildMI(*BB, IP, SAROpcode[Class], 2, TmpReg4) 02812 .addReg(TmpReg3).addImm(Log); 02813 if (isNeg) 02814 BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(TmpReg4); 02815 return; 02816 } 02817 } else { // X % C 02818 assert(Class != cLong && "This doesn't handle 64-bit remainder!"); 02819 int V = CI->getValue(); 02820 02821 if (V == 2 || V == -2) { // X % 2, X % -2 02822 static const unsigned SExtOpcode[] = { X86::CBW, X86::CWD, X86::CDQ }; 02823 static const unsigned BaseReg[] = { X86::AL , X86::AX , X86::EAX }; 02824 static const unsigned SExtReg[] = { X86::AH , X86::DX , X86::EDX }; 02825 static const unsigned ANDOpcode[] = { 02826 X86::AND8ri, X86::AND16ri, X86::AND32ri 02827 }; 02828 static const unsigned XOROpcode[] = { 02829 X86::XOR8rr, X86::XOR16rr, X86::XOR32rr 02830 }; 02831 static const unsigned SUBOpcode[] = { 02832 X86::SUB8rr, X86::SUB16rr, X86::SUB32rr 02833 }; 02834 02835 // Sign extend result into reg of -1 or 0. 02836 unsigned Op0Reg = getReg(Op0, BB, IP); 02837 BuildMI(*BB, IP, MovOpcode[Class], 1, BaseReg[Class]).addReg(Op0Reg); 02838 BuildMI(*BB, IP, SExtOpcode[Class], 0); 02839 unsigned TmpReg0 = makeAnotherReg(Op0->getType()); 02840 BuildMI(*BB, IP, MovOpcode[Class], 1, TmpReg0).addReg(SExtReg[Class]); 02841 02842 unsigned TmpReg1 = makeAnotherReg(Op0->getType()); 02843 BuildMI(*BB, IP, ANDOpcode[Class], 2, TmpReg1).addReg(Op0Reg).addImm(1); 02844 02845 unsigned TmpReg2 = makeAnotherReg(Op0->getType()); 02846 BuildMI(*BB, IP, XOROpcode[Class], 2, 02847 TmpReg2).addReg(TmpReg1).addReg(TmpReg0); 02848 BuildMI(*BB, IP, SUBOpcode[Class], 2, 02849 ResultReg).addReg(TmpReg2).addReg(TmpReg0); 02850 return; 02851 } 02852 } 02853 02854 static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX }; 02855 static const unsigned ClrOpcode[]={ X86::MOV8ri, X86::MOV16ri, X86::MOV32ri }; 02856 static const unsigned ExtRegs[] ={ X86::AH , X86::DX , X86::EDX }; 02857 02858 static const unsigned DivOpcode[][4] = { 02859 { X86::DIV8r , X86::DIV16r , X86::DIV32r , 0 }, // Unsigned division 02860 { X86::IDIV8r, X86::IDIV16r, X86::IDIV32r, 0 }, // Signed division 02861 }; 02862 02863 unsigned Reg = Regs[Class]; 02864 unsigned ExtReg = ExtRegs[Class]; 02865 02866 // Put the first operand into one of the A registers... 02867 unsigned Op0Reg = getReg(Op0, BB, IP); 02868 unsigned Op1Reg = getReg(Op1, BB, IP); 02869 BuildMI(*BB, IP, MovOpcode[Class], 1, Reg).addReg(Op0Reg); 02870 02871 if (Ty->isSigned()) { 02872 // Emit a sign extension instruction... 02873 unsigned ShiftResult = makeAnotherReg(Op0->getType()); 02874 BuildMI(*BB, IP, SAROpcode[Class], 2,ShiftResult).addReg(Op0Reg).addImm(31); 02875 BuildMI(*BB, IP, MovOpcode[Class], 1, ExtReg).addReg(ShiftResult); 02876 02877 // Emit the appropriate divide or remainder instruction... 02878 BuildMI(*BB, IP, DivOpcode[1][Class], 1).addReg(Op1Reg); 02879 } else { 02880 // If unsigned, emit a zeroing instruction... (reg = 0) 02881 BuildMI(*BB, IP, ClrOpcode[Class], 2, ExtReg).addImm(0); 02882 02883 // Emit the appropriate divide or remainder instruction... 02884 BuildMI(*BB, IP, DivOpcode[0][Class], 1).addReg(Op1Reg); 02885 } 02886 02887 // Figure out which register we want to pick the result out of... 02888 unsigned DestReg = isDiv ? Reg : ExtReg; 02889 02890 // Put the result into the destination register... 02891 BuildMI(*BB, IP, MovOpcode[Class], 1, ResultReg).addReg(DestReg); 02892 } 02893 02894 02895 /// Shift instructions: 'shl', 'sar', 'shr' - Some special cases here 02896 /// for constant immediate shift values, and for constant immediate 02897 /// shift values equal to 1. Even the general case is sort of special, 02898 /// because the shift amount has to be in CL, not just any old register. 02899 /// 02900 void X86ISel::visitShiftInst(ShiftInst &I) { 02901 MachineBasicBlock::iterator IP = BB->end (); 02902 emitShiftOperation (BB, IP, I.getOperand (0), I.getOperand (1), 02903 I.getOpcode () == Instruction::Shl, I.getType (), 02904 getReg (I)); 02905 } 02906 02907 /// Emit code for a 'SHLD DestReg, Op0, Op1, Amt' operation, where Amt is a 02908 /// constant. 02909 void X86ISel::doSHLDConst(MachineBasicBlock *MBB, 02910 MachineBasicBlock::iterator IP, 02911 unsigned DestReg, unsigned Op0Reg, unsigned Op1Reg, 02912 unsigned Amt) { 02913 // SHLD is a very inefficient operation on every processor, try to do 02914 // somethign simpler for common values of 'Amt'. 02915 if (Amt == 0) { 02916 BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(Op0Reg); 02917 } else if (Amt == 1) { 02918 unsigned Tmp = makeAnotherReg(Type::UIntTy); 02919 BuildMI(*MBB, IP, X86::ADD32rr, 2, Tmp).addReg(Op1Reg).addReg(Op1Reg); 02920 BuildMI(*MBB, IP, X86::ADC32rr, 2, DestReg).addReg(Op0Reg).addReg(Op0Reg); 02921 } else if (Amt == 2 || Amt == 3) { 02922 // On the P4 and Athlon it is cheaper to replace shld ..., 2|3 with a 02923 // shift/lea pair. NOTE: This should not be done on the P6 family! 02924 unsigned Tmp = makeAnotherReg(Type::UIntTy); 02925 BuildMI(*MBB, IP, X86::SHR32ri, 2, Tmp).addReg(Op1Reg).addImm(32-Amt); 02926 X86AddressMode AM; 02927 AM.BaseType = X86AddressMode::RegBase; 02928 AM.Base.Reg = Tmp; 02929 AM.Scale = 1 << Amt; 02930 AM.IndexReg = Op0Reg; 02931 AM.Disp = 0; 02932 addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 4, DestReg), AM); 02933 } else { 02934 // NOTE: It is always cheaper on the P4 to emit SHLD as two shifts and an OR 02935 // than it is to emit a real SHLD. 02936 02937 BuildMI(*MBB, IP, X86::SHLD32rri8, 3, 02938 DestReg).addReg(Op0Reg).addReg(Op1Reg).addImm(Amt); 02939 } 02940 } 02941 02942 /// emitShiftOperation - Common code shared between visitShiftInst and 02943 /// constant expression support. 02944 void X86ISel::emitShiftOperation(MachineBasicBlock *MBB, 02945 MachineBasicBlock::iterator IP, 02946 Value *Op, Value *ShiftAmount, 02947 bool isLeftShift, const Type *ResultTy, 02948 unsigned DestReg) { 02949 unsigned SrcReg = getReg (Op, MBB, IP); 02950 bool isSigned = ResultTy->isSigned (); 02951 unsigned Class = getClass (ResultTy); 02952 02953 static const unsigned ConstantOperand[][3] = { 02954 { X86::SHR8ri, X86::SHR16ri, X86::SHR32ri }, // SHR 02955 { X86::SAR8ri, X86::SAR16ri, X86::SAR32ri }, // SAR 02956 { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri }, // SHL 02957 { X86::SHL8ri, X86::SHL16ri, X86::SHL32ri }, // SAL = SHL 02958 }; 02959 02960 static const unsigned NonConstantOperand[][3] = { 02961 { X86::SHR8rCL, X86::SHR16rCL, X86::SHR32rCL }, // SHR 02962 { X86::SAR8rCL, X86::SAR16rCL, X86::SAR32rCL }, // SAR 02963 { X86::SHL8rCL, X86::SHL16rCL, X86::SHL32rCL }, // SHL 02964 { X86::SHL8rCL, X86::SHL16rCL, X86::SHL32rCL }, // SAL = SHL 02965 }; 02966 02967 // Longs, as usual, are handled specially. 02968 if (Class == cLong) { 02969 if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(ShiftAmount)) { 02970 unsigned Amount = CUI->getValue(); 02971 if (Amount == 1 && isLeftShift) { // X << 1 == X+X 02972 BuildMI(*MBB, IP, X86::ADD32rr, 2, 02973 DestReg).addReg(SrcReg).addReg(SrcReg); 02974 BuildMI(*MBB, IP, X86::ADC32rr, 2, 02975 DestReg+1).addReg(SrcReg+1).addReg(SrcReg+1); 02976 } else if (Amount < 32) { 02977 const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned]; 02978 if (isLeftShift) { 02979 doSHLDConst(MBB, IP, DestReg+1, SrcReg+1, SrcReg, Amount); 02980 BuildMI(*MBB, IP, Opc[2], 2, DestReg).addReg(SrcReg).addImm(Amount); 02981 } else { 02982 BuildMI(*MBB, IP, X86::SHRD32rri8, 3, 02983 DestReg).addReg(SrcReg ).addReg(SrcReg+1).addImm(Amount); 02984 BuildMI(*MBB, IP, Opc[2],2,DestReg+1).addReg(SrcReg+1).addImm(Amount); 02985 } 02986 } else if (Amount == 32) { 02987 if (isLeftShift) { 02988 BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg+1).addReg(SrcReg); 02989 BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0); 02990 } else { 02991 BuildMI(*MBB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg+1); 02992 if (!isSigned) { 02993 BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0); 02994 } else { 02995 BuildMI(*MBB, IP, X86::SAR32ri, 2, 02996 DestReg+1).addReg(SrcReg).addImm(31); 02997 } 02998 } 02999 } else { // Shifting more than 32 bits 03000 Amount -= 32; 03001 if (isLeftShift) { 03002 BuildMI(*MBB, IP, X86::SHL32ri, 2, 03003 DestReg + 1).addReg(SrcReg).addImm(Amount); 03004 BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg).addImm(0); 03005 } else { 03006 BuildMI(*MBB, IP, isSigned ? X86::SAR32ri : X86::SHR32ri, 2, 03007 DestReg).addReg(SrcReg+1).addImm(Amount); 03008 BuildMI(*MBB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0); 03009 } 03010 } 03011 } else { 03012 unsigned TmpReg = makeAnotherReg(Type::IntTy); 03013 if (!isLeftShift && isSigned) { 03014 // If this is a SHR of a Long, then we need to do funny sign extension 03015 // stuff. TmpReg gets the value to use as the high-part if we are 03016 // shifting more than 32 bits. 03017 BuildMI(*MBB, IP, X86::SAR32ri, 2, TmpReg).addReg(SrcReg).addImm(31); 03018 } else { 03019 // Other shifts use a fixed zero value if the shift is more than 32 03020 // bits. 03021 BuildMI(*MBB, IP, X86::MOV32ri, 1, TmpReg).addImm(0); 03022 } 03023 03024 // Initialize CL with the shift amount... 03025 unsigned ShiftAmountReg = getReg(ShiftAmount, MBB, IP); 03026 BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::CL).addReg(ShiftAmountReg); 03027 03028 unsigned TmpReg2 = makeAnotherReg(Type::IntTy); 03029 unsigned TmpReg3 = makeAnotherReg(Type::IntTy); 03030 if (isLeftShift) { 03031 // TmpReg2 = shld inHi, inLo 03032 BuildMI(*MBB, IP, X86::SHLD32rrCL,2,TmpReg2).addReg(SrcReg+1) 03033 .addReg(SrcReg); 03034 // TmpReg3 = shl inLo, CL 03035 BuildMI(*MBB, IP, X86::SHL32rCL, 1, TmpReg3).addReg(SrcReg); 03036 03037 // Set the flags to indicate whether the shift was by more than 32 bits. 03038 BuildMI(*MBB, IP, X86::TEST8ri, 2).addReg(X86::CL).addImm(32); 03039 03040 // DestHi = (>32) ? TmpReg3 : TmpReg2; 03041 BuildMI(*MBB, IP, X86::CMOVNE32rr, 2, 03042 DestReg+1).addReg(TmpReg2).addReg(TmpReg3); 03043 // DestLo = (>32) ? TmpReg : TmpReg3; 03044 BuildMI(*MBB, IP, X86::CMOVNE32rr, 2, 03045 DestReg).addReg(TmpReg3).addReg(TmpReg); 03046 } else { 03047 // TmpReg2 = shrd inLo, inHi 03048 BuildMI(*MBB, IP, X86::SHRD32rrCL,2,TmpReg2).addReg(SrcReg) 03049 .addReg(SrcReg+1); 03050 // TmpReg3 = s[ah]r inHi, CL 03051 BuildMI(*MBB, IP, isSigned ? X86::SAR32rCL : X86::SHR32rCL, 1, TmpReg3) 03052 .addReg(SrcReg+1); 03053 03054 // Set the flags to indicate whether the shift was by more than 32 bits. 03055 BuildMI(*MBB, IP, X86::TEST8ri, 2).addReg(X86::CL).addImm(32); 03056 03057 // DestLo = (>32) ? TmpReg3 : TmpReg2; 03058 BuildMI(*MBB, IP, X86::CMOVNE32rr, 2, 03059 DestReg).addReg(TmpReg2).addReg(TmpReg3); 03060 03061 // DestHi = (>32) ? TmpReg : TmpReg3; 03062 BuildMI(*MBB, IP, X86::CMOVNE32rr, 2, 03063 DestReg+1).addReg(TmpReg3).addReg(TmpReg); 03064 } 03065 } 03066 return; 03067 } 03068 03069 if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(ShiftAmount)) { 03070 // The shift amount is constant, guaranteed to be a ubyte. Get its value. 03071 assert(CUI->getType() == Type::UByteTy && "Shift amount not a ubyte?"); 03072 03073 if (CUI->getValue() == 1 && isLeftShift) { // X << 1 -> X+X 03074 static const int AddOpC[] = { X86::ADD8rr, X86::ADD16rr, X86::ADD32rr }; 03075 BuildMI(*MBB, IP, AddOpC[Class], 2,DestReg).addReg(SrcReg).addReg(SrcReg); 03076 } else { 03077 const unsigned *Opc = ConstantOperand[isLeftShift*2+isSigned]; 03078 BuildMI(*MBB, IP, Opc[Class], 2, 03079 DestReg).addReg(SrcReg).addImm(CUI->getValue()); 03080 } 03081 } else { // The shift amount is non-constant. 03082 unsigned ShiftAmountReg = getReg (ShiftAmount, MBB, IP); 03083 BuildMI(*MBB, IP, X86::MOV8rr, 1, X86::CL).addReg(ShiftAmountReg); 03084 03085 const unsigned *Opc = NonConstantOperand[isLeftShift*2+isSigned]; 03086 BuildMI(*MBB, IP, Opc[Class], 1, DestReg).addReg(SrcReg); 03087 } 03088 } 03089 03090 03091 /// visitLoadInst - Implement LLVM load instructions in terms of the x86 'mov' 03092 /// instruction. The load and store instructions are the only place where we 03093 /// need to worry about the memory layout of the target machine. 03094 /// 03095 void X86ISel::visitLoadInst(LoadInst &I) { 03096 // Check to see if this load instruction is going to be folded into a binary 03097 // instruction, like add. If so, we don't want to emit it. Wouldn't a real 03098 // pattern matching instruction selector be nice? 03099 unsigned Class = getClassB(I.getType()); 03100 if (I.hasOneUse()) { 03101 Instruction *User = cast<Instruction>(I.use_back()); 03102 switch (User->getOpcode()) { 03103 case Instruction::Cast: 03104 // If this is a cast from a signed-integer type to a floating point type, 03105 // fold the cast here. 03106 if (getClassB(User->getType()) == cFP && 03107 (I.getType() == Type::ShortTy || I.getType() == Type::IntTy || 03108 I.getType() == Type::LongTy)) { 03109 unsigned DestReg = getReg(User); 03110 static const unsigned Opcode[] = { 03111 0/*BYTE*/, X86::FILD16m, X86::FILD32m, 0/*FP*/, X86::FILD64m 03112 }; 03113 03114 if (AllocaInst *AI = dyn_castFixedAlloca(I.getOperand(0))) { 03115 unsigned FI = getFixedSizedAllocaFI(AI); 03116 addFrameReference(BuildMI(BB, Opcode[Class], 4, DestReg), FI); 03117 } else { 03118 X86AddressMode AM; 03119 getAddressingMode(I.getOperand(0), AM); 03120 addFullAddress(BuildMI(BB, Opcode[Class], 4, DestReg), AM); 03121 } 03122 return; 03123 } else { 03124 User = 0; 03125 } 03126 break; 03127 03128 case Instruction::Add: 03129 case Instruction::Sub: 03130 case Instruction::And: 03131 case Instruction::Or: 03132 case Instruction::Xor: 03133 if (Class == cLong) User = 0; 03134 break; 03135 case Instruction::Mul: 03136 case Instruction::Div: 03137 if (Class != cFP) User = 0; 03138 break; // Folding only implemented for floating point. 03139 default: User = 0; break; 03140 } 03141 03142 if (User) { 03143 // Okay, we found a user. If the load is the first operand and there is 03144 // no second operand load, reverse the operand ordering. Note that this 03145 // can fail for a subtract (ie, no change will be made). 03146 bool Swapped = false; 03147 if (!isa<LoadInst>(User->getOperand(1))) 03148 Swapped = !cast<BinaryOperator>(User)->swapOperands(); 03149 03150 // Okay, now that everything is set up, if this load is used by the second 03151 // operand, and if there are no instructions that invalidate the load 03152 // before the binary operator, eliminate the load. 03153 if (User->getOperand(1) == &I && 03154 isSafeToFoldLoadIntoInstruction(I, *User)) 03155 return; // Eliminate the load! 03156 03157 // If this is a floating point sub or div, we won't be able to swap the 03158 // operands, but we will still be able to eliminate the load. 03159 if (Class == cFP && User->getOperand(0) == &I && 03160 !isa<LoadInst>(User->getOperand(1)) && 03161 (User->getOpcode() == Instruction::Sub || 03162 User->getOpcode() == Instruction::Div) && 03163 isSafeToFoldLoadIntoInstruction(I, *User)) 03164 return; // Eliminate the load! 03165 03166 // If we swapped the operands to the instruction, but couldn't fold the 03167 // load anyway, swap them back. We don't want to break add X, int 03168 // folding. 03169 if (Swapped) cast<BinaryOperator>(User)->swapOperands(); 03170 } 03171 } 03172 03173 static const unsigned Opcodes[] = { 03174 X86::MOV8rm, X86::MOV16rm, X86::MOV32rm, X86::FLD32m, X86::MOV32rm 03175 }; 03176 unsigned Opcode = Opcodes[Class]; 03177 if (I.getType() == Type::DoubleTy) Opcode = X86::FLD64m; 03178 03179 unsigned DestReg = getReg(I); 03180 03181 if (AllocaInst *AI = dyn_castFixedAlloca(I.getOperand(0))) { 03182 unsigned FI = getFixedSizedAllocaFI(AI); 03183 if (Class == cLong) { 03184 addFrameReference(BuildMI(BB, X86::MOV32rm, 4, DestReg), FI); 03185 addFrameReference(BuildMI(BB, X86::MOV32rm, 4, DestReg+1), FI, 4); 03186 } else { 03187 addFrameReference(BuildMI(BB, Opcode, 4, DestReg), FI); 03188 } 03189 } else { 03190 X86AddressMode AM; 03191 getAddressingMode(I.getOperand(0), AM); 03192 03193 if (Class == cLong) { 03194 addFullAddress(BuildMI(BB, X86::MOV32rm, 4, DestReg), AM); 03195 AM.Disp += 4; 03196 addFullAddress(BuildMI(BB, X86::MOV32rm, 4, DestReg+1), AM); 03197 } else { 03198 addFullAddress(BuildMI(BB, Opcode, 4, DestReg), AM); 03199 } 03200 } 03201 } 03202 03203 /// visitStoreInst - Implement LLVM store instructions in terms of the x86 'mov' 03204 /// instruction. 03205 /// 03206 void X86ISel::visitStoreInst(StoreInst &I) { 03207 X86AddressMode AM; 03208 getAddressingMode(I.getOperand(1), AM); 03209 03210 const Type *ValTy = I.getOperand(0)->getType(); 03211 unsigned Class = getClassB(ValTy); 03212 03213 if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(0))) { 03214 uint64_t Val = CI->getRawValue(); 03215 if (Class == cLong) { 03216 addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm(Val & ~0U); 03217 AM.Disp += 4; 03218 addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm(Val>>32); 03219 } else { 03220 static const unsigned Opcodes[] = { 03221 X86::MOV8mi, X86::MOV16mi, X86::MOV32mi 03222 }; 03223 unsigned Opcode = Opcodes[Class]; 03224 addFullAddress(BuildMI(BB, Opcode, 5), AM).addImm(Val); 03225 } 03226 } else if (isa<ConstantPointerNull>(I.getOperand(0))) { 03227 addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm(0); 03228 } else if (ConstantBool *CB = dyn_cast<ConstantBool>(I.getOperand(0))) { 03229 addFullAddress(BuildMI(BB, X86::MOV8mi, 5), AM).addImm(CB->getValue()); 03230 } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) { 03231 // Store constant FP values with integer instructions to avoid having to 03232 // load the constants from the constant pool then do a store. 03233 if (CFP->getType() == Type::FloatTy) { 03234 union { 03235 unsigned I; 03236 float F; 03237 } V; 03238 V.F = CFP->getValue(); 03239 addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm(V.I); 03240 } else { 03241 union { 03242 uint64_t I; 03243 double F; 03244 } V; 03245 V.F = CFP->getValue(); 03246 addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm((unsigned)V.I); 03247 AM.Disp += 4; 03248 addFullAddress(BuildMI(BB, X86::MOV32mi, 5), AM).addImm( 03249 unsigned(V.I >> 32)); 03250 } 03251 03252 } else if (Class == cLong) { 03253 unsigned ValReg = getReg(I.getOperand(0)); 03254 addFullAddress(BuildMI(BB, X86::MOV32mr, 5), AM).addReg(ValReg); 03255 AM.Disp += 4; 03256 addFullAddress(BuildMI(BB, X86::MOV32mr, 5), AM).addReg(ValReg+1); 03257 } else { 03258 // FIXME: stop emitting these two instructions: 03259 // movl $global,%eax 03260 // movl %eax,(%ebx) 03261 // when one instruction will suffice. That includes when the global 03262 // has an offset applied to it. 03263 unsigned ValReg = getReg(I.getOperand(0)); 03264 static const unsigned Opcodes[] = { 03265 X86::MOV8mr, X86::MOV16mr, X86::MOV32mr, X86::FST32m 03266 }; 03267 unsigned Opcode = Opcodes[Class]; 03268 if (ValTy == Type::DoubleTy) Opcode = X86::FST64m; 03269 03270 addFullAddress(BuildMI(BB, Opcode, 1+4), AM).addReg(ValReg); 03271 } 03272 } 03273 03274 03275 /// visitCastInst - Here we have various kinds of copying with or without sign 03276 /// extension going on. 03277 /// 03278 void X86ISel::visitCastInst(CastInst &CI) { 03279 Value *Op = CI.getOperand(0); 03280 03281 unsigned SrcClass = getClassB(Op->getType()); 03282 unsigned DestClass = getClassB(CI.getType()); 03283 // Noop casts are not emitted: getReg will return the source operand as the 03284 // register to use for any uses of the noop cast. 03285 if (DestClass == SrcClass) { 03286 // The only detail in this plan is that casts from double -> float are 03287 // truncating operations that we have to codegen through memory (despite 03288 // the fact that the source/dest registers are the same class). 03289 if (CI.getType() != Type::FloatTy || Op->getType() != Type::DoubleTy) 03290 return; 03291 } 03292 03293 // If this is a cast from a 32-bit integer to a Long type, and the only uses 03294 // of the case are GEP instructions, then the cast does not need to be 03295 // generated explicitly, it will be folded into the GEP. 03296 if (DestClass == cLong && SrcClass == cInt) { 03297 bool AllUsesAreGEPs = true; 03298 for (Value::use_iterator I = CI.use_begin(), E = CI.use_end(); I != E; ++I) 03299 if (!isa<GetElementPtrInst>(*I)) { 03300 AllUsesAreGEPs = false; 03301 break; 03302 } 03303 03304 // No need to codegen this cast if all users are getelementptr instrs... 03305 if (AllUsesAreGEPs) return; 03306 } 03307 03308 // If this cast converts a load from a short,int, or long integer to a FP 03309 // value, we will have folded this cast away. 03310 if (DestClass == cFP && isa<LoadInst>(Op) && Op->hasOneUse() && 03311 (Op->getType() == Type::ShortTy || Op->getType() == Type::IntTy || 03312 Op->getType() == Type::LongTy)) 03313 return; 03314 03315 03316 unsigned DestReg = getReg(CI); 03317 MachineBasicBlock::iterator MI = BB->end(); 03318 emitCastOperation(BB, MI, Op, CI.getType(), DestReg); 03319 } 03320 03321 /// emitCastOperation - Common code shared between visitCastInst and constant 03322 /// expression cast support. 03323 /// 03324 void X86ISel::emitCastOperation(MachineBasicBlock *BB, 03325 MachineBasicBlock::iterator IP, 03326 Value *Src, const Type *DestTy, 03327 unsigned DestReg) { 03328 const Type *SrcTy = Src->getType(); 03329 unsigned SrcClass = getClassB(SrcTy); 03330 unsigned DestClass = getClassB(DestTy); 03331 unsigned SrcReg = getReg(Src, BB, IP); 03332 03333 // Implement casts to bool by using compare on the operand followed by set if 03334 // not zero on the result. 03335 if (DestTy == Type::BoolTy) { 03336 switch (SrcClass) { 03337 case cByte: 03338 BuildMI(*BB, IP, X86::TEST8rr, 2).addReg(SrcReg).addReg(SrcReg); 03339 break; 03340 case cShort: 03341 BuildMI(*BB, IP, X86::TEST16rr, 2).addReg(SrcReg).addReg(SrcReg); 03342 break; 03343 case cInt: 03344 BuildMI(*BB, IP, X86::TEST32rr, 2).addReg(SrcReg).addReg(SrcReg); 03345 break; 03346 case cLong: { 03347 unsigned TmpReg = makeAnotherReg(Type::IntTy); 03348 BuildMI(*BB, IP, X86::OR32rr, 2, TmpReg).addReg(SrcReg).addReg(SrcReg+1); 03349 break; 03350 } 03351 case cFP: 03352 BuildMI(*BB, IP, X86::FTST, 1).addReg(SrcReg); 03353 BuildMI(*BB, IP, X86::FNSTSW8r, 0); 03354 BuildMI(*BB, IP, X86::SAHF, 1); 03355 break; 03356 } 03357 03358 // If the zero flag is not set, then the value is true, set the byte to 03359 // true. 03360 BuildMI(*BB, IP, X86::SETNEr, 1, DestReg); 03361 return; 03362 } 03363 03364 static const unsigned RegRegMove[] = { 03365 X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV, X86::MOV32rr 03366 }; 03367 03368 // Implement casts between values of the same type class (as determined by 03369 // getClass) by using a register-to-register move. 03370 if (SrcClass == DestClass) { 03371 if (SrcClass <= cInt || (SrcClass == cFP && SrcTy == DestTy)) { 03372 BuildMI(*BB, IP, RegRegMove[SrcClass], 1, DestReg).addReg(SrcReg); 03373 } else if (SrcClass == cFP) { 03374 if (SrcTy == Type::FloatTy) { // double -> float 03375 assert(DestTy == Type::DoubleTy && "Unknown cFP member!"); 03376 BuildMI(*BB, IP, X86::FpMOV, 1, DestReg).addReg(SrcReg); 03377 } else { // float -> double 03378 assert(SrcTy == Type::DoubleTy && DestTy == Type::FloatTy && 03379 "Unknown cFP member!"); 03380 // Truncate from double to float by storing to memory as short, then 03381 // reading it back. 03382 unsigned FltAlign = TM.getTargetData().getFloatAlignment(); 03383 int FrameIdx = F->getFrameInfo()->CreateStackObject(4, FltAlign); 03384 addFrameReference(BuildMI(*BB, IP, X86::FST32m, 5), FrameIdx).addReg(SrcReg); 03385 addFrameReference(BuildMI(*BB, IP, X86::FLD32m, 5, DestReg), FrameIdx); 03386 } 03387 } else if (SrcClass == cLong) { 03388 BuildMI(*BB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg); 03389 BuildMI(*BB, IP, X86::MOV32rr, 1, DestReg+1).addReg(SrcReg+1); 03390 } else { 03391 assert(0 && "Cannot handle this type of cast instruction!"); 03392 abort(); 03393 } 03394 return; 03395 } 03396 03397 // Handle cast of SMALLER int to LARGER int using a move with sign extension 03398 // or zero extension, depending on whether the source type was signed. 03399 if (SrcClass <= cInt && (DestClass <= cInt || DestClass == cLong) && 03400 SrcClass < DestClass) { 03401 bool isLong = DestClass == cLong; 03402 if (isLong) DestClass = cInt; 03403 03404 static const unsigned Opc[][4] = { 03405 { X86::MOVSX16rr8, X86::MOVSX32rr8, X86::MOVSX32rr16, X86::MOV32rr }, // s 03406 { X86::MOVZX16rr8, X86::MOVZX32rr8, X86::MOVZX32rr16, X86::MOV32rr } // u 03407 }; 03408 03409 bool isUnsigned = SrcTy->isUnsigned() || SrcTy == Type::BoolTy; 03410 BuildMI(*BB, IP, Opc[isUnsigned][SrcClass + DestClass - 1], 1, 03411 DestReg).addReg(SrcReg); 03412 03413 if (isLong) { // Handle upper 32 bits as appropriate... 03414 if (isUnsigned) // Zero out top bits... 03415 BuildMI(*BB, IP, X86::MOV32ri, 1, DestReg+1).addImm(0); 03416 else // Sign extend bottom half... 03417 BuildMI(*BB, IP, X86::SAR32ri, 2, DestReg+1).addReg(DestReg).addImm(31); 03418 } 03419 return; 03420 } 03421 03422 // Special case long -> int ... 03423 if (SrcClass == cLong && DestClass == cInt) { 03424 BuildMI(*BB, IP, X86::MOV32rr, 1, DestReg).addReg(SrcReg); 03425 return; 03426 } 03427 03428 // Handle cast of LARGER int to SMALLER int using a move to EAX followed by a 03429 // move out of AX or AL. 03430 if ((SrcClass <= cInt || SrcClass == cLong) && DestClass <= cInt 03431 && SrcClass > DestClass) { 03432 static const unsigned AReg[] = { X86::AL, X86::AX, X86::EAX, 0, X86::EAX }; 03433 BuildMI(*BB, IP, RegRegMove[SrcClass], 1, AReg[SrcClass]).addReg(SrcReg); 03434 BuildMI(*BB, IP, RegRegMove[DestClass], 1, DestReg).addReg(AReg[DestClass]); 03435 return; 03436 } 03437 03438 // Handle casts from integer to floating point now... 03439 if (DestClass == cFP) { 03440 // Promote the integer to a type supported by FLD. We do this because there 03441 // are no unsigned FLD instructions, so we must promote an unsigned value to 03442 // a larger signed value, then use FLD on the larger value. 03443 // 03444 const Type *PromoteType = 0; 03445 unsigned PromoteOpcode = 0; 03446 unsigned RealDestReg = DestReg; 03447 switch (SrcTy->getTypeID()) { 03448 case Type::BoolTyID: 03449 case Type::SByteTyID: 03450 // We don't have the facilities for directly loading byte sized data from 03451 // memory (even signed). Promote it to 16 bits. 03452 PromoteType = Type::ShortTy; 03453 PromoteOpcode = X86::MOVSX16rr8; 03454 break; 03455 case Type::UByteTyID: 03456 PromoteType = Type::ShortTy; 03457 PromoteOpcode = X86::MOVZX16rr8; 03458 break; 03459 case Type::UShortTyID: 03460 PromoteType = Type::IntTy; 03461 PromoteOpcode = X86::MOVZX32rr16; 03462 break; 03463 case Type::ULongTyID: 03464 case Type::UIntTyID: 03465 // Don't fild into the read destination. 03466 DestReg = makeAnotherReg(Type::DoubleTy); 03467 break; 03468 default: // No promotion needed... 03469 break; 03470 } 03471 03472 if (PromoteType) { 03473 unsigned TmpReg = makeAnotherReg(PromoteType); 03474 BuildMI(*BB, IP, PromoteOpcode, 1, TmpReg).addReg(SrcReg); 03475 SrcTy = PromoteType; 03476 SrcClass = getClass(PromoteType); 03477 SrcReg = TmpReg; 03478 } 03479 03480 // Spill the integer to memory and reload it from there... 03481 int FrameIdx = 03482 F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData()); 03483 03484 if (SrcClass == cLong) { 03485 addFrameReference(BuildMI(*BB, IP, X86::MOV32mr, 5), 03486 FrameIdx).addReg(SrcReg); 03487 addFrameReference(BuildMI(*BB, IP, X86::MOV32mr, 5), 03488 FrameIdx, 4).addReg(SrcReg+1); 03489 } else { 03490 static const unsigned Op1[] = { X86::MOV8mr, X86::MOV16mr, X86::MOV32mr }; 03491 addFrameReference(BuildMI(*BB, IP, Op1[SrcClass], 5), 03492 FrameIdx).addReg(SrcReg); 03493 } 03494 03495 static const unsigned Op2[] = 03496 { 0/*byte*/, X86::FILD16m, X86::FILD32m, 0/*FP*/, X86::FILD64m }; 03497 addFrameReference(BuildMI(*BB, IP, Op2[SrcClass], 5, DestReg), FrameIdx); 03498 03499 if (SrcTy == Type::UIntTy) { 03500 // If this is a cast from uint -> double, we need to be careful about if 03501 // the "sign" bit is set. If so, we don't want to make a negative number, 03502 // we want to make a positive number. Emit code to add an offset if the 03503 // sign bit is set. 03504 03505 // Compute whether the sign bit is set by shifting the reg right 31 bits. 03506 unsigned IsNeg = makeAnotherReg(Type::IntTy); 03507 BuildMI(BB, X86::SHR32ri, 2, IsNeg).addReg(SrcReg).addImm(31); 03508 03509 // Create a CP value that has the offset in one word and 0 in the other. 03510 static ConstantInt *TheOffset = ConstantUInt::get(Type::ULongTy, 03511 0x4f80000000000000ULL); 03512 unsigned CPI = F->getConstantPool()->getConstantPoolIndex(TheOffset); 03513 BuildMI(BB, X86::FADD32m, 5, RealDestReg).addReg(DestReg) 03514 .addConstantPoolIndex(CPI).addZImm(4).addReg(IsNeg).addSImm(0); 03515 03516 } else if (SrcTy == Type::ULongTy) { 03517 // We need special handling for unsigned 64-bit integer sources. If the 03518 // input number has the "sign bit" set, then we loaded it incorrectly as a 03519 // negative 64-bit number. In this case, add an offset value. 03520 03521 // Emit a test instruction to see if the dynamic input value was signed. 03522 BuildMI(*BB, IP, X86::TEST32rr, 2).addReg(SrcReg+1).addReg(SrcReg+1); 03523 03524 // If the sign bit is set, get a pointer to an offset, otherwise get a 03525 // pointer to a zero. 03526 MachineConstantPool *CP = F->getConstantPool(); 03527 unsigned Zero = makeAnotherReg(Type::IntTy); 03528 Constant *Null = Constant::getNullValue(Type::UIntTy); 03529 addConstantPoolReference(BuildMI(*BB, IP, X86::LEA32r, 5, Zero), 03530 CP->getConstantPoolIndex(Null)); 03531 unsigned Offset = makeAnotherReg(Type::IntTy); 03532 Constant *OffsetCst = ConstantUInt::get(Type::UIntTy, 0x5f800000); 03533 03534 addConstantPoolReference(BuildMI(*BB, IP, X86::LEA32r, 5, Offset), 03535 CP->getConstantPoolIndex(OffsetCst)); 03536 unsigned Addr = makeAnotherReg(Type::IntTy); 03537 BuildMI(*BB, IP, X86::CMOVS32rr, 2, Addr).addReg(Zero).addReg(Offset); 03538 03539 // Load the constant for an add. FIXME: this could make an 'fadd' that 03540 // reads directly from memory, but we don't support these yet. 03541 unsigned ConstReg = makeAnotherReg(Type::DoubleTy); 03542 addDirectMem(BuildMI(*BB, IP, X86::FLD32m, 4, ConstReg), Addr); 03543 03544 BuildMI(*BB, IP, X86::FpADD, 2, RealDestReg) 03545 .addReg(ConstReg).addReg(DestReg); 03546 } 03547 03548 return; 03549 } 03550 03551 // Handle casts from floating point to integer now... 03552 if (SrcClass == cFP) { 03553 // Change the floating point control register to use "round towards zero" 03554 // mode when truncating to an integer value. 03555 // 03556 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 03557 addFrameReference(BuildMI(*BB, IP, X86::FNSTCW16m, 4), CWFrameIdx); 03558 03559 // Load the old value of the high byte of the control word... 03560 unsigned HighPartOfCW = makeAnotherReg(Type::UByteTy); 03561 addFrameReference(BuildMI(*BB, IP, X86::MOV8rm, 4, HighPartOfCW), 03562 CWFrameIdx, 1); 03563 03564 // Set the high part to be round to zero... 03565 addFrameReference(BuildMI(*BB, IP, X86::MOV8mi, 5), 03566 CWFrameIdx, 1).addImm(12); 03567 03568 // Reload the modified control word now... 03569 addFrameReference(BuildMI(*BB, IP, X86::FLDCW16m, 4), CWFrameIdx); 03570 03571 // Restore the memory image of control word to original value 03572 addFrameReference(BuildMI(*BB, IP, X86::MOV8mr, 5), 03573 CWFrameIdx, 1).addReg(HighPartOfCW); 03574 03575 // We don't have the facilities for directly storing byte sized data to 03576 // memory. Promote it to 16 bits. We also must promote unsigned values to 03577 // larger classes because we only have signed FP stores. 03578 unsigned StoreClass = DestClass; 03579 const Type *StoreTy = DestTy; 03580 if (StoreClass == cByte || DestTy->isUnsigned()) 03581 switch (StoreClass) { 03582 case cByte: StoreTy = Type::ShortTy; StoreClass = cShort; break; 03583 case cShort: StoreTy = Type::IntTy; StoreClass = cInt; break; 03584 case cInt: StoreTy = Type::LongTy; StoreClass = cLong; break; 03585 // The following treatment of cLong may not be perfectly right, 03586 // but it survives chains of casts of the form 03587 // double->ulong->double. 03588 case cLong: StoreTy = Type::LongTy; StoreClass = cLong; break; 03589 default: assert(0 && "Unknown store class!"); 03590 } 03591 03592 // Spill the integer to memory and reload it from there... 03593 int FrameIdx = 03594 F->getFrameInfo()->CreateStackObject(StoreTy, TM.getTargetData()); 03595 03596 static const unsigned Op1[] = 03597 { 0, X86::FIST16m, X86::FIST32m, 0, X86::FISTP64m }; 03598 addFrameReference(BuildMI(*BB, IP, Op1[StoreClass], 5), 03599 FrameIdx).addReg(SrcReg); 03600 03601 if (DestClass == cLong) { 03602 addFrameReference(BuildMI(*BB, IP, X86::MOV32rm, 4, DestReg), FrameIdx); 03603 addFrameReference(BuildMI(*BB, IP, X86::MOV32rm, 4, DestReg+1), 03604 FrameIdx, 4); 03605 } else { 03606 static const unsigned Op2[] = { X86::MOV8rm, X86::MOV16rm, X86::MOV32rm }; 03607 addFrameReference(BuildMI(*BB, IP, Op2[DestClass], 4, DestReg), FrameIdx); 03608 } 03609 03610 // Reload the original control word now... 03611 addFrameReference(BuildMI(*BB, IP, X86::FLDCW16m, 4), CWFrameIdx); 03612 return; 03613 } 03614 03615 // Anything we haven't handled already, we can't (yet) handle at all. 03616 assert(0 && "Unhandled cast instruction!"); 03617 abort(); 03618 } 03619 03620 /// visitVANextInst - Implement the va_next instruction... 03621 /// 03622 void X86ISel::visitVANextInst(VANextInst &I) { 03623 unsigned VAList = getReg(I.getOperand(0)); 03624 unsigned DestReg = getReg(I); 03625 03626 unsigned Size; 03627 switch (I.getArgType()->getTypeID()) { 03628 default: 03629 std::cerr << I; 03630 assert(0 && "Error: bad type for va_next instruction!"); 03631 return; 03632 case Type::PointerTyID: 03633 case Type::UIntTyID: 03634 case Type::IntTyID: 03635 Size = 4; 03636 break; 03637 case Type::ULongTyID: 03638 case Type::LongTyID: 03639 case Type::DoubleTyID: 03640 Size = 8; 03641 break; 03642 } 03643 03644 // Increment the VAList pointer... 03645 BuildMI(BB, X86::ADD32ri, 2, DestReg).addReg(VAList).addImm(Size); 03646 } 03647 03648 void X86ISel::visitVAArgInst(VAArgInst &I) { 03649 unsigned VAList = getReg(I.getOperand(0)); 03650 unsigned DestReg = getReg(I); 03651 03652 switch (I.getType()->getTypeID()) { 03653 default: 03654 std::cerr << I; 03655 assert(0 && "Error: bad type for va_next instruction!"); 03656 return; 03657 case Type::PointerTyID: 03658 case Type::UIntTyID: 03659 case Type::IntTyID: 03660 addDirectMem(BuildMI(BB, X86::MOV32rm, 4, DestReg), VAList); 03661 break; 03662 case Type::ULongTyID: 03663 case Type::LongTyID: 03664 addDirectMem(BuildMI(BB, X86::MOV32rm, 4, DestReg), VAList); 03665 addRegOffset(BuildMI(BB, X86::MOV32rm, 4, DestReg+1), VAList, 4); 03666 break; 03667 case Type::DoubleTyID: 03668 addDirectMem(BuildMI(BB, X86::FLD64m, 4, DestReg), VAList); 03669 break; 03670 } 03671 } 03672 03673 /// visitGetElementPtrInst - instruction-select GEP instructions 03674 /// 03675 void X86ISel::visitGetElementPtrInst(GetElementPtrInst &I) { 03676 // If this GEP instruction will be folded into all of its users, we don't need 03677 // to explicitly calculate it! 03678 X86AddressMode AM; 03679 if (isGEPFoldable(0, I.getOperand(0), I.op_begin()+1, I.op_end(), AM)) { 03680 // Check all of the users of the instruction to see if they are loads and 03681 // stores. 03682 bool AllWillFold = true; 03683 for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI) 03684 if (cast<Instruction>(*UI)->getOpcode() != Instruction::Load) 03685 if (cast<Instruction>(*UI)->getOpcode() != Instruction::Store || 03686 cast<Instruction>(*UI)->getOperand(0) == &I) { 03687 AllWillFold = false; 03688 break; 03689 } 03690 03691 // If the instruction is foldable, and will be folded into all users, don't 03692 // emit it! 03693 if (AllWillFold) return; 03694 } 03695 03696 unsigned outputReg = getReg(I); 03697 emitGEPOperation(BB, BB->end(), I.getOperand(0), 03698 I.op_begin()+1, I.op_end(), outputReg); 03699 } 03700 03701 /// getGEPIndex - Inspect the getelementptr operands specified with GEPOps and 03702 /// GEPTypes (the derived types being stepped through at each level). On return 03703 /// from this function, if some indexes of the instruction are representable as 03704 /// an X86 lea instruction, the machine operands are put into the Ops 03705 /// instruction and the consumed indexes are poped from the GEPOps/GEPTypes 03706 /// lists. Otherwise, GEPOps.size() is returned. If this returns a an 03707 /// addressing mode that only partially consumes the input, the BaseReg input of 03708 /// the addressing mode must be left free. 03709 /// 03710 /// Note that there is one fewer entry in GEPTypes than there is in GEPOps. 03711 /// 03712 void X86ISel::getGEPIndex(MachineBasicBlock *MBB, 03713 MachineBasicBlock::iterator IP, 03714 std::vector<Value*> &GEPOps, 03715 std::vector<const Type*> &GEPTypes, 03716 X86AddressMode &AM) { 03717 const TargetData &TD = TM.getTargetData(); 03718 03719 // Clear out the state we are working with... 03720 AM.BaseType = X86AddressMode::RegBase; 03721 AM.Base.Reg = 0; // No base register 03722 AM.Scale = 1; // Unit scale 03723 AM.IndexReg = 0; // No index register 03724 AM.Disp = 0; // No displacement 03725 03726 // While there are GEP indexes that can be folded into the current address, 03727 // keep processing them. 03728 while (!GEPTypes.empty()) { 03729 if (const StructType *StTy = dyn_cast<StructType>(GEPTypes.back())) { 03730 // It's a struct access. CUI is the index into the structure, 03731 // which names the field. This index must have unsigned type. 03732 const ConstantUInt *CUI = cast<ConstantUInt>(GEPOps.back()); 03733 03734 // Use the TargetData structure to pick out what the layout of the 03735 // structure is in memory. Since the structure index must be constant, we 03736 // can get its value and use it to find the right byte offset from the 03737 // StructLayout class's list of structure member offsets. 03738 AM.Disp += TD.getStructLayout(StTy)->MemberOffsets[CUI->getValue()]; 03739 GEPOps.pop_back(); // Consume a GEP operand 03740 GEPTypes.pop_back(); 03741 } else { 03742 // It's an array or pointer access: [ArraySize x ElementType]. 03743 const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back()); 03744 Value *idx = GEPOps.back(); 03745 03746 // idx is the index into the array. Unlike with structure 03747 // indices, we may not know its actual value at code-generation 03748 // time. 03749 03750 // If idx is a constant, fold it into the offset. 03751 unsigned TypeSize = TD.getTypeSize(SqTy->getElementType()); 03752 if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(idx)) { 03753 AM.Disp += TypeSize*CSI->getValue(); 03754 } else if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(idx)) { 03755 AM.Disp += TypeSize*CUI->getValue(); 03756 } else { 03757 // If the index reg is already taken, we can't handle this index. 03758 if (AM.IndexReg) return; 03759 03760 // If this is a size that we can handle, then add the index as 03761 switch (TypeSize) { 03762 case 1: case 2: case 4: case 8: 03763 // These are all acceptable scales on X86. 03764 AM.Scale = TypeSize; 03765 break; 03766 default: 03767 // Otherwise, we can't handle this scale 03768 return; 03769 } 03770 03771 if (CastInst *CI = dyn_cast<CastInst>(idx)) 03772 if (CI->getOperand(0)->getType() == Type::IntTy || 03773 CI->getOperand(0)->getType() == Type::UIntTy) 03774 idx = CI->getOperand(0); 03775 03776 AM.IndexReg = MBB ? getReg(idx, MBB, IP) : 1; 03777 } 03778 03779 GEPOps.pop_back(); // Consume a GEP operand 03780 GEPTypes.pop_back(); 03781 } 03782 } 03783 03784 // GEPTypes is empty, which means we have a single operand left. Set it as 03785 // the base register. 03786 // 03787 assert(AM.Base.Reg == 0); 03788 03789 if (AllocaInst *AI = dyn_castFixedAlloca(GEPOps.back())) { 03790 AM.BaseType = X86AddressMode::FrameIndexBase; 03791 AM.Base.FrameIndex = getFixedSizedAllocaFI(AI); 03792 GEPOps.pop_back(); 03793 return; 03794 } 03795 03796 if (GlobalValue *GV = dyn_cast<GlobalValue>(GEPOps.back())) { 03797 AM.GV = GV; 03798 GEPOps.pop_back(); 03799 return; 03800 } 03801 03802 AM.Base.Reg = MBB ? getReg(GEPOps[0], MBB, IP) : 1; 03803 GEPOps.pop_back(); // Consume the last GEP operand 03804 } 03805 03806 03807 /// isGEPFoldable - Return true if the specified GEP can be completely 03808 /// folded into the addressing mode of a load/store or lea instruction. 03809 bool X86ISel::isGEPFoldable(MachineBasicBlock *MBB, 03810 Value *Src, User::op_iterator IdxBegin, 03811 User::op_iterator IdxEnd, X86AddressMode &AM) { 03812 03813 std::vector<Value*> GEPOps; 03814 GEPOps.resize(IdxEnd-IdxBegin+1); 03815 GEPOps[0] = Src; 03816 std::copy(IdxBegin, IdxEnd, GEPOps.begin()+1); 03817 03818 std::vector<const Type*> 03819 GEPTypes(gep_type_begin(Src->getType(), IdxBegin, IdxEnd), 03820 gep_type_end(Src->getType(), IdxBegin, IdxEnd)); 03821 03822 MachineBasicBlock::iterator IP; 03823 if (MBB) IP = MBB->end(); 03824 getGEPIndex(MBB, IP, GEPOps, GEPTypes, AM); 03825 03826 // We can fold it away iff the getGEPIndex call eliminated all operands. 03827 return GEPOps.empty(); 03828 } 03829 03830 void X86ISel::emitGEPOperation(MachineBasicBlock *MBB, 03831 MachineBasicBlock::iterator IP, 03832 Value *Src, User::op_iterator IdxBegin, 03833 User::op_iterator IdxEnd, unsigned TargetReg) { 03834 const TargetData &TD = TM.getTargetData(); 03835 03836 // If this is a getelementptr null, with all constant integer indices, just 03837 // replace it with TargetReg = 42. 03838 if (isa<ConstantPointerNull>(Src)) { 03839 User::op_iterator I = IdxBegin; 03840 for (; I != IdxEnd; ++I) 03841 if (!isa<ConstantInt>(*I)) 03842 break; 03843 if (I == IdxEnd) { // All constant indices 03844 unsigned Offset = TD.getIndexedOffset(Src->getType(), 03845 std::vector<Value*>(IdxBegin, IdxEnd)); 03846 BuildMI(*MBB, IP, X86::MOV32ri, 1, TargetReg).addImm(Offset); 03847 return; 03848 } 03849 } 03850 03851 std::vector<Value*> GEPOps; 03852 GEPOps.resize(IdxEnd-IdxBegin+1); 03853 GEPOps[0] = Src; 03854 std::copy(IdxBegin, IdxEnd, GEPOps.begin()+1); 03855 03856 std::vector<const Type*> GEPTypes; 03857 GEPTypes.assign(gep_type_begin(Src->getType(), IdxBegin, IdxEnd), 03858 gep_type_end(Src->getType(), IdxBegin, IdxEnd)); 03859 03860 // Keep emitting instructions until we consume the entire GEP instruction. 03861 while (!GEPOps.empty()) { 03862 unsigned OldSize = GEPOps.size(); 03863 X86AddressMode AM; 03864 getGEPIndex(MBB, IP, GEPOps, GEPTypes, AM); 03865 03866 if (GEPOps.size() != OldSize) { 03867 // getGEPIndex consumed some of the input. Build an LEA instruction here. 03868 unsigned NextTarget = 0; 03869 if (!GEPOps.empty()) { 03870 assert(AM.Base.Reg == 0 && 03871 "getGEPIndex should have left the base register open for chaining!"); 03872 NextTarget = AM.Base.Reg = makeAnotherReg(Type::UIntTy); 03873 } 03874 03875 if (AM.BaseType == X86AddressMode::RegBase && 03876 AM.IndexReg == 0 && AM.Disp == 0 && !AM.GV) 03877 BuildMI(*MBB, IP, X86::MOV32rr, 1, TargetReg).addReg(AM.Base.Reg); 03878 else if (AM.BaseType == X86AddressMode::RegBase && AM.Base.Reg == 0 && 03879 AM.IndexReg == 0 && AM.Disp == 0) 03880 BuildMI(*MBB, IP, X86::MOV32ri, 1, TargetReg).addGlobalAddress(AM.GV); 03881 else 03882 addFullAddress(BuildMI(*MBB, IP, X86::LEA32r, 5, TargetReg), AM); 03883 --IP; 03884 TargetReg = NextTarget; 03885 } else if (GEPTypes.empty()) { 03886 // The getGEPIndex operation didn't want to build an LEA. Check to see if 03887 // all operands are consumed but the base pointer. If so, just load it 03888 // into the register. 03889 if (GlobalValue *GV = dyn_cast<GlobalValue>(GEPOps[0])) { 03890 BuildMI(*MBB, IP, X86::MOV32ri, 1, TargetReg).addGlobalAddress(GV); 03891 } else { 03892 unsigned BaseReg = getReg(GEPOps[0], MBB, IP); 03893 BuildMI(*MBB, IP, X86::MOV32rr, 1, TargetReg).addReg(BaseReg); 03894 } 03895 break; // we are now done 03896 03897 } else { 03898 // It's an array or pointer access: [ArraySize x ElementType]. 03899 const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back()); 03900 Value *idx = GEPOps.back(); 03901 GEPOps.pop_back(); // Consume a GEP operand 03902 GEPTypes.pop_back(); 03903 03904 // Many GEP instructions use a [cast (int/uint) to LongTy] as their 03905 // operand on X86. Handle this case directly now... 03906 if (CastInst *CI = dyn_cast<CastInst>(idx)) 03907 if (CI->getOperand(0)->getType() == Type::IntTy || 03908 CI->getOperand(0)->getType() == Type::UIntTy) 03909 idx = CI->getOperand(0); 03910 03911 // We want to add BaseReg to(idxReg * sizeof ElementType). First, we 03912 // must find the size of the pointed-to type (Not coincidentally, the next 03913 // type is the type of the elements in the array). 03914 const Type *ElTy = SqTy->getElementType(); 03915 unsigned elementSize = TD.getTypeSize(ElTy); 03916 03917 // If idxReg is a constant, we don't need to perform the multiply! 03918 if (ConstantInt *CSI = dyn_cast<ConstantInt>(idx)) { 03919 if (!CSI->isNullValue()) { 03920 unsigned Offset = elementSize*CSI->getRawValue(); 03921 unsigned Reg = makeAnotherReg(Type::UIntTy); 03922 BuildMI(*MBB, IP, X86::ADD32ri, 2, TargetReg) 03923 .addReg(Reg).addImm(Offset); 03924 --IP; // Insert the next instruction before this one. 03925 TargetReg = Reg; // Codegen the rest of the GEP into this 03926 } 03927 } else if (elementSize == 1) { 03928 // If the element size is 1, we don't have to multiply, just add 03929 unsigned idxReg = getReg(idx, MBB, IP); 03930 unsigned Reg = makeAnotherReg(Type::UIntTy); 03931 BuildMI(*MBB, IP, X86::ADD32rr, 2,TargetReg).addReg(Reg).addReg(idxReg); 03932 --IP; // Insert the next instruction before this one. 03933 TargetReg = Reg; // Codegen the rest of the GEP into this 03934 } else { 03935 unsigned idxReg = getReg(idx, MBB, IP); 03936 unsigned OffsetReg = makeAnotherReg(Type::UIntTy); 03937 03938 // Make sure we can back the iterator up to point to the first 03939 // instruction emitted. 03940 MachineBasicBlock::iterator BeforeIt = IP; 03941 if (IP == MBB->begin()) 03942 BeforeIt = MBB->end(); 03943 else 03944 --BeforeIt; 03945 doMultiplyConst(MBB, IP, OffsetReg, Type::IntTy, idxReg, elementSize); 03946 03947 // Emit an ADD to add OffsetReg to the basePtr. 03948 unsigned Reg = makeAnotherReg(Type::UIntTy); 03949 BuildMI(*MBB, IP, X86::ADD32rr, 2, TargetReg) 03950 .addReg(Reg).addReg(OffsetReg); 03951 03952 // Step to the first instruction of the multiply. 03953 if (BeforeIt == MBB->end()) 03954 IP = MBB->begin(); 03955 else 03956 IP = ++BeforeIt; 03957 03958 TargetReg = Reg; // Codegen the rest of the GEP into this 03959 } 03960 } 03961 } 03962 } 03963 03964 /// visitAllocaInst - If this is a fixed size alloca, allocate space from the 03965 /// frame manager, otherwise do it the hard way. 03966 /// 03967 void X86ISel::visitAllocaInst(AllocaInst &I) { 03968 // If this is a fixed size alloca in the entry block for the function, we 03969 // statically stack allocate the space, so we don't need to do anything here. 03970 // 03971 if (dyn_castFixedAlloca(&I)) return; 03972 03973 // Find the data size of the alloca inst's getAllocatedType. 03974 const Type *Ty = I.getAllocatedType(); 03975 unsigned TySize = TM.getTargetData().getTypeSize(Ty); 03976 03977 // Create a register to hold the temporary result of multiplying the type size 03978 // constant by the variable amount. 03979 unsigned TotalSizeReg = makeAnotherReg(Type::UIntTy); 03980 unsigned SrcReg1 = getReg(I.getArraySize()); 03981 03982 // TotalSizeReg = mul <numelements>, <TypeSize> 03983 MachineBasicBlock::iterator MBBI = BB->end(); 03984 doMultiplyConst(BB, MBBI, TotalSizeReg, Type::UIntTy, SrcReg1, TySize); 03985 03986 // AddedSize = add <TotalSizeReg>, 15 03987 unsigned AddedSizeReg = makeAnotherReg(Type::UIntTy); 03988 BuildMI(BB, X86::ADD32ri, 2, AddedSizeReg).addReg(TotalSizeReg).addImm(15); 03989 03990 // AlignedSize = and <AddedSize>, ~15 03991 unsigned AlignedSize = makeAnotherReg(Type::UIntTy); 03992 BuildMI(BB, X86::AND32ri, 2, AlignedSize).addReg(AddedSizeReg).addImm(~15); 03993 03994 // Subtract size from stack pointer, thereby allocating some space. 03995 BuildMI(BB, X86::SUB32rr, 2, X86::ESP).addReg(X86::ESP).addReg(AlignedSize); 03996 03997 // Put a pointer to the space into the result register, by copying 03998 // the stack pointer. 03999 BuildMI(BB, X86::MOV32rr, 1, getReg(I)).addReg(X86::ESP); 04000 04001 // Inform the Frame Information that we have just allocated a variable-sized 04002 // object. 04003 F->getFrameInfo()->CreateVariableSizedObject(); 04004 } 04005 04006 /// visitMallocInst - Malloc instructions are code generated into direct calls 04007 /// to the library malloc. 04008 /// 04009 void X86ISel::visitMallocInst(MallocInst &I) { 04010 unsigned AllocSize = TM.getTargetData().getTypeSize(I.getAllocatedType()); 04011 unsigned Arg; 04012 04013 if (ConstantUInt *C = dyn_cast<ConstantUInt>(I.getOperand(0))) { 04014 Arg = getReg(ConstantUInt::get(Type::UIntTy, C->getValue() * AllocSize)); 04015 } else { 04016 Arg = makeAnotherReg(Type::UIntTy); 04017 unsigned Op0Reg = getReg(I.getOperand(0)); 04018 MachineBasicBlock::iterator MBBI = BB->end(); 04019 doMultiplyConst(BB, MBBI, Arg, Type::UIntTy, Op0Reg, AllocSize); 04020 } 04021 04022 std::vector<ValueRecord> Args; 04023 Args.push_back(ValueRecord(Arg, Type::UIntTy)); 04024 MachineInstr *TheCall = BuildMI(X86::CALLpcrel32, 04025 1).addExternalSymbol("malloc", true); 04026 doCall(ValueRecord(getReg(I), I.getType()), TheCall, Args); 04027 } 04028 04029 04030 /// visitFreeInst - Free instructions are code gen'd to call the free libc 04031 /// function. 04032 /// 04033 void X86ISel::visitFreeInst(FreeInst &I) { 04034 std::vector<ValueRecord> Args; 04035 Args.push_back(ValueRecord(I.getOperand(0))); 04036 MachineInstr *TheCall = BuildMI(X86::CALLpcrel32, 04037 1).addExternalSymbol("free", true); 04038 doCall(ValueRecord(0, Type::VoidTy), TheCall, Args); 04039 } 04040 04041 /// createX86SimpleInstructionSelector - This pass converts an LLVM function 04042 /// into a machine code representation is a very simple peep-hole fashion. The 04043 /// generated code sucks but the implementation is nice and simple. 04044 /// 04045 FunctionPass *llvm::createX86SimpleInstructionSelector(TargetMachine &TM) { 04046 return new X86ISel(TM); 04047 }