LLVM API Documentation
00001 //===-- PPC64ISelSimple.cpp - A simple instruction selector for PowerPC ---===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 00010 #define DEBUG_TYPE "isel" 00011 #include "PowerPC.h" 00012 #include "PowerPCInstrBuilder.h" 00013 #include "PowerPCInstrInfo.h" 00014 #include "PPC64TargetMachine.h" 00015 #include "llvm/Constants.h" 00016 #include "llvm/DerivedTypes.h" 00017 #include "llvm/Function.h" 00018 #include "llvm/Instructions.h" 00019 #include "llvm/Pass.h" 00020 #include "llvm/CodeGen/IntrinsicLowering.h" 00021 #include "llvm/CodeGen/MachineConstantPool.h" 00022 #include "llvm/CodeGen/MachineFrameInfo.h" 00023 #include "llvm/CodeGen/MachineFunction.h" 00024 #include "llvm/CodeGen/SSARegMap.h" 00025 #include "llvm/Target/MRegisterInfo.h" 00026 #include "llvm/Target/TargetMachine.h" 00027 #include "llvm/Support/GetElementPtrTypeIterator.h" 00028 #include "llvm/Support/InstVisitor.h" 00029 #include "llvm/Support/Debug.h" 00030 #include "llvm/ADT/Statistic.h" 00031 #include <vector> 00032 using namespace llvm; 00033 00034 namespace { 00035 Statistic<> GEPFolds("ppc64-codegen", "Number of GEPs folded"); 00036 00037 /// TypeClass - Used by the PowerPC backend to group LLVM types by their basic 00038 /// PPC Representation. 00039 /// 00040 enum TypeClass { 00041 cByte, cShort, cInt, cFP32, cFP64, cLong 00042 }; 00043 } 00044 00045 /// getClass - Turn a primitive type into a "class" number which is based on the 00046 /// size of the type, and whether or not it is floating point. 00047 /// 00048 static inline TypeClass getClass(const Type *Ty) { 00049 switch (Ty->getTypeID()) { 00050 case Type::SByteTyID: 00051 case Type::UByteTyID: return cByte; // Byte operands are class #0 00052 case Type::ShortTyID: 00053 case Type::UShortTyID: return cShort; // Short operands are class #1 00054 case Type::IntTyID: 00055 case Type::UIntTyID: return cInt; // Ints are class #2 00056 00057 case Type::FloatTyID: return cFP32; // Single float is #3 00058 case Type::DoubleTyID: return cFP64; // Double Point is #4 00059 00060 case Type::PointerTyID: 00061 case Type::LongTyID: 00062 case Type::ULongTyID: return cLong; // Longs and pointers are class #5 00063 default: 00064 assert(0 && "Invalid type to getClass!"); 00065 return cByte; // not reached 00066 } 00067 } 00068 00069 // getClassB - Just like getClass, but treat boolean values as ints. 00070 static inline TypeClass getClassB(const Type *Ty) { 00071 if (Ty == Type::BoolTy) return cInt; 00072 return getClass(Ty); 00073 } 00074 00075 namespace { 00076 struct PPC64ISel : public FunctionPass, InstVisitor<PPC64ISel> { 00077 PPC64TargetMachine &TM; 00078 MachineFunction *F; // The function we are compiling into 00079 MachineBasicBlock *BB; // The current MBB we are compiling 00080 int VarArgsFrameIndex; // FrameIndex for start of varargs area 00081 00082 std::map<Value*, unsigned> RegMap; // Mapping between Values and SSA Regs 00083 00084 // External functions used in the Module 00085 Function *fmodfFn, *fmodFn, *__cmpdi2Fn, *__fixsfdiFn, *__fixdfdiFn, 00086 *__fixunssfdiFn, *__fixunsdfdiFn, *mallocFn, *freeFn; 00087 00088 // MBBMap - Mapping between LLVM BB -> Machine BB 00089 std::map<const BasicBlock*, MachineBasicBlock*> MBBMap; 00090 00091 // AllocaMap - Mapping from fixed sized alloca instructions to the 00092 // FrameIndex for the alloca. 00093 std::map<AllocaInst*, unsigned> AllocaMap; 00094 00095 // Target configuration data 00096 const unsigned ParameterSaveAreaOffset, MaxArgumentStackSpace; 00097 00098 PPC64ISel(TargetMachine &tm):TM(reinterpret_cast<PPC64TargetMachine&>(tm)), 00099 F(0), BB(0), ParameterSaveAreaOffset(24), MaxArgumentStackSpace(32) {} 00100 00101 bool doInitialization(Module &M) { 00102 // Add external functions that we may call 00103 Type *i = Type::IntTy; 00104 Type *d = Type::DoubleTy; 00105 Type *f = Type::FloatTy; 00106 Type *l = Type::LongTy; 00107 Type *ul = Type::ULongTy; 00108 Type *voidPtr = PointerType::get(Type::SByteTy); 00109 // float fmodf(float, float); 00110 fmodfFn = M.getOrInsertFunction("fmodf", f, f, f, 0); 00111 // double fmod(double, double); 00112 fmodFn = M.getOrInsertFunction("fmod", d, d, d, 0); 00113 // int __cmpdi2(long, long); 00114 __cmpdi2Fn = M.getOrInsertFunction("__cmpdi2", i, l, l, 0); 00115 // long __fixsfdi(float) 00116 __fixsfdiFn = M.getOrInsertFunction("__fixsfdi", l, f, 0); 00117 // long __fixdfdi(double) 00118 __fixdfdiFn = M.getOrInsertFunction("__fixdfdi", l, d, 0); 00119 // unsigned long __fixunssfdi(float) 00120 __fixunssfdiFn = M.getOrInsertFunction("__fixunssfdi", ul, f, 0); 00121 // unsigned long __fixunsdfdi(double) 00122 __fixunsdfdiFn = M.getOrInsertFunction("__fixunsdfdi", ul, d, 0); 00123 // void* malloc(size_t) 00124 mallocFn = M.getOrInsertFunction("malloc", voidPtr, Type::UIntTy, 0); 00125 // void free(void*) 00126 freeFn = M.getOrInsertFunction("free", Type::VoidTy, voidPtr, 0); 00127 return false; 00128 } 00129 00130 /// runOnFunction - Top level implementation of instruction selection for 00131 /// the entire function. 00132 /// 00133 bool runOnFunction(Function &Fn) { 00134 // First pass over the function, lower any unknown intrinsic functions 00135 // with the IntrinsicLowering class. 00136 LowerUnknownIntrinsicFunctionCalls(Fn); 00137 00138 F = &MachineFunction::construct(&Fn, TM); 00139 00140 // Create all of the machine basic blocks for the function... 00141 for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) 00142 F->getBasicBlockList().push_back(MBBMap[I] = new MachineBasicBlock(I)); 00143 00144 BB = &F->front(); 00145 00146 // Copy incoming arguments off of the stack... 00147 LoadArgumentsToVirtualRegs(Fn); 00148 00149 // Instruction select everything except PHI nodes 00150 visit(Fn); 00151 00152 // Select the PHI nodes 00153 SelectPHINodes(); 00154 00155 RegMap.clear(); 00156 MBBMap.clear(); 00157 AllocaMap.clear(); 00158 F = 0; 00159 // We always build a machine code representation for the function 00160 return true; 00161 } 00162 00163 virtual const char *getPassName() const { 00164 return "PowerPC Simple Instruction Selection"; 00165 } 00166 00167 /// visitBasicBlock - This method is called when we are visiting a new basic 00168 /// block. This simply creates a new MachineBasicBlock to emit code into 00169 /// and adds it to the current MachineFunction. Subsequent visit* for 00170 /// instructions will be invoked for all instructions in the basic block. 00171 /// 00172 void visitBasicBlock(BasicBlock &LLVM_BB) { 00173 BB = MBBMap[&LLVM_BB]; 00174 } 00175 00176 /// LowerUnknownIntrinsicFunctionCalls - This performs a prepass over the 00177 /// function, lowering any calls to unknown intrinsic functions into the 00178 /// equivalent LLVM code. 00179 /// 00180 void LowerUnknownIntrinsicFunctionCalls(Function &F); 00181 00182 /// LoadArgumentsToVirtualRegs - Load all of the arguments to this function 00183 /// from the stack into virtual registers. 00184 /// 00185 void LoadArgumentsToVirtualRegs(Function &F); 00186 00187 /// SelectPHINodes - Insert machine code to generate phis. This is tricky 00188 /// because we have to generate our sources into the source basic blocks, 00189 /// not the current one. 00190 /// 00191 void SelectPHINodes(); 00192 00193 // Visitation methods for various instructions. These methods simply emit 00194 // fixed PowerPC code for each instruction. 00195 00196 // Control flow operators 00197 void visitReturnInst(ReturnInst &RI); 00198 void visitBranchInst(BranchInst &BI); 00199 00200 struct ValueRecord { 00201 Value *Val; 00202 unsigned Reg; 00203 const Type *Ty; 00204 ValueRecord(unsigned R, const Type *T) : Val(0), Reg(R), Ty(T) {} 00205 ValueRecord(Value *V) : Val(V), Reg(0), Ty(V->getType()) {} 00206 }; 00207 00208 // This struct is for recording the necessary operations to emit the GEP 00209 struct CollapsedGepOp { 00210 bool isMul; 00211 Value *index; 00212 ConstantSInt *size; 00213 CollapsedGepOp(bool mul, Value *i, ConstantSInt *s) : 00214 isMul(mul), index(i), size(s) {} 00215 }; 00216 00217 void doCall(const ValueRecord &Ret, MachineInstr *CallMI, 00218 const std::vector<ValueRecord> &Args, bool isVarArg); 00219 void visitCallInst(CallInst &I); 00220 void visitIntrinsicCall(Intrinsic::ID ID, CallInst &I); 00221 00222 // Arithmetic operators 00223 void visitSimpleBinary(BinaryOperator &B, unsigned OpcodeClass); 00224 void visitAdd(BinaryOperator &B) { visitSimpleBinary(B, 0); } 00225 void visitSub(BinaryOperator &B) { visitSimpleBinary(B, 1); } 00226 void visitMul(BinaryOperator &B); 00227 00228 void visitDiv(BinaryOperator &B) { visitDivRem(B); } 00229 void visitRem(BinaryOperator &B) { visitDivRem(B); } 00230 void visitDivRem(BinaryOperator &B); 00231 00232 // Bitwise operators 00233 void visitAnd(BinaryOperator &B) { visitSimpleBinary(B, 2); } 00234 void visitOr (BinaryOperator &B) { visitSimpleBinary(B, 3); } 00235 void visitXor(BinaryOperator &B) { visitSimpleBinary(B, 4); } 00236 00237 // Comparison operators... 00238 void visitSetCondInst(SetCondInst &I); 00239 unsigned EmitComparison(unsigned OpNum, Value *Op0, Value *Op1, 00240 MachineBasicBlock *MBB, 00241 MachineBasicBlock::iterator MBBI); 00242 void visitSelectInst(SelectInst &SI); 00243 00244 00245 // Memory Instructions 00246 void visitLoadInst(LoadInst &I); 00247 void visitStoreInst(StoreInst &I); 00248 void visitGetElementPtrInst(GetElementPtrInst &I); 00249 void visitAllocaInst(AllocaInst &I); 00250 void visitMallocInst(MallocInst &I); 00251 void visitFreeInst(FreeInst &I); 00252 00253 // Other operators 00254 void visitShiftInst(ShiftInst &I); 00255 void visitPHINode(PHINode &I) {} // PHI nodes handled by second pass 00256 void visitCastInst(CastInst &I); 00257 void visitVANextInst(VANextInst &I); 00258 void visitVAArgInst(VAArgInst &I); 00259 00260 void visitInstruction(Instruction &I) { 00261 std::cerr << "Cannot instruction select: " << I; 00262 abort(); 00263 } 00264 00265 /// promote32 - Make a value 32-bits wide, and put it somewhere. 00266 /// 00267 void promote32(unsigned targetReg, const ValueRecord &VR); 00268 00269 /// emitGEPOperation - Common code shared between visitGetElementPtrInst and 00270 /// constant expression GEP support. 00271 /// 00272 void emitGEPOperation(MachineBasicBlock *BB, MachineBasicBlock::iterator IP, 00273 Value *Src, User::op_iterator IdxBegin, 00274 User::op_iterator IdxEnd, unsigned TargetReg, 00275 bool CollapseRemainder, ConstantSInt **Remainder, 00276 unsigned *PendingAddReg); 00277 00278 /// emitCastOperation - Common code shared between visitCastInst and 00279 /// constant expression cast support. 00280 /// 00281 void emitCastOperation(MachineBasicBlock *BB,MachineBasicBlock::iterator IP, 00282 Value *Src, const Type *DestTy, unsigned TargetReg); 00283 00284 /// emitSimpleBinaryOperation - Common code shared between visitSimpleBinary 00285 /// and constant expression support. 00286 /// 00287 void emitSimpleBinaryOperation(MachineBasicBlock *BB, 00288 MachineBasicBlock::iterator IP, 00289 Value *Op0, Value *Op1, 00290 unsigned OperatorClass, unsigned TargetReg); 00291 00292 /// emitBinaryFPOperation - This method handles emission of floating point 00293 /// Add (0), Sub (1), Mul (2), and Div (3) operations. 00294 void emitBinaryFPOperation(MachineBasicBlock *BB, 00295 MachineBasicBlock::iterator IP, 00296 Value *Op0, Value *Op1, 00297 unsigned OperatorClass, unsigned TargetReg); 00298 00299 void emitMultiply(MachineBasicBlock *BB, MachineBasicBlock::iterator IP, 00300 Value *Op0, Value *Op1, unsigned TargetReg); 00301 00302 void doMultiply(MachineBasicBlock *MBB, 00303 MachineBasicBlock::iterator IP, 00304 unsigned DestReg, Value *Op0, Value *Op1); 00305 00306 /// doMultiplyConst - This method will multiply the value in Op0Reg by the 00307 /// value of the ContantInt *CI 00308 void doMultiplyConst(MachineBasicBlock *MBB, 00309 MachineBasicBlock::iterator IP, 00310 unsigned DestReg, Value *Op0, ConstantInt *CI); 00311 00312 void emitDivRemOperation(MachineBasicBlock *BB, 00313 MachineBasicBlock::iterator IP, 00314 Value *Op0, Value *Op1, bool isDiv, 00315 unsigned TargetReg); 00316 00317 /// emitSetCCOperation - Common code shared between visitSetCondInst and 00318 /// constant expression support. 00319 /// 00320 void emitSetCCOperation(MachineBasicBlock *BB, 00321 MachineBasicBlock::iterator IP, 00322 Value *Op0, Value *Op1, unsigned Opcode, 00323 unsigned TargetReg); 00324 00325 /// emitShiftOperation - Common code shared between visitShiftInst and 00326 /// constant expression support. 00327 /// 00328 void emitShiftOperation(MachineBasicBlock *MBB, 00329 MachineBasicBlock::iterator IP, 00330 Value *Op, Value *ShiftAmount, bool isLeftShift, 00331 const Type *ResultTy, unsigned DestReg); 00332 00333 /// emitSelectOperation - Common code shared between visitSelectInst and the 00334 /// constant expression support. 00335 /// 00336 void emitSelectOperation(MachineBasicBlock *MBB, 00337 MachineBasicBlock::iterator IP, 00338 Value *Cond, Value *TrueVal, Value *FalseVal, 00339 unsigned DestReg); 00340 00341 /// copyConstantToRegister - Output the instructions required to put the 00342 /// specified constant into the specified register. 00343 /// 00344 void copyConstantToRegister(MachineBasicBlock *MBB, 00345 MachineBasicBlock::iterator MBBI, 00346 Constant *C, unsigned Reg); 00347 00348 void emitUCOM(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI, 00349 unsigned LHS, unsigned RHS); 00350 00351 /// makeAnotherReg - This method returns the next register number we haven't 00352 /// yet used. 00353 /// 00354 unsigned makeAnotherReg(const Type *Ty) { 00355 assert(dynamic_cast<const PPC64RegisterInfo*>(TM.getRegisterInfo()) && 00356 "Current target doesn't have PPC reg info??"); 00357 const PPC64RegisterInfo *PPCRI = 00358 static_cast<const PPC64RegisterInfo*>(TM.getRegisterInfo()); 00359 // Add the mapping of regnumber => reg class to MachineFunction 00360 const TargetRegisterClass *RC = PPCRI->getRegClassForType(Ty); 00361 return F->getSSARegMap()->createVirtualRegister(RC); 00362 } 00363 00364 /// getReg - This method turns an LLVM value into a register number. 00365 /// 00366 unsigned getReg(Value &V) { return getReg(&V); } // Allow references 00367 unsigned getReg(Value *V) { 00368 // Just append to the end of the current bb. 00369 MachineBasicBlock::iterator It = BB->end(); 00370 return getReg(V, BB, It); 00371 } 00372 unsigned getReg(Value *V, MachineBasicBlock *MBB, 00373 MachineBasicBlock::iterator IPt); 00374 00375 /// canUseAsImmediateForOpcode - This method returns whether a ConstantInt 00376 /// is okay to use as an immediate argument to a certain binary operation 00377 bool canUseAsImmediateForOpcode(ConstantInt *CI, unsigned Opcode); 00378 00379 /// getFixedSizedAllocaFI - Return the frame index for a fixed sized alloca 00380 /// that is to be statically allocated with the initial stack frame 00381 /// adjustment. 00382 unsigned getFixedSizedAllocaFI(AllocaInst *AI); 00383 }; 00384 } 00385 00386 /// dyn_castFixedAlloca - If the specified value is a fixed size alloca 00387 /// instruction in the entry block, return it. Otherwise, return a null 00388 /// pointer. 00389 static AllocaInst *dyn_castFixedAlloca(Value *V) { 00390 if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { 00391 BasicBlock *BB = AI->getParent(); 00392 if (isa<ConstantUInt>(AI->getArraySize()) && BB ==&BB->getParent()->front()) 00393 return AI; 00394 } 00395 return 0; 00396 } 00397 00398 /// getReg - This method turns an LLVM value into a register number. 00399 /// 00400 unsigned PPC64ISel::getReg(Value *V, MachineBasicBlock *MBB, 00401 MachineBasicBlock::iterator IPt) { 00402 if (Constant *C = dyn_cast<Constant>(V)) { 00403 unsigned Reg = makeAnotherReg(V->getType()); 00404 copyConstantToRegister(MBB, IPt, C, Reg); 00405 return Reg; 00406 } else if (AllocaInst *AI = dyn_castFixedAlloca(V)) { 00407 unsigned Reg = makeAnotherReg(V->getType()); 00408 unsigned FI = getFixedSizedAllocaFI(AI); 00409 addFrameReference(BuildMI(*MBB, IPt, PPC::ADDI, 2, Reg), FI, 0, false); 00410 return Reg; 00411 } 00412 00413 unsigned &Reg = RegMap[V]; 00414 if (Reg == 0) { 00415 Reg = makeAnotherReg(V->getType()); 00416 RegMap[V] = Reg; 00417 } 00418 00419 return Reg; 00420 } 00421 00422 /// canUseAsImmediateForOpcode - This method returns whether a ConstantInt 00423 /// is okay to use as an immediate argument to a certain binary operator. 00424 /// 00425 /// Operator is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or, 4 for Xor. 00426 bool PPC64ISel::canUseAsImmediateForOpcode(ConstantInt *CI, unsigned Operator) { 00427 ConstantSInt *Op1Cs; 00428 ConstantUInt *Op1Cu; 00429 00430 // ADDI, Compare, and non-indexed Load take SIMM 00431 bool cond1 = (Operator == 0) 00432 && (Op1Cs = dyn_cast<ConstantSInt>(CI)) 00433 && (Op1Cs->getValue() <= 32767) 00434 && (Op1Cs->getValue() >= -32768); 00435 00436 // SUBI takes -SIMM since it is a mnemonic for ADDI 00437 bool cond2 = (Operator == 1) 00438 && (Op1Cs = dyn_cast<ConstantSInt>(CI)) 00439 && (Op1Cs->getValue() <= 32768) 00440 && (Op1Cs->getValue() >= -32767); 00441 00442 // ANDIo, ORI, and XORI take unsigned values 00443 bool cond3 = (Operator >= 2) 00444 && (Op1Cs = dyn_cast<ConstantSInt>(CI)) 00445 && (Op1Cs->getValue() >= 0) 00446 && (Op1Cs->getValue() <= 32767); 00447 00448 // ADDI and SUBI take SIMMs, so we have to make sure the UInt would fit 00449 bool cond4 = (Operator < 2) 00450 && (Op1Cu = dyn_cast<ConstantUInt>(CI)) 00451 && (Op1Cu->getValue() <= 32767); 00452 00453 // ANDIo, ORI, and XORI take UIMMs, so they can be larger 00454 bool cond5 = (Operator >= 2) 00455 && (Op1Cu = dyn_cast<ConstantUInt>(CI)) 00456 && (Op1Cu->getValue() <= 65535); 00457 00458 if (cond1 || cond2 || cond3 || cond4 || cond5) 00459 return true; 00460 00461 return false; 00462 } 00463 00464 /// getFixedSizedAllocaFI - Return the frame index for a fixed sized alloca 00465 /// that is to be statically allocated with the initial stack frame 00466 /// adjustment. 00467 unsigned PPC64ISel::getFixedSizedAllocaFI(AllocaInst *AI) { 00468 // Already computed this? 00469 std::map<AllocaInst*, unsigned>::iterator I = AllocaMap.lower_bound(AI); 00470 if (I != AllocaMap.end() && I->first == AI) return I->second; 00471 00472 const Type *Ty = AI->getAllocatedType(); 00473 ConstantUInt *CUI = cast<ConstantUInt>(AI->getArraySize()); 00474 unsigned TySize = TM.getTargetData().getTypeSize(Ty); 00475 TySize *= CUI->getValue(); // Get total allocated size... 00476 unsigned Alignment = TM.getTargetData().getTypeAlignment(Ty); 00477 00478 // Create a new stack object using the frame manager... 00479 int FrameIdx = F->getFrameInfo()->CreateStackObject(TySize, Alignment); 00480 AllocaMap.insert(I, std::make_pair(AI, FrameIdx)); 00481 return FrameIdx; 00482 } 00483 00484 00485 /// copyConstantToRegister - Output the instructions required to put the 00486 /// specified constant into the specified register. 00487 /// 00488 void PPC64ISel::copyConstantToRegister(MachineBasicBlock *MBB, 00489 MachineBasicBlock::iterator IP, 00490 Constant *C, unsigned R) { 00491 if (C->getType()->isIntegral()) { 00492 unsigned Class = getClassB(C->getType()); 00493 00494 if (Class == cLong) { 00495 if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(C)) { 00496 uint64_t uval = CUI->getValue(); 00497 if (uval < (1LL << 32)) { 00498 ConstantUInt *CU = ConstantUInt::get(Type::UIntTy, uval); 00499 copyConstantToRegister(MBB, IP, CU, R); 00500 return; 00501 } 00502 } else if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(C)) { 00503 int64_t val = CUI->getValue(); 00504 if (val < (1LL << 31)) { 00505 ConstantUInt *CU = ConstantUInt::get(Type::UIntTy, val); 00506 copyConstantToRegister(MBB, IP, CU, R); 00507 return; 00508 } 00509 } else { 00510 std::cerr << "Unhandled long constant type!\n"; 00511 abort(); 00512 } 00513 // Spill long to the constant pool and load it 00514 MachineConstantPool *CP = F->getConstantPool(); 00515 unsigned CPI = CP->getConstantPoolIndex(C); 00516 BuildMI(*MBB, IP, PPC::LD, 1, R) 00517 .addReg(PPC::R2).addConstantPoolIndex(CPI); 00518 return; 00519 } 00520 00521 assert(Class <= cInt && "Type not handled yet!"); 00522 00523 // Handle bool 00524 if (C->getType() == Type::BoolTy) { 00525 BuildMI(*MBB, IP, PPC::LI, 1, R).addSImm(C == ConstantBool::True); 00526 return; 00527 } 00528 00529 // Handle int 00530 if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(C)) { 00531 unsigned uval = CUI->getValue(); 00532 if (uval < 32768) { 00533 BuildMI(*MBB, IP, PPC::LI, 1, R).addSImm(uval); 00534 } else { 00535 unsigned Temp = makeAnotherReg(Type::IntTy); 00536 BuildMI(*MBB, IP, PPC::LIS, 1, Temp).addSImm(uval >> 16); 00537 BuildMI(*MBB, IP, PPC::ORI, 2, R).addReg(Temp).addImm(uval); 00538 } 00539 return; 00540 } else if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(C)) { 00541 int sval = CSI->getValue(); 00542 if (sval < 32768 && sval >= -32768) { 00543 BuildMI(*MBB, IP, PPC::LI, 1, R).addSImm(sval); 00544 } else { 00545 unsigned Temp = makeAnotherReg(Type::IntTy); 00546 BuildMI(*MBB, IP, PPC::LIS, 1, Temp).addSImm(sval >> 16); 00547 BuildMI(*MBB, IP, PPC::ORI, 2, R).addReg(Temp).addImm(sval); 00548 } 00549 return; 00550 } 00551 std::cerr << "Unhandled integer constant!\n"; 00552 abort(); 00553 } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { 00554 // We need to spill the constant to memory... 00555 MachineConstantPool *CP = F->getConstantPool(); 00556 unsigned CPI = CP->getConstantPoolIndex(CFP); 00557 const Type *Ty = CFP->getType(); 00558 unsigned LoadOpcode = (Ty == Type::FloatTy) ? PPC::LFS : PPC::LFD; 00559 BuildMI(*MBB,IP,LoadOpcode,2,R).addConstantPoolIndex(CPI).addReg(PPC::R2); 00560 } else if (isa<ConstantPointerNull>(C)) { 00561 // Copy zero (null pointer) to the register. 00562 BuildMI(*MBB, IP, PPC::LI, 1, R).addSImm(0); 00563 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) { 00564 static unsigned OpcodeTable[] = { 00565 PPC::LBZ, PPC::LHZ, PPC::LWZ, PPC::LFS, PPC::LFD, PPC::LD 00566 }; 00567 unsigned Opcode = OpcodeTable[getClassB(GV->getType())]; 00568 BuildMI(*MBB, IP, Opcode, 2, R).addGlobalAddress(GV).addReg(PPC::R2); 00569 } else { 00570 std::cerr << "Offending constant: " << *C << "\n"; 00571 assert(0 && "Type not handled yet!"); 00572 } 00573 } 00574 00575 /// LoadArgumentsToVirtualRegs - Load all of the arguments to this function from 00576 /// the stack into virtual registers. 00577 void PPC64ISel::LoadArgumentsToVirtualRegs(Function &Fn) { 00578 unsigned ArgOffset = ParameterSaveAreaOffset; 00579 unsigned GPR_remaining = 8; 00580 unsigned FPR_remaining = 13; 00581 unsigned GPR_idx = 0, FPR_idx = 0; 00582 static const unsigned GPR[] = { 00583 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 00584 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 00585 }; 00586 static const unsigned FPR[] = { 00587 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 00588 PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 00589 }; 00590 00591 MachineFrameInfo *MFI = F->getFrameInfo(); 00592 00593 for (Function::aiterator I = Fn.abegin(), E = Fn.aend(); I != E; ++I) { 00594 bool ArgLive = !I->use_empty(); 00595 unsigned Reg = ArgLive ? getReg(*I) : 0; 00596 int FI; // Frame object index 00597 00598 switch (getClassB(I->getType())) { 00599 case cByte: 00600 if (ArgLive) { 00601 FI = MFI->CreateFixedObject(4, ArgOffset); 00602 if (GPR_remaining > 0) { 00603 BuildMI(BB, PPC::IMPLICIT_DEF, 0, GPR[GPR_idx]); 00604 BuildMI(BB, PPC::OR, 2, Reg).addReg(GPR[GPR_idx]) 00605 .addReg(GPR[GPR_idx]); 00606 } else { 00607 addFrameReference(BuildMI(BB, PPC::LBZ, 2, Reg), FI); 00608 } 00609 } 00610 break; 00611 case cShort: 00612 if (ArgLive) { 00613 FI = MFI->CreateFixedObject(4, ArgOffset); 00614 if (GPR_remaining > 0) { 00615 BuildMI(BB, PPC::IMPLICIT_DEF, 0, GPR[GPR_idx]); 00616 BuildMI(BB, PPC::OR, 2, Reg).addReg(GPR[GPR_idx]) 00617 .addReg(GPR[GPR_idx]); 00618 } else { 00619 addFrameReference(BuildMI(BB, PPC::LHZ, 2, Reg), FI); 00620 } 00621 } 00622 break; 00623 case cInt: 00624 if (ArgLive) { 00625 FI = MFI->CreateFixedObject(4, ArgOffset); 00626 if (GPR_remaining > 0) { 00627 BuildMI(BB, PPC::IMPLICIT_DEF, 0, GPR[GPR_idx]); 00628 BuildMI(BB, PPC::OR, 2, Reg).addReg(GPR[GPR_idx]) 00629 .addReg(GPR[GPR_idx]); 00630 } else { 00631 addFrameReference(BuildMI(BB, PPC::LWZ, 2, Reg), FI); 00632 } 00633 } 00634 break; 00635 case cLong: 00636 if (ArgLive) { 00637 FI = MFI->CreateFixedObject(8, ArgOffset); 00638 if (GPR_remaining > 1) { 00639 BuildMI(BB, PPC::IMPLICIT_DEF, 0, GPR[GPR_idx]); 00640 BuildMI(BB, PPC::OR, 2, Reg).addReg(GPR[GPR_idx]) 00641 .addReg(GPR[GPR_idx]); 00642 } else { 00643 addFrameReference(BuildMI(BB, PPC::LD, 2, Reg), FI); 00644 } 00645 } 00646 // longs require 4 additional bytes 00647 ArgOffset += 4; 00648 break; 00649 case cFP32: 00650 if (ArgLive) { 00651 FI = MFI->CreateFixedObject(4, ArgOffset); 00652 00653 if (FPR_remaining > 0) { 00654 BuildMI(BB, PPC::IMPLICIT_DEF, 0, FPR[FPR_idx]); 00655 BuildMI(BB, PPC::FMR, 1, Reg).addReg(FPR[FPR_idx]); 00656 FPR_remaining--; 00657 FPR_idx++; 00658 } else { 00659 addFrameReference(BuildMI(BB, PPC::LFS, 2, Reg), FI); 00660 } 00661 } 00662 break; 00663 case cFP64: 00664 if (ArgLive) { 00665 FI = MFI->CreateFixedObject(8, ArgOffset); 00666 00667 if (FPR_remaining > 0) { 00668 BuildMI(BB, PPC::IMPLICIT_DEF, 0, FPR[FPR_idx]); 00669 BuildMI(BB, PPC::FMR, 1, Reg).addReg(FPR[FPR_idx]); 00670 FPR_remaining--; 00671 FPR_idx++; 00672 } else { 00673 addFrameReference(BuildMI(BB, PPC::LFD, 2, Reg), FI); 00674 } 00675 } 00676 00677 // doubles require 4 additional bytes and use 2 GPRs of param space 00678 ArgOffset += 4; 00679 if (GPR_remaining > 0) { 00680 GPR_remaining--; 00681 GPR_idx++; 00682 } 00683 break; 00684 default: 00685 assert(0 && "Unhandled argument type!"); 00686 } 00687 ArgOffset += 4; // Each argument takes at least 4 bytes on the stack... 00688 if (GPR_remaining > 0) { 00689 GPR_remaining--; // uses up 2 GPRs 00690 GPR_idx++; 00691 } 00692 } 00693 00694 // If the function takes variable number of arguments, add a frame offset for 00695 // the start of the first vararg value... this is used to expand 00696 // llvm.va_start. 00697 if (Fn.getFunctionType()->isVarArg()) 00698 VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset); 00699 } 00700 00701 00702 /// SelectPHINodes - Insert machine code to generate phis. This is tricky 00703 /// because we have to generate our sources into the source basic blocks, not 00704 /// the current one. 00705 /// 00706 void PPC64ISel::SelectPHINodes() { 00707 const TargetInstrInfo &TII = *TM.getInstrInfo(); 00708 const Function &LF = *F->getFunction(); // The LLVM function... 00709 for (Function::const_iterator I = LF.begin(), E = LF.end(); I != E; ++I) { 00710 const BasicBlock *BB = I; 00711 MachineBasicBlock &MBB = *MBBMap[I]; 00712 00713 // Loop over all of the PHI nodes in the LLVM basic block... 00714 MachineBasicBlock::iterator PHIInsertPoint = MBB.begin(); 00715 for (BasicBlock::const_iterator I = BB->begin(); 00716 PHINode *PN = const_cast<PHINode*>(dyn_cast<PHINode>(I)); ++I) { 00717 00718 // Create a new machine instr PHI node, and insert it. 00719 unsigned PHIReg = getReg(*PN); 00720 MachineInstr *PhiMI = BuildMI(MBB, PHIInsertPoint, 00721 PPC::PHI, PN->getNumOperands(), PHIReg); 00722 00723 // PHIValues - Map of blocks to incoming virtual registers. We use this 00724 // so that we only initialize one incoming value for a particular block, 00725 // even if the block has multiple entries in the PHI node. 00726 // 00727 std::map<MachineBasicBlock*, unsigned> PHIValues; 00728 00729 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 00730 MachineBasicBlock *PredMBB = 0; 00731 for (MachineBasicBlock::pred_iterator PI = MBB.pred_begin (), 00732 PE = MBB.pred_end (); PI != PE; ++PI) 00733 if (PN->getIncomingBlock(i) == (*PI)->getBasicBlock()) { 00734 PredMBB = *PI; 00735 break; 00736 } 00737 assert (PredMBB && "Couldn't find incoming machine-cfg edge for phi"); 00738 00739 unsigned ValReg; 00740 std::map<MachineBasicBlock*, unsigned>::iterator EntryIt = 00741 PHIValues.lower_bound(PredMBB); 00742 00743 if (EntryIt != PHIValues.end() && EntryIt->first == PredMBB) { 00744 // We already inserted an initialization of the register for this 00745 // predecessor. Recycle it. 00746 ValReg = EntryIt->second; 00747 } else { 00748 // Get the incoming value into a virtual register. 00749 // 00750 Value *Val = PN->getIncomingValue(i); 00751 00752 // If this is a constant or GlobalValue, we may have to insert code 00753 // into the basic block to compute it into a virtual register. 00754 if ((isa<Constant>(Val) && !isa<ConstantExpr>(Val)) || 00755 isa<GlobalValue>(Val)) { 00756 // Simple constants get emitted at the end of the basic block, 00757 // before any terminator instructions. We "know" that the code to 00758 // move a constant into a register will never clobber any flags. 00759 ValReg = getReg(Val, PredMBB, PredMBB->getFirstTerminator()); 00760 } else { 00761 // Because we don't want to clobber any values which might be in 00762 // physical registers with the computation of this constant (which 00763 // might be arbitrarily complex if it is a constant expression), 00764 // just insert the computation at the top of the basic block. 00765 MachineBasicBlock::iterator PI = PredMBB->begin(); 00766 00767 // Skip over any PHI nodes though! 00768 while (PI != PredMBB->end() && PI->getOpcode() == PPC::PHI) 00769 ++PI; 00770 00771 ValReg = getReg(Val, PredMBB, PI); 00772 } 00773 00774 // Remember that we inserted a value for this PHI for this predecessor 00775 PHIValues.insert(EntryIt, std::make_pair(PredMBB, ValReg)); 00776 } 00777 00778 PhiMI->addRegOperand(ValReg); 00779 PhiMI->addMachineBasicBlockOperand(PredMBB); 00780 } 00781 00782 // Now that we emitted all of the incoming values for the PHI node, make 00783 // sure to reposition the InsertPoint after the PHI that we just added. 00784 // This is needed because we might have inserted a constant into this 00785 // block, right after the PHI's which is before the old insert point! 00786 PHIInsertPoint = PhiMI; 00787 ++PHIInsertPoint; 00788 } 00789 } 00790 } 00791 00792 00793 // canFoldSetCCIntoBranchOrSelect - Return the setcc instruction if we can fold 00794 // it into the conditional branch or select instruction which is the only user 00795 // of the cc instruction. This is the case if the conditional branch is the 00796 // only user of the setcc, and if the setcc is in the same basic block as the 00797 // conditional branch. 00798 // 00799 static SetCondInst *canFoldSetCCIntoBranchOrSelect(Value *V) { 00800 if (SetCondInst *SCI = dyn_cast<SetCondInst>(V)) 00801 if (SCI->hasOneUse()) { 00802 Instruction *User = cast<Instruction>(SCI->use_back()); 00803 if ((isa<BranchInst>(User) || isa<SelectInst>(User)) && 00804 SCI->getParent() == User->getParent()) 00805 return SCI; 00806 } 00807 return 0; 00808 } 00809 00810 00811 // canFoldGEPIntoLoadOrStore - Return the GEP instruction if we can fold it into 00812 // the load or store instruction that is the only user of the GEP. 00813 // 00814 static GetElementPtrInst *canFoldGEPIntoLoadOrStore(Value *V) { 00815 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) 00816 if (GEPI->hasOneUse()) { 00817 Instruction *User = cast<Instruction>(GEPI->use_back()); 00818 if (isa<StoreInst>(User) && 00819 GEPI->getParent() == User->getParent() && 00820 User->getOperand(0) != GEPI && 00821 User->getOperand(1) == GEPI) { 00822 ++GEPFolds; 00823 return GEPI; 00824 } 00825 if (isa<LoadInst>(User) && 00826 GEPI->getParent() == User->getParent() && 00827 User->getOperand(0) == GEPI) { 00828 ++GEPFolds; 00829 return GEPI; 00830 } 00831 } 00832 return 0; 00833 } 00834 00835 00836 // Return a fixed numbering for setcc instructions which does not depend on the 00837 // order of the opcodes. 00838 // 00839 static unsigned getSetCCNumber(unsigned Opcode) { 00840 switch (Opcode) { 00841 default: assert(0 && "Unknown setcc instruction!"); 00842 case Instruction::SetEQ: return 0; 00843 case Instruction::SetNE: return 1; 00844 case Instruction::SetLT: return 2; 00845 case Instruction::SetGE: return 3; 00846 case Instruction::SetGT: return 4; 00847 case Instruction::SetLE: return 5; 00848 } 00849 } 00850 00851 static unsigned getPPCOpcodeForSetCCNumber(unsigned Opcode) { 00852 switch (Opcode) { 00853 default: assert(0 && "Unknown setcc instruction!"); 00854 case Instruction::SetEQ: return PPC::BEQ; 00855 case Instruction::SetNE: return PPC::BNE; 00856 case Instruction::SetLT: return PPC::BLT; 00857 case Instruction::SetGE: return PPC::BGE; 00858 case Instruction::SetGT: return PPC::BGT; 00859 case Instruction::SetLE: return PPC::BLE; 00860 } 00861 } 00862 00863 /// emitUCOM - emits an unordered FP compare. 00864 void PPC64ISel::emitUCOM(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP, 00865 unsigned LHS, unsigned RHS) { 00866 BuildMI(*MBB, IP, PPC::FCMPU, 2, PPC::CR0).addReg(LHS).addReg(RHS); 00867 } 00868 00869 /// EmitComparison - emits a comparison of the two operands, returning the 00870 /// extended setcc code to use. The result is in CR0. 00871 /// 00872 unsigned PPC64ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1, 00873 MachineBasicBlock *MBB, 00874 MachineBasicBlock::iterator IP) { 00875 // The arguments are already supposed to be of the same type. 00876 const Type *CompTy = Op0->getType(); 00877 unsigned Class = getClassB(CompTy); 00878 unsigned Op0r = getReg(Op0, MBB, IP); 00879 00880 // Before we do a comparison, we have to make sure that we're truncating our 00881 // registers appropriately. 00882 if (Class == cByte) { 00883 unsigned TmpReg = makeAnotherReg(CompTy); 00884 if (CompTy->isSigned()) 00885 BuildMI(*MBB, IP, PPC::EXTSB, 1, TmpReg).addReg(Op0r); 00886 else 00887 BuildMI(*MBB, IP, PPC::RLWINM, 4, TmpReg).addReg(Op0r).addImm(0) 00888 .addImm(24).addImm(31); 00889 Op0r = TmpReg; 00890 } else if (Class == cShort) { 00891 unsigned TmpReg = makeAnotherReg(CompTy); 00892 if (CompTy->isSigned()) 00893 BuildMI(*MBB, IP, PPC::EXTSH, 1, TmpReg).addReg(Op0r); 00894 else 00895 BuildMI(*MBB, IP, PPC::RLWINM, 4, TmpReg).addReg(Op0r).addImm(0) 00896 .addImm(16).addImm(31); 00897 Op0r = TmpReg; 00898 } 00899 00900 // Use crand for lt, gt and crandc for le, ge 00901 unsigned CROpcode = (OpNum == 2 || OpNum == 4) ? PPC::CRAND : PPC::CRANDC; 00902 unsigned Opcode = CompTy->isSigned() ? PPC::CMPW : PPC::CMPLW; 00903 unsigned OpcodeImm = CompTy->isSigned() ? PPC::CMPWI : PPC::CMPLWI; 00904 if (Class == cLong) { 00905 Opcode = CompTy->isSigned() ? PPC::CMPD : PPC::CMPLD; 00906 OpcodeImm = CompTy->isSigned() ? PPC::CMPDI : PPC::CMPLDI; 00907 } 00908 00909 // Special case handling of: cmp R, i 00910 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { 00911 unsigned Op1v = CI->getRawValue() & 0xFFFF; 00912 00913 // Treat compare like ADDI for the purposes of immediate suitability 00914 if (canUseAsImmediateForOpcode(CI, 0)) { 00915 BuildMI(*MBB, IP, OpcodeImm, 2, PPC::CR0).addReg(Op0r).addSImm(Op1v); 00916 } else { 00917 unsigned Op1r = getReg(Op1, MBB, IP); 00918 BuildMI(*MBB, IP, Opcode, 2, PPC::CR0).addReg(Op0r).addReg(Op1r); 00919 } 00920 return OpNum; 00921 } 00922 00923 unsigned Op1r = getReg(Op1, MBB, IP); 00924 00925 switch (Class) { 00926 default: assert(0 && "Unknown type class!"); 00927 case cByte: 00928 case cShort: 00929 case cInt: 00930 case cLong: 00931 BuildMI(*MBB, IP, Opcode, 2, PPC::CR0).addReg(Op0r).addReg(Op1r); 00932 break; 00933 00934 case cFP32: 00935 case cFP64: 00936 emitUCOM(MBB, IP, Op0r, Op1r); 00937 break; 00938 } 00939 00940 return OpNum; 00941 } 00942 00943 /// visitSetCondInst - emit code to calculate the condition via 00944 /// EmitComparison(), and possibly store a 0 or 1 to a register as a result 00945 /// 00946 void PPC64ISel::visitSetCondInst(SetCondInst &I) { 00947 if (canFoldSetCCIntoBranchOrSelect(&I)) 00948 return; 00949 00950 unsigned DestReg = getReg(I); 00951 unsigned OpNum = I.getOpcode(); 00952 const Type *Ty = I.getOperand (0)->getType(); 00953 00954 EmitComparison(OpNum, I.getOperand(0), I.getOperand(1), BB, BB->end()); 00955 00956 unsigned Opcode = getPPCOpcodeForSetCCNumber(OpNum); 00957 MachineBasicBlock *thisMBB = BB; 00958 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 00959 ilist<MachineBasicBlock>::iterator It = BB; 00960 ++It; 00961 00962 // thisMBB: 00963 // ... 00964 // cmpTY cr0, r1, r2 00965 // bCC copy1MBB 00966 // b copy0MBB 00967 00968 // FIXME: we wouldn't need copy0MBB (we could fold it into thisMBB) 00969 // if we could insert other, non-terminator instructions after the 00970 // bCC. But MBB->getFirstTerminator() can't understand this. 00971 MachineBasicBlock *copy1MBB = new MachineBasicBlock(LLVM_BB); 00972 F->getBasicBlockList().insert(It, copy1MBB); 00973 BuildMI(BB, Opcode, 2).addReg(PPC::CR0).addMBB(copy1MBB); 00974 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 00975 F->getBasicBlockList().insert(It, copy0MBB); 00976 BuildMI(BB, PPC::B, 1).addMBB(copy0MBB); 00977 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 00978 F->getBasicBlockList().insert(It, sinkMBB); 00979 // Update machine-CFG edges 00980 BB->addSuccessor(copy1MBB); 00981 BB->addSuccessor(copy0MBB); 00982 00983 // copy1MBB: 00984 // %TrueValue = li 1 00985 // b sinkMBB 00986 BB = copy1MBB; 00987 unsigned TrueValue = makeAnotherReg(I.getType()); 00988 BuildMI(BB, PPC::LI, 1, TrueValue).addSImm(1); 00989 BuildMI(BB, PPC::B, 1).addMBB(sinkMBB); 00990 // Update machine-CFG edges 00991 BB->addSuccessor(sinkMBB); 00992 00993 // copy0MBB: 00994 // %FalseValue = li 0 00995 // fallthrough 00996 BB = copy0MBB; 00997 unsigned FalseValue = makeAnotherReg(I.getType()); 00998 BuildMI(BB, PPC::LI, 1, FalseValue).addSImm(0); 00999 // Update machine-CFG edges 01000 BB->addSuccessor(sinkMBB); 01001 01002 // sinkMBB: 01003 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, copy1MBB ] 01004 // ... 01005 BB = sinkMBB; 01006 BuildMI(BB, PPC::PHI, 4, DestReg).addReg(FalseValue) 01007 .addMBB(copy0MBB).addReg(TrueValue).addMBB(copy1MBB); 01008 } 01009 01010 void PPC64ISel::visitSelectInst(SelectInst &SI) { 01011 unsigned DestReg = getReg(SI); 01012 MachineBasicBlock::iterator MII = BB->end(); 01013 emitSelectOperation(BB, MII, SI.getCondition(), SI.getTrueValue(), 01014 SI.getFalseValue(), DestReg); 01015 } 01016 01017 /// emitSelect - Common code shared between visitSelectInst and the constant 01018 /// expression support. 01019 /// FIXME: this is most likely broken in one or more ways. Namely, PowerPC has 01020 /// no select instruction. FSEL only works for comparisons against zero. 01021 void PPC64ISel::emitSelectOperation(MachineBasicBlock *MBB, 01022 MachineBasicBlock::iterator IP, 01023 Value *Cond, Value *TrueVal, 01024 Value *FalseVal, unsigned DestReg) { 01025 unsigned SelectClass = getClassB(TrueVal->getType()); 01026 unsigned Opcode; 01027 01028 // See if we can fold the setcc into the select instruction, or if we have 01029 // to get the register of the Cond value 01030 if (SetCondInst *SCI = canFoldSetCCIntoBranchOrSelect(Cond)) { 01031 // We successfully folded the setcc into the select instruction. 01032 unsigned OpNum = getSetCCNumber(SCI->getOpcode()); 01033 OpNum = EmitComparison(OpNum, SCI->getOperand(0),SCI->getOperand(1),MBB,IP); 01034 Opcode = getPPCOpcodeForSetCCNumber(SCI->getOpcode()); 01035 } else { 01036 unsigned CondReg = getReg(Cond, MBB, IP); 01037 BuildMI(*MBB, IP, PPC::CMPI, 2, PPC::CR0).addReg(CondReg).addSImm(0); 01038 Opcode = getPPCOpcodeForSetCCNumber(Instruction::SetNE); 01039 } 01040 01041 // thisMBB: 01042 // ... 01043 // cmpTY cr0, r1, r2 01044 // bCC copy1MBB 01045 // b copy0MBB 01046 01047 MachineBasicBlock *thisMBB = BB; 01048 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 01049 ilist<MachineBasicBlock>::iterator It = BB; 01050 ++It; 01051 01052 // FIXME: we wouldn't need copy0MBB (we could fold it into thisMBB) 01053 // if we could insert other, non-terminator instructions after the 01054 // bCC. But MBB->getFirstTerminator() can't understand this. 01055 MachineBasicBlock *copy1MBB = new MachineBasicBlock(LLVM_BB); 01056 F->getBasicBlockList().insert(It, copy1MBB); 01057 BuildMI(BB, Opcode, 2).addReg(PPC::CR0).addMBB(copy1MBB); 01058 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 01059 F->getBasicBlockList().insert(It, copy0MBB); 01060 BuildMI(BB, PPC::B, 1).addMBB(copy0MBB); 01061 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 01062 F->getBasicBlockList().insert(It, sinkMBB); 01063 // Update machine-CFG edges 01064 BB->addSuccessor(copy1MBB); 01065 BB->addSuccessor(copy0MBB); 01066 01067 // copy1MBB: 01068 // %TrueValue = ... 01069 // b sinkMBB 01070 BB = copy1MBB; 01071 unsigned TrueValue = getReg(TrueVal, BB, BB->begin()); 01072 BuildMI(BB, PPC::B, 1).addMBB(sinkMBB); 01073 // Update machine-CFG edges 01074 BB->addSuccessor(sinkMBB); 01075 01076 // copy0MBB: 01077 // %FalseValue = ... 01078 // fallthrough 01079 BB = copy0MBB; 01080 unsigned FalseValue = getReg(FalseVal, BB, BB->begin()); 01081 // Update machine-CFG edges 01082 BB->addSuccessor(sinkMBB); 01083 01084 // sinkMBB: 01085 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, copy1MBB ] 01086 // ... 01087 BB = sinkMBB; 01088 BuildMI(BB, PPC::PHI, 4, DestReg).addReg(FalseValue) 01089 .addMBB(copy0MBB).addReg(TrueValue).addMBB(copy1MBB); 01090 return; 01091 } 01092 01093 01094 01095 /// promote32 - Emit instructions to turn a narrow operand into a 32-bit-wide 01096 /// operand, in the specified target register. 01097 /// 01098 void PPC64ISel::promote32(unsigned targetReg, const ValueRecord &VR) { 01099 bool isUnsigned = VR.Ty->isUnsigned() || VR.Ty == Type::BoolTy; 01100 01101 Value *Val = VR.Val; 01102 const Type *Ty = VR.Ty; 01103 if (Val) { 01104 if (Constant *C = dyn_cast<Constant>(Val)) { 01105 Val = ConstantExpr::getCast(C, Type::IntTy); 01106 if (isa<ConstantExpr>(Val)) // Could not fold 01107 Val = C; 01108 else 01109 Ty = Type::IntTy; // Folded! 01110 } 01111 01112 // If this is a simple constant, just emit a load directly to avoid the copy 01113 if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { 01114 int TheVal = CI->getRawValue() & 0xFFFFFFFF; 01115 01116 if (TheVal < 32768 && TheVal >= -32768) { 01117 BuildMI(BB, PPC::LI, 1, targetReg).addSImm(TheVal); 01118 } else { 01119 unsigned TmpReg = makeAnotherReg(Type::IntTy); 01120 BuildMI(BB, PPC::LIS, 1, TmpReg).addSImm(TheVal >> 16); 01121 BuildMI(BB, PPC::ORI, 2, targetReg).addReg(TmpReg) 01122 .addImm(TheVal & 0xFFFF); 01123 } 01124 return; 01125 } 01126 } 01127 01128 // Make sure we have the register number for this value... 01129 unsigned Reg = Val ? getReg(Val) : VR.Reg; 01130 switch (getClassB(Ty)) { 01131 case cByte: 01132 // Extend value into target register (8->32) 01133 if (isUnsigned) 01134 BuildMI(BB, PPC::RLWINM, 4, targetReg).addReg(Reg).addZImm(0) 01135 .addZImm(24).addZImm(31); 01136 else 01137 BuildMI(BB, PPC::EXTSB, 1, targetReg).addReg(Reg); 01138 break; 01139 case cShort: 01140 // Extend value into target register (16->32) 01141 if (isUnsigned) 01142 BuildMI(BB, PPC::RLWINM, 4, targetReg).addReg(Reg).addZImm(0) 01143 .addZImm(16).addZImm(31); 01144 else 01145 BuildMI(BB, PPC::EXTSH, 1, targetReg).addReg(Reg); 01146 break; 01147 case cInt: 01148 case cLong: 01149 // Move value into target register (32->32) 01150 BuildMI(BB, PPC::OR, 2, targetReg).addReg(Reg).addReg(Reg); 01151 break; 01152 default: 01153 assert(0 && "Unpromotable operand class in promote32"); 01154 } 01155 } 01156 01157 /// visitReturnInst - implemented with BLR 01158 /// 01159 void PPC64ISel::visitReturnInst(ReturnInst &I) { 01160 // Only do the processing if this is a non-void return 01161 if (I.getNumOperands() > 0) { 01162 Value *RetVal = I.getOperand(0); 01163 switch (getClassB(RetVal->getType())) { 01164 case cByte: // integral return values: extend or move into r3 and return 01165 case cShort: 01166 case cInt: 01167 case cLong: 01168 promote32(PPC::R3, ValueRecord(RetVal)); 01169 break; 01170 case cFP32: 01171 case cFP64: { // Floats & Doubles: Return in f1 01172 unsigned RetReg = getReg(RetVal); 01173 BuildMI(BB, PPC::FMR, 1, PPC::F1).addReg(RetReg); 01174 break; 01175 } 01176 default: 01177 visitInstruction(I); 01178 } 01179 } 01180 BuildMI(BB, PPC::BLR, 1).addImm(1); 01181 } 01182 01183 // getBlockAfter - Return the basic block which occurs lexically after the 01184 // specified one. 01185 static inline BasicBlock *getBlockAfter(BasicBlock *BB) { 01186 Function::iterator I = BB; ++I; // Get iterator to next block 01187 return I != BB->getParent()->end() ? &*I : 0; 01188 } 01189 01190 /// visitBranchInst - Handle conditional and unconditional branches here. Note 01191 /// that since code layout is frozen at this point, that if we are trying to 01192 /// jump to a block that is the immediate successor of the current block, we can 01193 /// just make a fall-through (but we don't currently). 01194 /// 01195 void PPC64ISel::visitBranchInst(BranchInst &BI) { 01196 // Update machine-CFG edges 01197 BB->addSuccessor(MBBMap[BI.getSuccessor(0)]); 01198 if (BI.isConditional()) 01199 BB->addSuccessor(MBBMap[BI.getSuccessor(1)]); 01200 01201 BasicBlock *NextBB = getBlockAfter(BI.getParent()); // BB after current one 01202 01203 if (!BI.isConditional()) { // Unconditional branch? 01204 if (BI.getSuccessor(0) != NextBB) 01205 BuildMI(BB, PPC::B, 1).addMBB(MBBMap[BI.getSuccessor(0)]); 01206 return; 01207 } 01208 01209 // See if we can fold the setcc into the branch itself... 01210 SetCondInst *SCI = canFoldSetCCIntoBranchOrSelect(BI.getCondition()); 01211 if (SCI == 0) { 01212 // Nope, cannot fold setcc into this branch. Emit a branch on a condition 01213 // computed some other way... 01214 unsigned condReg = getReg(BI.getCondition()); 01215 BuildMI(BB, PPC::CMPLI, 3, PPC::CR0).addImm(0).addReg(condReg) 01216 .addImm(0); 01217 if (BI.getSuccessor(1) == NextBB) { 01218 if (BI.getSuccessor(0) != NextBB) 01219 BuildMI(BB, PPC::COND_BRANCH, 3).addReg(PPC::CR0).addImm(PPC::BNE) 01220 .addMBB(MBBMap[BI.getSuccessor(0)]) 01221 .addMBB(MBBMap[BI.getSuccessor(1)]); 01222 } else { 01223 BuildMI(BB, PPC::COND_BRANCH, 3).addReg(PPC::CR0).addImm(PPC::BEQ) 01224 .addMBB(MBBMap[BI.getSuccessor(1)]) 01225 .addMBB(MBBMap[BI.getSuccessor(0)]); 01226 if (BI.getSuccessor(0) != NextBB) 01227 BuildMI(BB, PPC::B, 1).addMBB(MBBMap[BI.getSuccessor(0)]); 01228 } 01229 return; 01230 } 01231 01232 unsigned OpNum = getSetCCNumber(SCI->getOpcode()); 01233 unsigned Opcode = getPPCOpcodeForSetCCNumber(SCI->getOpcode()); 01234 MachineBasicBlock::iterator MII = BB->end(); 01235 OpNum = EmitComparison(OpNum, SCI->getOperand(0), SCI->getOperand(1), BB,MII); 01236 01237 if (BI.getSuccessor(0) != NextBB) { 01238 BuildMI(BB, PPC::COND_BRANCH, 3).addReg(PPC::CR0).addImm(Opcode) 01239 .addMBB(MBBMap[BI.getSuccessor(0)]) 01240 .addMBB(MBBMap[BI.getSuccessor(1)]); 01241 if (BI.getSuccessor(1) != NextBB) 01242 BuildMI(BB, PPC::B, 1).addMBB(MBBMap[BI.getSuccessor(1)]); 01243 } else { 01244 // Change to the inverse condition... 01245 if (BI.getSuccessor(1) != NextBB) { 01246 Opcode = PPC64InstrInfo::invertPPCBranchOpcode(Opcode); 01247 BuildMI(BB, PPC::COND_BRANCH, 3).addReg(PPC::CR0).addImm(Opcode) 01248 .addMBB(MBBMap[BI.getSuccessor(1)]) 01249 .addMBB(MBBMap[BI.getSuccessor(0)]); 01250 } 01251 } 01252 } 01253 01254 /// doCall - This emits an abstract call instruction, setting up the arguments 01255 /// and the return value as appropriate. For the actual function call itself, 01256 /// it inserts the specified CallMI instruction into the stream. 01257 /// 01258 void PPC64ISel::doCall(const ValueRecord &Ret, MachineInstr *CallMI, 01259 const std::vector<ValueRecord> &Args, bool isVarArg) { 01260 // Count how many bytes are to be pushed on the stack, including the linkage 01261 // area, and parameter passing area. 01262 unsigned NumBytes = ParameterSaveAreaOffset; 01263 unsigned ArgOffset = ParameterSaveAreaOffset; 01264 01265 if (!Args.empty()) { 01266 for (unsigned i = 0, e = Args.size(); i != e; ++i) 01267 switch (getClassB(Args[i].Ty)) { 01268 case cByte: case cShort: case cInt: 01269 NumBytes += 4; break; 01270 case cLong: 01271 NumBytes += 8; break; 01272 case cFP32: 01273 NumBytes += 4; break; 01274 case cFP64: 01275 NumBytes += 8; break; 01276 break; 01277 default: assert(0 && "Unknown class!"); 01278 } 01279 01280 // Just to be safe, we'll always reserve the full argument passing space in 01281 // case any called code gets funky on us. 01282 if (NumBytes < ParameterSaveAreaOffset + MaxArgumentStackSpace) 01283 NumBytes = ParameterSaveAreaOffset + MaxArgumentStackSpace; 01284 01285 // Adjust the stack pointer for the new arguments... 01286 // These functions are automatically eliminated by the prolog/epilog pass 01287 BuildMI(BB, PPC::ADJCALLSTACKDOWN, 1).addImm(NumBytes); 01288 01289 // Arguments go on the stack in reverse order, as specified by the ABI. 01290 int GPR_remaining = 8, FPR_remaining = 13; 01291 unsigned GPR_idx = 0, FPR_idx = 0; 01292 static const unsigned GPR[] = { 01293 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 01294 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 01295 }; 01296 static const unsigned FPR[] = { 01297 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, 01298 PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, 01299 PPC::F13 01300 }; 01301 01302 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 01303 unsigned ArgReg; 01304 switch (getClassB(Args[i].Ty)) { 01305 case cByte: 01306 case cShort: 01307 // Promote arg to 32 bits wide into a temporary register... 01308 ArgReg = makeAnotherReg(Type::UIntTy); 01309 promote32(ArgReg, Args[i]); 01310 01311 // Reg or stack? 01312 if (GPR_remaining > 0) { 01313 BuildMI(BB, PPC::OR, 2, GPR[GPR_idx]).addReg(ArgReg) 01314 .addReg(ArgReg); 01315 CallMI->addRegOperand(GPR[GPR_idx], MachineOperand::Use); 01316 } 01317 if (GPR_remaining <= 0 || isVarArg) { 01318 BuildMI(BB, PPC::STW, 3).addReg(ArgReg).addSImm(ArgOffset) 01319 .addReg(PPC::R1); 01320 } 01321 break; 01322 case cInt: 01323 ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg; 01324 01325 // Reg or stack? 01326 if (GPR_remaining > 0) { 01327 BuildMI(BB, PPC::OR, 2, GPR[GPR_idx]).addReg(ArgReg) 01328 .addReg(ArgReg); 01329 CallMI->addRegOperand(GPR[GPR_idx], MachineOperand::Use); 01330 } 01331 if (GPR_remaining <= 0 || isVarArg) { 01332 BuildMI(BB, PPC::STW, 3).addReg(ArgReg).addSImm(ArgOffset) 01333 .addReg(PPC::R1); 01334 } 01335 break; 01336 case cLong: 01337 ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg; 01338 01339 // Reg or stack? 01340 if (GPR_remaining > 0) { 01341 BuildMI(BB, PPC::OR, 2, GPR[GPR_idx]).addReg(ArgReg) 01342 .addReg(ArgReg); 01343 CallMI->addRegOperand(GPR[GPR_idx], MachineOperand::Use); 01344 } 01345 if (GPR_remaining <= 0 || isVarArg) { 01346 BuildMI(BB, PPC::STD, 3).addReg(ArgReg).addSImm(ArgOffset) 01347 .addReg(PPC::R1); 01348 } 01349 ArgOffset += 4; // 8 byte entry, not 4. 01350 break; 01351 case cFP32: 01352 ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg; 01353 // Reg or stack? 01354 if (FPR_remaining > 0) { 01355 BuildMI(BB, PPC::FMR, 1, FPR[FPR_idx]).addReg(ArgReg); 01356 CallMI->addRegOperand(FPR[FPR_idx], MachineOperand::Use); 01357 FPR_remaining--; 01358 FPR_idx++; 01359 01360 // If this is a vararg function, and there are GPRs left, also 01361 // pass the float in an int. Otherwise, put it on the stack. 01362 if (isVarArg) { 01363 BuildMI(BB, PPC::STFS, 3).addReg(ArgReg).addSImm(ArgOffset) 01364 .addReg(PPC::R1); 01365 if (GPR_remaining > 0) { 01366 BuildMI(BB, PPC::LWZ, 2, GPR[GPR_idx]) 01367 .addSImm(ArgOffset).addReg(ArgReg); 01368 CallMI->addRegOperand(GPR[GPR_idx], MachineOperand::Use); 01369 } 01370 } 01371 } else { 01372 BuildMI(BB, PPC::STFS, 3).addReg(ArgReg).addSImm(ArgOffset) 01373 .addReg(PPC::R1); 01374 } 01375 break; 01376 case cFP64: 01377 ArgReg = Args[i].Val ? getReg(Args[i].Val) : Args[i].Reg; 01378 // Reg or stack? 01379 if (FPR_remaining > 0) { 01380 BuildMI(BB, PPC::FMR, 1, FPR[FPR_idx]).addReg(ArgReg); 01381 CallMI->addRegOperand(FPR[FPR_idx], MachineOperand::Use); 01382 FPR_remaining--; 01383 FPR_idx++; 01384 // For vararg functions, must pass doubles via int regs as well 01385 if (isVarArg) { 01386 BuildMI(BB, PPC::STFD, 3).addReg(ArgReg).addSImm(ArgOffset) 01387 .addReg(PPC::R1); 01388 01389 if (GPR_remaining > 0) { 01390 BuildMI(BB, PPC::LD, 2, GPR[GPR_idx]).addSImm(ArgOffset) 01391 .addReg(PPC::R1); 01392 CallMI->addRegOperand(GPR[GPR_idx], MachineOperand::Use); 01393 } 01394 } 01395 } else { 01396 BuildMI(BB, PPC::STFD, 3).addReg(ArgReg).addSImm(ArgOffset) 01397 .addReg(PPC::R1); 01398 } 01399 // Doubles use 8 bytes 01400 ArgOffset += 4; 01401 break; 01402 01403 default: assert(0 && "Unknown class!"); 01404 } 01405 ArgOffset += 4; 01406 GPR_remaining--; 01407 GPR_idx++; 01408 } 01409 } else { 01410 BuildMI(BB, PPC::ADJCALLSTACKDOWN, 1).addImm(0); 01411 } 01412 01413 BuildMI(BB, PPC::IMPLICIT_DEF, 0, PPC::LR); 01414 BB->push_back(CallMI); 01415 BuildMI(BB, PPC::NOP, 0); 01416 01417 // These functions are automatically eliminated by the prolog/epilog pass 01418 BuildMI(BB, PPC::ADJCALLSTACKUP, 1).addImm(NumBytes); 01419 01420 // If there is a return value, scavenge the result from the location the call 01421 // leaves it in... 01422 // 01423 if (Ret.Ty != Type::VoidTy) { 01424 unsigned DestClass = getClassB(Ret.Ty); 01425 switch (DestClass) { 01426 case cByte: 01427 case cShort: 01428 case cInt: 01429 case cLong: 01430 // Integral results are in r3 01431 BuildMI(BB, PPC::OR, 2, Ret.Reg).addReg(PPC::R3).addReg(PPC::R3); 01432 break; 01433 case cFP32: // Floating-point return values live in f1 01434 case cFP64: 01435 BuildMI(BB, PPC::FMR, 1, Ret.Reg).addReg(PPC::F1); 01436 break; 01437 default: assert(0 && "Unknown class!"); 01438 } 01439 } 01440 } 01441 01442 01443 /// visitCallInst - Push args on stack and do a procedure call instruction. 01444 void PPC64ISel::visitCallInst(CallInst &CI) { 01445 MachineInstr *TheCall; 01446 Function *F = CI.getCalledFunction(); 01447 if (F) { 01448 // Is it an intrinsic function call? 01449 if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) { 01450 visitIntrinsicCall(ID, CI); // Special intrinsics are not handled here 01451 return; 01452 } 01453 // Emit a CALL instruction with PC-relative displacement. 01454 TheCall = BuildMI(PPC::CALLpcrel, 1).addGlobalAddress(F, true); 01455 } else { // Emit an indirect call through the CTR 01456 unsigned Reg = getReg(CI.getCalledValue()); 01457 BuildMI(BB, PPC::MTCTR, 1).addReg(Reg); 01458 TheCall = BuildMI(PPC::CALLindirect, 2).addZImm(20).addZImm(0); 01459 } 01460 01461 std::vector<ValueRecord> Args; 01462 for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i) 01463 Args.push_back(ValueRecord(CI.getOperand(i))); 01464 01465 unsigned DestReg = CI.getType() != Type::VoidTy ? getReg(CI) : 0; 01466 bool isVarArg = F ? F->getFunctionType()->isVarArg() : true; 01467 doCall(ValueRecord(DestReg, CI.getType()), TheCall, Args, isVarArg); 01468 } 01469 01470 01471 /// dyncastIsNan - Return the operand of an isnan operation if this is an isnan. 01472 /// 01473 static Value *dyncastIsNan(Value *V) { 01474 if (CallInst *CI = dyn_cast<CallInst>(V)) 01475 if (Function *F = CI->getCalledFunction()) 01476 if (F->getIntrinsicID() == Intrinsic::isunordered) 01477 return CI->getOperand(1); 01478 return 0; 01479 } 01480 01481 /// isOnlyUsedByUnorderedComparisons - Return true if this value is only used by 01482 /// or's whos operands are all calls to the isnan predicate. 01483 static bool isOnlyUsedByUnorderedComparisons(Value *V) { 01484 assert(dyncastIsNan(V) && "The value isn't an isnan call!"); 01485 01486 // Check all uses, which will be or's of isnans if this predicate is true. 01487 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ 01488 Instruction *I = cast<Instruction>(*UI); 01489 if (I->getOpcode() != Instruction::Or) return false; 01490 if (I->getOperand(0) != V && !dyncastIsNan(I->getOperand(0))) return false; 01491 if (I->getOperand(1) != V && !dyncastIsNan(I->getOperand(1))) return false; 01492 } 01493 01494 return true; 01495 } 01496 01497 /// LowerUnknownIntrinsicFunctionCalls - This performs a prepass over the 01498 /// function, lowering any calls to unknown intrinsic functions into the 01499 /// equivalent LLVM code. 01500 /// 01501 void PPC64ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) { 01502 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) 01503 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) 01504 if (CallInst *CI = dyn_cast<CallInst>(I++)) 01505 if (Function *F = CI->getCalledFunction()) 01506 switch (F->getIntrinsicID()) { 01507 case Intrinsic::not_intrinsic: 01508 case Intrinsic::vastart: 01509 case Intrinsic::vacopy: 01510 case Intrinsic::vaend: 01511 case Intrinsic::returnaddress: 01512 case Intrinsic::frameaddress: 01513 // FIXME: should lower these ourselves 01514 // case Intrinsic::isunordered: 01515 // case Intrinsic::memcpy: -> doCall(). system memcpy almost 01516 // guaranteed to be faster than anything we generate ourselves 01517 // We directly implement these intrinsics 01518 break; 01519 case Intrinsic::readio: { 01520 // On PPC, memory operations are in-order. Lower this intrinsic 01521 // into a volatile load. 01522 Instruction *Before = CI->getPrev(); 01523 LoadInst * LI = new LoadInst(CI->getOperand(1), "", true, CI); 01524 CI->replaceAllUsesWith(LI); 01525 BB->getInstList().erase(CI); 01526 break; 01527 } 01528 case Intrinsic::writeio: { 01529 // On PPC, memory operations are in-order. Lower this intrinsic 01530 // into a volatile store. 01531 Instruction *Before = CI->getPrev(); 01532 StoreInst *SI = new StoreInst(CI->getOperand(1), 01533 CI->getOperand(2), true, CI); 01534 CI->replaceAllUsesWith(SI); 01535 BB->getInstList().erase(CI); 01536 break; 01537 } 01538 default: 01539 // All other intrinsic calls we must lower. 01540 Instruction *Before = CI->getPrev(); 01541 TM.getIntrinsicLowering().LowerIntrinsicCall(CI); 01542 if (Before) { // Move iterator to instruction after call 01543 I = Before; ++I; 01544 } else { 01545 I = BB->begin(); 01546 } 01547 } 01548 } 01549 01550 void PPC64ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) { 01551 unsigned TmpReg1, TmpReg2, TmpReg3; 01552 switch (ID) { 01553 case Intrinsic::vastart: 01554 // Get the address of the first vararg value... 01555 TmpReg1 = getReg(CI); 01556 addFrameReference(BuildMI(BB, PPC::ADDI, 2, TmpReg1), VarArgsFrameIndex, 01557 0, false); 01558 return; 01559 01560 case Intrinsic::vacopy: 01561 TmpReg1 = getReg(CI); 01562 TmpReg2 = getReg(CI.getOperand(1)); 01563 BuildMI(BB, PPC::OR, 2, TmpReg1).addReg(TmpReg2).addReg(TmpReg2); 01564 return; 01565 case Intrinsic::vaend: return; 01566 01567 case Intrinsic::returnaddress: 01568 TmpReg1 = getReg(CI); 01569 if (cast<Constant>(CI.getOperand(1))->isNullValue()) { 01570 MachineFrameInfo *MFI = F->getFrameInfo(); 01571 unsigned NumBytes = MFI->getStackSize(); 01572 01573 BuildMI(BB, PPC::LWZ, 2, TmpReg1).addSImm(NumBytes+8) 01574 .addReg(PPC::R1); 01575 } else { 01576 // Values other than zero are not implemented yet. 01577 BuildMI(BB, PPC::LI, 1, TmpReg1).addSImm(0); 01578 } 01579 return; 01580 01581 case Intrinsic::frameaddress: 01582 TmpReg1 = getReg(CI); 01583 if (cast<Constant>(CI.getOperand(1))->isNullValue()) { 01584 BuildMI(BB, PPC::OR, 2, TmpReg1).addReg(PPC::R1).addReg(PPC::R1); 01585 } else { 01586 // Values other than zero are not implemented yet. 01587 BuildMI(BB, PPC::LI, 1, TmpReg1).addSImm(0); 01588 } 01589 return; 01590 01591 #if 0 01592 // This may be useful for supporting isunordered 01593 case Intrinsic::isnan: 01594 // If this is only used by 'isunordered' style comparisons, don't emit it. 01595 if (isOnlyUsedByUnorderedComparisons(&CI)) return; 01596 TmpReg1 = getReg(CI.getOperand(1)); 01597 emitUCOM(BB, BB->end(), TmpReg1, TmpReg1); 01598 TmpReg2 = makeAnotherReg(Type::IntTy); 01599 BuildMI(BB, PPC::MFCR, TmpReg2); 01600 TmpReg3 = getReg(CI); 01601 BuildMI(BB, PPC::RLWINM, 4, TmpReg3).addReg(TmpReg2).addImm(4).addImm(31).addImm(31); 01602 return; 01603 #endif 01604 01605 default: assert(0 && "Error: unknown intrinsics should have been lowered!"); 01606 } 01607 } 01608 01609 /// visitSimpleBinary - Implement simple binary operators for integral types... 01610 /// OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for Or, 4 for 01611 /// Xor. 01612 /// 01613 void PPC64ISel::visitSimpleBinary(BinaryOperator &B, unsigned OperatorClass) { 01614 unsigned DestReg = getReg(B); 01615 MachineBasicBlock::iterator MI = BB->end(); 01616 Value *Op0 = B.getOperand(0), *Op1 = B.getOperand(1); 01617 unsigned Class = getClassB(B.getType()); 01618 01619 emitSimpleBinaryOperation(BB, MI, Op0, Op1, OperatorClass, DestReg); 01620 } 01621 01622 /// emitBinaryFPOperation - This method handles emission of floating point 01623 /// Add (0), Sub (1), Mul (2), and Div (3) operations. 01624 void PPC64ISel::emitBinaryFPOperation(MachineBasicBlock *BB, 01625 MachineBasicBlock::iterator IP, 01626 Value *Op0, Value *Op1, 01627 unsigned OperatorClass, unsigned DestReg){ 01628 01629 static const unsigned OpcodeTab[][4] = { 01630 { PPC::FADDS, PPC::FSUBS, PPC::FMULS, PPC::FDIVS }, // Float 01631 { PPC::FADD, PPC::FSUB, PPC::FMUL, PPC::FDIV }, // Double 01632 }; 01633 01634 // Special case: R1 = op <const fp>, R2 01635 if (ConstantFP *Op0C = dyn_cast<ConstantFP>(Op0)) 01636 if (Op0C->isExactlyValue(-0.0) && OperatorClass == 1) { 01637 // -0.0 - X === -X 01638 unsigned op1Reg = getReg(Op1, BB, IP); 01639 BuildMI(*BB, IP, PPC::FNEG, 1, DestReg).addReg(op1Reg); 01640 return; 01641 } 01642 01643 unsigned Opcode = OpcodeTab[Op0->getType() == Type::DoubleTy][OperatorClass]; 01644 unsigned Op0r = getReg(Op0, BB, IP); 01645 unsigned Op1r = getReg(Op1, BB, IP); 01646 BuildMI(*BB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r); 01647 } 01648 01649 /// emitSimpleBinaryOperation - Implement simple binary operators for integral 01650 /// types... OperatorClass is one of: 0 for Add, 1 for Sub, 2 for And, 3 for 01651 /// Or, 4 for Xor. 01652 /// 01653 /// emitSimpleBinaryOperation - Common code shared between visitSimpleBinary 01654 /// and constant expression support. 01655 /// 01656 void PPC64ISel::emitSimpleBinaryOperation(MachineBasicBlock *MBB, 01657 MachineBasicBlock::iterator IP, 01658 Value *Op0, Value *Op1, 01659 unsigned OperatorClass, 01660 unsigned DestReg) { 01661 unsigned Class = getClassB(Op0->getType()); 01662 01663 // Arithmetic and Bitwise operators 01664 static const unsigned OpcodeTab[] = { 01665 PPC::ADD, PPC::SUB, PPC::AND, PPC::OR, PPC::XOR 01666 }; 01667 // FIXME: Convert this to the version from PPC32ISel 01668 static const unsigned ImmOpcodeTab[] = { 01669 PPC::ADDI, PPC::ADDI, PPC::ANDIo, PPC::ORI, PPC::XORI 01670 }; 01671 static const unsigned RImmOpcodeTab[] = { 01672 PPC::ADDI, PPC::SUBFIC, PPC::ANDIo, PPC::ORI, PPC::XORI 01673 }; 01674 01675 if (Class == cFP32 || Class == cFP64) { 01676 assert(OperatorClass < 2 && "No logical ops for FP!"); 01677 emitBinaryFPOperation(MBB, IP, Op0, Op1, OperatorClass, DestReg); 01678 return; 01679 } 01680 01681 if (Op0->getType() == Type::BoolTy) { 01682 if (OperatorClass == 3) 01683 // If this is an or of two isnan's, emit an FP comparison directly instead 01684 // of or'ing two isnan's together. 01685 if (Value *LHS = dyncastIsNan(Op0)) 01686 if (Value *RHS = dyncastIsNan(Op1)) { 01687 unsigned Op0Reg = getReg(RHS, MBB, IP), Op1Reg = getReg(LHS, MBB, IP); 01688 unsigned TmpReg = makeAnotherReg(Type::IntTy); 01689 emitUCOM(MBB, IP, Op0Reg, Op1Reg); 01690 BuildMI(*MBB, IP, PPC::MFCR, TmpReg); 01691 BuildMI(*MBB, IP, PPC::RLWINM, 4, DestReg).addReg(TmpReg).addImm(4) 01692 .addImm(31).addImm(31); 01693 return; 01694 } 01695 } 01696 01697 // Special case: op <const int>, Reg 01698 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0)) { 01699 // sub 0, X -> subfic 01700 if (OperatorClass == 1 && canUseAsImmediateForOpcode(CI, 0)) { 01701 unsigned Op1r = getReg(Op1, MBB, IP); 01702 int imm = CI->getRawValue() & 0xFFFF; 01703 BuildMI(*MBB, IP, PPC::SUBFIC, 2, DestReg).addReg(Op1r).addSImm(imm); 01704 return; 01705 } 01706 01707 // If it is easy to do, swap the operands and emit an immediate op 01708 if (Class != cLong && OperatorClass != 1 && 01709 canUseAsImmediateForOpcode(CI, OperatorClass)) { 01710 unsigned Op1r = getReg(Op1, MBB, IP); 01711 int imm = CI->getRawValue() & 0xFFFF; 01712 01713 if (OperatorClass < 2) 01714 BuildMI(*MBB, IP, RImmOpcodeTab[OperatorClass], 2, DestReg).addReg(Op1r) 01715 .addSImm(imm); 01716 else 01717 BuildMI(*MBB, IP, RImmOpcodeTab[OperatorClass], 2, DestReg).addReg(Op1r) 01718 .addZImm(imm); 01719 return; 01720 } 01721 } 01722 01723 // Special case: op Reg, <const int> 01724 if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { 01725 unsigned Op0r = getReg(Op0, MBB, IP); 01726 01727 // xor X, -1 -> not X 01728 if (OperatorClass == 4 && Op1C->isAllOnesValue()) { 01729 BuildMI(*MBB, IP, PPC::NOR, 2, DestReg).addReg(Op0r).addReg(Op0r); 01730 return; 01731 } 01732 01733 if (canUseAsImmediateForOpcode(Op1C, OperatorClass)) { 01734 int immediate = Op1C->getRawValue() & 0xFFFF; 01735 01736 if (OperatorClass < 2) 01737 BuildMI(*MBB, IP, ImmOpcodeTab[OperatorClass], 2,DestReg).addReg(Op0r) 01738 .addSImm(immediate); 01739 else 01740 BuildMI(*MBB, IP, ImmOpcodeTab[OperatorClass], 2,DestReg).addReg(Op0r) 01741 .addZImm(immediate); 01742 } else { 01743 unsigned Op1r = getReg(Op1, MBB, IP); 01744 BuildMI(*MBB, IP, OpcodeTab[OperatorClass], 2, DestReg).addReg(Op0r) 01745 .addReg(Op1r); 01746 } 01747 return; 01748 } 01749 01750 // We couldn't generate an immediate variant of the op, load both halves into 01751 // registers and emit the appropriate opcode. 01752 unsigned Op0r = getReg(Op0, MBB, IP); 01753 unsigned Op1r = getReg(Op1, MBB, IP); 01754 unsigned Opcode = OpcodeTab[OperatorClass]; 01755 BuildMI(*MBB, IP, Opcode, 2, DestReg).addReg(Op0r).addReg(Op1r); 01756 } 01757 01758 // ExactLog2 - This function solves for (Val == 1 << (N-1)) and returns N. It 01759 // returns zero when the input is not exactly a power of two. 01760 static unsigned ExactLog2(unsigned Val) { 01761 if (Val == 0 || (Val & (Val-1))) return 0; 01762 unsigned Count = 0; 01763 while (Val != 1) { 01764 Val >>= 1; 01765 ++Count; 01766 } 01767 return Count; 01768 } 01769 01770 /// doMultiply - Emit appropriate instructions to multiply together the 01771 /// Values Op0 and Op1, and put the result in DestReg. 01772 /// 01773 void PPC64ISel::doMultiply(MachineBasicBlock *MBB, 01774 MachineBasicBlock::iterator IP, 01775 unsigned DestReg, Value *Op0, Value *Op1) { 01776 unsigned Class0 = getClass(Op0->getType()); 01777 unsigned Class1 = getClass(Op1->getType()); 01778 01779 unsigned Op0r = getReg(Op0, MBB, IP); 01780 unsigned Op1r = getReg(Op1, MBB, IP); 01781 01782 // 64 x 64 -> 64 01783 if (Class0 == cLong && Class1 == cLong) { 01784 BuildMI(*MBB, IP, PPC::MULLD, 2, DestReg).addReg(Op0r).addReg(Op1r); 01785 return; 01786 } 01787 01788 // 64 x 32 or less, promote 32 to 64 and do a 64 x 64 01789 if (Class0 == cLong && Class1 <= cInt) { 01790 // FIXME: CLEAR or SIGN EXTEND Op1 01791 BuildMI(*MBB, IP, PPC::MULLD, 2, DestReg).addReg(Op0r).addReg(Op1r); 01792 return; 01793 } 01794 01795 // 32 x 32 -> 32 01796 if (Class0 <= cInt && Class1 <= cInt) { 01797 BuildMI(*MBB, IP, PPC::MULLW, 2, DestReg).addReg(Op0r).addReg(Op1r); 01798 return; 01799 } 01800 01801 assert(0 && "doMultiply cannot operate on unknown type!"); 01802 } 01803 01804 /// doMultiplyConst - This method will multiply the value in Op0 by the 01805 /// value of the ContantInt *CI 01806 void PPC64ISel::doMultiplyConst(MachineBasicBlock *MBB, 01807 MachineBasicBlock::iterator IP, 01808 unsigned DestReg, Value *Op0, ConstantInt *CI) { 01809 unsigned Class = getClass(Op0->getType()); 01810 01811 // Mul op0, 0 ==> 0 01812 if (CI->isNullValue()) { 01813 BuildMI(*MBB, IP, PPC::LI, 1, DestReg).addSImm(0); 01814 return; 01815 } 01816 01817 // Mul op0, 1 ==> op0 01818 if (CI->equalsInt(1)) { 01819 unsigned Op0r = getReg(Op0, MBB, IP); 01820 BuildMI(*MBB, IP, PPC::OR, 2, DestReg).addReg(Op0r).addReg(Op0r); 01821 return; 01822 } 01823 01824 // If the element size is exactly a power of 2, use a shift to get it. 01825 if (unsigned Shift = ExactLog2(CI->getRawValue())) { 01826 ConstantUInt *ShiftCI = ConstantUInt::get(Type::UByteTy, Shift); 01827 emitShiftOperation(MBB, IP, Op0, ShiftCI, true, Op0->getType(), DestReg); 01828 return; 01829 } 01830 01831 // If 32 bits or less and immediate is in right range, emit mul by immediate 01832 if (Class == cByte || Class == cShort || Class == cInt) { 01833 if (canUseAsImmediateForOpcode(CI, 0)) { 01834 unsigned Op0r = getReg(Op0, MBB, IP); 01835 unsigned imm = CI->getRawValue() & 0xFFFF; 01836 BuildMI(*MBB, IP, PPC::MULLI, 2, DestReg).addReg(Op0r).addSImm(imm); 01837 return; 01838 } 01839 } 01840 01841 doMultiply(MBB, IP, DestReg, Op0, CI); 01842 } 01843 01844 void PPC64ISel::visitMul(BinaryOperator &I) { 01845 unsigned ResultReg = getReg(I); 01846 01847 Value *Op0 = I.getOperand(0); 01848 Value *Op1 = I.getOperand(1); 01849 01850 MachineBasicBlock::iterator IP = BB->end(); 01851 emitMultiply(BB, IP, Op0, Op1, ResultReg); 01852 } 01853 01854 void PPC64ISel::emitMultiply(MachineBasicBlock *MBB, 01855 MachineBasicBlock::iterator IP, 01856 Value *Op0, Value *Op1, unsigned DestReg) { 01857 TypeClass Class = getClass(Op0->getType()); 01858 01859 switch (Class) { 01860 case cByte: 01861 case cShort: 01862 case cInt: 01863 case cLong: 01864 if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { 01865 doMultiplyConst(MBB, IP, DestReg, Op0, CI); 01866 } else { 01867 doMultiply(MBB, IP, DestReg, Op0, Op1); 01868 } 01869 return; 01870 case cFP32: 01871 case cFP64: 01872 emitBinaryFPOperation(MBB, IP, Op0, Op1, 2, DestReg); 01873 return; 01874 break; 01875 } 01876 } 01877 01878 01879 /// visitDivRem - Handle division and remainder instructions... these 01880 /// instruction both require the same instructions to be generated, they just 01881 /// select the result from a different register. Note that both of these 01882 /// instructions work differently for signed and unsigned operands. 01883 /// 01884 void PPC64ISel::visitDivRem(BinaryOperator &I) { 01885 unsigned ResultReg = getReg(I); 01886 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 01887 01888 MachineBasicBlock::iterator IP = BB->end(); 01889 emitDivRemOperation(BB, IP, Op0, Op1, I.getOpcode() == Instruction::Div, 01890 ResultReg); 01891 } 01892 01893 void PPC64ISel::emitDivRemOperation(MachineBasicBlock *BB, 01894 MachineBasicBlock::iterator IP, 01895 Value *Op0, Value *Op1, bool isDiv, 01896 unsigned ResultReg) { 01897 const Type *Ty = Op0->getType(); 01898 unsigned Class = getClass(Ty); 01899 switch (Class) { 01900 case cFP32: 01901 if (isDiv) { 01902 // Floating point divide... 01903 emitBinaryFPOperation(BB, IP, Op0, Op1, 3, ResultReg); 01904 return; 01905 } else { 01906 // Floating point remainder via fmodf(float x, float y); 01907 unsigned Op0Reg = getReg(Op0, BB, IP); 01908 unsigned Op1Reg = getReg(Op1, BB, IP); 01909 MachineInstr *TheCall = 01910 BuildMI(PPC::CALLpcrel, 1).addGlobalAddress(fmodfFn, true); 01911 std::vector<ValueRecord> Args; 01912 Args.push_back(ValueRecord(Op0Reg, Type::FloatTy)); 01913 Args.push_back(ValueRecord(Op1Reg, Type::FloatTy)); 01914 doCall(ValueRecord(ResultReg, Type::FloatTy), TheCall, Args, false); 01915 } 01916 return; 01917 case cFP64: 01918 if (isDiv) { 01919 // Floating point divide... 01920 emitBinaryFPOperation(BB, IP, Op0, Op1, 3, ResultReg); 01921 return; 01922 } else { 01923 // Floating point remainder via fmod(double x, double y); 01924 unsigned Op0Reg = getReg(Op0, BB, IP); 01925 unsigned Op1Reg = getReg(Op1, BB, IP); 01926 MachineInstr *TheCall = 01927 BuildMI(PPC::CALLpcrel, 1).addGlobalAddress(fmodFn, true); 01928 std::vector<ValueRecord> Args; 01929 Args.push_back(ValueRecord(Op0Reg, Type::DoubleTy)); 01930 Args.push_back(ValueRecord(Op1Reg, Type::DoubleTy)); 01931 doCall(ValueRecord(ResultReg, Type::DoubleTy), TheCall, Args, false); 01932 } 01933 return; 01934 case cLong: case cByte: case cShort: case cInt: 01935 break; // Small integrals, handled below... 01936 default: assert(0 && "Unknown class!"); 01937 } 01938 01939 // Special case signed division by power of 2. 01940 if (isDiv) 01941 if (ConstantSInt *CI = dyn_cast<ConstantSInt>(Op1)) { 01942 assert(Class != cLong && "This doesn't handle 64-bit divides!"); 01943 int V = CI->getValue(); 01944 01945 if (V == 1) { // X /s 1 => X 01946 unsigned Op0Reg = getReg(Op0, BB, IP); 01947 BuildMI(*BB, IP, PPC::OR, 2, ResultReg).addReg(Op0Reg).addReg(Op0Reg); 01948 return; 01949 } 01950 01951 if (V == -1) { // X /s -1 => -X 01952 unsigned Op0Reg = getReg(Op0, BB, IP); 01953 BuildMI(*BB, IP, PPC::NEG, 1, ResultReg).addReg(Op0Reg); 01954 return; 01955 } 01956 01957 unsigned log2V = ExactLog2(V); 01958 if (log2V != 0 && Ty->isSigned()) { 01959 unsigned Op0Reg = getReg(Op0, BB, IP); 01960 unsigned TmpReg = makeAnotherReg(Op0->getType()); 01961 unsigned Opcode = Class == cLong ? PPC::SRADI : PPC::SRAWI; 01962 01963 BuildMI(*BB, IP, Opcode, 2, TmpReg).addReg(Op0Reg).addImm(log2V); 01964 BuildMI(*BB, IP, PPC::ADDZE, 1, ResultReg).addReg(TmpReg); 01965 return; 01966 } 01967 } 01968 01969 static const unsigned DivOpcodes[] = 01970 { PPC::DIVWU, PPC::DIVW, PPC::DIVDU, PPC::DIVD }; 01971 01972 unsigned Op0Reg = getReg(Op0, BB, IP); 01973 unsigned Op1Reg = getReg(Op1, BB, IP); 01974 unsigned Opcode = DivOpcodes[2*(Class == cLong) + Ty->isSigned()]; 01975 01976 if (isDiv) { 01977 BuildMI(*BB, IP, Opcode, 2, ResultReg).addReg(Op0Reg).addReg(Op1Reg); 01978 } else { // Remainder 01979 unsigned TmpReg1 = makeAnotherReg(Op0->getType()); 01980 unsigned TmpReg2 = makeAnotherReg(Op0->getType()); 01981 unsigned MulOpcode = Class == cLong ? PPC::MULLD : PPC::MULLW; 01982 01983 BuildMI(*BB, IP, Opcode, 2, TmpReg1).addReg(Op0Reg).addReg(Op1Reg); 01984 BuildMI(*BB, IP, MulOpcode, 2, TmpReg2).addReg(TmpReg1).addReg(Op1Reg); 01985 BuildMI(*BB, IP, PPC::SUBF, 2, ResultReg).addReg(TmpReg2).addReg(Op0Reg); 01986 } 01987 } 01988 01989 01990 /// Shift instructions: 'shl', 'sar', 'shr' - Some special cases here 01991 /// for constant immediate shift values, and for constant immediate 01992 /// shift values equal to 1. Even the general case is sort of special, 01993 /// because the shift amount has to be in CL, not just any old register. 01994 /// 01995 void PPC64ISel::visitShiftInst(ShiftInst &I) { 01996 MachineBasicBlock::iterator IP = BB->end(); 01997 emitShiftOperation(BB, IP, I.getOperand(0), I.getOperand(1), 01998 I.getOpcode() == Instruction::Shl, I.getType(), 01999 getReg(I)); 02000 } 02001 02002 /// emitShiftOperation - Common code shared between visitShiftInst and 02003 /// constant expression support. 02004 /// 02005 void PPC64ISel::emitShiftOperation(MachineBasicBlock *MBB, 02006 MachineBasicBlock::iterator IP, 02007 Value *Op, Value *ShiftAmount, 02008 bool isLeftShift, const Type *ResultTy, 02009 unsigned DestReg) { 02010 unsigned SrcReg = getReg (Op, MBB, IP); 02011 bool isSigned = ResultTy->isSigned (); 02012 unsigned Class = getClass (ResultTy); 02013 02014 // Longs, as usual, are handled specially... 02015 if (Class == cLong) { 02016 // If we have a constant shift, we can generate much more efficient code 02017 // than otherwise... 02018 // 02019 if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(ShiftAmount)) { 02020 unsigned Amount = CUI->getValue(); 02021 assert(Amount < 64 && "Invalid immediate shift amount!"); 02022 if (isLeftShift) { 02023 BuildMI(*MBB, IP, PPC::RLDICR, 3, DestReg).addReg(SrcReg).addImm(Amount) 02024 .addImm(63-Amount); 02025 } else { 02026 if (isSigned) { 02027 BuildMI(*MBB, IP, PPC::SRADI, 2, DestReg).addReg(SrcReg) 02028 .addImm(Amount); 02029 } else { 02030 BuildMI(*MBB, IP, PPC::RLDICL, 3, DestReg).addReg(SrcReg) 02031 .addImm(64-Amount).addImm(Amount); 02032 } 02033 } 02034 } else { 02035 unsigned ShiftReg = getReg (ShiftAmount, MBB, IP); 02036 02037 if (isLeftShift) { 02038 BuildMI(*MBB, IP, PPC::SLD, 2, DestReg).addReg(SrcReg).addReg(ShiftReg); 02039 } else { 02040 unsigned Opcode = (isSigned) ? PPC::SRAD : PPC::SRD; 02041 BuildMI(*MBB, IP, Opcode, DestReg).addReg(SrcReg).addReg(ShiftReg); 02042 } 02043 } 02044 return; 02045 } 02046 02047 if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(ShiftAmount)) { 02048 // The shift amount is constant, guaranteed to be a ubyte. Get its value. 02049 assert(CUI->getType() == Type::UByteTy && "Shift amount not a ubyte?"); 02050 unsigned Amount = CUI->getValue(); 02051 02052 if (isLeftShift) { 02053 BuildMI(*MBB, IP, PPC::RLWINM, 4, DestReg).addReg(SrcReg) 02054 .addImm(Amount).addImm(0).addImm(31-Amount); 02055 } else { 02056 if (isSigned) { 02057 BuildMI(*MBB, IP, PPC::SRAWI,2,DestReg).addReg(SrcReg).addImm(Amount); 02058 } else { 02059 BuildMI(*MBB, IP, PPC::RLWINM, 4, DestReg).addReg(SrcReg) 02060 .addImm(32-Amount).addImm(Amount).addImm(31); 02061 } 02062 } 02063 } else { // The shift amount is non-constant. 02064 unsigned ShiftAmountReg = getReg(ShiftAmount, MBB, IP); 02065 02066 if (isLeftShift) { 02067 BuildMI(*MBB, IP, PPC::SLW, 2, DestReg).addReg(SrcReg) 02068 .addReg(ShiftAmountReg); 02069 } else { 02070 BuildMI(*MBB, IP, isSigned ? PPC::SRAW : PPC::SRW, 2, DestReg) 02071 .addReg(SrcReg).addReg(ShiftAmountReg); 02072 } 02073 } 02074 } 02075 02076 02077 /// visitLoadInst - Implement LLVM load instructions. Pretty straightforward 02078 /// mapping of LLVM classes to PPC load instructions, with the exception of 02079 /// signed byte loads, which need a sign extension following them. 02080 /// 02081 void PPC64ISel::visitLoadInst(LoadInst &I) { 02082 // Immediate opcodes, for reg+imm addressing 02083 static const unsigned ImmOpcodes[] = { 02084 PPC::LBZ, PPC::LHZ, PPC::LWZ, 02085 PPC::LFS, PPC::LFD, PPC::LWZ 02086 }; 02087 // Indexed opcodes, for reg+reg addressing 02088 static const unsigned IdxOpcodes[] = { 02089 PPC::LBZX, PPC::LHZX, PPC::LWZX, 02090 PPC::LFSX, PPC::LFDX, PPC::LWZX 02091 }; 02092 02093 unsigned Class = getClassB(I.getType()); 02094 unsigned ImmOpcode = ImmOpcodes[Class]; 02095 unsigned IdxOpcode = IdxOpcodes[Class]; 02096 unsigned DestReg = getReg(I); 02097 Value *SourceAddr = I.getOperand(0); 02098 02099 if (Class == cShort && I.getType()->isSigned()) ImmOpcode = PPC::LHA; 02100 if (Class == cShort && I.getType()->isSigned()) IdxOpcode = PPC::LHAX; 02101 02102 if (AllocaInst *AI = dyn_castFixedAlloca(SourceAddr)) { 02103 unsigned FI = getFixedSizedAllocaFI(AI); 02104 if (Class == cByte && I.getType()->isSigned()) { 02105 unsigned TmpReg = makeAnotherReg(I.getType()); 02106 addFrameReference(BuildMI(BB, ImmOpcode, 2, TmpReg), FI); 02107 BuildMI(BB, PPC::EXTSB, 1, DestReg).addReg(TmpReg); 02108 } else { 02109 addFrameReference(BuildMI(BB, ImmOpcode, 2, DestReg), FI); 02110 } 02111 return; 02112 } 02113 02114 // If this load is the only use of the GEP instruction that is its address, 02115 // then we can fold the GEP directly into the load instruction. 02116 // emitGEPOperation with a second to last arg of 'true' will place the 02117 // base register for the GEP into baseReg, and the constant offset from that 02118 // into offset. If the offset fits in 16 bits, we can emit a reg+imm store 02119 // otherwise, we copy the offset into another reg, and use reg+reg addressing. 02120 if (GetElementPtrInst *GEPI = canFoldGEPIntoLoadOrStore(SourceAddr)) { 02121 unsigned baseReg = getReg(GEPI); 02122 unsigned pendingAdd; 02123 ConstantSInt *offset; 02124 02125 emitGEPOperation(BB, BB->end(), GEPI->getOperand(0), GEPI->op_begin()+1, 02126 GEPI->op_end(), baseReg, true, &offset, &pendingAdd); 02127 02128 if (pendingAdd == 0 && Class != cLong && 02129 canUseAsImmediateForOpcode(offset, 0)) { 02130 if (Class == cByte && I.getType()->isSigned()) { 02131 unsigned TmpReg = makeAnotherReg(I.getType()); 02132 BuildMI(BB, ImmOpcode, 2, TmpReg).addSImm(offset->getValue()) 02133 .addReg(baseReg); 02134 BuildMI(BB, PPC::EXTSB, 1, DestReg).addReg(TmpReg); 02135 } else { 02136 BuildMI(BB, ImmOpcode, 2, DestReg).addSImm(offset->getValue()) 02137 .addReg(baseReg); 02138 } 02139 return; 02140 } 02141 02142 unsigned indexReg = (pendingAdd != 0) ? pendingAdd : getReg(offset); 02143 02144 if (Class == cByte && I.getType()->isSigned()) { 02145 unsigned TmpReg = makeAnotherReg(I.getType()); 02146 BuildMI(BB, IdxOpcode, 2, TmpReg).addReg(indexReg).addReg(baseReg); 02147 BuildMI(BB, PPC::EXTSB, 1, DestReg).addReg(TmpReg); 02148 } else { 02149 BuildMI(BB, IdxOpcode, 2, DestReg).addReg(indexReg).addReg(baseReg); 02150 } 02151 return; 02152 } 02153 02154 // The fallback case, where the load was from a source that could not be 02155 // folded into the load instruction. 02156 unsigned SrcAddrReg = getReg(SourceAddr); 02157 02158 if (Class == cByte && I.getType()->isSigned()) { 02159 unsigned TmpReg = makeAnotherReg(I.getType()); 02160 BuildMI(BB, ImmOpcode, 2, TmpReg).addSImm(0).addReg(SrcAddrReg); 02161 BuildMI(BB, PPC::EXTSB, 1, DestReg).addReg(TmpReg); 02162 } else { 02163 BuildMI(BB, ImmOpcode, 2, DestReg).addSImm(0).addReg(SrcAddrReg); 02164 } 02165 } 02166 02167 /// visitStoreInst - Implement LLVM store instructions 02168 /// 02169 void PPC64ISel::visitStoreInst(StoreInst &I) { 02170 // Immediate opcodes, for reg+imm addressing 02171 static const unsigned ImmOpcodes[] = { 02172 PPC::STB, PPC::STH, PPC::STW, 02173 PPC::STFS, PPC::STFD, PPC::STW 02174 }; 02175 // Indexed opcodes, for reg+reg addressing 02176 static const unsigned IdxOpcodes[] = { 02177 PPC::STBX, PPC::STHX, PPC::STWX, 02178 PPC::STFSX, PPC::STFDX, PPC::STWX 02179 }; 02180 02181 Value *SourceAddr = I.getOperand(1); 02182 const Type *ValTy = I.getOperand(0)->getType(); 02183 unsigned Class = getClassB(ValTy); 02184 unsigned ImmOpcode = ImmOpcodes[Class]; 02185 unsigned IdxOpcode = IdxOpcodes[Class]; 02186 unsigned ValReg = getReg(I.getOperand(0)); 02187 02188 // If this store is the only use of the GEP instruction that is its address, 02189 // then we can fold the GEP directly into the store instruction. 02190 // emitGEPOperation with a second to last arg of 'true' will place the 02191 // base register for the GEP into baseReg, and the constant offset from that 02192 // into offset. If the offset fits in 16 bits, we can emit a reg+imm store 02193 // otherwise, we copy the offset into another reg, and use reg+reg addressing. 02194 if (GetElementPtrInst *GEPI = canFoldGEPIntoLoadOrStore(SourceAddr)) { 02195 unsigned baseReg = getReg(GEPI); 02196 unsigned pendingAdd; 02197 ConstantSInt *offset; 02198 02199 emitGEPOperation(BB, BB->end(), GEPI->getOperand(0), GEPI->op_begin()+1, 02200 GEPI->op_end(), baseReg, true, &offset, &pendingAdd); 02201 02202 if (0 == pendingAdd && Class != cLong && 02203 canUseAsImmediateForOpcode(offset, 0)) { 02204 BuildMI(BB, ImmOpcode, 3).addReg(ValReg).addSImm(offset->getValue()) 02205 .addReg(baseReg); 02206 return; 02207 } 02208 02209 unsigned indexReg = (pendingAdd != 0) ? pendingAdd : getReg(offset); 02210 BuildMI(BB, IdxOpcode, 3).addReg(ValReg).addReg(indexReg).addReg(baseReg); 02211 return; 02212 } 02213 02214 // If the store address wasn't the only use of a GEP, we fall back to the 02215 // standard path: store the ValReg at the value in AddressReg. 02216 unsigned AddressReg = getReg(I.getOperand(1)); 02217 BuildMI(BB, ImmOpcode, 3).addReg(ValReg).addSImm(0).addReg(AddressReg); 02218 } 02219 02220 02221 /// visitCastInst - Here we have various kinds of copying with or without sign 02222 /// extension going on. 02223 /// 02224 void PPC64ISel::visitCastInst(CastInst &CI) { 02225 Value *Op = CI.getOperand(0); 02226 02227 unsigned SrcClass = getClassB(Op->getType()); 02228 unsigned DestClass = getClassB(CI.getType()); 02229 02230 // If this is a cast from a 32-bit integer to a Long type, and the only uses 02231 // of the case are GEP instructions, then the cast does not need to be 02232 // generated explicitly, it will be folded into the GEP. 02233 if (DestClass == cLong && SrcClass == cInt) { 02234 bool AllUsesAreGEPs = true; 02235 for (Value::use_iterator I = CI.use_begin(), E = CI.use_end(); I != E; ++I) 02236 if (!isa<GetElementPtrInst>(*I)) { 02237 AllUsesAreGEPs = false; 02238 break; 02239 } 02240 02241 // No need to codegen this cast if all users are getelementptr instrs... 02242 if (AllUsesAreGEPs) return; 02243 } 02244 02245 unsigned DestReg = getReg(CI); 02246 MachineBasicBlock::iterator MI = BB->end(); 02247 emitCastOperation(BB, MI, Op, CI.getType(), DestReg); 02248 } 02249 02250 /// emitCastOperation - Common code shared between visitCastInst and constant 02251 /// expression cast support. 02252 /// 02253 void PPC64ISel::emitCastOperation(MachineBasicBlock *MBB, 02254 MachineBasicBlock::iterator IP, 02255 Value *Src, const Type *DestTy, 02256 unsigned DestReg) { 02257 const Type *SrcTy = Src->getType(); 02258 unsigned SrcClass = getClassB(SrcTy); 02259 unsigned DestClass = getClassB(DestTy); 02260 unsigned SrcReg = getReg(Src, MBB, IP); 02261 02262 // Implement casts to bool by using compare on the operand followed by set if 02263 // not zero on the result. 02264 if (DestTy == Type::BoolTy) { 02265 switch (SrcClass) { 02266 case cByte: 02267 case cShort: 02268 case cInt: 02269 case cLong: { 02270 unsigned TmpReg = makeAnotherReg(Type::IntTy); 02271 BuildMI(*MBB, IP, PPC::ADDIC, 2, TmpReg).addReg(SrcReg).addSImm(-1); 02272 BuildMI(*MBB, IP, PPC::SUBFE, 2, DestReg).addReg(TmpReg).addReg(SrcReg); 02273 break; 02274 } 02275 case cFP32: 02276 case cFP64: 02277 // FSEL perhaps? 02278 std::cerr << "ERROR: Cast fp-to-bool not implemented!\n"; 02279 abort(); 02280 } 02281 return; 02282 } 02283 02284 // Handle cast of Float -> Double 02285 if (SrcClass == cFP32 && DestClass == cFP64) { 02286 BuildMI(*MBB, IP, PPC::FMR, 1, DestReg).addReg(SrcReg); 02287 return; 02288 } 02289 02290 // Handle cast of Double -> Float 02291 if (SrcClass == cFP64 && DestClass == cFP32) { 02292 BuildMI(*MBB, IP, PPC::FRSP, 1, DestReg).addReg(SrcReg); 02293 return; 02294 } 02295 02296 // Handle casts from integer to floating point now... 02297 if (DestClass == cFP32 || DestClass == cFP64) { 02298 02299 // Spill the integer to memory and reload it from there. 02300 unsigned TmpReg = makeAnotherReg(Type::DoubleTy); 02301 int ValueFrameIdx = 02302 F->getFrameInfo()->CreateStackObject(Type::DoubleTy, TM.getTargetData()); 02303 02304 if (SrcClass == cLong) { 02305 if (SrcTy->isSigned()) { 02306 addFrameReference(BuildMI(*MBB, IP, PPC::STD, 3).addReg(SrcReg), 02307 ValueFrameIdx); 02308 addFrameReference(BuildMI(*MBB, IP, PPC::LFD, 2, TmpReg), 02309 ValueFrameIdx); 02310 BuildMI(*MBB, IP, PPC::FCFID, 1, DestReg).addReg(TmpReg); 02311 } else { 02312 unsigned Scale = getReg(ConstantFP::get(Type::DoubleTy, 0x1p32)); 02313 unsigned TmpHi = makeAnotherReg(Type::IntTy); 02314 unsigned TmpLo = makeAnotherReg(Type::IntTy); 02315 unsigned FPLow = makeAnotherReg(Type::DoubleTy); 02316 unsigned FPTmpHi = makeAnotherReg(Type::DoubleTy); 02317 unsigned FPTmpLo = makeAnotherReg(Type::DoubleTy); 02318 int OtherFrameIdx = F->getFrameInfo()->CreateStackObject(Type::DoubleTy, 02319 TM.getTargetData()); 02320 BuildMI(*MBB, IP, PPC::RLDICL, 3, TmpHi).addReg(SrcReg).addImm(32) 02321 .addImm(32); 02322 BuildMI(*MBB, IP, PPC::RLDICL, 3, TmpLo).addReg(SrcReg).addImm(0) 02323 .addImm(32); 02324 addFrameReference(BuildMI(*MBB, IP, PPC::STD, 3).addReg(TmpHi), 02325 ValueFrameIdx); 02326 addFrameReference(BuildMI(*MBB, IP, PPC::STD, 3).addReg(TmpLo), 02327 OtherFrameIdx); 02328 addFrameReference(BuildMI(*MBB, IP, PPC::LFD, 2, TmpReg), 02329 ValueFrameIdx); 02330 addFrameReference(BuildMI(*MBB, IP, PPC::LFD, 2, FPLow), 02331 OtherFrameIdx); 02332 BuildMI(*MBB, IP, PPC::FCFID, 1, FPTmpHi).addReg(TmpReg); 02333 BuildMI(*MBB, IP, PPC::FCFID, 1, FPTmpLo).addReg(FPLow); 02334 BuildMI(*MBB, IP, PPC::FMADD, 3, DestReg).addReg(Scale).addReg(FPTmpHi) 02335 .addReg(FPTmpLo); 02336 } 02337 return; 02338 } 02339 02340 // FIXME: really want a promote64 02341 unsigned IntTmp = makeAnotherReg(Type::IntTy); 02342 02343 if (SrcTy->isSigned()) 02344 BuildMI(*MBB, IP, PPC::EXTSW, 1, IntTmp).addReg(SrcReg); 02345 else 02346 BuildMI(*MBB, IP, PPC::RLDICL, 3, IntTmp).addReg(SrcReg).addImm(0) 02347 .addImm(32); 02348 addFrameReference(BuildMI(*MBB, IP, PPC::STD, 3).addReg(IntTmp), 02349 ValueFrameIdx); 02350 addFrameReference(BuildMI(*MBB, IP, PPC::LFD, 2, TmpReg), 02351 ValueFrameIdx); 02352 BuildMI(*MBB, IP, PPC::FCFID, 1, DestReg).addReg(TmpReg); 02353 return; 02354 } 02355 02356 // Handle casts from floating point to integer now... 02357 if (SrcClass == cFP32 || SrcClass == cFP64) { 02358 static Function* const Funcs[] = 02359 { __fixsfdiFn, __fixdfdiFn, __fixunssfdiFn, __fixunsdfdiFn }; 02360 // emit library call 02361 if (DestClass == cLong) { 02362 bool isDouble = SrcClass == cFP64; 02363 unsigned nameIndex = 2 * DestTy->isSigned() + isDouble; 02364 std::vector<ValueRecord> Args; 02365 Args.push_back(ValueRecord(SrcReg, SrcTy)); 02366 Function *floatFn = Funcs[nameIndex]; 02367 MachineInstr *TheCall = 02368 BuildMI(PPC::CALLpcrel, 1).addGlobalAddress(floatFn, true); 02369 doCall(ValueRecord(DestReg, DestTy), TheCall, Args, false); 02370 return; 02371 } 02372 02373 int ValueFrameIdx = 02374 F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData()); 02375 02376 if (DestTy->isSigned()) { 02377 unsigned TempReg = makeAnotherReg(Type::DoubleTy); 02378 02379 // Convert to integer in the FP reg and store it to a stack slot 02380 BuildMI(*BB, IP, PPC::FCTIWZ, 1, TempReg).addReg(SrcReg); 02381 addFrameReference(BuildMI(*BB, IP, PPC::STFD, 3) 02382 .addReg(TempReg), ValueFrameIdx); 02383 02384 // There is no load signed byte opcode, so we must emit a sign extend for 02385 // that particular size. Make sure to source the new integer from the 02386 // correct offset. 02387 if (DestClass == cByte) { 02388 unsigned TempReg2 = makeAnotherReg(DestTy); 02389 addFrameReference(BuildMI(*BB, IP, PPC::LBZ, 2, TempReg2), 02390 ValueFrameIdx, 7); 02391 BuildMI(*MBB, IP, PPC::EXTSB, DestReg).addReg(TempReg2); 02392 } else { 02393 int offset = (DestClass == cShort) ? 6 : 4; 02394 unsigned LoadOp = (DestClass == cShort) ? PPC::LHA : PPC::LWZ; 02395 addFrameReference(BuildMI(*BB, IP, LoadOp, 2, DestReg), 02396 ValueFrameIdx, offset); 02397 } 02398 } else { 02399 unsigned Zero = getReg(ConstantFP::get(Type::DoubleTy, 0.0f)); 02400 double maxInt = (1LL << 32) - 1; 02401 unsigned MaxInt = getReg(ConstantFP::get(Type::DoubleTy, maxInt)); 02402 double border = 1LL << 31; 02403 unsigned Border = getReg(ConstantFP::get(Type::DoubleTy, border)); 02404 unsigned UseZero = makeAnotherReg(Type::DoubleTy); 02405 unsigned UseMaxInt = makeAnotherReg(Type::DoubleTy); 02406 unsigned UseChoice = makeAnotherReg(Type::DoubleTy); 02407 unsigned TmpReg = makeAnotherReg(Type::DoubleTy); 02408 unsigned TmpReg2 = makeAnotherReg(Type::DoubleTy); 02409 unsigned ConvReg = makeAnotherReg(Type::DoubleTy); 02410 unsigned IntTmp = makeAnotherReg(Type::IntTy); 02411 unsigned XorReg = makeAnotherReg(Type::IntTy); 02412 int FrameIdx = 02413 F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData()); 02414 // Update machine-CFG edges 02415 MachineBasicBlock *XorMBB = new MachineBasicBlock(BB->getBasicBlock()); 02416 MachineBasicBlock *PhiMBB = new MachineBasicBlock(BB->getBasicBlock()); 02417 MachineBasicBlock *OldMBB = BB; 02418 ilist<MachineBasicBlock>::iterator It = BB; ++It; 02419 F->getBasicBlockList().insert(It, XorMBB); 02420 F->getBasicBlockList().insert(It, PhiMBB); 02421 BB->addSuccessor(XorMBB); 02422 BB->addSuccessor(PhiMBB); 02423 02424 // Convert from floating point to unsigned 32-bit value 02425 // Use 0 if incoming value is < 0.0 02426 BuildMI(*BB, IP, PPC::FSEL, 3, UseZero).addReg(SrcReg).addReg(SrcReg) 02427 .addReg(Zero); 02428 // Use 2**32 - 1 if incoming value is >= 2**32 02429 BuildMI(*BB, IP, PPC::FSUB, 2, UseMaxInt).addReg(MaxInt).addReg(SrcReg); 02430 BuildMI(*BB, IP, PPC::FSEL, 3, UseChoice).addReg(UseMaxInt) 02431 .addReg(UseZero).addReg(MaxInt); 02432 // Subtract 2**31 02433 BuildMI(*BB, IP, PPC::FSUB, 2, TmpReg).addReg(UseChoice).addReg(Border); 02434 // Use difference if >= 2**31 02435 BuildMI(*BB, IP, PPC::FCMPU, 2, PPC::CR0).addReg(UseChoice) 02436 .addReg(Border); 02437 BuildMI(*BB, IP, PPC::FSEL, 3, TmpReg2).addReg(TmpReg).addReg(TmpReg) 02438 .addReg(UseChoice); 02439 // Convert to integer 02440 BuildMI(*BB, IP, PPC::FCTIWZ, 1, ConvReg).addReg(TmpReg2); 02441 addFrameReference(BuildMI(*BB, IP, PPC::STFD, 3).addReg(ConvReg), 02442 FrameIdx); 02443 if (DestClass == cByte) { 02444 addFrameReference(BuildMI(*BB, IP, PPC::LBZ, 2, DestReg), 02445 FrameIdx, 7); 02446 } else if (DestClass == cShort) { 02447 addFrameReference(BuildMI(*BB, IP, PPC::LHZ, 2, DestReg), 02448 FrameIdx, 6); 02449 } if (DestClass == cInt) { 02450 addFrameReference(BuildMI(*BB, IP, PPC::LWZ, 2, IntTmp), 02451 FrameIdx, 4); 02452 BuildMI(*BB, IP, PPC::BLT, 2).addReg(PPC::CR0).addMBB(PhiMBB); 02453 BuildMI(*BB, IP, PPC::B, 1).addMBB(XorMBB); 02454 02455 // XorMBB: 02456 // add 2**31 if input was >= 2**31 02457 BB = XorMBB; 02458 BuildMI(BB, PPC::XORIS, 2, XorReg).addReg(IntTmp).addImm(0x8000); 02459 XorMBB->addSuccessor(PhiMBB); 02460 02461 // PhiMBB: 02462 // DestReg = phi [ IntTmp, OldMBB ], [ XorReg, XorMBB ] 02463 BB = PhiMBB; 02464 BuildMI(BB, PPC::PHI, 4, DestReg).addReg(IntTmp).addMBB(OldMBB) 02465 .addReg(XorReg).addMBB(XorMBB); 02466 } 02467 } 02468 return; 02469 } 02470 02471 // Check our invariants 02472 assert((SrcClass <= cInt || SrcClass == cLong) && 02473 "Unhandled source class for cast operation!"); 02474 assert((DestClass <= cInt || DestClass == cLong) && 02475 "Unhandled destination class for cast operation!"); 02476 02477 bool sourceUnsigned = SrcTy->isUnsigned() || SrcTy == Type::BoolTy; 02478 bool destUnsigned = DestTy->isUnsigned(); 02479 02480 // Unsigned -> Unsigned, clear if larger 02481 if (sourceUnsigned && destUnsigned) { 02482 // handle long dest class now to keep switch clean 02483 if (DestClass == cLong) { 02484 BuildMI(*MBB, IP, PPC::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg); 02485 return; 02486 } 02487 02488 // handle u{ byte, short, int } x u{ byte, short, int } 02489 unsigned clearBits = (SrcClass == cByte || DestClass == cByte) ? 24 : 16; 02490 switch (SrcClass) { 02491 case cByte: 02492 case cShort: 02493 if (SrcClass == DestClass) 02494 BuildMI(*MBB, IP, PPC::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg); 02495 else 02496 BuildMI(*MBB, IP, PPC::RLWINM, 4, DestReg).addReg(SrcReg) 02497 .addImm(0).addImm(clearBits).addImm(31); 02498 break; 02499 case cInt: 02500 case cLong: 02501 if (DestClass == cInt) 02502 BuildMI(*MBB, IP, PPC::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg); 02503 else 02504 BuildMI(*MBB, IP, PPC::RLWINM, 4, DestReg).addReg(SrcReg) 02505 .addImm(0).addImm(clearBits).addImm(31); 02506 break; 02507 } 02508 return; 02509 } 02510 02511 // Signed -> Signed 02512 if (!sourceUnsigned && !destUnsigned) { 02513 // handle long dest class now to keep switch clean 02514 if (DestClass == cLong) { 02515 BuildMI(*MBB, IP, PPC::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg); 02516 return; 02517 } 02518 02519 // handle { byte, short, int } x { byte, short, int } 02520 switch (SrcClass) { 02521 case cByte: 02522 if (DestClass == cByte) 02523 BuildMI(*MBB, IP, PPC::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg); 02524 else 02525 BuildMI(*MBB, IP, PPC::EXTSB, 1, DestReg).addReg(SrcReg); 02526 break; 02527 case cShort: 02528 if (DestClass == cByte) 02529 BuildMI(*MBB, IP, PPC::EXTSB, 1, DestReg).addReg(SrcReg); 02530 else if (DestClass == cShort) 02531 BuildMI(*MBB, IP, PPC::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg); 02532 else 02533 BuildMI(*MBB, IP, PPC::EXTSH, 1, DestReg).addReg(SrcReg); 02534 break; 02535 case cInt: 02536 case cLong: 02537 if (DestClass == cByte) 02538 BuildMI(*MBB, IP, PPC::EXTSB, 1, DestReg).addReg(SrcReg); 02539 else if (DestClass == cShort) 02540 BuildMI(*MBB, IP, PPC::EXTSH, 1, DestReg).addReg(SrcReg); 02541 else 02542 BuildMI(*MBB, IP, PPC::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg); 02543 break; 02544 } 02545 return; 02546 } 02547 02548 // Unsigned -> Signed 02549 if (sourceUnsigned && !destUnsigned) { 02550 // handle long dest class now to keep switch clean 02551 if (DestClass == cLong) { 02552 BuildMI(*MBB, IP, PPC::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg); 02553 return; 02554 } 02555 02556 // handle u{ byte, short, int } -> { byte, short, int } 02557 switch (SrcClass) { 02558 case cByte: 02559 if (DestClass == cByte) 02560 // uByte 255 -> signed byte == -1 02561 BuildMI(*MBB, IP, PPC::EXTSB, 1, DestReg).addReg(SrcReg); 02562 else 02563 // uByte 255 -> signed short/int == 255 02564 BuildMI(*MBB, IP, PPC::RLWINM, 4, DestReg).addReg(SrcReg).addImm(0) 02565 .addImm(24).addImm(31); 02566 break; 02567 case cShort: 02568 if (DestClass == cByte) 02569 BuildMI(*MBB, IP, PPC::EXTSB, 1, DestReg).addReg(SrcReg); 02570 else if (DestClass == cShort) 02571 BuildMI(*MBB, IP, PPC::EXTSH, 1, DestReg).addReg(SrcReg); 02572 else 02573 BuildMI(*MBB, IP, PPC::RLWINM, 4, DestReg).addReg(SrcReg).addImm(0) 02574 .addImm(16).addImm(31); 02575 break; 02576 case cInt: 02577 case cLong: 02578 if (DestClass == cByte) 02579 BuildMI(*MBB, IP, PPC::EXTSB, 1, DestReg).addReg(SrcReg); 02580 else if (DestClass == cShort) 02581 BuildMI(*MBB, IP, PPC::EXTSH, 1, DestReg).addReg(SrcReg); 02582 else 02583 BuildMI(*MBB, IP, PPC::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg); 02584 break; 02585 } 02586 return; 02587 } 02588 02589 // Signed -> Unsigned 02590 if (!sourceUnsigned && destUnsigned) { 02591 // handle long dest class now to keep switch clean 02592 if (DestClass == cLong) { 02593 BuildMI(*MBB, IP, PPC::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg); 02594 return; 02595 } 02596 02597 // handle { byte, short, int } -> u{ byte, short, int } 02598 unsigned clearBits = (DestClass == cByte) ? 24 : 16; 02599 switch (SrcClass) { 02600 case cByte: 02601 case cShort: 02602 if (DestClass == cByte || DestClass == cShort) 02603 // sbyte -1 -> ubyte 0x000000FF 02604 BuildMI(*MBB, IP, PPC::RLWINM, 4, DestReg).addReg(SrcReg) 02605 .addImm(0).addImm(clearBits).addImm(31); 02606 else 02607 // sbyte -1 -> ubyte 0xFFFFFFFF 02608 BuildMI(*MBB, IP, PPC::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg); 02609 break; 02610 case cInt: 02611 case cLong: 02612 if (DestClass == cInt) 02613 BuildMI(*MBB, IP, PPC::OR, 2, DestReg).addReg(SrcReg).addReg(SrcReg); 02614 else 02615 BuildMI(*MBB, IP, PPC::RLWINM, 4, DestReg).addReg(SrcReg) 02616 .addImm(0).addImm(clearBits).addImm(31); 02617 break; 02618 } 02619 return; 02620 } 02621 02622 // Anything we haven't handled already, we can't (yet) handle at all. 02623 std::cerr << "Unhandled cast from " << SrcTy->getDescription() 02624 << "to " << DestTy->getDescription() << '\n'; 02625 abort(); 02626 } 02627 02628 /// visitVANextInst - Implement the va_next instruction... 02629 /// 02630 void PPC64ISel::visitVANextInst(VANextInst &I) { 02631 unsigned VAList = getReg(I.getOperand(0)); 02632 unsigned DestReg = getReg(I); 02633 02634 unsigned Size; 02635 switch (I.getArgType()->getTypeID()) { 02636 default: 02637 std::cerr << I; 02638 assert(0 && "Error: bad type for va_next instruction!"); 02639 return; 02640 case Type::PointerTyID: 02641 case Type::UIntTyID: 02642 case Type::IntTyID: 02643 Size = 4; 02644 break; 02645 case Type::ULongTyID: 02646 case Type::LongTyID: 02647 case Type::DoubleTyID: 02648 Size = 8; 02649 break; 02650 } 02651 02652 // Increment the VAList pointer... 02653 BuildMI(BB, PPC::ADDI, 2, DestReg).addReg(VAList).addSImm(Size); 02654 } 02655 02656 void PPC64ISel::visitVAArgInst(VAArgInst &I) { 02657 unsigned VAList = getReg(I.getOperand(0)); 02658 unsigned DestReg = getReg(I); 02659 02660 switch (I.getType()->getTypeID()) { 02661 default: 02662 std::cerr << I; 02663 assert(0 && "Error: bad type for va_next instruction!"); 02664 return; 02665 case Type::PointerTyID: 02666 case Type::UIntTyID: 02667 case Type::IntTyID: 02668 BuildMI(BB, PPC::LWZ, 2, DestReg).addSImm(0).addReg(VAList); 02669 break; 02670 case Type::ULongTyID: 02671 case Type::LongTyID: 02672 BuildMI(BB, PPC::LD, 2, DestReg).addSImm(0).addReg(VAList); 02673 break; 02674 case Type::FloatTyID: 02675 BuildMI(BB, PPC::LFS, 2, DestReg).addSImm(0).addReg(VAList); 02676 break; 02677 case Type::DoubleTyID: 02678 BuildMI(BB, PPC::LFD, 2, DestReg).addSImm(0).addReg(VAList); 02679 break; 02680 } 02681 } 02682 02683 /// visitGetElementPtrInst - instruction-select GEP instructions 02684 /// 02685 void PPC64ISel::visitGetElementPtrInst(GetElementPtrInst &I) { 02686 if (canFoldGEPIntoLoadOrStore(&I)) 02687 return; 02688 02689 unsigned outputReg = getReg(I); 02690 emitGEPOperation(BB, BB->end(), I.getOperand(0), I.op_begin()+1, I.op_end(), 02691 outputReg, false, 0, 0); 02692 } 02693 02694 /// emitGEPOperation - Common code shared between visitGetElementPtrInst and 02695 /// constant expression GEP support. 02696 /// 02697 void PPC64ISel::emitGEPOperation(MachineBasicBlock *MBB, 02698 MachineBasicBlock::iterator IP, 02699 Value *Src, User::op_iterator IdxBegin, 02700 User::op_iterator IdxEnd, unsigned TargetReg, 02701 bool GEPIsFolded, ConstantSInt **RemainderPtr, 02702 unsigned *PendingAddReg) { 02703 const TargetData &TD = TM.getTargetData(); 02704 const Type *Ty = Src->getType(); 02705 unsigned basePtrReg = getReg(Src, MBB, IP); 02706 int64_t constValue = 0; 02707 02708 // Record the operations to emit the GEP in a vector so that we can emit them 02709 // after having analyzed the entire instruction. 02710 std::vector<CollapsedGepOp> ops; 02711 02712 // GEPs have zero or more indices; we must perform a struct access 02713 // or array access for each one. 02714 for (GetElementPtrInst::op_iterator oi = IdxBegin, oe = IdxEnd; oi != oe; 02715 ++oi) { 02716 Value *idx = *oi; 02717 if (const StructType *StTy = dyn_cast<StructType>(Ty)) { 02718 // It's a struct access. idx is the index into the structure, 02719 // which names the field. Use the TargetData structure to 02720 // pick out what the layout of the structure is in memory. 02721 // Use the (constant) structure index's value to find the 02722 // right byte offset from the StructLayout class's list of 02723 // structure member offsets. 02724 unsigned fieldIndex = cast<ConstantUInt>(idx)->getValue(); 02725 unsigned memberOffset = 02726 TD.getStructLayout(StTy)->MemberOffsets[fieldIndex]; 02727 02728 // StructType member offsets are always constant values. Add it to the 02729 // running total. 02730 constValue += memberOffset; 02731 02732 // The next type is the member of the structure selected by the 02733 // index. 02734 Ty = StTy->getElementType (fieldIndex); 02735 } else if (const SequentialType *SqTy = dyn_cast<SequentialType> (Ty)) { 02736 // Many GEP instructions use a [cast (int/uint) to LongTy] as their 02737 // operand. Handle this case directly now... 02738 if (CastInst *CI = dyn_cast<CastInst>(idx)) 02739 if (CI->getOperand(0)->getType() == Type::IntTy || 02740 CI->getOperand(0)->getType() == Type::UIntTy) 02741 idx = CI->getOperand(0); 02742 02743 // It's an array or pointer access: [ArraySize x ElementType]. 02744 // We want to add basePtrReg to (idxReg * sizeof ElementType). First, we 02745 // must find the size of the pointed-to type (Not coincidentally, the next 02746 // type is the type of the elements in the array). 02747 Ty = SqTy->getElementType(); 02748 unsigned elementSize = TD.getTypeSize(Ty); 02749 02750 if (ConstantInt *C = dyn_cast<ConstantInt>(idx)) { 02751 if (ConstantSInt *CS = dyn_cast<ConstantSInt>(C)) 02752 constValue += CS->getValue() * elementSize; 02753 else if (ConstantUInt *CU = dyn_cast<ConstantUInt>(C)) 02754 constValue += CU->getValue() * elementSize; 02755 else 02756 assert(0 && "Invalid ConstantInt GEP index type!"); 02757 } else { 02758 // Push current gep state to this point as an add 02759 ops.push_back(CollapsedGepOp(false, 0, 02760 ConstantSInt::get(Type::IntTy,constValue))); 02761 02762 // Push multiply gep op and reset constant value 02763 ops.push_back(CollapsedGepOp(true, idx, 02764 ConstantSInt::get(Type::IntTy, elementSize))); 02765 02766 constValue = 0; 02767 } 02768 } 02769 } 02770 // Emit instructions for all the collapsed ops 02771 bool pendingAdd = false; 02772 unsigned pendingAddReg = 0; 02773 02774 for(std::vector<CollapsedGepOp>::iterator cgo_i = ops.begin(), 02775 cgo_e = ops.end(); cgo_i != cgo_e; ++cgo_i) { 02776 CollapsedGepOp& cgo = *cgo_i; 02777 unsigned nextBasePtrReg = makeAnotherReg(Type::IntTy); 02778 02779 // If we didn't emit an add last time through the loop, we need to now so 02780 // that the base reg is updated appropriately. 02781 if (pendingAdd) { 02782 assert(pendingAddReg != 0 && "Uninitialized register in pending add!"); 02783 BuildMI(*MBB, IP, PPC::ADD, 2, nextBasePtrReg).addReg(basePtrReg) 02784 .addReg(pendingAddReg); 02785 basePtrReg = nextBasePtrReg; 02786 nextBasePtrReg = makeAnotherReg(Type::IntTy); 02787 pendingAddReg = 0; 02788 pendingAdd = false; 02789 } 02790 02791 if (cgo.isMul) { 02792 // We know the elementSize is a constant, so we can emit a constant mul 02793 unsigned TmpReg = makeAnotherReg(Type::IntTy); 02794 doMultiplyConst(MBB, IP, nextBasePtrReg, cgo.index, cgo.size); 02795 pendingAddReg = basePtrReg; 02796 pendingAdd = true; 02797 } else { 02798 // Try and generate an immediate addition if possible 02799 if (cgo.size->isNullValue()) { 02800 BuildMI(*MBB, IP, PPC::OR, 2, nextBasePtrReg).addReg(basePtrReg) 02801 .addReg(basePtrReg); 02802 } else if (canUseAsImmediateForOpcode(cgo.size, 0)) { 02803 BuildMI(*MBB, IP, PPC::ADDI, 2, nextBasePtrReg).addReg(basePtrReg) 02804 .addSImm(cgo.size->getValue()); 02805 } else { 02806 unsigned Op1r = getReg(cgo.size, MBB, IP); 02807 BuildMI(*MBB, IP, PPC::ADD, 2, nextBasePtrReg).addReg(basePtrReg) 02808 .addReg(Op1r); 02809 } 02810 } 02811 02812 basePtrReg = nextBasePtrReg; 02813 } 02814 // Add the current base register plus any accumulated constant value 02815 ConstantSInt *remainder = ConstantSInt::get(Type::IntTy, constValue); 02816 02817 // If we are emitting this during a fold, copy the current base register to 02818 // the target, and save the current constant offset so the folding load or 02819 // store can try and use it as an immediate. 02820 if (GEPIsFolded) { 02821 // If this is a folded GEP and the last element was an index, then we need 02822 // to do some extra work to turn a shift/add/stw into a shift/stwx 02823 if (pendingAdd && 0 == remainder->getValue()) { 02824 assert(pendingAddReg != 0 && "Uninitialized register in pending add!"); 02825 *PendingAddReg = pendingAddReg; 02826 } else { 02827 *PendingAddReg = 0; 02828 if (pendingAdd) { 02829 unsigned nextBasePtrReg = makeAnotherReg(Type::IntTy); 02830 assert(pendingAddReg != 0 && "Uninitialized register in pending add!"); 02831 BuildMI(*MBB, IP, PPC::ADD, 2, nextBasePtrReg).addReg(basePtrReg) 02832 .addReg(pendingAddReg); 02833 basePtrReg = nextBasePtrReg; 02834 } 02835 } 02836 BuildMI (*MBB, IP, PPC::OR, 2, TargetReg).addReg(basePtrReg) 02837 .addReg(basePtrReg); 02838 *RemainderPtr = remainder; 02839 return; 02840 } 02841 02842 // If we still have a pending add at this point, emit it now 02843 if (pendingAdd) { 02844 unsigned TmpReg = makeAnotherReg(Type::IntTy); 02845 BuildMI(*MBB, IP, PPC::ADD, 2, TmpReg).addReg(pendingAddReg) 02846 .addReg(basePtrReg); 02847 basePtrReg = TmpReg; 02848 } 02849 02850 // After we have processed all the indices, the result is left in 02851 // basePtrReg. Move it to the register where we were expected to 02852 // put the answer. 02853 if (remainder->isNullValue()) { 02854 BuildMI (*MBB, IP, PPC::OR, 2, TargetReg).addReg(basePtrReg) 02855 .addReg(basePtrReg); 02856 } else if (canUseAsImmediateForOpcode(remainder, 0)) { 02857 BuildMI(*MBB, IP, PPC::ADDI, 2, TargetReg).addReg(basePtrReg) 02858 .addSImm(remainder->getValue()); 02859 } else { 02860 unsigned Op1r = getReg(remainder, MBB, IP); 02861 BuildMI(*MBB, IP, PPC::ADD, 2, TargetReg).addReg(basePtrReg).addReg(Op1r); 02862 } 02863 } 02864 02865 /// visitAllocaInst - If this is a fixed size alloca, allocate space from the 02866 /// frame manager, otherwise do it the hard way. 02867 /// 02868 void PPC64ISel::visitAllocaInst(AllocaInst &I) { 02869 // If this is a fixed size alloca in the entry block for the function, we 02870 // statically stack allocate the space, so we don't need to do anything here. 02871 // 02872 if (dyn_castFixedAlloca(&I)) return; 02873 02874 // Find the data size of the alloca inst's getAllocatedType. 02875 const Type *Ty = I.getAllocatedType(); 02876 unsigned TySize = TM.getTargetData().getTypeSize(Ty); 02877 02878 // Create a register to hold the temporary result of multiplying the type size 02879 // constant by the variable amount. 02880 unsigned TotalSizeReg = makeAnotherReg(Type::UIntTy); 02881 02882 // TotalSizeReg = mul <numelements>, <TypeSize> 02883 MachineBasicBlock::iterator MBBI = BB->end(); 02884 ConstantUInt *CUI = ConstantUInt::get(Type::UIntTy, TySize); 02885 doMultiplyConst(BB, MBBI, TotalSizeReg, I.getArraySize(), CUI); 02886 02887 // AddedSize = add <TotalSizeReg>, 15 02888 unsigned AddedSizeReg = makeAnotherReg(Type::UIntTy); 02889 BuildMI(BB, PPC::ADDI, 2, AddedSizeReg).addReg(TotalSizeReg).addSImm(15); 02890 02891 // AlignedSize = and <AddedSize>, ~15 02892 unsigned AlignedSize = makeAnotherReg(Type::UIntTy); 02893 BuildMI(BB, PPC::RLWINM, 4, AlignedSize).addReg(AddedSizeReg).addImm(0) 02894 .addImm(0).addImm(27); 02895 02896 // Subtract size from stack pointer, thereby allocating some space. 02897 BuildMI(BB, PPC::SUB, 2, PPC::R1).addReg(PPC::R1).addReg(AlignedSize); 02898 02899 // Put a pointer to the space into the result register, by copying 02900 // the stack pointer. 02901 BuildMI(BB, PPC::OR, 2, getReg(I)).addReg(PPC::R1).addReg(PPC::R1); 02902 02903 // Inform the Frame Information that we have just allocated a variable-sized 02904 // object. 02905 F->getFrameInfo()->CreateVariableSizedObject(); 02906 } 02907 02908 /// visitMallocInst - Malloc instructions are code generated into direct calls 02909 /// to the library malloc. 02910 /// 02911 void PPC64ISel::visitMallocInst(MallocInst &I) { 02912 unsigned AllocSize = TM.getTargetData().getTypeSize(I.getAllocatedType()); 02913 unsigned Arg; 02914 02915 if (ConstantUInt *C = dyn_cast<ConstantUInt>(I.getOperand(0))) { 02916 Arg = getReg(ConstantUInt::get(Type::UIntTy, C->getValue() * AllocSize)); 02917 } else { 02918 Arg = makeAnotherReg(Type::UIntTy); 02919 MachineBasicBlock::iterator MBBI = BB->end(); 02920 ConstantUInt *CUI = ConstantUInt::get(Type::UIntTy, AllocSize); 02921 doMultiplyConst(BB, MBBI, Arg, I.getOperand(0), CUI); 02922 } 02923 02924 std::vector<ValueRecord> Args; 02925 Args.push_back(ValueRecord(Arg, Type::UIntTy)); 02926 MachineInstr *TheCall = 02927 BuildMI(PPC::CALLpcrel, 1).addGlobalAddress(mallocFn, true); 02928 doCall(ValueRecord(getReg(I), I.getType()), TheCall, Args, false); 02929 } 02930 02931 02932 /// visitFreeInst - Free instructions are code gen'd to call the free libc 02933 /// function. 02934 /// 02935 void PPC64ISel::visitFreeInst(FreeInst &I) { 02936 std::vector<ValueRecord> Args; 02937 Args.push_back(ValueRecord(I.getOperand(0))); 02938 MachineInstr *TheCall = 02939 BuildMI(PPC::CALLpcrel, 1).addGlobalAddress(freeFn, true); 02940 doCall(ValueRecord(0, Type::VoidTy), TheCall, Args, false); 02941 } 02942 02943 /// createPPC64ISelSimple - This pass converts an LLVM function into a machine 02944 /// code representation is a very simple peep-hole fashion. 02945 /// 02946 FunctionPass *llvm::createPPC64ISelSimple(TargetMachine &TM) { 02947 return new PPC64ISel(TM); 02948 }