//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPCTargetMachine.h"
#include "PPCPerfectShuffle.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {

  // Fold away setcc operations if possible.
  setSetCCIsExpensive();
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmpLongJmp(true);

  // Set up the register classes.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

  setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f32, Expand);

  // PowerPC has no intrinsics for these particular operations.
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
  setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod.
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // If we're enabling GP optimizations, use hardware square root;
  // otherwise, expand it.
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ.
  setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
  setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
  setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
  setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64 , Expand);

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32 , Expand);

  // PowerPC does not have SELECT.
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC.
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);

  // PowerPC does not have truncstore for i1.
  setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!TM.getSubtarget<PPCSubtarget>().isDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements.
  setOperationAction(ISD::RET , MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
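
  // (Illustrative note, not from the original source: each entry above feeds
  // the legalizer's action table -- Expand rewrites a node in terms of other
  // nodes, Promote widens it to a different type first, and Custom diverts it
  // to this target's LowerOperation hook later in this file.)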

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

    // FIXME: disable this lowered code.  This generates 64-bit register
    // values, and we don't model the fact that the top part is clobbered by
    // calls.  We need to flag these together so that the value isn't live
    // across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL, MVT::i64, Custom);
    setOperationAction(ISD::SRL, MVT::i64, Custom);
    setOperationAction(ISD::SRA, MVT::i64, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    // First set operation action for all vector types to expand.  Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
      setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::OR , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::XOR , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::STORE, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::STORE, (MVT::ValueType)VT, MVT::v4i32);
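
      // (Illustrative example, not from the original source: given the
      // promotions above, an AND of two v8i16 values is bitconverted to
      // v4i32, ANDed there, and bitconverted back -- bitwise operations are
      // type-agnostic, so a single v4i32 pattern covers every vector type.)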

      // No other operations are legal.
      setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);

      setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND , MVT::v4i32, Legal);
    setOperationAction(ISD::OR , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);

    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setStackPointerRegisterToSaveRestore(PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  setTargetDAGCombine(ISD::BSWAP);

  computeRegisterProperties();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL:          return "PPCISD::FSEL";
  case PPCISD::FCFID:         return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:        return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:        return "PPCISD::FCTIWZ";
  case PPCISD::STFIWX:        return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:       return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:      return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:         return "PPCISD::VPERM";
  case PPCISD::Hi:            return "PPCISD::Hi";
  case PPCISD::Lo:            return "PPCISD::Lo";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:           return "PPCISD::SRL";
  case PPCISD::SRA:           return "PPCISD::SRA";
  case PPCISD::SHL:           return "PPCISD::SHL";
  case PPCISD::EXTSW_32:      return "PPCISD::EXTSW_32";
  case PPCISD::STD_32:        return "PPCISD::STD_32";
"PPCISD::STD_32"; 00293 case PPCISD::CALL: return "PPCISD::CALL"; 00294 case PPCISD::MTCTR: return "PPCISD::MTCTR"; 00295 case PPCISD::BCTRL: return "PPCISD::BCTRL"; 00296 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; 00297 case PPCISD::MFCR: return "PPCISD::MFCR"; 00298 case PPCISD::VCMP: return "PPCISD::VCMP"; 00299 case PPCISD::VCMPo: return "PPCISD::VCMPo"; 00300 case PPCISD::LBRX: return "PPCISD::LBRX"; 00301 case PPCISD::STBRX: return "PPCISD::STBRX"; 00302 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; 00303 } 00304 } 00305 00306 //===----------------------------------------------------------------------===// 00307 // Node matching predicates, for use by the tblgen matching code. 00308 //===----------------------------------------------------------------------===// 00309 00310 /// isFloatingPointZero - Return true if this is 0.0 or -0.0. 00311 static bool isFloatingPointZero(SDOperand Op) { 00312 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 00313 return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0); 00314 else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) { 00315 // Maybe this has already been legalized into the constant pool? 00316 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) 00317 if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get())) 00318 return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0); 00319 } 00320 return false; 00321 } 00322 00323 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return 00324 /// true if Op is undef or if it matches the specified value. 00325 static bool isConstantOrUndef(SDOperand Op, unsigned Val) { 00326 return Op.getOpcode() == ISD::UNDEF || 00327 cast<ConstantSDNode>(Op)->getValue() == Val; 00328 } 00329 00330 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a 00331 /// VPKUHUM instruction. 00332 bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) { 00333 if (!isUnary) { 00334 for (unsigned i = 0; i != 16; ++i) 00335 if (!isConstantOrUndef(N->getOperand(i), i*2+1)) 00336 return false; 00337 } else { 00338 for (unsigned i = 0; i != 8; ++i) 00339 if (!isConstantOrUndef(N->getOperand(i), i*2+1) || 00340 !isConstantOrUndef(N->getOperand(i+8), i*2+1)) 00341 return false; 00342 } 00343 return true; 00344 } 00345 00346 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a 00347 /// VPKUWUM instruction. 00348 bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) { 00349 if (!isUnary) { 00350 for (unsigned i = 0; i != 16; i += 2) 00351 if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || 00352 !isConstantOrUndef(N->getOperand(i+1), i*2+3)) 00353 return false; 00354 } else { 00355 for (unsigned i = 0; i != 8; i += 2) 00356 if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || 00357 !isConstantOrUndef(N->getOperand(i+1), i*2+3) || 00358 !isConstantOrUndef(N->getOperand(i+8), i*2+2) || 00359 !isConstantOrUndef(N->getOperand(i+9), i*2+3)) 00360 return false; 00361 } 00362 return true; 00363 } 00364 00365 /// isVMerge - Common function, used to match vmrg* shuffles. 

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(SDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 8, 24);
  return isVMerge(N, UnitSize, 8, 8);
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 0, 16);
  return isVMerge(N, UnitSize, 0, 0);
}


/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  if (!isUnary) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))
        return -1;
  } else {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))
        return -1;
  }

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = 0;
  SDOperand Elt = N->getOperand(0);
  if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
    ElementBase = EltV->getValue();
  else
    return false;   // FIXME: Handle UNDEF elements too!
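
  // (Added note, not from the original source: splat indices 16-31 would
  // select bytes of the shuffle's second input vector, which VSPLT* cannot
  // encode, so the check below rejects them.)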

  if (cast<ConstantSDNode>(Elt)->getValue() >= 16)
    return false;

  // Check that they are consecutive.
  for (unsigned i = 1; i != EltSize; ++i) {
    if (!isa<ConstantSDNode>(N->getOperand(i)) ||
        cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase)
      return false;
  }

  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
           "Invalid VECTOR_SHUFFLE mask!");
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getOperand(i+j) != N->getOperand(j))
        return false;
  }

  return true;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  assert(isSplatShuffleMask(N, EltSize));
  return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDOperand OpVal(0, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;  // Number of BV entries per spltval.
    SDOperand UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDOperand();

      if (UniquedVals[i&(Multiple-1)].Val == 0)
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDOperand();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (UniquedVals[i].Val == 0) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (UniquedVals[Multiple-1].Val == 0)
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (UniquedVals[Multiple-1].Val == 0)
        return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSignExtended();
      if (Val >= -16)  // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDOperand();
  }

  // Check to see if this buildvec has a single non-undef value in its
  // elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDOperand();
  }

  if (OpVal.Val == 0) return SDOperand();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = 0;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getValue();
    ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValue());
    ValSizeInBytes = 4;
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDOperand();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
        (Value & ((1 << (8*ValSizeInBytes))-1)))
      return SDOperand();
  }

  // Properly sign extend the value.
  int ShAmt = (4-ByteSize)*8;
  int MaskVal = ((int)Value << ShAmt) >> ShAmt;
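
  // Worked example (illustrative, not from the original source): for
  // ByteSize == 1 and Value == 0xFE, ShAmt is 24, so MaskVal becomes
  // ((int)0xFE000000) >> 24 == -2, which fits vspltisb's 5-bit signed field.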

  // If this is zero, don't match; zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDOperand();

  // Finally, if this value fits in a 5 bit sext field, return it.
  if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDOperand();
}

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->get();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, CPI, Zero);
  SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, CPI, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the constant pool.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  return Lo;
}

static SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, JTI, Zero);
  SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, JTI, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the jump table.
    // The address of the table is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }
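
  // (Illustrative note, not from the original source: the Hi/Lo pair above
  // typically becomes a two-instruction address materialization such as
  //   lis r2, ha16(LJTI)
  //   la  r2, lo16(LJTI)(r2)
  // on Darwin, with PPCISD::Hi/Lo carrying the ha16/lo16 halves.)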

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  return Lo;
}

static SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, GA, Zero);
  SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, GA, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to globals.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);

  if (!GV->hasWeakLinkage() && !GV->hasLinkOnceLinkage() &&
      (!GV->isExternal() || GV->hasNotBeenReadFromBytecode()))
    return Lo;

  // If the global is weak or external, we have to go through the lazy
  // resolution stub.
  return DAG.getLoad(PtrVT, DAG.getEntryNode(), Lo, DAG.getSrcValue(0));
}

static SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      MVT::ValueType VT = Op.getOperand(0).getValueType();
      SDOperand Zext = Op.getOperand(0);
      if (VT < MVT::i32) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
      }
      unsigned Log2b = Log2_32(MVT::getSizeInBits(VT));
      SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
      SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
                                  DAG.getConstant(Log2b, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, MVT::i32, Scc);
    }
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDOperand();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by subtracting the rhs from the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.
  MVT::ValueType LHSVT = Op.getOperand(0).getValueType();
  if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    MVT::ValueType VT = Op.getValueType();
    SDOperand Sub = DAG.getNode(ISD::SUB, LHSVT, Op.getOperand(0),
                                Op.getOperand(1));
    return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
  }
  return SDOperand();
}

static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG,
                              unsigned VarArgsFrameIndex) {
  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
  return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
                     Op.getOperand(1), Op.getOperand(2));
}

static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
                                       int &VarArgsFrameIndex) {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SSARegMap *RegMap = MF.getSSARegMap();
  std::vector<SDOperand> ArgValues;
  SDOperand Root = Op.getOperand(0);

  unsigned ArgOffset = 24;
  const unsigned Num_GPR_Regs = 8;
  const unsigned Num_FPR_Regs = 13;
  const unsigned Num_VR_Regs  = 12;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const unsigned GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const unsigned FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
  };
  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start at offset 24, although the
  // first ones are often in registers.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    unsigned CurArgOffset = ArgOffset;
    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i32:
      // All int arguments reserve stack space.
      ArgOffset += isPPC64 ? 8 : 4;

      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
        MF.addLiveIn(GPR[GPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++GPR_idx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:  // PPC64
      // All int arguments reserve stack space.
      ArgOffset += 8;

      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = RegMap->createVirtualRegister(&PPC::G8RCRegClass);
        MF.addLiveIn(GPR[GPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++GPR_idx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      // All FP arguments reserve stack space.
      ArgOffset += ObjSize;

      // Every 4 bytes of argument space consumes one of the GPRs available
      // for argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;
        if (ObjectVT == MVT::f32)
          VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass);
        else
          VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
        MF.addLiveIn(FPR[FPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = RegMap->createVirtualRegister(&PPC::VRRCRegClass);
        MF.addLiveIn(VR[VR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++VR_idx;
      } else {
        // This should be simple, but requires getting 16-byte aligned stack
        // values.
        assert(0 && "Loading VR argument not implemented yet!");
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN,
                             DAG.getSrcValue(NULL));
      } else {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
      }
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    std::vector<SDOperand> MemOps;
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
      MF.addLiveIn(GPR[GPR_idx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
                                    Val, FIN, DAG.getSrcValue(NULL));
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store.
      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps);
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, ArgValues);
}

/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||          // Low 2 bits are implicitly zero.
      (Addr << 6 >> 6) != Addr)
    return 0;  // Top 6 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
}


static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
  unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
  // SelectExpr to use to put the arguments in the appropriate registers.
  std::vector<SDOperand> args_to_use;

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned NumBytes = 6*PtrByteSize;

  // Add up all the space actually used.
  for (unsigned i = 0; i != NumOps; ++i)
    NumBytes += MVT::getSizeInBits(Op.getOperand(5+2*i).getValueType())/8;

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it's varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  if (NumBytes < 6*PtrByteSize+8*PtrByteSize)
    NumBytes = 6*PtrByteSize+8*PtrByteSize;
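
  // Worked example (illustrative, not from the original source): on a 32-bit
  // target PtrByteSize is 4, so every call reserves at least 24 + 32 = 56
  // bytes -- the linkage area plus spill slots for r3-r10 -- even when no
  // arguments are passed.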

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, PtrVT));

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = 6*PtrByteSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  static const unsigned GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const unsigned FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
  };
  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = sizeof(GPR_32)/sizeof(GPR_32[0]);
  const unsigned NumFPRs = sizeof(FPR)/sizeof(FPR[0]);
  const unsigned NumVRs  = sizeof( VR)/sizeof( VR[0]);

  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;

  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      unsigned ExtOp = ISD::ZERO_EXTEND;
      if (cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue())
        ExtOp = ISD::SIGN_EXTEND;
      Arg = DAG.getNode(ExtOp, MVT::i64, Arg);
    }

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
      }
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                        Arg, PtrOff,
                                        DAG.getSrcValue(NULL));
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers.
          if (GPR_idx != NumGPRs) {
            SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff,
                                         DAG.getSrcValue(NULL));
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64) {
            SDOperand ConstFour = DAG.getConstant(4, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, PtrVT, PtrOff, ConstFour);
            SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff,
                                         DAG.getSrcValue(NULL));
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (GPR_idx != NumGPRs)
            ++GPR_idx;
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64)
            ++GPR_idx;
        }
      } else {
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
      }
      if (isPPC64)
        ArgOffset += 8;
      else
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      assert(!isVarArg && "Don't support passing vectors to varargs yet!");
      assert(VR_idx != NumVRs &&
             "Don't support passing more than 12 vector args yet!");
      RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      break;
    }
  }
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
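
  // (Illustrative note, not from the original source: the indirect-call path
  // below corresponds to the PPC "mtctr; bctrl" sequence, with the callee
  // address also copied into R12 for the Darwin ABI.)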

  std::vector<SDOperand> Ops;
  unsigned CallOpc = PPCISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
    // If this is an absolute destination address, use the munged value.
    Callee = SDOperand(Dest, 0);
  else {
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
    // to do the call; we can't use PPCISD::CALL.
    Ops.push_back(Chain);
    Ops.push_back(Callee);

    if (InFlag.Val)
      Ops.push_back(InFlag);
    Chain = DAG.getNode(PPCISD::MTCTR, NodeTys, Ops);
    InFlag = Chain.getValue(1);

    // Copy the callee address into R12 on darwin.
    Chain = DAG.getCopyToReg(Chain, PPC::R12, Callee, InFlag);
    InFlag = Chain.getValue(1);

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    CallOpc = PPCISD::BCTRL;
    Callee.Val = 0;
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.Val) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, PPC::R4, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, PPC::X3, MVT::i64, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, PPC::F1, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, PPC::V2, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumBytes, PtrVT));
  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, ResultVals);
  return Res.getValue(Op.ResNo);
}

static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Copy;
  switch (Op.getNumOperands()) {
  default:
    assert(0 && "Do not know how to return this many arguments!");
    abort();
  case 1:
    return SDOperand();   // ret void is legal
  case 3: {
    MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
    unsigned ArgReg;
    if (ArgVT == MVT::i32) {
      ArgReg = PPC::R3;
    } else if (ArgVT == MVT::i64) {
      ArgReg = PPC::X3;
    } else if (MVT::isFloatingPoint(ArgVT)) {
      ArgReg = PPC::F1;
    } else {
      assert(MVT::isVector(ArgVT));
      ArgReg = PPC::V2;
    }

    Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1),
                            SDOperand());

    // If we haven't noted that R3/F1 is live out, do so now.
    if (DAG.getMachineFunction().liveout_empty())
      DAG.getMachineFunction().addLiveOut(ArgReg);
    break;
  }
  case 5:
    Copy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3, Op.getOperand(3),
                            SDOperand());
    Copy = DAG.getCopyToReg(Copy, PPC::R4, Op.getOperand(1), Copy.getValue(1));
    // If we haven't noted that R3+R4 are live out, do so now.
    if (DAG.getMachineFunction().liveout_empty()) {
      DAG.getMachineFunction().addLiveOut(PPC::R3);
      DAG.getMachineFunction().addLiveOut(PPC::R4);
    }
    break;
  }
  return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
}

/// LowerSELECT_CC - Lower floating point select_cc's into fsel instructions
/// when possible.
static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {
  // Not FP? Not a fsel.
  if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
      !MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
    return SDOperand();

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  // Cannot handle SETEQ/SETNE.
  if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDOperand();

  MVT::ValueType ResVT = Op.getValueType();
  MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
  SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETULT:
    case ISD::SETOLT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
    case ISD::SETUGE:
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETOGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
    case ISD::SETULE:
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, ResVT,
                         DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
    }
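
  // Worked example (illustrative, not from the original source): with a zero
  // RHS, "select_cc setge x, 0.0, a, b" lowers to PPCISD::FSEL(x, a, b),
  // which yields a when x >= 0.0 and b otherwise -- matching the hardware
  // fsel semantics.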

  SDOperand Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETULT:
  case ISD::SETOLT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
  case ISD::SETUGE:
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETOGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
  case ISD::SETULE:
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
  }
  return SDOperand();
}

static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
  SDOperand Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);

  SDOperand Tmp;
  switch (Op.getValueType()) {
  default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
    break;
  case MVT::i64:
    Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp);
  if (Op.getValueType() == MVT::i32)
    Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits);
  return Bits;
}

static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
    SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
    if (Op.getValueType() == MVT::f32)
      FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled SINT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // slot, then lfd it and fcfid it.
  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(8, 8);
  SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);

  SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
                                Op.getOperand(0));

  // STD the extended value into the stack slot.
  SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
                                DAG.getEntryNode(), Ext64, FIdx,
                                DAG.getSrcValue(NULL));
  // Load the value as a double.
01401   SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL));
01402 
01403   // FCFID it and return it.
01404   SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
01405   if (Op.getValueType() == MVT::f32)
01406     FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
01407   return FP;
01408 }
01409 
01410 static SDOperand LowerSHL(SDOperand Op, SelectionDAG &DAG,
01411                           MVT::ValueType PtrVT) {
01412   assert(Op.getValueType() == MVT::i64 &&
01413          Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
01414   // The generic code does a fine job expanding shift by a constant.
01415   if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();
01416 
01417   // Otherwise, expand into a bunch of logical ops.  Note that these ops
01418   // depend on the PPC behavior for oversized shift amounts.
01419   SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
01420                              DAG.getConstant(0, PtrVT));
01421   SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
01422                              DAG.getConstant(1, PtrVT));
01423   SDOperand Amt = Op.getOperand(1);
01424 
01425   SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
01426                                DAG.getConstant(32, MVT::i32), Amt);
01427   SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt);
01428   SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1);
01429   SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
01430   SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
01431                                DAG.getConstant(-32U, MVT::i32));
01432   SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5);
01433   SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
01434   SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt);
01435   return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
01436 }
01437 
01438 static SDOperand LowerSRL(SDOperand Op, SelectionDAG &DAG,
01439                           MVT::ValueType PtrVT) {
01440   assert(Op.getValueType() == MVT::i64 &&
01441          Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRL!");
01442   // The generic code does a fine job expanding shift by a constant.
01443   if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();
01444 
01445   // Otherwise, expand into a bunch of logical ops.  Note that these ops
01446   // depend on the PPC behavior for oversized shift amounts.
01447   SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
01448                              DAG.getConstant(0, PtrVT));
01449   SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
01450                              DAG.getConstant(1, PtrVT));
01451   SDOperand Amt = Op.getOperand(1);
01452 
01453   SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
01454                                DAG.getConstant(32, MVT::i32), Amt);
01455   SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
01456   SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
01457   SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
01458   SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
01459                                DAG.getConstant(-32U, MVT::i32));
01460   SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);
01461   SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
01462   SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
01463   return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
01464 }
01465 
01466 static SDOperand LowerSRA(SDOperand Op, SelectionDAG &DAG,
01467                           MVT::ValueType PtrVT) {
01468   assert(Op.getValueType() == MVT::i64 &&
01469          Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
01470   // The generic code does a fine job expanding shift by a constant.
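  // For a variable amount the expansion below mirrors LowerSHL/LowerSRL:
  // roughly, for 0 < Amt < 32,
  //   Lo' = (Lo >>u Amt) | (Hi << (32 - Amt)),   Hi' = Hi >>s Amt
  // and for Amt >= 32 the select_cc picks Hi >>s (Amt - 32) as Lo' instead,
  // relying on PPC's well-defined behavior for oversized 32-bit shifts.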
01471   if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();
01472 
01473   // Otherwise, expand into a bunch of logical ops, followed by a select_cc.
01474   SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
01475                              DAG.getConstant(0, PtrVT));
01476   SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
01477                              DAG.getConstant(1, PtrVT));
01478   SDOperand Amt = Op.getOperand(1);
01479 
01480   SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
01481                                DAG.getConstant(32, MVT::i32), Amt);
01482   SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
01483   SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
01484   SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
01485   SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
01486                                DAG.getConstant(-32U, MVT::i32));
01487   SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
01488   SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
01489   SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
01490                                     Tmp4, Tmp6, ISD::SETLE);
01491   return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
01492 }
01493 
01494 //===----------------------------------------------------------------------===//
01495 // Vector related lowering.
01496 //
01497 
01498 // If this is a vector of constants or undefs, get the bits.  A bit in
01499 // UndefBits is set if the corresponding element of the vector is an
01500 // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
01501 // zero.  Return true if this is not an array of constants, false if it is.
01502 //
01503 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
01504                                        uint64_t UndefBits[2]) {
01505   // Start with zero'd results.
01506   VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
01507 
01508   unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
01509   for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
01510     SDOperand OpVal = BV->getOperand(i);
01511 
01512     unsigned PartNo = i >= e/2;     // In the upper half of the 128-bit value?
01513     unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
01514 
01515     uint64_t EltBits = 0;
01516     if (OpVal.getOpcode() == ISD::UNDEF) {
01517       uint64_t EltUndefBits = ~0U >> (32-EltBitSize);
01518       UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
01519       continue;
01520     } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
01521       EltBits = CN->getValue() & (~0U >> (32-EltBitSize));
01522     } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
01523       assert(CN->getValueType(0) == MVT::f32 &&
01524              "Only one legal FP vector type!");
01525       EltBits = FloatToBits(CN->getValue());
01526     } else {
01527       // Nonconstant element.
01528       return true;
01529     }
01530 
01531     VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
01532   }
01533 
01534   //printf("%llx %llx %llx %llx\n",
01535   //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
01536   return false;
01537 }
01538 
01539 // If this is a splat (repetition) of a value across the whole vector, return
01540 // the smallest size that splats it.  For example, "0x01010101010101..." is a
01541 // splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
01542 // SplatSize = 1 byte.
01543 static bool isConstantSplat(const uint64_t Bits128[2],
01544                             const uint64_t Undef128[2],
01545                             unsigned &SplatBits, unsigned &SplatUndef,
01546                             unsigned &SplatSize) {
01547 
01548   // Don't let undefs prevent splats from matching.  See if the top 64-bits are
01549   // the same as the lower 64-bits, ignoring undefs.
01550   if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
01551     return false;  // Can't be a splat if two pieces don't match.
01552 
01553   uint64_t Bits64  = Bits128[0] | Bits128[1];
01554   uint64_t Undef64 = Undef128[0] & Undef128[1];
01555 
01556   // Check that the top 32-bits are the same as the lower 32-bits, ignoring
01557   // undefs.
01558   if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
01559     return false;  // Can't be a splat if two pieces don't match.
01560 
01561   uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
01562   uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
01563 
01564   // If the top 16-bits are different from the lower 16-bits, ignoring
01565   // undefs, we have an i32 splat.
01566   if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) {
01567     SplatBits = Bits32;
01568     SplatUndef = Undef32;
01569     SplatSize = 4;
01570     return true;
01571   }
01572 
01573   uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
01574   uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
01575 
01576   // If the top 8-bits are different from the lower 8-bits, ignoring
01577   // undefs, we have an i16 splat.
01578   if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) {
01579     SplatBits = Bits16;
01580     SplatUndef = Undef16;
01581     SplatSize = 2;
01582     return true;
01583   }
01584 
01585   // Otherwise, we have an 8-bit splat.
01586   SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
01587   SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
01588   SplatSize = 1;
01589   return true;
01590 }
01591 
01592 /// BuildSplatI - Build a canonical splati of Val with an element size of
01593 /// SplatSize.  Cast the result to VT.
01594 static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT::ValueType VT,
01595                              SelectionDAG &DAG) {
01596   assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
01597 
01598   // Force vspltis[hw] -1 to vspltisb -1.
01599   if (Val == -1) SplatSize = 1;
01600 
01601   static const MVT::ValueType VTys[] = { // canonical VT to use for each size.
01602     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
01603   };
01604   MVT::ValueType CanonicalVT = VTys[SplatSize-1];
01605 
01606   // Build a canonical splat for this value.
01607   SDOperand Elt = DAG.getConstant(Val, MVT::getVectorBaseType(CanonicalVT));
01608   std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);
01609   SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);
01610   return DAG.getNode(ISD::BIT_CONVERT, VT, Res);
01611 }
01612 
01613 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
01614 /// specified intrinsic ID.
01615 static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS,
01616                                   SelectionDAG &DAG,
01617                                   MVT::ValueType DestVT = MVT::Other) {
01618   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
01619   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
01620                      DAG.getConstant(IID, MVT::i32), LHS, RHS);
01621 }
01622 
01623 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
01624 /// specified intrinsic ID.
01625 static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1, 01626 SDOperand Op2, SelectionDAG &DAG, 01627 MVT::ValueType DestVT = MVT::Other) { 01628 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 01629 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT, 01630 DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); 01631 } 01632 01633 01634 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 01635 /// amount. The result has the specified value type. 01636 static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt, 01637 MVT::ValueType VT, SelectionDAG &DAG) { 01638 // Force LHS/RHS to be the right type. 01639 LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS); 01640 RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS); 01641 01642 std::vector<SDOperand> Ops; 01643 for (unsigned i = 0; i != 16; ++i) 01644 Ops.push_back(DAG.getConstant(i+Amt, MVT::i32)); 01645 SDOperand T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS, 01646 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops)); 01647 return DAG.getNode(ISD::BIT_CONVERT, VT, T); 01648 } 01649 01650 // If this is a case we can't handle, return null and let the default 01651 // expansion code take care of it. If we CAN select this case, and if it 01652 // selects to a single instruction, return Op. Otherwise, if we can codegen 01653 // this case more efficiently than a constant pool load, lower it to the 01654 // sequence of ops that should be used. 01655 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 01656 // If this is a vector of constants or undefs, get the bits. A bit in 01657 // UndefBits is set if the corresponding element of the vector is an 01658 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are 01659 // zero. 01660 uint64_t VectorBits[2]; 01661 uint64_t UndefBits[2]; 01662 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)) 01663 return SDOperand(); // Not a constant vector. 01664 01665 // If this is a splat (repetition) of a value across the whole vector, return 01666 // the smallest size that splats it. For example, "0x01010101010101..." is a 01667 // splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and 01668 // SplatSize = 1 byte. 01669 unsigned SplatBits, SplatUndef, SplatSize; 01670 if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){ 01671 bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0; 01672 01673 // First, handle single instruction cases. 01674 01675 // All zeros? 01676 if (SplatBits == 0) { 01677 // Canonicalize all zero vectors to be v4i32. 01678 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 01679 SDOperand Z = DAG.getConstant(0, MVT::i32); 01680 Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z); 01681 Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z); 01682 } 01683 return Op; 01684 } 01685 01686 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 01687 int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize); 01688 if (SextVal >= -16 && SextVal <= 15) 01689 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG); 01690 01691 01692 // Two instruction sequences. 
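    // (Illustrative example, not from the original comments: a v8i16 splat of
    // 20 has no single vspltish encoding, but the add case below emits
    // "vspltish 10; vadduhm" instead of a constant pool load.)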
01693 
01694     // If this value is in the range [-32,30] and is even, use:
01695     //    tmp = VSPLTI[bhw], result = add tmp, tmp
01696     if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
01697       Op = BuildSplatI(SextVal >> 1, SplatSize, Op.getValueType(), DAG);
01698       return DAG.getNode(ISD::ADD, Op.getValueType(), Op, Op);
01699     }
01700 
01701     // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
01702     // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
01703     // for fneg/fabs.
01704     if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
01705       // Make -1 and vspltisw -1:
01706       SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG);
01707 
01708       // Make the VSLW intrinsic, computing 0x8000_0000.
01709       SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
01710                                        OnesV, DAG);
01711 
01712       // xor by OnesV to invert it.
01713       Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
01714       return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
01715     }
01716 
01717     // Check to see if this is a wide variety of vsplti*, binop self cases.
01718     unsigned SplatBitSize = SplatSize*8;
01719     static const char SplatCsts[] = {
01720       -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
01721       -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
01722     };
01723     for (unsigned idx = 0; idx < sizeof(SplatCsts)/sizeof(SplatCsts[0]); ++idx){
01724       // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
01725       // cases which are ambiguous (e.g. formation of 0x8000_0000).
01726       int i = SplatCsts[idx];
01727 
01728       // Figure out what shift amount will be used by altivec if shifted by i in
01729       // this splat size.
01730       unsigned TypeShiftAmt = i & (SplatBitSize-1);
01731 
01732       // vsplti + shl self.
01733       if (SextVal == (i << (int)TypeShiftAmt)) {
01734         Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
01735         static const unsigned IIDs[] = { // Intrinsic to use for each size.
01736           Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
01737           Intrinsic::ppc_altivec_vslw
01738         };
01739         return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
01740       }
01741 
01742       // vsplti + srl self.
01743       if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
01744         Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
01745         static const unsigned IIDs[] = { // Intrinsic to use for each size.
01746           Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
01747           Intrinsic::ppc_altivec_vsrw
01748         };
01749         return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
01750       }
01751 
01752       // vsplti + sra self.  (Arithmetic shift, unlike the srl case above.)
01753       if (SextVal == (i >> (int)TypeShiftAmt)) {
01754         Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
01755         static const unsigned IIDs[] = { // Intrinsic to use for each size.
01756           Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
01757           Intrinsic::ppc_altivec_vsraw
01758         };
01759         return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
01760       }
01761 
01762       // vsplti + rol self.
01763       if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
01764                            ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
01765         Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
01766         static const unsigned IIDs[] = { // Intrinsic to use for each size.
01767           Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
01768           Intrinsic::ppc_altivec_vrlw
01769         };
01770         return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
01771       }
01772 
01773       // t = vsplti c, result = vsldoi t, t, 1
01774       if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
01775         SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
01776         return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG);
01777       }
01778       // t = vsplti c, result = vsldoi t, t, 2
01779       if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
01780         SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
01781         return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG);
01782       }
01783       // t = vsplti c, result = vsldoi t, t, 3
01784       if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
01785         SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
01786         return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG);
01787       }
01788     }
01789 
01790     // Three instruction sequences.
01791 
01792     // Odd, in range [17,31]:  (vsplti C)-(vsplti -16).
01793     if (SextVal >= 0 && SextVal <= 31) {
01794       SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, Op.getValueType(),DAG);
01795       SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);
01796       return DAG.getNode(ISD::SUB, Op.getValueType(), LHS, RHS);
01797     }
01798     // Odd, in range [-31,-17]:  (vsplti C)+(vsplti -16).
01799     if (SextVal >= -31 && SextVal <= 0) {
01800       SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, Op.getValueType(),DAG);
01801       SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);
01802       return DAG.getNode(ISD::ADD, Op.getValueType(), LHS, RHS);
01803     }
01804   }
01805 
01806   return SDOperand();
01807 }
01808 
01809 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
01810 /// the specified operations to build the shuffle.
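/// (Note on the encoding, restated from the decode logic below: each PFEntry
/// packs a cost in bits [31:30], an opcode in bits [29:26], and two 13-bit
/// operand IDs in bits [25:13] and [12:0] that recursively name further table
/// entries or the original LHS/RHS inputs.)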
01811 static SDOperand GeneratePerfectShuffle(unsigned PFEntry, SDOperand LHS, 01812 SDOperand RHS, SelectionDAG &DAG) { 01813 unsigned OpNum = (PFEntry >> 26) & 0x0F; 01814 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 01815 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 01816 01817 enum { 01818 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 01819 OP_VMRGHW, 01820 OP_VMRGLW, 01821 OP_VSPLTISW0, 01822 OP_VSPLTISW1, 01823 OP_VSPLTISW2, 01824 OP_VSPLTISW3, 01825 OP_VSLDOI4, 01826 OP_VSLDOI8, 01827 OP_VSLDOI12 01828 }; 01829 01830 if (OpNum == OP_COPY) { 01831 if (LHSID == (1*9+2)*9+3) return LHS; 01832 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 01833 return RHS; 01834 } 01835 01836 SDOperand OpLHS, OpRHS; 01837 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG); 01838 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG); 01839 01840 unsigned ShufIdxs[16]; 01841 switch (OpNum) { 01842 default: assert(0 && "Unknown i32 permute!"); 01843 case OP_VMRGHW: 01844 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 01845 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 01846 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 01847 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 01848 break; 01849 case OP_VMRGLW: 01850 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 01851 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 01852 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 01853 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 01854 break; 01855 case OP_VSPLTISW0: 01856 for (unsigned i = 0; i != 16; ++i) 01857 ShufIdxs[i] = (i&3)+0; 01858 break; 01859 case OP_VSPLTISW1: 01860 for (unsigned i = 0; i != 16; ++i) 01861 ShufIdxs[i] = (i&3)+4; 01862 break; 01863 case OP_VSPLTISW2: 01864 for (unsigned i = 0; i != 16; ++i) 01865 ShufIdxs[i] = (i&3)+8; 01866 break; 01867 case OP_VSPLTISW3: 01868 for (unsigned i = 0; i != 16; ++i) 01869 ShufIdxs[i] = (i&3)+12; 01870 break; 01871 case OP_VSLDOI4: 01872 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG); 01873 case OP_VSLDOI8: 01874 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG); 01875 case OP_VSLDOI12: 01876 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG); 01877 } 01878 std::vector<SDOperand> Ops; 01879 for (unsigned i = 0; i != 16; ++i) 01880 Ops.push_back(DAG.getConstant(ShufIdxs[i], MVT::i32)); 01881 01882 return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS, 01883 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops)); 01884 } 01885 01886 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 01887 /// is a shuffle we can handle in a single instruction, return it. Otherwise, 01888 /// return the code it can be lowered into. Worst case, it can always be 01889 /// lowered into a vperm. 01890 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 01891 SDOperand V1 = Op.getOperand(0); 01892 SDOperand V2 = Op.getOperand(1); 01893 SDOperand PermMask = Op.getOperand(2); 01894 01895 // Cases that are handled by instructions that take permute immediates 01896 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 01897 // selected by the instruction selector. 
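  // (Example: with an undef V2, a byte mask repeating 0,1,2,3 across all four
  // words satisfies isSplatShuffleMask(.., 4) below and is later selected as
  // a single vspltw of element 0.)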
01898   if (V2.getOpcode() == ISD::UNDEF) {
01899     if (PPC::isSplatShuffleMask(PermMask.Val, 1) ||
01900         PPC::isSplatShuffleMask(PermMask.Val, 2) ||
01901         PPC::isSplatShuffleMask(PermMask.Val, 4) ||
01902         PPC::isVPKUWUMShuffleMask(PermMask.Val, true) ||
01903         PPC::isVPKUHUMShuffleMask(PermMask.Val, true) ||
01904         PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 ||
01905         PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) ||
01906         PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) ||
01907         PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) ||
01908         PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) ||
01909         PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) ||
01910         PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) {
01911       return Op;
01912     }
01913   }
01914 
01915   // Altivec has a variety of "shuffle immediates" that take two vector inputs
01916   // and produce a fixed permutation.  If any of these match, do not lower to
01917   // VPERM.
01918   if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) ||
01919       PPC::isVPKUHUMShuffleMask(PermMask.Val, false) ||
01920       PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 ||
01921       PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) ||
01922       PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) ||
01923       PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) ||
01924       PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) ||
01925       PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) ||
01926       PPC::isVMRGHShuffleMask(PermMask.Val, 4, false))
01927     return Op;
01928 
01929   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
01930   // perfect shuffle table to emit an optimal matching sequence.
01931   unsigned PFIndexes[4];
01932   bool isFourElementShuffle = true;
01933   for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
01934     unsigned EltNo = 8;   // Start out undef.
01935     for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
01936       if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)
01937         continue;   // Undef, ignore it.
01938 
01939       unsigned ByteSource =
01940         cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue();
01941       if ((ByteSource & 3) != j) {
01942         isFourElementShuffle = false;
01943         break;
01944       }
01945 
01946       if (EltNo == 8) {
01947         EltNo = ByteSource/4;
01948       } else if (EltNo != ByteSource/4) {
01949         isFourElementShuffle = false;
01950         break;
01951       }
01952     }
01953     PFIndexes[i] = EltNo;
01954   }
01955 
01956   // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
01957   // perfect shuffle table to determine if it is cost effective to do this as
01958   // discrete instructions, or whether we should use a vperm.
01959   if (isFourElementShuffle) {
01960     // Compute the index in the perfect shuffle table.
01961     unsigned PFTableIndex =
01962       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
01963 
01964     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
01965     unsigned Cost  = (PFEntry >> 30);
01966 
01967     // Determining when to avoid vperm is tricky.  Many things affect the cost
01968     // of vperm, particularly how many times the perm mask needs to be computed.
01969     // For example, if the perm mask can be hoisted out of a loop or is already
01970     // used (perhaps because there are multiple permutes with the same shuffle
01971     // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
01972     // the loop requires an extra register.
01973     //
01974     // As a compromise, we only emit discrete instructions if the shuffle can be
01975     // generated in 3 or fewer operations.  When we have loop information
01976     // available, if this block is within a loop, we should avoid using vperm
01977     // for 3-operation perms and use a constant pool load instead.
01978     if (Cost < 3)
01979       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG);
01980   }
01981 
01982   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
01983   // vector that will get spilled to the constant pool.
01984   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
01985 
01986   // The VECTOR_SHUFFLE mask is almost exactly what we want for vperm, except
01987   // that it is in input element units, not in bytes.  Convert now.
01988   MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
01989   unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
01990 
01991   std::vector<SDOperand> ResultMask;
01992   for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
01993     unsigned SrcElt;
01994     if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
01995       SrcElt = 0;
01996     else
01997       SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
01998 
01999     for (unsigned j = 0; j != BytesPerElement; ++j)
02000       ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
02001                                            MVT::i8));
02002   }
02003 
02004   SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
02005   return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
02006 }
02007 
02008 /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
02009 /// altivec comparison.  If it is, return true and fill in CompareOpc/isDot
02010 /// with information about the intrinsic.
02011 static bool getAltivecCompareInfo(SDOperand Intrin, int &CompareOpc,
02012                                   bool &isDot) {
02013   unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getValue();
02014   CompareOpc = -1;
02015   isDot = false;
02016   switch (IntrinsicID) {
02017   default: return false;
02018     // Comparison predicates.
02019   case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
02020   case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
02021   case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
02022   case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
02023   case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
02024   case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
02025   case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
02026   case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
02027   case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
02028   case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
02029   case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
02030   case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
02031   case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
02032 
02033     // Normal Comparisons.
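    // (These use the same compare opcodes as the predicate forms above, but
    // the non-dot variants only produce the mask result and do not set CR6,
    // so isDot stays false and no MFCR is needed.)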
02034 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 02035 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 02036 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 02037 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 02038 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 02039 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 02040 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 02041 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 02042 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 02043 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 02044 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 02045 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 02046 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 02047 } 02048 return true; 02049 } 02050 02051 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 02052 /// lower, do it, otherwise return null. 02053 static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 02054 // If this is a lowered altivec predicate compare, CompareOpc is set to the 02055 // opcode number of the comparison. 02056 int CompareOpc; 02057 bool isDot; 02058 if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) 02059 return SDOperand(); // Don't custom lower most intrinsics. 02060 02061 // If this is a non-dot comparison, make the VCMP node and we are done. 02062 if (!isDot) { 02063 SDOperand Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(), 02064 Op.getOperand(1), Op.getOperand(2), 02065 DAG.getConstant(CompareOpc, MVT::i32)); 02066 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp); 02067 } 02068 02069 // Create the PPCISD altivec 'dot' comparison node. 02070 std::vector<SDOperand> Ops; 02071 std::vector<MVT::ValueType> VTs; 02072 Ops.push_back(Op.getOperand(2)); // LHS 02073 Ops.push_back(Op.getOperand(3)); // RHS 02074 Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32)); 02075 VTs.push_back(Op.getOperand(2).getValueType()); 02076 VTs.push_back(MVT::Flag); 02077 SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops); 02078 02079 // Now that we have the comparison, emit a copy from the CR to a GPR. 02080 // This is flagged to the above dot comparison. 02081 SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32, 02082 DAG.getRegister(PPC::CR6, MVT::i32), 02083 CompNode.getValue(1)); 02084 02085 // Unpack the result based on how the target uses it. 02086 unsigned BitNo; // Bit # of CR6. 02087 bool InvertBit; // Invert result? 02088 switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) { 02089 default: // Can't happen, don't crash on invalid number though. 02090 case 0: // Return the value of the EQ bit of CR6. 02091 BitNo = 0; InvertBit = false; 02092 break; 02093 case 1: // Return the inverted value of the EQ bit of CR6. 02094 BitNo = 0; InvertBit = true; 02095 break; 02096 case 2: // Return the value of the LT bit of CR6. 02097 BitNo = 2; InvertBit = false; 02098 break; 02099 case 3: // Return the inverted value of the LT bit of CR6. 02100 BitNo = 2; InvertBit = true; 02101 break; 02102 } 02103 02104 // Shift the bit into the low position. 02105 Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags, 02106 DAG.getConstant(8-(3-BitNo), MVT::i32)); 02107 // Isolate the bit. 
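  // (Note on the shift above: MFCR leaves CR6's four bits LT,GT,EQ,SO in bit
  // positions 7..4 of the GPR, so shifting right by 8-(3-BitNo) = 5+BitNo
  // brings the requested bit to bit 0 before the AND below isolates it.)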
02108 Flags = DAG.getNode(ISD::AND, MVT::i32, Flags, 02109 DAG.getConstant(1, MVT::i32)); 02110 02111 // If we are supposed to, toggle the bit. 02112 if (InvertBit) 02113 Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags, 02114 DAG.getConstant(1, MVT::i32)); 02115 return Flags; 02116 } 02117 02118 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { 02119 // Create a stack slot that is 16-byte aligned. 02120 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 02121 int FrameIdx = FrameInfo->CreateStackObject(16, 16); 02122 SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32); 02123 02124 // Store the input value into Value#0 of the stack slot. 02125 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(), 02126 Op.getOperand(0), FIdx,DAG.getSrcValue(NULL)); 02127 // Load it out. 02128 return DAG.getLoad(Op.getValueType(), Store, FIdx, DAG.getSrcValue(NULL)); 02129 } 02130 02131 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) { 02132 if (Op.getValueType() == MVT::v4i32) { 02133 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 02134 02135 SDOperand Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG); 02136 SDOperand Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG); // +16 as shift amt. 02137 02138 SDOperand RHSSwap = // = vrlw RHS, 16 02139 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG); 02140 02141 // Shrinkify inputs to v8i16. 02142 LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, LHS); 02143 RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHS); 02144 RHSSwap = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHSSwap); 02145 02146 // Low parts multiplied together, generating 32-bit results (we ignore the 02147 // top parts). 02148 SDOperand LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 02149 LHS, RHS, DAG, MVT::v4i32); 02150 02151 SDOperand HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 02152 LHS, RHSSwap, Zero, DAG, MVT::v4i32); 02153 // Shift the high parts up 16 bits. 02154 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG); 02155 return DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd); 02156 } else if (Op.getValueType() == MVT::v8i16) { 02157 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 02158 02159 SDOperand Zero = BuildSplatI(0, 1, MVT::v8i16, DAG); 02160 02161 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 02162 LHS, RHS, Zero, DAG); 02163 } else if (Op.getValueType() == MVT::v16i8) { 02164 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 02165 02166 // Multiply the even 8-bit parts, producing 16-bit sums. 02167 SDOperand EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 02168 LHS, RHS, DAG, MVT::v8i16); 02169 EvenParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, EvenParts); 02170 02171 // Multiply the odd 8-bit parts, producing 16-bit sums. 02172 SDOperand OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 02173 LHS, RHS, DAG, MVT::v8i16); 02174 OddParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, OddParts); 02175 02176 // Merge the results together. 
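    // (vmuleub/vmuloub each produce eight 16-bit products; taking byte 2*i+1
    // of every big-endian v8i16 element keeps just the low 8 bits of each
    // product, and the shuffle below interleaves them into the v16i8 result.)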
02177 std::vector<SDOperand> Ops; 02178 for (unsigned i = 0; i != 8; ++i) { 02179 Ops.push_back(DAG.getConstant(2*i+1, MVT::i8)); 02180 Ops.push_back(DAG.getConstant(2*i+1+16, MVT::i8)); 02181 } 02182 02183 return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, EvenParts, OddParts, 02184 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops)); 02185 } else { 02186 assert(0 && "Unknown mul to lower!"); 02187 abort(); 02188 } 02189 } 02190 02191 /// LowerOperation - Provide custom lowering hooks for some operations. 02192 /// 02193 SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 02194 switch (Op.getOpcode()) { 02195 default: assert(0 && "Wasn't expecting to be able to lower this!"); 02196 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 02197 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 02198 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 02199 case ISD::SETCC: return LowerSETCC(Op, DAG); 02200 case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); 02201 case ISD::FORMAL_ARGUMENTS: 02202 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex); 02203 case ISD::CALL: return LowerCALL(Op, DAG); 02204 case ISD::RET: return LowerRET(Op, DAG); 02205 02206 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 02207 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 02208 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 02209 02210 // Lower 64-bit shifts. 02211 case ISD::SHL: return LowerSHL(Op, DAG, getPointerTy()); 02212 case ISD::SRL: return LowerSRL(Op, DAG, getPointerTy()); 02213 case ISD::SRA: return LowerSRA(Op, DAG, getPointerTy()); 02214 02215 // Vector-related lowering. 02216 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 02217 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 02218 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 02219 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 02220 case ISD::MUL: return LowerMUL(Op, DAG); 02221 } 02222 return SDOperand(); 02223 } 02224 02225 //===----------------------------------------------------------------------===// 02226 // Other Lowering Code 02227 //===----------------------------------------------------------------------===// 02228 02229 MachineBasicBlock * 02230 PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 02231 MachineBasicBlock *BB) { 02232 assert((MI->getOpcode() == PPC::SELECT_CC_I4 || 02233 MI->getOpcode() == PPC::SELECT_CC_I8 || 02234 MI->getOpcode() == PPC::SELECT_CC_F4 || 02235 MI->getOpcode() == PPC::SELECT_CC_F8 || 02236 MI->getOpcode() == PPC::SELECT_CC_VRRC) && 02237 "Unexpected instr type to insert"); 02238 02239 // To "insert" a SELECT_CC instruction, we actually have to insert the diamond 02240 // control-flow pattern. The incoming instruction knows the destination vreg 02241 // to set, the condition code register to branch on, the true/false values to 02242 // select between, and a branch opcode to use. 02243 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 02244 ilist<MachineBasicBlock>::iterator It = BB; 02245 ++It; 02246 02247 // thisMBB: 02248 // ... 02249 // TrueVal = ... 
02250 // cmpTY ccX, r1, r2 02251 // bCC copy1MBB 02252 // fallthrough --> copy0MBB 02253 MachineBasicBlock *thisMBB = BB; 02254 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 02255 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 02256 BuildMI(BB, MI->getOperand(4).getImmedValue(), 2) 02257 .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 02258 MachineFunction *F = BB->getParent(); 02259 F->getBasicBlockList().insert(It, copy0MBB); 02260 F->getBasicBlockList().insert(It, sinkMBB); 02261 // Update machine-CFG edges by first adding all successors of the current 02262 // block to the new block which will contain the Phi node for the select. 02263 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 02264 e = BB->succ_end(); i != e; ++i) 02265 sinkMBB->addSuccessor(*i); 02266 // Next, remove all successors of the current block, and add the true 02267 // and fallthrough blocks as its successors. 02268 while(!BB->succ_empty()) 02269 BB->removeSuccessor(BB->succ_begin()); 02270 BB->addSuccessor(copy0MBB); 02271 BB->addSuccessor(sinkMBB); 02272 02273 // copy0MBB: 02274 // %FalseValue = ... 02275 // # fallthrough to sinkMBB 02276 BB = copy0MBB; 02277 02278 // Update machine-CFG edges 02279 BB->addSuccessor(sinkMBB); 02280 02281 // sinkMBB: 02282 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 02283 // ... 02284 BB = sinkMBB; 02285 BuildMI(BB, PPC::PHI, 4, MI->getOperand(0).getReg()) 02286 .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) 02287 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 02288 02289 delete MI; // The pseudo instruction is gone now. 02290 return BB; 02291 } 02292 02293 //===----------------------------------------------------------------------===// 02294 // Target Optimization Hooks 02295 //===----------------------------------------------------------------------===// 02296 02297 SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N, 02298 DAGCombinerInfo &DCI) const { 02299 TargetMachine &TM = getTargetMachine(); 02300 SelectionDAG &DAG = DCI.DAG; 02301 switch (N->getOpcode()) { 02302 default: break; 02303 case ISD::SINT_TO_FP: 02304 if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) { 02305 if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) { 02306 // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores. 02307 // We allow the src/dst to be either f32/f64, but the intermediate 02308 // type must be i64. 02309 if (N->getOperand(0).getValueType() == MVT::i64) { 02310 SDOperand Val = N->getOperand(0).getOperand(0); 02311 if (Val.getValueType() == MVT::f32) { 02312 Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val); 02313 DCI.AddToWorklist(Val.Val); 02314 } 02315 02316 Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val); 02317 DCI.AddToWorklist(Val.Val); 02318 Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val); 02319 DCI.AddToWorklist(Val.Val); 02320 if (N->getValueType(0) == MVT::f32) { 02321 Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val); 02322 DCI.AddToWorklist(Val.Val); 02323 } 02324 return Val; 02325 } else if (N->getOperand(0).getValueType() == MVT::i32) { 02326 // If the intermediate type is i32, we can avoid the load/store here 02327 // too. 02328 } 02329 } 02330 } 02331 break; 02332 case ISD::STORE: 02333 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 
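    // (fctiwz leaves the converted 32-bit integer in the low word of an FPR,
    // and stfiwx stores that word directly, so this combine avoids the stack
    // slot round-trip that the generic FP_TO_SINT lowering would use.)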
02334 if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() && 02335 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && 02336 N->getOperand(1).getValueType() == MVT::i32) { 02337 SDOperand Val = N->getOperand(1).getOperand(0); 02338 if (Val.getValueType() == MVT::f32) { 02339 Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val); 02340 DCI.AddToWorklist(Val.Val); 02341 } 02342 Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val); 02343 DCI.AddToWorklist(Val.Val); 02344 02345 Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val, 02346 N->getOperand(2), N->getOperand(3)); 02347 DCI.AddToWorklist(Val.Val); 02348 return Val; 02349 } 02350 02351 // Turn STORE (BSWAP) -> sthbrx/stwbrx. 02352 if (N->getOperand(1).getOpcode() == ISD::BSWAP && 02353 N->getOperand(1).Val->hasOneUse() && 02354 (N->getOperand(1).getValueType() == MVT::i32 || 02355 N->getOperand(1).getValueType() == MVT::i16)) { 02356 SDOperand BSwapOp = N->getOperand(1).getOperand(0); 02357 // Do an any-extend to 32-bits if this is a half-word input. 02358 if (BSwapOp.getValueType() == MVT::i16) 02359 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, BSwapOp); 02360 02361 return DAG.getNode(PPCISD::STBRX, MVT::Other, N->getOperand(0), BSwapOp, 02362 N->getOperand(2), N->getOperand(3), 02363 DAG.getValueType(N->getOperand(1).getValueType())); 02364 } 02365 break; 02366 case ISD::BSWAP: 02367 // Turn BSWAP (LOAD) -> lhbrx/lwbrx. 02368 if (N->getOperand(0).getOpcode() == ISD::LOAD && 02369 N->getOperand(0).hasOneUse() && 02370 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) { 02371 SDOperand Load = N->getOperand(0); 02372 // Create the byte-swapping load. 02373 std::vector<MVT::ValueType> VTs; 02374 VTs.push_back(MVT::i32); 02375 VTs.push_back(MVT::Other); 02376 std::vector<SDOperand> Ops; 02377 Ops.push_back(Load.getOperand(0)); // Chain 02378 Ops.push_back(Load.getOperand(1)); // Ptr 02379 Ops.push_back(Load.getOperand(2)); // SrcValue 02380 Ops.push_back(DAG.getValueType(N->getValueType(0))); // VT 02381 SDOperand BSLoad = DAG.getNode(PPCISD::LBRX, VTs, Ops); 02382 02383 // If this is an i16 load, insert the truncate. 02384 SDOperand ResVal = BSLoad; 02385 if (N->getValueType(0) == MVT::i16) 02386 ResVal = DAG.getNode(ISD::TRUNCATE, MVT::i16, BSLoad); 02387 02388 // First, combine the bswap away. This makes the value produced by the 02389 // load dead. 02390 DCI.CombineTo(N, ResVal); 02391 02392 // Next, combine the load away, we give it a bogus result value but a real 02393 // chain result. The result value is dead because the bswap is dead. 02394 DCI.CombineTo(Load.Val, ResVal, BSLoad.getValue(1)); 02395 02396 // Return N so it doesn't get rechecked! 02397 return SDOperand(N, 0); 02398 } 02399 02400 break; 02401 case PPCISD::VCMP: { 02402 // If a VCMPo node already exists with exactly the same operands as this 02403 // node, use its result instead of this node (VCMPo computes both a CR6 and 02404 // a normal output). 02405 // 02406 if (!N->getOperand(0).hasOneUse() && 02407 !N->getOperand(1).hasOneUse() && 02408 !N->getOperand(2).hasOneUse()) { 02409 02410 // Scan all of the users of the LHS, looking for VCMPo's that match. 
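      // (This is effectively manual CSE: VCMPo computes the same vector
      // result as VCMP plus the CR6 side effect, so an existing VCMPo with
      // identical operands can replace this node, provided its flag result is
      // consumed only in a way we understand, i.e. by the MFCR checked below.)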
02411 SDNode *VCMPoNode = 0; 02412 02413 SDNode *LHSN = N->getOperand(0).Val; 02414 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); 02415 UI != E; ++UI) 02416 if ((*UI)->getOpcode() == PPCISD::VCMPo && 02417 (*UI)->getOperand(1) == N->getOperand(1) && 02418 (*UI)->getOperand(2) == N->getOperand(2) && 02419 (*UI)->getOperand(0) == N->getOperand(0)) { 02420 VCMPoNode = *UI; 02421 break; 02422 } 02423 02424 // If there is no VCMPo node, or if the flag value has a single use, don't 02425 // transform this. 02426 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) 02427 break; 02428 02429 // Look at the (necessarily single) use of the flag value. If it has a 02430 // chain, this transformation is more complex. Note that multiple things 02431 // could use the value result, which we should ignore. 02432 SDNode *FlagUser = 0; 02433 for (SDNode::use_iterator UI = VCMPoNode->use_begin(); 02434 FlagUser == 0; ++UI) { 02435 assert(UI != VCMPoNode->use_end() && "Didn't find user!"); 02436 SDNode *User = *UI; 02437 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { 02438 if (User->getOperand(i) == SDOperand(VCMPoNode, 1)) { 02439 FlagUser = User; 02440 break; 02441 } 02442 } 02443 } 02444 02445 // If the user is a MFCR instruction, we know this is safe. Otherwise we 02446 // give up for right now. 02447 if (FlagUser->getOpcode() == PPCISD::MFCR) 02448 return SDOperand(VCMPoNode, 0); 02449 } 02450 break; 02451 } 02452 case ISD::BR_CC: { 02453 // If this is a branch on an altivec predicate comparison, lower this so 02454 // that we don't have to do a MFCR: instead, branch directly on CR6. This 02455 // lowering is done pre-legalize, because the legalizer lowers the predicate 02456 // compare down to code that is difficult to reassemble. 02457 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 02458 SDOperand LHS = N->getOperand(2), RHS = N->getOperand(3); 02459 int CompareOpc; 02460 bool isDot; 02461 02462 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 02463 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && 02464 getAltivecCompareInfo(LHS, CompareOpc, isDot)) { 02465 assert(isDot && "Can't compare against a vector result!"); 02466 02467 // If this is a comparison against something other than 0/1, then we know 02468 // that the condition is never/always true. 02469 unsigned Val = cast<ConstantSDNode>(RHS)->getValue(); 02470 if (Val != 0 && Val != 1) { 02471 if (CC == ISD::SETEQ) // Cond never true, remove branch. 02472 return N->getOperand(0); 02473 // Always !=, turn it into an unconditional branch. 02474 return DAG.getNode(ISD::BR, MVT::Other, 02475 N->getOperand(0), N->getOperand(4)); 02476 } 02477 02478 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); 02479 02480 // Create the PPCISD altivec 'dot' comparison node. 02481 std::vector<SDOperand> Ops; 02482 std::vector<MVT::ValueType> VTs; 02483 Ops.push_back(LHS.getOperand(2)); // LHS of compare 02484 Ops.push_back(LHS.getOperand(3)); // RHS of compare 02485 Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32)); 02486 VTs.push_back(LHS.getOperand(2).getValueType()); 02487 VTs.push_back(MVT::Flag); 02488 SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops); 02489 02490 // Unpack the result based on how the target uses it. 02491 unsigned CompOpc; 02492 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getValue()) { 02493 default: // Can't happen, don't crash on invalid number though. 02494 case 0: // Branch on the value of the EQ bit of CR6. 
02495 CompOpc = BranchOnWhenPredTrue ? PPC::BEQ : PPC::BNE; 02496 break; 02497 case 1: // Branch on the inverted value of the EQ bit of CR6. 02498 CompOpc = BranchOnWhenPredTrue ? PPC::BNE : PPC::BEQ; 02499 break; 02500 case 2: // Branch on the value of the LT bit of CR6. 02501 CompOpc = BranchOnWhenPredTrue ? PPC::BLT : PPC::BGE; 02502 break; 02503 case 3: // Branch on the inverted value of the LT bit of CR6. 02504 CompOpc = BranchOnWhenPredTrue ? PPC::BGE : PPC::BLT; 02505 break; 02506 } 02507 02508 return DAG.getNode(PPCISD::COND_BRANCH, MVT::Other, N->getOperand(0), 02509 DAG.getRegister(PPC::CR6, MVT::i32), 02510 DAG.getConstant(CompOpc, MVT::i32), 02511 N->getOperand(4), CompNode.getValue(1)); 02512 } 02513 break; 02514 } 02515 } 02516 02517 return SDOperand(); 02518 } 02519 02520 //===----------------------------------------------------------------------===// 02521 // Inline Assembly Support 02522 //===----------------------------------------------------------------------===// 02523 02524 void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 02525 uint64_t Mask, 02526 uint64_t &KnownZero, 02527 uint64_t &KnownOne, 02528 unsigned Depth) const { 02529 KnownZero = 0; 02530 KnownOne = 0; 02531 switch (Op.getOpcode()) { 02532 default: break; 02533 case PPCISD::LBRX: { 02534 // lhbrx is known to have the top bits cleared out. 02535 if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16) 02536 KnownZero = 0xFFFF0000; 02537 break; 02538 } 02539 case ISD::INTRINSIC_WO_CHAIN: { 02540 switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) { 02541 default: break; 02542 case Intrinsic::ppc_altivec_vcmpbfp_p: 02543 case Intrinsic::ppc_altivec_vcmpeqfp_p: 02544 case Intrinsic::ppc_altivec_vcmpequb_p: 02545 case Intrinsic::ppc_altivec_vcmpequh_p: 02546 case Intrinsic::ppc_altivec_vcmpequw_p: 02547 case Intrinsic::ppc_altivec_vcmpgefp_p: 02548 case Intrinsic::ppc_altivec_vcmpgtfp_p: 02549 case Intrinsic::ppc_altivec_vcmpgtsb_p: 02550 case Intrinsic::ppc_altivec_vcmpgtsh_p: 02551 case Intrinsic::ppc_altivec_vcmpgtsw_p: 02552 case Intrinsic::ppc_altivec_vcmpgtub_p: 02553 case Intrinsic::ppc_altivec_vcmpgtuh_p: 02554 case Intrinsic::ppc_altivec_vcmpgtuw_p: 02555 KnownZero = ~1U; // All bits but the low one are known to be zero. 02556 break; 02557 } 02558 } 02559 } 02560 } 02561 02562 02563 /// getConstraintType - Given a constraint letter, return the type of 02564 /// constraint it is for this target. 
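/// The letters handled here follow the GCC RS6000 conventions: 'b' is a base
/// register (any GPR but R0), 'r' any GPR, 'f' any FPR, 'v' any AltiVec
/// register, and 'y' any condition register.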
02565 PPCTargetLowering::ConstraintType
02566 PPCTargetLowering::getConstraintType(char ConstraintLetter) const {
02567   switch (ConstraintLetter) {
02568   default: break;
02569   case 'b':
02570   case 'r':
02571   case 'f':
02572   case 'v':
02573   case 'y':
02574     return C_RegisterClass;
02575   }
02576   return TargetLowering::getConstraintType(ConstraintLetter);
02577 }
02578 
02579 
02580 std::vector<unsigned> PPCTargetLowering::
02581 getRegClassForInlineAsmConstraint(const std::string &Constraint,
02582                                   MVT::ValueType VT) const {
02583   if (Constraint.size() == 1) {
02584     switch (Constraint[0]) {      // GCC RS6000 Constraint Letters
02585     default: break;               // Unknown constraint letter
02586     case 'b':
02587       return make_vector<unsigned>(/*no R0*/ PPC::R1 , PPC::R2 , PPC::R3 ,
02588                                    PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
02589                                    PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
02590                                    PPC::R12, PPC::R13, PPC::R14, PPC::R15,
02591                                    PPC::R16, PPC::R17, PPC::R18, PPC::R19,
02592                                    PPC::R20, PPC::R21, PPC::R22, PPC::R23,
02593                                    PPC::R24, PPC::R25, PPC::R26, PPC::R27,
02594                                    PPC::R28, PPC::R29, PPC::R30, PPC::R31,
02595                                    0);
02596     case 'r':
02597       return make_vector<unsigned>(PPC::R0 , PPC::R1 , PPC::R2 , PPC::R3 ,
02598                                    PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
02599                                    PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
02600                                    PPC::R12, PPC::R13, PPC::R14, PPC::R15,
02601                                    PPC::R16, PPC::R17, PPC::R18, PPC::R19,
02602                                    PPC::R20, PPC::R21, PPC::R22, PPC::R23,
02603                                    PPC::R24, PPC::R25, PPC::R26, PPC::R27,
02604                                    PPC::R28, PPC::R29, PPC::R30, PPC::R31,
02605                                    0);
02606     case 'f':
02607       return make_vector<unsigned>(PPC::F0 , PPC::F1 , PPC::F2 , PPC::F3 ,
02608                                    PPC::F4 , PPC::F5 , PPC::F6 , PPC::F7 ,
02609                                    PPC::F8 , PPC::F9 , PPC::F10, PPC::F11,
02610                                    PPC::F12, PPC::F13, PPC::F14, PPC::F15,
02611                                    PPC::F16, PPC::F17, PPC::F18, PPC::F19,
02612                                    PPC::F20, PPC::F21, PPC::F22, PPC::F23,
02613                                    PPC::F24, PPC::F25, PPC::F26, PPC::F27,
02614                                    PPC::F28, PPC::F29, PPC::F30, PPC::F31,
02615                                    0);
02616     case 'v':
02617       return make_vector<unsigned>(PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 ,
02618                                    PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
02619                                    PPC::V8 , PPC::V9 , PPC::V10, PPC::V11,
02620                                    PPC::V12, PPC::V13, PPC::V14, PPC::V15,
02621                                    PPC::V16, PPC::V17, PPC::V18, PPC::V19,
02622                                    PPC::V20, PPC::V21, PPC::V22, PPC::V23,
02623                                    PPC::V24, PPC::V25, PPC::V26, PPC::V27,
02624                                    PPC::V28, PPC::V29, PPC::V30, PPC::V31,
02625                                    0);
02626     case 'y':
02627       return make_vector<unsigned>(PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
02628                                    PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7,
02629                                    0);
02630     }
02631   }
02632 
02633   return std::vector<unsigned>();
02634 }
02635 
02636 // isOperandValidForConstraint
02637 bool PPCTargetLowering::
02638 isOperandValidForConstraint(SDOperand Op, char Letter) {
02639   switch (Letter) {
02640   default: break;
02641   case 'I':
02642   case 'J':
02643   case 'K':
02644   case 'L':
02645   case 'M':
02646   case 'N':
02647   case 'O':
02648   case 'P': {
02649     if (!isa<ConstantSDNode>(Op)) return false;   // Must be an immediate.
02650     unsigned Value = cast<ConstantSDNode>(Op)->getValue();
02651     switch (Letter) {
02652     default: assert(0 && "Unknown constraint letter!");
02653     case 'I':  // "I" is a signed 16-bit constant.
02654       return (short)Value == (int)Value;
02655     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
02656     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
02657       return (short)Value == 0;
02658     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
02659       return (Value >> 16) == 0;
02660     case 'M':  // "M" is a constant that is greater than 31.
02661       return Value > 31;
02662     case 'N':  // "N" is a positive constant that is an exact power of two.
02663       return (int)Value > 0 && isPowerOf2_32(Value);
02664     case 'O':  // "O" is the constant zero.
02665       return Value == 0;
02666     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
02667       return (short)-Value == (int)-Value;
02668     }
02669     break;
02670   }
02671 
02672   // Handle standard constraint letters.
02673   return TargetLowering::isOperandValidForConstraint(Op, Letter);
02674 }
02675 
02676 
02677 /// isLegalAddressImmediate - Return true if the integer value can be used
02678 /// as the offset of the target addressing mode.
02679 bool PPCTargetLowering::isLegalAddressImmediate(int64_t V) const {
02680   // PPC allows a sign-extended 16-bit immediate field.
02681   return (short)V == V;
02682 }