LLVM API Documentation
00001 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by Chris Lattner and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file defines the interfaces that X86 uses to lower LLVM code into a 00011 // selection DAG. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "X86.h" 00016 #include "X86InstrBuilder.h" 00017 #include "X86ISelLowering.h" 00018 #include "X86MachineFunctionInfo.h" 00019 #include "X86TargetMachine.h" 00020 #include "llvm/CallingConv.h" 00021 #include "llvm/Constants.h" 00022 #include "llvm/DerivedTypes.h" 00023 #include "llvm/Function.h" 00024 #include "llvm/Intrinsics.h" 00025 #include "llvm/ADT/VectorExtras.h" 00026 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 00027 #include "llvm/CodeGen/MachineFrameInfo.h" 00028 #include "llvm/CodeGen/MachineFunction.h" 00029 #include "llvm/CodeGen/MachineInstrBuilder.h" 00030 #include "llvm/CodeGen/SelectionDAG.h" 00031 #include "llvm/CodeGen/SSARegMap.h" 00032 #include "llvm/Support/MathExtras.h" 00033 #include "llvm/Target/TargetOptions.h" 00034 using namespace llvm; 00035 00036 // FIXME: temporary. 00037 #include "llvm/Support/CommandLine.h" 00038 static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden, 00039 cl::desc("Enable fastcc on X86")); 00040 00041 X86TargetLowering::X86TargetLowering(TargetMachine &TM) 00042 : TargetLowering(TM) { 00043 Subtarget = &TM.getSubtarget<X86Subtarget>(); 00044 X86ScalarSSE = Subtarget->hasSSE2(); 00045 00046 // Set up the TargetLowering object. 00047 00048 // X86 is weird, it always uses i8 for shift amounts and setcc results. 00049 setShiftAmountType(MVT::i8); 00050 setSetCCResultType(MVT::i8); 00051 setSetCCResultContents(ZeroOrOneSetCCResult); 00052 setSchedulingPreference(SchedulingForRegPressure); 00053 setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0 00054 setStackPointerRegisterToSaveRestore(X86::ESP); 00055 00056 if (!Subtarget->isTargetDarwin()) 00057 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp. 00058 setUseUnderscoreSetJmpLongJmp(true); 00059 00060 // Add legal addressing mode scale values. 00061 addLegalAddressScale(8); 00062 addLegalAddressScale(4); 00063 addLegalAddressScale(2); 00064 // Enter the ones which require both scale + index last. These are more 00065 // expensive. 00066 addLegalAddressScale(9); 00067 addLegalAddressScale(5); 00068 addLegalAddressScale(3); 00069 00070 // Set up the register classes. 00071 addRegisterClass(MVT::i8, X86::GR8RegisterClass); 00072 addRegisterClass(MVT::i16, X86::GR16RegisterClass); 00073 addRegisterClass(MVT::i32, X86::GR32RegisterClass); 00074 00075 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this 00076 // operation. 00077 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); 00078 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); 00079 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); 00080 00081 if (X86ScalarSSE) 00082 // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead. 00083 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand); 00084 else 00085 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); 00086 00087 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have 00088 // this operation. 00089 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); 00090 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); 00091 // SSE has no i16 to fp conversion, only i32 00092 if (X86ScalarSSE) 00093 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); 00094 else { 00095 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom); 00096 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); 00097 } 00098 00099 // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64 00100 // isn't legal. 00101 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); 00102 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); 00103 00104 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have 00105 // this operation. 00106 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); 00107 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); 00108 00109 if (X86ScalarSSE) { 00110 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); 00111 } else { 00112 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); 00113 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); 00114 } 00115 00116 // Handle FP_TO_UINT by promoting the destination to a larger signed 00117 // conversion. 00118 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote); 00119 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); 00120 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); 00121 00122 if (X86ScalarSSE && !Subtarget->hasSSE3()) 00123 // Expand FP_TO_UINT into a select. 00124 // FIXME: We would like to use a Custom expander here eventually to do 00125 // the optimal thing for SSE vs. the default expansion in the legalizer. 00126 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); 00127 else 00128 // With SSE3 we can use fisttpll to convert to a signed i64. 00129 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); 00130 00131 setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); 00132 setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); 00133 00134 setOperationAction(ISD::BRCOND , MVT::Other, Custom); 00135 setOperationAction(ISD::BR_CC , MVT::Other, Expand); 00136 setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); 00137 setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); 00138 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand); 00139 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand); 00140 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); 00141 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); 00142 setOperationAction(ISD::SEXTLOAD , MVT::i1 , Expand); 00143 setOperationAction(ISD::FREM , MVT::f64 , Expand); 00144 setOperationAction(ISD::CTPOP , MVT::i8 , Expand); 00145 setOperationAction(ISD::CTTZ , MVT::i8 , Expand); 00146 setOperationAction(ISD::CTLZ , MVT::i8 , Expand); 00147 setOperationAction(ISD::CTPOP , MVT::i16 , Expand); 00148 setOperationAction(ISD::CTTZ , MVT::i16 , Expand); 00149 setOperationAction(ISD::CTLZ , MVT::i16 , Expand); 00150 setOperationAction(ISD::CTPOP , MVT::i32 , Expand); 00151 setOperationAction(ISD::CTTZ , MVT::i32 , Expand); 00152 setOperationAction(ISD::CTLZ , MVT::i32 , Expand); 00153 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); 00154 setOperationAction(ISD::BSWAP , MVT::i16 , Expand); 00155 00156 // These should be promoted to a larger select which is supported. 00157 setOperationAction(ISD::SELECT , MVT::i1 , Promote); 00158 setOperationAction(ISD::SELECT , MVT::i8 , Promote); 00159 00160 // X86 wants to expand cmov itself. 00161 setOperationAction(ISD::SELECT , MVT::i16 , Custom); 00162 setOperationAction(ISD::SELECT , MVT::i32 , Custom); 00163 setOperationAction(ISD::SELECT , MVT::f32 , Custom); 00164 setOperationAction(ISD::SELECT , MVT::f64 , Custom); 00165 setOperationAction(ISD::SETCC , MVT::i8 , Custom); 00166 setOperationAction(ISD::SETCC , MVT::i16 , Custom); 00167 setOperationAction(ISD::SETCC , MVT::i32 , Custom); 00168 setOperationAction(ISD::SETCC , MVT::f32 , Custom); 00169 setOperationAction(ISD::SETCC , MVT::f64 , Custom); 00170 // X86 ret instruction may pop stack. 00171 setOperationAction(ISD::RET , MVT::Other, Custom); 00172 // Darwin ABI issue. 00173 setOperationAction(ISD::ConstantPool , MVT::i32 , Custom); 00174 setOperationAction(ISD::JumpTable , MVT::i32 , Custom); 00175 setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); 00176 setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom); 00177 // 64-bit addm sub, shl, sra, srl (iff 32-bit x86) 00178 setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom); 00179 setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom); 00180 setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom); 00181 // X86 wants to expand memset / memcpy itself. 00182 setOperationAction(ISD::MEMSET , MVT::Other, Custom); 00183 setOperationAction(ISD::MEMCPY , MVT::Other, Custom); 00184 00185 // We don't have line number support yet. 00186 setOperationAction(ISD::LOCATION, MVT::Other, Expand); 00187 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); 00188 // FIXME - use subtarget debug flags 00189 if (!Subtarget->isTargetDarwin()) 00190 setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand); 00191 00192 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 00193 setOperationAction(ISD::VASTART , MVT::Other, Custom); 00194 00195 // Use the default implementation. 00196 setOperationAction(ISD::VAARG , MVT::Other, Expand); 00197 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 00198 setOperationAction(ISD::VAEND , MVT::Other, Expand); 00199 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 00200 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 00201 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); 00202 00203 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 00204 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 00205 00206 if (X86ScalarSSE) { 00207 // Set up the FP register classes. 00208 addRegisterClass(MVT::f32, X86::FR32RegisterClass); 00209 addRegisterClass(MVT::f64, X86::FR64RegisterClass); 00210 00211 // Use ANDPD to simulate FABS. 00212 setOperationAction(ISD::FABS , MVT::f64, Custom); 00213 setOperationAction(ISD::FABS , MVT::f32, Custom); 00214 00215 // Use XORP to simulate FNEG. 00216 setOperationAction(ISD::FNEG , MVT::f64, Custom); 00217 setOperationAction(ISD::FNEG , MVT::f32, Custom); 00218 00219 // We don't support sin/cos/fmod 00220 setOperationAction(ISD::FSIN , MVT::f64, Expand); 00221 setOperationAction(ISD::FCOS , MVT::f64, Expand); 00222 setOperationAction(ISD::FREM , MVT::f64, Expand); 00223 setOperationAction(ISD::FSIN , MVT::f32, Expand); 00224 setOperationAction(ISD::FCOS , MVT::f32, Expand); 00225 setOperationAction(ISD::FREM , MVT::f32, Expand); 00226 00227 // Expand FP immediates into loads from the stack, except for the special 00228 // cases we handle. 00229 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 00230 setOperationAction(ISD::ConstantFP, MVT::f32, Expand); 00231 addLegalFPImmediate(+0.0); // xorps / xorpd 00232 } else { 00233 // Set up the FP register classes. 00234 addRegisterClass(MVT::f64, X86::RFPRegisterClass); 00235 00236 setOperationAction(ISD::UNDEF, MVT::f64, Expand); 00237 00238 if (!UnsafeFPMath) { 00239 setOperationAction(ISD::FSIN , MVT::f64 , Expand); 00240 setOperationAction(ISD::FCOS , MVT::f64 , Expand); 00241 } 00242 00243 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 00244 addLegalFPImmediate(+0.0); // FLD0 00245 addLegalFPImmediate(+1.0); // FLD1 00246 addLegalFPImmediate(-0.0); // FLD0/FCHS 00247 addLegalFPImmediate(-1.0); // FLD1/FCHS 00248 } 00249 00250 // First set operation action for all vector types to expand. Then we 00251 // will selectively turn on ones that can be effectively codegen'd. 00252 for (unsigned VT = (unsigned)MVT::Vector + 1; 00253 VT != (unsigned)MVT::LAST_VALUETYPE; VT++) { 00254 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand); 00255 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand); 00256 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand); 00257 setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand); 00258 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand); 00259 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 00260 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 00261 } 00262 00263 if (Subtarget->hasMMX()) { 00264 addRegisterClass(MVT::v8i8, X86::VR64RegisterClass); 00265 addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); 00266 addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); 00267 00268 // FIXME: add MMX packed arithmetics 00269 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand); 00270 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand); 00271 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand); 00272 } 00273 00274 if (Subtarget->hasSSE1()) { 00275 addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); 00276 00277 setOperationAction(ISD::AND, MVT::v4f32, Legal); 00278 setOperationAction(ISD::OR, MVT::v4f32, Legal); 00279 setOperationAction(ISD::XOR, MVT::v4f32, Legal); 00280 setOperationAction(ISD::ADD, MVT::v4f32, Legal); 00281 setOperationAction(ISD::SUB, MVT::v4f32, Legal); 00282 setOperationAction(ISD::MUL, MVT::v4f32, Legal); 00283 setOperationAction(ISD::LOAD, MVT::v4f32, Legal); 00284 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 00285 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); 00286 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); 00287 setOperationAction(ISD::SELECT, MVT::v4f32, Custom); 00288 } 00289 00290 if (Subtarget->hasSSE2()) { 00291 addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); 00292 addRegisterClass(MVT::v16i8, X86::VR128RegisterClass); 00293 addRegisterClass(MVT::v8i16, X86::VR128RegisterClass); 00294 addRegisterClass(MVT::v4i32, X86::VR128RegisterClass); 00295 addRegisterClass(MVT::v2i64, X86::VR128RegisterClass); 00296 00297 setOperationAction(ISD::ADD, MVT::v2f64, Legal); 00298 setOperationAction(ISD::ADD, MVT::v16i8, Legal); 00299 setOperationAction(ISD::ADD, MVT::v8i16, Legal); 00300 setOperationAction(ISD::ADD, MVT::v4i32, Legal); 00301 setOperationAction(ISD::SUB, MVT::v2f64, Legal); 00302 setOperationAction(ISD::SUB, MVT::v16i8, Legal); 00303 setOperationAction(ISD::SUB, MVT::v8i16, Legal); 00304 setOperationAction(ISD::SUB, MVT::v4i32, Legal); 00305 setOperationAction(ISD::MUL, MVT::v8i16, Legal); 00306 setOperationAction(ISD::MUL, MVT::v2f64, Legal); 00307 00308 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); 00309 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); 00310 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); 00311 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); 00312 // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones. 00313 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 00314 00315 // Custom lower build_vector, vector_shuffle, and extract_vector_elt. 00316 for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { 00317 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom); 00318 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom); 00319 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom); 00320 } 00321 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); 00322 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); 00323 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); 00324 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); 00325 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); 00326 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); 00327 00328 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 00329 for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { 00330 setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote); 00331 AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64); 00332 setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote); 00333 AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64); 00334 setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote); 00335 AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64); 00336 setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote); 00337 AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64); 00338 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote); 00339 AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64); 00340 } 00341 00342 // Custom lower v2i64 and v2f64 selects. 00343 setOperationAction(ISD::LOAD, MVT::v2f64, Legal); 00344 setOperationAction(ISD::LOAD, MVT::v2i64, Legal); 00345 setOperationAction(ISD::SELECT, MVT::v2f64, Custom); 00346 setOperationAction(ISD::SELECT, MVT::v2i64, Custom); 00347 } 00348 00349 // We want to custom lower some of our intrinsics. 00350 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 00351 00352 // We have target-specific dag combine patterns for the following nodes: 00353 setTargetDAGCombine(ISD::VECTOR_SHUFFLE); 00354 00355 computeRegisterProperties(); 00356 00357 // FIXME: These should be based on subtarget info. Plus, the values should 00358 // be smaller when we are in optimizing for size mode. 00359 maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores 00360 maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores 00361 maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores 00362 allowUnalignedMemoryAccesses = true; // x86 supports it! 00363 } 00364 00365 //===----------------------------------------------------------------------===// 00366 // C Calling Convention implementation 00367 //===----------------------------------------------------------------------===// 00368 00369 /// AddLiveIn - This helper function adds the specified physical register to the 00370 /// MachineFunction as a live in value. It also creates a corresponding virtual 00371 /// register for it. 00372 static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, 00373 TargetRegisterClass *RC) { 00374 assert(RC->contains(PReg) && "Not the correct regclass!"); 00375 unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC); 00376 MF.addLiveIn(PReg, VReg); 00377 return VReg; 00378 } 00379 00380 /// HowToPassCCCArgument - Returns how an formal argument of the specified type 00381 /// should be passed. If it is through stack, returns the size of the stack 00382 /// slot; if it is through XMM register, returns the number of XMM registers 00383 /// are needed. 00384 static void 00385 HowToPassCCCArgument(MVT::ValueType ObjectVT, unsigned NumXMMRegs, 00386 unsigned &ObjSize, unsigned &ObjXMMRegs) { 00387 ObjXMMRegs = 0; 00388 00389 switch (ObjectVT) { 00390 default: assert(0 && "Unhandled argument type!"); 00391 case MVT::i8: ObjSize = 1; break; 00392 case MVT::i16: ObjSize = 2; break; 00393 case MVT::i32: ObjSize = 4; break; 00394 case MVT::i64: ObjSize = 8; break; 00395 case MVT::f32: ObjSize = 4; break; 00396 case MVT::f64: ObjSize = 8; break; 00397 case MVT::v16i8: 00398 case MVT::v8i16: 00399 case MVT::v4i32: 00400 case MVT::v2i64: 00401 case MVT::v4f32: 00402 case MVT::v2f64: 00403 if (NumXMMRegs < 4) 00404 ObjXMMRegs = 1; 00405 else 00406 ObjSize = 16; 00407 break; 00408 } 00409 } 00410 00411 SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) { 00412 unsigned NumArgs = Op.Val->getNumValues() - 1; 00413 MachineFunction &MF = DAG.getMachineFunction(); 00414 MachineFrameInfo *MFI = MF.getFrameInfo(); 00415 SDOperand Root = Op.getOperand(0); 00416 std::vector<SDOperand> ArgValues; 00417 00418 // Add DAG nodes to load the arguments... On entry to a function on the X86, 00419 // the stack frame looks like this: 00420 // 00421 // [ESP] -- return address 00422 // [ESP + 4] -- first argument (leftmost lexically) 00423 // [ESP + 8] -- second argument, if first argument is <= 4 bytes in size 00424 // ... 00425 // 00426 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 00427 unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. 00428 static const unsigned XMMArgRegs[] = { 00429 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 00430 }; 00431 for (unsigned i = 0; i < NumArgs; ++i) { 00432 MVT::ValueType ObjectVT = Op.getValue(i).getValueType(); 00433 unsigned ArgIncrement = 4; 00434 unsigned ObjSize = 0; 00435 unsigned ObjXMMRegs = 0; 00436 HowToPassCCCArgument(ObjectVT, NumXMMRegs, ObjSize, ObjXMMRegs); 00437 if (ObjSize > 4) 00438 ArgIncrement = ObjSize; 00439 00440 SDOperand ArgValue; 00441 if (ObjXMMRegs) { 00442 // Passed in a XMM register. 00443 unsigned Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], 00444 X86::VR128RegisterClass); 00445 ArgValue= DAG.getCopyFromReg(Root, Reg, ObjectVT); 00446 ArgValues.push_back(ArgValue); 00447 NumXMMRegs += ObjXMMRegs; 00448 } else { 00449 // XMM arguments have to be aligned on 16-byte boundary. 00450 if (ObjSize == 16) 00451 ArgOffset = ((ArgOffset + 15) / 16) * 16; 00452 // Create the frame index object for this incoming parameter... 00453 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 00454 SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); 00455 ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, 00456 DAG.getSrcValue(NULL)); 00457 ArgValues.push_back(ArgValue); 00458 ArgOffset += ArgIncrement; // Move on to the next argument... 00459 } 00460 } 00461 00462 ArgValues.push_back(Root); 00463 00464 // If the function takes variable number of arguments, make a frame index for 00465 // the start of the first vararg value... for expansion of llvm.va_start. 00466 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 00467 if (isVarArg) 00468 VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset); 00469 ReturnAddrIndex = 0; // No return address slot generated yet. 00470 BytesToPopOnReturn = 0; // Callee pops nothing. 00471 BytesCallerReserves = ArgOffset; 00472 00473 // If this is a struct return on Darwin/X86, the callee pops the hidden struct 00474 // pointer. 00475 if (MF.getFunction()->getCallingConv() == CallingConv::CSRet && 00476 Subtarget->isTargetDarwin()) 00477 BytesToPopOnReturn = 4; 00478 00479 // Return the new list of results. 00480 std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(), 00481 Op.Val->value_end()); 00482 return DAG.getNode(ISD::MERGE_VALUES, RetVTs, ArgValues); 00483 } 00484 00485 00486 SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG) { 00487 SDOperand Chain = Op.getOperand(0); 00488 unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 00489 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 00490 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; 00491 SDOperand Callee = Op.getOperand(4); 00492 MVT::ValueType RetVT= Op.Val->getValueType(0); 00493 unsigned NumOps = (Op.getNumOperands() - 5) / 2; 00494 00495 // Keep track of the number of XMM regs passed so far. 00496 unsigned NumXMMRegs = 0; 00497 static const unsigned XMMArgRegs[] = { 00498 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 00499 }; 00500 00501 // Count how many bytes are to be pushed on the stack. 00502 unsigned NumBytes = 0; 00503 for (unsigned i = 0; i != NumOps; ++i) { 00504 SDOperand Arg = Op.getOperand(5+2*i); 00505 00506 switch (Arg.getValueType()) { 00507 default: assert(0 && "Unexpected ValueType for argument!"); 00508 case MVT::i8: 00509 case MVT::i16: 00510 case MVT::i32: 00511 case MVT::f32: 00512 NumBytes += 4; 00513 break; 00514 case MVT::i64: 00515 case MVT::f64: 00516 NumBytes += 8; 00517 break; 00518 case MVT::v16i8: 00519 case MVT::v8i16: 00520 case MVT::v4i32: 00521 case MVT::v2i64: 00522 case MVT::v4f32: 00523 case MVT::v2f64: 00524 if (NumXMMRegs < 4) 00525 ++NumXMMRegs; 00526 else { 00527 // XMM arguments have to be aligned on 16-byte boundary. 00528 NumBytes = ((NumBytes + 15) / 16) * 16; 00529 NumBytes += 16; 00530 } 00531 break; 00532 } 00533 } 00534 00535 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 00536 00537 // Arguments go on the stack in reverse order, as specified by the ABI. 00538 unsigned ArgOffset = 0; 00539 NumXMMRegs = 0; 00540 std::vector<std::pair<unsigned, SDOperand> > RegsToPass; 00541 std::vector<SDOperand> MemOpChains; 00542 SDOperand StackPtr = DAG.getRegister(X86::ESP, getPointerTy()); 00543 for (unsigned i = 0; i != NumOps; ++i) { 00544 SDOperand Arg = Op.getOperand(5+2*i); 00545 00546 switch (Arg.getValueType()) { 00547 default: assert(0 && "Unexpected ValueType for argument!"); 00548 case MVT::i8: 00549 case MVT::i16: { 00550 // Promote the integer to 32 bits. If the input type is signed use a 00551 // sign extend, otherwise use a zero extend. 00552 unsigned ExtOp = 00553 dyn_cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue() ? 00554 ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 00555 Arg = DAG.getNode(ExtOp, MVT::i32, Arg); 00556 } 00557 // Fallthrough 00558 00559 case MVT::i32: 00560 case MVT::f32: { 00561 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 00562 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 00563 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 00564 Arg, PtrOff, DAG.getSrcValue(NULL))); 00565 ArgOffset += 4; 00566 break; 00567 } 00568 case MVT::i64: 00569 case MVT::f64: { 00570 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 00571 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 00572 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 00573 Arg, PtrOff, DAG.getSrcValue(NULL))); 00574 ArgOffset += 8; 00575 break; 00576 } 00577 case MVT::v16i8: 00578 case MVT::v8i16: 00579 case MVT::v4i32: 00580 case MVT::v2i64: 00581 case MVT::v4f32: 00582 case MVT::v2f64: 00583 if (NumXMMRegs < 4) { 00584 RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg)); 00585 NumXMMRegs++; 00586 } else { 00587 // XMM arguments have to be aligned on 16-byte boundary. 00588 ArgOffset = ((ArgOffset + 15) / 16) * 16; 00589 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 00590 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 00591 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 00592 Arg, PtrOff, DAG.getSrcValue(NULL))); 00593 ArgOffset += 16; 00594 } 00595 } 00596 } 00597 00598 if (!MemOpChains.empty()) 00599 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOpChains); 00600 00601 // Build a sequence of copy-to-reg nodes chained together with token chain 00602 // and flag operands which copy the outgoing args into registers. 00603 SDOperand InFlag; 00604 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 00605 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 00606 InFlag); 00607 InFlag = Chain.getValue(1); 00608 } 00609 00610 // If the callee is a GlobalAddress node (quite common, every direct call is) 00611 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 00612 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 00613 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 00614 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 00615 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 00616 00617 std::vector<MVT::ValueType> NodeTys; 00618 NodeTys.push_back(MVT::Other); // Returns a chain 00619 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 00620 std::vector<SDOperand> Ops; 00621 Ops.push_back(Chain); 00622 Ops.push_back(Callee); 00623 00624 // Add argument registers to the end of the list so that they are known live 00625 // into the call. 00626 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 00627 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 00628 RegsToPass[i].second.getValueType())); 00629 00630 if (InFlag.Val) 00631 Ops.push_back(InFlag); 00632 00633 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, 00634 NodeTys, Ops); 00635 InFlag = Chain.getValue(1); 00636 00637 // Create the CALLSEQ_END node. 00638 unsigned NumBytesForCalleeToPush = 0; 00639 00640 // If this is is a call to a struct-return function on Darwin/X86, the callee 00641 // pops the hidden struct pointer, so we have to push it back. 00642 if (CallingConv == CallingConv::CSRet && Subtarget->isTargetDarwin()) 00643 NumBytesForCalleeToPush = 4; 00644 00645 NodeTys.clear(); 00646 NodeTys.push_back(MVT::Other); // Returns a chain 00647 if (RetVT != MVT::Other) 00648 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 00649 Ops.clear(); 00650 Ops.push_back(Chain); 00651 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 00652 Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy())); 00653 Ops.push_back(InFlag); 00654 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops); 00655 if (RetVT != MVT::Other) 00656 InFlag = Chain.getValue(1); 00657 00658 std::vector<SDOperand> ResultVals; 00659 NodeTys.clear(); 00660 switch (RetVT) { 00661 default: assert(0 && "Unknown value type to return!"); 00662 case MVT::Other: break; 00663 case MVT::i8: 00664 Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1); 00665 ResultVals.push_back(Chain.getValue(0)); 00666 NodeTys.push_back(MVT::i8); 00667 break; 00668 case MVT::i16: 00669 Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1); 00670 ResultVals.push_back(Chain.getValue(0)); 00671 NodeTys.push_back(MVT::i16); 00672 break; 00673 case MVT::i32: 00674 if (Op.Val->getValueType(1) == MVT::i32) { 00675 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1); 00676 ResultVals.push_back(Chain.getValue(0)); 00677 Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32, 00678 Chain.getValue(2)).getValue(1); 00679 ResultVals.push_back(Chain.getValue(0)); 00680 NodeTys.push_back(MVT::i32); 00681 } else { 00682 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1); 00683 ResultVals.push_back(Chain.getValue(0)); 00684 } 00685 NodeTys.push_back(MVT::i32); 00686 break; 00687 case MVT::v16i8: 00688 case MVT::v8i16: 00689 case MVT::v4i32: 00690 case MVT::v2i64: 00691 case MVT::v4f32: 00692 case MVT::v2f64: 00693 Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1); 00694 ResultVals.push_back(Chain.getValue(0)); 00695 NodeTys.push_back(RetVT); 00696 break; 00697 case MVT::f32: 00698 case MVT::f64: { 00699 std::vector<MVT::ValueType> Tys; 00700 Tys.push_back(MVT::f64); 00701 Tys.push_back(MVT::Other); 00702 Tys.push_back(MVT::Flag); 00703 std::vector<SDOperand> Ops; 00704 Ops.push_back(Chain); 00705 Ops.push_back(InFlag); 00706 SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops); 00707 Chain = RetVal.getValue(1); 00708 InFlag = RetVal.getValue(2); 00709 if (X86ScalarSSE) { 00710 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 00711 // shouldn't be necessary except that RFP cannot be live across 00712 // multiple blocks. When stackifier is fixed, they can be uncoupled. 00713 MachineFunction &MF = DAG.getMachineFunction(); 00714 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 00715 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 00716 Tys.clear(); 00717 Tys.push_back(MVT::Other); 00718 Ops.clear(); 00719 Ops.push_back(Chain); 00720 Ops.push_back(RetVal); 00721 Ops.push_back(StackSlot); 00722 Ops.push_back(DAG.getValueType(RetVT)); 00723 Ops.push_back(InFlag); 00724 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 00725 RetVal = DAG.getLoad(RetVT, Chain, StackSlot, 00726 DAG.getSrcValue(NULL)); 00727 Chain = RetVal.getValue(1); 00728 } 00729 00730 if (RetVT == MVT::f32 && !X86ScalarSSE) 00731 // FIXME: we would really like to remember that this FP_ROUND 00732 // operation is okay to eliminate if we allow excess FP precision. 00733 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 00734 ResultVals.push_back(RetVal); 00735 NodeTys.push_back(RetVT); 00736 break; 00737 } 00738 } 00739 00740 // If the function returns void, just return the chain. 00741 if (ResultVals.empty()) 00742 return Chain; 00743 00744 // Otherwise, merge everything together with a MERGE_VALUES node. 00745 NodeTys.push_back(MVT::Other); 00746 ResultVals.push_back(Chain); 00747 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, ResultVals); 00748 return Res.getValue(Op.ResNo); 00749 } 00750 00751 //===----------------------------------------------------------------------===// 00752 // Fast Calling Convention implementation 00753 //===----------------------------------------------------------------------===// 00754 // 00755 // The X86 'fast' calling convention passes up to two integer arguments in 00756 // registers (an appropriate portion of EAX/EDX), passes arguments in C order, 00757 // and requires that the callee pop its arguments off the stack (allowing proper 00758 // tail calls), and has the same return value conventions as C calling convs. 00759 // 00760 // This calling convention always arranges for the callee pop value to be 8n+4 00761 // bytes, which is needed for tail recursion elimination and stack alignment 00762 // reasons. 00763 // 00764 // Note that this can be enhanced in the future to pass fp vals in registers 00765 // (when we have a global fp allocator) and do other tricks. 00766 // 00767 00768 /// HowToPassFastCCArgument - Returns how an formal argument of the specified 00769 /// type should be passed. If it is through stack, returns the size of the stack 00770 /// slot; if it is through integer or XMM register, returns the number of 00771 /// integer or XMM registers are needed. 00772 static void 00773 HowToPassFastCCArgument(MVT::ValueType ObjectVT, 00774 unsigned NumIntRegs, unsigned NumXMMRegs, 00775 unsigned &ObjSize, unsigned &ObjIntRegs, 00776 unsigned &ObjXMMRegs) { 00777 ObjSize = 0; 00778 ObjIntRegs = 0; 00779 ObjXMMRegs = 0; 00780 00781 switch (ObjectVT) { 00782 default: assert(0 && "Unhandled argument type!"); 00783 case MVT::i8: 00784 #if FASTCC_NUM_INT_ARGS_INREGS > 0 00785 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) 00786 ObjIntRegs = 1; 00787 else 00788 #endif 00789 ObjSize = 1; 00790 break; 00791 case MVT::i16: 00792 #if FASTCC_NUM_INT_ARGS_INREGS > 0 00793 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) 00794 ObjIntRegs = 1; 00795 else 00796 #endif 00797 ObjSize = 2; 00798 break; 00799 case MVT::i32: 00800 #if FASTCC_NUM_INT_ARGS_INREGS > 0 00801 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) 00802 ObjIntRegs = 1; 00803 else 00804 #endif 00805 ObjSize = 4; 00806 break; 00807 case MVT::i64: 00808 #if FASTCC_NUM_INT_ARGS_INREGS > 0 00809 if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) { 00810 ObjIntRegs = 2; 00811 } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) { 00812 ObjIntRegs = 1; 00813 ObjSize = 4; 00814 } else 00815 #endif 00816 ObjSize = 8; 00817 case MVT::f32: 00818 ObjSize = 4; 00819 break; 00820 case MVT::f64: 00821 ObjSize = 8; 00822 break; 00823 case MVT::v16i8: 00824 case MVT::v8i16: 00825 case MVT::v4i32: 00826 case MVT::v2i64: 00827 case MVT::v4f32: 00828 case MVT::v2f64: 00829 if (NumXMMRegs < 4) 00830 ObjXMMRegs = 1; 00831 else 00832 ObjSize = 16; 00833 break; 00834 } 00835 } 00836 00837 SDOperand 00838 X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) { 00839 unsigned NumArgs = Op.Val->getNumValues()-1; 00840 MachineFunction &MF = DAG.getMachineFunction(); 00841 MachineFrameInfo *MFI = MF.getFrameInfo(); 00842 SDOperand Root = Op.getOperand(0); 00843 std::vector<SDOperand> ArgValues; 00844 00845 // Add DAG nodes to load the arguments... On entry to a function the stack 00846 // frame looks like this: 00847 // 00848 // [ESP] -- return address 00849 // [ESP + 4] -- first nonreg argument (leftmost lexically) 00850 // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size 00851 // ... 00852 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 00853 00854 // Keep track of the number of integer regs passed so far. This can be either 00855 // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both 00856 // used). 00857 unsigned NumIntRegs = 0; 00858 unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. 00859 00860 static const unsigned XMMArgRegs[] = { 00861 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 00862 }; 00863 00864 for (unsigned i = 0; i < NumArgs; ++i) { 00865 MVT::ValueType ObjectVT = Op.getValue(i).getValueType(); 00866 unsigned ArgIncrement = 4; 00867 unsigned ObjSize = 0; 00868 unsigned ObjIntRegs = 0; 00869 unsigned ObjXMMRegs = 0; 00870 00871 HowToPassFastCCArgument(ObjectVT, NumIntRegs, NumXMMRegs, 00872 ObjSize, ObjIntRegs, ObjXMMRegs); 00873 if (ObjSize > 4) 00874 ArgIncrement = ObjSize; 00875 00876 unsigned Reg = 0; 00877 SDOperand ArgValue; 00878 if (ObjIntRegs || ObjXMMRegs) { 00879 switch (ObjectVT) { 00880 default: assert(0 && "Unhandled argument type!"); 00881 case MVT::i8: 00882 Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL, 00883 X86::GR8RegisterClass); 00884 ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i8); 00885 break; 00886 case MVT::i16: 00887 Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX, 00888 X86::GR16RegisterClass); 00889 ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i16); 00890 break; 00891 case MVT::i32: 00892 Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX, 00893 X86::GR32RegisterClass); 00894 ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32); 00895 break; 00896 case MVT::i64: 00897 Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX, 00898 X86::GR32RegisterClass); 00899 ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32); 00900 if (ObjIntRegs == 2) { 00901 Reg = AddLiveIn(MF, X86::EDX, X86::GR32RegisterClass); 00902 SDOperand ArgValue2 = DAG.getCopyFromReg(Root, Reg, MVT::i32); 00903 ArgValue= DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2); 00904 } 00905 break; 00906 case MVT::v16i8: 00907 case MVT::v8i16: 00908 case MVT::v4i32: 00909 case MVT::v2i64: 00910 case MVT::v4f32: 00911 case MVT::v2f64: 00912 Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass); 00913 ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT); 00914 break; 00915 } 00916 NumIntRegs += ObjIntRegs; 00917 NumXMMRegs += ObjXMMRegs; 00918 } 00919 00920 if (ObjSize) { 00921 // XMM arguments have to be aligned on 16-byte boundary. 00922 if (ObjSize == 16) 00923 ArgOffset = ((ArgOffset + 15) / 16) * 16; 00924 // Create the SelectionDAG nodes corresponding to a load from this 00925 // parameter. 00926 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 00927 SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); 00928 if (ObjectVT == MVT::i64 && ObjIntRegs) { 00929 SDOperand ArgValue2 = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, 00930 DAG.getSrcValue(NULL)); 00931 ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2); 00932 } else 00933 ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, 00934 DAG.getSrcValue(NULL)); 00935 ArgOffset += ArgIncrement; // Move on to the next argument. 00936 } 00937 00938 ArgValues.push_back(ArgValue); 00939 } 00940 00941 ArgValues.push_back(Root); 00942 00943 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 00944 // arguments and the arguments after the retaddr has been pushed are aligned. 00945 if ((ArgOffset & 7) == 0) 00946 ArgOffset += 4; 00947 00948 VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. 00949 ReturnAddrIndex = 0; // No return address slot generated yet. 00950 BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments. 00951 BytesCallerReserves = 0; 00952 00953 // Finally, inform the code generator which regs we return values in. 00954 switch (getValueType(MF.getFunction()->getReturnType())) { 00955 default: assert(0 && "Unknown type!"); 00956 case MVT::isVoid: break; 00957 case MVT::i8: 00958 case MVT::i16: 00959 case MVT::i32: 00960 MF.addLiveOut(X86::EAX); 00961 break; 00962 case MVT::i64: 00963 MF.addLiveOut(X86::EAX); 00964 MF.addLiveOut(X86::EDX); 00965 break; 00966 case MVT::f32: 00967 case MVT::f64: 00968 MF.addLiveOut(X86::ST0); 00969 break; 00970 case MVT::v16i8: 00971 case MVT::v8i16: 00972 case MVT::v4i32: 00973 case MVT::v2i64: 00974 case MVT::v4f32: 00975 case MVT::v2f64: 00976 MF.addLiveOut(X86::XMM0); 00977 break; 00978 } 00979 00980 // Return the new list of results. 00981 std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(), 00982 Op.Val->value_end()); 00983 return DAG.getNode(ISD::MERGE_VALUES, RetVTs, ArgValues); 00984 } 00985 00986 SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG) { 00987 SDOperand Chain = Op.getOperand(0); 00988 unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 00989 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 00990 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; 00991 SDOperand Callee = Op.getOperand(4); 00992 MVT::ValueType RetVT= Op.Val->getValueType(0); 00993 unsigned NumOps = (Op.getNumOperands() - 5) / 2; 00994 00995 // Count how many bytes are to be pushed on the stack. 00996 unsigned NumBytes = 0; 00997 00998 // Keep track of the number of integer regs passed so far. This can be either 00999 // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both 01000 // used). 01001 unsigned NumIntRegs = 0; 01002 unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. 01003 01004 static const unsigned GPRArgRegs[][2] = { 01005 { X86::AL, X86::DL }, 01006 { X86::AX, X86::DX }, 01007 { X86::EAX, X86::EDX } 01008 }; 01009 static const unsigned XMMArgRegs[] = { 01010 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 01011 }; 01012 01013 for (unsigned i = 0; i != NumOps; ++i) { 01014 SDOperand Arg = Op.getOperand(5+2*i); 01015 01016 switch (Arg.getValueType()) { 01017 default: assert(0 && "Unknown value type!"); 01018 case MVT::i8: 01019 case MVT::i16: 01020 case MVT::i32: 01021 #if FASTCC_NUM_INT_ARGS_INREGS > 0 01022 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 01023 ++NumIntRegs; 01024 break; 01025 } 01026 #endif 01027 // Fall through 01028 case MVT::f32: 01029 NumBytes += 4; 01030 break; 01031 case MVT::f64: 01032 NumBytes += 8; 01033 break; 01034 case MVT::v16i8: 01035 case MVT::v8i16: 01036 case MVT::v4i32: 01037 case MVT::v2i64: 01038 case MVT::v4f32: 01039 case MVT::v2f64: 01040 if (NumXMMRegs < 4) 01041 NumXMMRegs++; 01042 else { 01043 // XMM arguments have to be aligned on 16-byte boundary. 01044 NumBytes = ((NumBytes + 15) / 16) * 16; 01045 NumBytes += 16; 01046 } 01047 break; 01048 } 01049 } 01050 01051 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 01052 // arguments and the arguments after the retaddr has been pushed are aligned. 01053 if ((NumBytes & 7) == 0) 01054 NumBytes += 4; 01055 01056 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 01057 01058 // Arguments go on the stack in reverse order, as specified by the ABI. 01059 unsigned ArgOffset = 0; 01060 NumIntRegs = 0; 01061 std::vector<std::pair<unsigned, SDOperand> > RegsToPass; 01062 std::vector<SDOperand> MemOpChains; 01063 SDOperand StackPtr = DAG.getRegister(X86::ESP, getPointerTy()); 01064 for (unsigned i = 0; i != NumOps; ++i) { 01065 SDOperand Arg = Op.getOperand(5+2*i); 01066 01067 switch (Arg.getValueType()) { 01068 default: assert(0 && "Unexpected ValueType for argument!"); 01069 case MVT::i8: 01070 case MVT::i16: 01071 case MVT::i32: 01072 #if FASTCC_NUM_INT_ARGS_INREGS > 0 01073 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 01074 RegsToPass.push_back( 01075 std::make_pair(GPRArgRegs[Arg.getValueType()-MVT::i8][NumIntRegs], 01076 Arg)); 01077 ++NumIntRegs; 01078 break; 01079 } 01080 #endif 01081 // Fall through 01082 case MVT::f32: { 01083 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 01084 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 01085 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 01086 Arg, PtrOff, DAG.getSrcValue(NULL))); 01087 ArgOffset += 4; 01088 break; 01089 } 01090 case MVT::f64: { 01091 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 01092 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 01093 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 01094 Arg, PtrOff, DAG.getSrcValue(NULL))); 01095 ArgOffset += 8; 01096 break; 01097 } 01098 case MVT::v16i8: 01099 case MVT::v8i16: 01100 case MVT::v4i32: 01101 case MVT::v2i64: 01102 case MVT::v4f32: 01103 case MVT::v2f64: 01104 if (NumXMMRegs < 4) { 01105 RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg)); 01106 NumXMMRegs++; 01107 } else { 01108 // XMM arguments have to be aligned on 16-byte boundary. 01109 ArgOffset = ((ArgOffset + 15) / 16) * 16; 01110 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 01111 PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 01112 MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 01113 Arg, PtrOff, DAG.getSrcValue(NULL))); 01114 ArgOffset += 16; 01115 } 01116 } 01117 } 01118 01119 if (!MemOpChains.empty()) 01120 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOpChains); 01121 01122 // Build a sequence of copy-to-reg nodes chained together with token chain 01123 // and flag operands which copy the outgoing args into registers. 01124 SDOperand InFlag; 01125 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 01126 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 01127 InFlag); 01128 InFlag = Chain.getValue(1); 01129 } 01130 01131 // If the callee is a GlobalAddress node (quite common, every direct call is) 01132 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 01133 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 01134 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 01135 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 01136 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 01137 01138 std::vector<MVT::ValueType> NodeTys; 01139 NodeTys.push_back(MVT::Other); // Returns a chain 01140 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 01141 std::vector<SDOperand> Ops; 01142 Ops.push_back(Chain); 01143 Ops.push_back(Callee); 01144 01145 // Add argument registers to the end of the list so that they are known live 01146 // into the call. 01147 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 01148 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 01149 RegsToPass[i].second.getValueType())); 01150 01151 if (InFlag.Val) 01152 Ops.push_back(InFlag); 01153 01154 // FIXME: Do not generate X86ISD::TAILCALL for now. 01155 Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, 01156 NodeTys, Ops); 01157 InFlag = Chain.getValue(1); 01158 01159 NodeTys.clear(); 01160 NodeTys.push_back(MVT::Other); // Returns a chain 01161 if (RetVT != MVT::Other) 01162 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 01163 Ops.clear(); 01164 Ops.push_back(Chain); 01165 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 01166 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 01167 Ops.push_back(InFlag); 01168 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops); 01169 if (RetVT != MVT::Other) 01170 InFlag = Chain.getValue(1); 01171 01172 std::vector<SDOperand> ResultVals; 01173 NodeTys.clear(); 01174 switch (RetVT) { 01175 default: assert(0 && "Unknown value type to return!"); 01176 case MVT::Other: break; 01177 case MVT::i8: 01178 Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1); 01179 ResultVals.push_back(Chain.getValue(0)); 01180 NodeTys.push_back(MVT::i8); 01181 break; 01182 case MVT::i16: 01183 Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1); 01184 ResultVals.push_back(Chain.getValue(0)); 01185 NodeTys.push_back(MVT::i16); 01186 break; 01187 case MVT::i32: 01188 if (Op.Val->getValueType(1) == MVT::i32) { 01189 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1); 01190 ResultVals.push_back(Chain.getValue(0)); 01191 Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32, 01192 Chain.getValue(2)).getValue(1); 01193 ResultVals.push_back(Chain.getValue(0)); 01194 NodeTys.push_back(MVT::i32); 01195 } else { 01196 Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1); 01197 ResultVals.push_back(Chain.getValue(0)); 01198 } 01199 NodeTys.push_back(MVT::i32); 01200 break; 01201 case MVT::v16i8: 01202 case MVT::v8i16: 01203 case MVT::v4i32: 01204 case MVT::v2i64: 01205 case MVT::v4f32: 01206 case MVT::v2f64: 01207 Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1); 01208 ResultVals.push_back(Chain.getValue(0)); 01209 NodeTys.push_back(RetVT); 01210 break; 01211 case MVT::f32: 01212 case MVT::f64: { 01213 std::vector<MVT::ValueType> Tys; 01214 Tys.push_back(MVT::f64); 01215 Tys.push_back(MVT::Other); 01216 Tys.push_back(MVT::Flag); 01217 std::vector<SDOperand> Ops; 01218 Ops.push_back(Chain); 01219 Ops.push_back(InFlag); 01220 SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops); 01221 Chain = RetVal.getValue(1); 01222 InFlag = RetVal.getValue(2); 01223 if (X86ScalarSSE) { 01224 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 01225 // shouldn't be necessary except that RFP cannot be live across 01226 // multiple blocks. When stackifier is fixed, they can be uncoupled. 01227 MachineFunction &MF = DAG.getMachineFunction(); 01228 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 01229 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 01230 Tys.clear(); 01231 Tys.push_back(MVT::Other); 01232 Ops.clear(); 01233 Ops.push_back(Chain); 01234 Ops.push_back(RetVal); 01235 Ops.push_back(StackSlot); 01236 Ops.push_back(DAG.getValueType(RetVT)); 01237 Ops.push_back(InFlag); 01238 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 01239 RetVal = DAG.getLoad(RetVT, Chain, StackSlot, 01240 DAG.getSrcValue(NULL)); 01241 Chain = RetVal.getValue(1); 01242 } 01243 01244 if (RetVT == MVT::f32 && !X86ScalarSSE) 01245 // FIXME: we would really like to remember that this FP_ROUND 01246 // operation is okay to eliminate if we allow excess FP precision. 01247 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 01248 ResultVals.push_back(RetVal); 01249 NodeTys.push_back(RetVT); 01250 break; 01251 } 01252 } 01253 01254 01255 // If the function returns void, just return the chain. 01256 if (ResultVals.empty()) 01257 return Chain; 01258 01259 // Otherwise, merge everything together with a MERGE_VALUES node. 01260 NodeTys.push_back(MVT::Other); 01261 ResultVals.push_back(Chain); 01262 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, ResultVals); 01263 return Res.getValue(Op.ResNo); 01264 } 01265 01266 SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 01267 if (ReturnAddrIndex == 0) { 01268 // Set up a frame object for the return address. 01269 MachineFunction &MF = DAG.getMachineFunction(); 01270 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); 01271 } 01272 01273 return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32); 01274 } 01275 01276 01277 01278 std::pair<SDOperand, SDOperand> X86TargetLowering:: 01279 LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth, 01280 SelectionDAG &DAG) { 01281 SDOperand Result; 01282 if (Depth) // Depths > 0 not supported yet! 01283 Result = DAG.getConstant(0, getPointerTy()); 01284 else { 01285 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 01286 if (!isFrameAddress) 01287 // Just load the return address 01288 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI, 01289 DAG.getSrcValue(NULL)); 01290 else 01291 Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI, 01292 DAG.getConstant(4, MVT::i32)); 01293 } 01294 return std::make_pair(Result, Chain); 01295 } 01296 01297 /// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode 01298 /// which corresponds to the condition code. 01299 static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) { 01300 switch (X86CC) { 01301 default: assert(0 && "Unknown X86 conditional code!"); 01302 case X86ISD::COND_A: return X86::JA; 01303 case X86ISD::COND_AE: return X86::JAE; 01304 case X86ISD::COND_B: return X86::JB; 01305 case X86ISD::COND_BE: return X86::JBE; 01306 case X86ISD::COND_E: return X86::JE; 01307 case X86ISD::COND_G: return X86::JG; 01308 case X86ISD::COND_GE: return X86::JGE; 01309 case X86ISD::COND_L: return X86::JL; 01310 case X86ISD::COND_LE: return X86::JLE; 01311 case X86ISD::COND_NE: return X86::JNE; 01312 case X86ISD::COND_NO: return X86::JNO; 01313 case X86ISD::COND_NP: return X86::JNP; 01314 case X86ISD::COND_NS: return X86::JNS; 01315 case X86ISD::COND_O: return X86::JO; 01316 case X86ISD::COND_P: return X86::JP; 01317 case X86ISD::COND_S: return X86::JS; 01318 } 01319 } 01320 01321 /// translateX86CC - do a one to one translation of a ISD::CondCode to the X86 01322 /// specific condition code. It returns a false if it cannot do a direct 01323 /// translation. X86CC is the translated CondCode. Flip is set to true if the 01324 /// the order of comparison operands should be flipped. 01325 static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP, 01326 unsigned &X86CC, bool &Flip) { 01327 Flip = false; 01328 X86CC = X86ISD::COND_INVALID; 01329 if (!isFP) { 01330 switch (SetCCOpcode) { 01331 default: break; 01332 case ISD::SETEQ: X86CC = X86ISD::COND_E; break; 01333 case ISD::SETGT: X86CC = X86ISD::COND_G; break; 01334 case ISD::SETGE: X86CC = X86ISD::COND_GE; break; 01335 case ISD::SETLT: X86CC = X86ISD::COND_L; break; 01336 case ISD::SETLE: X86CC = X86ISD::COND_LE; break; 01337 case ISD::SETNE: X86CC = X86ISD::COND_NE; break; 01338 case ISD::SETULT: X86CC = X86ISD::COND_B; break; 01339 case ISD::SETUGT: X86CC = X86ISD::COND_A; break; 01340 case ISD::SETULE: X86CC = X86ISD::COND_BE; break; 01341 case ISD::SETUGE: X86CC = X86ISD::COND_AE; break; 01342 } 01343 } else { 01344 // On a floating point condition, the flags are set as follows: 01345 // ZF PF CF op 01346 // 0 | 0 | 0 | X > Y 01347 // 0 | 0 | 1 | X < Y 01348 // 1 | 0 | 0 | X == Y 01349 // 1 | 1 | 1 | unordered 01350 switch (SetCCOpcode) { 01351 default: break; 01352 case ISD::SETUEQ: 01353 case ISD::SETEQ: X86CC = X86ISD::COND_E; break; 01354 case ISD::SETOLT: Flip = true; // Fallthrough 01355 case ISD::SETOGT: 01356 case ISD::SETGT: X86CC = X86ISD::COND_A; break; 01357 case ISD::SETOLE: Flip = true; // Fallthrough 01358 case ISD::SETOGE: 01359 case ISD::SETGE: X86CC = X86ISD::COND_AE; break; 01360 case ISD::SETUGT: Flip = true; // Fallthrough 01361 case ISD::SETULT: 01362 case ISD::SETLT: X86CC = X86ISD::COND_B; break; 01363 case ISD::SETUGE: Flip = true; // Fallthrough 01364 case ISD::SETULE: 01365 case ISD::SETLE: X86CC = X86ISD::COND_BE; break; 01366 case ISD::SETONE: 01367 case ISD::SETNE: X86CC = X86ISD::COND_NE; break; 01368 case ISD::SETUO: X86CC = X86ISD::COND_P; break; 01369 case ISD::SETO: X86CC = X86ISD::COND_NP; break; 01370 } 01371 } 01372 01373 return X86CC != X86ISD::COND_INVALID; 01374 } 01375 01376 static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC, 01377 bool &Flip) { 01378 return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip); 01379 } 01380 01381 /// hasFPCMov - is there a floating point cmov for the specific X86 condition 01382 /// code. Current x86 isa includes the following FP cmov instructions: 01383 /// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. 01384 static bool hasFPCMov(unsigned X86CC) { 01385 switch (X86CC) { 01386 default: 01387 return false; 01388 case X86ISD::COND_B: 01389 case X86ISD::COND_BE: 01390 case X86ISD::COND_E: 01391 case X86ISD::COND_P: 01392 case X86ISD::COND_A: 01393 case X86ISD::COND_AE: 01394 case X86ISD::COND_NE: 01395 case X86ISD::COND_NP: 01396 return true; 01397 } 01398 } 01399 01400 /// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra 01401 /// load. For Darwin, external and weak symbols are indirect, loading the value 01402 /// at address GV rather then the value of GV itself. This means that the 01403 /// GlobalAddress must be in the base or index register of the address, not the 01404 /// GV offset field. 01405 static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) { 01406 return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || 01407 (GV->isExternal() && !GV->hasNotBeenReadFromBytecode())); 01408 } 01409 01410 /// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return 01411 /// true if Op is undef or if its value falls within the specified range (L, H]. 01412 static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) { 01413 if (Op.getOpcode() == ISD::UNDEF) 01414 return true; 01415 01416 unsigned Val = cast<ConstantSDNode>(Op)->getValue(); 01417 return (Val >= Low && Val < Hi); 01418 } 01419 01420 /// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return 01421 /// true if Op is undef or if its value equal to the specified value. 01422 static bool isUndefOrEqual(SDOperand Op, unsigned Val) { 01423 if (Op.getOpcode() == ISD::UNDEF) 01424 return true; 01425 return cast<ConstantSDNode>(Op)->getValue() == Val; 01426 } 01427 01428 /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand 01429 /// specifies a shuffle of elements that is suitable for input to PSHUFD. 01430 bool X86::isPSHUFDMask(SDNode *N) { 01431 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01432 01433 if (N->getNumOperands() != 4) 01434 return false; 01435 01436 // Check if the value doesn't reference the second vector. 01437 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 01438 SDOperand Arg = N->getOperand(i); 01439 if (Arg.getOpcode() == ISD::UNDEF) continue; 01440 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01441 if (cast<ConstantSDNode>(Arg)->getValue() >= 4) 01442 return false; 01443 } 01444 01445 return true; 01446 } 01447 01448 /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 01449 /// specifies a shuffle of elements that is suitable for input to PSHUFHW. 01450 bool X86::isPSHUFHWMask(SDNode *N) { 01451 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01452 01453 if (N->getNumOperands() != 8) 01454 return false; 01455 01456 // Lower quadword copied in order. 01457 for (unsigned i = 0; i != 4; ++i) { 01458 SDOperand Arg = N->getOperand(i); 01459 if (Arg.getOpcode() == ISD::UNDEF) continue; 01460 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01461 if (cast<ConstantSDNode>(Arg)->getValue() != i) 01462 return false; 01463 } 01464 01465 // Upper quadword shuffled. 01466 for (unsigned i = 4; i != 8; ++i) { 01467 SDOperand Arg = N->getOperand(i); 01468 if (Arg.getOpcode() == ISD::UNDEF) continue; 01469 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01470 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 01471 if (Val < 4 || Val > 7) 01472 return false; 01473 } 01474 01475 return true; 01476 } 01477 01478 /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 01479 /// specifies a shuffle of elements that is suitable for input to PSHUFLW. 01480 bool X86::isPSHUFLWMask(SDNode *N) { 01481 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01482 01483 if (N->getNumOperands() != 8) 01484 return false; 01485 01486 // Upper quadword copied in order. 01487 for (unsigned i = 4; i != 8; ++i) 01488 if (!isUndefOrEqual(N->getOperand(i), i)) 01489 return false; 01490 01491 // Lower quadword shuffled. 01492 for (unsigned i = 0; i != 4; ++i) 01493 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 01494 return false; 01495 01496 return true; 01497 } 01498 01499 /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 01500 /// specifies a shuffle of elements that is suitable for input to SHUFP*. 01501 static bool isSHUFPMask(std::vector<SDOperand> &N) { 01502 unsigned NumElems = N.size(); 01503 if (NumElems != 2 && NumElems != 4) return false; 01504 01505 unsigned Half = NumElems / 2; 01506 for (unsigned i = 0; i < Half; ++i) 01507 if (!isUndefOrInRange(N[i], 0, NumElems)) 01508 return false; 01509 for (unsigned i = Half; i < NumElems; ++i) 01510 if (!isUndefOrInRange(N[i], NumElems, NumElems*2)) 01511 return false; 01512 01513 return true; 01514 } 01515 01516 bool X86::isSHUFPMask(SDNode *N) { 01517 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01518 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 01519 return ::isSHUFPMask(Ops); 01520 } 01521 01522 /// isCommutedSHUFP - Returns true if the shuffle mask is except 01523 /// the reverse of what x86 shuffles want. x86 shuffles requires the lower 01524 /// half elements to come from vector 1 (which would equal the dest.) and 01525 /// the upper half to come from vector 2. 01526 static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) { 01527 unsigned NumElems = Ops.size(); 01528 if (NumElems != 2 && NumElems != 4) return false; 01529 01530 unsigned Half = NumElems / 2; 01531 for (unsigned i = 0; i < Half; ++i) 01532 if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2)) 01533 return false; 01534 for (unsigned i = Half; i < NumElems; ++i) 01535 if (!isUndefOrInRange(Ops[i], 0, NumElems)) 01536 return false; 01537 return true; 01538 } 01539 01540 static bool isCommutedSHUFP(SDNode *N) { 01541 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01542 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 01543 return isCommutedSHUFP(Ops); 01544 } 01545 01546 /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 01547 /// specifies a shuffle of elements that is suitable for input to MOVHLPS. 01548 bool X86::isMOVHLPSMask(SDNode *N) { 01549 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01550 01551 if (N->getNumOperands() != 4) 01552 return false; 01553 01554 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 01555 return isUndefOrEqual(N->getOperand(0), 6) && 01556 isUndefOrEqual(N->getOperand(1), 7) && 01557 isUndefOrEqual(N->getOperand(2), 2) && 01558 isUndefOrEqual(N->getOperand(3), 3); 01559 } 01560 01561 /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 01562 /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 01563 bool X86::isMOVLPMask(SDNode *N) { 01564 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01565 01566 unsigned NumElems = N->getNumOperands(); 01567 if (NumElems != 2 && NumElems != 4) 01568 return false; 01569 01570 for (unsigned i = 0; i < NumElems/2; ++i) 01571 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 01572 return false; 01573 01574 for (unsigned i = NumElems/2; i < NumElems; ++i) 01575 if (!isUndefOrEqual(N->getOperand(i), i)) 01576 return false; 01577 01578 return true; 01579 } 01580 01581 /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 01582 /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} 01583 /// and MOVLHPS. 01584 bool X86::isMOVHPMask(SDNode *N) { 01585 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01586 01587 unsigned NumElems = N->getNumOperands(); 01588 if (NumElems != 2 && NumElems != 4) 01589 return false; 01590 01591 for (unsigned i = 0; i < NumElems/2; ++i) 01592 if (!isUndefOrEqual(N->getOperand(i), i)) 01593 return false; 01594 01595 for (unsigned i = 0; i < NumElems/2; ++i) { 01596 SDOperand Arg = N->getOperand(i + NumElems/2); 01597 if (!isUndefOrEqual(Arg, i + NumElems)) 01598 return false; 01599 } 01600 01601 return true; 01602 } 01603 01604 /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 01605 /// specifies a shuffle of elements that is suitable for input to UNPCKL. 01606 bool static isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 01607 unsigned NumElems = N.size(); 01608 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 01609 return false; 01610 01611 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 01612 SDOperand BitI = N[i]; 01613 SDOperand BitI1 = N[i+1]; 01614 if (!isUndefOrEqual(BitI, j)) 01615 return false; 01616 if (V2IsSplat) { 01617 if (isUndefOrEqual(BitI1, NumElems)) 01618 return false; 01619 } else { 01620 if (!isUndefOrEqual(BitI1, j + NumElems)) 01621 return false; 01622 } 01623 } 01624 01625 return true; 01626 } 01627 01628 bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) { 01629 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01630 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 01631 return ::isUNPCKLMask(Ops, V2IsSplat); 01632 } 01633 01634 /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 01635 /// specifies a shuffle of elements that is suitable for input to UNPCKH. 01636 bool static isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) { 01637 unsigned NumElems = N.size(); 01638 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 01639 return false; 01640 01641 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 01642 SDOperand BitI = N[i]; 01643 SDOperand BitI1 = N[i+1]; 01644 if (!isUndefOrEqual(BitI, j + NumElems/2)) 01645 return false; 01646 if (V2IsSplat) { 01647 if (isUndefOrEqual(BitI1, NumElems)) 01648 return false; 01649 } else { 01650 if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems)) 01651 return false; 01652 } 01653 } 01654 01655 return true; 01656 } 01657 01658 bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) { 01659 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01660 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 01661 return ::isUNPCKHMask(Ops, V2IsSplat); 01662 } 01663 01664 /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 01665 /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 01666 /// <0, 0, 1, 1> 01667 bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 01668 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01669 01670 unsigned NumElems = N->getNumOperands(); 01671 if (NumElems != 4 && NumElems != 8 && NumElems != 16) 01672 return false; 01673 01674 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 01675 SDOperand BitI = N->getOperand(i); 01676 SDOperand BitI1 = N->getOperand(i+1); 01677 01678 if (!isUndefOrEqual(BitI, j)) 01679 return false; 01680 if (!isUndefOrEqual(BitI1, j)) 01681 return false; 01682 } 01683 01684 return true; 01685 } 01686 01687 /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand 01688 /// specifies a shuffle of elements that is suitable for input to MOVSS, 01689 /// MOVSD, and MOVD, i.e. setting the lowest element. 01690 static bool isMOVLMask(std::vector<SDOperand> &N) { 01691 unsigned NumElems = N.size(); 01692 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 01693 return false; 01694 01695 if (!isUndefOrEqual(N[0], NumElems)) 01696 return false; 01697 01698 for (unsigned i = 1; i < NumElems; ++i) { 01699 SDOperand Arg = N[i]; 01700 if (!isUndefOrEqual(Arg, i)) 01701 return false; 01702 } 01703 01704 return true; 01705 } 01706 01707 bool X86::isMOVLMask(SDNode *N) { 01708 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01709 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 01710 return ::isMOVLMask(Ops); 01711 } 01712 01713 /// isCommutedMOVL - Returns true if the shuffle mask is except the reverse 01714 /// of what x86 movss want. X86 movs requires the lowest element to be lowest 01715 /// element of vector 2 and the other elements to come from vector 1 in order. 01716 static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false) { 01717 unsigned NumElems = Ops.size(); 01718 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 01719 return false; 01720 01721 if (!isUndefOrEqual(Ops[0], 0)) 01722 return false; 01723 01724 for (unsigned i = 1; i < NumElems; ++i) { 01725 SDOperand Arg = Ops[i]; 01726 if (V2IsSplat) { 01727 if (!isUndefOrEqual(Arg, NumElems)) 01728 return false; 01729 } else { 01730 if (!isUndefOrEqual(Arg, i+NumElems)) 01731 return false; 01732 } 01733 } 01734 01735 return true; 01736 } 01737 01738 static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false) { 01739 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01740 std::vector<SDOperand> Ops(N->op_begin(), N->op_end()); 01741 return isCommutedMOVL(Ops, V2IsSplat); 01742 } 01743 01744 /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand 01745 /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. 01746 bool X86::isMOVSHDUPMask(SDNode *N) { 01747 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01748 01749 if (N->getNumOperands() != 4) 01750 return false; 01751 01752 // Expect 1, 1, 3, 3 01753 for (unsigned i = 0; i < 2; ++i) { 01754 SDOperand Arg = N->getOperand(i); 01755 if (Arg.getOpcode() == ISD::UNDEF) continue; 01756 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01757 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 01758 if (Val != 1) return false; 01759 } 01760 01761 bool HasHi = false; 01762 for (unsigned i = 2; i < 4; ++i) { 01763 SDOperand Arg = N->getOperand(i); 01764 if (Arg.getOpcode() == ISD::UNDEF) continue; 01765 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01766 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 01767 if (Val != 3) return false; 01768 HasHi = true; 01769 } 01770 01771 // Don't use movshdup if it can be done with a shufps. 01772 return HasHi; 01773 } 01774 01775 /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand 01776 /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 01777 bool X86::isMOVSLDUPMask(SDNode *N) { 01778 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01779 01780 if (N->getNumOperands() != 4) 01781 return false; 01782 01783 // Expect 0, 0, 2, 2 01784 for (unsigned i = 0; i < 2; ++i) { 01785 SDOperand Arg = N->getOperand(i); 01786 if (Arg.getOpcode() == ISD::UNDEF) continue; 01787 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01788 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 01789 if (Val != 0) return false; 01790 } 01791 01792 bool HasHi = false; 01793 for (unsigned i = 2; i < 4; ++i) { 01794 SDOperand Arg = N->getOperand(i); 01795 if (Arg.getOpcode() == ISD::UNDEF) continue; 01796 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01797 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 01798 if (Val != 2) return false; 01799 HasHi = true; 01800 } 01801 01802 // Don't use movshdup if it can be done with a shufps. 01803 return HasHi; 01804 } 01805 01806 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 01807 /// a splat of a single element. 01808 static bool isSplatMask(SDNode *N) { 01809 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01810 01811 // This is a splat operation if each element of the permute is the same, and 01812 // if the value doesn't reference the second vector. 01813 unsigned NumElems = N->getNumOperands(); 01814 SDOperand ElementBase; 01815 unsigned i = 0; 01816 for (; i != NumElems; ++i) { 01817 SDOperand Elt = N->getOperand(i); 01818 if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt)) { 01819 ElementBase = Elt; 01820 break; 01821 } 01822 } 01823 01824 if (!ElementBase.Val) 01825 return false; 01826 01827 for (; i != NumElems; ++i) { 01828 SDOperand Arg = N->getOperand(i); 01829 if (Arg.getOpcode() == ISD::UNDEF) continue; 01830 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01831 if (Arg != ElementBase) return false; 01832 } 01833 01834 // Make sure it is a splat of the first vector operand. 01835 return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems; 01836 } 01837 01838 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 01839 /// a splat of a single element and it's a 2 or 4 element mask. 01840 bool X86::isSplatMask(SDNode *N) { 01841 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01842 01843 // We can only splat 64-bit, and 32-bit quantities with a single instruction. 01844 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 01845 return false; 01846 return ::isSplatMask(N); 01847 } 01848 01849 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 01850 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 01851 /// instructions. 01852 unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 01853 unsigned NumOperands = N->getNumOperands(); 01854 unsigned Shift = (NumOperands == 4) ? 2 : 1; 01855 unsigned Mask = 0; 01856 for (unsigned i = 0; i < NumOperands; ++i) { 01857 unsigned Val = 0; 01858 SDOperand Arg = N->getOperand(NumOperands-i-1); 01859 if (Arg.getOpcode() != ISD::UNDEF) 01860 Val = cast<ConstantSDNode>(Arg)->getValue(); 01861 if (Val >= NumOperands) Val -= NumOperands; 01862 Mask |= Val; 01863 if (i != NumOperands - 1) 01864 Mask <<= Shift; 01865 } 01866 01867 return Mask; 01868 } 01869 01870 /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 01871 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 01872 /// instructions. 01873 unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 01874 unsigned Mask = 0; 01875 // 8 nodes, but we only care about the last 4. 01876 for (unsigned i = 7; i >= 4; --i) { 01877 unsigned Val = 0; 01878 SDOperand Arg = N->getOperand(i); 01879 if (Arg.getOpcode() != ISD::UNDEF) 01880 Val = cast<ConstantSDNode>(Arg)->getValue(); 01881 Mask |= (Val - 4); 01882 if (i != 4) 01883 Mask <<= 2; 01884 } 01885 01886 return Mask; 01887 } 01888 01889 /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 01890 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 01891 /// instructions. 01892 unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 01893 unsigned Mask = 0; 01894 // 8 nodes, but we only care about the first 4. 01895 for (int i = 3; i >= 0; --i) { 01896 unsigned Val = 0; 01897 SDOperand Arg = N->getOperand(i); 01898 if (Arg.getOpcode() != ISD::UNDEF) 01899 Val = cast<ConstantSDNode>(Arg)->getValue(); 01900 Mask |= Val; 01901 if (i != 0) 01902 Mask <<= 2; 01903 } 01904 01905 return Mask; 01906 } 01907 01908 /// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 01909 /// specifies a 8 element shuffle that can be broken into a pair of 01910 /// PSHUFHW and PSHUFLW. 01911 static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 01912 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01913 01914 if (N->getNumOperands() != 8) 01915 return false; 01916 01917 // Lower quadword shuffled. 01918 for (unsigned i = 0; i != 4; ++i) { 01919 SDOperand Arg = N->getOperand(i); 01920 if (Arg.getOpcode() == ISD::UNDEF) continue; 01921 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01922 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 01923 if (Val > 4) 01924 return false; 01925 } 01926 01927 // Upper quadword shuffled. 01928 for (unsigned i = 4; i != 8; ++i) { 01929 SDOperand Arg = N->getOperand(i); 01930 if (Arg.getOpcode() == ISD::UNDEF) continue; 01931 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01932 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 01933 if (Val < 4 || Val > 7) 01934 return false; 01935 } 01936 01937 return true; 01938 } 01939 01940 /// CommuteVectorShuffle - Swap vector_shuffle operandsas well as 01941 /// values in ther permute mask. 01942 static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) { 01943 SDOperand V1 = Op.getOperand(0); 01944 SDOperand V2 = Op.getOperand(1); 01945 SDOperand Mask = Op.getOperand(2); 01946 MVT::ValueType VT = Op.getValueType(); 01947 MVT::ValueType MaskVT = Mask.getValueType(); 01948 MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT); 01949 unsigned NumElems = Mask.getNumOperands(); 01950 std::vector<SDOperand> MaskVec; 01951 01952 for (unsigned i = 0; i != NumElems; ++i) { 01953 SDOperand Arg = Mask.getOperand(i); 01954 if (Arg.getOpcode() == ISD::UNDEF) { 01955 MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT)); 01956 continue; 01957 } 01958 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01959 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 01960 if (Val < NumElems) 01961 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 01962 else 01963 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 01964 } 01965 01966 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 01967 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask); 01968 } 01969 01970 /// ShouldXformToMOVHLPS - Return true if the node should be transformed to 01971 /// match movhlps. The lower half elements should come from upper half of 01972 /// V1 (and in order), and the upper half elements should come from the upper 01973 /// half of V2 (and in order). 01974 static bool ShouldXformToMOVHLPS(SDNode *Mask) { 01975 unsigned NumElems = Mask->getNumOperands(); 01976 if (NumElems != 4) 01977 return false; 01978 for (unsigned i = 0, e = 2; i != e; ++i) 01979 if (!isUndefOrEqual(Mask->getOperand(i), i+2)) 01980 return false; 01981 for (unsigned i = 2; i != 4; ++i) 01982 if (!isUndefOrEqual(Mask->getOperand(i), i+4)) 01983 return false; 01984 return true; 01985 } 01986 01987 /// isScalarLoadToVector - Returns true if the node is a scalar load that 01988 /// is promoted to a vector. 01989 static inline bool isScalarLoadToVector(SDNode *N) { 01990 if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { 01991 N = N->getOperand(0).Val; 01992 return (N->getOpcode() == ISD::LOAD); 01993 } 01994 return false; 01995 } 01996 01997 /// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to 01998 /// match movlp{s|d}. The lower half elements should come from lower half of 01999 /// V1 (and in order), and the upper half elements should come from the upper 02000 /// half of V2 (and in order). And since V1 will become the source of the 02001 /// MOVLP, it must be either a vector load or a scalar load to vector. 02002 static bool ShouldXformToMOVLP(SDNode *V1, SDNode *Mask) { 02003 if (V1->getOpcode() != ISD::LOAD && !isScalarLoadToVector(V1)) 02004 return false; 02005 02006 unsigned NumElems = Mask->getNumOperands(); 02007 if (NumElems != 2 && NumElems != 4) 02008 return false; 02009 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 02010 if (!isUndefOrEqual(Mask->getOperand(i), i)) 02011 return false; 02012 for (unsigned i = NumElems/2; i != NumElems; ++i) 02013 if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems)) 02014 return false; 02015 return true; 02016 } 02017 02018 /// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are 02019 /// all the same. 02020 static bool isSplatVector(SDNode *N) { 02021 if (N->getOpcode() != ISD::BUILD_VECTOR) 02022 return false; 02023 02024 SDOperand SplatValue = N->getOperand(0); 02025 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) 02026 if (N->getOperand(i) != SplatValue) 02027 return false; 02028 return true; 02029 } 02030 02031 /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements 02032 /// that point to V2 points to its first element. 02033 static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { 02034 assert(Mask.getOpcode() == ISD::BUILD_VECTOR); 02035 02036 bool Changed = false; 02037 std::vector<SDOperand> MaskVec; 02038 unsigned NumElems = Mask.getNumOperands(); 02039 for (unsigned i = 0; i != NumElems; ++i) { 02040 SDOperand Arg = Mask.getOperand(i); 02041 if (Arg.getOpcode() != ISD::UNDEF) { 02042 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 02043 if (Val > NumElems) { 02044 Arg = DAG.getConstant(NumElems, Arg.getValueType()); 02045 Changed = true; 02046 } 02047 } 02048 MaskVec.push_back(Arg); 02049 } 02050 02051 if (Changed) 02052 Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec); 02053 return Mask; 02054 } 02055 02056 /// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd 02057 /// operation of specified width. 02058 static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { 02059 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 02060 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 02061 02062 std::vector<SDOperand> MaskVec; 02063 MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); 02064 for (unsigned i = 1; i != NumElems; ++i) 02065 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 02066 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 02067 } 02068 02069 /// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation 02070 /// of specified width. 02071 static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { 02072 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 02073 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 02074 std::vector<SDOperand> MaskVec; 02075 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 02076 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 02077 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 02078 } 02079 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 02080 } 02081 02082 /// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation 02083 /// of specified width. 02084 static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { 02085 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 02086 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 02087 unsigned Half = NumElems/2; 02088 std::vector<SDOperand> MaskVec; 02089 for (unsigned i = 0; i != Half; ++i) { 02090 MaskVec.push_back(DAG.getConstant(i + Half, BaseVT)); 02091 MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT)); 02092 } 02093 return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 02094 } 02095 02096 /// getZeroVector - Returns a vector of specified type with all zero elements. 02097 /// 02098 static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { 02099 assert(MVT::isVector(VT) && "Expected a vector type"); 02100 unsigned NumElems = getVectorNumElements(VT); 02101 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 02102 bool isFP = MVT::isFloatingPoint(EVT); 02103 SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); 02104 std::vector<SDOperand> ZeroVec(NumElems, Zero); 02105 return DAG.getNode(ISD::BUILD_VECTOR, VT, ZeroVec); 02106 } 02107 02108 /// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. 02109 /// 02110 static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { 02111 SDOperand V1 = Op.getOperand(0); 02112 SDOperand Mask = Op.getOperand(2); 02113 MVT::ValueType VT = Op.getValueType(); 02114 unsigned NumElems = Mask.getNumOperands(); 02115 Mask = getUnpacklMask(NumElems, DAG); 02116 while (NumElems != 4) { 02117 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); 02118 NumElems >>= 1; 02119 } 02120 V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); 02121 02122 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 02123 Mask = getZeroVector(MaskVT, DAG); 02124 SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, 02125 DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); 02126 return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); 02127 } 02128 02129 /// isZeroNode - Returns true if Elt is a constant zero or a floating point 02130 /// constant +0.0. 02131 static inline bool isZeroNode(SDOperand Elt) { 02132 return ((isa<ConstantSDNode>(Elt) && 02133 cast<ConstantSDNode>(Elt)->getValue() == 0) || 02134 (isa<ConstantFPSDNode>(Elt) && 02135 cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0))); 02136 } 02137 02138 /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified 02139 /// vector and zero or undef vector. 02140 static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, 02141 unsigned NumElems, unsigned Idx, 02142 bool isZero, SelectionDAG &DAG) { 02143 SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); 02144 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 02145 MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); 02146 SDOperand Zero = DAG.getConstant(0, EVT); 02147 std::vector<SDOperand> MaskVec(NumElems, Zero); 02148 MaskVec[Idx] = DAG.getConstant(NumElems, EVT); 02149 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 02150 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 02151 } 02152 02153 /// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 02154 /// 02155 static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, 02156 unsigned NumNonZero, unsigned NumZero, 02157 SelectionDAG &DAG) { 02158 if (NumNonZero > 8) 02159 return SDOperand(); 02160 02161 SDOperand V(0, 0); 02162 bool First = true; 02163 for (unsigned i = 0; i < 16; ++i) { 02164 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; 02165 if (ThisIsNonZero && First) { 02166 if (NumZero) 02167 V = getZeroVector(MVT::v8i16, DAG); 02168 else 02169 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 02170 First = false; 02171 } 02172 02173 if ((i & 1) != 0) { 02174 SDOperand ThisElt(0, 0), LastElt(0, 0); 02175 bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; 02176 if (LastIsNonZero) { 02177 LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1)); 02178 } 02179 if (ThisIsNonZero) { 02180 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i)); 02181 ThisElt = DAG.getNode(ISD::SHL, MVT::i16, 02182 ThisElt, DAG.getConstant(8, MVT::i8)); 02183 if (LastIsNonZero) 02184 ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt); 02185 } else 02186 ThisElt = LastElt; 02187 02188 if (ThisElt.Val) 02189 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt, 02190 DAG.getConstant(i/2, MVT::i32)); 02191 } 02192 } 02193 02194 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); 02195 } 02196 02197 /// LowerBuildVectorv16i8 - Custom lower build_vector of v8i16. 02198 /// 02199 static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, 02200 unsigned NumNonZero, unsigned NumZero, 02201 SelectionDAG &DAG) { 02202 if (NumNonZero > 4) 02203 return SDOperand(); 02204 02205 SDOperand V(0, 0); 02206 bool First = true; 02207 for (unsigned i = 0; i < 8; ++i) { 02208 bool isNonZero = (NonZeros & (1 << i)) != 0; 02209 if (isNonZero) { 02210 if (First) { 02211 if (NumZero) 02212 V = getZeroVector(MVT::v8i16, DAG); 02213 else 02214 V = DAG.getNode(ISD::UNDEF, MVT::v8i16); 02215 First = false; 02216 } 02217 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i), 02218 DAG.getConstant(i, MVT::i32)); 02219 } 02220 } 02221 02222 return V; 02223 } 02224 02225 SDOperand 02226 X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 02227 // All zero's are handled with pxor. 02228 if (ISD::isBuildVectorAllZeros(Op.Val)) 02229 return Op; 02230 02231 // All one's are handled with pcmpeqd. 02232 if (ISD::isBuildVectorAllOnes(Op.Val)) 02233 return Op; 02234 02235 MVT::ValueType VT = Op.getValueType(); 02236 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 02237 unsigned EVTBits = MVT::getSizeInBits(EVT); 02238 02239 unsigned NumElems = Op.getNumOperands(); 02240 unsigned NumZero = 0; 02241 unsigned NumNonZero = 0; 02242 unsigned NonZeros = 0; 02243 std::set<SDOperand> Values; 02244 for (unsigned i = 0; i < NumElems; ++i) { 02245 SDOperand Elt = Op.getOperand(i); 02246 if (Elt.getOpcode() != ISD::UNDEF) { 02247 Values.insert(Elt); 02248 if (isZeroNode(Elt)) 02249 NumZero++; 02250 else { 02251 NonZeros |= (1 << i); 02252 NumNonZero++; 02253 } 02254 } 02255 } 02256 02257 if (NumNonZero == 0) 02258 // Must be a mix of zero and undef. Return a zero vector. 02259 return getZeroVector(VT, DAG); 02260 02261 // Splat is obviously ok. Let legalizer expand it to a shuffle. 02262 if (Values.size() == 1) 02263 return SDOperand(); 02264 02265 // Special case for single non-zero element. 02266 if (NumNonZero == 1) { 02267 unsigned Idx = CountTrailingZeros_32(NonZeros); 02268 SDOperand Item = Op.getOperand(Idx); 02269 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item); 02270 if (Idx == 0) 02271 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 02272 return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx, 02273 NumZero > 0, DAG); 02274 02275 if (EVTBits == 32) { 02276 // Turn it into a shuffle of zero and zero-extended scalar to vector. 02277 Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, 02278 DAG); 02279 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 02280 MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); 02281 std::vector<SDOperand> MaskVec; 02282 for (unsigned i = 0; i < NumElems; i++) 02283 MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); 02284 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 02285 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item, 02286 DAG.getNode(ISD::UNDEF, VT), Mask); 02287 } 02288 } 02289 02290 // Let legalizer expand 2-widde build_vector's. 02291 if (EVTBits == 64) 02292 return SDOperand(); 02293 02294 // If element VT is < 32 bits, convert it to inserts into a zero vector. 02295 if (EVTBits == 8) { 02296 SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG); 02297 if (V.Val) return V; 02298 } 02299 02300 if (EVTBits == 16) { 02301 SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG); 02302 if (V.Val) return V; 02303 } 02304 02305 // If element VT is == 32 bits, turn it into a number of shuffles. 02306 std::vector<SDOperand> V(NumElems); 02307 if (NumElems == 4 && NumZero > 0) { 02308 for (unsigned i = 0; i < 4; ++i) { 02309 bool isZero = !(NonZeros & (1 << i)); 02310 if (isZero) 02311 V[i] = getZeroVector(VT, DAG); 02312 else 02313 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 02314 } 02315 02316 for (unsigned i = 0; i < 2; ++i) { 02317 switch ((NonZeros & (0x3 << i*2)) >> (i*2)) { 02318 default: break; 02319 case 0: 02320 V[i] = V[i*2]; // Must be a zero vector. 02321 break; 02322 case 1: 02323 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2], 02324 getMOVLMask(NumElems, DAG)); 02325 break; 02326 case 2: 02327 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 02328 getMOVLMask(NumElems, DAG)); 02329 break; 02330 case 3: 02331 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1], 02332 getUnpacklMask(NumElems, DAG)); 02333 break; 02334 } 02335 } 02336 02337 // Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd) 02338 // clears the upper bits. 02339 // FIXME: we can do the same for v4f32 case when we know both parts of 02340 // the lower half come from scalar_to_vector (loadf32). We should do 02341 // that in post legalizer dag combiner with target specific hooks. 02342 if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) 02343 return V[0]; 02344 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 02345 MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); 02346 std::vector<SDOperand> MaskVec; 02347 bool Reverse = (NonZeros & 0x3) == 2; 02348 for (unsigned i = 0; i < 2; ++i) 02349 if (Reverse) 02350 MaskVec.push_back(DAG.getConstant(1-i, EVT)); 02351 else 02352 MaskVec.push_back(DAG.getConstant(i, EVT)); 02353 Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; 02354 for (unsigned i = 0; i < 2; ++i) 02355 if (Reverse) 02356 MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT)); 02357 else 02358 MaskVec.push_back(DAG.getConstant(i+NumElems, EVT)); 02359 SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 02360 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask); 02361 } 02362 02363 if (Values.size() > 2) { 02364 // Expand into a number of unpckl*. 02365 // e.g. for v4f32 02366 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 02367 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 02368 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 02369 SDOperand UnpckMask = getUnpacklMask(NumElems, DAG); 02370 for (unsigned i = 0; i < NumElems; ++i) 02371 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 02372 NumElems >>= 1; 02373 while (NumElems != 0) { 02374 for (unsigned i = 0; i < NumElems; ++i) 02375 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 02376 UnpckMask); 02377 NumElems >>= 1; 02378 } 02379 return V[0]; 02380 } 02381 02382 return SDOperand(); 02383 } 02384 02385 SDOperand 02386 X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 02387 SDOperand V1 = Op.getOperand(0); 02388 SDOperand V2 = Op.getOperand(1); 02389 SDOperand PermMask = Op.getOperand(2); 02390 MVT::ValueType VT = Op.getValueType(); 02391 unsigned NumElems = PermMask.getNumOperands(); 02392 bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; 02393 bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; 02394 02395 if (isSplatMask(PermMask.Val)) { 02396 if (NumElems <= 4) return Op; 02397 // Promote it to a v4i32 splat. 02398 return PromoteSplat(Op, DAG); 02399 } 02400 02401 if (X86::isMOVLMask(PermMask.Val)) 02402 return (V1IsUndef) ? V2 : Op; 02403 02404 if (X86::isMOVSHDUPMask(PermMask.Val) || 02405 X86::isMOVSLDUPMask(PermMask.Val) || 02406 X86::isMOVHLPSMask(PermMask.Val) || 02407 X86::isMOVHPMask(PermMask.Val) || 02408 X86::isMOVLPMask(PermMask.Val)) 02409 return Op; 02410 02411 if (ShouldXformToMOVHLPS(PermMask.Val) || 02412 ShouldXformToMOVLP(V1.Val, PermMask.Val)) 02413 return CommuteVectorShuffle(Op, DAG); 02414 02415 bool V1IsSplat = isSplatVector(V1.Val) || V1.getOpcode() == ISD::UNDEF; 02416 bool V2IsSplat = isSplatVector(V2.Val) || V2.getOpcode() == ISD::UNDEF; 02417 if (V1IsSplat && !V2IsSplat) { 02418 Op = CommuteVectorShuffle(Op, DAG); 02419 V1 = Op.getOperand(0); 02420 V2 = Op.getOperand(1); 02421 PermMask = Op.getOperand(2); 02422 V2IsSplat = true; 02423 } 02424 02425 if (isCommutedMOVL(PermMask.Val, V2IsSplat)) { 02426 if (V2IsUndef) return V1; 02427 Op = CommuteVectorShuffle(Op, DAG); 02428 V1 = Op.getOperand(0); 02429 V2 = Op.getOperand(1); 02430 PermMask = Op.getOperand(2); 02431 if (V2IsSplat) { 02432 // V2 is a splat, so the mask may be malformed. That is, it may point 02433 // to any V2 element. The instruction selectior won't like this. Get 02434 // a corrected mask and commute to form a proper MOVS{S|D}. 02435 SDOperand NewMask = getMOVLMask(NumElems, DAG); 02436 if (NewMask.Val != PermMask.Val) 02437 Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 02438 } 02439 return Op; 02440 } 02441 02442 if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 02443 X86::isUNPCKLMask(PermMask.Val) || 02444 X86::isUNPCKHMask(PermMask.Val)) 02445 return Op; 02446 02447 if (V2IsSplat) { 02448 // Normalize mask so all entries that point to V2 points to its first 02449 // element then try to match unpck{h|l} again. If match, return a 02450 // new vector_shuffle with the corrected mask. 02451 SDOperand NewMask = NormalizeMask(PermMask, DAG); 02452 if (NewMask.Val != PermMask.Val) { 02453 if (X86::isUNPCKLMask(PermMask.Val, true)) { 02454 SDOperand NewMask = getUnpacklMask(NumElems, DAG); 02455 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 02456 } else if (X86::isUNPCKHMask(PermMask.Val, true)) { 02457 SDOperand NewMask = getUnpackhMask(NumElems, DAG); 02458 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask); 02459 } 02460 } 02461 } 02462 02463 // Normalize the node to match x86 shuffle ops if needed 02464 if (V2.getOpcode() != ISD::UNDEF) 02465 if (isCommutedSHUFP(PermMask.Val)) { 02466 Op = CommuteVectorShuffle(Op, DAG); 02467 V1 = Op.getOperand(0); 02468 V2 = Op.getOperand(1); 02469 PermMask = Op.getOperand(2); 02470 } 02471 02472 // If VT is integer, try PSHUF* first, then SHUFP*. 02473 if (MVT::isInteger(VT)) { 02474 if (X86::isPSHUFDMask(PermMask.Val) || 02475 X86::isPSHUFHWMask(PermMask.Val) || 02476 X86::isPSHUFLWMask(PermMask.Val)) { 02477 if (V2.getOpcode() != ISD::UNDEF) 02478 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 02479 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 02480 return Op; 02481 } 02482 02483 if (X86::isSHUFPMask(PermMask.Val)) 02484 return Op; 02485 02486 // Handle v8i16 shuffle high / low shuffle node pair. 02487 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 02488 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 02489 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 02490 std::vector<SDOperand> MaskVec; 02491 for (unsigned i = 0; i != 4; ++i) 02492 MaskVec.push_back(PermMask.getOperand(i)); 02493 for (unsigned i = 4; i != 8; ++i) 02494 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 02495 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 02496 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 02497 MaskVec.clear(); 02498 for (unsigned i = 0; i != 4; ++i) 02499 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 02500 for (unsigned i = 4; i != 8; ++i) 02501 MaskVec.push_back(PermMask.getOperand(i)); 02502 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 02503 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 02504 } 02505 } else { 02506 // Floating point cases in the other order. 02507 if (X86::isSHUFPMask(PermMask.Val)) 02508 return Op; 02509 if (X86::isPSHUFDMask(PermMask.Val) || 02510 X86::isPSHUFHWMask(PermMask.Val) || 02511 X86::isPSHUFLWMask(PermMask.Val)) { 02512 if (V2.getOpcode() != ISD::UNDEF) 02513 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 02514 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 02515 return Op; 02516 } 02517 } 02518 02519 if (NumElems == 4) { 02520 MVT::ValueType MaskVT = PermMask.getValueType(); 02521 MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); 02522 std::vector<std::pair<int, int> > Locs; 02523 Locs.reserve(NumElems); 02524 std::vector<SDOperand> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 02525 std::vector<SDOperand> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 02526 unsigned NumHi = 0; 02527 unsigned NumLo = 0; 02528 // If no more than two elements come from either vector. This can be 02529 // implemented with two shuffles. First shuffle gather the elements. 02530 // The second shuffle, which takes the first shuffle as both of its 02531 // vector operands, put the elements into the right order. 02532 for (unsigned i = 0; i != NumElems; ++i) { 02533 SDOperand Elt = PermMask.getOperand(i); 02534 if (Elt.getOpcode() == ISD::UNDEF) { 02535 Locs[i] = std::make_pair(-1, -1); 02536 } else { 02537 unsigned Val = cast<ConstantSDNode>(Elt)->getValue(); 02538 if (Val < NumElems) { 02539 Locs[i] = std::make_pair(0, NumLo); 02540 Mask1[NumLo] = Elt; 02541 NumLo++; 02542 } else { 02543 Locs[i] = std::make_pair(1, NumHi); 02544 if (2+NumHi < NumElems) 02545 Mask1[2+NumHi] = Elt; 02546 NumHi++; 02547 } 02548 } 02549 } 02550 if (NumLo <= 2 && NumHi <= 2) { 02551 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 02552 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, Mask1)); 02553 for (unsigned i = 0; i != NumElems; ++i) { 02554 if (Locs[i].first == -1) 02555 continue; 02556 else { 02557 unsigned Idx = (i < NumElems/2) ? 0 : NumElems; 02558 Idx += Locs[i].first * (NumElems/2) + Locs[i].second; 02559 Mask2[i] = DAG.getConstant(Idx, MaskEVT); 02560 } 02561 } 02562 02563 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, 02564 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, Mask2)); 02565 } 02566 02567 // Break it into (shuffle shuffle_hi, shuffle_lo). 02568 Locs.clear(); 02569 std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 02570 std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); 02571 std::vector<SDOperand> *MaskPtr = &LoMask; 02572 unsigned MaskIdx = 0; 02573 unsigned LoIdx = 0; 02574 unsigned HiIdx = NumElems/2; 02575 for (unsigned i = 0; i != NumElems; ++i) { 02576 if (i == NumElems/2) { 02577 MaskPtr = &HiMask; 02578 MaskIdx = 1; 02579 LoIdx = 0; 02580 HiIdx = NumElems/2; 02581 } 02582 SDOperand Elt = PermMask.getOperand(i); 02583 if (Elt.getOpcode() == ISD::UNDEF) { 02584 Locs[i] = std::make_pair(-1, -1); 02585 } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) { 02586 Locs[i] = std::make_pair(MaskIdx, LoIdx); 02587 (*MaskPtr)[LoIdx] = Elt; 02588 LoIdx++; 02589 } else { 02590 Locs[i] = std::make_pair(MaskIdx, HiIdx); 02591 (*MaskPtr)[HiIdx] = Elt; 02592 HiIdx++; 02593 } 02594 } 02595 02596 SDOperand LoShuffle = 02597 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 02598 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, LoMask)); 02599 SDOperand HiShuffle = 02600 DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, 02601 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, HiMask)); 02602 std::vector<SDOperand> MaskOps; 02603 for (unsigned i = 0; i != NumElems; ++i) { 02604 if (Locs[i].first == -1) { 02605 MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); 02606 } else { 02607 unsigned Idx = Locs[i].first * NumElems + Locs[i].second; 02608 MaskOps.push_back(DAG.getConstant(Idx, MaskEVT)); 02609 } 02610 } 02611 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle, 02612 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskOps)); 02613 } 02614 02615 return SDOperand(); 02616 } 02617 02618 SDOperand 02619 X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 02620 if (!isa<ConstantSDNode>(Op.getOperand(1))) 02621 return SDOperand(); 02622 02623 MVT::ValueType VT = Op.getValueType(); 02624 // TODO: handle v16i8. 02625 if (MVT::getSizeInBits(VT) == 16) { 02626 // Transform it so it match pextrw which produces a 32-bit result. 02627 MVT::ValueType EVT = (MVT::ValueType)(VT+1); 02628 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 02629 Op.getOperand(0), Op.getOperand(1)); 02630 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 02631 DAG.getValueType(VT)); 02632 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 02633 } else if (MVT::getSizeInBits(VT) == 32) { 02634 SDOperand Vec = Op.getOperand(0); 02635 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 02636 if (Idx == 0) 02637 return Op; 02638 // SHUFPS the element to the lowest double word, then movss. 02639 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 02640 std::vector<SDOperand> IdxVec; 02641 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT))); 02642 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 02643 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 02644 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 02645 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 02646 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 02647 Vec, Vec, Mask); 02648 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 02649 DAG.getConstant(0, getPointerTy())); 02650 } else if (MVT::getSizeInBits(VT) == 64) { 02651 SDOperand Vec = Op.getOperand(0); 02652 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 02653 if (Idx == 0) 02654 return Op; 02655 02656 // UNPCKHPD the element to the lowest double word, then movsd. 02657 // Note if the lower 64 bits of the result of the UNPCKHPD is then stored 02658 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 02659 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 02660 std::vector<SDOperand> IdxVec; 02661 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT))); 02662 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 02663 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 02664 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 02665 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 02666 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 02667 DAG.getConstant(0, getPointerTy())); 02668 } 02669 02670 return SDOperand(); 02671 } 02672 02673 SDOperand 02674 X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { 02675 // Transform it so it match pinsrw which expects a 16-bit value in a GR32 02676 // as its second argument. 02677 MVT::ValueType VT = Op.getValueType(); 02678 MVT::ValueType BaseVT = MVT::getVectorBaseType(VT); 02679 SDOperand N0 = Op.getOperand(0); 02680 SDOperand N1 = Op.getOperand(1); 02681 SDOperand N2 = Op.getOperand(2); 02682 if (MVT::getSizeInBits(BaseVT) == 16) { 02683 if (N1.getValueType() != MVT::i32) 02684 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 02685 if (N2.getValueType() != MVT::i32) 02686 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32); 02687 return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); 02688 } else if (MVT::getSizeInBits(BaseVT) == 32) { 02689 unsigned Idx = cast<ConstantSDNode>(N2)->getValue(); 02690 if (Idx == 0) { 02691 // Use a movss. 02692 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); 02693 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 02694 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 02695 std::vector<SDOperand> MaskVec; 02696 MaskVec.push_back(DAG.getConstant(4, BaseVT)); 02697 for (unsigned i = 1; i <= 3; ++i) 02698 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 02699 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1, 02700 DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec)); 02701 } else { 02702 // Use two pinsrw instructions to insert a 32 bit value. 02703 Idx <<= 1; 02704 if (MVT::isFloatingPoint(N1.getValueType())) { 02705 if (N1.getOpcode() == ISD::LOAD) { 02706 // Just load directly from f32mem to GR32. 02707 N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1), 02708 N1.getOperand(2)); 02709 } else { 02710 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1); 02711 N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1); 02712 N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1, 02713 DAG.getConstant(0, getPointerTy())); 02714 } 02715 } 02716 N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0); 02717 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 02718 DAG.getConstant(Idx, getPointerTy())); 02719 N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8)); 02720 N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1, 02721 DAG.getConstant(Idx+1, getPointerTy())); 02722 return DAG.getNode(ISD::BIT_CONVERT, VT, N0); 02723 } 02724 } 02725 02726 return SDOperand(); 02727 } 02728 02729 SDOperand 02730 X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { 02731 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 02732 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); 02733 } 02734 02735 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 02736 // their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is 02737 // one of the above mentioned nodes. It has to be wrapped because otherwise 02738 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 02739 // be used to form addressing mode. These wrapped nodes will be selected 02740 // into MOV32ri. 02741 SDOperand 02742 X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) { 02743 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 02744 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 02745 DAG.getTargetConstantPool(CP->get(), getPointerTy(), 02746 CP->getAlignment())); 02747 if (Subtarget->isTargetDarwin()) { 02748 // With PIC, the address is actually $g + Offset. 02749 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 02750 Result = DAG.getNode(ISD::ADD, getPointerTy(), 02751 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 02752 } 02753 02754 return Result; 02755 } 02756 02757 SDOperand 02758 X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) { 02759 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 02760 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 02761 DAG.getTargetGlobalAddress(GV, 02762 getPointerTy())); 02763 if (Subtarget->isTargetDarwin()) { 02764 // With PIC, the address is actually $g + Offset. 02765 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 02766 Result = DAG.getNode(ISD::ADD, getPointerTy(), 02767 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 02768 Result); 02769 02770 // For Darwin, external and weak symbols are indirect, so we want to load 02771 // the value at address GV, not the value of GV itself. This means that 02772 // the GlobalAddress must be in the base or index register of the address, 02773 // not the GV offset field. 02774 if (getTargetMachine().getRelocationModel() != Reloc::Static && 02775 DarwinGVRequiresExtraLoad(GV)) 02776 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), 02777 Result, DAG.getSrcValue(NULL)); 02778 } 02779 02780 return Result; 02781 } 02782 02783 SDOperand 02784 X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) { 02785 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 02786 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 02787 DAG.getTargetExternalSymbol(Sym, 02788 getPointerTy())); 02789 if (Subtarget->isTargetDarwin()) { 02790 // With PIC, the address is actually $g + Offset. 02791 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 02792 Result = DAG.getNode(ISD::ADD, getPointerTy(), 02793 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 02794 Result); 02795 } 02796 02797 return Result; 02798 } 02799 02800 SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) { 02801 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 02802 "Not an i64 shift!"); 02803 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 02804 SDOperand ShOpLo = Op.getOperand(0); 02805 SDOperand ShOpHi = Op.getOperand(1); 02806 SDOperand ShAmt = Op.getOperand(2); 02807 SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, 02808 DAG.getConstant(31, MVT::i8)) 02809 : DAG.getConstant(0, MVT::i32); 02810 02811 SDOperand Tmp2, Tmp3; 02812 if (Op.getOpcode() == ISD::SHL_PARTS) { 02813 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 02814 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 02815 } else { 02816 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 02817 Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 02818 } 02819 02820 SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag, 02821 ShAmt, DAG.getConstant(32, MVT::i8)); 02822 02823 SDOperand Hi, Lo; 02824 SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 02825 02826 std::vector<MVT::ValueType> Tys; 02827 Tys.push_back(MVT::i32); 02828 Tys.push_back(MVT::Flag); 02829 std::vector<SDOperand> Ops; 02830 if (Op.getOpcode() == ISD::SHL_PARTS) { 02831 Ops.push_back(Tmp2); 02832 Ops.push_back(Tmp3); 02833 Ops.push_back(CC); 02834 Ops.push_back(InFlag); 02835 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 02836 InFlag = Hi.getValue(1); 02837 02838 Ops.clear(); 02839 Ops.push_back(Tmp3); 02840 Ops.push_back(Tmp1); 02841 Ops.push_back(CC); 02842 Ops.push_back(InFlag); 02843 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 02844 } else { 02845 Ops.push_back(Tmp2); 02846 Ops.push_back(Tmp3); 02847 Ops.push_back(CC); 02848 Ops.push_back(InFlag); 02849 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 02850 InFlag = Lo.getValue(1); 02851 02852 Ops.clear(); 02853 Ops.push_back(Tmp3); 02854 Ops.push_back(Tmp1); 02855 Ops.push_back(CC); 02856 Ops.push_back(InFlag); 02857 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 02858 } 02859 02860 Tys.clear(); 02861 Tys.push_back(MVT::i32); 02862 Tys.push_back(MVT::i32); 02863 Ops.clear(); 02864 Ops.push_back(Lo); 02865 Ops.push_back(Hi); 02866 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 02867 } 02868 02869 SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 02870 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 02871 Op.getOperand(0).getValueType() >= MVT::i16 && 02872 "Unknown SINT_TO_FP to lower!"); 02873 02874 SDOperand Result; 02875 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 02876 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 02877 MachineFunction &MF = DAG.getMachineFunction(); 02878 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 02879 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 02880 SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other, 02881 DAG.getEntryNode(), Op.getOperand(0), 02882 StackSlot, DAG.getSrcValue(NULL)); 02883 02884 // Build the FILD 02885 std::vector<MVT::ValueType> Tys; 02886 Tys.push_back(MVT::f64); 02887 Tys.push_back(MVT::Other); 02888 if (X86ScalarSSE) Tys.push_back(MVT::Flag); 02889 std::vector<SDOperand> Ops; 02890 Ops.push_back(Chain); 02891 Ops.push_back(StackSlot); 02892 Ops.push_back(DAG.getValueType(SrcVT)); 02893 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 02894 Tys, Ops); 02895 02896 if (X86ScalarSSE) { 02897 Chain = Result.getValue(1); 02898 SDOperand InFlag = Result.getValue(2); 02899 02900 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 02901 // shouldn't be necessary except that RFP cannot be live across 02902 // multiple blocks. When stackifier is fixed, they can be uncoupled. 02903 MachineFunction &MF = DAG.getMachineFunction(); 02904 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 02905 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 02906 std::vector<MVT::ValueType> Tys; 02907 Tys.push_back(MVT::Other); 02908 std::vector<SDOperand> Ops; 02909 Ops.push_back(Chain); 02910 Ops.push_back(Result); 02911 Ops.push_back(StackSlot); 02912 Ops.push_back(DAG.getValueType(Op.getValueType())); 02913 Ops.push_back(InFlag); 02914 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 02915 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, 02916 DAG.getSrcValue(NULL)); 02917 } 02918 02919 return Result; 02920 } 02921 02922 SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { 02923 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 02924 "Unknown FP_TO_SINT to lower!"); 02925 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 02926 // stack slot. 02927 MachineFunction &MF = DAG.getMachineFunction(); 02928 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 02929 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 02930 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 02931 02932 unsigned Opc; 02933 switch (Op.getValueType()) { 02934 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 02935 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 02936 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 02937 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 02938 } 02939 02940 SDOperand Chain = DAG.getEntryNode(); 02941 SDOperand Value = Op.getOperand(0); 02942 if (X86ScalarSSE) { 02943 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 02944 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot, 02945 DAG.getSrcValue(0)); 02946 std::vector<MVT::ValueType> Tys; 02947 Tys.push_back(MVT::f64); 02948 Tys.push_back(MVT::Other); 02949 std::vector<SDOperand> Ops; 02950 Ops.push_back(Chain); 02951 Ops.push_back(StackSlot); 02952 Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType())); 02953 Value = DAG.getNode(X86ISD::FLD, Tys, Ops); 02954 Chain = Value.getValue(1); 02955 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 02956 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 02957 } 02958 02959 // Build the FP_TO_INT*_IN_MEM 02960 std::vector<SDOperand> Ops; 02961 Ops.push_back(Chain); 02962 Ops.push_back(Value); 02963 Ops.push_back(StackSlot); 02964 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops); 02965 02966 // Load the result. 02967 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, 02968 DAG.getSrcValue(NULL)); 02969 } 02970 02971 SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { 02972 MVT::ValueType VT = Op.getValueType(); 02973 const Type *OpNTy = MVT::getTypeForValueType(VT); 02974 std::vector<Constant*> CV; 02975 if (VT == MVT::f64) { 02976 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); 02977 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 02978 } else { 02979 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); 02980 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 02981 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 02982 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 02983 } 02984 Constant *CS = ConstantStruct::get(CV); 02985 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 02986 SDOperand Mask 02987 = DAG.getNode(X86ISD::LOAD_PACK, 02988 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 02989 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 02990 } 02991 02992 SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) { 02993 MVT::ValueType VT = Op.getValueType(); 02994 const Type *OpNTy = MVT::getTypeForValueType(VT); 02995 std::vector<Constant*> CV; 02996 if (VT == MVT::f64) { 02997 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); 02998 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 02999 } else { 03000 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); 03001 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 03002 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 03003 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 03004 } 03005 Constant *CS = ConstantStruct::get(CV); 03006 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 03007 SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, 03008 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 03009 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 03010 } 03011 03012 SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) { 03013 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 03014 SDOperand Cond; 03015 SDOperand CC = Op.getOperand(2); 03016 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 03017 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 03018 bool Flip; 03019 unsigned X86CC; 03020 if (translateX86CC(CC, isFP, X86CC, Flip)) { 03021 if (Flip) 03022 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 03023 Op.getOperand(1), Op.getOperand(0)); 03024 else 03025 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 03026 Op.getOperand(0), Op.getOperand(1)); 03027 return DAG.getNode(X86ISD::SETCC, MVT::i8, 03028 DAG.getConstant(X86CC, MVT::i8), Cond); 03029 } else { 03030 assert(isFP && "Illegal integer SetCC!"); 03031 03032 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 03033 Op.getOperand(0), Op.getOperand(1)); 03034 std::vector<MVT::ValueType> Tys; 03035 std::vector<SDOperand> Ops; 03036 switch (SetCCOpcode) { 03037 default: assert(false && "Illegal floating point SetCC!"); 03038 case ISD::SETOEQ: { // !PF & ZF 03039 Tys.push_back(MVT::i8); 03040 Tys.push_back(MVT::Flag); 03041 Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8)); 03042 Ops.push_back(Cond); 03043 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 03044 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 03045 DAG.getConstant(X86ISD::COND_E, MVT::i8), 03046 Tmp1.getValue(1)); 03047 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 03048 } 03049 case ISD::SETUNE: { // PF | !ZF 03050 Tys.push_back(MVT::i8); 03051 Tys.push_back(MVT::Flag); 03052 Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8)); 03053 Ops.push_back(Cond); 03054 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 03055 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 03056 DAG.getConstant(X86ISD::COND_NE, MVT::i8), 03057 Tmp1.getValue(1)); 03058 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 03059 } 03060 } 03061 } 03062 } 03063 03064 SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) { 03065 MVT::ValueType VT = Op.getValueType(); 03066 bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE; 03067 bool addTest = false; 03068 SDOperand Op0 = Op.getOperand(0); 03069 SDOperand Cond, CC; 03070 if (Op0.getOpcode() == ISD::SETCC) 03071 Op0 = LowerOperation(Op0, DAG); 03072 03073 if (Op0.getOpcode() == X86ISD::SETCC) { 03074 // If condition flag is set by a X86ISD::CMP, then make a copy of it 03075 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 03076 // have another use it will be eliminated. 03077 // If the X86ISD::SETCC has more than one use, then it's probably better 03078 // to use a test instead of duplicating the X86ISD::CMP (for register 03079 // pressure reason). 03080 unsigned CmpOpc = Op0.getOperand(1).getOpcode(); 03081 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 03082 CmpOpc == X86ISD::UCOMI) { 03083 if (!Op0.hasOneUse()) { 03084 std::vector<MVT::ValueType> Tys; 03085 for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i) 03086 Tys.push_back(Op0.Val->getValueType(i)); 03087 std::vector<SDOperand> Ops; 03088 for (unsigned i = 0; i < Op0.getNumOperands(); ++i) 03089 Ops.push_back(Op0.getOperand(i)); 03090 Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 03091 } 03092 03093 CC = Op0.getOperand(0); 03094 Cond = Op0.getOperand(1); 03095 // Make a copy as flag result cannot be used by more than one. 03096 Cond = DAG.getNode(CmpOpc, MVT::Flag, 03097 Cond.getOperand(0), Cond.getOperand(1)); 03098 addTest = 03099 isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 03100 } else 03101 addTest = true; 03102 } else 03103 addTest = true; 03104 03105 if (addTest) { 03106 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 03107 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0); 03108 } 03109 03110 std::vector<MVT::ValueType> Tys; 03111 Tys.push_back(Op.getValueType()); 03112 Tys.push_back(MVT::Flag); 03113 std::vector<SDOperand> Ops; 03114 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 03115 // condition is true. 03116 Ops.push_back(Op.getOperand(2)); 03117 Ops.push_back(Op.getOperand(1)); 03118 Ops.push_back(CC); 03119 Ops.push_back(Cond); 03120 return DAG.getNode(X86ISD::CMOV, Tys, Ops); 03121 } 03122 03123 SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) { 03124 bool addTest = false; 03125 SDOperand Cond = Op.getOperand(1); 03126 SDOperand Dest = Op.getOperand(2); 03127 SDOperand CC; 03128 if (Cond.getOpcode() == ISD::SETCC) 03129 Cond = LowerOperation(Cond, DAG); 03130 03131 if (Cond.getOpcode() == X86ISD::SETCC) { 03132 // If condition flag is set by a X86ISD::CMP, then make a copy of it 03133 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 03134 // have another use it will be eliminated. 03135 // If the X86ISD::SETCC has more than one use, then it's probably better 03136 // to use a test instead of duplicating the X86ISD::CMP (for register 03137 // pressure reason). 03138 unsigned CmpOpc = Cond.getOperand(1).getOpcode(); 03139 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 03140 CmpOpc == X86ISD::UCOMI) { 03141 if (!Cond.hasOneUse()) { 03142 std::vector<MVT::ValueType> Tys; 03143 for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i) 03144 Tys.push_back(Cond.Val->getValueType(i)); 03145 std::vector<SDOperand> Ops; 03146 for (unsigned i = 0; i < Cond.getNumOperands(); ++i) 03147 Ops.push_back(Cond.getOperand(i)); 03148 Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops); 03149 } 03150 03151 CC = Cond.getOperand(0); 03152 Cond = Cond.getOperand(1); 03153 // Make a copy as flag result cannot be used by more than one. 03154 Cond = DAG.getNode(CmpOpc, MVT::Flag, 03155 Cond.getOperand(0), Cond.getOperand(1)); 03156 } else 03157 addTest = true; 03158 } else 03159 addTest = true; 03160 03161 if (addTest) { 03162 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 03163 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond); 03164 } 03165 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 03166 Op.getOperand(0), Op.getOperand(2), CC, Cond); 03167 } 03168 03169 SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) { 03170 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 03171 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 03172 DAG.getTargetJumpTable(JT->getIndex(), 03173 getPointerTy())); 03174 if (Subtarget->isTargetDarwin()) { 03175 // With PIC, the address is actually $g + Offset. 03176 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 03177 Result = DAG.getNode(ISD::ADD, getPointerTy(), 03178 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), 03179 Result); 03180 } 03181 03182 return Result; 03183 } 03184 03185 SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { 03186 unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 03187 if (CallingConv == CallingConv::Fast && EnableFastCC) 03188 return LowerFastCCCallTo(Op, DAG); 03189 else 03190 return LowerCCCCallTo(Op, DAG); 03191 } 03192 03193 SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { 03194 SDOperand Copy; 03195 03196 switch(Op.getNumOperands()) { 03197 default: 03198 assert(0 && "Do not know how to return this many arguments!"); 03199 abort(); 03200 case 1: // ret void. 03201 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0), 03202 DAG.getConstant(getBytesToPopOnReturn(), MVT::i16)); 03203 case 3: { 03204 MVT::ValueType ArgVT = Op.getOperand(1).getValueType(); 03205 03206 if (MVT::isVector(ArgVT)) { 03207 // Integer or FP vector result -> XMM0. 03208 if (DAG.getMachineFunction().liveout_empty()) 03209 DAG.getMachineFunction().addLiveOut(X86::XMM0); 03210 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1), 03211 SDOperand()); 03212 } else if (MVT::isInteger(ArgVT)) { 03213 // Integer result -> EAX 03214 if (DAG.getMachineFunction().liveout_empty()) 03215 DAG.getMachineFunction().addLiveOut(X86::EAX); 03216 03217 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1), 03218 SDOperand()); 03219 } else if (!X86ScalarSSE) { 03220 // FP return with fp-stack value. 03221 if (DAG.getMachineFunction().liveout_empty()) 03222 DAG.getMachineFunction().addLiveOut(X86::ST0); 03223 03224 std::vector<MVT::ValueType> Tys; 03225 Tys.push_back(MVT::Other); 03226 Tys.push_back(MVT::Flag); 03227 std::vector<SDOperand> Ops; 03228 Ops.push_back(Op.getOperand(0)); 03229 Ops.push_back(Op.getOperand(1)); 03230 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 03231 } else { 03232 // FP return with ScalarSSE (return on fp-stack). 03233 if (DAG.getMachineFunction().liveout_empty()) 03234 DAG.getMachineFunction().addLiveOut(X86::ST0); 03235 03236 SDOperand MemLoc; 03237 SDOperand Chain = Op.getOperand(0); 03238 SDOperand Value = Op.getOperand(1); 03239 03240 if (Value.getOpcode() == ISD::LOAD && 03241 (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) { 03242 Chain = Value.getOperand(0); 03243 MemLoc = Value.getOperand(1); 03244 } else { 03245 // Spill the value to memory and reload it into top of stack. 03246 unsigned Size = MVT::getSizeInBits(ArgVT)/8; 03247 MachineFunction &MF = DAG.getMachineFunction(); 03248 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 03249 MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); 03250 Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 03251 Value, MemLoc, DAG.getSrcValue(0)); 03252 } 03253 std::vector<MVT::ValueType> Tys; 03254 Tys.push_back(MVT::f64); 03255 Tys.push_back(MVT::Other); 03256 std::vector<SDOperand> Ops; 03257 Ops.push_back(Chain); 03258 Ops.push_back(MemLoc); 03259 Ops.push_back(DAG.getValueType(ArgVT)); 03260 Copy = DAG.getNode(X86ISD::FLD, Tys, Ops); 03261 Tys.clear(); 03262 Tys.push_back(MVT::Other); 03263 Tys.push_back(MVT::Flag); 03264 Ops.clear(); 03265 Ops.push_back(Copy.getValue(1)); 03266 Ops.push_back(Copy); 03267 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 03268 } 03269 break; 03270 } 03271 case 5: 03272 if (DAG.getMachineFunction().liveout_empty()) { 03273 DAG.getMachineFunction().addLiveOut(X86::EAX); 03274 DAG.getMachineFunction().addLiveOut(X86::EDX); 03275 } 03276 03277 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(3), 03278 SDOperand()); 03279 Copy = DAG.getCopyToReg(Copy, X86::EAX,Op.getOperand(1),Copy.getValue(1)); 03280 break; 03281 } 03282 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, 03283 Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16), 03284 Copy.getValue(1)); 03285 } 03286 03287 SDOperand 03288 X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { 03289 MachineFunction &MF = DAG.getMachineFunction(); 03290 const Function* Fn = MF.getFunction(); 03291 if (Fn->hasExternalLinkage() && 03292 Subtarget->TargetType == X86Subtarget::isCygwin && 03293 Fn->getName() == "main") 03294 MF.getInfo<X86FunctionInfo>()->setForceFramePointer(true); 03295 03296 unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 03297 if (CC == CallingConv::Fast && EnableFastCC) 03298 return LowerFastCCArguments(Op, DAG); 03299 else 03300 return LowerCCCArguments(Op, DAG); 03301 } 03302 03303 SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { 03304 SDOperand InFlag(0, 0); 03305 SDOperand Chain = Op.getOperand(0); 03306 unsigned Align = 03307 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 03308 if (Align == 0) Align = 1; 03309 03310 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 03311 // If not DWORD aligned, call memset if size is less than the threshold. 03312 // It knows how to align to the right boundary first. 03313 if ((Align & 3) != 0 || 03314 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 03315 MVT::ValueType IntPtr = getPointerTy(); 03316 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 03317 std::vector<std::pair<SDOperand, const Type*> > Args; 03318 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 03319 // Extend the ubyte argument to be an int value for the call. 03320 SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 03321 Args.push_back(std::make_pair(Val, IntPtrTy)); 03322 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 03323 std::pair<SDOperand,SDOperand> CallResult = 03324 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 03325 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 03326 return CallResult.second; 03327 } 03328 03329 MVT::ValueType AVT; 03330 SDOperand Count; 03331 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 03332 unsigned BytesLeft = 0; 03333 bool TwoRepStos = false; 03334 if (ValC) { 03335 unsigned ValReg; 03336 unsigned Val = ValC->getValue() & 255; 03337 03338 // If the value is a constant, then we can potentially use larger sets. 03339 switch (Align & 3) { 03340 case 2: // WORD aligned 03341 AVT = MVT::i16; 03342 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 03343 BytesLeft = I->getValue() % 2; 03344 Val = (Val << 8) | Val; 03345 ValReg = X86::AX; 03346 break; 03347 case 0: // DWORD aligned 03348 AVT = MVT::i32; 03349 if (I) { 03350 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 03351 BytesLeft = I->getValue() % 4; 03352 } else { 03353 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 03354 DAG.getConstant(2, MVT::i8)); 03355 TwoRepStos = true; 03356 } 03357 Val = (Val << 8) | Val; 03358 Val = (Val << 16) | Val; 03359 ValReg = X86::EAX; 03360 break; 03361 default: // Byte aligned 03362 AVT = MVT::i8; 03363 Count = Op.getOperand(3); 03364 ValReg = X86::AL; 03365 break; 03366 } 03367 03368 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 03369 InFlag); 03370 InFlag = Chain.getValue(1); 03371 } else { 03372 AVT = MVT::i8; 03373 Count = Op.getOperand(3); 03374 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 03375 InFlag = Chain.getValue(1); 03376 } 03377 03378 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 03379 InFlag = Chain.getValue(1); 03380 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 03381 InFlag = Chain.getValue(1); 03382 03383 std::vector<MVT::ValueType> Tys; 03384 Tys.push_back(MVT::Other); 03385 Tys.push_back(MVT::Flag); 03386 std::vector<SDOperand> Ops; 03387 Ops.push_back(Chain); 03388 Ops.push_back(DAG.getValueType(AVT)); 03389 Ops.push_back(InFlag); 03390 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 03391 03392 if (TwoRepStos) { 03393 InFlag = Chain.getValue(1); 03394 Count = Op.getOperand(3); 03395 MVT::ValueType CVT = Count.getValueType(); 03396 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 03397 DAG.getConstant(3, CVT)); 03398 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 03399 InFlag = Chain.getValue(1); 03400 Tys.clear(); 03401 Tys.push_back(MVT::Other); 03402 Tys.push_back(MVT::Flag); 03403 Ops.clear(); 03404 Ops.push_back(Chain); 03405 Ops.push_back(DAG.getValueType(MVT::i8)); 03406 Ops.push_back(InFlag); 03407 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 03408 } else if (BytesLeft) { 03409 // Issue stores for the last 1 - 3 bytes. 03410 SDOperand Value; 03411 unsigned Val = ValC->getValue() & 255; 03412 unsigned Offset = I->getValue() - BytesLeft; 03413 SDOperand DstAddr = Op.getOperand(1); 03414 MVT::ValueType AddrVT = DstAddr.getValueType(); 03415 if (BytesLeft >= 2) { 03416 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 03417 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 03418 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 03419 DAG.getConstant(Offset, AddrVT)), 03420 DAG.getSrcValue(NULL)); 03421 BytesLeft -= 2; 03422 Offset += 2; 03423 } 03424 03425 if (BytesLeft == 1) { 03426 Value = DAG.getConstant(Val, MVT::i8); 03427 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 03428 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 03429 DAG.getConstant(Offset, AddrVT)), 03430 DAG.getSrcValue(NULL)); 03431 } 03432 } 03433 03434 return Chain; 03435 } 03436 03437 SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { 03438 SDOperand Chain = Op.getOperand(0); 03439 unsigned Align = 03440 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 03441 if (Align == 0) Align = 1; 03442 03443 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 03444 // If not DWORD aligned, call memcpy if size is less than the threshold. 03445 // It knows how to align to the right boundary first. 03446 if ((Align & 3) != 0 || 03447 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 03448 MVT::ValueType IntPtr = getPointerTy(); 03449 const Type *IntPtrTy = getTargetData()->getIntPtrType(); 03450 std::vector<std::pair<SDOperand, const Type*> > Args; 03451 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 03452 Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy)); 03453 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 03454 std::pair<SDOperand,SDOperand> CallResult = 03455 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 03456 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 03457 return CallResult.second; 03458 } 03459 03460 MVT::ValueType AVT; 03461 SDOperand Count; 03462 unsigned BytesLeft = 0; 03463 bool TwoRepMovs = false; 03464 switch (Align & 3) { 03465 case 2: // WORD aligned 03466 AVT = MVT::i16; 03467 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 03468 BytesLeft = I->getValue() % 2; 03469 break; 03470 case 0: // DWORD aligned 03471 AVT = MVT::i32; 03472 if (I) { 03473 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 03474 BytesLeft = I->getValue() % 4; 03475 } else { 03476 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 03477 DAG.getConstant(2, MVT::i8)); 03478 TwoRepMovs = true; 03479 } 03480 break; 03481 default: // Byte aligned 03482 AVT = MVT::i8; 03483 Count = Op.getOperand(3); 03484 break; 03485 } 03486 03487 SDOperand InFlag(0, 0); 03488 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 03489 InFlag = Chain.getValue(1); 03490 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 03491 InFlag = Chain.getValue(1); 03492 Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag); 03493 InFlag = Chain.getValue(1); 03494 03495 std::vector<MVT::ValueType> Tys; 03496 Tys.push_back(MVT::Other); 03497 Tys.push_back(MVT::Flag); 03498 std::vector<SDOperand> Ops; 03499 Ops.push_back(Chain); 03500 Ops.push_back(DAG.getValueType(AVT)); 03501 Ops.push_back(InFlag); 03502 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 03503 03504 if (TwoRepMovs) { 03505 InFlag = Chain.getValue(1); 03506 Count = Op.getOperand(3); 03507 MVT::ValueType CVT = Count.getValueType(); 03508 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 03509 DAG.getConstant(3, CVT)); 03510 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 03511 InFlag = Chain.getValue(1); 03512 Tys.clear(); 03513 Tys.push_back(MVT::Other); 03514 Tys.push_back(MVT::Flag); 03515 Ops.clear(); 03516 Ops.push_back(Chain); 03517 Ops.push_back(DAG.getValueType(MVT::i8)); 03518 Ops.push_back(InFlag); 03519 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 03520 } else if (BytesLeft) { 03521 // Issue loads and stores for the last 1 - 3 bytes. 03522 unsigned Offset = I->getValue() - BytesLeft; 03523 SDOperand DstAddr = Op.getOperand(1); 03524 MVT::ValueType DstVT = DstAddr.getValueType(); 03525 SDOperand SrcAddr = Op.getOperand(2); 03526 MVT::ValueType SrcVT = SrcAddr.getValueType(); 03527 SDOperand Value; 03528 if (BytesLeft >= 2) { 03529 Value = DAG.getLoad(MVT::i16, Chain, 03530 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 03531 DAG.getConstant(Offset, SrcVT)), 03532 DAG.getSrcValue(NULL)); 03533 Chain = Value.getValue(1); 03534 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 03535 DAG.getNode(ISD::ADD, DstVT, DstAddr, 03536 DAG.getConstant(Offset, DstVT)), 03537 DAG.getSrcValue(NULL)); 03538 BytesLeft -= 2; 03539 Offset += 2; 03540 } 03541 03542 if (BytesLeft == 1) { 03543 Value = DAG.getLoad(MVT::i8, Chain, 03544 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 03545 DAG.getConstant(Offset, SrcVT)), 03546 DAG.getSrcValue(NULL)); 03547 Chain = Value.getValue(1); 03548 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 03549 DAG.getNode(ISD::ADD, DstVT, DstAddr, 03550 DAG.getConstant(Offset, DstVT)), 03551 DAG.getSrcValue(NULL)); 03552 } 03553 } 03554 03555 return Chain; 03556 } 03557 03558 SDOperand 03559 X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) { 03560 std::vector<MVT::ValueType> Tys; 03561 Tys.push_back(MVT::Other); 03562 Tys.push_back(MVT::Flag); 03563 std::vector<SDOperand> Ops; 03564 Ops.push_back(Op.getOperand(0)); 03565 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops); 03566 Ops.clear(); 03567 Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1))); 03568 Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX, 03569 MVT::i32, Ops[0].getValue(2))); 03570 Ops.push_back(Ops[1].getValue(1)); 03571 Tys[0] = Tys[1] = MVT::i32; 03572 Tys.push_back(MVT::Other); 03573 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 03574 } 03575 03576 SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { 03577 // vastart just stores the address of the VarArgsFrameIndex slot into the 03578 // memory location argument. 03579 // FIXME: Replace MVT::i32 with PointerTy 03580 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32); 03581 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 03582 Op.getOperand(1), Op.getOperand(2)); 03583 } 03584 03585 SDOperand 03586 X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 03587 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 03588 switch (IntNo) { 03589 default: return SDOperand(); // Don't custom lower most intrinsics. 03590 // Comparison intrinsics. 03591 case Intrinsic::x86_sse_comieq_ss: 03592 case Intrinsic::x86_sse_comilt_ss: 03593 case Intrinsic::x86_sse_comile_ss: 03594 case Intrinsic::x86_sse_comigt_ss: 03595 case Intrinsic::x86_sse_comige_ss: 03596 case Intrinsic::x86_sse_comineq_ss: 03597 case Intrinsic::x86_sse_ucomieq_ss: 03598 case Intrinsic::x86_sse_ucomilt_ss: 03599 case Intrinsic::x86_sse_ucomile_ss: 03600 case Intrinsic::x86_sse_ucomigt_ss: 03601 case Intrinsic::x86_sse_ucomige_ss: 03602 case Intrinsic::x86_sse_ucomineq_ss: 03603 case Intrinsic::x86_sse2_comieq_sd: 03604 case Intrinsic::x86_sse2_comilt_sd: 03605 case Intrinsic::x86_sse2_comile_sd: 03606 case Intrinsic::x86_sse2_comigt_sd: 03607 case Intrinsic::x86_sse2_comige_sd: 03608 case Intrinsic::x86_sse2_comineq_sd: 03609 case Intrinsic::x86_sse2_ucomieq_sd: 03610 case Intrinsic::x86_sse2_ucomilt_sd: 03611 case Intrinsic::x86_sse2_ucomile_sd: 03612 case Intrinsic::x86_sse2_ucomigt_sd: 03613 case Intrinsic::x86_sse2_ucomige_sd: 03614 case Intrinsic::x86_sse2_ucomineq_sd: { 03615 unsigned Opc = 0; 03616 ISD::CondCode CC = ISD::SETCC_INVALID; 03617 switch (IntNo) { 03618 default: break; 03619 case Intrinsic::x86_sse_comieq_ss: 03620 case Intrinsic::x86_sse2_comieq_sd: 03621 Opc = X86ISD::COMI; 03622 CC = ISD::SETEQ; 03623 break; 03624 case Intrinsic::x86_sse_comilt_ss: 03625 case Intrinsic::x86_sse2_comilt_sd: 03626 Opc = X86ISD::COMI; 03627 CC = ISD::SETLT; 03628 break; 03629 case Intrinsic::x86_sse_comile_ss: 03630 case Intrinsic::x86_sse2_comile_sd: 03631 Opc = X86ISD::COMI; 03632 CC = ISD::SETLE; 03633 break; 03634 case Intrinsic::x86_sse_comigt_ss: 03635 case Intrinsic::x86_sse2_comigt_sd: 03636 Opc = X86ISD::COMI; 03637 CC = ISD::SETGT; 03638 break; 03639 case Intrinsic::x86_sse_comige_ss: 03640 case Intrinsic::x86_sse2_comige_sd: 03641 Opc = X86ISD::COMI; 03642 CC = ISD::SETGE; 03643 break; 03644 case Intrinsic::x86_sse_comineq_ss: 03645 case Intrinsic::x86_sse2_comineq_sd: 03646 Opc = X86ISD::COMI; 03647 CC = ISD::SETNE; 03648 break; 03649 case Intrinsic::x86_sse_ucomieq_ss: 03650 case Intrinsic::x86_sse2_ucomieq_sd: 03651 Opc = X86ISD::UCOMI; 03652 CC = ISD::SETEQ; 03653 break; 03654 case Intrinsic::x86_sse_ucomilt_ss: 03655 case Intrinsic::x86_sse2_ucomilt_sd: 03656 Opc = X86ISD::UCOMI; 03657 CC = ISD::SETLT; 03658 break; 03659 case Intrinsic::x86_sse_ucomile_ss: 03660 case Intrinsic::x86_sse2_ucomile_sd: 03661 Opc = X86ISD::UCOMI; 03662 CC = ISD::SETLE; 03663 break; 03664 case Intrinsic::x86_sse_ucomigt_ss: 03665 case Intrinsic::x86_sse2_ucomigt_sd: 03666 Opc = X86ISD::UCOMI; 03667 CC = ISD::SETGT; 03668 break; 03669 case Intrinsic::x86_sse_ucomige_ss: 03670 case Intrinsic::x86_sse2_ucomige_sd: 03671 Opc = X86ISD::UCOMI; 03672 CC = ISD::SETGE; 03673 break; 03674 case Intrinsic::x86_sse_ucomineq_ss: 03675 case Intrinsic::x86_sse2_ucomineq_sd: 03676 Opc = X86ISD::UCOMI; 03677 CC = ISD::SETNE; 03678 break; 03679 } 03680 bool Flip; 03681 unsigned X86CC; 03682 translateX86CC(CC, true, X86CC, Flip); 03683 SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1), 03684 Op.getOperand(Flip?1:2)); 03685 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 03686 DAG.getConstant(X86CC, MVT::i8), Cond); 03687 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 03688 } 03689 } 03690 } 03691 03692 /// LowerOperation - Provide custom lowering hooks for some operations. 03693 /// 03694 SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 03695 switch (Op.getOpcode()) { 03696 default: assert(0 && "Should not custom lower this!"); 03697 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 03698 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 03699 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 03700 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 03701 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 03702 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 03703 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 03704 case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); 03705 case ISD::SHL_PARTS: 03706 case ISD::SRA_PARTS: 03707 case ISD::SRL_PARTS: return LowerShift(Op, DAG); 03708 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 03709 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 03710 case ISD::FABS: return LowerFABS(Op, DAG); 03711 case ISD::FNEG: return LowerFNEG(Op, DAG); 03712 case ISD::SETCC: return LowerSETCC(Op, DAG); 03713 case ISD::SELECT: return LowerSELECT(Op, DAG); 03714 case ISD::BRCOND: return LowerBRCOND(Op, DAG); 03715 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 03716 case ISD::CALL: return LowerCALL(Op, DAG); 03717 case ISD::RET: return LowerRET(Op, DAG); 03718 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); 03719 case ISD::MEMSET: return LowerMEMSET(Op, DAG); 03720 case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); 03721 case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG); 03722 case ISD::VASTART: return LowerVASTART(Op, DAG); 03723 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 03724 } 03725 } 03726 03727 const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 03728 switch (Opcode) { 03729 default: return NULL; 03730 case X86ISD::SHLD: return "X86ISD::SHLD"; 03731 case X86ISD::SHRD: return "X86ISD::SHRD"; 03732 case X86ISD::FAND: return "X86ISD::FAND"; 03733 case X86ISD::FXOR: return "X86ISD::FXOR"; 03734 case X86ISD::FILD: return "X86ISD::FILD"; 03735 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 03736 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 03737 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 03738 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 03739 case X86ISD::FLD: return "X86ISD::FLD"; 03740 case X86ISD::FST: return "X86ISD::FST"; 03741 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 03742 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 03743 case X86ISD::CALL: return "X86ISD::CALL"; 03744 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 03745 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 03746 case X86ISD::CMP: return "X86ISD::CMP"; 03747 case X86ISD::TEST: return "X86ISD::TEST"; 03748 case X86ISD::COMI: return "X86ISD::COMI"; 03749 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 03750 case X86ISD::SETCC: return "X86ISD::SETCC"; 03751 case X86ISD::CMOV: return "X86ISD::CMOV"; 03752 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 03753 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 03754 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 03755 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 03756 case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; 03757 case X86ISD::LOAD_UA: return "X86ISD::LOAD_UA"; 03758 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 03759 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 03760 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 03761 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 03762 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 03763 } 03764 } 03765 03766 /// isLegalAddressImmediate - Return true if the integer value or 03767 /// GlobalValue can be used as the offset of the target addressing mode. 03768 bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const { 03769 // X86 allows a sign-extended 32-bit immediate field. 03770 return (V > -(1LL << 32) && V < (1LL << 32)-1); 03771 } 03772 03773 bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 03774 // GV is 64-bit but displacement field is 32-bit unless we are in small code 03775 // model. Mac OS X happens to support only small PIC code model. 03776 // FIXME: better support for other OS's. 03777 if (Subtarget->is64Bit() && !Subtarget->isTargetDarwin()) 03778 return false; 03779 if (Subtarget->isTargetDarwin()) { 03780 Reloc::Model RModel = getTargetMachine().getRelocationModel(); 03781 if (RModel == Reloc::Static) 03782 return true; 03783 else if (RModel == Reloc::DynamicNoPIC) 03784 return !DarwinGVRequiresExtraLoad(GV); 03785 else 03786 return false; 03787 } else 03788 return true; 03789 } 03790 03791 /// isShuffleMaskLegal - Targets can use this to indicate that they only 03792 /// support *some* VECTOR_SHUFFLE operations, those with specific masks. 03793 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 03794 /// are assumed to be legal. 03795 bool 03796 X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 03797 // Only do shuffles on 128-bit vector types for now. 03798 if (MVT::getSizeInBits(VT) == 64) return false; 03799 return (Mask.Val->getNumOperands() <= 4 || 03800 isSplatMask(Mask.Val) || 03801 isPSHUFHW_PSHUFLWMask(Mask.Val) || 03802 X86::isUNPCKLMask(Mask.Val) || 03803 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 03804 X86::isUNPCKHMask(Mask.Val)); 03805 } 03806 03807 bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps, 03808 MVT::ValueType EVT, 03809 SelectionDAG &DAG) const { 03810 unsigned NumElts = BVOps.size(); 03811 // Only do shuffles on 128-bit vector types for now. 03812 if (MVT::getSizeInBits(EVT) * NumElts == 64) return false; 03813 if (NumElts == 2) return true; 03814 if (NumElts == 4) { 03815 return (isMOVLMask(BVOps) || isCommutedMOVL(BVOps, true) || 03816 isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps)); 03817 } 03818 return false; 03819 } 03820 03821 //===----------------------------------------------------------------------===// 03822 // X86 Scheduler Hooks 03823 //===----------------------------------------------------------------------===// 03824 03825 MachineBasicBlock * 03826 X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 03827 MachineBasicBlock *BB) { 03828 switch (MI->getOpcode()) { 03829 default: assert(false && "Unexpected instr type to insert"); 03830 case X86::CMOV_FR32: 03831 case X86::CMOV_FR64: 03832 case X86::CMOV_V4F32: 03833 case X86::CMOV_V2F64: 03834 case X86::CMOV_V2I64: { 03835 // To "insert" a SELECT_CC instruction, we actually have to insert the 03836 // diamond control-flow pattern. The incoming instruction knows the 03837 // destination vreg to set, the condition code register to branch on, the 03838 // true/false values to select between, and a branch opcode to use. 03839 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 03840 ilist<MachineBasicBlock>::iterator It = BB; 03841 ++It; 03842 03843 // thisMBB: 03844 // ... 03845 // TrueVal = ... 03846 // cmpTY ccX, r1, r2 03847 // bCC copy1MBB 03848 // fallthrough --> copy0MBB 03849 MachineBasicBlock *thisMBB = BB; 03850 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 03851 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 03852 unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue()); 03853 BuildMI(BB, Opc, 1).addMBB(sinkMBB); 03854 MachineFunction *F = BB->getParent(); 03855 F->getBasicBlockList().insert(It, copy0MBB); 03856 F->getBasicBlockList().insert(It, sinkMBB); 03857 // Update machine-CFG edges by first adding all successors of the current 03858 // block to the new block which will contain the Phi node for the select. 03859 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 03860 e = BB->succ_end(); i != e; ++i) 03861 sinkMBB->addSuccessor(*i); 03862 // Next, remove all successors of the current block, and add the true 03863 // and fallthrough blocks as its successors. 03864 while(!BB->succ_empty()) 03865 BB->removeSuccessor(BB->succ_begin()); 03866 BB->addSuccessor(copy0MBB); 03867 BB->addSuccessor(sinkMBB); 03868 03869 // copy0MBB: 03870 // %FalseValue = ... 03871 // # fallthrough to sinkMBB 03872 BB = copy0MBB; 03873 03874 // Update machine-CFG edges 03875 BB->addSuccessor(sinkMBB); 03876 03877 // sinkMBB: 03878 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 03879 // ... 03880 BB = sinkMBB; 03881 BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg()) 03882 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 03883 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 03884 03885 delete MI; // The pseudo instruction is gone now. 03886 return BB; 03887 } 03888 03889 case X86::FP_TO_INT16_IN_MEM: 03890 case X86::FP_TO_INT32_IN_MEM: 03891 case X86::FP_TO_INT64_IN_MEM: { 03892 // Change the floating point control register to use "round towards zero" 03893 // mode when truncating to an integer value. 03894 MachineFunction *F = BB->getParent(); 03895 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 03896 addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx); 03897 03898 // Load the old value of the high byte of the control word... 03899 unsigned OldCW = 03900 F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass); 03901 addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx); 03902 03903 // Set the high part to be round to zero... 03904 addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F); 03905 03906 // Reload the modified control word now... 03907 addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx); 03908 03909 // Restore the memory image of control word to original value 03910 addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW); 03911 03912 // Get the X86 opcode to use. 03913 unsigned Opc; 03914 switch (MI->getOpcode()) { 03915 default: assert(0 && "illegal opcode!"); 03916 case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break; 03917 case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break; 03918 case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break; 03919 } 03920 03921 X86AddressMode AM; 03922 MachineOperand &Op = MI->getOperand(0); 03923 if (Op.isRegister()) { 03924 AM.BaseType = X86AddressMode::RegBase; 03925 AM.Base.Reg = Op.getReg(); 03926 } else { 03927 AM.BaseType = X86AddressMode::FrameIndexBase; 03928 AM.Base.FrameIndex = Op.getFrameIndex(); 03929 } 03930 Op = MI->getOperand(1); 03931 if (Op.isImmediate()) 03932 AM.Scale = Op.getImmedValue(); 03933 Op = MI->getOperand(2); 03934 if (Op.isImmediate()) 03935 AM.IndexReg = Op.getImmedValue(); 03936 Op = MI->getOperand(3); 03937 if (Op.isGlobalAddress()) { 03938 AM.GV = Op.getGlobal(); 03939 } else { 03940 AM.Disp = Op.getImmedValue(); 03941 } 03942 addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg()); 03943 03944 // Reload the original control word now. 03945 addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx); 03946 03947 delete MI; // The pseudo instruction is gone now. 03948 return BB; 03949 } 03950 } 03951 } 03952 03953 //===----------------------------------------------------------------------===// 03954 // X86 Optimization Hooks 03955 //===----------------------------------------------------------------------===// 03956 03957 void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 03958 uint64_t Mask, 03959 uint64_t &KnownZero, 03960 uint64_t &KnownOne, 03961 unsigned Depth) const { 03962 unsigned Opc = Op.getOpcode(); 03963 assert((Opc >= ISD::BUILTIN_OP_END || 03964 Opc == ISD::INTRINSIC_WO_CHAIN || 03965 Opc == ISD::INTRINSIC_W_CHAIN || 03966 Opc == ISD::INTRINSIC_VOID) && 03967 "Should use MaskedValueIsZero if you don't know whether Op" 03968 " is a target node!"); 03969 03970 KnownZero = KnownOne = 0; // Don't know anything. 03971 switch (Opc) { 03972 default: break; 03973 case X86ISD::SETCC: 03974 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 03975 break; 03976 } 03977 } 03978 03979 /// getShuffleScalarElt - Returns the scalar element that will make up the ith 03980 /// element of the result of the vector shuffle. 03981 static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) { 03982 MVT::ValueType VT = N->getValueType(0); 03983 SDOperand PermMask = N->getOperand(2); 03984 unsigned NumElems = PermMask.getNumOperands(); 03985 SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1); 03986 i %= NumElems; 03987 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) { 03988 return (i == 0) 03989 ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT)); 03990 } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) { 03991 SDOperand Idx = PermMask.getOperand(i); 03992 if (Idx.getOpcode() == ISD::UNDEF) 03993 return DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT)); 03994 return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG); 03995 } 03996 return SDOperand(); 03997 } 03998 03999 /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the 04000 /// node is a GlobalAddress + an offset. 04001 static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) { 04002 if (N->getOpcode() == X86ISD::Wrapper) { 04003 if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) { 04004 GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal(); 04005 return true; 04006 } 04007 } else if (N->getOpcode() == ISD::ADD) { 04008 SDOperand N1 = N->getOperand(0); 04009 SDOperand N2 = N->getOperand(1); 04010 if (isGAPlusOffset(N1.Val, GA, Offset)) { 04011 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2); 04012 if (V) { 04013 Offset += V->getSignExtended(); 04014 return true; 04015 } 04016 } else if (isGAPlusOffset(N2.Val, GA, Offset)) { 04017 ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1); 04018 if (V) { 04019 Offset += V->getSignExtended(); 04020 return true; 04021 } 04022 } 04023 } 04024 return false; 04025 } 04026 04027 /// isConsecutiveLoad - Returns true if N is loading from an address of Base 04028 /// + Dist * Size. 04029 static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size, 04030 MachineFrameInfo *MFI) { 04031 if (N->getOperand(0).Val != Base->getOperand(0).Val) 04032 return false; 04033 04034 SDOperand Loc = N->getOperand(1); 04035 SDOperand BaseLoc = Base->getOperand(1); 04036 if (Loc.getOpcode() == ISD::FrameIndex) { 04037 if (BaseLoc.getOpcode() != ISD::FrameIndex) 04038 return false; 04039 int FI = dyn_cast<FrameIndexSDNode>(Loc)->getIndex(); 04040 int BFI = dyn_cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 04041 int FS = MFI->getObjectSize(FI); 04042 int BFS = MFI->getObjectSize(BFI); 04043 if (FS != BFS || FS != Size) return false; 04044 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size); 04045 } else { 04046 GlobalValue *GV1 = NULL; 04047 GlobalValue *GV2 = NULL; 04048 int64_t Offset1 = 0; 04049 int64_t Offset2 = 0; 04050 bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1); 04051 bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2); 04052 if (isGA1 && isGA2 && GV1 == GV2) 04053 return Offset1 == (Offset2 + Dist*Size); 04054 } 04055 04056 return false; 04057 } 04058 04059 static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI, 04060 const X86Subtarget *Subtarget) { 04061 GlobalValue *GV; 04062 int64_t Offset; 04063 if (isGAPlusOffset(Base, GV, Offset)) 04064 return (GV->getAlignment() >= 16 && (Offset % 16) == 0); 04065 else { 04066 assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!"); 04067 int BFI = dyn_cast<FrameIndexSDNode>(Base)->getIndex(); 04068 if (BFI < 0) 04069 // Fixed objects do not specify alignment, however the offsets are known. 04070 return ((Subtarget->getStackAlignment() % 16) == 0 && 04071 (MFI->getObjectOffset(BFI) % 16) == 0); 04072 else 04073 return MFI->getObjectAlignment(BFI) >= 16; 04074 } 04075 return false; 04076 } 04077 04078 04079 /// PerformShuffleCombine - Combine a vector_shuffle that is equal to 04080 /// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load 04081 /// if the load addresses are consecutive, non-overlapping, and in the right 04082 /// order. 04083 static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, 04084 const X86Subtarget *Subtarget) { 04085 MachineFunction &MF = DAG.getMachineFunction(); 04086 MachineFrameInfo *MFI = MF.getFrameInfo(); 04087 MVT::ValueType VT = N->getValueType(0); 04088 MVT::ValueType EVT = MVT::getVectorBaseType(VT); 04089 SDOperand PermMask = N->getOperand(2); 04090 int NumElems = (int)PermMask.getNumOperands(); 04091 SDNode *Base = NULL; 04092 for (int i = 0; i < NumElems; ++i) { 04093 SDOperand Idx = PermMask.getOperand(i); 04094 if (Idx.getOpcode() == ISD::UNDEF) { 04095 if (!Base) return SDOperand(); 04096 } else { 04097 SDOperand Arg = 04098 getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG); 04099 if (!Arg.Val || Arg.getOpcode() != ISD::LOAD) 04100 return SDOperand(); 04101 if (!Base) 04102 Base = Arg.Val; 04103 else if (!isConsecutiveLoad(Arg.Val, Base, 04104 i, MVT::getSizeInBits(EVT)/8,MFI)) 04105 return SDOperand(); 04106 } 04107 } 04108 04109 bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget); 04110 if (isAlign16) 04111 return DAG.getLoad(VT, Base->getOperand(0), Base->getOperand(1), 04112 Base->getOperand(2)); 04113 else 04114 // Just use movups, it's shorter. 04115 return DAG.getNode(ISD::BIT_CONVERT, VT, 04116 DAG.getNode(X86ISD::LOAD_UA, MVT::v4f32, 04117 Base->getOperand(0), Base->getOperand(1), 04118 Base->getOperand(2))); 04119 } 04120 04121 SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N, 04122 DAGCombinerInfo &DCI) const { 04123 TargetMachine &TM = getTargetMachine(); 04124 SelectionDAG &DAG = DCI.DAG; 04125 switch (N->getOpcode()) { 04126 default: break; 04127 case ISD::VECTOR_SHUFFLE: 04128 return PerformShuffleCombine(N, DAG, Subtarget); 04129 } 04130 04131 return SDOperand(); 04132 } 04133 04134 //===----------------------------------------------------------------------===// 04135 // X86 Inline Assembly Support 04136 //===----------------------------------------------------------------------===// 04137 04138 /// getConstraintType - Given a constraint letter, return the type of 04139 /// constraint it is for this target. 04140 X86TargetLowering::ConstraintType 04141 X86TargetLowering::getConstraintType(char ConstraintLetter) const { 04142 switch (ConstraintLetter) { 04143 case 'A': 04144 case 'r': 04145 case 'R': 04146 case 'l': 04147 case 'q': 04148 case 'Q': 04149 case 'x': 04150 case 'Y': 04151 return C_RegisterClass; 04152 default: return TargetLowering::getConstraintType(ConstraintLetter); 04153 } 04154 } 04155 04156 std::vector<unsigned> X86TargetLowering:: 04157 getRegClassForInlineAsmConstraint(const std::string &Constraint, 04158 MVT::ValueType VT) const { 04159 if (Constraint.size() == 1) { 04160 // FIXME: not handling fp-stack yet! 04161 // FIXME: not handling MMX registers yet ('y' constraint). 04162 switch (Constraint[0]) { // GCC X86 Constraint Letters 04163 default: break; // Unknown constraint letter 04164 case 'A': // EAX/EDX 04165 if (VT == MVT::i32 || VT == MVT::i64) 04166 return make_vector<unsigned>(X86::EAX, X86::EDX, 0); 04167 break; 04168 case 'r': // GENERAL_REGS 04169 case 'R': // LEGACY_REGS 04170 if (VT == MVT::i32) 04171 return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 04172 X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0); 04173 else if (VT == MVT::i16) 04174 return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 04175 X86::SI, X86::DI, X86::BP, X86::SP, 0); 04176 else if (VT == MVT::i8) 04177 return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::DL, 0); 04178 break; 04179 case 'l': // INDEX_REGS 04180 if (VT == MVT::i32) 04181 return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 04182 X86::ESI, X86::EDI, X86::EBP, 0); 04183 else if (VT == MVT::i16) 04184 return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 04185 X86::SI, X86::DI, X86::BP, 0); 04186 else if (VT == MVT::i8) 04187 return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::DL, 0); 04188 break; 04189 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) 04190 case 'Q': // Q_REGS 04191 if (VT == MVT::i32) 04192 return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0); 04193 else if (VT == MVT::i16) 04194 return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0); 04195 else if (VT == MVT::i8) 04196 return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::DL, 0); 04197 break; 04198 case 'x': // SSE_REGS if SSE1 allowed 04199 if (Subtarget->hasSSE1()) 04200 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 04201 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 04202 0); 04203 return std::vector<unsigned>(); 04204 case 'Y': // SSE_REGS if SSE2 allowed 04205 if (Subtarget->hasSSE2()) 04206 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 04207 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 04208 0); 04209 return std::vector<unsigned>(); 04210 } 04211 } 04212 04213 return std::vector<unsigned>(); 04214 }