LLVM API Documentation
00001 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by Chris Lattner and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file defines the interfaces that X86 uses to lower LLVM code into a 00011 // selection DAG. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "X86.h" 00016 #include "X86InstrBuilder.h" 00017 #include "X86ISelLowering.h" 00018 #include "X86TargetMachine.h" 00019 #include "llvm/CallingConv.h" 00020 #include "llvm/Constants.h" 00021 #include "llvm/Function.h" 00022 #include "llvm/Intrinsics.h" 00023 #include "llvm/ADT/VectorExtras.h" 00024 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 00025 #include "llvm/CodeGen/MachineFrameInfo.h" 00026 #include "llvm/CodeGen/MachineFunction.h" 00027 #include "llvm/CodeGen/MachineInstrBuilder.h" 00028 #include "llvm/CodeGen/SelectionDAG.h" 00029 #include "llvm/CodeGen/SSARegMap.h" 00030 #include "llvm/Support/MathExtras.h" 00031 #include "llvm/Target/TargetOptions.h" 00032 using namespace llvm; 00033 00034 // FIXME: temporary. 00035 #include "llvm/Support/CommandLine.h" 00036 static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden, 00037 cl::desc("Enable fastcc on X86")); 00038 00039 X86TargetLowering::X86TargetLowering(TargetMachine &TM) 00040 : TargetLowering(TM) { 00041 Subtarget = &TM.getSubtarget<X86Subtarget>(); 00042 X86ScalarSSE = Subtarget->hasSSE2(); 00043 00044 // Set up the TargetLowering object. 00045 00046 // X86 is weird, it always uses i8 for shift amounts and setcc results. 00047 setShiftAmountType(MVT::i8); 00048 setSetCCResultType(MVT::i8); 00049 setSetCCResultContents(ZeroOrOneSetCCResult); 00050 setSchedulingPreference(SchedulingForRegPressure); 00051 setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0 00052 setStackPointerRegisterToSaveRestore(X86::ESP); 00053 00054 if (!Subtarget->isTargetDarwin()) 00055 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp. 00056 setUseUnderscoreSetJmpLongJmp(true); 00057 00058 // Add legal addressing mode scale values. 00059 addLegalAddressScale(8); 00060 addLegalAddressScale(4); 00061 addLegalAddressScale(2); 00062 // Enter the ones which require both scale + index last. These are more 00063 // expensive. 00064 addLegalAddressScale(9); 00065 addLegalAddressScale(5); 00066 addLegalAddressScale(3); 00067 00068 // Set up the register classes. 00069 addRegisterClass(MVT::i8, X86::R8RegisterClass); 00070 addRegisterClass(MVT::i16, X86::R16RegisterClass); 00071 addRegisterClass(MVT::i32, X86::R32RegisterClass); 00072 00073 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this 00074 // operation. 00075 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); 00076 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); 00077 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); 00078 00079 if (X86ScalarSSE) 00080 // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead. 00081 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand); 00082 else 00083 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); 00084 00085 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have 00086 // this operation. 00087 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); 00088 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); 00089 // SSE has no i16 to fp conversion, only i32 00090 if (X86ScalarSSE) 00091 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); 00092 else { 00093 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom); 00094 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom); 00095 } 00096 00097 // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64 00098 // isn't legal. 00099 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); 00100 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); 00101 00102 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have 00103 // this operation. 00104 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); 00105 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); 00106 00107 if (X86ScalarSSE) { 00108 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); 00109 } else { 00110 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); 00111 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); 00112 } 00113 00114 // Handle FP_TO_UINT by promoting the destination to a larger signed 00115 // conversion. 00116 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote); 00117 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); 00118 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); 00119 00120 if (X86ScalarSSE && !Subtarget->hasSSE3()) 00121 // Expand FP_TO_UINT into a select. 00122 // FIXME: We would like to use a Custom expander here eventually to do 00123 // the optimal thing for SSE vs. the default expansion in the legalizer. 00124 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); 00125 else 00126 // With SSE3 we can use fisttpll to convert to a signed i64. 00127 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); 00128 00129 setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); 00130 setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); 00131 00132 setOperationAction(ISD::BRCOND , MVT::Other, Custom); 00133 setOperationAction(ISD::BR_CC , MVT::Other, Expand); 00134 setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); 00135 setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); 00136 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand); 00137 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand); 00138 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); 00139 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); 00140 setOperationAction(ISD::SEXTLOAD , MVT::i1 , Expand); 00141 setOperationAction(ISD::FREM , MVT::f64 , Expand); 00142 setOperationAction(ISD::CTPOP , MVT::i8 , Expand); 00143 setOperationAction(ISD::CTTZ , MVT::i8 , Expand); 00144 setOperationAction(ISD::CTLZ , MVT::i8 , Expand); 00145 setOperationAction(ISD::CTPOP , MVT::i16 , Expand); 00146 setOperationAction(ISD::CTTZ , MVT::i16 , Expand); 00147 setOperationAction(ISD::CTLZ , MVT::i16 , Expand); 00148 setOperationAction(ISD::CTPOP , MVT::i32 , Expand); 00149 setOperationAction(ISD::CTTZ , MVT::i32 , Expand); 00150 setOperationAction(ISD::CTLZ , MVT::i32 , Expand); 00151 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); 00152 setOperationAction(ISD::BSWAP , MVT::i16 , Expand); 00153 00154 // These should be promoted to a larger select which is supported. 00155 setOperationAction(ISD::SELECT , MVT::i1 , Promote); 00156 setOperationAction(ISD::SELECT , MVT::i8 , Promote); 00157 00158 // X86 wants to expand cmov itself. 00159 setOperationAction(ISD::SELECT , MVT::i16 , Custom); 00160 setOperationAction(ISD::SELECT , MVT::i32 , Custom); 00161 setOperationAction(ISD::SELECT , MVT::f32 , Custom); 00162 setOperationAction(ISD::SELECT , MVT::f64 , Custom); 00163 setOperationAction(ISD::SETCC , MVT::i8 , Custom); 00164 setOperationAction(ISD::SETCC , MVT::i16 , Custom); 00165 setOperationAction(ISD::SETCC , MVT::i32 , Custom); 00166 setOperationAction(ISD::SETCC , MVT::f32 , Custom); 00167 setOperationAction(ISD::SETCC , MVT::f64 , Custom); 00168 // X86 ret instruction may pop stack. 00169 setOperationAction(ISD::RET , MVT::Other, Custom); 00170 // Darwin ABI issue. 00171 setOperationAction(ISD::ConstantPool , MVT::i32 , Custom); 00172 setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); 00173 setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom); 00174 // 64-bit addm sub, shl, sra, srl (iff 32-bit x86) 00175 setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom); 00176 setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom); 00177 setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom); 00178 // X86 wants to expand memset / memcpy itself. 00179 setOperationAction(ISD::MEMSET , MVT::Other, Custom); 00180 setOperationAction(ISD::MEMCPY , MVT::Other, Custom); 00181 00182 // We don't have line number support yet. 00183 setOperationAction(ISD::LOCATION, MVT::Other, Expand); 00184 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); 00185 // FIXME - use subtarget debug flags 00186 if (!Subtarget->isTargetDarwin()) 00187 setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand); 00188 00189 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 00190 setOperationAction(ISD::VASTART , MVT::Other, Custom); 00191 00192 // Use the default implementation. 00193 setOperationAction(ISD::VAARG , MVT::Other, Expand); 00194 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 00195 setOperationAction(ISD::VAEND , MVT::Other, Expand); 00196 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 00197 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 00198 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); 00199 00200 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 00201 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 00202 00203 if (X86ScalarSSE) { 00204 // Set up the FP register classes. 00205 addRegisterClass(MVT::f32, X86::FR32RegisterClass); 00206 addRegisterClass(MVT::f64, X86::FR64RegisterClass); 00207 00208 // SSE has no load+extend ops 00209 setOperationAction(ISD::EXTLOAD, MVT::f32, Expand); 00210 setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand); 00211 00212 // Use ANDPD to simulate FABS. 00213 setOperationAction(ISD::FABS , MVT::f64, Custom); 00214 setOperationAction(ISD::FABS , MVT::f32, Custom); 00215 00216 // Use XORP to simulate FNEG. 00217 setOperationAction(ISD::FNEG , MVT::f64, Custom); 00218 setOperationAction(ISD::FNEG , MVT::f32, Custom); 00219 00220 // We don't support sin/cos/fmod 00221 setOperationAction(ISD::FSIN , MVT::f64, Expand); 00222 setOperationAction(ISD::FCOS , MVT::f64, Expand); 00223 setOperationAction(ISD::FREM , MVT::f64, Expand); 00224 setOperationAction(ISD::FSIN , MVT::f32, Expand); 00225 setOperationAction(ISD::FCOS , MVT::f32, Expand); 00226 setOperationAction(ISD::FREM , MVT::f32, Expand); 00227 00228 // Expand FP immediates into loads from the stack, except for the special 00229 // cases we handle. 00230 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 00231 setOperationAction(ISD::ConstantFP, MVT::f32, Expand); 00232 addLegalFPImmediate(+0.0); // xorps / xorpd 00233 } else { 00234 // Set up the FP register classes. 00235 addRegisterClass(MVT::f64, X86::RFPRegisterClass); 00236 00237 setOperationAction(ISD::UNDEF, MVT::f64, Expand); 00238 00239 if (!UnsafeFPMath) { 00240 setOperationAction(ISD::FSIN , MVT::f64 , Expand); 00241 setOperationAction(ISD::FCOS , MVT::f64 , Expand); 00242 } 00243 00244 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 00245 addLegalFPImmediate(+0.0); // FLD0 00246 addLegalFPImmediate(+1.0); // FLD1 00247 addLegalFPImmediate(-0.0); // FLD0/FCHS 00248 addLegalFPImmediate(-1.0); // FLD1/FCHS 00249 } 00250 00251 // First set operation action for all vector types to expand. Then we 00252 // will selectively turn on ones that can be effectively codegen'd. 00253 for (unsigned VT = (unsigned)MVT::Vector + 1; 00254 VT != (unsigned)MVT::LAST_VALUETYPE; VT++) { 00255 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand); 00256 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand); 00257 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand); 00258 setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand); 00259 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand); 00260 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 00261 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 00262 } 00263 00264 if (Subtarget->hasMMX()) { 00265 addRegisterClass(MVT::v8i8, X86::VR64RegisterClass); 00266 addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); 00267 addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); 00268 00269 // FIXME: add MMX packed arithmetics 00270 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand); 00271 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand); 00272 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand); 00273 } 00274 00275 if (Subtarget->hasSSE1()) { 00276 addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); 00277 00278 setOperationAction(ISD::AND, MVT::v4f32, Legal); 00279 setOperationAction(ISD::OR, MVT::v4f32, Legal); 00280 setOperationAction(ISD::XOR, MVT::v4f32, Legal); 00281 setOperationAction(ISD::ADD, MVT::v4f32, Legal); 00282 setOperationAction(ISD::SUB, MVT::v4f32, Legal); 00283 setOperationAction(ISD::MUL, MVT::v4f32, Legal); 00284 setOperationAction(ISD::LOAD, MVT::v4f32, Legal); 00285 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 00286 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); 00287 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); 00288 setOperationAction(ISD::SELECT, MVT::v4f32, Custom); 00289 } 00290 00291 if (Subtarget->hasSSE2()) { 00292 addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); 00293 addRegisterClass(MVT::v16i8, X86::VR128RegisterClass); 00294 addRegisterClass(MVT::v8i16, X86::VR128RegisterClass); 00295 addRegisterClass(MVT::v4i32, X86::VR128RegisterClass); 00296 addRegisterClass(MVT::v2i64, X86::VR128RegisterClass); 00297 00298 setOperationAction(ISD::ADD, MVT::v2f64, Legal); 00299 setOperationAction(ISD::ADD, MVT::v16i8, Legal); 00300 setOperationAction(ISD::ADD, MVT::v8i16, Legal); 00301 setOperationAction(ISD::ADD, MVT::v4i32, Legal); 00302 setOperationAction(ISD::SUB, MVT::v2f64, Legal); 00303 setOperationAction(ISD::SUB, MVT::v16i8, Legal); 00304 setOperationAction(ISD::SUB, MVT::v8i16, Legal); 00305 setOperationAction(ISD::SUB, MVT::v4i32, Legal); 00306 setOperationAction(ISD::MUL, MVT::v8i16, Legal); 00307 setOperationAction(ISD::MUL, MVT::v2f64, Legal); 00308 00309 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); 00310 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); 00311 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); 00312 00313 // Custom lower build_vector, vector_shuffle, and extract_vector_elt. 00314 for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { 00315 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom); 00316 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom); 00317 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom); 00318 } 00319 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); 00320 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); 00321 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); 00322 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); 00323 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); 00324 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); 00325 00326 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 00327 for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { 00328 setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote); 00329 AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64); 00330 setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote); 00331 AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64); 00332 setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote); 00333 AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64); 00334 setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote); 00335 AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64); 00336 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote); 00337 AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64); 00338 } 00339 00340 // Custom lower v2i64 and v2f64 selects. 00341 setOperationAction(ISD::LOAD, MVT::v2f64, Legal); 00342 setOperationAction(ISD::LOAD, MVT::v2i64, Legal); 00343 setOperationAction(ISD::SELECT, MVT::v2f64, Custom); 00344 setOperationAction(ISD::SELECT, MVT::v2i64, Custom); 00345 } 00346 00347 // We want to custom lower some of our intrinsics. 00348 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 00349 00350 computeRegisterProperties(); 00351 00352 // FIXME: These should be based on subtarget info. Plus, the values should 00353 // be smaller when we are in optimizing for size mode. 00354 maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores 00355 maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores 00356 maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores 00357 allowUnalignedMemoryAccesses = true; // x86 supports it! 00358 } 00359 00360 std::vector<SDOperand> 00361 X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { 00362 if (F.getCallingConv() == CallingConv::Fast && EnableFastCC) 00363 return LowerFastCCArguments(F, DAG); 00364 return LowerCCCArguments(F, DAG); 00365 } 00366 00367 std::pair<SDOperand, SDOperand> 00368 X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy, 00369 bool isVarArg, unsigned CallingConv, 00370 bool isTailCall, 00371 SDOperand Callee, ArgListTy &Args, 00372 SelectionDAG &DAG) { 00373 assert((!isVarArg || CallingConv == CallingConv::C) && 00374 "Only C takes varargs!"); 00375 00376 // If the callee is a GlobalAddress node (quite common, every direct call is) 00377 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 00378 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 00379 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); 00380 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 00381 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 00382 00383 if (CallingConv == CallingConv::Fast && EnableFastCC) 00384 return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG); 00385 return LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG); 00386 } 00387 00388 //===----------------------------------------------------------------------===// 00389 // C Calling Convention implementation 00390 //===----------------------------------------------------------------------===// 00391 00392 std::vector<SDOperand> 00393 X86TargetLowering::LowerCCCArguments(Function &F, SelectionDAG &DAG) { 00394 std::vector<SDOperand> ArgValues; 00395 00396 MachineFunction &MF = DAG.getMachineFunction(); 00397 MachineFrameInfo *MFI = MF.getFrameInfo(); 00398 00399 // Add DAG nodes to load the arguments... On entry to a function on the X86, 00400 // the stack frame looks like this: 00401 // 00402 // [ESP] -- return address 00403 // [ESP + 4] -- first argument (leftmost lexically) 00404 // [ESP + 8] -- second argument, if first argument is four bytes in size 00405 // ... 00406 // 00407 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 00408 for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { 00409 MVT::ValueType ObjectVT = getValueType(I->getType()); 00410 unsigned ArgIncrement = 4; 00411 unsigned ObjSize; 00412 switch (ObjectVT) { 00413 default: assert(0 && "Unhandled argument type!"); 00414 case MVT::i1: 00415 case MVT::i8: ObjSize = 1; break; 00416 case MVT::i16: ObjSize = 2; break; 00417 case MVT::i32: ObjSize = 4; break; 00418 case MVT::i64: ObjSize = ArgIncrement = 8; break; 00419 case MVT::f32: ObjSize = 4; break; 00420 case MVT::f64: ObjSize = ArgIncrement = 8; break; 00421 } 00422 // Create the frame index object for this incoming parameter... 00423 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 00424 00425 // Create the SelectionDAG nodes corresponding to a load from this parameter 00426 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 00427 00428 // Don't codegen dead arguments. FIXME: remove this check when we can nuke 00429 // dead loads. 00430 SDOperand ArgValue; 00431 if (!I->use_empty()) 00432 ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN, 00433 DAG.getSrcValue(NULL)); 00434 else { 00435 if (MVT::isInteger(ObjectVT)) 00436 ArgValue = DAG.getConstant(0, ObjectVT); 00437 else 00438 ArgValue = DAG.getConstantFP(0, ObjectVT); 00439 } 00440 ArgValues.push_back(ArgValue); 00441 00442 ArgOffset += ArgIncrement; // Move on to the next argument... 00443 } 00444 00445 // If the function takes variable number of arguments, make a frame index for 00446 // the start of the first vararg value... for expansion of llvm.va_start. 00447 if (F.isVarArg()) 00448 VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset); 00449 ReturnAddrIndex = 0; // No return address slot generated yet. 00450 BytesToPopOnReturn = 0; // Callee pops nothing. 00451 BytesCallerReserves = ArgOffset; 00452 00453 // Finally, inform the code generator which regs we return values in. 00454 switch (getValueType(F.getReturnType())) { 00455 default: assert(0 && "Unknown type!"); 00456 case MVT::isVoid: break; 00457 case MVT::i1: 00458 case MVT::i8: 00459 case MVT::i16: 00460 case MVT::i32: 00461 MF.addLiveOut(X86::EAX); 00462 break; 00463 case MVT::i64: 00464 MF.addLiveOut(X86::EAX); 00465 MF.addLiveOut(X86::EDX); 00466 break; 00467 case MVT::f32: 00468 case MVT::f64: 00469 MF.addLiveOut(X86::ST0); 00470 break; 00471 } 00472 return ArgValues; 00473 } 00474 00475 std::pair<SDOperand, SDOperand> 00476 X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy, 00477 bool isVarArg, bool isTailCall, 00478 SDOperand Callee, ArgListTy &Args, 00479 SelectionDAG &DAG) { 00480 // Count how many bytes are to be pushed on the stack. 00481 unsigned NumBytes = 0; 00482 00483 if (Args.empty()) { 00484 // Save zero bytes. 00485 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(0, getPointerTy())); 00486 } else { 00487 for (unsigned i = 0, e = Args.size(); i != e; ++i) 00488 switch (getValueType(Args[i].second)) { 00489 default: assert(0 && "Unknown value type!"); 00490 case MVT::i1: 00491 case MVT::i8: 00492 case MVT::i16: 00493 case MVT::i32: 00494 case MVT::f32: 00495 NumBytes += 4; 00496 break; 00497 case MVT::i64: 00498 case MVT::f64: 00499 NumBytes += 8; 00500 break; 00501 } 00502 00503 Chain = DAG.getCALLSEQ_START(Chain, 00504 DAG.getConstant(NumBytes, getPointerTy())); 00505 00506 // Arguments go on the stack in reverse order, as specified by the ABI. 00507 unsigned ArgOffset = 0; 00508 SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32); 00509 std::vector<SDOperand> Stores; 00510 00511 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 00512 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 00513 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 00514 00515 switch (getValueType(Args[i].second)) { 00516 default: assert(0 && "Unexpected ValueType for argument!"); 00517 case MVT::i1: 00518 case MVT::i8: 00519 case MVT::i16: 00520 // Promote the integer to 32 bits. If the input type is signed use a 00521 // sign extend, otherwise use a zero extend. 00522 if (Args[i].second->isSigned()) 00523 Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first); 00524 else 00525 Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first); 00526 00527 // FALL THROUGH 00528 case MVT::i32: 00529 case MVT::f32: 00530 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 00531 Args[i].first, PtrOff, 00532 DAG.getSrcValue(NULL))); 00533 ArgOffset += 4; 00534 break; 00535 case MVT::i64: 00536 case MVT::f64: 00537 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 00538 Args[i].first, PtrOff, 00539 DAG.getSrcValue(NULL))); 00540 ArgOffset += 8; 00541 break; 00542 } 00543 } 00544 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores); 00545 } 00546 00547 std::vector<MVT::ValueType> RetVals; 00548 MVT::ValueType RetTyVT = getValueType(RetTy); 00549 RetVals.push_back(MVT::Other); 00550 00551 // The result values produced have to be legal. Promote the result. 00552 switch (RetTyVT) { 00553 case MVT::isVoid: break; 00554 default: 00555 RetVals.push_back(RetTyVT); 00556 break; 00557 case MVT::i1: 00558 case MVT::i8: 00559 case MVT::i16: 00560 RetVals.push_back(MVT::i32); 00561 break; 00562 case MVT::f32: 00563 if (X86ScalarSSE) 00564 RetVals.push_back(MVT::f32); 00565 else 00566 RetVals.push_back(MVT::f64); 00567 break; 00568 case MVT::i64: 00569 RetVals.push_back(MVT::i32); 00570 RetVals.push_back(MVT::i32); 00571 break; 00572 } 00573 00574 std::vector<MVT::ValueType> NodeTys; 00575 NodeTys.push_back(MVT::Other); // Returns a chain 00576 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 00577 std::vector<SDOperand> Ops; 00578 Ops.push_back(Chain); 00579 Ops.push_back(Callee); 00580 00581 // FIXME: Do not generate X86ISD::TAILCALL for now. 00582 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops); 00583 SDOperand InFlag = Chain.getValue(1); 00584 00585 NodeTys.clear(); 00586 NodeTys.push_back(MVT::Other); // Returns a chain 00587 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 00588 Ops.clear(); 00589 Ops.push_back(Chain); 00590 Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); 00591 Ops.push_back(DAG.getConstant(0, getPointerTy())); 00592 Ops.push_back(InFlag); 00593 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops); 00594 InFlag = Chain.getValue(1); 00595 00596 SDOperand RetVal; 00597 if (RetTyVT != MVT::isVoid) { 00598 switch (RetTyVT) { 00599 default: assert(0 && "Unknown value type to return!"); 00600 case MVT::i1: 00601 case MVT::i8: 00602 RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag); 00603 Chain = RetVal.getValue(1); 00604 if (RetTyVT == MVT::i1) 00605 RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal); 00606 break; 00607 case MVT::i16: 00608 RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag); 00609 Chain = RetVal.getValue(1); 00610 break; 00611 case MVT::i32: 00612 RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 00613 Chain = RetVal.getValue(1); 00614 break; 00615 case MVT::i64: { 00616 SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 00617 SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32, 00618 Lo.getValue(2)); 00619 RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi); 00620 Chain = Hi.getValue(1); 00621 break; 00622 } 00623 case MVT::f32: 00624 case MVT::f64: { 00625 std::vector<MVT::ValueType> Tys; 00626 Tys.push_back(MVT::f64); 00627 Tys.push_back(MVT::Other); 00628 Tys.push_back(MVT::Flag); 00629 std::vector<SDOperand> Ops; 00630 Ops.push_back(Chain); 00631 Ops.push_back(InFlag); 00632 RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops); 00633 Chain = RetVal.getValue(1); 00634 InFlag = RetVal.getValue(2); 00635 if (X86ScalarSSE) { 00636 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 00637 // shouldn't be necessary except that RFP cannot be live across 00638 // multiple blocks. When stackifier is fixed, they can be uncoupled. 00639 MachineFunction &MF = DAG.getMachineFunction(); 00640 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 00641 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 00642 Tys.clear(); 00643 Tys.push_back(MVT::Other); 00644 Ops.clear(); 00645 Ops.push_back(Chain); 00646 Ops.push_back(RetVal); 00647 Ops.push_back(StackSlot); 00648 Ops.push_back(DAG.getValueType(RetTyVT)); 00649 Ops.push_back(InFlag); 00650 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 00651 RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot, 00652 DAG.getSrcValue(NULL)); 00653 Chain = RetVal.getValue(1); 00654 } 00655 00656 if (RetTyVT == MVT::f32 && !X86ScalarSSE) 00657 // FIXME: we would really like to remember that this FP_ROUND 00658 // operation is okay to eliminate if we allow excess FP precision. 00659 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 00660 break; 00661 } 00662 } 00663 } 00664 00665 return std::make_pair(RetVal, Chain); 00666 } 00667 00668 //===----------------------------------------------------------------------===// 00669 // Fast Calling Convention implementation 00670 //===----------------------------------------------------------------------===// 00671 // 00672 // The X86 'fast' calling convention passes up to two integer arguments in 00673 // registers (an appropriate portion of EAX/EDX), passes arguments in C order, 00674 // and requires that the callee pop its arguments off the stack (allowing proper 00675 // tail calls), and has the same return value conventions as C calling convs. 00676 // 00677 // This calling convention always arranges for the callee pop value to be 8n+4 00678 // bytes, which is needed for tail recursion elimination and stack alignment 00679 // reasons. 00680 // 00681 // Note that this can be enhanced in the future to pass fp vals in registers 00682 // (when we have a global fp allocator) and do other tricks. 00683 // 00684 00685 /// AddLiveIn - This helper function adds the specified physical register to the 00686 /// MachineFunction as a live in value. It also creates a corresponding virtual 00687 /// register for it. 00688 static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, 00689 TargetRegisterClass *RC) { 00690 assert(RC->contains(PReg) && "Not the correct regclass!"); 00691 unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC); 00692 MF.addLiveIn(PReg, VReg); 00693 return VReg; 00694 } 00695 00696 // FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments 00697 // to pass in registers. 0 is none, 1 is is "use EAX", 2 is "use EAX and 00698 // EDX". Anything more is illegal. 00699 // 00700 // FIXME: The linscan register allocator currently has problem with 00701 // coalescing. At the time of this writing, whenever it decides to coalesce 00702 // a physreg with a virtreg, this increases the size of the physreg's live 00703 // range, and the live range cannot ever be reduced. This causes problems if 00704 // too many physregs are coaleced with virtregs, which can cause the register 00705 // allocator to wedge itself. 00706 // 00707 // This code triggers this problem more often if we pass args in registers, 00708 // so disable it until this is fixed. 00709 // 00710 // NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings 00711 // about code being dead. 00712 // 00713 static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0; 00714 00715 00716 std::vector<SDOperand> 00717 X86TargetLowering::LowerFastCCArguments(Function &F, SelectionDAG &DAG) { 00718 std::vector<SDOperand> ArgValues; 00719 00720 MachineFunction &MF = DAG.getMachineFunction(); 00721 MachineFrameInfo *MFI = MF.getFrameInfo(); 00722 00723 // Add DAG nodes to load the arguments... On entry to a function the stack 00724 // frame looks like this: 00725 // 00726 // [ESP] -- return address 00727 // [ESP + 4] -- first nonreg argument (leftmost lexically) 00728 // [ESP + 8] -- second nonreg argument, if first argument is 4 bytes in size 00729 // ... 00730 unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot 00731 00732 // Keep track of the number of integer regs passed so far. This can be either 00733 // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both 00734 // used). 00735 unsigned NumIntRegs = 0; 00736 00737 for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { 00738 MVT::ValueType ObjectVT = getValueType(I->getType()); 00739 unsigned ArgIncrement = 4; 00740 unsigned ObjSize = 0; 00741 SDOperand ArgValue; 00742 00743 switch (ObjectVT) { 00744 default: assert(0 && "Unhandled argument type!"); 00745 case MVT::i1: 00746 case MVT::i8: 00747 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 00748 if (!I->use_empty()) { 00749 unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL, 00750 X86::R8RegisterClass); 00751 ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i8); 00752 DAG.setRoot(ArgValue.getValue(1)); 00753 if (ObjectVT == MVT::i1) 00754 // FIXME: Should insert a assertzext here. 00755 ArgValue = DAG.getNode(ISD::TRUNCATE, MVT::i1, ArgValue); 00756 } 00757 ++NumIntRegs; 00758 break; 00759 } 00760 00761 ObjSize = 1; 00762 break; 00763 case MVT::i16: 00764 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 00765 if (!I->use_empty()) { 00766 unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX, 00767 X86::R16RegisterClass); 00768 ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i16); 00769 DAG.setRoot(ArgValue.getValue(1)); 00770 } 00771 ++NumIntRegs; 00772 break; 00773 } 00774 ObjSize = 2; 00775 break; 00776 case MVT::i32: 00777 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 00778 if (!I->use_empty()) { 00779 unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX, 00780 X86::R32RegisterClass); 00781 ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32); 00782 DAG.setRoot(ArgValue.getValue(1)); 00783 } 00784 ++NumIntRegs; 00785 break; 00786 } 00787 ObjSize = 4; 00788 break; 00789 case MVT::i64: 00790 if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) { 00791 if (!I->use_empty()) { 00792 unsigned BotReg = AddLiveIn(MF, X86::EAX, X86::R32RegisterClass); 00793 unsigned TopReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass); 00794 00795 SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32); 00796 SDOperand Hi = DAG.getCopyFromReg(Low.getValue(1), TopReg, MVT::i32); 00797 DAG.setRoot(Hi.getValue(1)); 00798 00799 ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi); 00800 } 00801 NumIntRegs += 2; 00802 break; 00803 } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) { 00804 if (!I->use_empty()) { 00805 unsigned BotReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass); 00806 SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32); 00807 DAG.setRoot(Low.getValue(1)); 00808 00809 // Load the high part from memory. 00810 // Create the frame index object for this incoming parameter... 00811 int FI = MFI->CreateFixedObject(4, ArgOffset); 00812 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 00813 SDOperand Hi = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN, 00814 DAG.getSrcValue(NULL)); 00815 ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi); 00816 } 00817 ArgOffset += 4; 00818 NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS; 00819 break; 00820 } 00821 ObjSize = ArgIncrement = 8; 00822 break; 00823 case MVT::f32: ObjSize = 4; break; 00824 case MVT::f64: ObjSize = ArgIncrement = 8; break; 00825 } 00826 00827 // Don't codegen dead arguments. FIXME: remove this check when we can nuke 00828 // dead loads. 00829 if (ObjSize && !I->use_empty()) { 00830 // Create the frame index object for this incoming parameter... 00831 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 00832 00833 // Create the SelectionDAG nodes corresponding to a load from this 00834 // parameter. 00835 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 00836 00837 ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN, 00838 DAG.getSrcValue(NULL)); 00839 } else if (ArgValue.Val == 0) { 00840 if (MVT::isInteger(ObjectVT)) 00841 ArgValue = DAG.getConstant(0, ObjectVT); 00842 else 00843 ArgValue = DAG.getConstantFP(0, ObjectVT); 00844 } 00845 ArgValues.push_back(ArgValue); 00846 00847 if (ObjSize) 00848 ArgOffset += ArgIncrement; // Move on to the next argument. 00849 } 00850 00851 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 00852 // arguments and the arguments after the retaddr has been pushed are aligned. 00853 if ((ArgOffset & 7) == 0) 00854 ArgOffset += 4; 00855 00856 VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. 00857 ReturnAddrIndex = 0; // No return address slot generated yet. 00858 BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments. 00859 BytesCallerReserves = 0; 00860 00861 // Finally, inform the code generator which regs we return values in. 00862 switch (getValueType(F.getReturnType())) { 00863 default: assert(0 && "Unknown type!"); 00864 case MVT::isVoid: break; 00865 case MVT::i1: 00866 case MVT::i8: 00867 case MVT::i16: 00868 case MVT::i32: 00869 MF.addLiveOut(X86::EAX); 00870 break; 00871 case MVT::i64: 00872 MF.addLiveOut(X86::EAX); 00873 MF.addLiveOut(X86::EDX); 00874 break; 00875 case MVT::f32: 00876 case MVT::f64: 00877 MF.addLiveOut(X86::ST0); 00878 break; 00879 } 00880 return ArgValues; 00881 } 00882 00883 std::pair<SDOperand, SDOperand> 00884 X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy, 00885 bool isTailCall, SDOperand Callee, 00886 ArgListTy &Args, SelectionDAG &DAG) { 00887 // Count how many bytes are to be pushed on the stack. 00888 unsigned NumBytes = 0; 00889 00890 // Keep track of the number of integer regs passed so far. This can be either 00891 // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both 00892 // used). 00893 unsigned NumIntRegs = 0; 00894 00895 for (unsigned i = 0, e = Args.size(); i != e; ++i) 00896 switch (getValueType(Args[i].second)) { 00897 default: assert(0 && "Unknown value type!"); 00898 case MVT::i1: 00899 case MVT::i8: 00900 case MVT::i16: 00901 case MVT::i32: 00902 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 00903 ++NumIntRegs; 00904 break; 00905 } 00906 // fall through 00907 case MVT::f32: 00908 NumBytes += 4; 00909 break; 00910 case MVT::i64: 00911 if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) { 00912 NumIntRegs += 2; 00913 break; 00914 } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) { 00915 NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS; 00916 NumBytes += 4; 00917 break; 00918 } 00919 00920 // fall through 00921 case MVT::f64: 00922 NumBytes += 8; 00923 break; 00924 } 00925 00926 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 00927 // arguments and the arguments after the retaddr has been pushed are aligned. 00928 if ((NumBytes & 7) == 0) 00929 NumBytes += 4; 00930 00931 Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); 00932 00933 // Arguments go on the stack in reverse order, as specified by the ABI. 00934 unsigned ArgOffset = 0; 00935 SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32); 00936 NumIntRegs = 0; 00937 std::vector<SDOperand> Stores; 00938 std::vector<SDOperand> RegValuesToPass; 00939 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 00940 switch (getValueType(Args[i].second)) { 00941 default: assert(0 && "Unexpected ValueType for argument!"); 00942 case MVT::i1: 00943 Args[i].first = DAG.getNode(ISD::ANY_EXTEND, MVT::i8, Args[i].first); 00944 // Fall through. 00945 case MVT::i8: 00946 case MVT::i16: 00947 case MVT::i32: 00948 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 00949 RegValuesToPass.push_back(Args[i].first); 00950 ++NumIntRegs; 00951 break; 00952 } 00953 // Fall through 00954 case MVT::f32: { 00955 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 00956 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 00957 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 00958 Args[i].first, PtrOff, 00959 DAG.getSrcValue(NULL))); 00960 ArgOffset += 4; 00961 break; 00962 } 00963 case MVT::i64: 00964 // Can pass (at least) part of it in regs? 00965 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 00966 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 00967 Args[i].first, DAG.getConstant(1, MVT::i32)); 00968 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 00969 Args[i].first, DAG.getConstant(0, MVT::i32)); 00970 RegValuesToPass.push_back(Lo); 00971 ++NumIntRegs; 00972 00973 // Pass both parts in regs? 00974 if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) { 00975 RegValuesToPass.push_back(Hi); 00976 ++NumIntRegs; 00977 } else { 00978 // Pass the high part in memory. 00979 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 00980 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 00981 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 00982 Hi, PtrOff, DAG.getSrcValue(NULL))); 00983 ArgOffset += 4; 00984 } 00985 break; 00986 } 00987 // Fall through 00988 case MVT::f64: 00989 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 00990 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 00991 Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 00992 Args[i].first, PtrOff, 00993 DAG.getSrcValue(NULL))); 00994 ArgOffset += 8; 00995 break; 00996 } 00997 } 00998 if (!Stores.empty()) 00999 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores); 01000 01001 // Make sure the instruction takes 8n+4 bytes to make sure the start of the 01002 // arguments and the arguments after the retaddr has been pushed are aligned. 01003 if ((ArgOffset & 7) == 0) 01004 ArgOffset += 4; 01005 01006 std::vector<MVT::ValueType> RetVals; 01007 MVT::ValueType RetTyVT = getValueType(RetTy); 01008 01009 RetVals.push_back(MVT::Other); 01010 01011 // The result values produced have to be legal. Promote the result. 01012 switch (RetTyVT) { 01013 case MVT::isVoid: break; 01014 default: 01015 RetVals.push_back(RetTyVT); 01016 break; 01017 case MVT::i1: 01018 case MVT::i8: 01019 case MVT::i16: 01020 RetVals.push_back(MVT::i32); 01021 break; 01022 case MVT::f32: 01023 if (X86ScalarSSE) 01024 RetVals.push_back(MVT::f32); 01025 else 01026 RetVals.push_back(MVT::f64); 01027 break; 01028 case MVT::i64: 01029 RetVals.push_back(MVT::i32); 01030 RetVals.push_back(MVT::i32); 01031 break; 01032 } 01033 01034 // Build a sequence of copy-to-reg nodes chained together with token chain 01035 // and flag operands which copy the outgoing args into registers. 01036 SDOperand InFlag; 01037 for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) { 01038 unsigned CCReg; 01039 SDOperand RegToPass = RegValuesToPass[i]; 01040 switch (RegToPass.getValueType()) { 01041 default: assert(0 && "Bad thing to pass in regs"); 01042 case MVT::i8: 01043 CCReg = (i == 0) ? X86::AL : X86::DL; 01044 break; 01045 case MVT::i16: 01046 CCReg = (i == 0) ? X86::AX : X86::DX; 01047 break; 01048 case MVT::i32: 01049 CCReg = (i == 0) ? X86::EAX : X86::EDX; 01050 break; 01051 } 01052 01053 Chain = DAG.getCopyToReg(Chain, CCReg, RegToPass, InFlag); 01054 InFlag = Chain.getValue(1); 01055 } 01056 01057 std::vector<MVT::ValueType> NodeTys; 01058 NodeTys.push_back(MVT::Other); // Returns a chain 01059 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 01060 std::vector<SDOperand> Ops; 01061 Ops.push_back(Chain); 01062 Ops.push_back(Callee); 01063 if (InFlag.Val) 01064 Ops.push_back(InFlag); 01065 01066 // FIXME: Do not generate X86ISD::TAILCALL for now. 01067 Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops); 01068 InFlag = Chain.getValue(1); 01069 01070 NodeTys.clear(); 01071 NodeTys.push_back(MVT::Other); // Returns a chain 01072 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 01073 Ops.clear(); 01074 Ops.push_back(Chain); 01075 Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy())); 01076 Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy())); 01077 Ops.push_back(InFlag); 01078 Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops); 01079 InFlag = Chain.getValue(1); 01080 01081 SDOperand RetVal; 01082 if (RetTyVT != MVT::isVoid) { 01083 switch (RetTyVT) { 01084 default: assert(0 && "Unknown value type to return!"); 01085 case MVT::i1: 01086 case MVT::i8: 01087 RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag); 01088 Chain = RetVal.getValue(1); 01089 if (RetTyVT == MVT::i1) 01090 RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal); 01091 break; 01092 case MVT::i16: 01093 RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag); 01094 Chain = RetVal.getValue(1); 01095 break; 01096 case MVT::i32: 01097 RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 01098 Chain = RetVal.getValue(1); 01099 break; 01100 case MVT::i64: { 01101 SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag); 01102 SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32, 01103 Lo.getValue(2)); 01104 RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi); 01105 Chain = Hi.getValue(1); 01106 break; 01107 } 01108 case MVT::f32: 01109 case MVT::f64: { 01110 std::vector<MVT::ValueType> Tys; 01111 Tys.push_back(MVT::f64); 01112 Tys.push_back(MVT::Other); 01113 Tys.push_back(MVT::Flag); 01114 std::vector<SDOperand> Ops; 01115 Ops.push_back(Chain); 01116 Ops.push_back(InFlag); 01117 RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops); 01118 Chain = RetVal.getValue(1); 01119 InFlag = RetVal.getValue(2); 01120 if (X86ScalarSSE) { 01121 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This 01122 // shouldn't be necessary except that RFP cannot be live across 01123 // multiple blocks. When stackifier is fixed, they can be uncoupled. 01124 MachineFunction &MF = DAG.getMachineFunction(); 01125 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 01126 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 01127 Tys.clear(); 01128 Tys.push_back(MVT::Other); 01129 Ops.clear(); 01130 Ops.push_back(Chain); 01131 Ops.push_back(RetVal); 01132 Ops.push_back(StackSlot); 01133 Ops.push_back(DAG.getValueType(RetTyVT)); 01134 Ops.push_back(InFlag); 01135 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 01136 RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot, 01137 DAG.getSrcValue(NULL)); 01138 Chain = RetVal.getValue(1); 01139 } 01140 01141 if (RetTyVT == MVT::f32 && !X86ScalarSSE) 01142 // FIXME: we would really like to remember that this FP_ROUND 01143 // operation is okay to eliminate if we allow excess FP precision. 01144 RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); 01145 break; 01146 } 01147 } 01148 } 01149 01150 return std::make_pair(RetVal, Chain); 01151 } 01152 01153 SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 01154 if (ReturnAddrIndex == 0) { 01155 // Set up a frame object for the return address. 01156 MachineFunction &MF = DAG.getMachineFunction(); 01157 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4); 01158 } 01159 01160 return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32); 01161 } 01162 01163 01164 01165 std::pair<SDOperand, SDOperand> X86TargetLowering:: 01166 LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth, 01167 SelectionDAG &DAG) { 01168 SDOperand Result; 01169 if (Depth) // Depths > 0 not supported yet! 01170 Result = DAG.getConstant(0, getPointerTy()); 01171 else { 01172 SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG); 01173 if (!isFrameAddress) 01174 // Just load the return address 01175 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI, 01176 DAG.getSrcValue(NULL)); 01177 else 01178 Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI, 01179 DAG.getConstant(4, MVT::i32)); 01180 } 01181 return std::make_pair(Result, Chain); 01182 } 01183 01184 /// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode 01185 /// which corresponds to the condition code. 01186 static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) { 01187 switch (X86CC) { 01188 default: assert(0 && "Unknown X86 conditional code!"); 01189 case X86ISD::COND_A: return X86::JA; 01190 case X86ISD::COND_AE: return X86::JAE; 01191 case X86ISD::COND_B: return X86::JB; 01192 case X86ISD::COND_BE: return X86::JBE; 01193 case X86ISD::COND_E: return X86::JE; 01194 case X86ISD::COND_G: return X86::JG; 01195 case X86ISD::COND_GE: return X86::JGE; 01196 case X86ISD::COND_L: return X86::JL; 01197 case X86ISD::COND_LE: return X86::JLE; 01198 case X86ISD::COND_NE: return X86::JNE; 01199 case X86ISD::COND_NO: return X86::JNO; 01200 case X86ISD::COND_NP: return X86::JNP; 01201 case X86ISD::COND_NS: return X86::JNS; 01202 case X86ISD::COND_O: return X86::JO; 01203 case X86ISD::COND_P: return X86::JP; 01204 case X86ISD::COND_S: return X86::JS; 01205 } 01206 } 01207 01208 /// translateX86CC - do a one to one translation of a ISD::CondCode to the X86 01209 /// specific condition code. It returns a false if it cannot do a direct 01210 /// translation. X86CC is the translated CondCode. Flip is set to true if the 01211 /// the order of comparison operands should be flipped. 01212 static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP, 01213 unsigned &X86CC, bool &Flip) { 01214 Flip = false; 01215 X86CC = X86ISD::COND_INVALID; 01216 if (!isFP) { 01217 switch (SetCCOpcode) { 01218 default: break; 01219 case ISD::SETEQ: X86CC = X86ISD::COND_E; break; 01220 case ISD::SETGT: X86CC = X86ISD::COND_G; break; 01221 case ISD::SETGE: X86CC = X86ISD::COND_GE; break; 01222 case ISD::SETLT: X86CC = X86ISD::COND_L; break; 01223 case ISD::SETLE: X86CC = X86ISD::COND_LE; break; 01224 case ISD::SETNE: X86CC = X86ISD::COND_NE; break; 01225 case ISD::SETULT: X86CC = X86ISD::COND_B; break; 01226 case ISD::SETUGT: X86CC = X86ISD::COND_A; break; 01227 case ISD::SETULE: X86CC = X86ISD::COND_BE; break; 01228 case ISD::SETUGE: X86CC = X86ISD::COND_AE; break; 01229 } 01230 } else { 01231 // On a floating point condition, the flags are set as follows: 01232 // ZF PF CF op 01233 // 0 | 0 | 0 | X > Y 01234 // 0 | 0 | 1 | X < Y 01235 // 1 | 0 | 0 | X == Y 01236 // 1 | 1 | 1 | unordered 01237 switch (SetCCOpcode) { 01238 default: break; 01239 case ISD::SETUEQ: 01240 case ISD::SETEQ: X86CC = X86ISD::COND_E; break; 01241 case ISD::SETOLE: Flip = true; // Fallthrough 01242 case ISD::SETOGT: 01243 case ISD::SETGT: X86CC = X86ISD::COND_A; break; 01244 case ISD::SETOLT: Flip = true; // Fallthrough 01245 case ISD::SETOGE: 01246 case ISD::SETGE: X86CC = X86ISD::COND_AE; break; 01247 case ISD::SETUGE: Flip = true; // Fallthrough 01248 case ISD::SETULT: 01249 case ISD::SETLT: X86CC = X86ISD::COND_B; break; 01250 case ISD::SETUGT: Flip = true; // Fallthrough 01251 case ISD::SETULE: 01252 case ISD::SETLE: X86CC = X86ISD::COND_BE; break; 01253 case ISD::SETONE: 01254 case ISD::SETNE: X86CC = X86ISD::COND_NE; break; 01255 case ISD::SETUO: X86CC = X86ISD::COND_P; break; 01256 case ISD::SETO: X86CC = X86ISD::COND_NP; break; 01257 } 01258 } 01259 01260 return X86CC != X86ISD::COND_INVALID; 01261 } 01262 01263 static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC, 01264 bool &Flip) { 01265 return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip); 01266 } 01267 01268 /// hasFPCMov - is there a floating point cmov for the specific X86 condition 01269 /// code. Current x86 isa includes the following FP cmov instructions: 01270 /// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. 01271 static bool hasFPCMov(unsigned X86CC) { 01272 switch (X86CC) { 01273 default: 01274 return false; 01275 case X86ISD::COND_B: 01276 case X86ISD::COND_BE: 01277 case X86ISD::COND_E: 01278 case X86ISD::COND_P: 01279 case X86ISD::COND_A: 01280 case X86ISD::COND_AE: 01281 case X86ISD::COND_NE: 01282 case X86ISD::COND_NP: 01283 return true; 01284 } 01285 } 01286 01287 MachineBasicBlock * 01288 X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 01289 MachineBasicBlock *BB) { 01290 switch (MI->getOpcode()) { 01291 default: assert(false && "Unexpected instr type to insert"); 01292 case X86::CMOV_FR32: 01293 case X86::CMOV_FR64: 01294 case X86::CMOV_V4F32: 01295 case X86::CMOV_V2F64: 01296 case X86::CMOV_V2I64: { 01297 // To "insert" a SELECT_CC instruction, we actually have to insert the 01298 // diamond control-flow pattern. The incoming instruction knows the 01299 // destination vreg to set, the condition code register to branch on, the 01300 // true/false values to select between, and a branch opcode to use. 01301 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 01302 ilist<MachineBasicBlock>::iterator It = BB; 01303 ++It; 01304 01305 // thisMBB: 01306 // ... 01307 // TrueVal = ... 01308 // cmpTY ccX, r1, r2 01309 // bCC copy1MBB 01310 // fallthrough --> copy0MBB 01311 MachineBasicBlock *thisMBB = BB; 01312 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 01313 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 01314 unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue()); 01315 BuildMI(BB, Opc, 1).addMBB(sinkMBB); 01316 MachineFunction *F = BB->getParent(); 01317 F->getBasicBlockList().insert(It, copy0MBB); 01318 F->getBasicBlockList().insert(It, sinkMBB); 01319 // Update machine-CFG edges by first adding all successors of the current 01320 // block to the new block which will contain the Phi node for the select. 01321 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 01322 e = BB->succ_end(); i != e; ++i) 01323 sinkMBB->addSuccessor(*i); 01324 // Next, remove all successors of the current block, and add the true 01325 // and fallthrough blocks as its successors. 01326 while(!BB->succ_empty()) 01327 BB->removeSuccessor(BB->succ_begin()); 01328 BB->addSuccessor(copy0MBB); 01329 BB->addSuccessor(sinkMBB); 01330 01331 // copy0MBB: 01332 // %FalseValue = ... 01333 // # fallthrough to sinkMBB 01334 BB = copy0MBB; 01335 01336 // Update machine-CFG edges 01337 BB->addSuccessor(sinkMBB); 01338 01339 // sinkMBB: 01340 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 01341 // ... 01342 BB = sinkMBB; 01343 BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg()) 01344 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 01345 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 01346 01347 delete MI; // The pseudo instruction is gone now. 01348 return BB; 01349 } 01350 01351 case X86::FP_TO_INT16_IN_MEM: 01352 case X86::FP_TO_INT32_IN_MEM: 01353 case X86::FP_TO_INT64_IN_MEM: { 01354 // Change the floating point control register to use "round towards zero" 01355 // mode when truncating to an integer value. 01356 MachineFunction *F = BB->getParent(); 01357 int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); 01358 addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx); 01359 01360 // Load the old value of the high byte of the control word... 01361 unsigned OldCW = 01362 F->getSSARegMap()->createVirtualRegister(X86::R16RegisterClass); 01363 addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx); 01364 01365 // Set the high part to be round to zero... 01366 addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F); 01367 01368 // Reload the modified control word now... 01369 addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx); 01370 01371 // Restore the memory image of control word to original value 01372 addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW); 01373 01374 // Get the X86 opcode to use. 01375 unsigned Opc; 01376 switch (MI->getOpcode()) { 01377 default: assert(0 && "illegal opcode!"); 01378 case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break; 01379 case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break; 01380 case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break; 01381 } 01382 01383 X86AddressMode AM; 01384 MachineOperand &Op = MI->getOperand(0); 01385 if (Op.isRegister()) { 01386 AM.BaseType = X86AddressMode::RegBase; 01387 AM.Base.Reg = Op.getReg(); 01388 } else { 01389 AM.BaseType = X86AddressMode::FrameIndexBase; 01390 AM.Base.FrameIndex = Op.getFrameIndex(); 01391 } 01392 Op = MI->getOperand(1); 01393 if (Op.isImmediate()) 01394 AM.Scale = Op.getImmedValue(); 01395 Op = MI->getOperand(2); 01396 if (Op.isImmediate()) 01397 AM.IndexReg = Op.getImmedValue(); 01398 Op = MI->getOperand(3); 01399 if (Op.isGlobalAddress()) { 01400 AM.GV = Op.getGlobal(); 01401 } else { 01402 AM.Disp = Op.getImmedValue(); 01403 } 01404 addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg()); 01405 01406 // Reload the original control word now. 01407 addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx); 01408 01409 delete MI; // The pseudo instruction is gone now. 01410 return BB; 01411 } 01412 } 01413 } 01414 01415 01416 //===----------------------------------------------------------------------===// 01417 // X86 Custom Lowering Hooks 01418 //===----------------------------------------------------------------------===// 01419 01420 /// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra 01421 /// load. For Darwin, external and weak symbols are indirect, loading the value 01422 /// at address GV rather then the value of GV itself. This means that the 01423 /// GlobalAddress must be in the base or index register of the address, not the 01424 /// GV offset field. 01425 static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) { 01426 return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || 01427 (GV->isExternal() && !GV->hasNotBeenReadFromBytecode())); 01428 } 01429 01430 /// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return 01431 /// true if Op is undef or if its value falls within the specified range (L, H]. 01432 static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) { 01433 if (Op.getOpcode() == ISD::UNDEF) 01434 return true; 01435 01436 unsigned Val = cast<ConstantSDNode>(Op)->getValue(); 01437 return (Val >= Low && Val < Hi); 01438 } 01439 01440 /// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return 01441 /// true if Op is undef or if its value equal to the specified value. 01442 static bool isUndefOrEqual(SDOperand Op, unsigned Val) { 01443 if (Op.getOpcode() == ISD::UNDEF) 01444 return true; 01445 return cast<ConstantSDNode>(Op)->getValue() == Val; 01446 } 01447 01448 /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand 01449 /// specifies a shuffle of elements that is suitable for input to PSHUFD. 01450 bool X86::isPSHUFDMask(SDNode *N) { 01451 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01452 01453 if (N->getNumOperands() != 4) 01454 return false; 01455 01456 // Check if the value doesn't reference the second vector. 01457 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 01458 SDOperand Arg = N->getOperand(i); 01459 if (Arg.getOpcode() == ISD::UNDEF) continue; 01460 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01461 if (cast<ConstantSDNode>(Arg)->getValue() >= 4) 01462 return false; 01463 } 01464 01465 return true; 01466 } 01467 01468 /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand 01469 /// specifies a shuffle of elements that is suitable for input to PSHUFHW. 01470 bool X86::isPSHUFHWMask(SDNode *N) { 01471 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01472 01473 if (N->getNumOperands() != 8) 01474 return false; 01475 01476 // Lower quadword copied in order. 01477 for (unsigned i = 0; i != 4; ++i) { 01478 SDOperand Arg = N->getOperand(i); 01479 if (Arg.getOpcode() == ISD::UNDEF) continue; 01480 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01481 if (cast<ConstantSDNode>(Arg)->getValue() != i) 01482 return false; 01483 } 01484 01485 // Upper quadword shuffled. 01486 for (unsigned i = 4; i != 8; ++i) { 01487 SDOperand Arg = N->getOperand(i); 01488 if (Arg.getOpcode() == ISD::UNDEF) continue; 01489 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01490 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 01491 if (Val < 4 || Val > 7) 01492 return false; 01493 } 01494 01495 return true; 01496 } 01497 01498 /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand 01499 /// specifies a shuffle of elements that is suitable for input to PSHUFLW. 01500 bool X86::isPSHUFLWMask(SDNode *N) { 01501 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01502 01503 if (N->getNumOperands() != 8) 01504 return false; 01505 01506 // Upper quadword copied in order. 01507 for (unsigned i = 4; i != 8; ++i) 01508 if (!isUndefOrEqual(N->getOperand(i), i)) 01509 return false; 01510 01511 // Lower quadword shuffled. 01512 for (unsigned i = 0; i != 4; ++i) 01513 if (!isUndefOrInRange(N->getOperand(i), 0, 4)) 01514 return false; 01515 01516 return true; 01517 } 01518 01519 /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand 01520 /// specifies a shuffle of elements that is suitable for input to SHUFP*. 01521 bool X86::isSHUFPMask(SDNode *N) { 01522 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01523 01524 unsigned NumElems = N->getNumOperands(); 01525 if (NumElems == 2) { 01526 // The only cases that ought be handled by SHUFPD is 01527 // Dest { 2, 1 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 } 01528 // Dest { 3, 0 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 } 01529 // Expect bit 0 == 1, bit1 == 2 01530 SDOperand Bit0 = N->getOperand(0); 01531 SDOperand Bit1 = N->getOperand(1); 01532 if (isUndefOrEqual(Bit0, 0) && isUndefOrEqual(Bit1, 3)) 01533 return true; 01534 if (isUndefOrEqual(Bit0, 1) && isUndefOrEqual(Bit1, 2)) 01535 return true; 01536 return false; 01537 } 01538 01539 if (NumElems != 4) return false; 01540 01541 // Each half must refer to only one of the vector. 01542 for (unsigned i = 0; i < 2; ++i) { 01543 SDOperand Arg = N->getOperand(i); 01544 if (Arg.getOpcode() == ISD::UNDEF) continue; 01545 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01546 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 01547 if (Val >= 4) return false; 01548 } 01549 for (unsigned i = 2; i < 4; ++i) { 01550 SDOperand Arg = N->getOperand(i); 01551 if (Arg.getOpcode() == ISD::UNDEF) continue; 01552 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01553 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 01554 if (Val < 4) return false; 01555 } 01556 01557 return true; 01558 } 01559 01560 /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand 01561 /// specifies a shuffle of elements that is suitable for input to MOVHLPS. 01562 bool X86::isMOVHLPSMask(SDNode *N) { 01563 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01564 01565 if (N->getNumOperands() != 4) 01566 return false; 01567 01568 // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 01569 return isUndefOrEqual(N->getOperand(0), 6) && 01570 isUndefOrEqual(N->getOperand(1), 7) && 01571 isUndefOrEqual(N->getOperand(2), 2) && 01572 isUndefOrEqual(N->getOperand(3), 3); 01573 } 01574 01575 /// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand 01576 /// specifies a shuffle of elements that is suitable for input to MOVHLPS. 01577 bool X86::isMOVLHPSMask(SDNode *N) { 01578 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01579 01580 if (N->getNumOperands() != 4) 01581 return false; 01582 01583 // Expect bit0 == 0, bit1 == 1, bit2 == 4, bit3 == 5 01584 return isUndefOrEqual(N->getOperand(0), 0) && 01585 isUndefOrEqual(N->getOperand(1), 1) && 01586 isUndefOrEqual(N->getOperand(2), 4) && 01587 isUndefOrEqual(N->getOperand(3), 5); 01588 } 01589 01590 /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand 01591 /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 01592 bool X86::isMOVLPMask(SDNode *N) { 01593 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01594 01595 unsigned NumElems = N->getNumOperands(); 01596 if (NumElems != 2 && NumElems != 4) 01597 return false; 01598 01599 for (unsigned i = 0; i < NumElems/2; ++i) 01600 if (!isUndefOrEqual(N->getOperand(i), i + NumElems)) 01601 return false; 01602 01603 for (unsigned i = NumElems/2; i < NumElems; ++i) 01604 if (!isUndefOrEqual(N->getOperand(i), i)) 01605 return false; 01606 01607 return true; 01608 } 01609 01610 /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand 01611 /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}. 01612 bool X86::isMOVHPMask(SDNode *N) { 01613 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01614 01615 unsigned NumElems = N->getNumOperands(); 01616 if (NumElems != 2 && NumElems != 4) 01617 return false; 01618 01619 for (unsigned i = 0; i < NumElems/2; ++i) 01620 if (!isUndefOrEqual(N->getOperand(i), i)) 01621 return false; 01622 01623 for (unsigned i = 0; i < NumElems/2; ++i) { 01624 SDOperand Arg = N->getOperand(i + NumElems/2); 01625 if (!isUndefOrEqual(Arg, i + NumElems)) 01626 return false; 01627 } 01628 01629 return true; 01630 } 01631 01632 /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand 01633 /// specifies a shuffle of elements that is suitable for input to UNPCKL. 01634 bool X86::isUNPCKLMask(SDNode *N) { 01635 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01636 01637 unsigned NumElems = N->getNumOperands(); 01638 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 01639 return false; 01640 01641 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 01642 SDOperand BitI = N->getOperand(i); 01643 SDOperand BitI1 = N->getOperand(i+1); 01644 if (!isUndefOrEqual(BitI, j)) 01645 return false; 01646 if (!isUndefOrEqual(BitI1, j + NumElems)) 01647 return false; 01648 } 01649 01650 return true; 01651 } 01652 01653 /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand 01654 /// specifies a shuffle of elements that is suitable for input to UNPCKH. 01655 bool X86::isUNPCKHMask(SDNode *N) { 01656 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01657 01658 unsigned NumElems = N->getNumOperands(); 01659 if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) 01660 return false; 01661 01662 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 01663 SDOperand BitI = N->getOperand(i); 01664 SDOperand BitI1 = N->getOperand(i+1); 01665 if (!isUndefOrEqual(BitI, j + NumElems/2)) 01666 return false; 01667 if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems)) 01668 return false; 01669 } 01670 01671 return true; 01672 } 01673 01674 /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form 01675 /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, 01676 /// <0, 0, 1, 1> 01677 bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { 01678 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01679 01680 unsigned NumElems = N->getNumOperands(); 01681 if (NumElems != 4 && NumElems != 8 && NumElems != 16) 01682 return false; 01683 01684 for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { 01685 SDOperand BitI = N->getOperand(i); 01686 SDOperand BitI1 = N->getOperand(i+1); 01687 01688 if (!isUndefOrEqual(BitI, j)) 01689 return false; 01690 if (!isUndefOrEqual(BitI1, j)) 01691 return false; 01692 } 01693 01694 return true; 01695 } 01696 01697 /// isMOVSMask - Return true if the specified VECTOR_SHUFFLE operand 01698 /// specifies a shuffle of elements that is suitable for input to MOVS{S|D}. 01699 bool X86::isMOVSMask(SDNode *N) { 01700 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01701 01702 unsigned NumElems = N->getNumOperands(); 01703 if (NumElems != 2 && NumElems != 4) 01704 return false; 01705 01706 if (!isUndefOrEqual(N->getOperand(0), NumElems)) 01707 return false; 01708 01709 for (unsigned i = 1; i < NumElems; ++i) { 01710 SDOperand Arg = N->getOperand(i); 01711 if (!isUndefOrEqual(Arg, i)) 01712 return false; 01713 } 01714 01715 return true; 01716 } 01717 01718 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies 01719 /// a splat of a single element. 01720 bool X86::isSplatMask(SDNode *N) { 01721 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01722 01723 // We can only splat 64-bit, and 32-bit quantities. 01724 if (N->getNumOperands() != 4 && N->getNumOperands() != 2) 01725 return false; 01726 01727 // This is a splat operation if each element of the permute is the same, and 01728 // if the value doesn't reference the second vector. 01729 SDOperand Elt = N->getOperand(0); 01730 assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); 01731 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) { 01732 SDOperand Arg = N->getOperand(i); 01733 if (Arg.getOpcode() == ISD::UNDEF) continue; 01734 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01735 if (Arg != Elt) return false; 01736 } 01737 01738 // Make sure it is a splat of the first vector operand. 01739 return cast<ConstantSDNode>(Elt)->getValue() < N->getNumOperands(); 01740 } 01741 01742 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle 01743 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* 01744 /// instructions. 01745 unsigned X86::getShuffleSHUFImmediate(SDNode *N) { 01746 unsigned NumOperands = N->getNumOperands(); 01747 unsigned Shift = (NumOperands == 4) ? 2 : 1; 01748 unsigned Mask = 0; 01749 for (unsigned i = 0; i < NumOperands; ++i) { 01750 unsigned Val = 0; 01751 SDOperand Arg = N->getOperand(NumOperands-i-1); 01752 if (Arg.getOpcode() != ISD::UNDEF) 01753 Val = cast<ConstantSDNode>(Arg)->getValue(); 01754 if (Val >= NumOperands) Val -= NumOperands; 01755 Mask |= Val; 01756 if (i != NumOperands - 1) 01757 Mask <<= Shift; 01758 } 01759 01760 return Mask; 01761 } 01762 01763 /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle 01764 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW 01765 /// instructions. 01766 unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { 01767 unsigned Mask = 0; 01768 // 8 nodes, but we only care about the last 4. 01769 for (unsigned i = 7; i >= 4; --i) { 01770 unsigned Val = 0; 01771 SDOperand Arg = N->getOperand(i); 01772 if (Arg.getOpcode() != ISD::UNDEF) 01773 Val = cast<ConstantSDNode>(Arg)->getValue(); 01774 Mask |= (Val - 4); 01775 if (i != 4) 01776 Mask <<= 2; 01777 } 01778 01779 return Mask; 01780 } 01781 01782 /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle 01783 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW 01784 /// instructions. 01785 unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { 01786 unsigned Mask = 0; 01787 // 8 nodes, but we only care about the first 4. 01788 for (int i = 3; i >= 0; --i) { 01789 unsigned Val = 0; 01790 SDOperand Arg = N->getOperand(i); 01791 if (Arg.getOpcode() != ISD::UNDEF) 01792 Val = cast<ConstantSDNode>(Arg)->getValue(); 01793 Mask |= Val; 01794 if (i != 0) 01795 Mask <<= 2; 01796 } 01797 01798 return Mask; 01799 } 01800 01801 /// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand 01802 /// specifies a 8 element shuffle that can be broken into a pair of 01803 /// PSHUFHW and PSHUFLW. 01804 static bool isPSHUFHW_PSHUFLWMask(SDNode *N) { 01805 assert(N->getOpcode() == ISD::BUILD_VECTOR); 01806 01807 if (N->getNumOperands() != 8) 01808 return false; 01809 01810 // Lower quadword shuffled. 01811 for (unsigned i = 0; i != 4; ++i) { 01812 SDOperand Arg = N->getOperand(i); 01813 if (Arg.getOpcode() == ISD::UNDEF) continue; 01814 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01815 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 01816 if (Val > 4) 01817 return false; 01818 } 01819 01820 // Upper quadword shuffled. 01821 for (unsigned i = 4; i != 8; ++i) { 01822 SDOperand Arg = N->getOperand(i); 01823 if (Arg.getOpcode() == ISD::UNDEF) continue; 01824 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01825 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 01826 if (Val < 4 || Val > 7) 01827 return false; 01828 } 01829 01830 return true; 01831 } 01832 01833 /// CommuteVectorShuffle - Swap vector_shuffle operandsas well as 01834 /// values in ther permute mask. 01835 static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) { 01836 SDOperand V1 = Op.getOperand(0); 01837 SDOperand V2 = Op.getOperand(1); 01838 SDOperand Mask = Op.getOperand(2); 01839 MVT::ValueType VT = Op.getValueType(); 01840 MVT::ValueType MaskVT = Mask.getValueType(); 01841 MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT); 01842 unsigned NumElems = Mask.getNumOperands(); 01843 std::vector<SDOperand> MaskVec; 01844 01845 for (unsigned i = 0; i != NumElems; ++i) { 01846 SDOperand Arg = Mask.getOperand(i); 01847 if (Arg.getOpcode() == ISD::UNDEF) continue; 01848 assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); 01849 unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); 01850 if (Val < NumElems) 01851 MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT)); 01852 else 01853 MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT)); 01854 } 01855 01856 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 01857 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask); 01858 } 01859 01860 /// isScalarLoadToVector - Returns true if the node is a scalar load that 01861 /// is promoted to a vector. 01862 static inline bool isScalarLoadToVector(SDOperand Op) { 01863 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR) { 01864 Op = Op.getOperand(0); 01865 return (Op.getOpcode() == ISD::LOAD); 01866 } 01867 return false; 01868 } 01869 01870 /// ShouldXformedToMOVLP - Return true if the node should be transformed to 01871 /// match movlp{d|s}. The lower half elements should come from V1 (and in 01872 /// order), and the upper half elements should come from the upper half of 01873 /// V2 (not necessarily in order). And since V1 will become the source of 01874 /// the MOVLP, it must be a scalar load. 01875 static bool ShouldXformedToMOVLP(SDOperand V1, SDOperand V2, SDOperand Mask) { 01876 if (isScalarLoadToVector(V1)) { 01877 unsigned NumElems = Mask.getNumOperands(); 01878 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 01879 if (!isUndefOrEqual(Mask.getOperand(i), i)) 01880 return false; 01881 for (unsigned i = NumElems/2; i != NumElems; ++i) 01882 if (!isUndefOrInRange(Mask.getOperand(i), 01883 NumElems+NumElems/2, NumElems*2)) 01884 return false; 01885 return true; 01886 } 01887 01888 return false; 01889 } 01890 01891 /// isLowerFromV2UpperFromV1 - Returns true if the shuffle mask is except 01892 /// the reverse of what x86 shuffles want. x86 shuffles requires the lower 01893 /// half elements to come from vector 1 (which would equal the dest.) and 01894 /// the upper half to come from vector 2. 01895 static bool isLowerFromV2UpperFromV1(SDOperand Op) { 01896 assert(Op.getOpcode() == ISD::BUILD_VECTOR); 01897 01898 unsigned NumElems = Op.getNumOperands(); 01899 for (unsigned i = 0, e = NumElems/2; i != e; ++i) 01900 if (!isUndefOrInRange(Op.getOperand(i), NumElems, NumElems*2)) 01901 return false; 01902 for (unsigned i = NumElems/2; i != NumElems; ++i) 01903 if (!isUndefOrInRange(Op.getOperand(i), 0, NumElems)) 01904 return false; 01905 return true; 01906 } 01907 01908 /// LowerOperation - Provide custom lowering hooks for some operations. 01909 /// 01910 SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 01911 switch (Op.getOpcode()) { 01912 default: assert(0 && "Should not custom lower this!"); 01913 case ISD::SHL_PARTS: 01914 case ISD::SRA_PARTS: 01915 case ISD::SRL_PARTS: { 01916 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 01917 "Not an i64 shift!"); 01918 bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; 01919 SDOperand ShOpLo = Op.getOperand(0); 01920 SDOperand ShOpHi = Op.getOperand(1); 01921 SDOperand ShAmt = Op.getOperand(2); 01922 SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, 01923 DAG.getConstant(31, MVT::i8)) 01924 : DAG.getConstant(0, MVT::i32); 01925 01926 SDOperand Tmp2, Tmp3; 01927 if (Op.getOpcode() == ISD::SHL_PARTS) { 01928 Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); 01929 Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); 01930 } else { 01931 Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); 01932 Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); 01933 } 01934 01935 SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag, 01936 ShAmt, DAG.getConstant(32, MVT::i8)); 01937 01938 SDOperand Hi, Lo; 01939 SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 01940 01941 std::vector<MVT::ValueType> Tys; 01942 Tys.push_back(MVT::i32); 01943 Tys.push_back(MVT::Flag); 01944 std::vector<SDOperand> Ops; 01945 if (Op.getOpcode() == ISD::SHL_PARTS) { 01946 Ops.push_back(Tmp2); 01947 Ops.push_back(Tmp3); 01948 Ops.push_back(CC); 01949 Ops.push_back(InFlag); 01950 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 01951 InFlag = Hi.getValue(1); 01952 01953 Ops.clear(); 01954 Ops.push_back(Tmp3); 01955 Ops.push_back(Tmp1); 01956 Ops.push_back(CC); 01957 Ops.push_back(InFlag); 01958 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 01959 } else { 01960 Ops.push_back(Tmp2); 01961 Ops.push_back(Tmp3); 01962 Ops.push_back(CC); 01963 Ops.push_back(InFlag); 01964 Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops); 01965 InFlag = Lo.getValue(1); 01966 01967 Ops.clear(); 01968 Ops.push_back(Tmp3); 01969 Ops.push_back(Tmp1); 01970 Ops.push_back(CC); 01971 Ops.push_back(InFlag); 01972 Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops); 01973 } 01974 01975 Tys.clear(); 01976 Tys.push_back(MVT::i32); 01977 Tys.push_back(MVT::i32); 01978 Ops.clear(); 01979 Ops.push_back(Lo); 01980 Ops.push_back(Hi); 01981 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 01982 } 01983 case ISD::SINT_TO_FP: { 01984 assert(Op.getOperand(0).getValueType() <= MVT::i64 && 01985 Op.getOperand(0).getValueType() >= MVT::i16 && 01986 "Unknown SINT_TO_FP to lower!"); 01987 01988 SDOperand Result; 01989 MVT::ValueType SrcVT = Op.getOperand(0).getValueType(); 01990 unsigned Size = MVT::getSizeInBits(SrcVT)/8; 01991 MachineFunction &MF = DAG.getMachineFunction(); 01992 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 01993 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 01994 SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other, 01995 DAG.getEntryNode(), Op.getOperand(0), 01996 StackSlot, DAG.getSrcValue(NULL)); 01997 01998 // Build the FILD 01999 std::vector<MVT::ValueType> Tys; 02000 Tys.push_back(MVT::f64); 02001 Tys.push_back(MVT::Other); 02002 if (X86ScalarSSE) Tys.push_back(MVT::Flag); 02003 std::vector<SDOperand> Ops; 02004 Ops.push_back(Chain); 02005 Ops.push_back(StackSlot); 02006 Ops.push_back(DAG.getValueType(SrcVT)); 02007 Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD, 02008 Tys, Ops); 02009 02010 if (X86ScalarSSE) { 02011 Chain = Result.getValue(1); 02012 SDOperand InFlag = Result.getValue(2); 02013 02014 // FIXME: Currently the FST is flagged to the FILD_FLAG. This 02015 // shouldn't be necessary except that RFP cannot be live across 02016 // multiple blocks. When stackifier is fixed, they can be uncoupled. 02017 MachineFunction &MF = DAG.getMachineFunction(); 02018 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 02019 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 02020 std::vector<MVT::ValueType> Tys; 02021 Tys.push_back(MVT::Other); 02022 std::vector<SDOperand> Ops; 02023 Ops.push_back(Chain); 02024 Ops.push_back(Result); 02025 Ops.push_back(StackSlot); 02026 Ops.push_back(DAG.getValueType(Op.getValueType())); 02027 Ops.push_back(InFlag); 02028 Chain = DAG.getNode(X86ISD::FST, Tys, Ops); 02029 Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, 02030 DAG.getSrcValue(NULL)); 02031 } 02032 02033 return Result; 02034 } 02035 case ISD::FP_TO_SINT: { 02036 assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && 02037 "Unknown FP_TO_SINT to lower!"); 02038 // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary 02039 // stack slot. 02040 MachineFunction &MF = DAG.getMachineFunction(); 02041 unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; 02042 int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 02043 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 02044 02045 unsigned Opc; 02046 switch (Op.getValueType()) { 02047 default: assert(0 && "Invalid FP_TO_SINT to lower!"); 02048 case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; 02049 case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; 02050 case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; 02051 } 02052 02053 SDOperand Chain = DAG.getEntryNode(); 02054 SDOperand Value = Op.getOperand(0); 02055 if (X86ScalarSSE) { 02056 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!"); 02057 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot, 02058 DAG.getSrcValue(0)); 02059 std::vector<MVT::ValueType> Tys; 02060 Tys.push_back(MVT::f64); 02061 Tys.push_back(MVT::Other); 02062 std::vector<SDOperand> Ops; 02063 Ops.push_back(Chain); 02064 Ops.push_back(StackSlot); 02065 Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType())); 02066 Value = DAG.getNode(X86ISD::FLD, Tys, Ops); 02067 Chain = Value.getValue(1); 02068 SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); 02069 StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); 02070 } 02071 02072 // Build the FP_TO_INT*_IN_MEM 02073 std::vector<SDOperand> Ops; 02074 Ops.push_back(Chain); 02075 Ops.push_back(Value); 02076 Ops.push_back(StackSlot); 02077 SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops); 02078 02079 // Load the result. 02080 return DAG.getLoad(Op.getValueType(), FIST, StackSlot, 02081 DAG.getSrcValue(NULL)); 02082 } 02083 case ISD::READCYCLECOUNTER: { 02084 std::vector<MVT::ValueType> Tys; 02085 Tys.push_back(MVT::Other); 02086 Tys.push_back(MVT::Flag); 02087 std::vector<SDOperand> Ops; 02088 Ops.push_back(Op.getOperand(0)); 02089 SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops); 02090 Ops.clear(); 02091 Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1))); 02092 Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX, 02093 MVT::i32, Ops[0].getValue(2))); 02094 Ops.push_back(Ops[1].getValue(1)); 02095 Tys[0] = Tys[1] = MVT::i32; 02096 Tys.push_back(MVT::Other); 02097 return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops); 02098 } 02099 case ISD::FABS: { 02100 MVT::ValueType VT = Op.getValueType(); 02101 const Type *OpNTy = MVT::getTypeForValueType(VT); 02102 std::vector<Constant*> CV; 02103 if (VT == MVT::f64) { 02104 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); 02105 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 02106 } else { 02107 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); 02108 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 02109 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 02110 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 02111 } 02112 Constant *CS = ConstantStruct::get(CV); 02113 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 02114 SDOperand Mask 02115 = DAG.getNode(X86ISD::LOAD_PACK, 02116 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 02117 return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); 02118 } 02119 case ISD::FNEG: { 02120 MVT::ValueType VT = Op.getValueType(); 02121 const Type *OpNTy = MVT::getTypeForValueType(VT); 02122 std::vector<Constant*> CV; 02123 if (VT == MVT::f64) { 02124 CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); 02125 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 02126 } else { 02127 CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); 02128 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 02129 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 02130 CV.push_back(ConstantFP::get(OpNTy, 0.0)); 02131 } 02132 Constant *CS = ConstantStruct::get(CV); 02133 SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); 02134 SDOperand Mask 02135 = DAG.getNode(X86ISD::LOAD_PACK, 02136 VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); 02137 return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); 02138 } 02139 case ISD::SETCC: { 02140 assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); 02141 SDOperand Cond; 02142 SDOperand CC = Op.getOperand(2); 02143 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 02144 bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); 02145 bool Flip; 02146 unsigned X86CC; 02147 if (translateX86CC(CC, isFP, X86CC, Flip)) { 02148 if (Flip) 02149 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 02150 Op.getOperand(1), Op.getOperand(0)); 02151 else 02152 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 02153 Op.getOperand(0), Op.getOperand(1)); 02154 return DAG.getNode(X86ISD::SETCC, MVT::i8, 02155 DAG.getConstant(X86CC, MVT::i8), Cond); 02156 } else { 02157 assert(isFP && "Illegal integer SetCC!"); 02158 02159 Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, 02160 Op.getOperand(0), Op.getOperand(1)); 02161 std::vector<MVT::ValueType> Tys; 02162 std::vector<SDOperand> Ops; 02163 switch (SetCCOpcode) { 02164 default: assert(false && "Illegal floating point SetCC!"); 02165 case ISD::SETOEQ: { // !PF & ZF 02166 Tys.push_back(MVT::i8); 02167 Tys.push_back(MVT::Flag); 02168 Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8)); 02169 Ops.push_back(Cond); 02170 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 02171 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 02172 DAG.getConstant(X86ISD::COND_E, MVT::i8), 02173 Tmp1.getValue(1)); 02174 return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); 02175 } 02176 case ISD::SETUNE: { // PF | !ZF 02177 Tys.push_back(MVT::i8); 02178 Tys.push_back(MVT::Flag); 02179 Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8)); 02180 Ops.push_back(Cond); 02181 SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 02182 SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, 02183 DAG.getConstant(X86ISD::COND_NE, MVT::i8), 02184 Tmp1.getValue(1)); 02185 return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); 02186 } 02187 } 02188 } 02189 } 02190 case ISD::SELECT: { 02191 MVT::ValueType VT = Op.getValueType(); 02192 bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE; 02193 bool addTest = false; 02194 SDOperand Op0 = Op.getOperand(0); 02195 SDOperand Cond, CC; 02196 if (Op0.getOpcode() == ISD::SETCC) 02197 Op0 = LowerOperation(Op0, DAG); 02198 02199 if (Op0.getOpcode() == X86ISD::SETCC) { 02200 // If condition flag is set by a X86ISD::CMP, then make a copy of it 02201 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 02202 // have another use it will be eliminated. 02203 // If the X86ISD::SETCC has more than one use, then it's probably better 02204 // to use a test instead of duplicating the X86ISD::CMP (for register 02205 // pressure reason). 02206 unsigned CmpOpc = Op0.getOperand(1).getOpcode(); 02207 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 02208 CmpOpc == X86ISD::UCOMI) { 02209 if (!Op0.hasOneUse()) { 02210 std::vector<MVT::ValueType> Tys; 02211 for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i) 02212 Tys.push_back(Op0.Val->getValueType(i)); 02213 std::vector<SDOperand> Ops; 02214 for (unsigned i = 0; i < Op0.getNumOperands(); ++i) 02215 Ops.push_back(Op0.getOperand(i)); 02216 Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops); 02217 } 02218 02219 CC = Op0.getOperand(0); 02220 Cond = Op0.getOperand(1); 02221 // Make a copy as flag result cannot be used by more than one. 02222 Cond = DAG.getNode(CmpOpc, MVT::Flag, 02223 Cond.getOperand(0), Cond.getOperand(1)); 02224 addTest = 02225 isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended()); 02226 } else 02227 addTest = true; 02228 } else 02229 addTest = true; 02230 02231 if (addTest) { 02232 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 02233 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0); 02234 } 02235 02236 std::vector<MVT::ValueType> Tys; 02237 Tys.push_back(Op.getValueType()); 02238 Tys.push_back(MVT::Flag); 02239 std::vector<SDOperand> Ops; 02240 // X86ISD::CMOV means set the result (which is operand 1) to the RHS if 02241 // condition is true. 02242 Ops.push_back(Op.getOperand(2)); 02243 Ops.push_back(Op.getOperand(1)); 02244 Ops.push_back(CC); 02245 Ops.push_back(Cond); 02246 return DAG.getNode(X86ISD::CMOV, Tys, Ops); 02247 } 02248 case ISD::BRCOND: { 02249 bool addTest = false; 02250 SDOperand Cond = Op.getOperand(1); 02251 SDOperand Dest = Op.getOperand(2); 02252 SDOperand CC; 02253 if (Cond.getOpcode() == ISD::SETCC) 02254 Cond = LowerOperation(Cond, DAG); 02255 02256 if (Cond.getOpcode() == X86ISD::SETCC) { 02257 // If condition flag is set by a X86ISD::CMP, then make a copy of it 02258 // (since flag operand cannot be shared). If the X86ISD::SETCC does not 02259 // have another use it will be eliminated. 02260 // If the X86ISD::SETCC has more than one use, then it's probably better 02261 // to use a test instead of duplicating the X86ISD::CMP (for register 02262 // pressure reason). 02263 unsigned CmpOpc = Cond.getOperand(1).getOpcode(); 02264 if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI || 02265 CmpOpc == X86ISD::UCOMI) { 02266 if (!Cond.hasOneUse()) { 02267 std::vector<MVT::ValueType> Tys; 02268 for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i) 02269 Tys.push_back(Cond.Val->getValueType(i)); 02270 std::vector<SDOperand> Ops; 02271 for (unsigned i = 0; i < Cond.getNumOperands(); ++i) 02272 Ops.push_back(Cond.getOperand(i)); 02273 Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops); 02274 } 02275 02276 CC = Cond.getOperand(0); 02277 Cond = Cond.getOperand(1); 02278 // Make a copy as flag result cannot be used by more than one. 02279 Cond = DAG.getNode(CmpOpc, MVT::Flag, 02280 Cond.getOperand(0), Cond.getOperand(1)); 02281 } else 02282 addTest = true; 02283 } else 02284 addTest = true; 02285 02286 if (addTest) { 02287 CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8); 02288 Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond); 02289 } 02290 return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), 02291 Op.getOperand(0), Op.getOperand(2), CC, Cond); 02292 } 02293 case ISD::MEMSET: { 02294 SDOperand InFlag(0, 0); 02295 SDOperand Chain = Op.getOperand(0); 02296 unsigned Align = 02297 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 02298 if (Align == 0) Align = 1; 02299 02300 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 02301 // If not DWORD aligned, call memset if size is less than the threshold. 02302 // It knows how to align to the right boundary first. 02303 if ((Align & 3) != 0 || 02304 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 02305 MVT::ValueType IntPtr = getPointerTy(); 02306 const Type *IntPtrTy = getTargetData().getIntPtrType(); 02307 std::vector<std::pair<SDOperand, const Type*> > Args; 02308 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 02309 // Extend the ubyte argument to be an int value for the call. 02310 SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); 02311 Args.push_back(std::make_pair(Val, IntPtrTy)); 02312 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 02313 std::pair<SDOperand,SDOperand> CallResult = 02314 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 02315 DAG.getExternalSymbol("memset", IntPtr), Args, DAG); 02316 return CallResult.second; 02317 } 02318 02319 MVT::ValueType AVT; 02320 SDOperand Count; 02321 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 02322 unsigned BytesLeft = 0; 02323 bool TwoRepStos = false; 02324 if (ValC) { 02325 unsigned ValReg; 02326 unsigned Val = ValC->getValue() & 255; 02327 02328 // If the value is a constant, then we can potentially use larger sets. 02329 switch (Align & 3) { 02330 case 2: // WORD aligned 02331 AVT = MVT::i16; 02332 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 02333 BytesLeft = I->getValue() % 2; 02334 Val = (Val << 8) | Val; 02335 ValReg = X86::AX; 02336 break; 02337 case 0: // DWORD aligned 02338 AVT = MVT::i32; 02339 if (I) { 02340 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 02341 BytesLeft = I->getValue() % 4; 02342 } else { 02343 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 02344 DAG.getConstant(2, MVT::i8)); 02345 TwoRepStos = true; 02346 } 02347 Val = (Val << 8) | Val; 02348 Val = (Val << 16) | Val; 02349 ValReg = X86::EAX; 02350 break; 02351 default: // Byte aligned 02352 AVT = MVT::i8; 02353 Count = Op.getOperand(3); 02354 ValReg = X86::AL; 02355 break; 02356 } 02357 02358 Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), 02359 InFlag); 02360 InFlag = Chain.getValue(1); 02361 } else { 02362 AVT = MVT::i8; 02363 Count = Op.getOperand(3); 02364 Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); 02365 InFlag = Chain.getValue(1); 02366 } 02367 02368 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 02369 InFlag = Chain.getValue(1); 02370 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 02371 InFlag = Chain.getValue(1); 02372 02373 std::vector<MVT::ValueType> Tys; 02374 Tys.push_back(MVT::Other); 02375 Tys.push_back(MVT::Flag); 02376 std::vector<SDOperand> Ops; 02377 Ops.push_back(Chain); 02378 Ops.push_back(DAG.getValueType(AVT)); 02379 Ops.push_back(InFlag); 02380 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 02381 02382 if (TwoRepStos) { 02383 InFlag = Chain.getValue(1); 02384 Count = Op.getOperand(3); 02385 MVT::ValueType CVT = Count.getValueType(); 02386 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 02387 DAG.getConstant(3, CVT)); 02388 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 02389 InFlag = Chain.getValue(1); 02390 Tys.clear(); 02391 Tys.push_back(MVT::Other); 02392 Tys.push_back(MVT::Flag); 02393 Ops.clear(); 02394 Ops.push_back(Chain); 02395 Ops.push_back(DAG.getValueType(MVT::i8)); 02396 Ops.push_back(InFlag); 02397 Chain = DAG.getNode(X86ISD::REP_STOS, Tys, Ops); 02398 } else if (BytesLeft) { 02399 // Issue stores for the last 1 - 3 bytes. 02400 SDOperand Value; 02401 unsigned Val = ValC->getValue() & 255; 02402 unsigned Offset = I->getValue() - BytesLeft; 02403 SDOperand DstAddr = Op.getOperand(1); 02404 MVT::ValueType AddrVT = DstAddr.getValueType(); 02405 if (BytesLeft >= 2) { 02406 Value = DAG.getConstant((Val << 8) | Val, MVT::i16); 02407 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 02408 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 02409 DAG.getConstant(Offset, AddrVT)), 02410 DAG.getSrcValue(NULL)); 02411 BytesLeft -= 2; 02412 Offset += 2; 02413 } 02414 02415 if (BytesLeft == 1) { 02416 Value = DAG.getConstant(Val, MVT::i8); 02417 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 02418 DAG.getNode(ISD::ADD, AddrVT, DstAddr, 02419 DAG.getConstant(Offset, AddrVT)), 02420 DAG.getSrcValue(NULL)); 02421 } 02422 } 02423 02424 return Chain; 02425 } 02426 case ISD::MEMCPY: { 02427 SDOperand Chain = Op.getOperand(0); 02428 unsigned Align = 02429 (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); 02430 if (Align == 0) Align = 1; 02431 02432 ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); 02433 // If not DWORD aligned, call memcpy if size is less than the threshold. 02434 // It knows how to align to the right boundary first. 02435 if ((Align & 3) != 0 || 02436 (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { 02437 MVT::ValueType IntPtr = getPointerTy(); 02438 const Type *IntPtrTy = getTargetData().getIntPtrType(); 02439 std::vector<std::pair<SDOperand, const Type*> > Args; 02440 Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy)); 02441 Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy)); 02442 Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy)); 02443 std::pair<SDOperand,SDOperand> CallResult = 02444 LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false, 02445 DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); 02446 return CallResult.second; 02447 } 02448 02449 MVT::ValueType AVT; 02450 SDOperand Count; 02451 unsigned BytesLeft = 0; 02452 bool TwoRepMovs = false; 02453 switch (Align & 3) { 02454 case 2: // WORD aligned 02455 AVT = MVT::i16; 02456 Count = DAG.getConstant(I->getValue() / 2, MVT::i32); 02457 BytesLeft = I->getValue() % 2; 02458 break; 02459 case 0: // DWORD aligned 02460 AVT = MVT::i32; 02461 if (I) { 02462 Count = DAG.getConstant(I->getValue() / 4, MVT::i32); 02463 BytesLeft = I->getValue() % 4; 02464 } else { 02465 Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), 02466 DAG.getConstant(2, MVT::i8)); 02467 TwoRepMovs = true; 02468 } 02469 break; 02470 default: // Byte aligned 02471 AVT = MVT::i8; 02472 Count = Op.getOperand(3); 02473 break; 02474 } 02475 02476 SDOperand InFlag(0, 0); 02477 Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); 02478 InFlag = Chain.getValue(1); 02479 Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); 02480 InFlag = Chain.getValue(1); 02481 Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag); 02482 InFlag = Chain.getValue(1); 02483 02484 std::vector<MVT::ValueType> Tys; 02485 Tys.push_back(MVT::Other); 02486 Tys.push_back(MVT::Flag); 02487 std::vector<SDOperand> Ops; 02488 Ops.push_back(Chain); 02489 Ops.push_back(DAG.getValueType(AVT)); 02490 Ops.push_back(InFlag); 02491 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 02492 02493 if (TwoRepMovs) { 02494 InFlag = Chain.getValue(1); 02495 Count = Op.getOperand(3); 02496 MVT::ValueType CVT = Count.getValueType(); 02497 SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, 02498 DAG.getConstant(3, CVT)); 02499 Chain = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag); 02500 InFlag = Chain.getValue(1); 02501 Tys.clear(); 02502 Tys.push_back(MVT::Other); 02503 Tys.push_back(MVT::Flag); 02504 Ops.clear(); 02505 Ops.push_back(Chain); 02506 Ops.push_back(DAG.getValueType(MVT::i8)); 02507 Ops.push_back(InFlag); 02508 Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops); 02509 } else if (BytesLeft) { 02510 // Issue loads and stores for the last 1 - 3 bytes. 02511 unsigned Offset = I->getValue() - BytesLeft; 02512 SDOperand DstAddr = Op.getOperand(1); 02513 MVT::ValueType DstVT = DstAddr.getValueType(); 02514 SDOperand SrcAddr = Op.getOperand(2); 02515 MVT::ValueType SrcVT = SrcAddr.getValueType(); 02516 SDOperand Value; 02517 if (BytesLeft >= 2) { 02518 Value = DAG.getLoad(MVT::i16, Chain, 02519 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 02520 DAG.getConstant(Offset, SrcVT)), 02521 DAG.getSrcValue(NULL)); 02522 Chain = Value.getValue(1); 02523 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 02524 DAG.getNode(ISD::ADD, DstVT, DstAddr, 02525 DAG.getConstant(Offset, DstVT)), 02526 DAG.getSrcValue(NULL)); 02527 BytesLeft -= 2; 02528 Offset += 2; 02529 } 02530 02531 if (BytesLeft == 1) { 02532 Value = DAG.getLoad(MVT::i8, Chain, 02533 DAG.getNode(ISD::ADD, SrcVT, SrcAddr, 02534 DAG.getConstant(Offset, SrcVT)), 02535 DAG.getSrcValue(NULL)); 02536 Chain = Value.getValue(1); 02537 Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 02538 DAG.getNode(ISD::ADD, DstVT, DstAddr, 02539 DAG.getConstant(Offset, DstVT)), 02540 DAG.getSrcValue(NULL)); 02541 } 02542 } 02543 02544 return Chain; 02545 } 02546 02547 // ConstantPool, GlobalAddress, and ExternalSymbol are lowered as their 02548 // target countpart wrapped in the X86ISD::Wrapper node. Suppose N is 02549 // one of the above mentioned nodes. It has to be wrapped because otherwise 02550 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 02551 // be used to form addressing mode. These wrapped nodes will be selected 02552 // into MOV32ri. 02553 case ISD::ConstantPool: { 02554 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 02555 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 02556 DAG.getTargetConstantPool(CP->get(), getPointerTy(), 02557 CP->getAlignment())); 02558 if (Subtarget->isTargetDarwin()) { 02559 // With PIC, the address is actually $g + Offset. 02560 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 02561 Result = DAG.getNode(ISD::ADD, getPointerTy(), 02562 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 02563 } 02564 02565 return Result; 02566 } 02567 case ISD::GlobalAddress: { 02568 GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 02569 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 02570 DAG.getTargetGlobalAddress(GV, getPointerTy())); 02571 if (Subtarget->isTargetDarwin()) { 02572 // With PIC, the address is actually $g + Offset. 02573 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 02574 Result = DAG.getNode(ISD::ADD, getPointerTy(), 02575 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 02576 02577 // For Darwin, external and weak symbols are indirect, so we want to load 02578 // the value at address GV, not the value of GV itself. This means that 02579 // the GlobalAddress must be in the base or index register of the address, 02580 // not the GV offset field. 02581 if (getTargetMachine().getRelocationModel() != Reloc::Static && 02582 DarwinGVRequiresExtraLoad(GV)) 02583 Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), 02584 Result, DAG.getSrcValue(NULL)); 02585 } 02586 02587 return Result; 02588 } 02589 case ISD::ExternalSymbol: { 02590 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 02591 SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), 02592 DAG.getTargetExternalSymbol(Sym, getPointerTy())); 02593 if (Subtarget->isTargetDarwin()) { 02594 // With PIC, the address is actually $g + Offset. 02595 if (getTargetMachine().getRelocationModel() == Reloc::PIC) 02596 Result = DAG.getNode(ISD::ADD, getPointerTy(), 02597 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result); 02598 } 02599 02600 return Result; 02601 } 02602 case ISD::VASTART: { 02603 // vastart just stores the address of the VarArgsFrameIndex slot into the 02604 // memory location argument. 02605 // FIXME: Replace MVT::i32 with PointerTy 02606 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32); 02607 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 02608 Op.getOperand(1), Op.getOperand(2)); 02609 } 02610 case ISD::RET: { 02611 SDOperand Copy; 02612 02613 switch(Op.getNumOperands()) { 02614 default: 02615 assert(0 && "Do not know how to return this many arguments!"); 02616 abort(); 02617 case 1: 02618 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0), 02619 DAG.getConstant(getBytesToPopOnReturn(), MVT::i16)); 02620 case 2: { 02621 MVT::ValueType ArgVT = Op.getOperand(1).getValueType(); 02622 if (MVT::isInteger(ArgVT)) 02623 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1), 02624 SDOperand()); 02625 else if (!X86ScalarSSE) { 02626 std::vector<MVT::ValueType> Tys; 02627 Tys.push_back(MVT::Other); 02628 Tys.push_back(MVT::Flag); 02629 std::vector<SDOperand> Ops; 02630 Ops.push_back(Op.getOperand(0)); 02631 Ops.push_back(Op.getOperand(1)); 02632 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 02633 } else { 02634 SDOperand MemLoc; 02635 SDOperand Chain = Op.getOperand(0); 02636 SDOperand Value = Op.getOperand(1); 02637 02638 if (Value.getOpcode() == ISD::LOAD && 02639 (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) { 02640 Chain = Value.getOperand(0); 02641 MemLoc = Value.getOperand(1); 02642 } else { 02643 // Spill the value to memory and reload it into top of stack. 02644 unsigned Size = MVT::getSizeInBits(ArgVT)/8; 02645 MachineFunction &MF = DAG.getMachineFunction(); 02646 int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); 02647 MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); 02648 Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 02649 Value, MemLoc, DAG.getSrcValue(0)); 02650 } 02651 std::vector<MVT::ValueType> Tys; 02652 Tys.push_back(MVT::f64); 02653 Tys.push_back(MVT::Other); 02654 std::vector<SDOperand> Ops; 02655 Ops.push_back(Chain); 02656 Ops.push_back(MemLoc); 02657 Ops.push_back(DAG.getValueType(ArgVT)); 02658 Copy = DAG.getNode(X86ISD::FLD, Tys, Ops); 02659 Tys.clear(); 02660 Tys.push_back(MVT::Other); 02661 Tys.push_back(MVT::Flag); 02662 Ops.clear(); 02663 Ops.push_back(Copy.getValue(1)); 02664 Ops.push_back(Copy); 02665 Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops); 02666 } 02667 break; 02668 } 02669 case 3: 02670 Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2), 02671 SDOperand()); 02672 Copy = DAG.getCopyToReg(Copy, X86::EAX,Op.getOperand(1),Copy.getValue(1)); 02673 break; 02674 } 02675 return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, 02676 Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16), 02677 Copy.getValue(1)); 02678 } 02679 case ISD::SCALAR_TO_VECTOR: { 02680 SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0)); 02681 return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt); 02682 } 02683 case ISD::VECTOR_SHUFFLE: { 02684 SDOperand V1 = Op.getOperand(0); 02685 SDOperand V2 = Op.getOperand(1); 02686 SDOperand PermMask = Op.getOperand(2); 02687 MVT::ValueType VT = Op.getValueType(); 02688 unsigned NumElems = PermMask.getNumOperands(); 02689 02690 if (X86::isSplatMask(PermMask.Val)) 02691 return Op; 02692 02693 // Normalize the node to match x86 shuffle ops if needed 02694 if (V2.getOpcode() != ISD::UNDEF) { 02695 bool DoSwap = false; 02696 02697 if (ShouldXformedToMOVLP(V1, V2, PermMask)) 02698 DoSwap = true; 02699 else if (isLowerFromV2UpperFromV1(PermMask)) 02700 DoSwap = true; 02701 02702 if (DoSwap) { 02703 Op = CommuteVectorShuffle(Op, DAG); 02704 V1 = Op.getOperand(0); 02705 V2 = Op.getOperand(1); 02706 PermMask = Op.getOperand(2); 02707 } 02708 } 02709 02710 if (NumElems == 2) 02711 return Op; 02712 02713 if (X86::isMOVSMask(PermMask.Val)) 02714 // Leave the VECTOR_SHUFFLE alone. It matches MOVS{S|D}. 02715 return Op; 02716 02717 if (X86::isUNPCKLMask(PermMask.Val) || 02718 X86::isUNPCKL_v_undef_Mask(PermMask.Val) || 02719 X86::isUNPCKHMask(PermMask.Val)) 02720 // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*. 02721 return Op; 02722 02723 // If VT is integer, try PSHUF* first, then SHUFP*. 02724 if (MVT::isInteger(VT)) { 02725 if (X86::isPSHUFDMask(PermMask.Val) || 02726 X86::isPSHUFHWMask(PermMask.Val) || 02727 X86::isPSHUFLWMask(PermMask.Val)) { 02728 if (V2.getOpcode() != ISD::UNDEF) 02729 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 02730 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 02731 return Op; 02732 } 02733 02734 if (X86::isSHUFPMask(PermMask.Val)) 02735 return Op; 02736 02737 // Handle v8i16 shuffle high / low shuffle node pair. 02738 if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { 02739 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 02740 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 02741 std::vector<SDOperand> MaskVec; 02742 for (unsigned i = 0; i != 4; ++i) 02743 MaskVec.push_back(PermMask.getOperand(i)); 02744 for (unsigned i = 4; i != 8; ++i) 02745 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 02746 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 02747 V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 02748 MaskVec.clear(); 02749 for (unsigned i = 0; i != 4; ++i) 02750 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 02751 for (unsigned i = 4; i != 8; ++i) 02752 MaskVec.push_back(PermMask.getOperand(i)); 02753 Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 02754 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask); 02755 } 02756 } else { 02757 // Floating point cases in the other order. 02758 if (X86::isSHUFPMask(PermMask.Val)) 02759 return Op; 02760 if (X86::isPSHUFDMask(PermMask.Val) || 02761 X86::isPSHUFHWMask(PermMask.Val) || 02762 X86::isPSHUFLWMask(PermMask.Val)) { 02763 if (V2.getOpcode() != ISD::UNDEF) 02764 return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, 02765 DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); 02766 return Op; 02767 } 02768 } 02769 02770 return SDOperand(); 02771 } 02772 case ISD::BUILD_VECTOR: { 02773 // All one's are handled with pcmpeqd. 02774 if (ISD::isBuildVectorAllOnes(Op.Val)) 02775 return Op; 02776 02777 std::set<SDOperand> Values; 02778 SDOperand Elt0 = Op.getOperand(0); 02779 Values.insert(Elt0); 02780 bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) && 02781 cast<ConstantSDNode>(Elt0)->getValue() == 0) || 02782 (isa<ConstantFPSDNode>(Elt0) && 02783 cast<ConstantFPSDNode>(Elt0)->isExactlyValue(0.0)); 02784 bool RestAreZero = true; 02785 unsigned NumElems = Op.getNumOperands(); 02786 for (unsigned i = 1; i < NumElems; ++i) { 02787 SDOperand Elt = Op.getOperand(i); 02788 if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Elt)) { 02789 if (!FPC->isExactlyValue(+0.0)) 02790 RestAreZero = false; 02791 } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 02792 if (!C->isNullValue()) 02793 RestAreZero = false; 02794 } else 02795 RestAreZero = false; 02796 Values.insert(Elt); 02797 } 02798 02799 if (RestAreZero) { 02800 if (Elt0IsZero) return Op; 02801 02802 // Zero extend a scalar to a vector. 02803 return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0); 02804 } 02805 02806 if (Values.size() > 2) { 02807 // Expand into a number of unpckl*. 02808 // e.g. for v4f32 02809 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> 02810 // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> 02811 // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> 02812 MVT::ValueType VT = Op.getValueType(); 02813 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); 02814 MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); 02815 std::vector<SDOperand> MaskVec; 02816 for (unsigned i = 0, e = NumElems/2; i != e; ++i) { 02817 MaskVec.push_back(DAG.getConstant(i, BaseVT)); 02818 MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); 02819 } 02820 SDOperand PermMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); 02821 std::vector<SDOperand> V(NumElems); 02822 for (unsigned i = 0; i < NumElems; ++i) 02823 V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); 02824 NumElems >>= 1; 02825 while (NumElems != 0) { 02826 for (unsigned i = 0; i < NumElems; ++i) 02827 V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], 02828 PermMask); 02829 NumElems >>= 1; 02830 } 02831 return V[0]; 02832 } 02833 02834 return SDOperand(); 02835 } 02836 case ISD::EXTRACT_VECTOR_ELT: { 02837 if (!isa<ConstantSDNode>(Op.getOperand(1))) 02838 return SDOperand(); 02839 02840 MVT::ValueType VT = Op.getValueType(); 02841 // TODO: handle v16i8. 02842 if (MVT::getSizeInBits(VT) == 16) { 02843 // Transform it so it match pextrw which produces a 32-bit result. 02844 MVT::ValueType EVT = (MVT::ValueType)(VT+1); 02845 SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, 02846 Op.getOperand(0), Op.getOperand(1)); 02847 SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract, 02848 DAG.getValueType(VT)); 02849 return DAG.getNode(ISD::TRUNCATE, VT, Assert); 02850 } else if (MVT::getSizeInBits(VT) == 32) { 02851 SDOperand Vec = Op.getOperand(0); 02852 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 02853 if (Idx == 0) 02854 return Op; 02855 02856 // TODO: if Idex == 2, we can use unpckhps 02857 // SHUFPS the element to the lowest double word, then movss. 02858 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 02859 SDOperand IdxNode = DAG.getConstant((Idx < 2) ? Idx : Idx+4, 02860 MVT::getVectorBaseType(MaskVT)); 02861 std::vector<SDOperand> IdxVec; 02862 IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT))); 02863 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 02864 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 02865 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 02866 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 02867 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 02868 Vec, Vec, Mask); 02869 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 02870 DAG.getConstant(0, MVT::i32)); 02871 } else if (MVT::getSizeInBits(VT) == 64) { 02872 SDOperand Vec = Op.getOperand(0); 02873 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 02874 if (Idx == 0) 02875 return Op; 02876 02877 // UNPCKHPD the element to the lowest double word, then movsd. 02878 // Note if the lower 64 bits of the result of the UNPCKHPD is then stored 02879 // to a f64mem, the whole operation is folded into a single MOVHPDmr. 02880 MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); 02881 std::vector<SDOperand> IdxVec; 02882 IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT))); 02883 IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); 02884 SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec); 02885 Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), 02886 Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); 02887 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, 02888 DAG.getConstant(0, MVT::i32)); 02889 } 02890 02891 return SDOperand(); 02892 } 02893 case ISD::INSERT_VECTOR_ELT: { 02894 // Transform it so it match pinsrw which expects a 16-bit value in a R32 02895 // as its second argument. 02896 MVT::ValueType VT = Op.getValueType(); 02897 MVT::ValueType BaseVT = MVT::getVectorBaseType(VT); 02898 if (MVT::getSizeInBits(BaseVT) == 16) { 02899 SDOperand N1 = Op.getOperand(1); 02900 SDOperand N2 = Op.getOperand(2); 02901 if (N1.getValueType() != MVT::i32) 02902 N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); 02903 if (N2.getValueType() != MVT::i32) 02904 N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32); 02905 return DAG.getNode(X86ISD::PINSRW, VT, Op.getOperand(0), N1, N2); 02906 } 02907 02908 return SDOperand(); 02909 } 02910 case ISD::INTRINSIC_WO_CHAIN: { 02911 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 02912 switch (IntNo) { 02913 default: return SDOperand(); // Don't custom lower most intrinsics. 02914 // Comparison intrinsics. 02915 case Intrinsic::x86_sse_comieq_ss: 02916 case Intrinsic::x86_sse_comilt_ss: 02917 case Intrinsic::x86_sse_comile_ss: 02918 case Intrinsic::x86_sse_comigt_ss: 02919 case Intrinsic::x86_sse_comige_ss: 02920 case Intrinsic::x86_sse_comineq_ss: 02921 case Intrinsic::x86_sse_ucomieq_ss: 02922 case Intrinsic::x86_sse_ucomilt_ss: 02923 case Intrinsic::x86_sse_ucomile_ss: 02924 case Intrinsic::x86_sse_ucomigt_ss: 02925 case Intrinsic::x86_sse_ucomige_ss: 02926 case Intrinsic::x86_sse_ucomineq_ss: 02927 case Intrinsic::x86_sse2_comieq_sd: 02928 case Intrinsic::x86_sse2_comilt_sd: 02929 case Intrinsic::x86_sse2_comile_sd: 02930 case Intrinsic::x86_sse2_comigt_sd: 02931 case Intrinsic::x86_sse2_comige_sd: 02932 case Intrinsic::x86_sse2_comineq_sd: 02933 case Intrinsic::x86_sse2_ucomieq_sd: 02934 case Intrinsic::x86_sse2_ucomilt_sd: 02935 case Intrinsic::x86_sse2_ucomile_sd: 02936 case Intrinsic::x86_sse2_ucomigt_sd: 02937 case Intrinsic::x86_sse2_ucomige_sd: 02938 case Intrinsic::x86_sse2_ucomineq_sd: { 02939 unsigned Opc = 0; 02940 ISD::CondCode CC = ISD::SETCC_INVALID; 02941 switch (IntNo) { 02942 default: break; 02943 case Intrinsic::x86_sse_comieq_ss: 02944 case Intrinsic::x86_sse2_comieq_sd: 02945 Opc = X86ISD::COMI; 02946 CC = ISD::SETEQ; 02947 break; 02948 case Intrinsic::x86_sse_comilt_ss: 02949 case Intrinsic::x86_sse2_comilt_sd: 02950 Opc = X86ISD::COMI; 02951 CC = ISD::SETLT; 02952 break; 02953 case Intrinsic::x86_sse_comile_ss: 02954 case Intrinsic::x86_sse2_comile_sd: 02955 Opc = X86ISD::COMI; 02956 CC = ISD::SETLE; 02957 break; 02958 case Intrinsic::x86_sse_comigt_ss: 02959 case Intrinsic::x86_sse2_comigt_sd: 02960 Opc = X86ISD::COMI; 02961 CC = ISD::SETGT; 02962 break; 02963 case Intrinsic::x86_sse_comige_ss: 02964 case Intrinsic::x86_sse2_comige_sd: 02965 Opc = X86ISD::COMI; 02966 CC = ISD::SETGE; 02967 break; 02968 case Intrinsic::x86_sse_comineq_ss: 02969 case Intrinsic::x86_sse2_comineq_sd: 02970 Opc = X86ISD::COMI; 02971 CC = ISD::SETNE; 02972 break; 02973 case Intrinsic::x86_sse_ucomieq_ss: 02974 case Intrinsic::x86_sse2_ucomieq_sd: 02975 Opc = X86ISD::UCOMI; 02976 CC = ISD::SETEQ; 02977 break; 02978 case Intrinsic::x86_sse_ucomilt_ss: 02979 case Intrinsic::x86_sse2_ucomilt_sd: 02980 Opc = X86ISD::UCOMI; 02981 CC = ISD::SETLT; 02982 break; 02983 case Intrinsic::x86_sse_ucomile_ss: 02984 case Intrinsic::x86_sse2_ucomile_sd: 02985 Opc = X86ISD::UCOMI; 02986 CC = ISD::SETLE; 02987 break; 02988 case Intrinsic::x86_sse_ucomigt_ss: 02989 case Intrinsic::x86_sse2_ucomigt_sd: 02990 Opc = X86ISD::UCOMI; 02991 CC = ISD::SETGT; 02992 break; 02993 case Intrinsic::x86_sse_ucomige_ss: 02994 case Intrinsic::x86_sse2_ucomige_sd: 02995 Opc = X86ISD::UCOMI; 02996 CC = ISD::SETGE; 02997 break; 02998 case Intrinsic::x86_sse_ucomineq_ss: 02999 case Intrinsic::x86_sse2_ucomineq_sd: 03000 Opc = X86ISD::UCOMI; 03001 CC = ISD::SETNE; 03002 break; 03003 } 03004 bool Flip; 03005 unsigned X86CC; 03006 translateX86CC(CC, true, X86CC, Flip); 03007 SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1), 03008 Op.getOperand(Flip?1:2)); 03009 SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 03010 DAG.getConstant(X86CC, MVT::i8), Cond); 03011 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC); 03012 } 03013 } 03014 } 03015 } 03016 } 03017 03018 const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { 03019 switch (Opcode) { 03020 default: return NULL; 03021 case X86ISD::SHLD: return "X86ISD::SHLD"; 03022 case X86ISD::SHRD: return "X86ISD::SHRD"; 03023 case X86ISD::FAND: return "X86ISD::FAND"; 03024 case X86ISD::FXOR: return "X86ISD::FXOR"; 03025 case X86ISD::FILD: return "X86ISD::FILD"; 03026 case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; 03027 case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; 03028 case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; 03029 case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; 03030 case X86ISD::FLD: return "X86ISD::FLD"; 03031 case X86ISD::FST: return "X86ISD::FST"; 03032 case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; 03033 case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; 03034 case X86ISD::CALL: return "X86ISD::CALL"; 03035 case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; 03036 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 03037 case X86ISD::CMP: return "X86ISD::CMP"; 03038 case X86ISD::TEST: return "X86ISD::TEST"; 03039 case X86ISD::COMI: return "X86ISD::COMI"; 03040 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 03041 case X86ISD::SETCC: return "X86ISD::SETCC"; 03042 case X86ISD::CMOV: return "X86ISD::CMOV"; 03043 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 03044 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 03045 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 03046 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 03047 case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; 03048 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 03049 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 03050 case X86ISD::S2VEC: return "X86ISD::S2VEC"; 03051 case X86ISD::ZEXT_S2VEC: return "X86ISD::ZEXT_S2VEC"; 03052 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 03053 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 03054 } 03055 } 03056 03057 void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 03058 uint64_t Mask, 03059 uint64_t &KnownZero, 03060 uint64_t &KnownOne, 03061 unsigned Depth) const { 03062 unsigned Opc = Op.getOpcode(); 03063 assert((Opc >= ISD::BUILTIN_OP_END || 03064 Opc == ISD::INTRINSIC_WO_CHAIN || 03065 Opc == ISD::INTRINSIC_W_CHAIN || 03066 Opc == ISD::INTRINSIC_VOID) && 03067 "Should use MaskedValueIsZero if you don't know whether Op" 03068 " is a target node!"); 03069 03070 KnownZero = KnownOne = 0; // Don't know anything. 03071 switch (Opc) { 03072 default: break; 03073 case X86ISD::SETCC: 03074 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 03075 break; 03076 } 03077 } 03078 03079 std::vector<unsigned> X86TargetLowering:: 03080 getRegClassForInlineAsmConstraint(const std::string &Constraint, 03081 MVT::ValueType VT) const { 03082 if (Constraint.size() == 1) { 03083 // FIXME: not handling fp-stack yet! 03084 // FIXME: not handling MMX registers yet ('y' constraint). 03085 switch (Constraint[0]) { // GCC X86 Constraint Letters 03086 default: break; // Unknown constriant letter 03087 case 'r': // GENERAL_REGS 03088 case 'R': // LEGACY_REGS 03089 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 03090 X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0); 03091 case 'l': // INDEX_REGS 03092 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 03093 X86::ESI, X86::EDI, X86::EBP, 0); 03094 case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) 03095 case 'Q': // Q_REGS 03096 return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 0); 03097 case 'x': // SSE_REGS if SSE1 allowed 03098 if (Subtarget->hasSSE1()) 03099 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 03100 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 03101 0); 03102 return std::vector<unsigned>(); 03103 case 'Y': // SSE_REGS if SSE2 allowed 03104 if (Subtarget->hasSSE2()) 03105 return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 03106 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, 03107 0); 03108 return std::vector<unsigned>(); 03109 } 03110 } 03111 03112 return std::vector<unsigned>(); 03113 } 03114 03115 /// isLegalAddressImmediate - Return true if the integer value or 03116 /// GlobalValue can be used as the offset of the target addressing mode. 03117 bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const { 03118 // X86 allows a sign-extended 32-bit immediate field. 03119 return (V > -(1LL << 32) && V < (1LL << 32)-1); 03120 } 03121 03122 bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 03123 if (Subtarget->isTargetDarwin()) { 03124 Reloc::Model RModel = getTargetMachine().getRelocationModel(); 03125 if (RModel == Reloc::Static) 03126 return true; 03127 else if (RModel == Reloc::DynamicNoPIC) 03128 return !DarwinGVRequiresExtraLoad(GV); 03129 else 03130 return false; 03131 } else 03132 return true; 03133 } 03134 03135 /// isShuffleMaskLegal - Targets can use this to indicate that they only 03136 /// support *some* VECTOR_SHUFFLE operations, those with specific masks. 03137 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 03138 /// are assumed to be legal. 03139 bool 03140 X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { 03141 // Only do shuffles on 128-bit vector types for now. 03142 if (MVT::getSizeInBits(VT) == 64) return false; 03143 return (Mask.Val->getNumOperands() == 2 || 03144 X86::isSplatMask(Mask.Val) || 03145 X86::isMOVSMask(Mask.Val) || 03146 X86::isPSHUFDMask(Mask.Val) || 03147 isPSHUFHW_PSHUFLWMask(Mask.Val) || 03148 X86::isSHUFPMask(Mask.Val) || 03149 X86::isUNPCKLMask(Mask.Val) || 03150 X86::isUNPCKL_v_undef_Mask(Mask.Val) || 03151 X86::isUNPCKHMask(Mask.Val)); 03152 }