
X86ISelLowering.cpp

//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: temporary.
#include "llvm/Support/CommandLine.h"
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));
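// (This hidden flag would typically be passed to llc, e.g.
// "llc -enable-x86-fastcc foo.bc"; cl::Hidden only keeps it out of the
// default -help listing.)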

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();

  // Set up the TargetLowering object.

  // X86 is weird; it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86::ESP);

  if (!Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);
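  // Scales of 3, 5, and 9 are realized by also using the index register as
  // the base, e.g. "lea (%eax,%eax,8), %ecx" computes 9*eax in one address
  // calculation.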

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (X86ScalarSSE)
    // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Expand);
  else
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64
  // isn't legal.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (X86ScalarSSE && !Subtarget->hasSSE3())
    // Expand FP_TO_UINT into a select.
    // FIXME: We would like to use a Custom expander here eventually to do
    // the optimal thing for SSE vs. the default expansion in the legalizer.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Expand);
  else
    // With SSE3 we can use fisttpll to convert to a signed i64.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);

  setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
  setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);

  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::SEXTLOAD         , MVT::i1   , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);

  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET             , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::AND,                MVT::v4f32, Legal);
    setOperationAction(ISD::OR,                 MVT::v4f32, Legal);
    setOperationAction(ISD::XOR,                MVT::v4f32, Legal);
    setOperationAction(ISD::ADD,                MVT::v4f32, Legal);
    setOperationAction(ISD::SUB,                MVT::v4f32, Legal);
    setOperationAction(ISD::MUL,                MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,                MVT::v2f64, Legal);
    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v2f64, Legal);
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    setOperationAction(ISD::MUL,                MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are optimizing for size.
  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

//===----------------------------------------------------------------------===//
//                    C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value.  It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

/// HowToPassCCCArgument - Returns how a formal argument of the specified type
/// should be passed. If it is passed on the stack, returns the size of the
/// stack slot; if it is passed in an XMM register, returns the number of XMM
/// registers needed.
static void
HowToPassCCCArgument(MVT::ValueType ObjectVT, unsigned NumXMMRegs,
                     unsigned &ObjSize, unsigned &ObjXMMRegs) {
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:  ObjSize = 1; break;
  case MVT::i16: ObjSize = 2; break;
  case MVT::i32: ObjSize = 4; break;
  case MVT::i64: ObjSize = 8; break;
  case MVT::f32: ObjSize = 4; break;
  case MVT::f64: ObjSize = 8; break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 4)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is <= 4 bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };
  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjXMMRegs = 0;
    HowToPassCCCArgument(ObjectVT, NumXMMRegs, ObjSize, ObjXMMRegs);
    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    SDOperand ArgValue;
    if (ObjXMMRegs) {
      // Passed in an XMM register.
      unsigned Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                               X86::VR128RegisterClass);
      ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
      ArgValues.push_back(ArgValue);
      NumXMMRegs += ObjXMMRegs;
    } else {
      // XMM arguments have to be aligned on 16-byte boundary.
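      // (The expression below rounds ArgOffset up to the next multiple of
      // 16, e.g. an offset of 20 becomes 32.)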
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the frame index object for this incoming parameter...
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                             DAG.getSrcValue(NULL));
      ArgValues.push_back(ArgValue);
      ArgOffset += ArgIncrement;   // Move on to the next argument...
    }
  }

  ArgValues.push_back(Root);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = ArgOffset;

  // If this is a struct return on Darwin/X86, the callee pops the hidden struct
  // pointer.
  if (MF.getFunction()->getCallingConv() == CallingConv::CSRet &&
      Subtarget->isTargetDarwin())
    BytesToPopOnReturn = 4;

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, ArgValues);
}


SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain     = Op.getOperand(0);
  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  MVT::ValueType RetVT= Op.Val->getValueType(0);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  // Keep track of the number of XMM regs passed so far.
  unsigned NumXMMRegs = 0;
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
    case MVT::f64:
      NumBytes += 8;
      break;
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4)
        ++NumXMMRegs;
      else {
        // XMM arguments have to be aligned on 16-byte boundary.
        NumBytes = ((NumBytes + 15) / 16) * 16;
        NumBytes += 16;
      }
      break;
    }
  }

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumXMMRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86::ESP, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16: {
      // Promote the integer to 32 bits.  If the input type is signed use a
      // sign extend, otherwise use a zero extend.
      unsigned ExtOp =
        cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue() ?
        ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, MVT::i32, Arg);
    }
    // Fallthrough

    case MVT::i32:
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
      ArgOffset += 4;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
      ArgOffset += 8;
      break;
    }
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4) {
        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
        NumXMMRegs++;
      } else {
        // XMM arguments have to be aligned on 16-byte boundary.
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
        ArgOffset += 16;
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, Ops);
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  // If this is a call to a struct-return function on Darwin/X86, the callee
  // pops the hidden struct pointer, so we have to push it back.
  if (CallingConv == CallingConv::CSRet && Subtarget->isTargetDarwin())
    NumBytesForCalleeToPush = 4;

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(RetVT);
    break;
  case MVT::f32:
  case MVT::f64: {
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(InFlag);
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      Tys.clear();
      Tys.push_back(MVT::Other);
      Ops.clear();
      Ops.push_back(Chain);
      Ops.push_back(RetVal);
      Ops.push_back(StackSlot);
      Ops.push_back(DAG.getValueType(RetVT));
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
      RetVal = DAG.getLoad(RetVT, Chain, StackSlot,
                           DAG.getSrcValue(NULL));
      Chain = RetVal.getValue(1);
    }

    if (RetVT == MVT::f32 && !X86ScalarSSE)
      // FIXME: we would really like to remember that this FP_ROUND
      // operation is okay to eliminate if we allow excess FP precision.
      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
    ResultVals.push_back(RetVal);
    NodeTys.push_back(RetVT);
    break;
  }
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, ResultVals);
  return Res.getValue(Op.ResNo);
}

//===----------------------------------------------------------------------===//
//                    Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
//
// Note that this can be enhanced in the future to pass fp vals in registers
// (when we have a global fp allocator) and do other tricks.
//
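// For example, three i32 stack arguments occupy 12 bytes, which is already
// of the form 8n+4, so no padding is added; two i32 arguments (8 bytes)
// would be padded out to 12 so that the 4-byte return address brings the
// total to a multiple of 8.
//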

/// HowToPassFastCCArgument - Returns how a formal argument of the specified
/// type should be passed. If it is passed on the stack, returns the size of
/// the stack slot; if it is passed in integer or XMM registers, returns the
/// number of integer or XMM registers needed.
static void
HowToPassFastCCArgument(MVT::ValueType ObjectVT,
                        unsigned NumIntRegs, unsigned NumXMMRegs,
                        unsigned &ObjSize, unsigned &ObjIntRegs,
                        unsigned &ObjXMMRegs) {
  ObjSize = 0;
  ObjIntRegs = 0;
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 1;
    break;
  case MVT::i16:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 2;
    break;
  case MVT::i32:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 4;
    break;
  case MVT::i64:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
      ObjIntRegs = 2;
    } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
      ObjIntRegs = 1;
      ObjSize = 4;
    } else
#endif
      ObjSize = 8;
    break;
  case MVT::f32:
    ObjSize = 4;
    break;
  case MVT::f64:
    ObjSize = 8;
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 4)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}

SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues()-1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function the stack
  // frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first nonreg argument (leftmost lexically)
  // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size
  //    ...
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot

  // Keep track of the number of integer regs passed so far.  This can be
  // either 0 (neither EAX nor EDX is used), 1 (EAX is used), or 2 (both EAX
  // and EDX are used).
  unsigned NumIntRegs = 0;
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjIntRegs = 0;
    unsigned ObjXMMRegs = 0;

    HowToPassFastCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
                            ObjSize, ObjIntRegs, ObjXMMRegs);
    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    unsigned Reg = 0;
    SDOperand ArgValue;
    if (ObjIntRegs || ObjXMMRegs) {
      switch (ObjectVT) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i8:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
                        X86::GR8RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i8);
        break;
      case MVT::i16:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
                        X86::GR16RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i16);
        break;
      case MVT::i32:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                        X86::GR32RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
        break;
      case MVT::i64:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                        X86::GR32RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
        if (ObjIntRegs == 2) {
          Reg = AddLiveIn(MF, X86::EDX, X86::GR32RegisterClass);
          SDOperand ArgValue2 = DAG.getCopyFromReg(Root, Reg, MVT::i32);
          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
        }
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      NumIntRegs += ObjIntRegs;
      NumXMMRegs += ObjXMMRegs;
    }

    if (ObjSize) {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      if (ObjectVT == MVT::i64 && ObjIntRegs) {
        SDOperand ArgValue2 = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                                          DAG.getSrcValue(NULL));
        ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
      } else
        ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                               DAG.getSrcValue(NULL));
      ArgOffset += ArgIncrement;   // Move on to the next argument.
    }

    ArgValues.push_back(ArgValue);
  }

  ArgValues.push_back(Root);

  // Make sure the argument area occupies 8n+4 bytes: together with the 4-byte
  // return address, this keeps the stack 8-byte aligned across the call.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(MF.getFunction()->getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    MF.addLiveOut(X86::XMM0);
    break;
  }

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, ArgValues);
}

SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain     = Op.getOperand(0);
  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  MVT::ValueType RetVT= Op.Val->getValueType(0);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  // Keep track of the number of integer regs passed so far.  This can be
  // either 0 (neither EAX nor EDX is used), 1 (EAX is used), or 2 (both EAX
  // and EDX are used).
  unsigned NumIntRegs = 0;
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  static const unsigned GPRArgRegs[][2] = {
    { X86::AL,  X86::DL },
    { X86::AX,  X86::DX },
    { X86::EAX, X86::EDX }
  };
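  // Rows are indexed by value type: i8, i16, and i32 are consecutive in the
  // MVT enumeration, so "Arg.getValueType() - MVT::i8" below selects the row
  // with registers of the right width; the column is the number of integer
  // registers already used.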
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unknown value type!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        ++NumIntRegs;
        break;
      }
#endif
      // Fall through
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::f64:
      NumBytes += 8;
      break;
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4)
        NumXMMRegs++;
      else {
        // XMM arguments have to be aligned on 16-byte boundary.
        NumBytes = ((NumBytes + 15) / 16) * 16;
        NumBytes += 16;
      }
      break;
    }
  }

  // Make sure the argument area occupies 8n+4 bytes: together with the 4-byte
  // return address, this keeps the stack 8-byte aligned across the call.
  if ((NumBytes & 7) == 0)
    NumBytes += 4;

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumIntRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86::ESP, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        RegsToPass.push_back(
              std::make_pair(GPRArgRegs[Arg.getValueType()-MVT::i8][NumIntRegs],
                             Arg));
        ++NumIntRegs;
        break;
      }
#endif
      // Fall through
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
      ArgOffset += 4;
      break;
    }
    case MVT::f64: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
      ArgOffset += 8;
      break;
    }
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4) {
        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
        NumXMMRegs++;
      } else {
        // XMM arguments have to be aligned on 16-byte boundary.
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
        ArgOffset += 16;
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, Ops);
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
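  // Under fastcc the callee pops all argument bytes, so the "bytes the
  // callee pops" operand of CALLSEQ_END equals NumBytes as well.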
01166   Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
01167   Ops.push_back(InFlag);
01168   Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
01169   if (RetVT != MVT::Other)
01170     InFlag = Chain.getValue(1);
01171   
01172   std::vector<SDOperand> ResultVals;
01173   NodeTys.clear();
01174   switch (RetVT) {
01175   default: assert(0 && "Unknown value type to return!");
01176   case MVT::Other: break;
01177   case MVT::i8:
01178     Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
01179     ResultVals.push_back(Chain.getValue(0));
01180     NodeTys.push_back(MVT::i8);
01181     break;
01182   case MVT::i16:
01183     Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
01184     ResultVals.push_back(Chain.getValue(0));
01185     NodeTys.push_back(MVT::i16);
01186     break;
01187   case MVT::i32:
01188     if (Op.Val->getValueType(1) == MVT::i32) {
01189       Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
01190       ResultVals.push_back(Chain.getValue(0));
01191       Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
01192                                  Chain.getValue(2)).getValue(1);
01193       ResultVals.push_back(Chain.getValue(0));
01194       NodeTys.push_back(MVT::i32);
01195     } else {
01196       Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
01197       ResultVals.push_back(Chain.getValue(0));
01198     }
01199     NodeTys.push_back(MVT::i32);
01200     break;
01201   case MVT::v16i8:
01202   case MVT::v8i16:
01203   case MVT::v4i32:
01204   case MVT::v2i64:
01205   case MVT::v4f32:
01206   case MVT::v2f64:
01207     Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
01208     ResultVals.push_back(Chain.getValue(0));
01209     NodeTys.push_back(RetVT);
01210     break;
01211   case MVT::f32:
01212   case MVT::f64: {
01213     std::vector<MVT::ValueType> Tys;
01214     Tys.push_back(MVT::f64);
01215     Tys.push_back(MVT::Other);
01216     Tys.push_back(MVT::Flag);
01217     std::vector<SDOperand> Ops;
01218     Ops.push_back(Chain);
01219     Ops.push_back(InFlag);
01220     SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
01221     Chain  = RetVal.getValue(1);
01222     InFlag = RetVal.getValue(2);
01223     if (X86ScalarSSE) {
01224       // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
01225       // shouldn't be necessary except that RFP cannot be live across
01226       // multiple blocks. When stackifier is fixed, they can be uncoupled.
01227       MachineFunction &MF = DAG.getMachineFunction();
01228       int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
01229       SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
01230       Tys.clear();
01231       Tys.push_back(MVT::Other);
01232       Ops.clear();
01233       Ops.push_back(Chain);
01234       Ops.push_back(RetVal);
01235       Ops.push_back(StackSlot);
01236       Ops.push_back(DAG.getValueType(RetVT));
01237       Ops.push_back(InFlag);
01238       Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
01239       RetVal = DAG.getLoad(RetVT, Chain, StackSlot,
01240                            DAG.getSrcValue(NULL));
01241       Chain = RetVal.getValue(1);
01242     }
01243 
01244     if (RetVT == MVT::f32 && !X86ScalarSSE)
01245       // FIXME: we would really like to remember that this FP_ROUND
01246       // operation is okay to eliminate if we allow excess FP precision.
01247       RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
01248     ResultVals.push_back(RetVal);
01249     NodeTys.push_back(RetVT);
01250     break;
01251   }
01252   }
01253 
01254 
01255   // If the function returns void, just return the chain.
01256   if (ResultVals.empty())
01257     return Chain;
01258   
01259   // Otherwise, merge everything together with a MERGE_VALUES node.
01260   NodeTys.push_back(MVT::Other);
01261   ResultVals.push_back(Chain);
01262   SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, ResultVals);
01263   return Res.getValue(Op.ResNo);
01264 }
01265 
01266 SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
01267   if (ReturnAddrIndex == 0) {
01268     // Set up a frame object for the return address.
01269     MachineFunction &MF = DAG.getMachineFunction();
01270     ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
01271   }
01272 
01273   return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32);
01274 }
01275 
01276 
01277 
01278 std::pair<SDOperand, SDOperand> X86TargetLowering::
01279 LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
01280                         SelectionDAG &DAG) {
01281   SDOperand Result;
01282   if (Depth)        // Depths > 0 not supported yet!
01283     Result = DAG.getConstant(0, getPointerTy());
01284   else {
01285     SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
01286     if (!isFrameAddress)
01287       // Just load the return address
01288       Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI,
01289                            DAG.getSrcValue(NULL));
01290     else
01291       Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI,
01292                            DAG.getConstant(4, MVT::i32));
01293   }
01294   return std::make_pair(Result, Chain);
01295 }
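// For reference: with the fixed object above at offset -4, a
// __builtin_return_address(0)-style query simply loads that 4-byte slot,
// while a __builtin_frame_address(0)-style query returns the slot's address
// minus 4. Depths greater than zero are not walked yet and yield constant 0.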
01296 
01297 /// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode
01298 /// which corresponds to the condition code.
01299 static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) {
01300   switch (X86CC) {
01301   default: assert(0 && "Unknown X86 conditional code!");
01302   case X86ISD::COND_A:  return X86::JA;
01303   case X86ISD::COND_AE: return X86::JAE;
01304   case X86ISD::COND_B:  return X86::JB;
01305   case X86ISD::COND_BE: return X86::JBE;
01306   case X86ISD::COND_E:  return X86::JE;
01307   case X86ISD::COND_G:  return X86::JG;
01308   case X86ISD::COND_GE: return X86::JGE;
01309   case X86ISD::COND_L:  return X86::JL;
01310   case X86ISD::COND_LE: return X86::JLE;
01311   case X86ISD::COND_NE: return X86::JNE;
01312   case X86ISD::COND_NO: return X86::JNO;
01313   case X86ISD::COND_NP: return X86::JNP;
01314   case X86ISD::COND_NS: return X86::JNS;
01315   case X86ISD::COND_O:  return X86::JO;
01316   case X86ISD::COND_P:  return X86::JP;
01317   case X86ISD::COND_S:  return X86::JS;
01318   }
01319 }
01320 
01321 /// translateX86CC - do a one-to-one translation of an ISD::CondCode to the
01322 /// X86-specific condition code. It returns false if it cannot do a direct
01323 /// translation. X86CC is the translated CondCode. Flip is set to true if the
01324 /// order of comparison operands should be flipped.
01325 static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
01326                            unsigned &X86CC, bool &Flip) {
01327   Flip = false;
01328   X86CC = X86ISD::COND_INVALID;
01329   if (!isFP) {
01330     switch (SetCCOpcode) {
01331     default: break;
01332     case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
01333     case ISD::SETGT:  X86CC = X86ISD::COND_G;  break;
01334     case ISD::SETGE:  X86CC = X86ISD::COND_GE; break;
01335     case ISD::SETLT:  X86CC = X86ISD::COND_L;  break;
01336     case ISD::SETLE:  X86CC = X86ISD::COND_LE; break;
01337     case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
01338     case ISD::SETULT: X86CC = X86ISD::COND_B;  break;
01339     case ISD::SETUGT: X86CC = X86ISD::COND_A;  break;
01340     case ISD::SETULE: X86CC = X86ISD::COND_BE; break;
01341     case ISD::SETUGE: X86CC = X86ISD::COND_AE; break;
01342     }
01343   } else {
01344     // On a floating point condition, the flags are set as follows:
01345     // ZF  PF  CF   op
01346     //  0 | 0 | 0 | X > Y
01347     //  0 | 0 | 1 | X < Y
01348     //  1 | 0 | 0 | X == Y
01349     //  1 | 1 | 1 | unordered
01350     switch (SetCCOpcode) {
01351     default: break;
01352     case ISD::SETUEQ:
01353     case ISD::SETEQ: X86CC = X86ISD::COND_E;  break;
01354     case ISD::SETOLT: Flip = true; // Fallthrough
01355     case ISD::SETOGT:
01356     case ISD::SETGT: X86CC = X86ISD::COND_A;  break;
01357     case ISD::SETOLE: Flip = true; // Fallthrough
01358     case ISD::SETOGE:
01359     case ISD::SETGE: X86CC = X86ISD::COND_AE; break;
01360     case ISD::SETUGT: Flip = true; // Fallthrough
01361     case ISD::SETULT:
01362     case ISD::SETLT: X86CC = X86ISD::COND_B;  break;
01363     case ISD::SETUGE: Flip = true; // Fallthrough
01364     case ISD::SETULE:
01365     case ISD::SETLE: X86CC = X86ISD::COND_BE; break;
01366     case ISD::SETONE:
01367     case ISD::SETNE: X86CC = X86ISD::COND_NE; break;
01368     case ISD::SETUO: X86CC = X86ISD::COND_P;  break;
01369     case ISD::SETO:  X86CC = X86ISD::COND_NP; break;
01370     }
01371   }
01372 
01373   return X86CC != X86ISD::COND_INVALID;
01374 }
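// Worked example: setolt (ordered less-than). The table above shows the FP
// flags behave like an *unsigned* integer compare (CF set when X < Y), so
// setolt flips its operands and reuses COND_A: after comparing (Y, X), JA
// succeeds exactly when X < Y and both operands are ordered, since an
// unordered result sets ZF = PF = CF = 1 and fails the JA test.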
01375 
01376 static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC,
01377                            bool &Flip) {
01378   return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip);
01379 }
01380 
01381 /// hasFPCMov - is there a floating point cmov for the specific X86 condition
01382 /// code. The current x86 ISA includes the following FP cmov instructions:
01383 /// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
01384 static bool hasFPCMov(unsigned X86CC) {
01385   switch (X86CC) {
01386   default:
01387     return false;
01388   case X86ISD::COND_B:
01389   case X86ISD::COND_BE:
01390   case X86ISD::COND_E:
01391   case X86ISD::COND_P:
01392   case X86ISD::COND_A:
01393   case X86ISD::COND_AE:
01394   case X86ISD::COND_NE:
01395   case X86ISD::COND_NP:
01396     return true;
01397   }
01398 }
01399 
01400 /// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra
01401 /// load. For Darwin, external and weak symbols are indirect, loading the value
01402 /// at address GV rather than the value of GV itself. This means that the
01403 /// GlobalAddress must be in the base or index register of the address, not the
01404 /// GV offset field.
01405 static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
01406   return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
01407           (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
01408 }
01409 
01410 /// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
01411 /// true if Op is undef or if its value falls within the half-open range [Low, Hi).
01412 static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
01413   if (Op.getOpcode() == ISD::UNDEF)
01414     return true;
01415 
01416   unsigned Val = cast<ConstantSDNode>(Op)->getValue();
01417   return (Val >= Low && Val < Hi);
01418 }
01419 
01420 /// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
01421 /// true if Op is undef or if its value is equal to the specified value.
01422 static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
01423   if (Op.getOpcode() == ISD::UNDEF)
01424     return true;
01425   return cast<ConstantSDNode>(Op)->getValue() == Val;
01426 }
01427 
01428 /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
01429 /// specifies a shuffle of elements that is suitable for input to PSHUFD.
01430 bool X86::isPSHUFDMask(SDNode *N) {
01431   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01432 
01433   if (N->getNumOperands() != 4)
01434     return false;
01435 
01436   // Check that the mask doesn't reference the second vector.
01437   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
01438     SDOperand Arg = N->getOperand(i);
01439     if (Arg.getOpcode() == ISD::UNDEF) continue;
01440     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01441     if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
01442       return false;
01443   }
01444 
01445   return true;
01446 }
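// For example, <2,1,0,3> (or <2,u,0,3> with an undef lane) is a valid PSHUFD
// mask, while <0,1,4,5> is not: elements 4 and 5 name lanes of the second
// source vector, which PSHUFD cannot reach.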
01447 
01448 /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
01449 /// specifies a shuffle of elements that is suitable for input to PSHUFHW.
01450 bool X86::isPSHUFHWMask(SDNode *N) {
01451   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01452 
01453   if (N->getNumOperands() != 8)
01454     return false;
01455 
01456   // Lower quadword copied in order.
01457   for (unsigned i = 0; i != 4; ++i) {
01458     SDOperand Arg = N->getOperand(i);
01459     if (Arg.getOpcode() == ISD::UNDEF) continue;
01460     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01461     if (cast<ConstantSDNode>(Arg)->getValue() != i)
01462       return false;
01463   }
01464 
01465   // Upper quadword shuffled.
01466   for (unsigned i = 4; i != 8; ++i) {
01467     SDOperand Arg = N->getOperand(i);
01468     if (Arg.getOpcode() == ISD::UNDEF) continue;
01469     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01470     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
01471     if (Val < 4 || Val > 7)
01472       return false;
01473   }
01474 
01475   return true;
01476 }
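// For example, <0,1,2,3,7,6,5,4> is a valid PSHUFHW mask: the low quadword
// passes through in order and only lanes 4-7 are permuted. isPSHUFLWMask
// below accepts the mirror-image pattern.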
01477 
01478 /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
01479 /// specifies a shuffle of elements that is suitable for input to PSHUFLW.
01480 bool X86::isPSHUFLWMask(SDNode *N) {
01481   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01482 
01483   if (N->getNumOperands() != 8)
01484     return false;
01485 
01486   // Upper quadword copied in order.
01487   for (unsigned i = 4; i != 8; ++i)
01488     if (!isUndefOrEqual(N->getOperand(i), i))
01489       return false;
01490 
01491   // Lower quadword shuffled.
01492   for (unsigned i = 0; i != 4; ++i)
01493     if (!isUndefOrInRange(N->getOperand(i), 0, 4))
01494       return false;
01495 
01496   return true;
01497 }
01498 
01499 /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
01500 /// specifies a shuffle of elements that is suitable for input to SHUFP*.
01501 static bool isSHUFPMask(std::vector<SDOperand> &N) {
01502   unsigned NumElems = N.size();
01503   if (NumElems != 2 && NumElems != 4) return false;
01504 
01505   unsigned Half = NumElems / 2;
01506   for (unsigned i = 0; i < Half; ++i)
01507     if (!isUndefOrInRange(N[i], 0, NumElems))
01508       return false;
01509   for (unsigned i = Half; i < NumElems; ++i)
01510     if (!isUndefOrInRange(N[i], NumElems, NumElems*2))
01511       return false;
01512 
01513   return true;
01514 }
01515 
01516 bool X86::isSHUFPMask(SDNode *N) {
01517   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01518   std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
01519   return ::isSHUFPMask(Ops);
01520 }
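// For example, with four elements the mask <1,3,5,7> matches SHUFPS: the low
// half of the result selects lanes 1 and 3 of the first vector, and the high
// half selects lanes 1 and 3 of the second (encoded as 5 and 7).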
01521 
01522 /// isCommutedSHUFP - Returns true if the shuffle mask is exactly
01523 /// the reverse of what x86 shuffles want. x86 shuffles require the lower
01524 /// half elements to come from vector 1 (which would equal the dest.) and
01525 /// the upper half to come from vector 2.
01526 static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) {
01527   unsigned NumElems = Ops.size();
01528   if (NumElems != 2 && NumElems != 4) return false;
01529 
01530   unsigned Half = NumElems / 2;
01531   for (unsigned i = 0; i < Half; ++i)
01532     if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2))
01533       return false;
01534   for (unsigned i = Half; i < NumElems; ++i)
01535     if (!isUndefOrInRange(Ops[i], 0, NumElems))
01536       return false;
01537   return true;
01538 }
01539 
01540 static bool isCommutedSHUFP(SDNode *N) {
01541   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01542   std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
01543   return isCommutedSHUFP(Ops);
01544 }
01545 
01546 /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
01547 /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
01548 bool X86::isMOVHLPSMask(SDNode *N) {
01549   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01550 
01551   if (N->getNumOperands() != 4)
01552     return false;
01553 
01554   // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
01555   return isUndefOrEqual(N->getOperand(0), 6) &&
01556          isUndefOrEqual(N->getOperand(1), 7) &&
01557          isUndefOrEqual(N->getOperand(2), 2) &&
01558          isUndefOrEqual(N->getOperand(3), 3);
01559 }
01560 
01561 /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
01562 /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
01563 bool X86::isMOVLPMask(SDNode *N) {
01564   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01565 
01566   unsigned NumElems = N->getNumOperands();
01567   if (NumElems != 2 && NumElems != 4)
01568     return false;
01569 
01570   for (unsigned i = 0; i < NumElems/2; ++i)
01571     if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
01572       return false;
01573 
01574   for (unsigned i = NumElems/2; i < NumElems; ++i)
01575     if (!isUndefOrEqual(N->getOperand(i), i))
01576       return false;
01577 
01578   return true;
01579 }
01580 
01581 /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
01582 /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
01583 /// and MOVLHPS.
01584 bool X86::isMOVHPMask(SDNode *N) {
01585   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01586 
01587   unsigned NumElems = N->getNumOperands();
01588   if (NumElems != 2 && NumElems != 4)
01589     return false;
01590 
01591   for (unsigned i = 0; i < NumElems/2; ++i)
01592     if (!isUndefOrEqual(N->getOperand(i), i))
01593       return false;
01594 
01595   for (unsigned i = 0; i < NumElems/2; ++i) {
01596     SDOperand Arg = N->getOperand(i + NumElems/2);
01597     if (!isUndefOrEqual(Arg, i + NumElems))
01598       return false;
01599   }
01600 
01601   return true;
01602 }
01603 
01604 /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
01605 /// specifies a shuffle of elements that is suitable for input to UNPCKL.
01606 static bool isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
01607   unsigned NumElems = N.size();
01608   if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
01609     return false;
01610 
01611   for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
01612     SDOperand BitI  = N[i];
01613     SDOperand BitI1 = N[i+1];
01614     if (!isUndefOrEqual(BitI, j))
01615       return false;
01616     if (V2IsSplat) {
01617       if (!isUndefOrEqual(BitI1, NumElems))
01618         return false;
01619     } else {
01620       if (!isUndefOrEqual(BitI1, j + NumElems))
01621         return false;
01622     }
01623   }
01624 
01625   return true;
01626 }
01627 
01628 bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
01629   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01630   std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
01631   return ::isUNPCKLMask(Ops, V2IsSplat);
01632 }
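// For example, <0,4,1,5> is the canonical 4-element unpckl mask, interleaving
// the low halves of the two sources. With V2IsSplat the odd lanes must all
// name V2's first element, so <0,4,1,4> is accepted as well.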
01633 
01634 /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
01635 /// specifies a shuffle of elements that is suitable for input to UNPCKH.
01636 static bool isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
01637   unsigned NumElems = N.size();
01638   if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
01639     return false;
01640 
01641   for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
01642     SDOperand BitI  = N[i];
01643     SDOperand BitI1 = N[i+1];
01644     if (!isUndefOrEqual(BitI, j + NumElems/2))
01645       return false;
01646     if (V2IsSplat) {
01647       if (!isUndefOrEqual(BitI1, NumElems))
01648         return false;
01649     } else {
01650       if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems))
01651         return false;
01652     }
01653   }
01654 
01655   return true;
01656 }
01657 
01658 bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
01659   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01660   std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
01661   return ::isUNPCKHMask(Ops, V2IsSplat);
01662 }
01663 
01664 /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
01665 /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
01666 /// <0, 0, 1, 1>
01667 bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
01668   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01669 
01670   unsigned NumElems = N->getNumOperands();
01671   if (NumElems != 4 && NumElems != 8 && NumElems != 16)
01672     return false;
01673 
01674   for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
01675     SDOperand BitI  = N->getOperand(i);
01676     SDOperand BitI1 = N->getOperand(i+1);
01677 
01678     if (!isUndefOrEqual(BitI, j))
01679       return false;
01680     if (!isUndefOrEqual(BitI1, j))
01681       return false;
01682   }
01683 
01684   return true;
01685 }
01686 
01687 /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
01688 /// specifies a shuffle of elements that is suitable for input to MOVSS,
01689 /// MOVSD, and MOVD, i.e. setting the lowest element.
01690 static bool isMOVLMask(std::vector<SDOperand> &N) {
01691   unsigned NumElems = N.size();
01692   if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
01693     return false;
01694 
01695   if (!isUndefOrEqual(N[0], NumElems))
01696     return false;
01697 
01698   for (unsigned i = 1; i < NumElems; ++i) {
01699     SDOperand Arg = N[i];
01700     if (!isUndefOrEqual(Arg, i))
01701       return false;
01702   }
01703 
01704   return true;
01705 }
01706 
01707 bool X86::isMOVLMask(SDNode *N) {
01708   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01709   std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
01710   return ::isMOVLMask(Ops);
01711 }
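// For example, <4,1,2,3> matches MOVSS: lane 0 comes from the start of V2
// (element NumElems == 4) and the remaining lanes pass V1 through unchanged.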
01712 
01713 /// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
01714 /// of what x86 movss wants: the lowest element must be the lowest element of
01715 /// vector 2, and the other elements must come from vector 1 in order.
01716 static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false) {
01717   unsigned NumElems = Ops.size();
01718   if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
01719     return false;
01720 
01721   if (!isUndefOrEqual(Ops[0], 0))
01722     return false;
01723 
01724   for (unsigned i = 1; i < NumElems; ++i) {
01725     SDOperand Arg = Ops[i];
01726     if (V2IsSplat) {
01727       if (!isUndefOrEqual(Arg, NumElems))
01728         return false;
01729     } else {
01730       if (!isUndefOrEqual(Arg, i+NumElems))
01731         return false;
01732     }
01733   }
01734 
01735   return true;
01736 }
01737 
01738 static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false) {
01739   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01740   std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
01741   return isCommutedMOVL(Ops, V2IsSplat);
01742 }
01743 
01744 /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
01745 /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
01746 bool X86::isMOVSHDUPMask(SDNode *N) {
01747   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01748 
01749   if (N->getNumOperands() != 4)
01750     return false;
01751 
01752   // Expect 1, 1, 3, 3
01753   for (unsigned i = 0; i < 2; ++i) {
01754     SDOperand Arg = N->getOperand(i);
01755     if (Arg.getOpcode() == ISD::UNDEF) continue;
01756     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01757     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
01758     if (Val != 1) return false;
01759   }
01760 
01761   bool HasHi = false;
01762   for (unsigned i = 2; i < 4; ++i) {
01763     SDOperand Arg = N->getOperand(i);
01764     if (Arg.getOpcode() == ISD::UNDEF) continue;
01765     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01766     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
01767     if (Val != 3) return false;
01768     HasHi = true;
01769   }
01770 
01771   // Don't use movshdup if it can be done with a shufps.
01772   return HasHi;
01773 }
01774 
01775 /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
01776 /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
01777 bool X86::isMOVSLDUPMask(SDNode *N) {
01778   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01779 
01780   if (N->getNumOperands() != 4)
01781     return false;
01782 
01783   // Expect 0, 0, 2, 2
01784   for (unsigned i = 0; i < 2; ++i) {
01785     SDOperand Arg = N->getOperand(i);
01786     if (Arg.getOpcode() == ISD::UNDEF) continue;
01787     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01788     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
01789     if (Val != 0) return false;
01790   }
01791 
01792   bool HasHi = false;
01793   for (unsigned i = 2; i < 4; ++i) {
01794     SDOperand Arg = N->getOperand(i);
01795     if (Arg.getOpcode() == ISD::UNDEF) continue;
01796     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01797     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
01798     if (Val != 2) return false;
01799     HasHi = true;
01800   }
01801 
01802   // Don't use movsldup if it can be done with a shufps.
01803   return HasHi;
01804 }
01805 
01806 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
01807 /// a splat of a single element.
01808 static bool isSplatMask(SDNode *N) {
01809   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01810 
01811   // This is a splat operation if each element of the permute is the same, and
01812   // if the value doesn't reference the second vector.
01813   unsigned NumElems = N->getNumOperands();
01814   SDOperand ElementBase;
01815   unsigned i = 0;
01816   for (; i != NumElems; ++i) {
01817     SDOperand Elt = N->getOperand(i);
01818     if (isa<ConstantSDNode>(Elt)) {
01819       ElementBase = Elt;
01820       break;
01821     }
01822   }
01823 
01824   if (!ElementBase.Val)
01825     return false;
01826 
01827   for (; i != NumElems; ++i) {
01828     SDOperand Arg = N->getOperand(i);
01829     if (Arg.getOpcode() == ISD::UNDEF) continue;
01830     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01831     if (Arg != ElementBase) return false;
01832   }
01833 
01834   // Make sure it is a splat of the first vector operand.
01835   return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
01836 }
01837 
01838 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
01839 /// a splat of a single element and it's a 2 or 4 element mask.
01840 bool X86::isSplatMask(SDNode *N) {
01841   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01842 
01843   // We can only splat 64-bit and 32-bit quantities with a single instruction.
01844   if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
01845     return false;
01846   return ::isSplatMask(N);
01847 }
01848 
01849 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
01850 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
01851 /// instructions.
01852 unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
01853   unsigned NumOperands = N->getNumOperands();
01854   unsigned Shift = (NumOperands == 4) ? 2 : 1;
01855   unsigned Mask = 0;
01856   for (unsigned i = 0; i < NumOperands; ++i) {
01857     unsigned Val = 0;
01858     SDOperand Arg = N->getOperand(NumOperands-i-1);
01859     if (Arg.getOpcode() != ISD::UNDEF)
01860       Val = cast<ConstantSDNode>(Arg)->getValue();
01861     if (Val >= NumOperands) Val -= NumOperands;
01862     Mask |= Val;
01863     if (i != NumOperands - 1)
01864       Mask <<= Shift;
01865   }
01866 
01867   return Mask;
01868 }
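// Worked example: the 4-element mask <1,0,3,2> yields the immediate
// (2<<6)|(3<<4)|(0<<2)|1 = 0xB1; mask element i lands in bit pair [2*i+1:2*i],
// matching how the pshufd/shufps immediate byte is read. A hypothetical
// decoder illustrating the layout:
//   static unsigned DecodeSHUFElt(unsigned Imm, unsigned i) {
//     return (Imm >> (2 * i)) & 3;   // recover element i of a 4-wide mask
//   }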
01869 
01870 /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
01871 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
01872 /// instructions.
01873 unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
01874   unsigned Mask = 0;
01875   // 8 nodes, but we only care about the last 4.
01876   for (unsigned i = 7; i >= 4; --i) {
01877     unsigned Val = 0;
01878     SDOperand Arg = N->getOperand(i);
01879     if (Arg.getOpcode() != ISD::UNDEF)
01880       Val = cast<ConstantSDNode>(Arg)->getValue();
01881     Mask |= (Val - 4);
01882     if (i != 4)
01883       Mask <<= 2;
01884   }
01885 
01886   return Mask;
01887 }
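// For example, the PSHUFHW-able mask shown earlier, <0,1,2,3,7,6,5,4>,
// encodes as (0<<6)|(1<<4)|(2<<2)|3 = 0x1B once each of the four high lanes
// has the implicit -4 bias applied.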
01888 
01889 /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
01890 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
01891 /// instructions.
01892 unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
01893   unsigned Mask = 0;
01894   // 8 nodes, but we only care about the first 4.
01895   for (int i = 3; i >= 0; --i) {
01896     unsigned Val = 0;
01897     SDOperand Arg = N->getOperand(i);
01898     if (Arg.getOpcode() != ISD::UNDEF)
01899       Val = cast<ConstantSDNode>(Arg)->getValue();
01900     Mask |= Val;
01901     if (i != 0)
01902       Mask <<= 2;
01903   }
01904 
01905   return Mask;
01906 }
01907 
01908 /// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
01909 /// specifies an 8 element shuffle that can be broken into a pair of
01910 /// PSHUFHW and PSHUFLW.
01911 static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
01912   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01913 
01914   if (N->getNumOperands() != 8)
01915     return false;
01916 
01917   // Lower quadword shuffled.
01918   for (unsigned i = 0; i != 4; ++i) {
01919     SDOperand Arg = N->getOperand(i);
01920     if (Arg.getOpcode() == ISD::UNDEF) continue;
01921     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01922     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
01923     if (Val >= 4)
01924       return false;
01925   }
01926 
01927   // Upper quadword shuffled.
01928   for (unsigned i = 4; i != 8; ++i) {
01929     SDOperand Arg = N->getOperand(i);
01930     if (Arg.getOpcode() == ISD::UNDEF) continue;
01931     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01932     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
01933     if (Val < 4 || Val > 7)
01934       return false;
01935   }
01936 
01937   return true;
01938 }
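// For example, <3,2,1,0,5,4,7,6> qualifies: lanes 0-3 stay inside the low
// quadword and lanes 4-7 inside the high one, so LowerVECTOR_SHUFFLE below
// can emit a PSHUFLW feeding a PSHUFHW instead of a generic shuffle.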
01939 
01940 /// CommuteVectorShuffle - Swap vector_shuffle operands as well as
01941 /// the values in their permute mask.
01942 static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) {
01943   SDOperand V1 = Op.getOperand(0);
01944   SDOperand V2 = Op.getOperand(1);
01945   SDOperand Mask = Op.getOperand(2);
01946   MVT::ValueType VT = Op.getValueType();
01947   MVT::ValueType MaskVT = Mask.getValueType();
01948   MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
01949   unsigned NumElems = Mask.getNumOperands();
01950   std::vector<SDOperand> MaskVec;
01951 
01952   for (unsigned i = 0; i != NumElems; ++i) {
01953     SDOperand Arg = Mask.getOperand(i);
01954     if (Arg.getOpcode() == ISD::UNDEF) {
01955       MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
01956       continue;
01957     }
01958     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01959     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
01960     if (Val < NumElems)
01961       MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
01962     else
01963       MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
01964   }
01965 
01966   Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
01967   return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
01968 }
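// For example, shuffle(V1, V2, <0,5,2,7>) becomes shuffle(V2, V1, <4,1,6,3>):
// the operands swap and each mask entry is rebased into the other vector.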
01969 
01970 /// ShouldXformToMOVHLPS - Return true if the node should be transformed to
01971 /// match movhlps. The lower half elements should come from upper half of
01972 /// V1 (and in order), and the upper half elements should come from the upper
01973 /// half of V2 (and in order). 
01974 static bool ShouldXformToMOVHLPS(SDNode *Mask) {
01975   unsigned NumElems = Mask->getNumOperands();
01976   if (NumElems != 4)
01977     return false;
01978   for (unsigned i = 0, e = 2; i != e; ++i)
01979     if (!isUndefOrEqual(Mask->getOperand(i), i+2))
01980       return false;
01981   for (unsigned i = 2; i != 4; ++i)
01982     if (!isUndefOrEqual(Mask->getOperand(i), i+4))
01983       return false;
01984   return true;
01985 }
01986 
01987 /// isScalarLoadToVector - Returns true if the node is a scalar load that
01988 /// is promoted to a vector.
01989 static inline bool isScalarLoadToVector(SDNode *N) {
01990   if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
01991     N = N->getOperand(0).Val;
01992     return (N->getOpcode() == ISD::LOAD);
01993   }
01994   return false;
01995 }
01996 
01997 /// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
01998 /// match movlp{s|d}. The lower half elements should come from lower half of
01999 /// V1 (and in order), and the upper half elements should come from the upper
02000 /// half of V2 (and in order). And since V1 will become the source of the
02001 /// MOVLP, it must be either a vector load or a scalar load to vector.
02002 static bool ShouldXformToMOVLP(SDNode *V1, SDNode *Mask) {
02003   if (V1->getOpcode() != ISD::LOAD && !isScalarLoadToVector(V1))
02004     return false;
02005 
02006   unsigned NumElems = Mask->getNumOperands();
02007   if (NumElems != 2 && NumElems != 4)
02008     return false;
02009   for (unsigned i = 0, e = NumElems/2; i != e; ++i)
02010     if (!isUndefOrEqual(Mask->getOperand(i), i))
02011       return false;
02012   for (unsigned i = NumElems/2; i != NumElems; ++i)
02013     if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
02014       return false;
02015   return true;
02016 }
02017 
02018 /// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
02019 /// all the same.
02020 static bool isSplatVector(SDNode *N) {
02021   if (N->getOpcode() != ISD::BUILD_VECTOR)
02022     return false;
02023 
02024   SDOperand SplatValue = N->getOperand(0);
02025   for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
02026     if (N->getOperand(i) != SplatValue)
02027       return false;
02028   return true;
02029 }
02030 
02031 /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
02032 /// that point to V2 point to its first element.
02033 static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
02034   assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
02035 
02036   bool Changed = false;
02037   std::vector<SDOperand> MaskVec;
02038   unsigned NumElems = Mask.getNumOperands();
02039   for (unsigned i = 0; i != NumElems; ++i) {
02040     SDOperand Arg = Mask.getOperand(i);
02041     if (Arg.getOpcode() != ISD::UNDEF) {
02042       unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
02043       if (Val > NumElems) {
02044         Arg = DAG.getConstant(NumElems, Arg.getValueType());
02045         Changed = true;
02046       }
02047     }
02048     MaskVec.push_back(Arg);
02049   }
02050 
02051   if (Changed)
02052     Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec);
02053   return Mask;
02054 }
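// For example, when V2 is a splat the mask <0,5,1,7> is rewritten to
// <0,4,1,4>: every lane of a splatted V2 holds the same value, so pointing
// all V2 references at element NumElems lets the unpck matchers above fire.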
02055 
02056 /// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d} or movd
02057 /// operation of specified width.
02058 static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
02059   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
02060   MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
02061 
02062   std::vector<SDOperand> MaskVec;
02063   MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
02064   for (unsigned i = 1; i != NumElems; ++i)
02065     MaskVec.push_back(DAG.getConstant(i, BaseVT));
02066   return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
02067 }
02068 
02069 /// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
02070 /// of specified width.
02071 static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
02072   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
02073   MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
02074   std::vector<SDOperand> MaskVec;
02075   for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
02076     MaskVec.push_back(DAG.getConstant(i,            BaseVT));
02077     MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
02078   }
02079   return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
02080 }
02081 
02082 /// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
02083 /// of specified width.
02084 static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
02085   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
02086   MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
02087   unsigned Half = NumElems/2;
02088   std::vector<SDOperand> MaskVec;
02089   for (unsigned i = 0; i != Half; ++i) {
02090     MaskVec.push_back(DAG.getConstant(i + Half,            BaseVT));
02091     MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
02092   }
02093   return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
02094 }
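// For a width of 4 the two helpers above produce <0,4,1,5> (unpackl) and
// <2,6,3,7> (unpackh), matching the interleave performed by unpcklps and
// unpckhps respectively.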
02095 
02096 /// getZeroVector - Returns a vector of specified type with all zero elements.
02097 ///
02098 static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
02099   assert(MVT::isVector(VT) && "Expected a vector type");
02100   unsigned NumElems = getVectorNumElements(VT);
02101   MVT::ValueType EVT = MVT::getVectorBaseType(VT);
02102   bool isFP = MVT::isFloatingPoint(EVT);
02103   SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
02104   std::vector<SDOperand> ZeroVec(NumElems, Zero);
02105   return DAG.getNode(ISD::BUILD_VECTOR, VT, ZeroVec);
02106 }
02107 
02108 /// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
02109 ///
02110 static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
02111   SDOperand V1 = Op.getOperand(0);
02112   SDOperand Mask = Op.getOperand(2);
02113   MVT::ValueType VT = Op.getValueType();
02114   unsigned NumElems = Mask.getNumOperands();
02115   Mask = getUnpacklMask(NumElems, DAG);
02116   while (NumElems != 4) {
02117     V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
02118     NumElems >>= 1;
02119   }
02120   V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
02121 
02122   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
02123   Mask = getZeroVector(MaskVT, DAG);
02124   SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
02125                                   DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
02126   return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
02127 }
02128 
02129 /// isZeroNode - Returns true if Elt is a constant zero or a floating point
02130 /// constant +0.0.
02131 static inline bool isZeroNode(SDOperand Elt) {
02132   return ((isa<ConstantSDNode>(Elt) &&
02133            cast<ConstantSDNode>(Elt)->getValue() == 0) ||
02134           (isa<ConstantFPSDNode>(Elt) &&
02135            cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
02136 }
02137 
02138 /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
02139 /// vector and zero or undef vector.
02140 static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
02141                                              unsigned NumElems, unsigned Idx,
02142                                              bool isZero, SelectionDAG &DAG) {
02143   SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
02144   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
02145   MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
02146   SDOperand Zero = DAG.getConstant(0, EVT);
02147   std::vector<SDOperand> MaskVec(NumElems, Zero);
02148   MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
02149   SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
02150   return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
02151 }
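// For example, with VT = v4i32, Idx = 0, and isZero = true, this builds
// shuffle(zerovec, V2, <4,0,0,0>): lane 0 is taken from V2 and every other
// lane reads element 0 of the zero vector, zero-extending the scalar into
// the full register.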
02152 
02153 /// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
02154 ///
02155 static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
02156                                        unsigned NumNonZero, unsigned NumZero,
02157                                        SelectionDAG &DAG) {
02158   if (NumNonZero > 8)
02159     return SDOperand();
02160 
02161   SDOperand V(0, 0);
02162   bool First = true;
02163   for (unsigned i = 0; i < 16; ++i) {
02164     bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
02165     if (ThisIsNonZero && First) {
02166       if (NumZero)
02167         V = getZeroVector(MVT::v8i16, DAG);
02168       else
02169         V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
02170       First = false;
02171     }
02172 
02173     if ((i & 1) != 0) {
02174       SDOperand ThisElt(0, 0), LastElt(0, 0);
02175       bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
02176       if (LastIsNonZero) {
02177         LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
02178       }
02179       if (ThisIsNonZero) {
02180         ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
02181         ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
02182                               ThisElt, DAG.getConstant(8, MVT::i8));
02183         if (LastIsNonZero)
02184           ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
02185       } else
02186         ThisElt = LastElt;
02187 
02188       if (ThisElt.Val)
02189         V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
02190                         DAG.getConstant(i/2, MVT::i32));
02191     }
02192   }
02193 
02194   return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
02195 }
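// For example, adjacent non-zero bytes b0 and b1 are merged above into
// (zext(b1) << 8) | zext(b0) and inserted as word 0 with a single PINSRW, so
// at most eight inserts cover all sixteen byte elements.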
02196 
02197 /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
02198 ///
02199 static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
02200                                        unsigned NumNonZero, unsigned NumZero,
02201                                        SelectionDAG &DAG) {
02202   if (NumNonZero > 4)
02203     return SDOperand();
02204 
02205   SDOperand V(0, 0);
02206   bool First = true;
02207   for (unsigned i = 0; i < 8; ++i) {
02208     bool isNonZero = (NonZeros & (1 << i)) != 0;
02209     if (isNonZero) {
02210       if (First) {
02211         if (NumZero)
02212           V = getZeroVector(MVT::v8i16, DAG);
02213         else
02214           V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
02215         First = false;
02216       }
02217       V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
02218                       DAG.getConstant(i, MVT::i32));
02219     }
02220   }
02221 
02222   return V;
02223 }
02224 
02225 SDOperand
02226 X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
02227   // All zeros are handled with pxor.
02228   if (ISD::isBuildVectorAllZeros(Op.Val))
02229     return Op;
02230 
02231   // All ones are handled with pcmpeqd.
02232   if (ISD::isBuildVectorAllOnes(Op.Val))
02233     return Op;
02234 
02235   MVT::ValueType VT = Op.getValueType();
02236   MVT::ValueType EVT = MVT::getVectorBaseType(VT);
02237   unsigned EVTBits = MVT::getSizeInBits(EVT);
02238 
02239   unsigned NumElems = Op.getNumOperands();
02240   unsigned NumZero  = 0;
02241   unsigned NumNonZero = 0;
02242   unsigned NonZeros = 0;
02243   std::set<SDOperand> Values;
02244   for (unsigned i = 0; i < NumElems; ++i) {
02245     SDOperand Elt = Op.getOperand(i);
02246     if (Elt.getOpcode() != ISD::UNDEF) {
02247       Values.insert(Elt);
02248       if (isZeroNode(Elt))
02249         NumZero++;
02250       else {
02251         NonZeros |= (1 << i);
02252         NumNonZero++;
02253       }
02254     }
02255   }
02256 
02257   if (NumNonZero == 0)
02258     // Must be a mix of zero and undef. Return a zero vector.
02259     return getZeroVector(VT, DAG);
02260 
02261   // Splat is obviously ok. Let the legalizer expand it to a shuffle.
02262   if (Values.size() == 1)
02263     return SDOperand();
02264 
02265   // Special case for single non-zero element.
02266   if (NumNonZero == 1) {
02267     unsigned Idx = CountTrailingZeros_32(NonZeros);
02268     SDOperand Item = Op.getOperand(Idx);
02269     Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
02270     if (Idx == 0)
02271       // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
02272       return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
02273                                          NumZero > 0, DAG);
02274 
02275     if (EVTBits == 32) {
02276       // Turn it into a shuffle of zero and zero-extended scalar to vector.
02277       Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
02278                                          DAG);
02279       MVT::ValueType MaskVT  = MVT::getIntVectorWithNumElements(NumElems);
02280       MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
02281       std::vector<SDOperand> MaskVec;
02282       for (unsigned i = 0; i < NumElems; i++)
02283         MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
02284       SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
02285       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
02286                          DAG.getNode(ISD::UNDEF, VT), Mask);
02287     }
02288   }
02289 
02290   // Let the legalizer expand 2-wide build_vectors.
02291   if (EVTBits == 64)
02292     return SDOperand();
02293 
02294   // If element VT is < 32 bits, convert it to inserts into a zero vector.
02295   if (EVTBits == 8) {
02296     SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG);
02297     if (V.Val) return V;
02298   }
02299 
02300   if (EVTBits == 16) {
02301     SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG);
02302     if (V.Val) return V;
02303   }
02304 
02305   // If element VT is 32 bits, turn it into a number of shuffles.
02306   std::vector<SDOperand> V(NumElems);
02307   if (NumElems == 4 && NumZero > 0) {
02308     for (unsigned i = 0; i < 4; ++i) {
02309       bool isZero = !(NonZeros & (1 << i));
02310       if (isZero)
02311         V[i] = getZeroVector(VT, DAG);
02312       else
02313         V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
02314     }
02315 
02316     for (unsigned i = 0; i < 2; ++i) {
02317       switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
02318         default: break;
02319         case 0:
02320           V[i] = V[i*2];  // Must be a zero vector.
02321           break;
02322         case 1:
02323           V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
02324                              getMOVLMask(NumElems, DAG));
02325           break;
02326         case 2:
02327           V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
02328                              getMOVLMask(NumElems, DAG));
02329           break;
02330         case 3:
02331           V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
02332                              getUnpacklMask(NumElems, DAG));
02333           break;
02334       }
02335     }
02336 
02337     // Take advantage of the fact that a GR32-to-VR128 scalar_to_vector
02338     // (i.e. movd) clears the upper bits.
02339     // FIXME: we can do the same for v4f32 case when we know both parts of
02340     // the lower half come from scalar_to_vector (loadf32). We should do
02341     // that in post legalizer dag combiner with target specific hooks.
02342     if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
02343       return V[0];
02344     MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
02345     MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
02346     std::vector<SDOperand> MaskVec;
02347     bool Reverse = (NonZeros & 0x3) == 2;
02348     for (unsigned i = 0; i < 2; ++i)
02349       if (Reverse)
02350         MaskVec.push_back(DAG.getConstant(1-i, EVT));
02351       else
02352         MaskVec.push_back(DAG.getConstant(i, EVT));
02353     Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
02354     for (unsigned i = 0; i < 2; ++i)
02355       if (Reverse)
02356         MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
02357       else
02358         MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
02359     SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
02360     return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
02361   }
02362 
02363   if (Values.size() > 2) {
02364     // Expand into a number of unpckl*.
02365     // e.g. for v4f32
02366     //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
02367     //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
02368     //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
02369     SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
02370     for (unsigned i = 0; i < NumElems; ++i)
02371       V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
02372     NumElems >>= 1;
02373     while (NumElems != 0) {
02374       for (unsigned i = 0; i < NumElems; ++i)
02375         V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
02376                            UnpckMask);
02377       NumElems >>= 1;
02378     }
02379     return V[0];
02380   }
02381 
02382   return SDOperand();
02383 }
02384 
02385 SDOperand
02386 X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
02387   SDOperand V1 = Op.getOperand(0);
02388   SDOperand V2 = Op.getOperand(1);
02389   SDOperand PermMask = Op.getOperand(2);
02390   MVT::ValueType VT = Op.getValueType();
02391   unsigned NumElems = PermMask.getNumOperands();
02392   bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
02393   bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
02394 
02395   if (isSplatMask(PermMask.Val)) {
02396     if (NumElems <= 4) return Op;
02397     // Promote it to a v4i32 splat.
02398     return PromoteSplat(Op, DAG);
02399   }
02400 
02401   if (X86::isMOVLMask(PermMask.Val))
02402     return (V1IsUndef) ? V2 : Op;
02403       
02404   if (X86::isMOVSHDUPMask(PermMask.Val) ||
02405       X86::isMOVSLDUPMask(PermMask.Val) ||
02406       X86::isMOVHLPSMask(PermMask.Val) ||
02407       X86::isMOVHPMask(PermMask.Val) ||
02408       X86::isMOVLPMask(PermMask.Val))
02409     return Op;
02410 
02411   if (ShouldXformToMOVHLPS(PermMask.Val) ||
02412       ShouldXformToMOVLP(V1.Val, PermMask.Val))
02413     return CommuteVectorShuffle(Op, DAG);
02414 
02415   bool V1IsSplat = isSplatVector(V1.Val) || V1.getOpcode() == ISD::UNDEF;
02416   bool V2IsSplat = isSplatVector(V2.Val) || V2.getOpcode() == ISD::UNDEF;
02417   if (V1IsSplat && !V2IsSplat) {
02418     Op = CommuteVectorShuffle(Op, DAG);
02419     V1 = Op.getOperand(0);
02420     V2 = Op.getOperand(1);
02421     PermMask = Op.getOperand(2);
02422     V2IsSplat = true;
02423   }
02424 
02425   if (isCommutedMOVL(PermMask.Val, V2IsSplat)) {
02426     if (V2IsUndef) return V1;
02427     Op = CommuteVectorShuffle(Op, DAG);
02428     V1 = Op.getOperand(0);
02429     V2 = Op.getOperand(1);
02430     PermMask = Op.getOperand(2);
02431     if (V2IsSplat) {
02432       // V2 is a splat, so the mask may be malformed. That is, it may point
02433       // to any V2 element. The instruction selector won't like this. Get
02434       // a corrected mask and commute to form a proper MOVS{S|D}.
02435       SDOperand NewMask = getMOVLMask(NumElems, DAG);
02436       if (NewMask.Val != PermMask.Val)
02437         Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
02438     }
02439     return Op;
02440   }
02441 
02442   if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
02443       X86::isUNPCKLMask(PermMask.Val) ||
02444       X86::isUNPCKHMask(PermMask.Val))
02445     return Op;
02446 
02447   if (V2IsSplat) {
02448     // Normalize the mask so all entries that point to V2 point to its first
02449     // element, then try to match unpck{h|l} again. If it matches, return a
02450     // new vector_shuffle with the corrected mask.
02451     SDOperand NewMask = NormalizeMask(PermMask, DAG);
02452     if (NewMask.Val != PermMask.Val) {
02453       if (X86::isUNPCKLMask(NewMask.Val, true)) {
02454         SDOperand NewMask = getUnpacklMask(NumElems, DAG);
02455         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
02456       } else if (X86::isUNPCKHMask(NewMask.Val, true)) {
02457         SDOperand NewMask = getUnpackhMask(NumElems, DAG);
02458         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
02459       }
02460     }
02461   }
02462 
02463   // Normalize the node to match x86 shuffle ops if needed
02464   if (V2.getOpcode() != ISD::UNDEF)
02465     if (isCommutedSHUFP(PermMask.Val)) {
02466       Op = CommuteVectorShuffle(Op, DAG);
02467       V1 = Op.getOperand(0);
02468       V2 = Op.getOperand(1);
02469       PermMask = Op.getOperand(2);
02470     }
02471 
02472   // If VT is integer, try PSHUF* first, then SHUFP*.
02473   if (MVT::isInteger(VT)) {
02474     if (X86::isPSHUFDMask(PermMask.Val) ||
02475         X86::isPSHUFHWMask(PermMask.Val) ||
02476         X86::isPSHUFLWMask(PermMask.Val)) {
02477       if (V2.getOpcode() != ISD::UNDEF)
02478         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
02479                            DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
02480       return Op;
02481     }
02482 
02483     if (X86::isSHUFPMask(PermMask.Val))
02484       return Op;
02485 
02486     // Handle v8i16 shuffles by splitting into a shuffle-low / shuffle-high pair.
02487     if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
02488       MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
02489       MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
02490       std::vector<SDOperand> MaskVec;
02491       for (unsigned i = 0; i != 4; ++i)
02492         MaskVec.push_back(PermMask.getOperand(i));
02493       for (unsigned i = 4; i != 8; ++i)
02494         MaskVec.push_back(DAG.getConstant(i, BaseVT));
02495       SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
02496       V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
02497       MaskVec.clear();
02498       for (unsigned i = 0; i != 4; ++i)
02499         MaskVec.push_back(DAG.getConstant(i, BaseVT));
02500       for (unsigned i = 4; i != 8; ++i)
02501         MaskVec.push_back(PermMask.getOperand(i));
02502       Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
02503       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
02504     }
02505   } else {
02506     // Floating point cases in the other order.
02507     if (X86::isSHUFPMask(PermMask.Val))
02508       return Op;
02509     if (X86::isPSHUFDMask(PermMask.Val) ||
02510         X86::isPSHUFHWMask(PermMask.Val) ||
02511         X86::isPSHUFLWMask(PermMask.Val)) {
02512       if (V2.getOpcode() != ISD::UNDEF)
02513         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
02514                            DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
02515       return Op;
02516     }
02517   }
02518 
02519   if (NumElems == 4) {
02520     MVT::ValueType MaskVT = PermMask.getValueType();
02521     MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
02522     std::vector<std::pair<int, int> > Locs;
02523     Locs.resize(NumElems);   // Locs is indexed directly below.
02524     std::vector<SDOperand> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
02525     std::vector<SDOperand> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
02526     unsigned NumHi = 0;
02527     unsigned NumLo = 0;
02528     // If no more than two elements come from either vector, this can be
02529     // implemented with two shuffles. The first shuffle gathers the elements.
02530     // The second shuffle, which takes the first shuffle as both of its
02531     // vector operands, puts the elements into the right order.
02532     for (unsigned i = 0; i != NumElems; ++i) {
02533       SDOperand Elt = PermMask.getOperand(i);
02534       if (Elt.getOpcode() == ISD::UNDEF) {
02535         Locs[i] = std::make_pair(-1, -1);
02536       } else {
02537         unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
02538         if (Val < NumElems) {
02539           Locs[i] = std::make_pair(0, NumLo);
02540           Mask1[NumLo] = Elt;
02541           NumLo++;
02542         } else {
02543           Locs[i] = std::make_pair(1, NumHi);
02544           if (2+NumHi < NumElems)
02545             Mask1[2+NumHi] = Elt;
02546           NumHi++;
02547         }
02548       }
02549     }
02550     if (NumLo <= 2 && NumHi <= 2) {
02551       V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
02552                        DAG.getNode(ISD::BUILD_VECTOR, MaskVT, Mask1));
02553       for (unsigned i = 0; i != NumElems; ++i) {
02554         if (Locs[i].first == -1)
02555           continue;
02556         else {
02557           unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
02558           Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
02559           Mask2[i] = DAG.getConstant(Idx, MaskEVT);
02560         }
02561       }
02562 
02563       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
02564                          DAG.getNode(ISD::BUILD_VECTOR, MaskVT, Mask2));
02565     }
02566 
02567     // Break it into (shuffle shuffle_hi, shuffle_lo).
02568     Locs.assign(NumElems, std::make_pair(-1, -1));  // Re-size; every slot is rewritten below.
02569     std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
02570     std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
02571     std::vector<SDOperand> *MaskPtr = &LoMask;
02572     unsigned MaskIdx = 0;
02573     unsigned LoIdx = 0;
02574     unsigned HiIdx = NumElems/2;
02575     for (unsigned i = 0; i != NumElems; ++i) {
02576       if (i == NumElems/2) {
02577         MaskPtr = &HiMask;
02578         MaskIdx = 1;
02579         LoIdx = 0;
02580         HiIdx = NumElems/2;
02581       }
02582       SDOperand Elt = PermMask.getOperand(i);
02583       if (Elt.getOpcode() == ISD::UNDEF) {
02584         Locs[i] = std::make_pair(-1, -1);
02585       } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
02586         Locs[i] = std::make_pair(MaskIdx, LoIdx);
02587         (*MaskPtr)[LoIdx] = Elt;
02588         LoIdx++;
02589       } else {
02590         Locs[i] = std::make_pair(MaskIdx, HiIdx);
02591         (*MaskPtr)[HiIdx] = Elt;
02592         HiIdx++;
02593       }
02594     }
02595 
02596     SDOperand LoShuffle =
02597       DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
02598                   DAG.getNode(ISD::BUILD_VECTOR, MaskVT, LoMask));
02599     SDOperand HiShuffle = 
02600       DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
02601                   DAG.getNode(ISD::BUILD_VECTOR, MaskVT, HiMask));
02602     std::vector<SDOperand> MaskOps;
02603     for (unsigned i = 0; i != NumElems; ++i) {
02604       if (Locs[i].first == -1) {
02605         MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
02606       } else {
02607         unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
02608         MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
02609       }
02610     }
02611     return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
02612                        DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskOps));
02613   }
02614 
02615   return SDOperand();
02616 }
02617 
02618 SDOperand
02619 X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
02620   if (!isa<ConstantSDNode>(Op.getOperand(1)))
02621     return SDOperand();
02622 
02623   MVT::ValueType VT = Op.getValueType();
02624   // TODO: handle v16i8.
02625   if (MVT::getSizeInBits(VT) == 16) {
02626     // Transform it so it matches pextrw, which produces a 32-bit result.
02627     MVT::ValueType EVT = (MVT::ValueType)(VT+1);
02628     SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
02629                                     Op.getOperand(0), Op.getOperand(1));
02630     SDOperand Assert  = DAG.getNode(ISD::AssertZext, EVT, Extract,
02631                                     DAG.getValueType(VT));
02632     return DAG.getNode(ISD::TRUNCATE, VT, Assert);
02633   } else if (MVT::getSizeInBits(VT) == 32) {
02634     SDOperand Vec = Op.getOperand(0);
02635     unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
02636     if (Idx == 0)
02637       return Op;
02638     // SHUFPS the element to the lowest double word, then movss.
02639     MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
02640     std::vector<SDOperand> IdxVec;
02641     IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
02642     IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
02643     IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
02644     IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
02645     SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
02646     Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
02647                       Vec, Vec, Mask);
02648     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
02649                        DAG.getConstant(0, getPointerTy()));
02650   } else if (MVT::getSizeInBits(VT) == 64) {
02651     SDOperand Vec = Op.getOperand(0);
02652     unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
02653     if (Idx == 0)
02654       return Op;
02655 
02656     // UNPCKHPD the element to the lowest double word, then movsd.
02657     // Note that if the lower 64 bits of the UNPCKHPD result are then stored
02658     // to f64mem, the whole operation is folded into a single MOVHPDmr.
02659     MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
02660     std::vector<SDOperand> IdxVec;
02661     IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
02662     IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
02663     SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
02664     Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
02665                       Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
02666     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
02667                        DAG.getConstant(0, getPointerTy()));
02668   }
02669 
02670   return SDOperand();
02671 }
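// Illustrative reference (not part of the original lowering) for the 16-bit
// path above: PEXTRW zero-extends the selected word into a 32-bit register,
// and the AssertZext/TRUNCATE pair merely records and then strips that
// extension so the result is a plain i16.
static unsigned short PextrwReference(const unsigned short V[8], unsigned Idx) {
  unsigned Wide = V[Idx];          // PEXTRW: word zero-extended to 32 bits
  return (unsigned short)Wide;     // TRUNCATE back to the 16-bit result
}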
02672 
02673 SDOperand
02674 X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
02675   // Transform it so it matches pinsrw, which expects a 16-bit value in a
02676   // GR32 as its second argument.
02677   MVT::ValueType VT = Op.getValueType();
02678   MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
02679   SDOperand N0 = Op.getOperand(0);
02680   SDOperand N1 = Op.getOperand(1);
02681   SDOperand N2 = Op.getOperand(2);
02682   if (MVT::getSizeInBits(BaseVT) == 16) {
02683     if (N1.getValueType() != MVT::i32)
02684       N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
02685     if (N2.getValueType() != MVT::i32)
02686       N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
02687     return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
02688   } else if (MVT::getSizeInBits(BaseVT) == 32) {
02689     unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
02690     if (Idx == 0) {
02691       // Use a movss.
02692       N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
02693       MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
02694       MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
02695       std::vector<SDOperand> MaskVec;
02696       MaskVec.push_back(DAG.getConstant(4, BaseVT));
02697       for (unsigned i = 1; i <= 3; ++i)
02698         MaskVec.push_back(DAG.getConstant(i, BaseVT));
02699       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
02700                          DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec));
02701     } else {
02702       // Use two pinsrw instructions to insert a 32-bit value.
02703       Idx <<= 1;
02704       if (MVT::isFloatingPoint(N1.getValueType())) {
02705         if (N1.getOpcode() == ISD::LOAD) {
02706           // Just load directly from f32mem to GR32.
02707           N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1),
02708                            N1.getOperand(2));
02709         } else {
02710           N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
02711           N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
02712           N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
02713                            DAG.getConstant(0, getPointerTy()));
02714         }
02715       }
02716       N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
02717       N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
02718                        DAG.getConstant(Idx, getPointerTy()));
02719       N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
02720       N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
02721                        DAG.getConstant(Idx+1, getPointerTy()));
02722       return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
02723     }
02724   }
02725 
02726   return SDOperand();
02727 }
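// Illustrative reference for the two-PINSRW path above: viewed as v8i16, a
// 32-bit element at index Idx occupies the 16-bit lanes 2*Idx and 2*Idx+1
// (low half first, since x86 is little-endian).
static void TwoPinsrwReference(unsigned short V[8], unsigned Val, unsigned Idx) {
  V[2 * Idx]     = (unsigned short)Val;          // PINSRW lane 2*Idx
  V[2 * Idx + 1] = (unsigned short)(Val >> 16);  // PINSRW lane 2*Idx+1
}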
02728 
02729 SDOperand
02730 X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
02731   SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
02732   return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
02733 }
02734 
02735 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
02736 // their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
02737 // one of the above-mentioned nodes. It has to be wrapped because otherwise
02738 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
02739 // be used to form an addressing mode. These wrapped nodes will be selected
02740 // into MOV32ri.
02741 SDOperand
02742 X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
02743   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
02744   SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
02745                             DAG.getTargetConstantPool(CP->get(), getPointerTy(),
02746                                                       CP->getAlignment()));
02747   if (Subtarget->isTargetDarwin()) {
02748     // With PIC, the address is actually $g + Offset.
02749     if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
02750       Result = DAG.getNode(ISD::ADD, getPointerTy(),
02751                     DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
02752   }
02753 
02754   return Result;
02755 }
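// For Darwin PIC the net effect is address = GlobalBaseReg + wrapped target
// entry, i.e. the ADD built above; the wrapped constant-pool entry is then
// resolved relative to the PIC base rather than as an absolute address.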
02756 
02757 SDOperand
02758 X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
02759   GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
02760   SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
02761                                  DAG.getTargetGlobalAddress(GV,
02762                                                             getPointerTy()));
02763   if (Subtarget->isTargetDarwin()) {
02764     // With PIC, the address is actually $g + Offset.
02765     if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
02766       Result = DAG.getNode(ISD::ADD, getPointerTy(),
02767                            DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
02768                            Result);
02769 
02770     // For Darwin, external and weak symbols are indirect, so we want to load
02771     // the value at address GV, not the value of GV itself. This means that
02772     // the GlobalAddress must be in the base or index register of the address,
02773     // not the GV offset field.
02774     if (getTargetMachine().getRelocationModel() != Reloc::Static &&
02775         DarwinGVRequiresExtraLoad(GV))
02776       Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(),
02777                            Result, DAG.getSrcValue(NULL));
02778   }
02779 
02780   return Result;
02781 }
02782 
02783 SDOperand
02784 X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
02785   const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
02786   SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
02787                                  DAG.getTargetExternalSymbol(Sym,
02788                                                              getPointerTy()));
02789   if (Subtarget->isTargetDarwin()) {
02790     // With PIC, the address is actually $g + Offset.
02791     if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
02792       Result = DAG.getNode(ISD::ADD, getPointerTy(),
02793                            DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
02794                            Result);
02795   }
02796 
02797   return Result;
02798 }
02799 
02800 SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
02801     assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
02802            "Not an i64 shift!");
02803     bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
02804     SDOperand ShOpLo = Op.getOperand(0);
02805     SDOperand ShOpHi = Op.getOperand(1);
02806     SDOperand ShAmt  = Op.getOperand(2);
02807     SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi,
02808                                          DAG.getConstant(31, MVT::i8))
02809                            : DAG.getConstant(0, MVT::i32);
02810 
02811     SDOperand Tmp2, Tmp3;
02812     if (Op.getOpcode() == ISD::SHL_PARTS) {
02813       Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
02814       Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
02815     } else {
02816       Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
02817       Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
02818     }
02819 
02820     SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag,
02821                                    ShAmt, DAG.getConstant(32, MVT::i8));
02822 
02823     SDOperand Hi, Lo;
02824     SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
02825 
02826     std::vector<MVT::ValueType> Tys;
02827     Tys.push_back(MVT::i32);
02828     Tys.push_back(MVT::Flag);
02829     std::vector<SDOperand> Ops;
02830     if (Op.getOpcode() == ISD::SHL_PARTS) {
02831       Ops.push_back(Tmp2);
02832       Ops.push_back(Tmp3);
02833       Ops.push_back(CC);
02834       Ops.push_back(InFlag);
02835       Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
02836       InFlag = Hi.getValue(1);
02837 
02838       Ops.clear();
02839       Ops.push_back(Tmp3);
02840       Ops.push_back(Tmp1);
02841       Ops.push_back(CC);
02842       Ops.push_back(InFlag);
02843       Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
02844     } else {
02845       Ops.push_back(Tmp2);
02846       Ops.push_back(Tmp3);
02847       Ops.push_back(CC);
02848       Ops.push_back(InFlag);
02849       Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
02850       InFlag = Lo.getValue(1);
02851 
02852       Ops.clear();
02853       Ops.push_back(Tmp3);
02854       Ops.push_back(Tmp1);
02855       Ops.push_back(CC);
02856       Ops.push_back(InFlag);
02857       Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
02858     }
02859 
02860     Tys.clear();
02861     Tys.push_back(MVT::i32);
02862     Tys.push_back(MVT::i32);
02863     Ops.clear();
02864     Ops.push_back(Lo);
02865     Ops.push_back(Hi);
02866     return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
02867 }
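// Reference semantics (illustrative sketch, assuming the shift amount has
// already been reduced to 0..63) of the SHL_PARTS lowering above: SHLD/SHL
// compute the "small shift" results, and the CMOVs on TEST(ShAmt, 32) patch
// in the >= 32 case, where the low part becomes 0 and the high part takes
// the shifted low word.
static void ShlPartsReference(unsigned Lo, unsigned Hi, unsigned Amt,
                              unsigned &OutLo, unsigned &OutHi) {
  unsigned A = Amt & 31;                                 // x86 masks to 5 bits
  unsigned Shld = (Hi << A) | (A ? Lo >> (32 - A) : 0);  // SHLD Hi, Lo, A
  unsigned Shl  = Lo << A;                               // SHL Lo, A
  if (Amt & 32) { OutHi = Shl;  OutLo = 0;   }           // COND_NE case
  else          { OutHi = Shld; OutLo = Shl; }
}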
02868 
02869 SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
02870   assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
02871          Op.getOperand(0).getValueType() >= MVT::i16 &&
02872          "Unknown SINT_TO_FP to lower!");
02873 
02874   SDOperand Result;
02875   MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
02876   unsigned Size = MVT::getSizeInBits(SrcVT)/8;
02877   MachineFunction &MF = DAG.getMachineFunction();
02878   int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
02879   SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
02880   SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other,
02881                                 DAG.getEntryNode(), Op.getOperand(0),
02882                                 StackSlot, DAG.getSrcValue(NULL));
02883 
02884   // Build the FILD
02885   std::vector<MVT::ValueType> Tys;
02886   Tys.push_back(MVT::f64);
02887   Tys.push_back(MVT::Other);
02888   if (X86ScalarSSE) Tys.push_back(MVT::Flag);
02889   std::vector<SDOperand> Ops;
02890   Ops.push_back(Chain);
02891   Ops.push_back(StackSlot);
02892   Ops.push_back(DAG.getValueType(SrcVT));
02893   Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG : X86ISD::FILD,
02894                        Tys, Ops);
02895 
02896   if (X86ScalarSSE) {
02897     Chain = Result.getValue(1);
02898     SDOperand InFlag = Result.getValue(2);
02899 
02900     // FIXME: Currently the FST is flagged to the FILD_FLAG. This
02901     // shouldn't be necessary except that RFP cannot be live across
02902     // multiple blocks. When stackifier is fixed, they can be uncoupled.
02903     MachineFunction &MF = DAG.getMachineFunction();
02904     int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
02905     SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
02906     std::vector<MVT::ValueType> Tys;
02907     Tys.push_back(MVT::Other);
02908     std::vector<SDOperand> Ops;
02909     Ops.push_back(Chain);
02910     Ops.push_back(Result);
02911     Ops.push_back(StackSlot);
02912     Ops.push_back(DAG.getValueType(Op.getValueType()));
02913     Ops.push_back(InFlag);
02914     Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
02915     Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
02916                          DAG.getSrcValue(NULL));
02917   }
02918 
02919   return Result;
02920 }
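// FILD is used here because the x87 unit can load-and-convert an i16, i32,
// or i64 directly from memory; in the scalar-SSE case the x87 result is then
// spilled with FST and reloaded into an SSE register, as the code above notes.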
02921 
02922 SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
02923   assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
02924          "Unknown FP_TO_SINT to lower!");
02925   // We lower FP->sint into a FIST* store to a temporary stack slot,
02926   // followed by a load of the result.
02927   MachineFunction &MF = DAG.getMachineFunction();
02928   unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
02929   int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
02930   SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
02931 
02932   unsigned Opc;
02933   switch (Op.getValueType()) {
02934     default: assert(0 && "Invalid FP_TO_SINT to lower!");
02935     case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
02936     case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
02937     case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
02938   }
02939 
02940   SDOperand Chain = DAG.getEntryNode();
02941   SDOperand Value = Op.getOperand(0);
02942   if (X86ScalarSSE) {
02943     assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
02944     Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot, 
02945                         DAG.getSrcValue(0));
02946     std::vector<MVT::ValueType> Tys;
02947     Tys.push_back(MVT::f64);
02948     Tys.push_back(MVT::Other);
02949     std::vector<SDOperand> Ops;
02950     Ops.push_back(Chain);
02951     Ops.push_back(StackSlot);
02952     Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType()));
02953     Value = DAG.getNode(X86ISD::FLD, Tys, Ops);
02954     Chain = Value.getValue(1);
02955     SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
02956     StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
02957   }
02958 
02959   // Build the FP_TO_INT*_IN_MEM
02960   std::vector<SDOperand> Ops;
02961   Ops.push_back(Chain);
02962   Ops.push_back(Value);
02963   Ops.push_back(StackSlot);
02964   SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops);
02965 
02966   // Load the result.
02967   return DAG.getLoad(Op.getValueType(), FIST, StackSlot,
02968                      DAG.getSrcValue(NULL));
02969 }
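// The FIST family rounds with the current x87 rounding mode, while
// FP_TO_SINT must truncate; the FP_TO_INT*_IN_MEM pseudos emitted here are
// expanded in InsertAtEndOfBasicBlock below, which temporarily switches the
// control word to round-toward-zero around the store.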
02970 
02971 SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
02972   MVT::ValueType VT = Op.getValueType();
02973   const Type *OpNTy =  MVT::getTypeForValueType(VT);
02974   std::vector<Constant*> CV;
02975   if (VT == MVT::f64) {
02976     CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
02977     CV.push_back(ConstantFP::get(OpNTy, 0.0));
02978   } else {
02979     CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
02980     CV.push_back(ConstantFP::get(OpNTy, 0.0));
02981     CV.push_back(ConstantFP::get(OpNTy, 0.0));
02982     CV.push_back(ConstantFP::get(OpNTy, 0.0));
02983   }
02984   Constant *CS = ConstantStruct::get(CV);
02985   SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
02986   SDOperand Mask 
02987     = DAG.getNode(X86ISD::LOAD_PACK,
02988                   VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
02989   return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
02990 }
02991 
02992 SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
02993   MVT::ValueType VT = Op.getValueType();
02994   const Type *OpNTy =  MVT::getTypeForValueType(VT);
02995   std::vector<Constant*> CV;
02996   if (VT == MVT::f64) {
02997     CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
02998     CV.push_back(ConstantFP::get(OpNTy, 0.0));
02999   } else {
03000     CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
03001     CV.push_back(ConstantFP::get(OpNTy, 0.0));
03002     CV.push_back(ConstantFP::get(OpNTy, 0.0));
03003     CV.push_back(ConstantFP::get(OpNTy, 0.0));
03004   }
03005   Constant *CS = ConstantStruct::get(CV);
03006   SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
03007   SDOperand Mask  = DAG.getNode(X86ISD::LOAD_PACK,
03008                           VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
03009   return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
03010 }
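// Scalar reference (illustrative sketch; assumes DoubleToBits from
// llvm/Support/MathExtras.h is available alongside the BitsToDouble used
// above) for the two constant-pool masks: FABS clears the sign bit with an
// AND, FNEG flips it with an XOR.
static double FAbsReference(double X) {
  return BitsToDouble(DoubleToBits(X) & ~(1ULL << 63));  // X86ISD::FAND mask
}
static double FNegReference(double X) {
  return BitsToDouble(DoubleToBits(X) ^ (1ULL << 63));   // X86ISD::FXOR mask
}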
03011 
03012 SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
03013   assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
03014   SDOperand Cond;
03015   SDOperand CC = Op.getOperand(2);
03016   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
03017   bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
03018   bool Flip;
03019   unsigned X86CC;
03020   if (translateX86CC(CC, isFP, X86CC, Flip)) {
03021     if (Flip)
03022       Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
03023                          Op.getOperand(1), Op.getOperand(0));
03024     else
03025       Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
03026                          Op.getOperand(0), Op.getOperand(1));
03027     return DAG.getNode(X86ISD::SETCC, MVT::i8, 
03028                        DAG.getConstant(X86CC, MVT::i8), Cond);
03029   } else {
03030     assert(isFP && "Illegal integer SetCC!");
03031 
03032     Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
03033                        Op.getOperand(0), Op.getOperand(1));
03034     std::vector<MVT::ValueType> Tys;
03035     std::vector<SDOperand> Ops;
03036     switch (SetCCOpcode) {
03037       default: assert(false && "Illegal floating point SetCC!");
03038       case ISD::SETOEQ: {  // !PF & ZF
03039         Tys.push_back(MVT::i8);
03040         Tys.push_back(MVT::Flag);
03041         Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8));
03042         Ops.push_back(Cond);
03043         SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
03044         SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
03045                                      DAG.getConstant(X86ISD::COND_E, MVT::i8),
03046                                      Tmp1.getValue(1));
03047         return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
03048       }
03049       case ISD::SETUNE: {  // PF | !ZF
03050         Tys.push_back(MVT::i8);
03051         Tys.push_back(MVT::Flag);
03052         Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8));
03053         Ops.push_back(Cond);
03054         SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
03055         SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
03056                                      DAG.getConstant(X86ISD::COND_NE, MVT::i8),
03057                                      Tmp1.getValue(1));
03058         return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
03059       }
03060     }
03061   }
03062 }
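// Reference truth functions (illustrative) for the two fixed-up cases above,
// in terms of the flags a COMI/UCOMI-style compare produces; an unordered
// result sets both ZF and PF, which is why one SETCC per flag is combined
// with an AND or OR.
static bool SetOEQReference(bool ZF, bool PF) { return !PF && ZF; }  // !PF & ZF
static bool SetUNEReference(bool ZF, bool PF) { return PF || !ZF; }  // PF | !ZF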
03063 
03064 SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
03065   MVT::ValueType VT = Op.getValueType();
03066   bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE;
03067   bool addTest   = false;
03068   SDOperand Op0 = Op.getOperand(0);
03069   SDOperand Cond, CC;
03070   if (Op0.getOpcode() == ISD::SETCC)
03071     Op0 = LowerOperation(Op0, DAG);
03072 
03073   if (Op0.getOpcode() == X86ISD::SETCC) {
03074     // If the condition flag is set by an X86ISD::CMP, then make a copy of it
03075     // (since a flag operand cannot be shared). If the X86ISD::SETCC has no
03076     // other use, it will be eliminated.
03077     // If the X86ISD::SETCC has more than one use, then it's probably better
03078     // to use a test instead of duplicating the X86ISD::CMP (for register
03079     // pressure reasons).
03080     unsigned CmpOpc = Op0.getOperand(1).getOpcode();
03081     if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
03082         CmpOpc == X86ISD::UCOMI) {
03083       if (!Op0.hasOneUse()) {
03084         std::vector<MVT::ValueType> Tys;
03085         for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i)
03086           Tys.push_back(Op0.Val->getValueType(i));
03087         std::vector<SDOperand> Ops;
03088         for (unsigned i = 0; i < Op0.getNumOperands(); ++i)
03089           Ops.push_back(Op0.getOperand(i));
03090         Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
03091       }
03092 
03093       CC   = Op0.getOperand(0);
03094       Cond = Op0.getOperand(1);
03095       // Make a copy, as a flag result cannot be used by more than one user.
03096       Cond = DAG.getNode(CmpOpc, MVT::Flag,
03097                          Cond.getOperand(0), Cond.getOperand(1));
03098       addTest =
03099         isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
03100     } else
03101       addTest = true;
03102   } else
03103     addTest = true;
03104 
03105   if (addTest) {
03106     CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
03107     Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0);
03108   }
03109 
03110   std::vector<MVT::ValueType> Tys;
03111   Tys.push_back(Op.getValueType());
03112   Tys.push_back(MVT::Flag);
03113   std::vector<SDOperand> Ops;
03114   // X86ISD::CMOV selects its second operand when the condition is true and
03115   // its first otherwise, so the false value (operand 2) goes first.
03116   Ops.push_back(Op.getOperand(2));
03117   Ops.push_back(Op.getOperand(1));
03118   Ops.push_back(CC);
03119   Ops.push_back(Cond);
03120   return DAG.getNode(X86ISD::CMOV, Tys, Ops);
03121 }
03122 
03123 SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
03124   bool addTest = false;
03125   SDOperand Cond  = Op.getOperand(1);
03126   SDOperand Dest  = Op.getOperand(2);
03127   SDOperand CC;
03128   if (Cond.getOpcode() == ISD::SETCC)
03129     Cond = LowerOperation(Cond, DAG);
03130 
03131   if (Cond.getOpcode() == X86ISD::SETCC) {
03132     // If the condition flag is set by an X86ISD::CMP, then make a copy of it
03133     // (since a flag operand cannot be shared). If the X86ISD::SETCC has no
03134     // other use, it will be eliminated.
03135     // If the X86ISD::SETCC has more than one use, then it's probably better
03136     // to use a test instead of duplicating the X86ISD::CMP (for register
03137     // pressure reasons).
03138     unsigned CmpOpc = Cond.getOperand(1).getOpcode();
03139     if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
03140         CmpOpc == X86ISD::UCOMI) {
03141       if (!Cond.hasOneUse()) {
03142         std::vector<MVT::ValueType> Tys;
03143         for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i)
03144           Tys.push_back(Cond.Val->getValueType(i));
03145         std::vector<SDOperand> Ops;
03146         for (unsigned i = 0; i < Cond.getNumOperands(); ++i)
03147           Ops.push_back(Cond.getOperand(i));
03148         Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops);
03149       }
03150 
03151       CC   = Cond.getOperand(0);
03152       Cond = Cond.getOperand(1);
03153       // Make a copy, as a flag result cannot be used by more than one user.
03154       Cond = DAG.getNode(CmpOpc, MVT::Flag,
03155                          Cond.getOperand(0), Cond.getOperand(1));
03156     } else
03157       addTest = true;
03158   } else
03159     addTest = true;
03160 
03161   if (addTest) {
03162     CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
03163     Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond);
03164   }
03165   return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
03166                      Op.getOperand(0), Dest, CC, Cond);
03167 }
03168 
03169 SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
03170   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
03171   SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
03172                                  DAG.getTargetJumpTable(JT->getIndex(),
03173                                                         getPointerTy()));
03174   if (Subtarget->isTargetDarwin()) {
03175     // With PIC, the address is actually $g + Offset.
03176     if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
03177       Result = DAG.getNode(ISD::ADD, getPointerTy(),
03178                            DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
03179                            Result);    
03180   }
03181 
03182   return Result;
03183 }
03184 
03185 SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
03186   unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
03187   if (CallingConv == CallingConv::Fast && EnableFastCC)
03188     return LowerFastCCCallTo(Op, DAG);
03189   else
03190     return LowerCCCCallTo(Op, DAG);
03191 }
03192 
03193 SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
03194   SDOperand Copy;
03195     
03196   switch(Op.getNumOperands()) {
03197     default:
03198       assert(0 && "Do not know how to return this many arguments!");
03199       abort();
03200     case 1:    // ret void.
03201       return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0),
03202                         DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
03203     case 3: {
03204       MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
03205       
03206       if (MVT::isVector(ArgVT)) {
03207         // Integer or FP vector result -> XMM0.
03208         if (DAG.getMachineFunction().liveout_empty())
03209           DAG.getMachineFunction().addLiveOut(X86::XMM0);
03210         Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1),
03211                                 SDOperand());
03212       } else if (MVT::isInteger(ArgVT)) {
03213         // Integer result -> EAX
03214         if (DAG.getMachineFunction().liveout_empty())
03215           DAG.getMachineFunction().addLiveOut(X86::EAX);
03216 
03217         Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1),
03218                                 SDOperand());
03219       } else if (!X86ScalarSSE) {
03220         // FP return with fp-stack value.
03221         if (DAG.getMachineFunction().liveout_empty())
03222           DAG.getMachineFunction().addLiveOut(X86::ST0);
03223 
03224         std::vector<MVT::ValueType> Tys;
03225         Tys.push_back(MVT::Other);
03226         Tys.push_back(MVT::Flag);
03227         std::vector<SDOperand> Ops;
03228         Ops.push_back(Op.getOperand(0));
03229         Ops.push_back(Op.getOperand(1));
03230         Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
03231       } else {
03232         // FP return with ScalarSSE (return on fp-stack).
03233         if (DAG.getMachineFunction().liveout_empty())
03234           DAG.getMachineFunction().addLiveOut(X86::ST0);
03235 
03236         SDOperand MemLoc;
03237         SDOperand Chain = Op.getOperand(0);
03238         SDOperand Value = Op.getOperand(1);
03239 
03240         if (Value.getOpcode() == ISD::LOAD &&
03241             (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
03242           Chain  = Value.getOperand(0);
03243           MemLoc = Value.getOperand(1);
03244         } else {
03245           // Spill the value to memory and reload it into top of stack.
03246           unsigned Size = MVT::getSizeInBits(ArgVT)/8;
03247           MachineFunction &MF = DAG.getMachineFunction();
03248           int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
03249           MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
03250           Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 
03251                               Value, MemLoc, DAG.getSrcValue(0));
03252         }
03253         std::vector<MVT::ValueType> Tys;
03254         Tys.push_back(MVT::f64);
03255         Tys.push_back(MVT::Other);
03256         std::vector<SDOperand> Ops;
03257         Ops.push_back(Chain);
03258         Ops.push_back(MemLoc);
03259         Ops.push_back(DAG.getValueType(ArgVT));
03260         Copy = DAG.getNode(X86ISD::FLD, Tys, Ops);
03261         Tys.clear();
03262         Tys.push_back(MVT::Other);
03263         Tys.push_back(MVT::Flag);
03264         Ops.clear();
03265         Ops.push_back(Copy.getValue(1));
03266         Ops.push_back(Copy);
03267         Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
03268       }
03269       break;
03270     }
03271     case 5:
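      // Two integer results, e.g. an i64 return value split into two i32
      // halves: operand 1 is the low half (returned in EAX) and operand 3
      // the high half (returned in EDX).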
03272       if (DAG.getMachineFunction().liveout_empty()) {
03273         DAG.getMachineFunction().addLiveOut(X86::EAX);
03274         DAG.getMachineFunction().addLiveOut(X86::EDX);
03275       }
03276 
03277       Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(3), 
03278                               SDOperand());
03279       Copy = DAG.getCopyToReg(Copy, X86::EAX, Op.getOperand(1), Copy.getValue(1));
03280       break;
03281   }
03282   return DAG.getNode(X86ISD::RET_FLAG, MVT::Other,
03283                      Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
03284                      Copy.getValue(1));
03285 }
03286 
03287 SDOperand
03288 X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
03289   MachineFunction &MF = DAG.getMachineFunction();
03290   const Function* Fn = MF.getFunction();
03291   if (Fn->hasExternalLinkage() &&
03292       Subtarget->TargetType == X86Subtarget::isCygwin &&
03293       Fn->getName() == "main")
03294     MF.getInfo<X86FunctionInfo>()->setForceFramePointer(true);
03295 
03296   unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
03297   if (CC == CallingConv::Fast && EnableFastCC)
03298     return LowerFastCCArguments(Op, DAG);
03299   else
03300     return LowerCCCArguments(Op, DAG);
03301 }
03302 
03303 SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
03304   SDOperand InFlag(0, 0);
03305   SDOperand Chain = Op.getOperand(0);
03306   unsigned Align =
03307     (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
03308   if (Align == 0) Align = 1;
03309 
03310   ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
03311   // If not DWORD aligned, or if the size is below the threshold, call the
03312   // memset library function; it knows how to align to the right boundary.
03313   if ((Align & 3) != 0 ||
03314       (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
03315     MVT::ValueType IntPtr = getPointerTy();
03316     const Type *IntPtrTy = getTargetData()->getIntPtrType();
03317     std::vector<std::pair<SDOperand, const Type*> > Args;
03318     Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
03319     // Extend the ubyte argument to be an int value for the call.
03320     SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
03321     Args.push_back(std::make_pair(Val, IntPtrTy));
03322     Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
03323     std::pair<SDOperand,SDOperand> CallResult =
03324       LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
03325                   DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
03326     return CallResult.second;
03327   }
03328 
03329   MVT::ValueType AVT;
03330   SDOperand Count;
03331   ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
03332   unsigned BytesLeft = 0;
03333   bool TwoRepStos = false;
03334   if (ValC) {
03335     unsigned ValReg;
03336     unsigned Val = ValC->getValue() & 255;
03337 
03338     // If the value is a constant, then we can potentially use wider stores.
03339     switch (Align & 3) {
03340       case 2:   // WORD aligned
03341         AVT = MVT::i16;
03342         Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
03343         BytesLeft = I->getValue() % 2;
03344         Val    = (Val << 8) | Val;
03345         ValReg = X86::AX;
03346         break;
03347       case 0:   // DWORD aligned
03348         AVT = MVT::i32;
03349         if (I) {
03350           Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
03351           BytesLeft = I->getValue() % 4;
03352         } else {
03353           Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
03354                               DAG.getConstant(2, MVT::i8));
03355           TwoRepStos = true;
03356         }
03357         Val = (Val << 8)  | Val;
03358         Val = (Val << 16) | Val;
03359         ValReg = X86::EAX;
03360         break;
03361       default:  // Byte aligned
03362         AVT = MVT::i8;
03363         Count = Op.getOperand(3);
03364         ValReg = X86::AL;
03365         break;
03366     }
03367 
03368     Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
03369                               InFlag);
03370     InFlag = Chain.getValue(1);
03371   } else {
03372     AVT = MVT::i8;
03373     Count  = Op.getOperand(3);
03374     Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
03375     InFlag = Chain.getValue(1);
03376   }
03377 
03378   Chain  = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
03379   InFlag = Chain.getValue(1);
03380   Chain  = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
03381   InFlag = Chain.getValue(1);
03382 
03383   std::vector<MVT::ValueType> Tys;
03384   Tys.push_back(MVT::Other);
03385   Tys.push_back(MVT::Flag);
03386   std::vector<SDOperand> Ops;
03387   Ops.push_back(Chain);
03388   Ops.push_back(DAG.getValueType(AVT));
03389   Ops.push_back(InFlag);
03390   Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);
03391 
03392   if (TwoRepStos) {
03393     InFlag = Chain.getValue(1);
03394     Count = Op.getOperand(3);
03395     MVT::ValueType CVT = Count.getValueType();
03396     SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
03397                                  DAG.getConstant(3, CVT));
03398     Chain  = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
03399     InFlag = Chain.getValue(1);
03400     Tys.clear();
03401     Tys.push_back(MVT::Other);
03402     Tys.push_back(MVT::Flag);
03403     Ops.clear();
03404     Ops.push_back(Chain);
03405     Ops.push_back(DAG.getValueType(MVT::i8));
03406     Ops.push_back(InFlag);
03407     Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);
03408   } else if (BytesLeft) {
03409     // Issue stores for the last 1 - 3 bytes.
03410     SDOperand Value;
03411     unsigned Val = ValC->getValue() & 255;
03412     unsigned Offset = I->getValue() - BytesLeft;
03413     SDOperand DstAddr = Op.getOperand(1);
03414     MVT::ValueType AddrVT = DstAddr.getValueType();
03415     if (BytesLeft >= 2) {
03416       Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
03417       Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
03418                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
03419                                       DAG.getConstant(Offset, AddrVT)),
03420                           DAG.getSrcValue(NULL));
03421       BytesLeft -= 2;
03422       Offset += 2;
03423     }
03424 
03425     if (BytesLeft == 1) {
03426       Value = DAG.getConstant(Val, MVT::i8);
03427       Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
03428                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
03429                                       DAG.getConstant(Offset, AddrVT)),
03430                           DAG.getSrcValue(NULL));
03431     }
03432   }
03433 
03434   return Chain;
03435 }
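// Reference decomposition (illustrative sketch) of the DWORD-aligned,
// constant-size path above: one REP STOSD with ECX = Size/4 and EAX holding
// the byte replicated four times, then scalar stores for the 1-3 leftover
// bytes.
static void RepStosReference(unsigned char *Dst, unsigned char Byte,
                             unsigned Size) {
  unsigned Val = Byte * 0x01010101u;  // replicate the byte into all 4 lanes
  unsigned i = 0;
  for (; i + 4 <= Size; i += 4)       // REP STOSD, ECX = Size/4
    *(unsigned *)(Dst + i) = Val;     // assumes Dst is DWORD aligned
  for (; i != Size; ++i)              // trailing 1-3 bytes
    Dst[i] = Byte;
}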
03436 
03437 SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
03438   SDOperand Chain = Op.getOperand(0);
03439   unsigned Align =
03440     (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
03441   if (Align == 0) Align = 1;
03442 
03443   ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
03444   // If not DWORD aligned, or if the size is below the threshold, call the
03445   // memcpy library function; it knows how to align to the right boundary.
03446   if ((Align & 3) != 0 ||
03447       (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
03448     MVT::ValueType IntPtr = getPointerTy();
03449     const Type *IntPtrTy = getTargetData()->getIntPtrType();
03450     std::vector<std::pair<SDOperand, const Type*> > Args;
03451     Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
03452     Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy));
03453     Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
03454     std::pair<SDOperand,SDOperand> CallResult =
03455       LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
03456                   DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
03457     return CallResult.second;
03458   }
03459 
03460   MVT::ValueType AVT;
03461   SDOperand Count;
03462   unsigned BytesLeft = 0;
03463   bool TwoRepMovs = false;
03464   switch (Align & 3) {
03465     case 2:   // WORD aligned
03466       AVT = MVT::i16;
03467       Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
03468       BytesLeft = I->getValue() % 2;
03469       break;
03470     case 0:   // DWORD aligned
03471       AVT = MVT::i32;
03472       if (I) {
03473         Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
03474         BytesLeft = I->getValue() % 4;
03475       } else {
03476         Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
03477                             DAG.getConstant(2, MVT::i8));
03478         TwoRepMovs = true;
03479       }
03480       break;
03481     default:  // Byte aligned
03482       AVT = MVT::i8;
03483       Count = Op.getOperand(3);
03484       break;
03485   }
03486 
03487   SDOperand InFlag(0, 0);
03488   Chain  = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
03489   InFlag = Chain.getValue(1);
03490   Chain  = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
03491   InFlag = Chain.getValue(1);
03492   Chain  = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag);
03493   InFlag = Chain.getValue(1);
03494 
03495   std::vector<MVT::ValueType> Tys;
03496   Tys.push_back(MVT::Other);
03497   Tys.push_back(MVT::Flag);
03498   std::vector<SDOperand> Ops;
03499   Ops.push_back(Chain);
03500   Ops.push_back(DAG.getValueType(AVT));
03501   Ops.push_back(InFlag);
03502   Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);
03503 
03504   if (TwoRepMovs) {
03505     InFlag = Chain.getValue(1);
03506     Count = Op.getOperand(3);
03507     MVT::ValueType CVT = Count.getValueType();
03508     SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
03509                                  DAG.getConstant(3, CVT));
03510     Chain  = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
03511     InFlag = Chain.getValue(1);
03512     Tys.clear();
03513     Tys.push_back(MVT::Other);
03514     Tys.push_back(MVT::Flag);
03515     Ops.clear();
03516     Ops.push_back(Chain);
03517     Ops.push_back(DAG.getValueType(MVT::i8));
03518     Ops.push_back(InFlag);
03519     Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);
03520   } else if (BytesLeft) {
03521     // Issue loads and stores for the last 1 - 3 bytes.
03522     unsigned Offset = I->getValue() - BytesLeft;
03523     SDOperand DstAddr = Op.getOperand(1);
03524     MVT::ValueType DstVT = DstAddr.getValueType();
03525     SDOperand SrcAddr = Op.getOperand(2);
03526     MVT::ValueType SrcVT = SrcAddr.getValueType();
03527     SDOperand Value;
03528     if (BytesLeft >= 2) {
03529       Value = DAG.getLoad(MVT::i16, Chain,
03530                           DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
03531                                       DAG.getConstant(Offset, SrcVT)),
03532                           DAG.getSrcValue(NULL));
03533       Chain = Value.getValue(1);
03534       Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
03535                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
03536                                       DAG.getConstant(Offset, DstVT)),
03537                           DAG.getSrcValue(NULL));
03538       BytesLeft -= 2;
03539       Offset += 2;
03540     }
03541 
03542     if (BytesLeft == 1) {
03543       Value = DAG.getLoad(MVT::i8, Chain,
03544                           DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
03545                                       DAG.getConstant(Offset, SrcVT)),
03546                           DAG.getSrcValue(NULL));
03547       Chain = Value.getValue(1);
03548       Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
03549                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
03550                                       DAG.getConstant(Offset, DstVT)),
03551                           DAG.getSrcValue(NULL));
03552     }
03553   }
03554 
03555   return Chain;
03556 }
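// Matching reference (illustrative sketch) for the DWORD-aligned memcpy
// path: REP MOVSD with ECX = Size/4, then the same kind of scalar
// loads/stores for the leftover bytes that the tail code above issues.
static void RepMovsReference(unsigned char *Dst, const unsigned char *Src,
                             unsigned Size) {
  unsigned i = 0;
  for (; i + 4 <= Size; i += 4)       // REP MOVSD, ECX = Size/4
    *(unsigned *)(Dst + i) = *(const unsigned *)(Src + i);
  for (; i != Size; ++i)              // trailing 1-3 bytes
    Dst[i] = Src[i];
}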
03557 
03558 SDOperand
03559 X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
03560   std::vector<MVT::ValueType> Tys;
03561   Tys.push_back(MVT::Other);
03562   Tys.push_back(MVT::Flag);
03563   std::vector<SDOperand> Ops;
03564   Ops.push_back(Op.getOperand(0));
03565   SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops);
03566   Ops.clear();
03567   Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)));
03568   Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX, 
03569                                    MVT::i32, Ops[0].getValue(2)));
03570   Ops.push_back(Ops[1].getValue(1));
03571   Tys[0] = Tys[1] = MVT::i32;
03572   Tys.push_back(MVT::Other);
03573   return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
03574 }
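// RDTSC leaves the timestamp in EDX:EAX; the two CopyFromReg nodes above
// return it as a pair of i32 values, low word first, so the full 64-bit
// counter is (EDX << 32) | EAX.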
03575 
03576 SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
03577   // vastart just stores the address of the VarArgsFrameIndex slot into the
03578   // memory location argument.
03579   // FIXME: Replace MVT::i32 with PointerTy
03580   SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
03581   return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 
03582                      Op.getOperand(1), Op.getOperand(2));
03583 }
03584 
03585 SDOperand
03586 X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
03587   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
03588   switch (IntNo) {
03589   default: return SDOperand();    // Don't custom lower most intrinsics.
03590     // Comparison intrinsics.
03591   case Intrinsic::x86_sse_comieq_ss:
03592   case Intrinsic::x86_sse_comilt_ss:
03593   case Intrinsic::x86_sse_comile_ss:
03594   case Intrinsic::x86_sse_comigt_ss:
03595   case Intrinsic::x86_sse_comige_ss:
03596   case Intrinsic::x86_sse_comineq_ss:
03597   case Intrinsic::x86_sse_ucomieq_ss:
03598   case Intrinsic::x86_sse_ucomilt_ss:
03599   case Intrinsic::x86_sse_ucomile_ss:
03600   case Intrinsic::x86_sse_ucomigt_ss:
03601   case Intrinsic::x86_sse_ucomige_ss:
03602   case Intrinsic::x86_sse_ucomineq_ss:
03603   case Intrinsic::x86_sse2_comieq_sd:
03604   case Intrinsic::x86_sse2_comilt_sd:
03605   case Intrinsic::x86_sse2_comile_sd:
03606   case Intrinsic::x86_sse2_comigt_sd:
03607   case Intrinsic::x86_sse2_comige_sd:
03608   case Intrinsic::x86_sse2_comineq_sd:
03609   case Intrinsic::x86_sse2_ucomieq_sd:
03610   case Intrinsic::x86_sse2_ucomilt_sd:
03611   case Intrinsic::x86_sse2_ucomile_sd:
03612   case Intrinsic::x86_sse2_ucomigt_sd:
03613   case Intrinsic::x86_sse2_ucomige_sd:
03614   case Intrinsic::x86_sse2_ucomineq_sd: {
03615     unsigned Opc = 0;
03616     ISD::CondCode CC = ISD::SETCC_INVALID;
03617     switch (IntNo) {
03618     default: break;
03619     case Intrinsic::x86_sse_comieq_ss: 
03620     case Intrinsic::x86_sse2_comieq_sd: 
03621       Opc = X86ISD::COMI;
03622       CC = ISD::SETEQ;
03623       break;
03624     case Intrinsic::x86_sse_comilt_ss:
03625     case Intrinsic::x86_sse2_comilt_sd:
03626       Opc = X86ISD::COMI;
03627       CC = ISD::SETLT;
03628       break;
03629     case Intrinsic::x86_sse_comile_ss:
03630     case Intrinsic::x86_sse2_comile_sd:
03631       Opc = X86ISD::COMI;
03632       CC = ISD::SETLE;
03633       break;
03634     case Intrinsic::x86_sse_comigt_ss:
03635     case Intrinsic::x86_sse2_comigt_sd:
03636       Opc = X86ISD::COMI;
03637       CC = ISD::SETGT;
03638       break;
03639     case Intrinsic::x86_sse_comige_ss:
03640     case Intrinsic::x86_sse2_comige_sd:
03641       Opc = X86ISD::COMI;
03642       CC = ISD::SETGE;
03643       break;
03644     case Intrinsic::x86_sse_comineq_ss:
03645     case Intrinsic::x86_sse2_comineq_sd:
03646       Opc = X86ISD::COMI;
03647       CC = ISD::SETNE;
03648       break;
03649     case Intrinsic::x86_sse_ucomieq_ss:
03650     case Intrinsic::x86_sse2_ucomieq_sd:
03651       Opc = X86ISD::UCOMI;
03652       CC = ISD::SETEQ;
03653       break;
03654     case Intrinsic::x86_sse_ucomilt_ss:
03655     case Intrinsic::x86_sse2_ucomilt_sd:
03656       Opc = X86ISD::UCOMI;
03657       CC = ISD::SETLT;
03658       break;
03659     case Intrinsic::x86_sse_ucomile_ss:
03660     case Intrinsic::x86_sse2_ucomile_sd:
03661       Opc = X86ISD::UCOMI;
03662       CC = ISD::SETLE;
03663       break;
03664     case Intrinsic::x86_sse_ucomigt_ss:
03665     case Intrinsic::x86_sse2_ucomigt_sd:
03666       Opc = X86ISD::UCOMI;
03667       CC = ISD::SETGT;
03668       break;
03669     case Intrinsic::x86_sse_ucomige_ss:
03670     case Intrinsic::x86_sse2_ucomige_sd:
03671       Opc = X86ISD::UCOMI;
03672       CC = ISD::SETGE;
03673       break;
03674     case Intrinsic::x86_sse_ucomineq_ss:
03675     case Intrinsic::x86_sse2_ucomineq_sd:
03676       Opc = X86ISD::UCOMI;
03677       CC = ISD::SETNE;
03678       break;
03679     }
03680     bool Flip;
03681     unsigned X86CC;
03682     translateX86CC(CC, true, X86CC, Flip);
03683     SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1),
03684                                  Op.getOperand(Flip?1:2));
03685     SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 
03686                                   DAG.getConstant(X86CC, MVT::i8), Cond);
03687     return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
03688   }
03689   }
03690 }
03691 
03692 /// LowerOperation - Provide custom lowering hooks for some operations.
03693 ///
03694 SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
03695   switch (Op.getOpcode()) {
03696   default: assert(0 && "Should not custom lower this!");
03697   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
03698   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
03699   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
03700   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
03701   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
03702   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
03703   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
03704   case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
03705   case ISD::SHL_PARTS:
03706   case ISD::SRA_PARTS:
03707   case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
03708   case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
03709   case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
03710   case ISD::FABS:               return LowerFABS(Op, DAG);
03711   case ISD::FNEG:               return LowerFNEG(Op, DAG);
03712   case ISD::SETCC:              return LowerSETCC(Op, DAG);
03713   case ISD::SELECT:             return LowerSELECT(Op, DAG);
03714   case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
03715   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
03716   case ISD::CALL:               return LowerCALL(Op, DAG);
03717   case ISD::RET:                return LowerRET(Op, DAG);
03718   case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
03719   case ISD::MEMSET:             return LowerMEMSET(Op, DAG);
03720   case ISD::MEMCPY:             return LowerMEMCPY(Op, DAG);
03721   case ISD::READCYCLECOUNTER:   return LowerREADCYCLCECOUNTER(Op, DAG);
03722   case ISD::VASTART:            return LowerVASTART(Op, DAG);
03723   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
03724   }
03725 }
03726 
03727 const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
03728   switch (Opcode) {
03729   default: return NULL;
03730   case X86ISD::SHLD:               return "X86ISD::SHLD";
03731   case X86ISD::SHRD:               return "X86ISD::SHRD";
03732   case X86ISD::FAND:               return "X86ISD::FAND";
03733   case X86ISD::FXOR:               return "X86ISD::FXOR";
03734   case X86ISD::FILD:               return "X86ISD::FILD";
03735   case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
03736   case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
03737   case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
03738   case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
03739   case X86ISD::FLD:                return "X86ISD::FLD";
03740   case X86ISD::FST:                return "X86ISD::FST";
03741   case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
03742   case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
03743   case X86ISD::CALL:               return "X86ISD::CALL";
03744   case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
03745   case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
03746   case X86ISD::CMP:                return "X86ISD::CMP";
03747   case X86ISD::TEST:               return "X86ISD::TEST";
03748   case X86ISD::COMI:               return "X86ISD::COMI";
03749   case X86ISD::UCOMI:              return "X86ISD::UCOMI";
03750   case X86ISD::SETCC:              return "X86ISD::SETCC";
03751   case X86ISD::CMOV:               return "X86ISD::CMOV";
03752   case X86ISD::BRCOND:             return "X86ISD::BRCOND";
03753   case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
03754   case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
03755   case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
03756   case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
03757   case X86ISD::LOAD_UA:            return "X86ISD::LOAD_UA";
03758   case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
03759   case X86ISD::Wrapper:            return "X86ISD::Wrapper";
03760   case X86ISD::S2VEC:              return "X86ISD::S2VEC";
03761   case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
03762   case X86ISD::PINSRW:             return "X86ISD::PINSRW";
03763   }
03764 }
03765 
03766 /// isLegalAddressImmediate - Return true if the integer value or
03767 /// GlobalValue can be used as the offset of the target addressing mode.
03768 bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const {
03769   // X86 allows a sign-extended 32-bit immediate field.
03770   return (V > -(1LL << 32) && V < (1LL << 32)-1);
03771 }
03772 
03773 bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
03774   // GV is 64-bit, but the displacement field is only 32-bit unless we are in
03775   // the small code model. Mac OS X happens to support only the small PIC
03776   // code model.  FIXME: better support for other OS's.
03777   if (Subtarget->is64Bit() && !Subtarget->isTargetDarwin())
03778     return false;
03779   if (Subtarget->isTargetDarwin()) {
03780     Reloc::Model RModel = getTargetMachine().getRelocationModel();
03781     if (RModel == Reloc::Static)
03782       return true;
03783     else if (RModel == Reloc::DynamicNoPIC)
03784       return !DarwinGVRequiresExtraLoad(GV);
03785     else
03786       return false;
03787   } else
03788     return true;
03789 }
03790 
03791 /// isShuffleMaskLegal - Targets can use this to indicate that they only
03792 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
03793 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
03794 /// are assumed to be legal.
03795 bool
03796 X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
03797   // Only do shuffles on 128-bit vector types for now.
03798   if (MVT::getSizeInBits(VT) == 64) return false;
03799   return (Mask.Val->getNumOperands() <= 4 ||
03800           isSplatMask(Mask.Val)  ||
03801           isPSHUFHW_PSHUFLWMask(Mask.Val) ||
03802           X86::isUNPCKLMask(Mask.Val) ||
03803           X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
03804           X86::isUNPCKHMask(Mask.Val));
03805 }
03806 
03807 bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
03808                                                MVT::ValueType EVT,
03809                                                SelectionDAG &DAG) const {
03810   unsigned NumElts = BVOps.size();
03811   // Only do shuffles on 128-bit vector types for now.
03812   if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
03813   if (NumElts == 2) return true;
03814   if (NumElts == 4) {
03815     return (isMOVLMask(BVOps)  || isCommutedMOVL(BVOps, true) ||
03816             isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps));
03817   }
03818   return false;
03819 }
03820 
03821 //===----------------------------------------------------------------------===//
03822 //                           X86 Scheduler Hooks
03823 //===----------------------------------------------------------------------===//
03824 
03825 MachineBasicBlock *
03826 X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
03827                                            MachineBasicBlock *BB) {
03828   switch (MI->getOpcode()) {
03829   default: assert(false && "Unexpected instr type to insert");
03830   case X86::CMOV_FR32:
03831   case X86::CMOV_FR64:
03832   case X86::CMOV_V4F32:
03833   case X86::CMOV_V2F64:
03834   case X86::CMOV_V2I64: {
03835     // To "insert" a SELECT_CC instruction, we actually have to insert the
03836     // diamond control-flow pattern.  The incoming instruction knows the
03837     // destination vreg to set, the condition code register to branch on, the
03838     // true/false values to select between, and a branch opcode to use.
03839     const BasicBlock *LLVM_BB = BB->getBasicBlock();
03840     ilist<MachineBasicBlock>::iterator It = BB;
03841     ++It;
03842   
03843     //  thisMBB:
03844     //  ...
03845     //   TrueVal = ...
03846     //   cmpTY ccX, r1, r2
03847     //   bCC copy1MBB
03848     //   fallthrough --> copy0MBB
03849     MachineBasicBlock *thisMBB = BB;
03850     MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
03851     MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
03852     unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue());
03853     BuildMI(BB, Opc, 1).addMBB(sinkMBB);
03854     MachineFunction *F = BB->getParent();
03855     F->getBasicBlockList().insert(It, copy0MBB);
03856     F->getBasicBlockList().insert(It, sinkMBB);
03857     // Update machine-CFG edges by first adding all successors of the current
03858     // block to the new block, which will contain the PHI node for the select.
03859     for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
03860         e = BB->succ_end(); i != e; ++i)
03861       sinkMBB->addSuccessor(*i);
03862     // Next, remove all successors of the current block, and add the true
03863     // and fallthrough blocks as its successors.
03864     while (!BB->succ_empty())
03865       BB->removeSuccessor(BB->succ_begin());
03866     BB->addSuccessor(copy0MBB);
03867     BB->addSuccessor(sinkMBB);
03868   
03869     //  copy0MBB:
03870     //   %FalseValue = ...
03871     //   # fallthrough to sinkMBB
03872     BB = copy0MBB;
03873   
03874     // Update machine-CFG edges
03875     BB->addSuccessor(sinkMBB);
03876   
03877     //  sinkMBB:
03878     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
03879     //  ...
03880     BB = sinkMBB;
03881     BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg())
03882       .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
03883       .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
03884 
03885     delete MI;   // The pseudo instruction is gone now.
03886     return BB;
03887   }
03888 
03889   case X86::FP_TO_INT16_IN_MEM:
03890   case X86::FP_TO_INT32_IN_MEM:
03891   case X86::FP_TO_INT64_IN_MEM: {
03892     // Change the floating point control register to use "round towards zero"
03893     // mode when truncating to an integer value.
03894     MachineFunction *F = BB->getParent();
03895     int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
03896     addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
03897 
03898     // Load the old value of the control word so it can be restored later...
03899     unsigned OldCW =
03900       F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
03901     addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);
03902 
03903     // Store a control word that selects round-toward-zero (0xC7F)...
03904     addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);
03905 
03906     // Reload the modified control word now...
03907     addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
03908 
03909     // Restore the in-memory image of the control word to its original value
03910     addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);
03911 
03912     // Get the X86 opcode to use.
03913     unsigned Opc;
03914     switch (MI->getOpcode()) {
03915     default: assert(0 && "illegal opcode!");
03916     case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
03917     case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
03918     case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
03919     }
03920 
03921     X86AddressMode AM;
03922     MachineOperand &Op0 = MI->getOperand(0);
03923     if (Op0.isRegister()) {
03924       AM.BaseType = X86AddressMode::RegBase;
03925       AM.Base.Reg = Op0.getReg();
03926     } else {
03927       AM.BaseType = X86AddressMode::FrameIndexBase;
03928       AM.Base.FrameIndex = Op0.getFrameIndex();
03929     }
03930     MachineOperand &Op1 = MI->getOperand(1);
03931     if (Op1.isImmediate())
03932       AM.Scale = Op1.getImmedValue();
03933     MachineOperand &Op2 = MI->getOperand(2);
03934     if (Op2.isImmediate())
03935       AM.IndexReg = Op2.getImmedValue();
03936     MachineOperand &Op3 = MI->getOperand(3);
03937     if (Op3.isGlobalAddress()) {
03938       AM.GV = Op3.getGlobal();
03939     } else {
03940       AM.Disp = Op3.getImmedValue();
03941     }
03942     addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());
03943 
03944     // Reload the original control word now.
03945     addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
03946 
03947     delete MI;   // The pseudo instruction is gone now.
03948     return BB;
03949   }
03950   }
03951 }
03952 
03953 //===----------------------------------------------------------------------===//
03954 //                           X86 Optimization Hooks
03955 //===----------------------------------------------------------------------===//
03956 
03957 void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
03958                                                        uint64_t Mask,
03959                                                        uint64_t &KnownZero, 
03960                                                        uint64_t &KnownOne,
03961                                                        unsigned Depth) const {
03962   unsigned Opc = Op.getOpcode();
03963   assert((Opc >= ISD::BUILTIN_OP_END ||
03964           Opc == ISD::INTRINSIC_WO_CHAIN ||
03965           Opc == ISD::INTRINSIC_W_CHAIN ||
03966           Opc == ISD::INTRINSIC_VOID) &&
03967          "Should use MaskedValueIsZero if you don't know whether Op"
03968          " is a target node!");
03969 
03970   KnownZero = KnownOne = 0;   // Don't know anything.
03971   switch (Opc) {
03972   default: break;
03973   case X86ISD::SETCC: 
03974     KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
03975     break;
03976   }
03977 }
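// For example, for an i8 X86ISD::SETCC node the mask computed above is
// 0xFF ^ 1 == 0xFE: every bit but bit 0 is known zero, since setcc only
// ever produces 0 or 1.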
03978 
03979 /// getShuffleScalarElt - Returns the scalar element that will make up the ith
03980 /// element of the result of the vector shuffle.
03981 static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
03982   MVT::ValueType VT = N->getValueType(0);
03983   SDOperand PermMask = N->getOperand(2);
03984   unsigned NumElems = PermMask.getNumOperands();
03985   SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
03986   i %= NumElems;
03987   if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
03988     return (i == 0)
03989       ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
03990   } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
03991     SDOperand Idx = PermMask.getOperand(i);
03992     if (Idx.getOpcode() == ISD::UNDEF)
03993       return DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
03994     return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
03995   }
03996   return SDOperand();
03997 }
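// For example, given a v4f32 shuffle of (V1, V2), index 5 selects element 1
// of V2; if V2 is a SCALAR_TO_VECTOR, only element 0 yields its scalar and
// any other element is undef.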
03998 
03999 /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
04000 /// node is a GlobalAddress + an offset.
04001 static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
04002   if (N->getOpcode() == X86ISD::Wrapper) {
04003     if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
04004       GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
04005       return true;
04006     }
04007   } else if (N->getOpcode() == ISD::ADD) {
04008     SDOperand N1 = N->getOperand(0);
04009     SDOperand N2 = N->getOperand(1);
04010     if (isGAPlusOffset(N1.Val, GA, Offset)) {
04011       ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
04012       if (V) {
04013         Offset += V->getSignExtended();
04014         return true;
04015       }
04016     } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
04017       ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
04018       if (V) {
04019         Offset += V->getSignExtended();
04020         return true;
04021       }
04022     }
04023   }
04024   return false;
04025 }
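// For example, (add (X86ISD::Wrapper (globaladdress @G)), 8) matches with
// GA = @G and 8 added to Offset.  Offset is only ever added to, so the
// caller must initialize it; isConsecutiveLoad below passes 0.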
04026 
04027 /// isConsecutiveLoad - Returns true if N loads from an address Dist * Size
04028 /// bytes past the address that Base loads from.
04029 static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
04030                               MachineFrameInfo *MFI) {
04031   if (N->getOperand(0).Val != Base->getOperand(0).Val)
04032     return false;
04033 
04034   SDOperand Loc = N->getOperand(1);
04035   SDOperand BaseLoc = Base->getOperand(1);
04036   if (Loc.getOpcode() == ISD::FrameIndex) {
04037     if (BaseLoc.getOpcode() != ISD::FrameIndex)
04038       return false;
04039     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
04040     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
04041     int FS  = MFI->getObjectSize(FI);
04042     int BFS = MFI->getObjectSize(BFI);
04043     if (FS != BFS || FS != Size) return false;
04044     return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
04045   } else {
04046     GlobalValue *GV1 = NULL;
04047     GlobalValue *GV2 = NULL;
04048     int64_t Offset1 = 0;
04049     int64_t Offset2 = 0;
04050     bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
04051     bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
04052     if (isGA1 && isGA2 && GV1 == GV2)
04053       return Offset1 == (Offset2 + Dist*Size);
04054   }
04055 
04056   return false;
04057 }
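// For example, a 4-byte load from @G+4 is consecutive to a 4-byte base load
// from @G with Dist == 1 and Size == 4: both share the same chain operand,
// and Offset1 == Offset2 + 1*4.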
04058 
04059 static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
04060                               const X86Subtarget *Subtarget) {
04061   GlobalValue *GV = NULL;
04062   int64_t Offset = 0;
04063   if (isGAPlusOffset(Base, GV, Offset))
04064     return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
04065   else {
04066     assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
04067     int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
04068     if (BFI < 0)
04069       // Fixed objects do not specify alignment, but their offsets are known.
04070       return ((Subtarget->getStackAlignment() % 16) == 0 &&
04071               (MFI->getObjectOffset(BFI) % 16) == 0);
04072     else
04073       return MFI->getObjectAlignment(BFI) >= 16;
04074   }
04075   return false;
04076 }
04077 
04078 
04079 /// PerformShuffleCombine - Combine a vector_shuffle that is equal to
04080 /// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
04081 /// if the load addresses are consecutive, non-overlapping, and in the right
04082 /// order.
04083 static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
04084                                        const X86Subtarget *Subtarget) {
04085   MachineFunction &MF = DAG.getMachineFunction();
04086   MachineFrameInfo *MFI = MF.getFrameInfo();
04087   MVT::ValueType VT = N->getValueType(0);
04088   MVT::ValueType EVT = MVT::getVectorBaseType(VT);
04089   SDOperand PermMask = N->getOperand(2);
04090   int NumElems = (int)PermMask.getNumOperands();
04091   SDNode *Base = NULL;
04092   for (int i = 0; i < NumElems; ++i) {
04093     SDOperand Idx = PermMask.getOperand(i);
04094     if (Idx.getOpcode() == ISD::UNDEF) {
04095       if (!Base) return SDOperand();
04096     } else {
04097       SDOperand Arg =
04098         getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
04099       if (!Arg.Val || Arg.getOpcode() != ISD::LOAD)
04100         return SDOperand();
04101       if (!Base)
04102         Base = Arg.Val;
04103       else if (!isConsecutiveLoad(Arg.Val, Base,
04104                                   i, MVT::getSizeInBits(EVT)/8,MFI))
04105         return SDOperand();
04106     }
04107   }
04108 
04109   bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
04110   if (isAlign16)
04111     return DAG.getLoad(VT, Base->getOperand(0), Base->getOperand(1),
04112                        Base->getOperand(2));
04113   else
04114     // Just use movups, it's shorter.
04115     return DAG.getNode(ISD::BIT_CONVERT, VT,
04116                        DAG.getNode(X86ISD::LOAD_UA, MVT::v4f32,
04117                                    Base->getOperand(0), Base->getOperand(1),
04118                                    Base->getOperand(2)));
04119 }
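// For example, a <0, 1, 2, 3> shuffle of a build_vector of four f32 loads
// from @G, @G+4, @G+8 and @G+12 collapses into a single v4f32 load: a plain
// (aligned, movaps) load when the base is 16-byte aligned, otherwise the
// unaligned X86ISD::LOAD_UA (movups) bit-converted back to the shuffle type.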
04120 
04121 SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N, 
04122                                                DAGCombinerInfo &DCI) const {
04123   TargetMachine &TM = getTargetMachine();
04124   SelectionDAG &DAG = DCI.DAG;
04125   switch (N->getOpcode()) {
04126   default: break;
04127   case ISD::VECTOR_SHUFFLE:
04128     return PerformShuffleCombine(N, DAG, Subtarget);
04129   }
04130 
04131   return SDOperand();
04132 }
04133 
04134 //===----------------------------------------------------------------------===//
04135 //                           X86 Inline Assembly Support
04136 //===----------------------------------------------------------------------===//
04137 
04138 /// getConstraintType - Given a constraint letter, return the type of
04139 /// constraint it is for this target.
04140 X86TargetLowering::ConstraintType
04141 X86TargetLowering::getConstraintType(char ConstraintLetter) const {
04142   switch (ConstraintLetter) {
04143   case 'A':
04144   case 'r':
04145   case 'R':
04146   case 'l':
04147   case 'q':
04148   case 'Q':
04149   case 'x':
04150   case 'Y':
04151     return C_RegisterClass;
04152   default: return TargetLowering::getConstraintType(ConstraintLetter);
04153   }
04154 }
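// As a usage sketch (not code from this file), GCC-style inline assembly
// reaches these letters directly; "=A", for instance, pins a 64-bit result
// to the EDX:EAX pair:
//   unsigned long long t;
//   asm volatile("rdtsc" : "=A"(t));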
04155 
04156 std::vector<unsigned> X86TargetLowering::
04157 getRegClassForInlineAsmConstraint(const std::string &Constraint,
04158                                   MVT::ValueType VT) const {
04159   if (Constraint.size() == 1) {
04160     // FIXME: not handling fp-stack yet!
04161     // FIXME: not handling MMX registers yet ('y' constraint).
04162     switch (Constraint[0]) {      // GCC X86 Constraint Letters
04163     default: break;  // Unknown constraint letter
04164     case 'A':   // EAX/EDX
04165       if (VT == MVT::i32 || VT == MVT::i64)
04166         return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
04167       break;
04168     case 'r':   // GENERAL_REGS
04169     case 'R':   // LEGACY_REGS
04170       if (VT == MVT::i32)
04171         return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
04172                                      X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
04173       else if (VT == MVT::i16)
04174         return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 
04175                                      X86::SI, X86::DI, X86::BP, X86::SP, 0);
04176       else if (VT == MVT::i8)
04177         return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
04178       break;
04179     case 'l':   // INDEX_REGS
04180       if (VT == MVT::i32)
04181         return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
04182                                      X86::ESI, X86::EDI, X86::EBP, 0);
04183       else if (VT == MVT::i16)
04184         return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 
04185                                      X86::SI, X86::DI, X86::BP, 0);
04186       else if (VT == MVT::i8)
04187         return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
04188       break;
04189     case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
04190     case 'Q':   // Q_REGS
04191       if (VT == MVT::i32)
04192         return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
04193       else if (VT == MVT::i16)
04194         return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
04195       else if (VT == MVT::i8)
04196         return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
04197       break;
04198     case 'x':   // SSE_REGS if SSE1 allowed
04199       if (Subtarget->hasSSE1())
04200         return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
04201                                      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
04202                                      0);
04203       return std::vector<unsigned>();
04204     case 'Y':   // SSE_REGS if SSE2 allowed
04205       if (Subtarget->hasSSE2())
04206         return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
04207                                      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
04208                                      0);
04209       return std::vector<unsigned>();
04210     }
04211   }
04212   
04213   return std::vector<unsigned>();
04214 }
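// Likewise, as a usage sketch, "q" constrains an i8 operand to one of the
// byte-addressable registers (AL, BL, CL, DL):
//   unsigned char c;
//   asm("setc %0" : "=q"(c));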