LLVM API Documentation

X86ISelLowering.cpp

Go to the documentation of this file.
00001 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation ---*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by Chris Lattner and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the interfaces that X86 uses to lower LLVM code into a
00011 // selection DAG.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "X86.h"
00016 #include "X86InstrBuilder.h"
00017 #include "X86ISelLowering.h"
00018 #include "X86TargetMachine.h"
00019 #include "llvm/CallingConv.h"
00020 #include "llvm/Constants.h"
00021 #include "llvm/Function.h"
00022 #include "llvm/Intrinsics.h"
00023 #include "llvm/ADT/VectorExtras.h"
00024 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
00025 #include "llvm/CodeGen/MachineFrameInfo.h"
00026 #include "llvm/CodeGen/MachineFunction.h"
00027 #include "llvm/CodeGen/MachineInstrBuilder.h"
00028 #include "llvm/CodeGen/SelectionDAG.h"
00029 #include "llvm/CodeGen/SSARegMap.h"
00030 #include "llvm/Support/MathExtras.h"
00031 #include "llvm/Target/TargetOptions.h"
00032 using namespace llvm;
00033 
00034 // FIXME: temporary.
00035 #include "llvm/Support/CommandLine.h"
00036 static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
00037                                   cl::desc("Enable fastcc on X86"));
00038 
00039 X86TargetLowering::X86TargetLowering(TargetMachine &TM)
00040   : TargetLowering(TM) {
00041   Subtarget = &TM.getSubtarget<X86Subtarget>();
00042   X86ScalarSSE = Subtarget->hasSSE2();
00043 
00044   // Set up the TargetLowering object.
00045 
00046   // X86 is weird, it always uses i8 for shift amounts and setcc results.
00047   setShiftAmountType(MVT::i8);
00048   setSetCCResultType(MVT::i8);
00049   setSetCCResultContents(ZeroOrOneSetCCResult);
00050   setSchedulingPreference(SchedulingForRegPressure);
00051   setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
00052   setStackPointerRegisterToSaveRestore(X86::ESP);
00053 
00054   if (!Subtarget->isTargetDarwin())
00055     // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
00056     setUseUnderscoreSetJmpLongJmp(true);
00057     
00058   // Add legal addressing mode scale values.
00059   addLegalAddressScale(8);
00060   addLegalAddressScale(4);
00061   addLegalAddressScale(2);
00062   // Enter the ones which require both scale + index last. These are more
00063   // expensive.
00064   addLegalAddressScale(9);
00065   addLegalAddressScale(5);
00066   addLegalAddressScale(3);
00067   
00068   // Set up the register classes.
00069   addRegisterClass(MVT::i8, X86::R8RegisterClass);
00070   addRegisterClass(MVT::i16, X86::R16RegisterClass);
00071   addRegisterClass(MVT::i32, X86::R32RegisterClass);
00072 
00073   // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
00074   // operation.
00075   setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
00076   setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
00077   setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);
00078 
00079   if (X86ScalarSSE)
00080     // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
00081     setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Expand);
00082   else
00083     setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
00084 
00085   // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
00086   // this operation.
00087   setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
00088   setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
00089   // SSE has no i16 to fp conversion, only i32
00090   if (X86ScalarSSE)
00091     setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
00092   else {
00093     setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
00094     setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
00095   }
00096 
00097   // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64
00098   // isn't legal.
00099   setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);
00100   setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);
00101 
00102   // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
00103   // this operation.
00104   setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
00105   setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);
00106 
00107   if (X86ScalarSSE) {
00108     setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
00109   } else {
00110     setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
00111     setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
00112   }
00113 
00114   // Handle FP_TO_UINT by promoting the destination to a larger signed
00115   // conversion.
00116   setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
00117   setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
00118   setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);
00119 
00120   if (X86ScalarSSE && !Subtarget->hasSSE3())
00121     // Expand FP_TO_UINT into a select.
00122     // FIXME: We would like to use a Custom expander here eventually to do
00123     // the optimal thing for SSE vs. the default expansion in the legalizer.
00124     setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Expand);
00125   else
00126     // With SSE3 we can use fisttpll to convert to a signed i64.
00127     setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
00128 
00129   setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
00130   setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);
00131 
00132   setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
00133   setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
00134   setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
00135   setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
00136   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
00137   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
00138   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
00139   setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
00140   setOperationAction(ISD::SEXTLOAD         , MVT::i1   , Expand);
00141   setOperationAction(ISD::FREM             , MVT::f64  , Expand);
00142   setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
00143   setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
00144   setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
00145   setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
00146   setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
00147   setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
00148   setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
00149   setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
00150   setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
00151   setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
00152   setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);
00153 
00154   // These should be promoted to a larger select which is supported.
00155   setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
00156   setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
00157 
00158   // X86 wants to expand cmov itself.
00159   setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
00160   setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
00161   setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
00162   setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
00163   setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
00164   setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
00165   setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
00166   setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
00167   setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
00168   // X86 ret instruction may pop stack.
00169   setOperationAction(ISD::RET             , MVT::Other, Custom);
00170   // Darwin ABI issue.
00171   setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
00172   setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
00173   setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
00174   // 64-bit addm sub, shl, sra, srl (iff 32-bit x86)
00175   setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
00176   setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
00177   setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
00178   // X86 wants to expand memset / memcpy itself.
00179   setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
00180   setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);
00181 
00182   // We don't have line number support yet.
00183   setOperationAction(ISD::LOCATION, MVT::Other, Expand);
00184   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
00185   // FIXME - use subtarget debug flags
00186   if (!Subtarget->isTargetDarwin())
00187     setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);
00188 
00189   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
00190   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
00191   
00192   // Use the default implementation.
00193   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
00194   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
00195   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
00196   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand); 
00197   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
00198   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
00199 
00200   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
00201   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
00202 
00203   if (X86ScalarSSE) {
00204     // Set up the FP register classes.
00205     addRegisterClass(MVT::f32, X86::FR32RegisterClass);
00206     addRegisterClass(MVT::f64, X86::FR64RegisterClass);
00207 
00208     // SSE has no load+extend ops
00209     setOperationAction(ISD::EXTLOAD,  MVT::f32, Expand);
00210     setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand);
00211 
00212     // Use ANDPD to simulate FABS.
00213     setOperationAction(ISD::FABS , MVT::f64, Custom);
00214     setOperationAction(ISD::FABS , MVT::f32, Custom);
00215 
00216     // Use XORP to simulate FNEG.
00217     setOperationAction(ISD::FNEG , MVT::f64, Custom);
00218     setOperationAction(ISD::FNEG , MVT::f32, Custom);
00219 
00220     // We don't support sin/cos/fmod
00221     setOperationAction(ISD::FSIN , MVT::f64, Expand);
00222     setOperationAction(ISD::FCOS , MVT::f64, Expand);
00223     setOperationAction(ISD::FREM , MVT::f64, Expand);
00224     setOperationAction(ISD::FSIN , MVT::f32, Expand);
00225     setOperationAction(ISD::FCOS , MVT::f32, Expand);
00226     setOperationAction(ISD::FREM , MVT::f32, Expand);
00227 
00228     // Expand FP immediates into loads from the stack, except for the special
00229     // cases we handle.
00230     setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
00231     setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
00232     addLegalFPImmediate(+0.0); // xorps / xorpd
00233   } else {
00234     // Set up the FP register classes.
00235     addRegisterClass(MVT::f64, X86::RFPRegisterClass);
00236     
00237     setOperationAction(ISD::UNDEF, MVT::f64, Expand);
00238     
00239     if (!UnsafeFPMath) {
00240       setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
00241       setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
00242     }
00243 
00244     setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
00245     addLegalFPImmediate(+0.0); // FLD0
00246     addLegalFPImmediate(+1.0); // FLD1
00247     addLegalFPImmediate(-0.0); // FLD0/FCHS
00248     addLegalFPImmediate(-1.0); // FLD1/FCHS
00249   }
00250 
00251   // First set operation action for all vector types to expand. Then we
00252   // will selectively turn on ones that can be effectively codegen'd.
00253   for (unsigned VT = (unsigned)MVT::Vector + 1;
00254        VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
00255     setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
00256     setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
00257     setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
00258     setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
00259     setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
00260     setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
00261     setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
00262   }
00263 
00264   if (Subtarget->hasMMX()) {
00265     addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
00266     addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
00267     addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
00268 
00269     // FIXME: add MMX packed arithmetics
00270     setOperationAction(ISD::BUILD_VECTOR,     MVT::v8i8,  Expand);
00271     setOperationAction(ISD::BUILD_VECTOR,     MVT::v4i16, Expand);
00272     setOperationAction(ISD::BUILD_VECTOR,     MVT::v2i32, Expand);
00273   }
00274 
00275   if (Subtarget->hasSSE1()) {
00276     addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
00277 
00278     setOperationAction(ISD::AND,                MVT::v4f32, Legal);
00279     setOperationAction(ISD::OR,                 MVT::v4f32, Legal);
00280     setOperationAction(ISD::XOR,                MVT::v4f32, Legal);
00281     setOperationAction(ISD::ADD,                MVT::v4f32, Legal);
00282     setOperationAction(ISD::SUB,                MVT::v4f32, Legal);
00283     setOperationAction(ISD::MUL,                MVT::v4f32, Legal);
00284     setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
00285     setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
00286     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
00287     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
00288     setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
00289   }
00290 
00291   if (Subtarget->hasSSE2()) {
00292     addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
00293     addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
00294     addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
00295     addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
00296     addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
00297 
00298     setOperationAction(ISD::ADD,                MVT::v2f64, Legal);
00299     setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
00300     setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
00301     setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
00302     setOperationAction(ISD::SUB,                MVT::v2f64, Legal);
00303     setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
00304     setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
00305     setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
00306     setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
00307     setOperationAction(ISD::MUL,                MVT::v2f64, Legal);
00308 
00309     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
00310     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
00311     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
00312 
00313     // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
00314     for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
00315       setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
00316       setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
00317       setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
00318     }
00319     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
00320     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
00321     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
00322     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
00323     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
00324     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
00325 
00326     // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 
00327     for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
00328       setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
00329       AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
00330       setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
00331       AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
00332       setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
00333       AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
00334       setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
00335       AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
00336       setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
00337       AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
00338     }
00339 
00340     // Custom lower v2i64 and v2f64 selects.
00341     setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
00342     setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
00343     setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
00344     setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
00345   }
00346 
00347   // We want to custom lower some of our intrinsics.
00348   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
00349 
00350   computeRegisterProperties();
00351 
00352   // FIXME: These should be based on subtarget info. Plus, the values should
00353   // be smaller when we are in optimizing for size mode.
00354   maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
00355   maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
00356   maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
00357   allowUnalignedMemoryAccesses = true; // x86 supports it!
00358 }
00359 
00360 std::vector<SDOperand>
00361 X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
00362   if (F.getCallingConv() == CallingConv::Fast && EnableFastCC)
00363     return LowerFastCCArguments(F, DAG);
00364   return LowerCCCArguments(F, DAG);
00365 }
00366 
00367 std::pair<SDOperand, SDOperand>
00368 X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
00369                                bool isVarArg, unsigned CallingConv,
00370                                bool isTailCall,
00371                                SDOperand Callee, ArgListTy &Args,
00372                                SelectionDAG &DAG) {
00373   assert((!isVarArg || CallingConv == CallingConv::C) &&
00374          "Only C takes varargs!");
00375 
00376   // If the callee is a GlobalAddress node (quite common, every direct call is)
00377   // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
00378   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
00379     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
00380   else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
00381     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
00382 
00383   if (CallingConv == CallingConv::Fast && EnableFastCC)
00384     return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG);
00385   return  LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG);
00386 }
00387 
00388 //===----------------------------------------------------------------------===//
00389 //                    C Calling Convention implementation
00390 //===----------------------------------------------------------------------===//
00391 
00392 std::vector<SDOperand>
00393 X86TargetLowering::LowerCCCArguments(Function &F, SelectionDAG &DAG) {
00394   std::vector<SDOperand> ArgValues;
00395 
00396   MachineFunction &MF = DAG.getMachineFunction();
00397   MachineFrameInfo *MFI = MF.getFrameInfo();
00398 
00399   // Add DAG nodes to load the arguments...  On entry to a function on the X86,
00400   // the stack frame looks like this:
00401   //
00402   // [ESP] -- return address
00403   // [ESP + 4] -- first argument (leftmost lexically)
00404   // [ESP + 8] -- second argument, if first argument is four bytes in size
00405   //    ...
00406   //
00407   unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
00408   for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
00409     MVT::ValueType ObjectVT = getValueType(I->getType());
00410     unsigned ArgIncrement = 4;
00411     unsigned ObjSize;
00412     switch (ObjectVT) {
00413     default: assert(0 && "Unhandled argument type!");
00414     case MVT::i1:
00415     case MVT::i8:  ObjSize = 1;                break;
00416     case MVT::i16: ObjSize = 2;                break;
00417     case MVT::i32: ObjSize = 4;                break;
00418     case MVT::i64: ObjSize = ArgIncrement = 8; break;
00419     case MVT::f32: ObjSize = 4;                break;
00420     case MVT::f64: ObjSize = ArgIncrement = 8; break;
00421     }
00422     // Create the frame index object for this incoming parameter...
00423     int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
00424 
00425     // Create the SelectionDAG nodes corresponding to a load from this parameter
00426     SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
00427 
00428     // Don't codegen dead arguments.  FIXME: remove this check when we can nuke
00429     // dead loads.
00430     SDOperand ArgValue;
00431     if (!I->use_empty())
00432       ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
00433                              DAG.getSrcValue(NULL));
00434     else {
00435       if (MVT::isInteger(ObjectVT))
00436         ArgValue = DAG.getConstant(0, ObjectVT);
00437       else
00438         ArgValue = DAG.getConstantFP(0, ObjectVT);
00439     }
00440     ArgValues.push_back(ArgValue);
00441 
00442     ArgOffset += ArgIncrement;   // Move on to the next argument...
00443   }
00444 
00445   // If the function takes variable number of arguments, make a frame index for
00446   // the start of the first vararg value... for expansion of llvm.va_start.
00447   if (F.isVarArg())
00448     VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
00449   ReturnAddrIndex = 0;     // No return address slot generated yet.
00450   BytesToPopOnReturn = 0;  // Callee pops nothing.
00451   BytesCallerReserves = ArgOffset;
00452 
00453   // Finally, inform the code generator which regs we return values in.
00454   switch (getValueType(F.getReturnType())) {
00455   default: assert(0 && "Unknown type!");
00456   case MVT::isVoid: break;
00457   case MVT::i1:
00458   case MVT::i8:
00459   case MVT::i16:
00460   case MVT::i32:
00461     MF.addLiveOut(X86::EAX);
00462     break;
00463   case MVT::i64:
00464     MF.addLiveOut(X86::EAX);
00465     MF.addLiveOut(X86::EDX);
00466     break;
00467   case MVT::f32:
00468   case MVT::f64:
00469     MF.addLiveOut(X86::ST0);
00470     break;
00471   }
00472   return ArgValues;
00473 }
00474 
00475 std::pair<SDOperand, SDOperand>
00476 X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy,
00477                                   bool isVarArg, bool isTailCall,
00478                                   SDOperand Callee, ArgListTy &Args,
00479                                   SelectionDAG &DAG) {
00480   // Count how many bytes are to be pushed on the stack.
00481   unsigned NumBytes = 0;
00482 
00483   if (Args.empty()) {
00484     // Save zero bytes.
00485     Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(0, getPointerTy()));
00486   } else {
00487     for (unsigned i = 0, e = Args.size(); i != e; ++i)
00488       switch (getValueType(Args[i].second)) {
00489       default: assert(0 && "Unknown value type!");
00490       case MVT::i1:
00491       case MVT::i8:
00492       case MVT::i16:
00493       case MVT::i32:
00494       case MVT::f32:
00495         NumBytes += 4;
00496         break;
00497       case MVT::i64:
00498       case MVT::f64:
00499         NumBytes += 8;
00500         break;
00501       }
00502 
00503     Chain = DAG.getCALLSEQ_START(Chain,
00504                                  DAG.getConstant(NumBytes, getPointerTy()));
00505 
00506     // Arguments go on the stack in reverse order, as specified by the ABI.
00507     unsigned ArgOffset = 0;
00508     SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
00509     std::vector<SDOperand> Stores;
00510 
00511     for (unsigned i = 0, e = Args.size(); i != e; ++i) {
00512       SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
00513       PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
00514 
00515       switch (getValueType(Args[i].second)) {
00516       default: assert(0 && "Unexpected ValueType for argument!");
00517       case MVT::i1:
00518       case MVT::i8:
00519       case MVT::i16:
00520         // Promote the integer to 32 bits.  If the input type is signed use a
00521         // sign extend, otherwise use a zero extend.
00522         if (Args[i].second->isSigned())
00523           Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
00524         else
00525           Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);
00526 
00527         // FALL THROUGH
00528       case MVT::i32:
00529       case MVT::f32:
00530         Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
00531                                      Args[i].first, PtrOff,
00532                                      DAG.getSrcValue(NULL)));
00533         ArgOffset += 4;
00534         break;
00535       case MVT::i64:
00536       case MVT::f64:
00537         Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
00538                                      Args[i].first, PtrOff,
00539                                      DAG.getSrcValue(NULL)));
00540         ArgOffset += 8;
00541         break;
00542       }
00543     }
00544     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);
00545   }
00546 
00547   std::vector<MVT::ValueType> RetVals;
00548   MVT::ValueType RetTyVT = getValueType(RetTy);
00549   RetVals.push_back(MVT::Other);
00550 
00551   // The result values produced have to be legal.  Promote the result.
00552   switch (RetTyVT) {
00553   case MVT::isVoid: break;
00554   default:
00555     RetVals.push_back(RetTyVT);
00556     break;
00557   case MVT::i1:
00558   case MVT::i8:
00559   case MVT::i16:
00560     RetVals.push_back(MVT::i32);
00561     break;
00562   case MVT::f32:
00563     if (X86ScalarSSE)
00564       RetVals.push_back(MVT::f32);
00565     else
00566       RetVals.push_back(MVT::f64);
00567     break;
00568   case MVT::i64:
00569     RetVals.push_back(MVT::i32);
00570     RetVals.push_back(MVT::i32);
00571     break;
00572   }
00573 
00574   std::vector<MVT::ValueType> NodeTys;
00575   NodeTys.push_back(MVT::Other);   // Returns a chain
00576   NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
00577   std::vector<SDOperand> Ops;
00578   Ops.push_back(Chain);
00579   Ops.push_back(Callee);
00580 
00581   // FIXME: Do not generate X86ISD::TAILCALL for now.
00582   Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
00583   SDOperand InFlag = Chain.getValue(1);
00584 
00585   NodeTys.clear();
00586   NodeTys.push_back(MVT::Other);   // Returns a chain
00587   NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
00588   Ops.clear();
00589   Ops.push_back(Chain);
00590   Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
00591   Ops.push_back(DAG.getConstant(0, getPointerTy()));
00592   Ops.push_back(InFlag);
00593   Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
00594   InFlag = Chain.getValue(1);
00595   
00596   SDOperand RetVal;
00597   if (RetTyVT != MVT::isVoid) {
00598     switch (RetTyVT) {
00599     default: assert(0 && "Unknown value type to return!");
00600     case MVT::i1:
00601     case MVT::i8:
00602       RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
00603       Chain = RetVal.getValue(1);
00604       if (RetTyVT == MVT::i1) 
00605         RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
00606       break;
00607     case MVT::i16:
00608       RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
00609       Chain = RetVal.getValue(1);
00610       break;
00611     case MVT::i32:
00612       RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
00613       Chain = RetVal.getValue(1);
00614       break;
00615     case MVT::i64: {
00616       SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
00617       SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32, 
00618                                         Lo.getValue(2));
00619       RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
00620       Chain = Hi.getValue(1);
00621       break;
00622     }
00623     case MVT::f32:
00624     case MVT::f64: {
00625       std::vector<MVT::ValueType> Tys;
00626       Tys.push_back(MVT::f64);
00627       Tys.push_back(MVT::Other);
00628       Tys.push_back(MVT::Flag);
00629       std::vector<SDOperand> Ops;
00630       Ops.push_back(Chain);
00631       Ops.push_back(InFlag);
00632       RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
00633       Chain  = RetVal.getValue(1);
00634       InFlag = RetVal.getValue(2);
00635       if (X86ScalarSSE) {
00636         // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
00637         // shouldn't be necessary except that RFP cannot be live across
00638         // multiple blocks. When stackifier is fixed, they can be uncoupled.
00639         MachineFunction &MF = DAG.getMachineFunction();
00640         int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
00641         SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
00642         Tys.clear();
00643         Tys.push_back(MVT::Other);
00644         Ops.clear();
00645         Ops.push_back(Chain);
00646         Ops.push_back(RetVal);
00647         Ops.push_back(StackSlot);
00648         Ops.push_back(DAG.getValueType(RetTyVT));
00649         Ops.push_back(InFlag);
00650         Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
00651         RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
00652                              DAG.getSrcValue(NULL));
00653         Chain = RetVal.getValue(1);
00654       }
00655 
00656       if (RetTyVT == MVT::f32 && !X86ScalarSSE)
00657         // FIXME: we would really like to remember that this FP_ROUND
00658         // operation is okay to eliminate if we allow excess FP precision.
00659         RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
00660       break;
00661     }
00662     }
00663   }
00664 
00665   return std::make_pair(RetVal, Chain);
00666 }
00667 
00668 //===----------------------------------------------------------------------===//
00669 //                    Fast Calling Convention implementation
00670 //===----------------------------------------------------------------------===//
00671 //
00672 // The X86 'fast' calling convention passes up to two integer arguments in
00673 // registers (an appropriate portion of EAX/EDX), passes arguments in C order,
00674 // and requires that the callee pop its arguments off the stack (allowing proper
00675 // tail calls), and has the same return value conventions as C calling convs.
00676 //
00677 // This calling convention always arranges for the callee pop value to be 8n+4
00678 // bytes, which is needed for tail recursion elimination and stack alignment
00679 // reasons.
00680 //
00681 // Note that this can be enhanced in the future to pass fp vals in registers
00682 // (when we have a global fp allocator) and do other tricks.
00683 //
00684 
00685 /// AddLiveIn - Mark physical register PReg as live on entry to MF.  A fresh
00686 /// virtual register of class RC is created, recorded as PReg's live-in
00687 /// counterpart, and returned.  RC is asserted to contain PReg.
00688 static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
00689                           TargetRegisterClass *RC) {
00690   assert(RC->contains(PReg) && "Not the correct regclass!");
00691   const unsigned LiveInVReg = MF.getSSARegMap()->createVirtualRegister(RC);
00692   MF.addLiveIn(PReg, LiveInVReg);
00693   return LiveInVReg;
00694 }
00695 
00696 // FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments
00697 // to pass in registers.  0 is none, 1 is "use EAX", 2 is "use EAX and
00698 // EDX".  Anything more is illegal.
00699 //
00700 // FIXME: The linscan register allocator currently has problems with
00701 // coalescing.  At the time of this writing, whenever it decides to coalesce
00702 // a physreg with a virtreg, this increases the size of the physreg's live
00703 // range, and the live range cannot ever be reduced.  This causes problems if
00704 // too many physregs are coalesced with virtregs, which can cause the register
00705 // allocator to wedge itself.
00706 //
00707 // This code triggers this problem more often if we pass args in registers,
00708 // so disable it until this is fixed.
00709 //
00710 // NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings
00711 // about code being dead.
00712 //
00713 static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0;
00714 
00715 /// LowerFastCCArguments - Build the DAG values for the incoming arguments of
      /// the fastcc function F.  Up to FASTCC_NUM_INT_ARGS_INREGS integer arguments
      /// are copied out of EAX/EDX (or their sub-registers); all other arguments are
      /// loaded from fixed stack objects.  As a side effect this sets
      /// VarArgsFrameIndex, ReturnAddrIndex, BytesToPopOnReturn (rounded up to 8n+4,
      /// since the callee pops) and BytesCallerReserves, and marks the registers
      /// used for the return value as live-out.  Returns one value per formal
      /// argument; arguments that have no uses get a zero constant instead.
00716 std::vector<SDOperand>
00717 X86TargetLowering::LowerFastCCArguments(Function &F, SelectionDAG &DAG) {
00718   std::vector<SDOperand> ArgValues;
00719 
00720   MachineFunction &MF = DAG.getMachineFunction();
00721   MachineFrameInfo *MFI = MF.getFrameInfo();
00722 
00723   // Add DAG nodes to load the arguments...  On entry to a function the stack
00724   // frame looks like this:
00725   //
00726   // [ESP] -- return address
00727   // [ESP + 4] -- first nonreg argument (leftmost lexically)
00728   // [ESP + 8] -- second nonreg argument, if first argument is 4 bytes in size
00729   //    ...
00730   unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
00731 
00732   // Keep track of the number of integer regs passed so far.  This can be either
00733   // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
00734   // used).
00735   unsigned NumIntRegs = 0;
00736   
00737   for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
00738     MVT::ValueType ObjectVT = getValueType(I->getType());
00739     unsigned ArgIncrement = 4;   // Stack bytes consumed by this argument.
00740     unsigned ObjSize = 0;        // Nonzero only if the argument is on the stack.
00741     SDOperand ArgValue;
00742 
00743     switch (ObjectVT) {
00744     default: assert(0 && "Unhandled argument type!");
00745     case MVT::i1:
00746     case MVT::i8:
00747       if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
00748         if (!I->use_empty()) {
00749           unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
00750                                     X86::R8RegisterClass);
00751           ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i8);
00752           DAG.setRoot(ArgValue.getValue(1));
00753           if (ObjectVT == MVT::i1)
00754             // FIXME: Should insert a assertzext here.
00755             ArgValue = DAG.getNode(ISD::TRUNCATE, MVT::i1, ArgValue);
00756         }
00757         ++NumIntRegs;
00758         break;
00759       }
00760 
00761       ObjSize = 1;
00762       break;
00763     case MVT::i16:
00764       if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
00765         if (!I->use_empty()) {
00766           unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
00767                                     X86::R16RegisterClass);
00768           ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i16);
00769           DAG.setRoot(ArgValue.getValue(1));
00770         }
00771         ++NumIntRegs;
00772         break;
00773       }
00774       ObjSize = 2;
00775       break;
00776     case MVT::i32:
00777       if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
00778         if (!I->use_empty()) {
00779           unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
00780                                     X86::R32RegisterClass);
00781           ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
00782           DAG.setRoot(ArgValue.getValue(1));
00783         }
00784         ++NumIntRegs;
00785         break;
00786       }
00787       ObjSize = 4;
00788       break;
00789     case MVT::i64:
00790       if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
              // Both halves fit in registers: low in EAX, high in EDX.
00791         if (!I->use_empty()) {
00792           unsigned BotReg = AddLiveIn(MF, X86::EAX, X86::R32RegisterClass);
00793           unsigned TopReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);
00794 
00795           SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32);
00796           SDOperand Hi  = DAG.getCopyFromReg(Low.getValue(1), TopReg, MVT::i32);
00797           DAG.setRoot(Hi.getValue(1));
00798 
00799           ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
00800         }
00801         NumIntRegs += 2;
00802         break;
00803       } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
              // Only one register is left: the low half travels in it and the
              // high half on the stack.  LowerFastCCCallTo hands out EAX to the
              // first register value and EDX to the second, so the register must
              // be chosen by how many are already in use rather than hard-coded.
00804         if (!I->use_empty()) {
00805           unsigned BotReg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                                            X86::R32RegisterClass);
00806           SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32);
00807           DAG.setRoot(Low.getValue(1));
00808 
00809           // Load the high part from memory.
00810           // Create the frame index object for this incoming parameter...
00811           int FI = MFI->CreateFixedObject(4, ArgOffset);
00812           SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
00813           SDOperand Hi = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
00814                                      DAG.getSrcValue(NULL));
00815           ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
00816         }
00817         ArgOffset += 4;
00818         NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
00819         break;
00820       }
00821       ObjSize = ArgIncrement = 8;
00822       break;
00823     case MVT::f32: ObjSize = 4;                break;
00824     case MVT::f64: ObjSize = ArgIncrement = 8; break;
00825     }
00826 
00827     // Don't codegen dead arguments.  FIXME: remove this check when we can nuke
00828     // dead loads.
00829     if (ObjSize && !I->use_empty()) {
00830       // Create the frame index object for this incoming parameter...
00831       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
00832 
00833       // Create the SelectionDAG nodes corresponding to a load from this
00834       // parameter.
00835       SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
00836 
00837       ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
00838                              DAG.getSrcValue(NULL));
00839     } else if (ArgValue.Val == 0) {
            // Nothing was produced above (the argument is unused): substitute a
            // zero of the right type so ArgValues still has one entry per arg.
00840       if (MVT::isInteger(ObjectVT))
00841         ArgValue = DAG.getConstant(0, ObjectVT);
00842       else
00843         ArgValue = DAG.getConstantFP(0, ObjectVT);
00844     }
00845     ArgValues.push_back(ArgValue);
00846 
00847     if (ObjSize)
00848       ArgOffset += ArgIncrement;   // Move on to the next argument.
00849   }
00850 
00851   // Make sure the instruction takes 8n+4 bytes to make sure the start of the
00852   // arguments and the arguments after the retaddr has been pushed are aligned.
00853   if ((ArgOffset & 7) == 0)
00854     ArgOffset += 4;
00855 
00856   VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
00857   ReturnAddrIndex = 0;             // No return address slot generated yet.
00858   BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
00859   BytesCallerReserves = 0;
00860 
00861   // Finally, inform the code generator which regs we return values in.
00862   switch (getValueType(F.getReturnType())) {
00863   default: assert(0 && "Unknown type!");
00864   case MVT::isVoid: break;
00865   case MVT::i1:
00866   case MVT::i8:
00867   case MVT::i16:
00868   case MVT::i32:
00869     MF.addLiveOut(X86::EAX);
00870     break;
00871   case MVT::i64:
00872     MF.addLiveOut(X86::EAX);
00873     MF.addLiveOut(X86::EDX);
00874     break;
00875   case MVT::f32:
00876   case MVT::f64:
00877     MF.addLiveOut(X86::ST0);
00878     break;
00879   }
00880   return ArgValues;
00881 }
00882 /// LowerFastCCCallTo - Emit the DAG nodes for a fastcc call to Callee.  The
      /// stack space for the arguments is computed (rounded to 8n+4), stack
      /// arguments are stored relative to ESP, register arguments are copied into
      /// EAX/EDX (or their sub-registers), and X86ISD::CALL is bracketed by
      /// CALLSEQ_START / CALLSEQ_END.  The result, if any, is then copied out of
      /// AL/AX/EAX, EAX:EDX or the FP stack.  isTailCall is currently ignored.
      /// Returns the pair (return value, output chain); the return value is a null
      /// SDOperand for void calls.
00883 std::pair<SDOperand, SDOperand>
00884 X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy,
00885                                      bool isTailCall, SDOperand Callee,
00886                                      ArgListTy &Args, SelectionDAG &DAG) {
00887   // Count how many bytes are to be pushed on the stack.
00888   unsigned NumBytes = 0;
00889 
00890   // Keep track of the number of integer regs passed so far.  This can be either
00891   // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
00892   // used).
00893   unsigned NumIntRegs = 0;
00894 
00895   for (unsigned i = 0, e = Args.size(); i != e; ++i)
00896     switch (getValueType(Args[i].second)) {
00897     default: assert(0 && "Unknown value type!");
00898     case MVT::i1:
00899     case MVT::i8:
00900     case MVT::i16:
00901     case MVT::i32:
00902       if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
00903         ++NumIntRegs;
00904         break;
00905       }
00906       // fall through
00907     case MVT::f32:
00908       NumBytes += 4;
00909       break;
00910     case MVT::i64:
00911       if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
00912         NumIntRegs += 2;
00913         break;
00914       } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
              // Low half in the last free register, high half on the stack.
00915         NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
00916         NumBytes += 4;
00917         break;
00918       }
00919 
00920       // fall through
00921     case MVT::f64:
00922       NumBytes += 8;
00923       break;
00924     }
00925 
00926   // Make sure the instruction takes 8n+4 bytes to make sure the start of the
00927   // arguments and the arguments after the retaddr has been pushed are aligned.
00928   if ((NumBytes & 7) == 0)
00929     NumBytes += 4;
00930 
00931   Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
00932 
00933   // Arguments go on the stack in reverse order, as specified by the ABI.
00934   unsigned ArgOffset = 0;
00935   SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
00936   NumIntRegs = 0;
00937   std::vector<SDOperand> Stores;
00938   std::vector<SDOperand> RegValuesToPass;
00939   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
00940     switch (getValueType(Args[i].second)) {
00941     default: assert(0 && "Unexpected ValueType for argument!");
00942     case MVT::i1:
00943       Args[i].first = DAG.getNode(ISD::ANY_EXTEND, MVT::i8, Args[i].first);
00944       // Fall through.
00945     case MVT::i8:
00946     case MVT::i16:
00947     case MVT::i32:
00948       if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
00949         RegValuesToPass.push_back(Args[i].first);
00950         ++NumIntRegs;
00951         break;
00952       }
00953       // Fall through
00954     case MVT::f32: {
00955       SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
00956       PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
00957       Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
00958                                    Args[i].first, PtrOff,
00959                                    DAG.getSrcValue(NULL)));
00960       ArgOffset += 4;
00961       break;
00962     }
00963     case MVT::i64:
00964        // Can pass (at least) part of it in regs?
00965       if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
00966         SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
00967                                    Args[i].first, DAG.getConstant(1, MVT::i32));
00968         SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
00969                                    Args[i].first, DAG.getConstant(0, MVT::i32));
00970         RegValuesToPass.push_back(Lo);
00971         ++NumIntRegs;
00972         
00973         // Pass both parts in regs?
00974         if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
00975           RegValuesToPass.push_back(Hi);
00976           ++NumIntRegs;
00977         } else {
00978           // Pass the high part in memory.
00979           SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
00980           PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
00981           Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
00982                                        Hi, PtrOff, DAG.getSrcValue(NULL)));
00983           ArgOffset += 4;
00984         }
00985         break;
00986       }
00987       // Fall through
00988     case MVT::f64:
00989       SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
00990       PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
00991       Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
00992                                    Args[i].first, PtrOff,
00993                                    DAG.getSrcValue(NULL)));
00994       ArgOffset += 8;
00995       break;
00996     }
00997   }
        // The argument stores are independent of each other; merge their chains.
00998   if (!Stores.empty())
00999     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);
01000 
01001   // Make sure the instruction takes 8n+4 bytes to make sure the start of the
01002   // arguments and the arguments after the retaddr has been pushed are aligned.
01003   if ((ArgOffset & 7) == 0)
01004     ArgOffset += 4;
01005 
01007   MVT::ValueType RetTyVT = getValueType(RetTy);
01033 
01034   // Build a sequence of copy-to-reg nodes chained together with token chain
01035   // and flag operands which copy the outgoing args into registers.
01036   SDOperand InFlag;
01037   for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
01038     unsigned CCReg;
01039     SDOperand RegToPass = RegValuesToPass[i];
          // The first register value goes in (a part of) EAX, the second in EDX.
01040     switch (RegToPass.getValueType()) {
01041     default: assert(0 && "Bad thing to pass in regs");
01042     case MVT::i8:
01043       CCReg = (i == 0) ? X86::AL  : X86::DL;
01044       break;
01045     case MVT::i16:
01046       CCReg = (i == 0) ? X86::AX  : X86::DX;
01047       break;
01048     case MVT::i32:
01049       CCReg = (i == 0) ? X86::EAX : X86::EDX;
01050       break;
01051     }
01052 
01053     Chain = DAG.getCopyToReg(Chain, CCReg, RegToPass, InFlag);
01054     InFlag = Chain.getValue(1);
01055   }
01056 
01057   std::vector<MVT::ValueType> NodeTys;
01058   NodeTys.push_back(MVT::Other);   // Returns a chain
01059   NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
01060   std::vector<SDOperand> Ops;
01061   Ops.push_back(Chain);
01062   Ops.push_back(Callee);
01063   if (InFlag.Val)
01064     Ops.push_back(InFlag);
01065 
01066   // FIXME: Do not generate X86ISD::TAILCALL for now.
01067   Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
01068   InFlag = Chain.getValue(1);
01069 
01070   NodeTys.clear();
01071   NodeTys.push_back(MVT::Other);   // Returns a chain
01072   NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
01073   Ops.clear();
01074   Ops.push_back(Chain);
01075   Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
01076   Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
01077   Ops.push_back(InFlag);
01078   Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
01079   InFlag = Chain.getValue(1);
01080   
01081   SDOperand RetVal;
01082   if (RetTyVT != MVT::isVoid) {
01083     switch (RetTyVT) {
01084     default: assert(0 && "Unknown value type to return!");
01085     case MVT::i1:
01086     case MVT::i8:
01087       RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
01088       Chain = RetVal.getValue(1);
01089       if (RetTyVT == MVT::i1) 
01090         RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
01091       break;
01092     case MVT::i16:
01093       RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
01094       Chain = RetVal.getValue(1);
01095       break;
01096     case MVT::i32:
01097       RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
01098       Chain = RetVal.getValue(1);
01099       break;
01100     case MVT::i64: {
01101       SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
01102       SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32, 
01103                                         Lo.getValue(2));
01104       RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
01105       Chain = Hi.getValue(1);
01106       break;
01107     }
01108     case MVT::f32:
01109     case MVT::f64: {
01110       std::vector<MVT::ValueType> Tys;
01111       Tys.push_back(MVT::f64);
01112       Tys.push_back(MVT::Other);
01113       Tys.push_back(MVT::Flag);
01114       std::vector<SDOperand> Ops;
01115       Ops.push_back(Chain);
01116       Ops.push_back(InFlag);
01117       RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
01118       Chain  = RetVal.getValue(1);
01119       InFlag = RetVal.getValue(2);
01120       if (X86ScalarSSE) {
01121         // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
01122         // shouldn't be necessary except that RFP cannot be live across
01123         // multiple blocks. When stackifier is fixed, they can be uncoupled.
01124         MachineFunction &MF = DAG.getMachineFunction();
01125         int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
01126         SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
01127         Tys.clear();
01128         Tys.push_back(MVT::Other);
01129         Ops.clear();
01130         Ops.push_back(Chain);
01131         Ops.push_back(RetVal);
01132         Ops.push_back(StackSlot);
01133         Ops.push_back(DAG.getValueType(RetTyVT));
01134         Ops.push_back(InFlag);
01135         Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
01136         RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
01137                              DAG.getSrcValue(NULL));
01138         Chain = RetVal.getValue(1);
01139       }
01140 
01141       if (RetTyVT == MVT::f32 && !X86ScalarSSE)
01142         // FIXME: we would really like to remember that this FP_ROUND
01143         // operation is okay to eliminate if we allow excess FP precision.
01144         RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
01145       break;
01146     }
01147     }
01148   }
01149 
01150   return std::make_pair(RetVal, Chain);
01151 }
01152 /// getReturnAddressFrameIndex - Frame index node for the return address slot.
01153 SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
01154   // ReturnAddrIndex == 0 means no slot has been made yet; create the fixed
01155   // 4-byte object at offset -4 on first use and remember its index.
01156   if (!ReturnAddrIndex) {
01157     MachineFunction &CurFn = DAG.getMachineFunction();
01158     ReturnAddrIndex = CurFn.getFrameInfo()->CreateFixedObject(4, -4);
01159   }
01160   return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32);
01161 }
01162 
01163 /// LowerFrameReturnAddress - Produce either the return address (a load from
01164 /// its frame slot) or the frame address (slot address minus 4) for Depth 0.
01165 std::pair<SDOperand, SDOperand> X86TargetLowering::
01166 LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
01167                         SelectionDAG &DAG) {
01168   // Depths > 0 not supported yet!  Those simply yield a zero constant.
01169   if (Depth)
01170     return std::make_pair(DAG.getConstant(0, getPointerTy()), Chain);
01171 
01172   SDOperand RetAddrSlot = getReturnAddressFrameIndex(DAG);
01173   SDOperand Result;
01174   if (isFrameAddress)
01175     Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrSlot,
01176                          DAG.getConstant(4, MVT::i32));
01177   else
01178     // Just load the return address
01179     Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrSlot,
01180                          DAG.getSrcValue(NULL));
01181   return std::make_pair(Result, Chain);
01182 }
01183 
01184 /// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode
01185 /// which corresponds to the condition code.  X86CC must be one of the
      /// X86ISD::COND_* values listed below; anything else trips the assertion.
01186 static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) {
01187   switch (X86CC) {
        // NOTE(review): when assertions are compiled out, the default label falls
        // through to the COND_A case.
01188   default: assert(0 && "Unknown X86 conditional code!");
01189   case X86ISD::COND_A:  return X86::JA;
01190   case X86ISD::COND_AE: return X86::JAE;
01191   case X86ISD::COND_B:  return X86::JB;
01192   case X86ISD::COND_BE: return X86::JBE;
01193   case X86ISD::COND_E:  return X86::JE;
01194   case X86ISD::COND_G:  return X86::JG;
01195   case X86ISD::COND_GE: return X86::JGE;
01196   case X86ISD::COND_L:  return X86::JL;
01197   case X86ISD::COND_LE: return X86::JLE;
01198   case X86ISD::COND_NE: return X86::JNE;
01199   case X86ISD::COND_NO: return X86::JNO;
01200   case X86ISD::COND_NP: return X86::JNP;
01201   case X86ISD::COND_NS: return X86::JNS;
01202   case X86ISD::COND_O:  return X86::JO;
01203   case X86ISD::COND_P:  return X86::JP;
01204   case X86ISD::COND_S:  return X86::JS;
01205   }
01206 }
01207 
01208 /// translateX86CC - do a one to one translation of a ISD::CondCode to the X86
01209 /// specific condition code. It returns false if it cannot do a direct
01210 /// translation. X86CC is the translated CondCode. Flip is set to true if the
01211 /// order of comparison operands should be flipped. isFP selects the floating
      /// point mapping, which is expressed in terms of the unsigned conditions
      /// (A/AE/B/BE) plus the parity flag for ordered/unordered tests.
01212 static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
01213                            unsigned &X86CC, bool &Flip) {
01214   Flip = false;
01215   X86CC = X86ISD::COND_INVALID;
01216   if (!isFP) {
01217     switch (SetCCOpcode) {
01218     default: break;
01219     case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
01220     case ISD::SETGT:  X86CC = X86ISD::COND_G;  break;
01221     case ISD::SETGE:  X86CC = X86ISD::COND_GE; break;
01222     case ISD::SETLT:  X86CC = X86ISD::COND_L;  break;
01223     case ISD::SETLE:  X86CC = X86ISD::COND_LE; break;
01224     case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
01225     case ISD::SETULT: X86CC = X86ISD::COND_B;  break;
01226     case ISD::SETUGT: X86CC = X86ISD::COND_A;  break;
01227     case ISD::SETULE: X86CC = X86ISD::COND_BE; break;
01228     case ISD::SETUGE: X86CC = X86ISD::COND_AE; break;
01229     }
01230   } else {
01231     // On a floating point condition, the flags are set as follows:
01232     // ZF  PF  CF   op
01233     //  0 | 0 | 0 | X > Y
01234     //  0 | 0 | 1 | X < Y
01235     //  1 | 0 | 0 | X == Y
01236     //  1 | 1 | 1 | unordered
          //
          // Hence A (CF=0 and ZF=0) and AE (CF=0) are false on unordered inputs,
          // while B (CF=1) and BE (CF=1 or ZF=1) are true on unordered inputs.
          // The ordered "less" tests and the unordered "greater" tests are
          // obtained by swapping the operands; strictness must be preserved
          // across the swap (X < Y is Y > X, X <= Y is Y >= X).
01237     switch (SetCCOpcode) {
01238     default: break;
01239     case ISD::SETUEQ:
01240     case ISD::SETEQ: X86CC = X86ISD::COND_E;  break;
01241     case ISD::SETOLT: Flip = true; // Fallthrough
01242     case ISD::SETOGT:
01243     case ISD::SETGT: X86CC = X86ISD::COND_A;  break;
01244     case ISD::SETOLE: Flip = true; // Fallthrough
01245     case ISD::SETOGE:
01246     case ISD::SETGE: X86CC = X86ISD::COND_AE; break;
01247     case ISD::SETUGT: Flip = true; // Fallthrough
01248     case ISD::SETULT:
01249     case ISD::SETLT: X86CC = X86ISD::COND_B;  break;
01250     case ISD::SETUGE: Flip = true; // Fallthrough
01251     case ISD::SETULE:
01252     case ISD::SETLE: X86CC = X86ISD::COND_BE; break;
01253     case ISD::SETONE:
01254     case ISD::SETNE: X86CC = X86ISD::COND_NE; break;
01255     case ISD::SETUO: X86CC = X86ISD::COND_P;  break;
01256     case ISD::SETO:  X86CC = X86ISD::COND_NP; break;
01257     }
01258   }
01259 
01260   return X86CC != X86ISD::COND_INVALID;
01261 }
01262 /// translateX86CC - Same translation, taking the condition as a DAG operand.
01263 static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC,
01264                            bool &Flip) {
01265   const ISD::CondCode Code = cast<CondCodeSDNode>(CC)->get();
01266   return translateX86CC(Code, isFP, X86CC, Flip);
01267 }
01268 /// hasFPCMov - Report whether a floating point cmov exists for the given X86
01269 /// condition code. The current x86 isa includes these FP cmov instructions:
01270 /// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
01271 static bool hasFPCMov(unsigned X86CC) {
01272   switch (X86CC) {
01273   case X86ISD::COND_A:
01274   case X86ISD::COND_AE:
01275   case X86ISD::COND_B:
01276   case X86ISD::COND_BE:
01277   case X86ISD::COND_E:
01278   case X86ISD::COND_NE:
01279   case X86ISD::COND_P:
01280   case X86ISD::COND_NP:
01281     return true;
01282   default:
01283     return false;
01284   }
01285 }
01286 /// InsertAtEndOfBasicBlock - Expand the pseudo instruction MI at the end of
      /// BB.  CMOV_* pseudos become a diamond (conditional branch, fallthrough
      /// block, join block with a PHI); FP_TO_INT*_IN_MEM pseudos become an FP
      /// control word save / modify / FpIST store / restore sequence.  MI is
      /// deleted.  Returns the block at whose end insertion should continue: the
      /// new join block for CMOV_*, BB itself for FP_TO_INT*_IN_MEM.
01287 MachineBasicBlock *
01288 X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
01289                                            MachineBasicBlock *BB) {
01290   switch (MI->getOpcode()) {
01291   default: assert(false && "Unexpected instr type to insert");
01292   case X86::CMOV_FR32:
01293   case X86::CMOV_FR64:
01294   case X86::CMOV_V4F32:
01295   case X86::CMOV_V2F64:
01296   case X86::CMOV_V2I64: {
01297     // To "insert" a SELECT_CC instruction, we actually have to insert the
01298     // diamond control-flow pattern.  The incoming instruction knows the
01299     // destination vreg to set, the condition code register to branch on, the
01300     // true/false values to select between, and a branch opcode to use.
01301     const BasicBlock *LLVM_BB = BB->getBasicBlock();
01302     ilist<MachineBasicBlock>::iterator It = BB;
01303     ++It;
01304   
01305     //  thisMBB:
01306     //  ...
01307     //   TrueVal = ...
01308     //   cmpTY ccX, r1, r2
01309     //   bCC copy1MBB
01310     //   fallthrough --> copy0MBB
01311     MachineBasicBlock *thisMBB = BB;
01312     MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
01313     MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
01314     unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue());
01315     BuildMI(BB, Opc, 1).addMBB(sinkMBB);
01316     MachineFunction *F = BB->getParent();
01317     F->getBasicBlockList().insert(It, copy0MBB);
01318     F->getBasicBlockList().insert(It, sinkMBB);
01319     // Update machine-CFG edges by first adding all successors of the current
01320     // block to the new block which will contain the Phi node for the select.
01321     for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 
01322         e = BB->succ_end(); i != e; ++i)
01323       sinkMBB->addSuccessor(*i);
01324     // Next, remove all successors of the current block, and add the true
01325     // and fallthrough blocks as its successors.
01326     while(!BB->succ_empty())
01327       BB->removeSuccessor(BB->succ_begin());
01328     BB->addSuccessor(copy0MBB);
01329     BB->addSuccessor(sinkMBB);
01330   
01331     //  copy0MBB:
01332     //   %FalseValue = ...
01333     //   # fallthrough to sinkMBB
01334     BB = copy0MBB;
01335   
01336     // Update machine-CFG edges
01337     BB->addSuccessor(sinkMBB);
01338   
01339     //  sinkMBB:
01340     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
01341     //  ...
01342     BB = sinkMBB;
01343     BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg())
01344       .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
01345       .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
01346 
01347     delete MI;   // The pseudo instruction is gone now.
01348     return BB;
01349   }
01350 
01351   case X86::FP_TO_INT16_IN_MEM:
01352   case X86::FP_TO_INT32_IN_MEM:
01353   case X86::FP_TO_INT64_IN_MEM: {
01354     // Change the floating point control register to use "round towards zero"
01355     // mode when truncating to an integer value.
01356     MachineFunction *F = BB->getParent();
01357     int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
01358     addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
01359 
01360     // Load the old value of the high byte of the control word...
01361     unsigned OldCW =
01362       F->getSSARegMap()->createVirtualRegister(X86::R16RegisterClass);
01363     addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);
01364 
01365     // Set the high part to be round to zero...
01366     addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);
01367 
01368     // Reload the modified control word now...
01369     addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
01370 
01371     // Restore the memory image of control word to original value
01372     addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);
01373 
01374     // Get the X86 opcode to use.
01375     unsigned Opc;
01376     switch (MI->getOpcode()) {
01377     default: assert(0 && "illegal opcode!");
01378     case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
01379     case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
01380     case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
01381     }
01382 
          // Rebuild the destination address from operands 0-3 of the pseudo.
          // Each operand gets its own reference: assigning through a single
          // MachineOperand& would overwrite operand 0 of MI instead of rebinding.
01383     X86AddressMode AM;
01384     MachineOperand &BaseOp = MI->getOperand(0);
01385     if (BaseOp.isRegister()) {
01386       AM.BaseType = X86AddressMode::RegBase;
01387       AM.Base.Reg = BaseOp.getReg();
01388     } else {
01389       AM.BaseType = X86AddressMode::FrameIndexBase;
01390       AM.Base.FrameIndex = BaseOp.getFrameIndex();
01391     }
01392     MachineOperand &ScaleOp = MI->getOperand(1);
01393     if (ScaleOp.isImmediate())
01394       AM.Scale = ScaleOp.getImmedValue();
01395     MachineOperand &IndexOp = MI->getOperand(2);
01396     if (IndexOp.isImmediate())
01397       AM.IndexReg = IndexOp.getImmedValue();
01398     MachineOperand &DispOp = MI->getOperand(3);
01399     if (DispOp.isGlobalAddress()) {
01400       AM.GV = DispOp.getGlobal();
01401     } else {
01402       AM.Disp = DispOp.getImmedValue();
01403     }
01404     addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());
01405 
01406     // Reload the original control word now.
01407     addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
01408 
01409     delete MI;   // The pseudo instruction is gone now.
01410     return BB;
01411   }
01412   }
01413 }
01414 
01415 
01416 //===----------------------------------------------------------------------===//
01417 //                           X86 Custom Lowering Hooks
01418 //===----------------------------------------------------------------------===//
01419 
01420 /// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra
01421 /// load. For Darwin, external and weak symbols are indirect, loading the value
01422 /// at address GV rather than the value of GV itself. This means that the
01423 /// GlobalAddress must be in the base or index register of the address, not the
01424 /// GV offset field.
01425 static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
01426   if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage())
01427     return true;
01428   return GV->isExternal() && !GV->hasNotBeenReadFromBytecode(); }
01429 
01430 /// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
01431 /// true if Op is undef or if its value lies in the half-open range [Low, Hi).
01432 static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
01433   if (Op.getOpcode() != ISD::UNDEF) {
01434     unsigned Val = cast<ConstantSDNode>(Op)->getValue();
01435     return Low <= Val && Val < Hi;
01436   }
01437   return true;
01438 }
01439 
01440 /// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
01441 /// true if Op is undef or if its value equal to the specified value.
01442 static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
01443   if (Op.getOpcode() == ISD::UNDEF)
01444     return true;
01445   return cast<ConstantSDNode>(Op)->getValue() == Val;
01446 }
01447 
01448 /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
01449 /// specifies a shuffle of elements that is suitable for input to PSHUFD.
01450 bool X86::isPSHUFDMask(SDNode *N) {
01451   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01452 
01453   if (N->getNumOperands() != 4)
01454     return false;
01455 
01456   // Check if the value doesn't reference the second vector.
01457   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
01458     SDOperand Arg = N->getOperand(i);
01459     if (Arg.getOpcode() == ISD::UNDEF) continue;
01460     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01461     if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
01462       return false;
01463   }
01464 
01465   return true;
01466 }
01467 
01468 /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
01469 /// specifies a shuffle of elements that is suitable for input to PSHUFHW.
01470 bool X86::isPSHUFHWMask(SDNode *N) {
01471   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01472 
01473   if (N->getNumOperands() != 8)
01474     return false;
01475 
01476   // Lower quadword copied in order.
01477   for (unsigned i = 0; i != 4; ++i) {
01478     SDOperand Arg = N->getOperand(i);
01479     if (Arg.getOpcode() == ISD::UNDEF) continue;
01480     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01481     if (cast<ConstantSDNode>(Arg)->getValue() != i)
01482       return false;
01483   }
01484 
01485   // Upper quadword shuffled.
01486   for (unsigned i = 4; i != 8; ++i) {
01487     SDOperand Arg = N->getOperand(i);
01488     if (Arg.getOpcode() == ISD::UNDEF) continue;
01489     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01490     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
01491     if (Val < 4 || Val > 7)
01492       return false;
01493   }
01494 
01495   return true;
01496 }
01497 
01498 /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
01499 /// specifies a shuffle of elements that is suitable for input to PSHUFLW.
01500 bool X86::isPSHUFLWMask(SDNode *N) {
01501   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01502 
01503   if (N->getNumOperands() != 8)
01504     return false;
01505 
01506   // Upper quadword copied in order.
01507   for (unsigned i = 4; i != 8; ++i)
01508     if (!isUndefOrEqual(N->getOperand(i), i))
01509       return false;
01510 
01511   // Lower quadword shuffled.
01512   for (unsigned i = 0; i != 4; ++i)
01513     if (!isUndefOrInRange(N->getOperand(i), 0, 4))
01514       return false;
01515 
01516   return true;
01517 }
01518 
01519 /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
01520 /// specifies a shuffle of elements that is suitable for input to SHUFP*.
01521 bool X86::isSHUFPMask(SDNode *N) {
01522   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01523 
01524   unsigned NumElems = N->getNumOperands();
01525   if (NumElems == 2) {
01526     // The only cases that ought be handled by SHUFPD is
01527     // Dest { 2, 1 } <=  shuffle( Dest { 1, 0 },  Src { 3, 2 }
01528     // Dest { 3, 0 } <=  shuffle( Dest { 1, 0 },  Src { 3, 2 }
01529     // Expect bit 0 == 1, bit1 == 2
01530     SDOperand Bit0 = N->getOperand(0);
01531     SDOperand Bit1 = N->getOperand(1);
01532     if (isUndefOrEqual(Bit0, 0) && isUndefOrEqual(Bit1, 3))
01533       return true;
01534     if (isUndefOrEqual(Bit0, 1) && isUndefOrEqual(Bit1, 2))
01535       return true;
01536     return false;
01537   }
01538 
01539   if (NumElems != 4) return false;
01540 
01541   // Each half must refer to only one of the vector.
01542   for (unsigned i = 0; i < 2; ++i) {
01543     SDOperand Arg = N->getOperand(i);
01544     if (Arg.getOpcode() == ISD::UNDEF) continue;
01545     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01546     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
01547     if (Val >= 4) return false;
01548   }
01549   for (unsigned i = 2; i < 4; ++i) {
01550     SDOperand Arg = N->getOperand(i);
01551     if (Arg.getOpcode() == ISD::UNDEF) continue;
01552     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01553     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
01554     if (Val < 4) return false;
01555   }
01556 
01557   return true;
01558 }
01559 
01560 /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
01561 /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
01562 bool X86::isMOVHLPSMask(SDNode *N) {
01563   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01564 
01565   if (N->getNumOperands() != 4)
01566     return false;
01567 
01568   // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
01569   return isUndefOrEqual(N->getOperand(0), 6) &&
01570          isUndefOrEqual(N->getOperand(1), 7) &&
01571          isUndefOrEqual(N->getOperand(2), 2) &&
01572          isUndefOrEqual(N->getOperand(3), 3);
01573 }
01574 
01575 /// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
01576 /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
01577 bool X86::isMOVLHPSMask(SDNode *N) {
01578   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01579 
01580   if (N->getNumOperands() != 4)
01581     return false;
01582 
01583   // Expect bit0 == 0, bit1 == 1, bit2 == 4, bit3 == 5
01584   return isUndefOrEqual(N->getOperand(0), 0) &&
01585          isUndefOrEqual(N->getOperand(1), 1) &&
01586          isUndefOrEqual(N->getOperand(2), 4) &&
01587          isUndefOrEqual(N->getOperand(3), 5);
01588 }
01589 
01590 /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
01591 /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
01592 bool X86::isMOVLPMask(SDNode *N) {
01593   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01594 
01595   unsigned NumElems = N->getNumOperands();
01596   if (NumElems != 2 && NumElems != 4)
01597     return false;
01598 
01599   for (unsigned i = 0; i < NumElems/2; ++i)
01600     if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
01601       return false;
01602 
01603   for (unsigned i = NumElems/2; i < NumElems; ++i)
01604     if (!isUndefOrEqual(N->getOperand(i), i))
01605       return false;
01606 
01607   return true;
01608 }
01609 
01610 /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
01611 /// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}.
01612 bool X86::isMOVHPMask(SDNode *N) {
01613   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01614 
01615   unsigned NumElems = N->getNumOperands();
01616   if (NumElems != 2 && NumElems != 4)
01617     return false;
01618 
01619   for (unsigned i = 0; i < NumElems/2; ++i)
01620     if (!isUndefOrEqual(N->getOperand(i), i))
01621       return false;
01622 
01623   for (unsigned i = 0; i < NumElems/2; ++i) {
01624     SDOperand Arg = N->getOperand(i + NumElems/2);
01625     if (!isUndefOrEqual(Arg, i + NumElems))
01626       return false;
01627   }
01628 
01629   return true;
01630 }
01631 
01632 /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
01633 /// specifies a shuffle of elements that is suitable for input to UNPCKL.
01634 bool X86::isUNPCKLMask(SDNode *N) {
01635   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01636 
01637   unsigned NumElems = N->getNumOperands();
01638   if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
01639     return false;
01640 
01641   for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
01642     SDOperand BitI  = N->getOperand(i);
01643     SDOperand BitI1 = N->getOperand(i+1);
01644     if (!isUndefOrEqual(BitI, j))
01645       return false;
01646     if (!isUndefOrEqual(BitI1, j + NumElems))
01647       return false;
01648   }
01649 
01650   return true;
01651 }
01652 
01653 /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
01654 /// specifies a shuffle of elements that is suitable for input to UNPCKH.
01655 bool X86::isUNPCKHMask(SDNode *N) {
01656   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01657 
01658   unsigned NumElems = N->getNumOperands();
01659   if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
01660     return false;
01661 
01662   for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
01663     SDOperand BitI  = N->getOperand(i);
01664     SDOperand BitI1 = N->getOperand(i+1);
01665     if (!isUndefOrEqual(BitI, j + NumElems/2))
01666       return false;
01667     if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems))
01668       return false;
01669   }
01670 
01671   return true;
01672 }
01673 
01674 /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
01675 /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
01676 /// <0, 0, 1, 1>
01677 bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
01678   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01679 
01680   unsigned NumElems = N->getNumOperands();
01681   if (NumElems != 4 && NumElems != 8 && NumElems != 16)
01682     return false;
01683 
01684   for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
01685     SDOperand BitI  = N->getOperand(i);
01686     SDOperand BitI1 = N->getOperand(i+1);
01687 
01688     if (!isUndefOrEqual(BitI, j))
01689       return false;
01690     if (!isUndefOrEqual(BitI1, j))
01691       return false;
01692   }
01693 
01694   return true;
01695 }
01696 
01697 /// isMOVSMask - Return true if the specified VECTOR_SHUFFLE operand
01698 /// specifies a shuffle of elements that is suitable for input to MOVS{S|D}.
01699 bool X86::isMOVSMask(SDNode *N) {
01700   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01701 
01702   unsigned NumElems = N->getNumOperands();
01703   if (NumElems != 2 && NumElems != 4)
01704     return false;
01705 
01706   if (!isUndefOrEqual(N->getOperand(0), NumElems))
01707     return false;
01708 
01709   for (unsigned i = 1; i < NumElems; ++i) {
01710     SDOperand Arg = N->getOperand(i);
01711     if (!isUndefOrEqual(Arg, i))
01712       return false;
01713   }
01714 
01715   return true;
01716 }
01717 
01718 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
01719 /// a splat of a single element.
01720 bool X86::isSplatMask(SDNode *N) {
01721   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01722 
01723   // We can only splat 64-bit, and 32-bit quantities.
01724   if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
01725     return false;
01726 
01727   // This is a splat operation if each element of the permute is the same, and
01728   // if the value doesn't reference the second vector.
01729   SDOperand Elt = N->getOperand(0);
01730   assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
01731   for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) {
01732     SDOperand Arg = N->getOperand(i);
01733     if (Arg.getOpcode() == ISD::UNDEF) continue;
01734     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01735     if (Arg != Elt) return false;
01736   }
01737 
01738   // Make sure it is a splat of the first vector operand.
01739   return cast<ConstantSDNode>(Elt)->getValue() < N->getNumOperands();
01740 }
01741 
01742 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
01743 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
01744 /// instructions.
01745 unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
01746   unsigned NumOperands = N->getNumOperands();
01747   unsigned Shift = (NumOperands == 4) ? 2 : 1;
01748   unsigned Mask = 0;
01749   for (unsigned i = 0; i < NumOperands; ++i) {
01750     unsigned Val = 0;
01751     SDOperand Arg = N->getOperand(NumOperands-i-1);
01752     if (Arg.getOpcode() != ISD::UNDEF)
01753       Val = cast<ConstantSDNode>(Arg)->getValue();
01754     if (Val >= NumOperands) Val -= NumOperands;
01755     Mask |= Val;
01756     if (i != NumOperands - 1)
01757       Mask <<= Shift;
01758   }
01759 
01760   return Mask;
01761 }
01762 
01763 /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
01764 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
01765 /// instructions.
01766 unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
01767   unsigned Mask = 0;
01768   // 8 nodes, but we only care about the last 4.
01769   for (unsigned i = 7; i >= 4; --i) {
01770     unsigned Val = 0;
01771     SDOperand Arg = N->getOperand(i);
01772     if (Arg.getOpcode() != ISD::UNDEF)
01773       Val = cast<ConstantSDNode>(Arg)->getValue();
01774     Mask |= (Val - 4);
01775     if (i != 4)
01776       Mask <<= 2;
01777   }
01778 
01779   return Mask;
01780 }
01781 
01782 /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
01783 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
01784 /// instructions.
01785 unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
01786   unsigned Mask = 0;
01787   // 8 nodes, but we only care about the first 4.
01788   for (int i = 3; i >= 0; --i) {
01789     unsigned Val = 0;
01790     SDOperand Arg = N->getOperand(i);
01791     if (Arg.getOpcode() != ISD::UNDEF)
01792       Val = cast<ConstantSDNode>(Arg)->getValue();
01793     Mask |= Val;
01794     if (i != 0)
01795       Mask <<= 2;
01796   }
01797 
01798   return Mask;
01799 }
01800 
01801 /// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
01802 /// specifies a 8 element shuffle that can be broken into a pair of
01803 /// PSHUFHW and PSHUFLW.
01804 static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
01805   assert(N->getOpcode() == ISD::BUILD_VECTOR);
01806 
01807   if (N->getNumOperands() != 8)
01808     return false;
01809 
01810   // Lower quadword shuffled.
01811   for (unsigned i = 0; i != 4; ++i) {
01812     SDOperand Arg = N->getOperand(i);
01813     if (Arg.getOpcode() == ISD::UNDEF) continue;
01814     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01815     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
01816     if (Val > 4)
01817       return false;
01818   }
01819 
01820   // Upper quadword shuffled.
01821   for (unsigned i = 4; i != 8; ++i) {
01822     SDOperand Arg = N->getOperand(i);
01823     if (Arg.getOpcode() == ISD::UNDEF) continue;
01824     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01825     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
01826     if (Val < 4 || Val > 7)
01827       return false;
01828   }
01829 
01830   return true;
01831 }
01832 
01833 /// CommuteVectorShuffle - Swap vector_shuffle operandsas well as
01834 /// values in ther permute mask.
01835 static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) {
01836   SDOperand V1 = Op.getOperand(0);
01837   SDOperand V2 = Op.getOperand(1);
01838   SDOperand Mask = Op.getOperand(2);
01839   MVT::ValueType VT = Op.getValueType();
01840   MVT::ValueType MaskVT = Mask.getValueType();
01841   MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
01842   unsigned NumElems = Mask.getNumOperands();
01843   std::vector<SDOperand> MaskVec;
01844 
01845   for (unsigned i = 0; i != NumElems; ++i) {
01846     SDOperand Arg = Mask.getOperand(i);
01847     if (Arg.getOpcode() == ISD::UNDEF) continue;
01848     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
01849     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
01850     if (Val < NumElems)
01851       MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
01852     else
01853       MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
01854   }
01855 
01856   Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
01857   return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
01858 }
01859 
01860 /// isScalarLoadToVector - Returns true if the node is a scalar load that
01861 /// is promoted to a vector.
01862 static inline bool isScalarLoadToVector(SDOperand Op) {
01863   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR) {
01864     Op = Op.getOperand(0);
01865     return (Op.getOpcode() == ISD::LOAD);
01866   }
01867   return false;
01868 }
01869 
01870 /// ShouldXformedToMOVLP - Return true if the node should be transformed to
01871 /// match movlp{d|s}. The lower half elements should come from V1 (and in
01872 /// order), and the upper half elements should come from the upper half of
01873 /// V2 (not necessarily in order). And since V1 will become the source of
01874 /// the MOVLP, it must be a scalar load.
01875 static bool ShouldXformedToMOVLP(SDOperand V1, SDOperand V2, SDOperand Mask) {
01876   if (isScalarLoadToVector(V1)) {
01877     unsigned NumElems = Mask.getNumOperands();
01878     for (unsigned i = 0, e = NumElems/2; i != e; ++i)
01879       if (!isUndefOrEqual(Mask.getOperand(i), i))
01880         return false;
01881     for (unsigned i = NumElems/2; i != NumElems; ++i)
01882       if (!isUndefOrInRange(Mask.getOperand(i),
01883                             NumElems+NumElems/2, NumElems*2))
01884         return false;
01885     return true;
01886   }
01887 
01888   return false;
01889 }
01890 
01891 /// isLowerFromV2UpperFromV1 - Returns true if the shuffle mask is except
01892 /// the reverse of what x86 shuffles want. x86 shuffles requires the lower
01893 /// half elements to come from vector 1 (which would equal the dest.) and
01894 /// the upper half to come from vector 2.
01895 static bool isLowerFromV2UpperFromV1(SDOperand Op) {
01896   assert(Op.getOpcode() == ISD::BUILD_VECTOR);
01897 
01898   unsigned NumElems = Op.getNumOperands();
01899   for (unsigned i = 0, e = NumElems/2; i != e; ++i)
01900     if (!isUndefOrInRange(Op.getOperand(i), NumElems, NumElems*2))
01901       return false;
01902   for (unsigned i = NumElems/2; i != NumElems; ++i)
01903     if (!isUndefOrInRange(Op.getOperand(i), 0, NumElems))
01904       return false;
01905   return true;
01906 }
01907 
01908 /// LowerOperation - Provide custom lowering hooks for some operations.
01909 ///
01910 SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
01911   switch (Op.getOpcode()) {
01912   default: assert(0 && "Should not custom lower this!");
01913   case ISD::SHL_PARTS:
01914   case ISD::SRA_PARTS:
01915   case ISD::SRL_PARTS: {
01916     assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
01917            "Not an i64 shift!");
01918     bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
01919     SDOperand ShOpLo = Op.getOperand(0);
01920     SDOperand ShOpHi = Op.getOperand(1);
01921     SDOperand ShAmt  = Op.getOperand(2);
01922     SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi,
01923                                          DAG.getConstant(31, MVT::i8))
01924                            : DAG.getConstant(0, MVT::i32);
01925 
01926     SDOperand Tmp2, Tmp3;
01927     if (Op.getOpcode() == ISD::SHL_PARTS) {
01928       Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
01929       Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
01930     } else {
01931       Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
01932       Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
01933     }
01934 
01935     SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag,
01936                                    ShAmt, DAG.getConstant(32, MVT::i8));
01937 
01938     SDOperand Hi, Lo;
01939     SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
01940 
01941     std::vector<MVT::ValueType> Tys;
01942     Tys.push_back(MVT::i32);
01943     Tys.push_back(MVT::Flag);
01944     std::vector<SDOperand> Ops;
01945     if (Op.getOpcode() == ISD::SHL_PARTS) {
01946       Ops.push_back(Tmp2);
01947       Ops.push_back(Tmp3);
01948       Ops.push_back(CC);
01949       Ops.push_back(InFlag);
01950       Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
01951       InFlag = Hi.getValue(1);
01952 
01953       Ops.clear();
01954       Ops.push_back(Tmp3);
01955       Ops.push_back(Tmp1);
01956       Ops.push_back(CC);
01957       Ops.push_back(InFlag);
01958       Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
01959     } else {
01960       Ops.push_back(Tmp2);
01961       Ops.push_back(Tmp3);
01962       Ops.push_back(CC);
01963       Ops.push_back(InFlag);
01964       Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
01965       InFlag = Lo.getValue(1);
01966 
01967       Ops.clear();
01968       Ops.push_back(Tmp3);
01969       Ops.push_back(Tmp1);
01970       Ops.push_back(CC);
01971       Ops.push_back(InFlag);
01972       Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
01973     }
01974 
01975     Tys.clear();
01976     Tys.push_back(MVT::i32);
01977     Tys.push_back(MVT::i32);
01978     Ops.clear();
01979     Ops.push_back(Lo);
01980     Ops.push_back(Hi);
01981     return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
01982   }
01983   case ISD::SINT_TO_FP: {
01984     assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
01985            Op.getOperand(0).getValueType() >= MVT::i16 &&
01986            "Unknown SINT_TO_FP to lower!");
01987 
01988     SDOperand Result;
01989     MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
01990     unsigned Size = MVT::getSizeInBits(SrcVT)/8;
01991     MachineFunction &MF = DAG.getMachineFunction();
01992     int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
01993     SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
01994     SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other,
01995                                   DAG.getEntryNode(), Op.getOperand(0),
01996                                   StackSlot, DAG.getSrcValue(NULL));
01997 
01998     // Build the FILD
01999     std::vector<MVT::ValueType> Tys;
02000     Tys.push_back(MVT::f64);
02001     Tys.push_back(MVT::Other);
02002     if (X86ScalarSSE) Tys.push_back(MVT::Flag);
02003     std::vector<SDOperand> Ops;
02004     Ops.push_back(Chain);
02005     Ops.push_back(StackSlot);
02006     Ops.push_back(DAG.getValueType(SrcVT));
02007     Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
02008                          Tys, Ops);
02009 
02010     if (X86ScalarSSE) {
02011       Chain = Result.getValue(1);
02012       SDOperand InFlag = Result.getValue(2);
02013 
02014       // FIXME: Currently the FST is flagged to the FILD_FLAG. This
02015       // shouldn't be necessary except that RFP cannot be live across
02016       // multiple blocks. When stackifier is fixed, they can be uncoupled.
02017       MachineFunction &MF = DAG.getMachineFunction();
02018       int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
02019       SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
02020       std::vector<MVT::ValueType> Tys;
02021       Tys.push_back(MVT::Other);
02022       std::vector<SDOperand> Ops;
02023       Ops.push_back(Chain);
02024       Ops.push_back(Result);
02025       Ops.push_back(StackSlot);
02026       Ops.push_back(DAG.getValueType(Op.getValueType()));
02027       Ops.push_back(InFlag);
02028       Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
02029       Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
02030                            DAG.getSrcValue(NULL));
02031     }
02032 
02033     return Result;
02034   }
02035   case ISD::FP_TO_SINT: {
02036     assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
02037            "Unknown FP_TO_SINT to lower!");
02038     // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
02039     // stack slot.
02040     MachineFunction &MF = DAG.getMachineFunction();
02041     unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
02042     int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
02043     SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
02044 
02045     unsigned Opc;
02046     switch (Op.getValueType()) {
02047     default: assert(0 && "Invalid FP_TO_SINT to lower!");
02048     case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
02049     case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
02050     case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
02051     }
02052 
02053     SDOperand Chain = DAG.getEntryNode();
02054     SDOperand Value = Op.getOperand(0);
02055     if (X86ScalarSSE) {
02056       assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
02057       Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot, 
02058                           DAG.getSrcValue(0));
02059       std::vector<MVT::ValueType> Tys;
02060       Tys.push_back(MVT::f64);
02061       Tys.push_back(MVT::Other);
02062       std::vector<SDOperand> Ops;
02063       Ops.push_back(Chain);
02064       Ops.push_back(StackSlot);
02065       Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType()));
02066       Value = DAG.getNode(X86ISD::FLD, Tys, Ops);
02067       Chain = Value.getValue(1);
02068       SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
02069       StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
02070     }
02071 
02072     // Build the FP_TO_INT*_IN_MEM
02073     std::vector<SDOperand> Ops;
02074     Ops.push_back(Chain);
02075     Ops.push_back(Value);
02076     Ops.push_back(StackSlot);
02077     SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops);
02078 
02079     // Load the result.
02080     return DAG.getLoad(Op.getValueType(), FIST, StackSlot,
02081                        DAG.getSrcValue(NULL));
02082   }
02083   case ISD::READCYCLECOUNTER: {
02084     std::vector<MVT::ValueType> Tys;
02085     Tys.push_back(MVT::Other);
02086     Tys.push_back(MVT::Flag);
02087     std::vector<SDOperand> Ops;
02088     Ops.push_back(Op.getOperand(0));
02089     SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops);
02090     Ops.clear();
02091     Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)));
02092     Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX, 
02093                                      MVT::i32, Ops[0].getValue(2)));
02094     Ops.push_back(Ops[1].getValue(1));
02095     Tys[0] = Tys[1] = MVT::i32;
02096     Tys.push_back(MVT::Other);
02097     return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
02098   }
02099   case ISD::FABS: {
02100     MVT::ValueType VT = Op.getValueType();
02101     const Type *OpNTy =  MVT::getTypeForValueType(VT);
02102     std::vector<Constant*> CV;
02103     if (VT == MVT::f64) {
02104       CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
02105       CV.push_back(ConstantFP::get(OpNTy, 0.0));
02106     } else {
02107       CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
02108       CV.push_back(ConstantFP::get(OpNTy, 0.0));
02109       CV.push_back(ConstantFP::get(OpNTy, 0.0));
02110       CV.push_back(ConstantFP::get(OpNTy, 0.0));
02111     }
02112     Constant *CS = ConstantStruct::get(CV);
02113     SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
02114     SDOperand Mask 
02115       = DAG.getNode(X86ISD::LOAD_PACK,
02116                     VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
02117     return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
02118   }
02119   case ISD::FNEG: {
02120     MVT::ValueType VT = Op.getValueType();
02121     const Type *OpNTy =  MVT::getTypeForValueType(VT);
02122     std::vector<Constant*> CV;
02123     if (VT == MVT::f64) {
02124       CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
02125       CV.push_back(ConstantFP::get(OpNTy, 0.0));
02126     } else {
02127       CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
02128       CV.push_back(ConstantFP::get(OpNTy, 0.0));
02129       CV.push_back(ConstantFP::get(OpNTy, 0.0));
02130       CV.push_back(ConstantFP::get(OpNTy, 0.0));
02131     }
02132     Constant *CS = ConstantStruct::get(CV);
02133     SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
02134     SDOperand Mask 
02135       = DAG.getNode(X86ISD::LOAD_PACK,
02136                     VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
02137     return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
02138   }
02139   case ISD::SETCC: {
02140     assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
02141     SDOperand Cond;
02142     SDOperand CC = Op.getOperand(2);
02143     ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
02144     bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
02145     bool Flip;
02146     unsigned X86CC;
02147     if (translateX86CC(CC, isFP, X86CC, Flip)) {
02148       if (Flip)
02149         Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
02150                            Op.getOperand(1), Op.getOperand(0));
02151       else
02152         Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
02153                            Op.getOperand(0), Op.getOperand(1));
02154       return DAG.getNode(X86ISD::SETCC, MVT::i8, 
02155                          DAG.getConstant(X86CC, MVT::i8), Cond);
02156     } else {
02157       assert(isFP && "Illegal integer SetCC!");
02158 
02159       Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
02160                          Op.getOperand(0), Op.getOperand(1));
02161       std::vector<MVT::ValueType> Tys;
02162       std::vector<SDOperand> Ops;
02163       switch (SetCCOpcode) {
02164       default: assert(false && "Illegal floating point SetCC!");
02165       case ISD::SETOEQ: {  // !PF & ZF
02166         Tys.push_back(MVT::i8);
02167         Tys.push_back(MVT::Flag);
02168         Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8));
02169         Ops.push_back(Cond);
02170         SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
02171         SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
02172                                      DAG.getConstant(X86ISD::COND_E, MVT::i8),
02173                                      Tmp1.getValue(1));
02174         return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
02175       }
02176       case ISD::SETUNE: {  // PF | !ZF
02177         Tys.push_back(MVT::i8);
02178         Tys.push_back(MVT::Flag);
02179         Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8));
02180         Ops.push_back(Cond);
02181         SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
02182         SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
02183                                      DAG.getConstant(X86ISD::COND_NE, MVT::i8),
02184                                      Tmp1.getValue(1));
02185         return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
02186       }
02187       }
02188     }
02189   }
02190   case ISD::SELECT: {
02191     MVT::ValueType VT = Op.getValueType();
02192     bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE;
02193     bool addTest   = false;
02194     SDOperand Op0 = Op.getOperand(0);
02195     SDOperand Cond, CC;
02196     if (Op0.getOpcode() == ISD::SETCC)
02197       Op0 = LowerOperation(Op0, DAG);
02198 
02199     if (Op0.getOpcode() == X86ISD::SETCC) {
02200       // If condition flag is set by a X86ISD::CMP, then make a copy of it
02201       // (since flag operand cannot be shared). If the X86ISD::SETCC does not
02202       // have another use it will be eliminated.
02203       // If the X86ISD::SETCC has more than one use, then it's probably better
02204       // to use a test instead of duplicating the X86ISD::CMP (for register
02205       // pressure reason).
02206       unsigned CmpOpc = Op0.getOperand(1).getOpcode();
02207       if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
02208           CmpOpc == X86ISD::UCOMI) {
02209         if (!Op0.hasOneUse()) {
02210           std::vector<MVT::ValueType> Tys;
02211           for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i)
02212             Tys.push_back(Op0.Val->getValueType(i));
02213           std::vector<SDOperand> Ops;
02214           for (unsigned i = 0; i < Op0.getNumOperands(); ++i)
02215             Ops.push_back(Op0.getOperand(i));
02216           Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
02217         }
02218 
02219         CC   = Op0.getOperand(0);
02220         Cond = Op0.getOperand(1);
02221         // Make a copy as flag result cannot be used by more than one.
02222         Cond = DAG.getNode(CmpOpc, MVT::Flag,
02223                            Cond.getOperand(0), Cond.getOperand(1));
02224         addTest =
02225           isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
02226       } else
02227         addTest = true;
02228     } else
02229       addTest = true;
02230 
02231     if (addTest) {
02232       CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
02233       Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0);
02234     }
02235 
02236     std::vector<MVT::ValueType> Tys;
02237     Tys.push_back(Op.getValueType());
02238     Tys.push_back(MVT::Flag);
02239     std::vector<SDOperand> Ops;
02240     // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
02241     // condition is true.
02242     Ops.push_back(Op.getOperand(2));
02243     Ops.push_back(Op.getOperand(1));
02244     Ops.push_back(CC);
02245     Ops.push_back(Cond);
02246     return DAG.getNode(X86ISD::CMOV, Tys, Ops);
02247   }
02248   case ISD::BRCOND: {
02249     bool addTest = false;
02250     SDOperand Cond  = Op.getOperand(1);
02251     SDOperand Dest  = Op.getOperand(2);
02252     SDOperand CC;
02253     if (Cond.getOpcode() == ISD::SETCC)
02254       Cond = LowerOperation(Cond, DAG);
02255 
02256     if (Cond.getOpcode() == X86ISD::SETCC) {
02257       // If condition flag is set by a X86ISD::CMP, then make a copy of it
02258       // (since flag operand cannot be shared). If the X86ISD::SETCC does not
02259       // have another use it will be eliminated.
02260       // If the X86ISD::SETCC has more than one use, then it's probably better
02261       // to use a test instead of duplicating the X86ISD::CMP (for register
02262       // pressure reason).
02263       unsigned CmpOpc = Cond.getOperand(1).getOpcode();
02264       if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
02265           CmpOpc == X86ISD::UCOMI) {
02266         if (!Cond.hasOneUse()) {
02267           std::vector<MVT::ValueType> Tys;
02268           for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i)
02269             Tys.push_back(Cond.Val->getValueType(i));
02270           std::vector<SDOperand> Ops;
02271           for (unsigned i = 0; i < Cond.getNumOperands(); ++i)
02272             Ops.push_back(Cond.getOperand(i));
02273           Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops);
02274         }
02275 
02276         CC   = Cond.getOperand(0);
02277         Cond = Cond.getOperand(1);
02278         // Make a copy as flag result cannot be used by more than one.
02279         Cond = DAG.getNode(CmpOpc, MVT::Flag,
02280                            Cond.getOperand(0), Cond.getOperand(1));
02281       } else
02282         addTest = true;
02283     } else
02284       addTest = true;
02285 
02286     if (addTest) {
02287       CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
02288       Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond);
02289     }
02290     return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
02291                        Op.getOperand(0), Op.getOperand(2), CC, Cond);
02292   }
02293   case ISD::MEMSET: {
02294     SDOperand InFlag(0, 0);
02295     SDOperand Chain = Op.getOperand(0);
02296     unsigned Align =
02297       (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
02298     if (Align == 0) Align = 1;
02299 
02300     ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
02301     // If not DWORD aligned, call memset if size is less than the threshold.
02302     // It knows how to align to the right boundary first.
02303     if ((Align & 3) != 0 ||
02304         (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
02305       MVT::ValueType IntPtr = getPointerTy();
02306       const Type *IntPtrTy = getTargetData().getIntPtrType();
02307       std::vector<std::pair<SDOperand, const Type*> > Args;
02308       Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
02309       // Extend the ubyte argument to be an int value for the call.
02310       SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
02311       Args.push_back(std::make_pair(Val, IntPtrTy));
02312       Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
02313       std::pair<SDOperand,SDOperand> CallResult =
02314         LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
02315                     DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
02316       return CallResult.second;
02317     }
02318 
02319     MVT::ValueType AVT;
02320     SDOperand Count;
02321     ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
02322     unsigned BytesLeft = 0;
02323     bool TwoRepStos = false;
02324     if (ValC) {
02325       unsigned ValReg;
02326       unsigned Val = ValC->getValue() & 255;
02327 
02328       // If the value is a constant, then we can potentially use larger sets.
02329       switch (Align & 3) {
02330       case 2:   // WORD aligned
02331         AVT = MVT::i16;
02332         Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
02333         BytesLeft = I->getValue() % 2;
02334         Val    = (Val << 8) | Val;
02335         ValReg = X86::AX;
02336         break;
02337       case 0:   // DWORD aligned
02338         AVT = MVT::i32;
02339         if (I) {
02340           Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
02341           BytesLeft = I->getValue() % 4;
02342         } else {
02343           Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
02344                               DAG.getConstant(2, MVT::i8));
02345           TwoRepStos = true;
02346         }
02347         Val = (Val << 8)  | Val;
02348         Val = (Val << 16) | Val;
02349         ValReg = X86::EAX;
02350         break;
02351       default:  // Byte aligned
02352         AVT = MVT::i8;
02353         Count = Op.getOperand(3);
02354         ValReg = X86::AL;
02355         break;
02356       }
02357 
02358       Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
02359                                 InFlag);
02360       InFlag = Chain.getValue(1);
02361     } else {
02362       AVT = MVT::i8;
02363       Count  = Op.getOperand(3);
02364       Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
02365       InFlag = Chain.getValue(1);
02366     }
02367 
02368     Chain  = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
02369     InFlag = Chain.getValue(1);
02370     Chain  = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
02371     InFlag = Chain.getValue(1);
02372 
02373     std::vector<MVT::ValueType> Tys;
02374     Tys.push_back(MVT::Other);
02375     Tys.push_back(MVT::Flag);
02376     std::vector<SDOperand> Ops;
02377     Ops.push_back(Chain);
02378     Ops.push_back(DAG.getValueType(AVT));
02379     Ops.push_back(InFlag);
02380     Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);
02381 
02382     if (TwoRepStos) {
02383       InFlag = Chain.getValue(1);
02384       Count = Op.getOperand(3);
02385       MVT::ValueType CVT = Count.getValueType();
02386       SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
02387                                    DAG.getConstant(3, CVT));
02388       Chain  = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
02389       InFlag = Chain.getValue(1);
02390       Tys.clear();
02391       Tys.push_back(MVT::Other);
02392       Tys.push_back(MVT::Flag);
02393       Ops.clear();
02394       Ops.push_back(Chain);
02395       Ops.push_back(DAG.getValueType(MVT::i8));
02396       Ops.push_back(InFlag);
02397       Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);
02398     } else if (BytesLeft) {
02399       // Issue stores for the last 1 - 3 bytes.
02400       SDOperand Value;
02401       unsigned Val = ValC->getValue() & 255;
02402       unsigned Offset = I->getValue() - BytesLeft;
02403       SDOperand DstAddr = Op.getOperand(1);
02404       MVT::ValueType AddrVT = DstAddr.getValueType();
02405       if (BytesLeft >= 2) {
02406         Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
02407         Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
02408                             DAG.getNode(ISD::ADD, AddrVT, DstAddr,
02409                                         DAG.getConstant(Offset, AddrVT)),
02410                             DAG.getSrcValue(NULL));
02411         BytesLeft -= 2;
02412         Offset += 2;
02413       }
02414 
02415       if (BytesLeft == 1) {
02416         Value = DAG.getConstant(Val, MVT::i8);
02417         Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
02418                             DAG.getNode(ISD::ADD, AddrVT, DstAddr,
02419                                         DAG.getConstant(Offset, AddrVT)),
02420                             DAG.getSrcValue(NULL));
02421       }
02422     }
02423 
02424     return Chain;
02425   }
02426   case ISD::MEMCPY: {
02427     SDOperand Chain = Op.getOperand(0);
02428     unsigned Align =
02429       (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
02430     if (Align == 0) Align = 1;
02431 
02432     ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
02433     // If not DWORD aligned, call memcpy if size is less than the threshold.
02434     // It knows how to align to the right boundary first.
02435     if ((Align & 3) != 0 ||
02436         (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
02437       MVT::ValueType IntPtr = getPointerTy();
02438       const Type *IntPtrTy = getTargetData().getIntPtrType();
02439       std::vector<std::pair<SDOperand, const Type*> > Args;
02440       Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
02441       Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy));
02442       Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
02443       std::pair<SDOperand,SDOperand> CallResult =
02444         LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
02445                     DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
02446       return CallResult.second;
02447     }
02448 
02449     MVT::ValueType AVT;
02450     SDOperand Count;
02451     unsigned BytesLeft = 0;
02452     bool TwoRepMovs = false;
02453     switch (Align & 3) {
02454     case 2:   // WORD aligned
02455       AVT = MVT::i16;
02456       Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
02457       BytesLeft = I->getValue() % 2;
02458       break;
02459     case 0:   // DWORD aligned
02460       AVT = MVT::i32;
02461       if (I) {
02462         Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
02463         BytesLeft = I->getValue() % 4;
02464       } else {
02465         Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
02466                             DAG.getConstant(2, MVT::i8));
02467         TwoRepMovs = true;
02468       }
02469       break;
02470     default:  // Byte aligned
02471       AVT = MVT::i8;
02472       Count = Op.getOperand(3);
02473       break;
02474     }
02475 
02476     SDOperand InFlag(0, 0);
02477     Chain  = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
02478     InFlag = Chain.getValue(1);
02479     Chain  = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
02480     InFlag = Chain.getValue(1);
02481     Chain  = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag);
02482     InFlag = Chain.getValue(1);
02483 
02484     std::vector<MVT::ValueType> Tys;
02485     Tys.push_back(MVT::Other);
02486     Tys.push_back(MVT::Flag);
02487     std::vector<SDOperand> Ops;
02488     Ops.push_back(Chain);
02489     Ops.push_back(DAG.getValueType(AVT));
02490     Ops.push_back(InFlag);
02491     Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);
02492 
02493     if (TwoRepMovs) {
02494       InFlag = Chain.getValue(1);
02495       Count = Op.getOperand(3);
02496       MVT::ValueType CVT = Count.getValueType();
02497       SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
02498                                    DAG.getConstant(3, CVT));
02499       Chain  = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
02500       InFlag = Chain.getValue(1);
02501       Tys.clear();
02502       Tys.push_back(MVT::Other);
02503       Tys.push_back(MVT::Flag);
02504       Ops.clear();
02505       Ops.push_back(Chain);
02506       Ops.push_back(DAG.getValueType(MVT::i8));
02507       Ops.push_back(InFlag);
02508       Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);
02509     } else if (BytesLeft) {
02510       // Issue loads and stores for the last 1 - 3 bytes.
02511       unsigned Offset = I->getValue() - BytesLeft;
02512       SDOperand DstAddr = Op.getOperand(1);
02513       MVT::ValueType DstVT = DstAddr.getValueType();
02514       SDOperand SrcAddr = Op.getOperand(2);
02515       MVT::ValueType SrcVT = SrcAddr.getValueType();
02516       SDOperand Value;
02517       if (BytesLeft >= 2) {
02518         Value = DAG.getLoad(MVT::i16, Chain,
02519                             DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
02520                                         DAG.getConstant(Offset, SrcVT)),
02521                             DAG.getSrcValue(NULL));
02522         Chain = Value.getValue(1);
02523         Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
02524                             DAG.getNode(ISD::ADD, DstVT, DstAddr,
02525                                         DAG.getConstant(Offset, DstVT)),
02526                             DAG.getSrcValue(NULL));
02527         BytesLeft -= 2;
02528         Offset += 2;
02529       }
02530 
02531       if (BytesLeft == 1) {
02532         Value = DAG.getLoad(MVT::i8, Chain,
02533                             DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
02534                                         DAG.getConstant(Offset, SrcVT)),
02535                             DAG.getSrcValue(NULL));
02536         Chain = Value.getValue(1);
02537         Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
02538                             DAG.getNode(ISD::ADD, DstVT, DstAddr,
02539                                         DAG.getConstant(Offset, DstVT)),
02540                             DAG.getSrcValue(NULL));
02541       }
02542     }
02543 
02544     return Chain;
02545   }
02546 
02547   // ConstantPool, GlobalAddress, and ExternalSymbol are lowered as their
02548   // target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
02549   // one of the above mentioned nodes. It has to be wrapped because otherwise
02550   // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
02551   // be used to form addressing mode. These wrapped nodes will be selected
02552   // into MOV32ri.
02553   case ISD::ConstantPool: {
02554     ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
02555     SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
02556                          DAG.getTargetConstantPool(CP->get(), getPointerTy(),
02557                                                    CP->getAlignment()));
02558     if (Subtarget->isTargetDarwin()) {
02559       // With PIC, the address is actually $g + Offset.
02560       if (getTargetMachine().getRelocationModel() == Reloc::PIC)
02561         Result = DAG.getNode(ISD::ADD, getPointerTy(),
02562                 DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);    
02563     }
02564 
02565     return Result;
02566   }
02567   case ISD::GlobalAddress: {
02568     GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
02569     SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
02570                          DAG.getTargetGlobalAddress(GV, getPointerTy()));
02571     if (Subtarget->isTargetDarwin()) {
02572       // With PIC, the address is actually $g + Offset.
02573       if (getTargetMachine().getRelocationModel() == Reloc::PIC)
02574         Result = DAG.getNode(ISD::ADD, getPointerTy(),
02575                     DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
02576 
02577       // For Darwin, external and weak symbols are indirect, so we want to load
02578       // the value at address GV, not the value of GV itself. This means that
02579       // the GlobalAddress must be in the base or index register of the address,
02580       // not the GV offset field.
02581       if (getTargetMachine().getRelocationModel() != Reloc::Static &&
02582           DarwinGVRequiresExtraLoad(GV))
02583         Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(),
02584                              Result, DAG.getSrcValue(NULL));
02585     }
02586 
02587     return Result;
02588   }
02589   case ISD::ExternalSymbol: {
02590     const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
02591     SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
02592                          DAG.getTargetExternalSymbol(Sym, getPointerTy()));
02593     if (Subtarget->isTargetDarwin()) {
02594       // With PIC, the address is actually $g + Offset.
02595       if (getTargetMachine().getRelocationModel() == Reloc::PIC)
02596         Result = DAG.getNode(ISD::ADD, getPointerTy(),
02597                     DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
02598     }
02599 
02600     return Result;
02601   }
02602   case ISD::VASTART: {
02603     // vastart just stores the address of the VarArgsFrameIndex slot into the
02604     // memory location argument.
02605     // FIXME: Replace MVT::i32 with PointerTy
02606     SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
02607     return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 
02608                        Op.getOperand(1), Op.getOperand(2));
02609   }
02610   case ISD::RET: {
02611     SDOperand Copy;
02612     
02613     switch(Op.getNumOperands()) {
02614     default:
02615       assert(0 && "Do not know how to return this many arguments!");
02616       abort();
02617     case 1: 
02618       return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0),
02619                          DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
02620     case 2: {
02621       MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
02622       if (MVT::isInteger(ArgVT))
02623         Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1),
02624                                 SDOperand());
02625       else if (!X86ScalarSSE) {
02626         std::vector<MVT::ValueType> Tys;
02627         Tys.push_back(MVT::Other);
02628         Tys.push_back(MVT::Flag);
02629         std::vector<SDOperand> Ops;
02630         Ops.push_back(Op.getOperand(0));
02631         Ops.push_back(Op.getOperand(1));
02632         Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
02633       } else {
02634         SDOperand MemLoc;
02635         SDOperand Chain = Op.getOperand(0);
02636         SDOperand Value = Op.getOperand(1);
02637 
02638         if (Value.getOpcode() == ISD::LOAD &&
02639             (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
02640           Chain  = Value.getOperand(0);
02641           MemLoc = Value.getOperand(1);
02642         } else {
02643           // Spill the value to memory and reload it into top of stack.
02644           unsigned Size = MVT::getSizeInBits(ArgVT)/8;
02645           MachineFunction &MF = DAG.getMachineFunction();
02646           int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
02647           MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
02648           Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), 
02649                               Value, MemLoc, DAG.getSrcValue(0));
02650         }
02651         std::vector<MVT::ValueType> Tys;
02652         Tys.push_back(MVT::f64);
02653         Tys.push_back(MVT::Other);
02654         std::vector<SDOperand> Ops;
02655         Ops.push_back(Chain);
02656         Ops.push_back(MemLoc);
02657         Ops.push_back(DAG.getValueType(ArgVT));
02658         Copy = DAG.getNode(X86ISD::FLD, Tys, Ops);
02659         Tys.clear();
02660         Tys.push_back(MVT::Other);
02661         Tys.push_back(MVT::Flag);
02662         Ops.clear();
02663         Ops.push_back(Copy.getValue(1));
02664         Ops.push_back(Copy);
02665         Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
02666       }
02667       break;
02668     }
02669     case 3:
02670       Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2), 
02671                               SDOperand());
02672       Copy = DAG.getCopyToReg(Copy, X86::EAX,Op.getOperand(1),Copy.getValue(1));
02673       break;
02674     }
02675     return DAG.getNode(X86ISD::RET_FLAG, MVT::Other,
02676                        Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
02677                        Copy.getValue(1));
02678   }
02679   case ISD::SCALAR_TO_VECTOR: {
02680     SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
02681     return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
02682   }
02683   case ISD::VECTOR_SHUFFLE: {
02684     SDOperand V1 = Op.getOperand(0);
02685     SDOperand V2 = Op.getOperand(1);
02686     SDOperand PermMask = Op.getOperand(2);
02687     MVT::ValueType VT = Op.getValueType();
02688     unsigned NumElems = PermMask.getNumOperands();
02689 
02690     if (X86::isSplatMask(PermMask.Val))
02691       return Op;
02692 
02693     // Normalize the node to match x86 shuffle ops if needed
02694     if (V2.getOpcode() != ISD::UNDEF) {
02695       bool DoSwap = false;
02696 
02697       if (ShouldXformedToMOVLP(V1, V2, PermMask))
02698         DoSwap = true;
02699       else if (isLowerFromV2UpperFromV1(PermMask))
02700         DoSwap = true;
02701 
02702       if (DoSwap) {
02703         Op = CommuteVectorShuffle(Op, DAG);
02704         V1 = Op.getOperand(0);
02705         V2 = Op.getOperand(1);
02706         PermMask = Op.getOperand(2);
02707       }
02708     }
02709 
02710     if (NumElems == 2)
02711       return Op;
02712 
02713     if (X86::isMOVSMask(PermMask.Val))
02714       // Leave the VECTOR_SHUFFLE alone. It matches MOVS{S|D}.
02715       return Op;
02716 
02717     if (X86::isUNPCKLMask(PermMask.Val) ||
02718         X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
02719         X86::isUNPCKHMask(PermMask.Val))
02720       // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*.
02721       return Op;
02722 
02723     // If VT is integer, try PSHUF* first, then SHUFP*.
02724     if (MVT::isInteger(VT)) {
02725       if (X86::isPSHUFDMask(PermMask.Val) ||
02726           X86::isPSHUFHWMask(PermMask.Val) ||
02727           X86::isPSHUFLWMask(PermMask.Val)) {
02728         if (V2.getOpcode() != ISD::UNDEF)
02729           return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
02730                              DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
02731         return Op;
02732       }
02733 
02734       if (X86::isSHUFPMask(PermMask.Val))
02735         return Op;
02736 
02737       // Handle v8i16 shuffle high / low shuffle node pair.
02738       if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
02739         MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
02740         MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
02741         std::vector<SDOperand> MaskVec;
02742         for (unsigned i = 0; i != 4; ++i)
02743           MaskVec.push_back(PermMask.getOperand(i));
02744         for (unsigned i = 4; i != 8; ++i)
02745           MaskVec.push_back(DAG.getConstant(i, BaseVT));
02746         SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
02747         V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
02748         MaskVec.clear();
02749         for (unsigned i = 0; i != 4; ++i)
02750           MaskVec.push_back(DAG.getConstant(i, BaseVT));
02751         for (unsigned i = 4; i != 8; ++i)
02752           MaskVec.push_back(PermMask.getOperand(i));
02753         Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
02754         return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
02755       }
02756     } else {
02757       // Floating point cases in the other order.
02758       if (X86::isSHUFPMask(PermMask.Val))
02759         return Op;
02760       if (X86::isPSHUFDMask(PermMask.Val) ||
02761           X86::isPSHUFHWMask(PermMask.Val) ||
02762           X86::isPSHUFLWMask(PermMask.Val)) {
02763         if (V2.getOpcode() != ISD::UNDEF)
02764           return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
02765                              DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
02766         return Op;
02767       }
02768     }
02769 
02770     return SDOperand();
02771   }
02772   case ISD::BUILD_VECTOR: {
02773     // All ones are handled with pcmpeqd.
02774     if (ISD::isBuildVectorAllOnes(Op.Val))
02775       return Op;
02776 
02777     std::set<SDOperand> Values;
02778     SDOperand Elt0 = Op.getOperand(0);
02779     Values.insert(Elt0);
02780     bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) &&
02781                        cast<ConstantSDNode>(Elt0)->getValue() == 0) ||
02782       (isa<ConstantFPSDNode>(Elt0) &&
02783        cast<ConstantFPSDNode>(Elt0)->isExactlyValue(0.0));
02784     bool RestAreZero = true;
02785     unsigned NumElems = Op.getNumOperands();
02786     for (unsigned i = 1; i < NumElems; ++i) {
02787       SDOperand Elt = Op.getOperand(i);
02788       if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Elt)) {
02789         if (!FPC->isExactlyValue(+0.0))
02790           RestAreZero = false;
02791       } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
02792         if (!C->isNullValue())
02793           RestAreZero = false;
02794       } else
02795         RestAreZero = false;
02796       Values.insert(Elt);
02797     }
02798 
02799     if (RestAreZero) {
02800       if (Elt0IsZero) return Op;
02801 
02802       // Zero extend a scalar to a vector.
02803       return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0);
02804     }
02805 
02806     if (Values.size() > 2) {
02807       // Expand into a number of unpckl*.
02808       // e.g. for v4f32
02809       //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
02810       //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
02811       //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
02812       MVT::ValueType VT = Op.getValueType();
02813       MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
02814       MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
02815       std::vector<SDOperand> MaskVec;
02816       for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
02817         MaskVec.push_back(DAG.getConstant(i,            BaseVT));
02818         MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
02819       }
02820       SDOperand PermMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
02821       std::vector<SDOperand> V(NumElems);
02822       for (unsigned i = 0; i < NumElems; ++i)
02823         V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
02824       NumElems >>= 1;
02825       while (NumElems != 0) {
02826         for (unsigned i = 0; i < NumElems; ++i)
02827           V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
02828                              PermMask);
02829         NumElems >>= 1;
02830       }
02831       return V[0];
02832     }
02833 
02834     return SDOperand();
02835   }
02836   case ISD::EXTRACT_VECTOR_ELT: {
02837     if (!isa<ConstantSDNode>(Op.getOperand(1)))
02838         return SDOperand();
02839 
02840     MVT::ValueType VT = Op.getValueType();
02841     // TODO: handle v16i8.
02842     if (MVT::getSizeInBits(VT) == 16) {
02843       // Transform it so it matches pextrw which produces a 32-bit result.
02844       MVT::ValueType EVT = (MVT::ValueType)(VT+1);
02845       SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
02846                                       Op.getOperand(0), Op.getOperand(1));
02847       SDOperand Assert  = DAG.getNode(ISD::AssertZext, EVT, Extract,
02848                                       DAG.getValueType(VT));
02849       return DAG.getNode(ISD::TRUNCATE, VT, Assert);
02850     } else if (MVT::getSizeInBits(VT) == 32) {
02851       SDOperand Vec = Op.getOperand(0);
02852       unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
02853       if (Idx == 0)
02854         return Op;
02855 
02856       // TODO: if Idx == 2, we can use unpckhps
02857       // SHUFPS the element to the lowest double word, then movss.
02858       MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
02859       SDOperand IdxNode = DAG.getConstant((Idx < 2) ? Idx : Idx+4,
02860                                           MVT::getVectorBaseType(MaskVT));
02861       std::vector<SDOperand> IdxVec;
02862       IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
02863       IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
02864       IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
02865       IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
02866       SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
02867       Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
02868                         Vec, Vec, Mask);
02869       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
02870                          DAG.getConstant(0, MVT::i32));
02871     } else if (MVT::getSizeInBits(VT) == 64) {
02872       SDOperand Vec = Op.getOperand(0);
02873       unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
02874       if (Idx == 0)
02875         return Op;
02876 
02877       // UNPCKHPD the element to the lowest double word, then movsd.
02878       // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
02879       // to a f64mem, the whole operation is folded into a single MOVHPDmr.
02880       MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
02881       std::vector<SDOperand> IdxVec;
02882       IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
02883       IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
02884       SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
02885       Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
02886                         Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
02887       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
02888                          DAG.getConstant(0, MVT::i32));
02889     }
02890 
02891     return SDOperand();
02892   }
02893   case ISD::INSERT_VECTOR_ELT: {
02894     // Transform it so it matches pinsrw which expects a 16-bit value in a R32
02895     // as its second argument.
02896     MVT::ValueType VT = Op.getValueType();
02897     MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
02898     if (MVT::getSizeInBits(BaseVT) == 16) {
02899       SDOperand N1 = Op.getOperand(1);
02900       SDOperand N2 = Op.getOperand(2);
02901       if (N1.getValueType() != MVT::i32)
02902         N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
02903       if (N2.getValueType() != MVT::i32)
02904         N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
02905       return DAG.getNode(X86ISD::PINSRW, VT, Op.getOperand(0), N1, N2);
02906     }
02907 
02908     return SDOperand();
02909   }
02910   case ISD::INTRINSIC_WO_CHAIN: {
02911     unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
02912     switch (IntNo) {
02913     default: return SDOperand();    // Don't custom lower most intrinsics.
02914     // Comparison intrinsics.
02915     case Intrinsic::x86_sse_comieq_ss:
02916     case Intrinsic::x86_sse_comilt_ss:
02917     case Intrinsic::x86_sse_comile_ss:
02918     case Intrinsic::x86_sse_comigt_ss:
02919     case Intrinsic::x86_sse_comige_ss:
02920     case Intrinsic::x86_sse_comineq_ss:
02921     case Intrinsic::x86_sse_ucomieq_ss:
02922     case Intrinsic::x86_sse_ucomilt_ss:
02923     case Intrinsic::x86_sse_ucomile_ss:
02924     case Intrinsic::x86_sse_ucomigt_ss:
02925     case Intrinsic::x86_sse_ucomige_ss:
02926     case Intrinsic::x86_sse_ucomineq_ss:
02927     case Intrinsic::x86_sse2_comieq_sd:
02928     case Intrinsic::x86_sse2_comilt_sd:
02929     case Intrinsic::x86_sse2_comile_sd:
02930     case Intrinsic::x86_sse2_comigt_sd:
02931     case Intrinsic::x86_sse2_comige_sd:
02932     case Intrinsic::x86_sse2_comineq_sd:
02933     case Intrinsic::x86_sse2_ucomieq_sd:
02934     case Intrinsic::x86_sse2_ucomilt_sd:
02935     case Intrinsic::x86_sse2_ucomile_sd:
02936     case Intrinsic::x86_sse2_ucomigt_sd:
02937     case Intrinsic::x86_sse2_ucomige_sd:
02938     case Intrinsic::x86_sse2_ucomineq_sd: {
02939       unsigned Opc = 0;
02940       ISD::CondCode CC = ISD::SETCC_INVALID;
02941       switch (IntNo) {
02942         default: break;
02943         case Intrinsic::x86_sse_comieq_ss: 
02944         case Intrinsic::x86_sse2_comieq_sd: 
02945           Opc = X86ISD::COMI;
02946           CC = ISD::SETEQ;
02947           break;
02948         case Intrinsic::x86_sse_comilt_ss:
02949         case Intrinsic::x86_sse2_comilt_sd:
02950           Opc = X86ISD::COMI;
02951           CC = ISD::SETLT;
02952           break;
02953         case Intrinsic::x86_sse_comile_ss:
02954         case Intrinsic::x86_sse2_comile_sd:
02955           Opc = X86ISD::COMI;
02956           CC = ISD::SETLE;
02957           break;
02958         case Intrinsic::x86_sse_comigt_ss:
02959         case Intrinsic::x86_sse2_comigt_sd:
02960           Opc = X86ISD::COMI;
02961           CC = ISD::SETGT;
02962           break;
02963         case Intrinsic::x86_sse_comige_ss:
02964         case Intrinsic::x86_sse2_comige_sd:
02965           Opc = X86ISD::COMI;
02966           CC = ISD::SETGE;
02967           break;
02968         case Intrinsic::x86_sse_comineq_ss:
02969         case Intrinsic::x86_sse2_comineq_sd:
02970           Opc = X86ISD::COMI;
02971           CC = ISD::SETNE;
02972           break;
02973         case Intrinsic::x86_sse_ucomieq_ss:
02974         case Intrinsic::x86_sse2_ucomieq_sd:
02975           Opc = X86ISD::UCOMI;
02976           CC = ISD::SETEQ;
02977           break;
02978         case Intrinsic::x86_sse_ucomilt_ss:
02979         case Intrinsic::x86_sse2_ucomilt_sd:
02980           Opc = X86ISD::UCOMI;
02981           CC = ISD::SETLT;
02982           break;
02983         case Intrinsic::x86_sse_ucomile_ss:
02984         case Intrinsic::x86_sse2_ucomile_sd:
02985           Opc = X86ISD::UCOMI;
02986           CC = ISD::SETLE;
02987           break;
02988         case Intrinsic::x86_sse_ucomigt_ss:
02989         case Intrinsic::x86_sse2_ucomigt_sd:
02990           Opc = X86ISD::UCOMI;
02991           CC = ISD::SETGT;
02992           break;
02993         case Intrinsic::x86_sse_ucomige_ss:
02994         case Intrinsic::x86_sse2_ucomige_sd:
02995           Opc = X86ISD::UCOMI;
02996           CC = ISD::SETGE;
02997           break;
02998         case Intrinsic::x86_sse_ucomineq_ss:
02999         case Intrinsic::x86_sse2_ucomineq_sd:
03000           Opc = X86ISD::UCOMI;
03001           CC = ISD::SETNE;
03002           break;
03003       }
03004       bool Flip;
03005       unsigned X86CC;
03006       translateX86CC(CC, true, X86CC, Flip);
03007       SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1),
03008                                    Op.getOperand(Flip?1:2));
03009       SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8, 
03010                                     DAG.getConstant(X86CC, MVT::i8), Cond);
03011       return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
03012     }
03013     }
03014   }
03015   }
03016 }
03017 
03018 const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
03019   switch (Opcode) {
03020   default: return NULL;
03021   case X86ISD::SHLD:               return "X86ISD::SHLD";
03022   case X86ISD::SHRD:               return "X86ISD::SHRD";
03023   case X86ISD::FAND:               return "X86ISD::FAND";
03024   case X86ISD::FXOR:               return "X86ISD::FXOR";
03025   case X86ISD::FILD:               return "X86ISD::FILD";
03026   case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
03027   case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
03028   case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
03029   case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
03030   case X86ISD::FLD:                return "X86ISD::FLD";
03031   case X86ISD::FST:                return "X86ISD::FST";
03032   case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
03033   case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
03034   case X86ISD::CALL:               return "X86ISD::CALL";
03035   case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
03036   case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
03037   case X86ISD::CMP:                return "X86ISD::CMP";
03038   case X86ISD::TEST:               return "X86ISD::TEST";
03039   case X86ISD::COMI:               return "X86ISD::COMI";
03040   case X86ISD::UCOMI:              return "X86ISD::UCOMI";
03041   case X86ISD::SETCC:              return "X86ISD::SETCC";
03042   case X86ISD::CMOV:               return "X86ISD::CMOV";
03043   case X86ISD::BRCOND:             return "X86ISD::BRCOND";
03044   case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
03045   case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
03046   case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
03047   case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
03048   case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
03049   case X86ISD::Wrapper:            return "X86ISD::Wrapper";
03050   case X86ISD::S2VEC:              return "X86ISD::S2VEC";
03051   case X86ISD::ZEXT_S2VEC:         return "X86ISD::ZEXT_S2VEC";
03052   case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
03053   case X86ISD::PINSRW:             return "X86ISD::PINSRW";
03054   }
03055 }
03056 
03057 void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
03058                                                        uint64_t Mask,
03059                                                        uint64_t &KnownZero, 
03060                                                        uint64_t &KnownOne,
03061                                                        unsigned Depth) const {
03062   unsigned Opc = Op.getOpcode();
03063   assert((Opc >= ISD::BUILTIN_OP_END ||
03064           Opc == ISD::INTRINSIC_WO_CHAIN ||
03065           Opc == ISD::INTRINSIC_W_CHAIN ||
03066           Opc == ISD::INTRINSIC_VOID) &&
03067          "Should use MaskedValueIsZero if you don't know whether Op"
03068          " is a target node!");
03069 
03070   KnownZero = KnownOne = 0;   // Don't know anything.
03071   switch (Opc) {
03072   default: break;
03073   case X86ISD::SETCC: 
03074     KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
03075     break;
03076   }
03077 }
03078 
03079 std::vector<unsigned> X86TargetLowering::
03080 getRegClassForInlineAsmConstraint(const std::string &Constraint,
03081                                   MVT::ValueType VT) const {
03082   if (Constraint.size() == 1) {
03083     // FIXME: not handling fp-stack yet!
03084     // FIXME: not handling MMX registers yet ('y' constraint).
03085     switch (Constraint[0]) {      // GCC X86 Constraint Letters
03086     default: break;  // Unknown constriant letter
03087     case 'r':   // GENERAL_REGS
03088     case 'R':   // LEGACY_REGS
03089       return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX,
03090                                    X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
03091     case 'l':   // INDEX_REGS
03092       return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX,
03093                                    X86::ESI, X86::EDI, X86::EBP, 0);
03094     case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
03095     case 'Q':   // Q_REGS
03096       return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 0);
03097     case 'x':   // SSE_REGS if SSE1 allowed
03098       if (Subtarget->hasSSE1())
03099         return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
03100                                      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
03101                                      0);
03102       return std::vector<unsigned>();
03103     case 'Y':   // SSE_REGS if SSE2 allowed
03104       if (Subtarget->hasSSE2())
03105         return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
03106                                      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
03107                                      0);
03108       return std::vector<unsigned>();
03109     }
03110   }
03111   
03112   return std::vector<unsigned>();
03113 }
03114 
03115 /// isLegalAddressImmediate - Return true if the integer value or
03116 /// GlobalValue can be used as the offset of the target addressing mode.
03117 bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const {
03118   // X86 allows a sign-extended 32-bit immediate field.
03119   return (V > -(1LL << 32) && V < (1LL << 32)-1);
03120 }
03121 
03122 bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
03123   if (Subtarget->isTargetDarwin()) {
03124     Reloc::Model RModel = getTargetMachine().getRelocationModel();
03125     if (RModel == Reloc::Static)
03126       return true;
03127     else if (RModel == Reloc::DynamicNoPIC)
03128       return !DarwinGVRequiresExtraLoad(GV);
03129     else
03130       return false;
03131   } else
03132     return true;
03133 }
03134 
03135 /// isShuffleMaskLegal - Targets can use this to indicate that they only
03136 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
03137 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
03138 /// are assumed to be legal.
03139 bool
03140 X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
03141   // Only do shuffles on 128-bit vector types for now.
03142   if (MVT::getSizeInBits(VT) == 64) return false;
03143   return (Mask.Val->getNumOperands() == 2 ||
03144           X86::isSplatMask(Mask.Val)  ||
03145           X86::isMOVSMask(Mask.Val)   ||
03146           X86::isPSHUFDMask(Mask.Val) ||
03147           isPSHUFHW_PSHUFLWMask(Mask.Val) ||
03148           X86::isSHUFPMask(Mask.Val)  ||
03149           X86::isUNPCKLMask(Mask.Val) ||
03150           X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
03151           X86::isUNPCKHMask(Mask.Val));
03152 }