LLVM API Documentation

PPCISelLowering.cpp

Go to the documentation of this file.
00001 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by Chris Lattner and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the PPCISelLowering class.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "PPCISelLowering.h"
00015 #include "PPCTargetMachine.h"
00016 #include "PPCPerfectShuffle.h"
00017 #include "llvm/ADT/VectorExtras.h"
00018 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
00019 #include "llvm/CodeGen/MachineFrameInfo.h"
00020 #include "llvm/CodeGen/MachineFunction.h"
00021 #include "llvm/CodeGen/MachineInstrBuilder.h"
00022 #include "llvm/CodeGen/SelectionDAG.h"
00023 #include "llvm/CodeGen/SSARegMap.h"
00024 #include "llvm/Constants.h"
00025 #include "llvm/Function.h"
00026 #include "llvm/Intrinsics.h"
00027 #include "llvm/Support/MathExtras.h"
00028 #include "llvm/Target/TargetOptions.h"
00029 using namespace llvm;
00030 
/// PPCTargetLowering - Describe the PowerPC target to the target-independent
/// legalizer: which register classes hold each value type, and which ISD
/// operations are Legal, must be Expanded into other nodes, or are Custom
/// lowered by LowerOperation below.
PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
    
  // Fold away setcc operations if possible.
  setSetCCIsExpensive();
  setPow2DivIsCheap();
  
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmpLongJmp(true);
    
  // Set up the register classes.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
  
  // FP constants cannot be encoded as immediates; materialize them (e.g. from
  // the constant pool) instead.
  setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f32, Expand);

  // PowerPC has no intrinsics for these particular operations
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
  
  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
  setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);
  
  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);
  
  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  
  // If we're enabling GP optimizations, use hardware square root
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }
  
  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  
  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
  setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
  
  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
  
  // PowerPC does not have Select
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);
  
  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  
  // PowerPC does not have BRCOND which requires SetCC
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  
  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  // Bit-converts between integer and FP types are not supported directly;
  // expand them (the legalizer will go through memory).
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);

  // PowerPC does not have truncstore for i1.
  setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  
  
  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!TM.getSubtarget<PPCSubtarget>().isDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);
  
  // We want to legalize GlobalAddress and ConstantPool nodes into the 
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
  
  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET               , MVT::Other, Custom);
  
  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);
  
  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand); 
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
  
  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  
  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    
    // FIXME: disable this lowered code.  This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls.  We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    
    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
    // 64 bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  } else {
    // 32 bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL, MVT::i64, Custom);
    setOperationAction(ISD::SRL, MVT::i64, Custom);
    setOperationAction(ISD::SRA, MVT::i64, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
      setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
      
      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND   , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::OR    , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR    , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR   , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD  , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::STORE, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::STORE, (MVT::ValueType)VT, MVT::v4i32);
      
      // No other operations are legal.
      setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);

      setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // Re-legalize the v4i32 forms that the loop above promoted to v4i32.
    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    
    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
    
    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
    
    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }
  
  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  // R1 is the PPC stack pointer.
  setStackPointerRegisterToSaveRestore(PPC::R1);
  
  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  setTargetDAGCombine(ISD::BSWAP);
  
  computeRegisterProperties();
}
00273 
00274 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
00275   switch (Opcode) {
00276   default: return 0;
00277   case PPCISD::FSEL:          return "PPCISD::FSEL";
00278   case PPCISD::FCFID:         return "PPCISD::FCFID";
00279   case PPCISD::FCTIDZ:        return "PPCISD::FCTIDZ";
00280   case PPCISD::FCTIWZ:        return "PPCISD::FCTIWZ";
00281   case PPCISD::STFIWX:        return "PPCISD::STFIWX";
00282   case PPCISD::VMADDFP:       return "PPCISD::VMADDFP";
00283   case PPCISD::VNMSUBFP:      return "PPCISD::VNMSUBFP";
00284   case PPCISD::VPERM:         return "PPCISD::VPERM";
00285   case PPCISD::Hi:            return "PPCISD::Hi";
00286   case PPCISD::Lo:            return "PPCISD::Lo";
00287   case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
00288   case PPCISD::SRL:           return "PPCISD::SRL";
00289   case PPCISD::SRA:           return "PPCISD::SRA";
00290   case PPCISD::SHL:           return "PPCISD::SHL";
00291   case PPCISD::EXTSW_32:      return "PPCISD::EXTSW_32";
00292   case PPCISD::STD_32:        return "PPCISD::STD_32";
00293   case PPCISD::CALL:          return "PPCISD::CALL";
00294   case PPCISD::MTCTR:         return "PPCISD::MTCTR";
00295   case PPCISD::BCTRL:         return "PPCISD::BCTRL";
00296   case PPCISD::RET_FLAG:      return "PPCISD::RET_FLAG";
00297   case PPCISD::MFCR:          return "PPCISD::MFCR";
00298   case PPCISD::VCMP:          return "PPCISD::VCMP";
00299   case PPCISD::VCMPo:         return "PPCISD::VCMPo";
00300   case PPCISD::LBRX:          return "PPCISD::LBRX";
00301   case PPCISD::STBRX:         return "PPCISD::STBRX";
00302   case PPCISD::COND_BRANCH:   return "PPCISD::COND_BRANCH";
00303   }
00304 }
00305 
00306 //===----------------------------------------------------------------------===//
00307 // Node matching predicates, for use by the tblgen matching code.
00308 //===----------------------------------------------------------------------===//
00309 
00310 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
00311 static bool isFloatingPointZero(SDOperand Op) {
00312   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
00313     return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
00314   else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) {
00315     // Maybe this has already been legalized into the constant pool?
00316     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
00317       if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get()))
00318         return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
00319   }
00320   return false;
00321 }
00322 
00323 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
00324 /// true if Op is undef or if it matches the specified value.
00325 static bool isConstantOrUndef(SDOperand Op, unsigned Val) {
00326   return Op.getOpcode() == ISD::UNDEF || 
00327          cast<ConstantSDNode>(Op)->getValue() == Val;
00328 }
00329 
00330 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
00331 /// VPKUHUM instruction.
00332 bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {
00333   if (!isUnary) {
00334     for (unsigned i = 0; i != 16; ++i)
00335       if (!isConstantOrUndef(N->getOperand(i),  i*2+1))
00336         return false;
00337   } else {
00338     for (unsigned i = 0; i != 8; ++i)
00339       if (!isConstantOrUndef(N->getOperand(i),  i*2+1) ||
00340           !isConstantOrUndef(N->getOperand(i+8),  i*2+1))
00341         return false;
00342   }
00343   return true;
00344 }
00345 
00346 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
00347 /// VPKUWUM instruction.
00348 bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {
00349   if (!isUnary) {
00350     for (unsigned i = 0; i != 16; i += 2)
00351       if (!isConstantOrUndef(N->getOperand(i  ),  i*2+2) ||
00352           !isConstantOrUndef(N->getOperand(i+1),  i*2+3))
00353         return false;
00354   } else {
00355     for (unsigned i = 0; i != 8; i += 2)
00356       if (!isConstantOrUndef(N->getOperand(i  ),  i*2+2) ||
00357           !isConstantOrUndef(N->getOperand(i+1),  i*2+3) ||
00358           !isConstantOrUndef(N->getOperand(i+8),  i*2+2) ||
00359           !isConstantOrUndef(N->getOperand(i+9),  i*2+3))
00360         return false;
00361   }
00362   return true;
00363 }
00364 
00365 /// isVMerge - Common function, used to match vmrg* shuffles.
00366 ///
00367 static bool isVMerge(SDNode *N, unsigned UnitSize, 
00368                      unsigned LHSStart, unsigned RHSStart) {
00369   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
00370          N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
00371   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
00372          "Unsupported merge size!");
00373   
00374   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
00375     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
00376       if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),
00377                              LHSStart+j+i*UnitSize) ||
00378           !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),
00379                              RHSStart+j+i*UnitSize))
00380         return false;
00381     }
00382       return true;
00383 }
00384 
00385 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
00386 /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
00387 bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
00388   if (!isUnary)
00389     return isVMerge(N, UnitSize, 8, 24);
00390   return isVMerge(N, UnitSize, 8, 8);
00391 }
00392 
00393 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
00394 /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
00395 bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
00396   if (!isUnary)
00397     return isVMerge(N, UnitSize, 0, 16);
00398   return isVMerge(N, UnitSize, 0, 0);
00399 }
00400 
00401 
00402 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
00403 /// amount, otherwise return -1.
00404 int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
00405   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
00406          N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
00407   // Find the first non-undef value in the shuffle mask.
00408   unsigned i;
00409   for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
00410     /*search*/;
00411   
00412   if (i == 16) return -1;  // all undef.
00413   
00414   // Otherwise, check to see if the rest of the elements are consequtively
00415   // numbered from this value.
00416   unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();
00417   if (ShiftAmt < i) return -1;
00418   ShiftAmt -= i;
00419 
00420   if (!isUnary) {
00421     // Check the rest of the elements to see if they are consequtive.
00422     for (++i; i != 16; ++i)
00423       if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))
00424         return -1;
00425   } else {
00426     // Check the rest of the elements to see if they are consequtive.
00427     for (++i; i != 16; ++i)
00428       if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))
00429         return -1;
00430   }
00431   
00432   return ShiftAmt;
00433 }
00434 
00435 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
00436 /// specifies a splat of a single element that is suitable for input to
00437 /// VSPLTB/VSPLTH/VSPLTW.
00438 bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
00439   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
00440          N->getNumOperands() == 16 &&
00441          (EltSize == 1 || EltSize == 2 || EltSize == 4));
00442   
00443   // This is a splat operation if each element of the permute is the same, and
00444   // if the value doesn't reference the second vector.
00445   unsigned ElementBase = 0;
00446   SDOperand Elt = N->getOperand(0);
00447   if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
00448     ElementBase = EltV->getValue();
00449   else
00450     return false;   // FIXME: Handle UNDEF elements too!
00451 
00452   if (cast<ConstantSDNode>(Elt)->getValue() >= 16)
00453     return false;
00454   
00455   // Check that they are consequtive.
00456   for (unsigned i = 1; i != EltSize; ++i) {
00457     if (!isa<ConstantSDNode>(N->getOperand(i)) ||
00458         cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase)
00459       return false;
00460   }
00461   
00462   assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
00463   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
00464     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
00465     assert(isa<ConstantSDNode>(N->getOperand(i)) &&
00466            "Invalid VECTOR_SHUFFLE mask!");
00467     for (unsigned j = 0; j != EltSize; ++j)
00468       if (N->getOperand(i+j) != N->getOperand(j))
00469         return false;
00470   }
00471 
00472   return true;
00473 }
00474 
00475 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
00476 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
00477 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
00478   assert(isSplatShuffleMask(N, EltSize));
00479   return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize;
00480 }
00481 
00482 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
00483 /// by using a vspltis[bhw] instruction of the specified element size, return
00484 /// the constant being splatted.  The ByteSize field indicates the number of
00485 /// bytes of each element [124] -> [bhw].
00486 SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
00487   SDOperand OpVal(0, 0);
00488 
00489   // If ByteSize of the splat is bigger than the element size of the
00490   // build_vector, then we have a case where we are checking for a splat where
00491   // multiple elements of the buildvector are folded together into a single
00492   // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
00493   unsigned EltSize = 16/N->getNumOperands();
00494   if (EltSize < ByteSize) {
00495     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
00496     SDOperand UniquedVals[4];
00497     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
00498     
00499     // See if all of the elements in the buildvector agree across.
00500     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
00501       if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
00502       // If the element isn't a constant, bail fully out.
00503       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDOperand();
00504 
00505           
00506       if (UniquedVals[i&(Multiple-1)].Val == 0)
00507         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
00508       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
00509         return SDOperand();  // no match.
00510     }
00511     
00512     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
00513     // either constant or undef values that are identical for each chunk.  See
00514     // if these chunks can form into a larger vspltis*.
00515     
00516     // Check to see if all of the leading entries are either 0 or -1.  If
00517     // neither, then this won't fit into the immediate field.
00518     bool LeadingZero = true;
00519     bool LeadingOnes = true;
00520     for (unsigned i = 0; i != Multiple-1; ++i) {
00521       if (UniquedVals[i].Val == 0) continue;  // Must have been undefs.
00522       
00523       LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
00524       LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
00525     }
00526     // Finally, check the least significant entry.
00527     if (LeadingZero) {
00528       if (UniquedVals[Multiple-1].Val == 0)
00529         return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
00530       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getValue();
00531       if (Val < 16)
00532         return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
00533     }
00534     if (LeadingOnes) {
00535       if (UniquedVals[Multiple-1].Val == 0)
00536         return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
00537       int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSignExtended();
00538       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
00539         return DAG.getTargetConstant(Val, MVT::i32);
00540     }
00541     
00542     return SDOperand();
00543   }
00544   
00545   // Check to see if this buildvec has a single non-undef value in its elements.
00546   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
00547     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
00548     if (OpVal.Val == 0)
00549       OpVal = N->getOperand(i);
00550     else if (OpVal != N->getOperand(i))
00551       return SDOperand();
00552   }
00553   
00554   if (OpVal.Val == 0) return SDOperand();  // All UNDEF: use implicit def.
00555   
00556   unsigned ValSizeInBytes = 0;
00557   uint64_t Value = 0;
00558   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
00559     Value = CN->getValue();
00560     ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;
00561   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
00562     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
00563     Value = FloatToBits(CN->getValue());
00564     ValSizeInBytes = 4;
00565   }
00566 
00567   // If the splat value is larger than the element value, then we can never do
00568   // this splat.  The only case that we could fit the replicated bits into our
00569   // immediate field for would be zero, and we prefer to use vxor for it.
00570   if (ValSizeInBytes < ByteSize) return SDOperand();
00571   
00572   // If the element value is larger than the splat value, cut it in half and
00573   // check to see if the two halves are equal.  Continue doing this until we
00574   // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
00575   while (ValSizeInBytes > ByteSize) {
00576     ValSizeInBytes >>= 1;
00577     
00578     // If the top half equals the bottom half, we're still ok.
00579     if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
00580          (Value                        & ((1 << (8*ValSizeInBytes))-1)))
00581       return SDOperand();
00582   }
00583 
00584   // Properly sign extend the value.
00585   int ShAmt = (4-ByteSize)*8;
00586   int MaskVal = ((int)Value << ShAmt) >> ShAmt;
00587   
00588   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
00589   if (MaskVal == 0) return SDOperand();
00590 
00591   // Finally, if this value fits in a 5 bit sext field, return it
00592   if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
00593     return DAG.getTargetConstant(MaskVal, MVT::i32);
00594   return SDOperand();
00595 }
00596 
00597 //===----------------------------------------------------------------------===//
00598 //  LowerOperation implementation
00599 //===----------------------------------------------------------------------===//
00600 
00601 static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
00602   MVT::ValueType PtrVT = Op.getValueType();
00603   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
00604   Constant *C = CP->get();
00605   SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
00606   SDOperand Zero = DAG.getConstant(0, PtrVT);
00607 
00608   const TargetMachine &TM = DAG.getTarget();
00609   
00610   SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, CPI, Zero);
00611   SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, CPI, Zero);
00612 
00613   // If this is a non-darwin platform, we don't support non-static relo models
00614   // yet.
00615   if (TM.getRelocationModel() == Reloc::Static ||
00616       !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
00617     // Generate non-pic code that has direct accesses to the constant pool.
00618     // The address of the global is just (hi(&g)+lo(&g)).
00619     return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
00620   }
00621   
00622   if (TM.getRelocationModel() == Reloc::PIC_) {
00623     // With PIC, the first instruction is actually "GR+hi(&G)".
00624     Hi = DAG.getNode(ISD::ADD, PtrVT,
00625                      DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
00626   }
00627   
00628   Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
00629   return Lo;
00630 }
00631 
00632 static SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
00633   MVT::ValueType PtrVT = Op.getValueType();
00634   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
00635   SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
00636   SDOperand Zero = DAG.getConstant(0, PtrVT);
00637   
00638   const TargetMachine &TM = DAG.getTarget();
00639 
00640   SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, JTI, Zero);
00641   SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, JTI, Zero);
00642 
00643   // If this is a non-darwin platform, we don't support non-static relo models
00644   // yet.
00645   if (TM.getRelocationModel() == Reloc::Static ||
00646       !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
00647     // Generate non-pic code that has direct accesses to the constant pool.
00648     // The address of the global is just (hi(&g)+lo(&g)).
00649     return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
00650   }
00651   
00652   if (TM.getRelocationModel() == Reloc::PIC_) {
00653     // With PIC, the first instruction is actually "GR+hi(&G)".
00654     Hi = DAG.getNode(ISD::ADD, PtrVT,
00655                      DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
00656   }
00657   
00658   Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
00659   return Lo;
00660 }
00661 
00662 static SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
00663   MVT::ValueType PtrVT = Op.getValueType();
00664   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
00665   GlobalValue *GV = GSDN->getGlobal();
00666   SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
00667   SDOperand Zero = DAG.getConstant(0, PtrVT);
00668   
00669   const TargetMachine &TM = DAG.getTarget();
00670 
00671   SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, GA, Zero);
00672   SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, GA, Zero);
00673 
00674   // If this is a non-darwin platform, we don't support non-static relo models
00675   // yet.
00676   if (TM.getRelocationModel() == Reloc::Static ||
00677       !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
00678     // Generate non-pic code that has direct accesses to globals.
00679     // The address of the global is just (hi(&g)+lo(&g)).
00680     return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
00681   }
00682   
00683   if (TM.getRelocationModel() == Reloc::PIC_) {
00684     // With PIC, the first instruction is actually "GR+hi(&G)".
00685     Hi = DAG.getNode(ISD::ADD, PtrVT,
00686                      DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
00687   }
00688   
00689   Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
00690   
00691   if (!GV->hasWeakLinkage() && !GV->hasLinkOnceLinkage() &&
00692       (!GV->isExternal() || GV->hasNotBeenReadFromBytecode()))
00693     return Lo;
00694   
00695   // If the global is weak or external, we have to go through the lazy
00696   // resolution stub.
00697   return DAG.getLoad(PtrVT, DAG.getEntryNode(), Lo, DAG.getSrcValue(0));
00698 }
00699 
00700 static SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
00701   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
00702   
00703   // If we're comparing for equality to zero, expose the fact that this is
00704   // implented as a ctlz/srl pair on ppc, so that the dag combiner can
00705   // fold the new nodes.
00706   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
00707     if (C->isNullValue() && CC == ISD::SETEQ) {
00708       MVT::ValueType VT = Op.getOperand(0).getValueType();
00709       SDOperand Zext = Op.getOperand(0);
00710       if (VT < MVT::i32) {
00711         VT = MVT::i32;
00712         Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
00713       } 
00714       unsigned Log2b = Log2_32(MVT::getSizeInBits(VT));
00715       SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
00716       SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
00717                                   DAG.getConstant(Log2b, MVT::i32));
00718       return DAG.getNode(ISD::TRUNCATE, MVT::i32, Scc);
00719     }
00720     // Leave comparisons against 0 and -1 alone for now, since they're usually 
00721     // optimized.  FIXME: revisit this when we can custom lower all setcc
00722     // optimizations.
00723     if (C->isAllOnesValue() || C->isNullValue())
00724       return SDOperand();
00725   }
00726   
00727   // If we have an integer seteq/setne, turn it into a compare against zero
00728   // by subtracting the rhs from the lhs, which is faster than setting a
00729   // condition register, reading it back out, and masking the correct bit.
00730   MVT::ValueType LHSVT = Op.getOperand(0).getValueType();
00731   if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
00732     MVT::ValueType VT = Op.getValueType();
00733     SDOperand Sub = DAG.getNode(ISD::SUB, LHSVT, Op.getOperand(0), 
00734                                 Op.getOperand(1));
00735     return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
00736   }
00737   return SDOperand();
00738 }
00739 
00740 static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG,
00741                               unsigned VarArgsFrameIndex) {
00742   // vastart just stores the address of the VarArgsFrameIndex slot into the
00743   // memory location argument.
00744   SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
00745   return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 
00746                      Op.getOperand(1), Op.getOperand(2));
00747 }
00748 
/// LowerFORMAL_ARGUMENTS - Lower the incoming arguments of the current
/// function: copy register-passed arguments out of their ABI-designated
/// GPRs/FPRs/VRs, load stack-passed arguments from fixed frame slots, and,
/// for varargs functions, spill the remaining integer argument registers to
/// the stack so va_arg can walk them in memory (recording the start slot in
/// VarArgsFrameIndex).  Returns a MERGE_VALUES of every argument value plus
/// the output chain.
static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
                                       int &VarArgsFrameIndex) {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SSARegMap *RegMap = MF.getSSARegMap();
  std::vector<SDOperand> ArgValues;
  SDOperand Root = Op.getOperand(0);   // Incoming chain.
  
  // Arguments live above the 24-byte linkage area.
  // NOTE(review): LowerCALL computes the linkage area as 6*PtrByteSize
  // (48 bytes on PPC64), but this is a fixed 24 -- confirm whether PPC64
  // incoming arguments are fully handled here.
  unsigned ArgOffset = 24;
  const unsigned Num_GPR_Regs = 8;
  const unsigned Num_FPR_Regs = 13;
  const unsigned Num_VR_Regs  = 12;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  
  static const unsigned GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const unsigned FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
  };
  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
  
  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start at offset 24, although the
  // first ones are often in registers.
  // The node's last result value is the chain, hence the -1 bound below.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    // Remember this argument's stack slot offset; ArgOffset is bumped below
    // even when the value actually arrives in a register.
    unsigned CurArgOffset = ArgOffset;
    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i32:
      // All int arguments reserve stack space.
      ArgOffset += isPPC64 ? 8 : 4;

      if (GPR_idx != Num_GPR_Regs) {
        // Argument arrives in a GPR: mark it live-in and copy it out.
        unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
        MF.addLiveIn(GPR[GPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++GPR_idx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:  // PPC64
      // All int arguments reserve stack space.
      ArgOffset += 8;
      
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = RegMap->createVirtualRegister(&PPC::G8RCRegClass);
        MF.addLiveIn(GPR[GPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++GPR_idx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      // All FP arguments reserve stack space.
      ArgOffset += ObjSize;

      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        // Pick the FP register class matching the value's width.
        unsigned VReg;
        if (ObjectVT == MVT::f32)
          VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass);
        else
          VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
        MF.addLiveIn(FPR[FPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = RegMap->createVirtualRegister(&PPC::VRRCRegClass);
        MF.addLiveIn(VR[VR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++VR_idx;
      } else {
        // This should be simple, but requires getting 16-byte aligned stack
        // values.
        assert(0 && "Loading VR argument not implemented yet!");
        needsLoad = true;
      }
      break;
    }
    
    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type
    if (needsLoad) {
      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN,
                             DAG.getSrcValue(NULL));
      } else {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
      }
    }
    
    ArgValues.push_back(ArgVal);
  }
  
  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    std::vector<SDOperand> MemOps;
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
      MF.addLiveIn(GPR[GPR_idx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
                                    Val, FIN, DAG.getSrcValue(NULL));
      MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument to
      // store.
      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps);
  }
  
  // The chain is returned as the last value.
  ArgValues.push_back(Root);
 
  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, ArgValues);
}
00920 
00921 /// isCallCompatibleAddress - Return the immediate to use if the specified
00922 /// 32-bit value is representable in the immediate field of a BxA instruction.
00923 static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) {
00924   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
00925   if (!C) return 0;
00926   
00927   int Addr = C->getValue();
00928   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
00929       (Addr << 6 >> 6) != Addr)
00930     return 0;  // Top 6 bits have to be sext of immediate.
00931   
00932   return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
00933 }
00934 
00935 
/// LowerCALL - Lower an outgoing call: compute the argument stack space
/// required, assign each argument to a GPR/FPR/VR or a stack slot, emit the
/// call (direct, absolute, or indirect through CTR), and copy any return
/// values out of the ABI return registers.
static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  // ISD::CALL operand layout: 0=chain, 1=calling conv, 2=isVarArg,
  // 3=isTailCall, 4=callee, then (argument, sign-extend flag) pairs.
  // NOTE(review): CallingConv and isTailCall are decoded but not otherwise
  // used in this function yet.
  SDOperand Chain = Op.getOperand(0);
  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  
  // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
  // SelectExpr to use to put the arguments in the appropriate registers.
  std::vector<SDOperand> args_to_use;
  
  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned NumBytes = 6*PtrByteSize;
  
  // Add up all the space actually used.
  for (unsigned i = 0; i != NumOps; ++i)
    NumBytes += MVT::getSizeInBits(Op.getOperand(5+2*i).getValueType())/8;

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if its varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  if (NumBytes < 6*PtrByteSize+8*PtrByteSize)
    NumBytes = 6*PtrByteSize+8*PtrByteSize;
  
  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, PtrVT));
  
  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
  
  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = 6*PtrByteSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  static const unsigned GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const unsigned FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
  };
  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = sizeof(GPR_32)/sizeof(GPR_32[0]);
  const unsigned NumFPRs = sizeof(FPR)/sizeof(FPR[0]);
  const unsigned NumVRs  = sizeof( VR)/sizeof( VR[0]);
  
  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;

  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    
    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      // The odd operand of each (arg, flag) pair is nonzero for signed args.
      unsigned ExtOp = ISD::ZERO_EXTEND;
      if (cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue())
        ExtOp = ISD::SIGN_EXTEND;
      Arg = DAG.getNode(ExtOp, MVT::i64, Arg);
    }
    
    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        // Out of integer registers: pass this argument on the stack.
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
      }
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          // Varargs FP values are stored to the stack and reloaded into
          // GPRs so the callee can also read them as integer words.
          SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                        Arg, PtrOff,
                                        DAG.getSrcValue(NULL));
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers
          if (GPR_idx != NumGPRs) {
            SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff,
                                         DAG.getSrcValue(NULL));
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          // An f64 occupies a second GPR for its second 4-byte word.
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64) {
            SDOperand ConstFour = DAG.getConstant(4, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, PtrVT, PtrOff, ConstFour);
            SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff,
                                         DAG.getSrcValue(NULL));
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (GPR_idx != NumGPRs)
            ++GPR_idx;
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64)
            ++GPR_idx;
        }
      } else {
        // Out of FP registers: pass this argument on the stack.
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
      }
      if (isPPC64)
        ArgOffset += 8;
      else
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      assert(!isVarArg && "Don't support passing vectors to varargs yet!");
      assert(VR_idx != NumVRs &&
             "Don't support passing more than 12 vector args yet!");
      RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      break;
    }
  }
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOpChains);
  
  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }
  
  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  std::vector<SDOperand> Ops;
  unsigned CallOpc = PPCISD::CALL;
  
  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
    // If this is an absolute destination address, use the munged value.
    Callee = SDOperand(Dest, 0);
  else {
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
    // to do the call, we can't use PPCISD::CALL.
    Ops.push_back(Chain);
    Ops.push_back(Callee);
    
    if (InFlag.Val)
      Ops.push_back(InFlag);
    Chain = DAG.getNode(PPCISD::MTCTR, NodeTys, Ops);
    InFlag = Chain.getValue(1);
    
    // Copy the callee address into R12 on darwin.
    Chain = DAG.getCopyToReg(Chain, PPC::R12, Callee, InFlag);
    InFlag = Chain.getValue(1);

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    CallOpc = PPCISD::BCTRL;
    Callee.Val = 0;   // Cleared to mark the call as indirect below.
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.Val) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }
  
  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first, 
                                  RegsToPass[i].second.getValueType()));
  
  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  
  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      // Two i32 results: copy R4 first, then R3, threading the flag through
      // Chain.getValue(2).  (Presumably the halves of an expanded i64 --
      // confirm against the i64 expansion code.)
      Chain = DAG.getCopyFromReg(Chain, PPC::R4, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, PPC::X3, MVT::i64, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, PPC::F1, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, PPC::V2, Op.Val->getValueType(0),
                                   InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }
  
  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumBytes, PtrVT));
  NodeTys.push_back(MVT::Other);   // The chain is always a result.
  
  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;
  
  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, ResultVals);
  return Res.getValue(Op.ResNo);
}
01224 
01225 static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) {
01226   SDOperand Copy;
01227   switch(Op.getNumOperands()) {
01228   default:
01229     assert(0 && "Do not know how to return this many arguments!");
01230     abort();
01231   case 1: 
01232     return SDOperand(); // ret void is legal
01233   case 3: {
01234     MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
01235     unsigned ArgReg;
01236     if (ArgVT == MVT::i32) {
01237       ArgReg = PPC::R3;
01238     } else if (ArgVT == MVT::i64) {
01239       ArgReg = PPC::X3;
01240     } else if (MVT::isFloatingPoint(ArgVT)) {
01241       ArgReg = PPC::F1;
01242     } else {
01243       assert(MVT::isVector(ArgVT));
01244       ArgReg = PPC::V2;
01245     }
01246     
01247     Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1),
01248                             SDOperand());
01249     
01250     // If we haven't noted the R3/F1 are live out, do so now.
01251     if (DAG.getMachineFunction().liveout_empty())
01252       DAG.getMachineFunction().addLiveOut(ArgReg);
01253     break;
01254   }
01255   case 5:
01256     Copy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3, Op.getOperand(3), 
01257                             SDOperand());
01258     Copy = DAG.getCopyToReg(Copy, PPC::R4, Op.getOperand(1),Copy.getValue(1));
01259     // If we haven't noted the R3+R4 are live out, do so now.
01260     if (DAG.getMachineFunction().liveout_empty()) {
01261       DAG.getMachineFunction().addLiveOut(PPC::R3);
01262       DAG.getMachineFunction().addLiveOut(PPC::R4);
01263     }
01264     break;
01265   }
01266   return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
01267 }
01268 
01269 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
01270 /// possible.
01271 static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {
01272   // Not FP? Not a fsel.
01273   if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
01274       !MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
01275     return SDOperand();
01276   
01277   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
01278   
01279   // Cannot handle SETEQ/SETNE.
01280   if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDOperand();
01281   
01282   MVT::ValueType ResVT = Op.getValueType();
01283   MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
01284   SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
01285   SDOperand TV  = Op.getOperand(2), FV  = Op.getOperand(3);
01286   
01287   // If the RHS of the comparison is a 0.0, we don't need to do the
01288   // subtraction at all.
01289   if (isFloatingPointZero(RHS))
01290     switch (CC) {
01291     default: break;       // SETUO etc aren't handled by fsel.
01292     case ISD::SETULT:
01293     case ISD::SETOLT:
01294     case ISD::SETLT:
01295       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
01296     case ISD::SETUGE:
01297     case ISD::SETOGE:
01298     case ISD::SETGE:
01299       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
01300         LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
01301       return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
01302     case ISD::SETUGT:
01303     case ISD::SETOGT:
01304     case ISD::SETGT:
01305       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
01306     case ISD::SETULE:
01307     case ISD::SETOLE:
01308     case ISD::SETLE:
01309       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
01310         LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
01311       return DAG.getNode(PPCISD::FSEL, ResVT,
01312                          DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
01313     }
01314       
01315       SDOperand Cmp;
01316   switch (CC) {
01317   default: break;       // SETUO etc aren't handled by fsel.
01318   case ISD::SETULT:
01319   case ISD::SETOLT:
01320   case ISD::SETLT:
01321     Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
01322     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
01323       Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
01324       return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
01325   case ISD::SETUGE:
01326   case ISD::SETOGE:
01327   case ISD::SETGE:
01328     Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
01329     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
01330       Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
01331       return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
01332   case ISD::SETUGT:
01333   case ISD::SETOGT:
01334   case ISD::SETGT:
01335     Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
01336     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
01337       Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
01338       return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
01339   case ISD::SETULE:
01340   case ISD::SETOLE:
01341   case ISD::SETLE:
01342     Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
01343     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
01344       Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
01345       return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
01346   }
01347   return SDOperand();
01348 }
01349 
01350 static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
01351   assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
01352   SDOperand Src = Op.getOperand(0);
01353   if (Src.getValueType() == MVT::f32)
01354     Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);
01355   
01356   SDOperand Tmp;
01357   switch (Op.getValueType()) {
01358   default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
01359   case MVT::i32:
01360     Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
01361     break;
01362   case MVT::i64:
01363     Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
01364     break;
01365   }
01366   
01367   // Convert the FP value to an int value through memory.
01368   SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp);
01369   if (Op.getValueType() == MVT::i32)
01370     Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits);
01371   return Bits;
01372 }
01373 
01374 static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
01375   if (Op.getOperand(0).getValueType() == MVT::i64) {
01376     SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
01377     SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
01378     if (Op.getValueType() == MVT::f32)
01379       FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
01380     return FP;
01381   }
01382   
01383   assert(Op.getOperand(0).getValueType() == MVT::i32 &&
01384          "Unhandled SINT_TO_FP type in custom expander!");
01385   // Since we only generate this in 64-bit mode, we can take advantage of
01386   // 64-bit registers.  In particular, sign extend the input value into the
01387   // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
01388   // then lfd it and fcfid it.
01389   MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
01390   int FrameIdx = FrameInfo->CreateStackObject(8, 8);
01391   SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
01392   
01393   SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
01394                                 Op.getOperand(0));
01395   
01396   // STD the extended value into the stack slot.
01397   SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
01398                                 DAG.getEntryNode(), Ext64, FIdx,
01399                                 DAG.getSrcValue(NULL));
01400   // Load the value as a double.
01401   SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL));
01402   
01403   // FCFID it and return it.
01404   SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
01405   if (Op.getValueType() == MVT::f32)
01406     FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
01407   return FP;
01408 }
01409 
01410 static SDOperand LowerSHL(SDOperand Op, SelectionDAG &DAG,
01411                           MVT::ValueType PtrVT) {
01412   assert(Op.getValueType() == MVT::i64 &&
01413          Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
01414   // The generic code does a fine job expanding shift by a constant.
01415   if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();
01416   
01417   // Otherwise, expand into a bunch of logical ops.  Note that these ops
01418   // depend on the PPC behavior for oversized shift amounts.
01419   SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
01420                              DAG.getConstant(0, PtrVT));
01421   SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
01422                              DAG.getConstant(1, PtrVT));
01423   SDOperand Amt = Op.getOperand(1);
01424   
01425   SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
01426                                DAG.getConstant(32, MVT::i32), Amt);
01427   SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt);
01428   SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1);
01429   SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
01430   SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
01431                                DAG.getConstant(-32U, MVT::i32));
01432   SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5);
01433   SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
01434   SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt);
01435   return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
01436 }
01437 
01438 static SDOperand LowerSRL(SDOperand Op, SelectionDAG &DAG,
01439                           MVT::ValueType PtrVT) {
01440   assert(Op.getValueType() == MVT::i64 &&
01441          Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
01442   // The generic code does a fine job expanding shift by a constant.
01443   if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();
01444   
01445   // Otherwise, expand into a bunch of logical ops.  Note that these ops
01446   // depend on the PPC behavior for oversized shift amounts.
01447   SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
01448                              DAG.getConstant(0, PtrVT));
01449   SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
01450                              DAG.getConstant(1, PtrVT));
01451   SDOperand Amt = Op.getOperand(1);
01452   
01453   SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
01454                                DAG.getConstant(32, MVT::i32), Amt);
01455   SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
01456   SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
01457   SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
01458   SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
01459                                DAG.getConstant(-32U, MVT::i32));
01460   SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);
01461   SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
01462   SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
01463   return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
01464 }
01465 
01466 static SDOperand LowerSRA(SDOperand Op, SelectionDAG &DAG,
01467                           MVT::ValueType PtrVT) {
01468   assert(Op.getValueType() == MVT::i64 &&
01469          Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
01470   // The generic code does a fine job expanding shift by a constant.
01471   if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();
01472   
01473   // Otherwise, expand into a bunch of logical ops, followed by a select_cc.
01474   SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
01475                              DAG.getConstant(0, PtrVT));
01476   SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
01477                              DAG.getConstant(1, PtrVT));
01478   SDOperand Amt = Op.getOperand(1);
01479   
01480   SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
01481                                DAG.getConstant(32, MVT::i32), Amt);
01482   SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
01483   SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
01484   SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
01485   SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
01486                                DAG.getConstant(-32U, MVT::i32));
01487   SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
01488   SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
01489   SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
01490                                     Tmp4, Tmp6, ISD::SETLE);
01491   return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
01492 }
01493 
01494 //===----------------------------------------------------------------------===//
01495 // Vector related lowering.
01496 //
01497 
// If this is a vector of constants or undefs, get the bits.  A bit in
// UndefBits is set if the corresponding element of the vector is an 
// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
// zero.   Return true if this is not an array of constants, false if it is.
//
// The 128-bit vector is viewed as two uint64_t halves; element 0 of the
// BUILD_VECTOR lands in the highest-numbered slot of VectorBits[0].
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
  
  // All elements of a BUILD_VECTOR share a type, so operand 0's size gives
  // the element size (in bits) for the whole vector.
  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);
    
    unsigned PartNo = i >= e/2;     // In the second (upper) uint64_t half?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      // Flag every bit of this element as undef; VectorBits stays zero here.
      uint64_t EltUndefBits = ~0U >> (32-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      // Integer constant: keep only the low EltBitSize bits.
      EltBits = CN->getValue() & (~0U >> (32-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      // FP constant: store its IEEE single-precision bit pattern.
      assert(CN->getValueType(0) == MVT::f32 &&
             "Only one legal FP vector type!");
      EltBits = FloatToBits(CN->getValue());
    } else {
      // Nonconstant element.
      return true;
    }
    
    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }
  
  //printf("%llx %llx  %llx %llx\n", 
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}
01538 
// If this is a splat (repetition) of a value across the whole vector, return
// the smallest size that splats it.  For example, "0x01010101010101..." is a
// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and 
// SplatSize = 1 byte.
//
// Bits128/Undef128 come from GetConstantBuildVectorBits: undefined bit
// positions hold zero in Bits128 and are set in Undef128, so undefs never
// prevent an otherwise-valid splat from being recognized.
static bool isConstantSplat(const uint64_t Bits128[2], 
                            const uint64_t Undef128[2],
                            unsigned &SplatBits, unsigned &SplatUndef,
                            unsigned &SplatSize) {
  
  // Don't let undefs prevent splats from matching.  See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  // (Undef positions are zero in Bits128, so masking each half with the OTHER
  // half's defined-bit mask compares only positions defined in both.)
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;  // Can't be a splat if two pieces don't match.
  
  // Fold the halves: OR merges the defined bits, AND keeps only positions
  // undef in BOTH halves.
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  
  // Check that the top 32-bits are the same as the lower 32-bits, ignoring
  // undefs.
  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
    return false;  // Can't be a splat if two pieces don't match.

  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);

  // If the top 16-bits are different than the lower 16-bits, ignoring
  // undefs, we have an i32 splat.
  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) {
    SplatBits = Bits32;
    SplatUndef = Undef32;
    SplatSize = 4;
    return true;
  }
  
  uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  // If the top 8-bits are different than the lower 8-bits, ignoring
  // undefs, we have an i16 splat.
  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) {
    SplatBits = Bits16;
    SplatUndef = Undef16;
    SplatSize = 2;
    return true;
  }
  
  // Otherwise, we have an 8-bit splat.
  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
  SplatSize = 1;
  return true;
}
01591 
01592 /// BuildSplatI - Build a canonical splati of Val with an element size of
01593 /// SplatSize.  Cast the result to VT.
01594 static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT::ValueType VT,
01595                              SelectionDAG &DAG) {
01596   assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
01597   
01598   // Force vspltis[hw] -1 to vspltisb -1.
01599   if (Val == -1) SplatSize = 1;
01600   
01601   static const MVT::ValueType VTys[] = { // canonical VT to use for each size.
01602     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
01603   };
01604   MVT::ValueType CanonicalVT = VTys[SplatSize-1];
01605   
01606   // Build a canonical splat for this value.
01607   SDOperand Elt = DAG.getConstant(Val, MVT::getVectorBaseType(CanonicalVT));
01608   std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);
01609   SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);
01610   return DAG.getNode(ISD::BIT_CONVERT, VT, Res);
01611 }
01612 
01613 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
01614 /// specified intrinsic ID.
01615 static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS,
01616                                   SelectionDAG &DAG, 
01617                                   MVT::ValueType DestVT = MVT::Other) {
01618   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
01619   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
01620                      DAG.getConstant(IID, MVT::i32), LHS, RHS);
01621 }
01622 
01623 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
01624 /// specified intrinsic ID.
01625 static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1,
01626                                   SDOperand Op2, SelectionDAG &DAG, 
01627                                   MVT::ValueType DestVT = MVT::Other) {
01628   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
01629   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
01630                      DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
01631 }
01632 
01633 
01634 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
01635 /// amount.  The result has the specified value type.
01636 static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt,
01637                              MVT::ValueType VT, SelectionDAG &DAG) {
01638   // Force LHS/RHS to be the right type.
01639   LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS);
01640   RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS);
01641   
01642   std::vector<SDOperand> Ops;
01643   for (unsigned i = 0; i != 16; ++i)
01644     Ops.push_back(DAG.getConstant(i+Amt, MVT::i32));
01645   SDOperand T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS,
01646                             DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));
01647   return DAG.getNode(ISD::BIT_CONVERT, VT, T);
01648 }
01649 
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  // If this is a vector of constants or undefs, get the bits.  A bit in
  // UndefBits is set if the corresponding element of the vector is an 
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero. 
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
    return SDOperand();   // Not a constant vector.
  
  // If this is a splat (repetition) of a value across the whole vector, return
  // the smallest size that splats it.  For example, "0x01010101010101..." is a
  // splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and 
  // SplatSize = 1 byte.
  unsigned SplatBits, SplatUndef, SplatSize;
  if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){
    bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0;
    
    // First, handle single instruction cases.
    
    // All zeros?
    if (SplatBits == 0) {
      // Canonicalize all zero vectors to be v4i32.
      if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
        SDOperand Z = DAG.getConstant(0, MVT::i32);
        Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
        Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
      }
      return Op;
    }

    // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
    // (Sign-extend the SplatSize*8-bit splat constant up to 32 bits.)
    int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);
    if (SextVal >= -16 && SextVal <= 15)
      return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG);
    
    
    // Two instruction sequences.
    
    // If this value is in the range [-32,30] and is even, use:
    //    tmp = VSPLTI[bhw], result = add tmp, tmp
    if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
      Op = BuildSplatI(SextVal >> 1, SplatSize, Op.getValueType(), DAG);
      return DAG.getNode(ISD::ADD, Op.getValueType(), Op, Op);
    }
    
    // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is 
    // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
    // for fneg/fabs.
    // NOTE(review): the guard below only matches the 0x7FFF_FFFF splat (the
    // emitted sequence computes vslw(-1,-1) = 0x8000_0000 then inverts it
    // with the xor); the 0x8000_0000 half of the comment does not appear to
    // be handled here -- confirm whether that case is matched elsewhere.
    if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
      // Make -1 and vspltisw -1:
      SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG);
      
      // Make the VSLW intrinsic, computing 0x8000_0000.
      SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 
                                       OnesV, DAG);
      
      // xor by OnesV to invert it.
      Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
      return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
    }

    // Check to see if this is a wide variety of vsplti*, binop self cases.
    unsigned SplatBitSize = SplatSize*8;
    static const char SplatCsts[] = {
      -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
      -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
    };
    for (unsigned idx = 0; idx < sizeof(SplatCsts)/sizeof(SplatCsts[0]); ++idx){
      // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
      // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
      int i = SplatCsts[idx];
      
      // Figure out what shift amount will be used by altivec if shifted by i in
      // this splat size.
      unsigned TypeShiftAmt = i & (SplatBitSize-1);
      
      // vsplti + shl self.
      if (SextVal == (i << (int)TypeShiftAmt)) {
        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
          Intrinsic::ppc_altivec_vslw
        };
        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
      }
      
      // vsplti + srl self.
      if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
          Intrinsic::ppc_altivec_vsrw
        };
        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
      }
      
      // vsplti + sra self.
      // NOTE(review): this condition is byte-for-byte identical to the
      // "srl self" test above, so this branch is unreachable (the srl case
      // returns first).  The intended test was presumably an *arithmetic*
      // shift of i within the element size -- TODO confirm and fix.
      if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
          Intrinsic::ppc_altivec_vsraw
        };
        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
      }
      
      // vsplti + rol self.
      if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                           ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
          Intrinsic::ppc_altivec_vrlw
        };
        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
      }

      // t = vsplti c, result = vsldoi t, t, 1
      if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG);
      }
      // t = vsplti c, result = vsldoi t, t, 2
      if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG);
      }
      // t = vsplti c, result = vsldoi t, t, 3
      if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG);
      }
    }
    
    // Three instruction sequences.
    
    // Odd, in range [17,31]:  (vsplti C)-(vsplti -16).
    // (Everything in [-16,15] was handled above, so only [16,31] reaches
    // this test.)
    if (SextVal >= 0 && SextVal <= 31) {
      SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, Op.getValueType(),DAG);
      SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);
      return DAG.getNode(ISD::SUB, Op.getValueType(), LHS, RHS);
    }
    // Odd, in range [-31,-17]:  (vsplti C)+(vsplti -16).
    if (SextVal >= -31 && SextVal <= 0) {
      SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, Op.getValueType(),DAG);
      SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);
      return DAG.getNode(ISD::ADD, Op.getValueType(), LHS, RHS);
    }
  }
    
  return SDOperand();
}
01808 
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
///
/// PFEntry layout (as decoded below): bits 29-26 hold the operation (one of
/// the OP_* values), bits 25-13 the LHS sub-shuffle id, and bits 12-0 the
/// RHS sub-shuffle id.  The ids index back into PerfectShuffleTable, so the
/// routine recurses to materialize each operand first.
static SDOperand GeneratePerfectShuffle(unsigned PFEntry, SDOperand LHS,
                                        SDOperand RHS, SelectionDAG &DAG) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID  = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
  
  enum {
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };
  
  if (OpNum == OP_COPY) {
    // Element ids are encoded as base-9 digits: <0,1,2,3> selects LHS
    // unchanged and <4,5,6,7> selects RHS unchanged.
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }
  
  // Recursively materialize the two inputs of this operation.
  SDOperand OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG);
  
  // For the vmrg*/vsplt* operations, build an explicit byte-level shuffle
  // mask; the vsldoi cases delegate to BuildVSLDOI instead.
  unsigned ShufIdxs[16];
  switch (OpNum) {
  default: assert(0 && "Unknown i32 permute!");
  case OP_VMRGHW:
    // Interleave the high-order words of the two inputs.
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    // Interleave the low-order words of the two inputs.
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    // Splat word 0 of the (single) input across all four words.
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG);
  }
  std::vector<SDOperand> Ops;
  for (unsigned i = 0; i != 16; ++i)
    Ops.push_back(DAG.getConstant(ShufIdxs[i], MVT::i32));
  
  return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS,
                     DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));
}
01885 
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
/// return the code it can be lowered into.  Worst case, it can always be
/// lowered into a vperm.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);
  
  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  // These unary forms only apply when the second input is undef.
  if (V2.getOpcode() == ISD::UNDEF) {
    if (PPC::isSplatShuffleMask(PermMask.Val, 1) ||
        PPC::isSplatShuffleMask(PermMask.Val, 2) ||
        PPC::isSplatShuffleMask(PermMask.Val, 4) ||
        PPC::isVPKUWUMShuffleMask(PermMask.Val, true) ||
        PPC::isVPKUHUMShuffleMask(PermMask.Val, true) ||
        PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) {
      return Op;
    }
  }
  
  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) ||
      PPC::isVPKUHUMShuffleMask(PermMask.Val, false) ||
      PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 4, false))
    return Op;
  
  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef.  (Defined word sources are
                          // numbered 0-7; 8 is the "undef" id.)
    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
      if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)
        continue;   // Undef, ignore it.
      
      unsigned ByteSource = 
        cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue();
      // Each byte must come from position j within SOME 4-byte source word...
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }
      
      // ...and every defined byte of this element must name the same word.
      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }
    
  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 
  // perfect shuffle vector to determine if it is cost effective to do this as
  // discrete instructions, or whether we should use a vperm.
  if (isFourElementShuffle) {
    // Compute the index in the perfect shuffle table.  Each element id is a
    // base-9 digit (sources 0-7 plus 8 for undef).
    unsigned PFTableIndex = 
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
    
    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost  = (PFEntry >> 30);
    
    // Determining when to avoid vperm is tricky.  Many things affect the cost
    // of vperm, particularly how many times the perm mask needs to be computed.
    // For example, if the perm mask can be hoisted out of a loop or is already
    // used (perhaps because there are multiple permutes with the same shuffle
    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
    // the loop requires an extra register.
    //
    // As a compromise, we only emit discrete instructions if the shuffle can be
    // generated in 3 or fewer operations.  When we have loop information 
    // available, if this block is within a loop, we should avoid using vperm
    // for 3-operation perms and use a constant pool load instead.
    if (Cost < 3) 
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG);
  }
  
  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
  
  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.
  MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
  unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
  
  std::vector<SDOperand> ResultMask;
  for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
    unsigned SrcElt;
    // Undef mask entries arbitrarily read from element 0.
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else 
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
    
    // Expand the element index into BytesPerElement consecutive byte indices.
    for (unsigned j = 0; j != BytesPerElement; ++j)
      ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                           MVT::i8));
  }
  
  SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
  return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
}
02007 
02008 /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
02009 /// altivec comparison.  If it is, return true and fill in Opc/isDot with
02010 /// information about the intrinsic.
02011 static bool getAltivecCompareInfo(SDOperand Intrin, int &CompareOpc,
02012                                   bool &isDot) {
02013   unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getValue();
02014   CompareOpc = -1;
02015   isDot = false;
02016   switch (IntrinsicID) {
02017   default: return false;
02018     // Comparison predicates.
02019   case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
02020   case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
02021   case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
02022   case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
02023   case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
02024   case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
02025   case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
02026   case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
02027   case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
02028   case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
02029   case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
02030   case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
02031   case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
02032     
02033     // Normal Comparisons.
02034   case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
02035   case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
02036   case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
02037   case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
02038   case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
02039   case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
02040   case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
02041   case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
02042   case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
02043   case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
02044   case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
02045   case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
02046   case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
02047   }
02048   return true;
02049 }
02050 
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower (currently: the AltiVec comparison intrinsics), do it; otherwise
/// return the null SDOperand so the caller falls back to default handling.
static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  int CompareOpc;
  bool isDot;
  if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
    return SDOperand();    // Don't custom lower most intrinsics.
  
  // If this is a non-dot comparison, make the VCMP node and we are done.
  // For the non-predicate forms, operand 0 is the intrinsic ID and operands
  // 1/2 are the two vectors to compare.
  if (!isDot) {
    SDOperand Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(),
                                Op.getOperand(1), Op.getOperand(2),
                                DAG.getConstant(CompareOpc, MVT::i32));
    return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp);
  }
  
  // Create the PPCISD altivec 'dot' comparison node.
  // For the predicate (_p) forms, operand 1 selects which CR6 bit to test
  // (see the switch below) and operands 2/3 are the vectors to compare.
  std::vector<SDOperand> Ops;
  std::vector<MVT::ValueType> VTs;
  Ops.push_back(Op.getOperand(2));  // LHS
  Ops.push_back(Op.getOperand(3));  // RHS
  Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32));
  VTs.push_back(Op.getOperand(2).getValueType());
  VTs.push_back(MVT::Flag);   // Second result: the flag carrying CR6.
  SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops);
  
  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32,
                                DAG.getRegister(PPC::CR6, MVT::i32),
                                CompNode.getValue(1)); 
  
  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0; InvertBit = false;
    break;
  case 1:   // Return the inverted value of the EQ bit of CR6.
    BitNo = 0; InvertBit = true;
    break;
  case 2:   // Return the value of the LT bit of CR6.
    BitNo = 2; InvertBit = false;
    break;
  case 3:   // Return the inverted value of the LT bit of CR6.
    BitNo = 2; InvertBit = true;
    break;
  }
  
  // Shift the bit into the low position.  The desired CR6 bit sits at bit
  // position 8-(3-BitNo) of the MFCR result.
  Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags,
                      DAG.getConstant(8-(3-BitNo), MVT::i32));
  // Isolate the bit by masking off everything above it.
  Flags = DAG.getNode(ISD::AND, MVT::i32, Flags,
                      DAG.getConstant(1, MVT::i32));
  
  // If we are supposed to, toggle the bit (for the "inverted" predicates).
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags,
                        DAG.getConstant(1, MVT::i32));
  return Flags;
}
02117 
02118 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
02119   // Create a stack slot that is 16-byte aligned.
02120   MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
02121   int FrameIdx = FrameInfo->CreateStackObject(16, 16);
02122   SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
02123   
02124   // Store the input value into Value#0 of the stack slot.
02125   SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(),
02126                                 Op.getOperand(0), FIdx,DAG.getSrcValue(NULL));
02127   // Load it out.
02128   return DAG.getLoad(Op.getValueType(), Store, FIdx, DAG.getSrcValue(NULL));
02129 }
02130 
/// LowerMUL - Custom lower integer vector multiplies.  AltiVec has no single
/// full-width vector multiply instruction, so synthesize one from partial
/// (even/odd element) multiplies and merges, per element type.
static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) {
  if (Op.getValueType() == MVT::v4i32) {
    SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    
    SDOperand Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG);
    SDOperand Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG); // +16 as shift amt.
    
    SDOperand RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG);
    
    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHSSwap);
    
    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDOperand LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                        LHS, RHS, DAG, MVT::v4i32);
    
    // vmsumuhm against the halfword-rotated RHS produces the cross terms
    // that belong in the upper 16 bits of each 32-bit product.
    SDOperand HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                        LHS, RHSSwap, Zero, DAG, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG);
    return DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v8i16) {
    SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    
    // vmladduhm computes LHS*RHS+Zero per halfword, giving the low 16 bits
    // of each product directly.
    SDOperand Zero = BuildSplatI(0, 1, MVT::v8i16, DAG);

    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
                            LHS, RHS, Zero, DAG);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    
    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDOperand EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                           LHS, RHS, DAG, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, EvenParts);
    
    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDOperand OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                          LHS, RHS, DAG, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, OddParts);
    
    // Merge the results together.  Index 2*i+1 selects the second byte of
    // each 16-bit product (presumably the low byte, given big-endian element
    // order -- confirm against the AltiVec manual), interleaving the even-
    // and odd-sourced results back into 16 bytes.
    std::vector<SDOperand> Ops;
    for (unsigned i = 0; i != 8; ++i) {
      Ops.push_back(DAG.getConstant(2*i+1, MVT::i8));
      Ops.push_back(DAG.getConstant(2*i+1+16, MVT::i8));
    }
    
    return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, EvenParts, OddParts,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));
  } else {
    assert(0 && "Unknown mul to lower!");
    abort();
  }
}
02190 
02191 /// LowerOperation - Provide custom lowering hooks for some operations.
02192 ///
02193 SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
02194   switch (Op.getOpcode()) {
02195   default: assert(0 && "Wasn't expecting to be able to lower this!"); 
02196   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
02197   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
02198   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
02199   case ISD::SETCC:              return LowerSETCC(Op, DAG);
02200   case ISD::VASTART:            return LowerVASTART(Op, DAG, VarArgsFrameIndex);
02201   case ISD::FORMAL_ARGUMENTS:
02202       return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
02203   case ISD::CALL:               return LowerCALL(Op, DAG);
02204   case ISD::RET:                return LowerRET(Op, DAG);
02205     
02206   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
02207   case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
02208   case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
02209 
02210   // Lower 64-bit shifts.
02211   case ISD::SHL:                return LowerSHL(Op, DAG, getPointerTy());
02212   case ISD::SRL:                return LowerSRL(Op, DAG, getPointerTy());
02213   case ISD::SRA:                return LowerSRA(Op, DAG, getPointerTy());
02214 
02215   // Vector-related lowering.
02216   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
02217   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
02218   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
02219   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
02220   case ISD::MUL:                return LowerMUL(Op, DAG);
02221   }
02222   return SDOperand();
02223 }
02224 
02225 //===----------------------------------------------------------------------===//
02226 //  Other Lowering Code
02227 //===----------------------------------------------------------------------===//
02228 
/// InsertAtEndOfBasicBlock - Expand a SELECT_CC pseudo instruction into the
/// explicit branch diamond it represents, splitting the current block.
/// SELECT_CC operand layout (read below): 0 = destination vreg, 1 = condition
/// register to branch on, 2 = true value, 3 = false value, 4 = branch opcode
/// (as an immediate).  Returns the block in which subsequent instructions
/// should be inserted (the join block).
MachineBasicBlock *
PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  assert((MI->getOpcode() == PPC::SELECT_CC_I4 ||
          MI->getOpcode() == PPC::SELECT_CC_I8 ||
          MI->getOpcode() == PPC::SELECT_CC_F4 ||
          MI->getOpcode() == PPC::SELECT_CC_F8 ||
          MI->getOpcode() == PPC::SELECT_CC_VRRC) &&
         "Unexpected instr type to insert");
  
  // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
  // control-flow pattern.  The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  // 'It' points just past BB so the new blocks are inserted between BB and
  // its original layout successor.
  ilist<MachineBasicBlock>::iterator It = BB;
  ++It;
  
  //  thisMBB:
  //  ...
  //   TrueVal = ...
  //   cmpTY ccX, r1, r2
  //   bCC copy1MBB
  //   fallthrough --> copy0MBB
  MachineBasicBlock *thisMBB = BB;
  MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
  // Conditional branch to sinkMBB: opcode comes from operand 4, the CR to
  // test from operand 1.  Taking the branch keeps the true value (the PHI
  // below maps the thisMBB edge to operand 2).
  BuildMI(BB, MI->getOperand(4).getImmedValue(), 2)
    .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
  MachineFunction *F = BB->getParent();
  F->getBasicBlockList().insert(It, copy0MBB);
  F->getBasicBlockList().insert(It, sinkMBB);
  // Update machine-CFG edges by first adding all successors of the current
  // block to the new block which will contain the Phi node for the select.
  for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 
      e = BB->succ_end(); i != e; ++i)
    sinkMBB->addSuccessor(*i);
  // Next, remove all successors of the current block, and add the true
  // and fallthrough blocks as its successors.
  while(!BB->succ_empty())
    BB->removeSuccessor(BB->succ_begin());
  BB->addSuccessor(copy0MBB);
  BB->addSuccessor(sinkMBB);
  
  //  copy0MBB:
  //   %FalseValue = ...
  //   # fallthrough to sinkMBB
  BB = copy0MBB;
  
  // Update machine-CFG edges
  BB->addSuccessor(sinkMBB);
  
  //  sinkMBB:
  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
  //  ...
  BB = sinkMBB;
  BuildMI(BB, PPC::PHI, 4, MI->getOperand(0).getReg())
    .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

  delete MI;   // The pseudo instruction is gone now.
  return BB;
}
02292 
02293 //===----------------------------------------------------------------------===//
02294 // Target Optimization Hooks
02295 //===----------------------------------------------------------------------===//
02296 
/// PerformDAGCombine - Target-specific DAG combines.  Each case either
/// returns a replacement value for N or falls through to return the null
/// SDOperand, meaning "no change".
SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N, 
                                               DAGCombinerInfo &DCI) const {
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::SINT_TO_FP:
    if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
        // We allow the src/dst to be either f32/f64, but the intermediate
        // type must be i64.
        if (N->getOperand(0).getValueType() == MVT::i64) {
          SDOperand Val = N->getOperand(0).getOperand(0);
          // FCTIDZ/FCFID below are built on f64, so widen an f32 input first.
          if (Val.getValueType() == MVT::f32) {
            Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
            DCI.AddToWorklist(Val.Val);
          }
            
          Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
          DCI.AddToWorklist(Val.Val);
          Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
          DCI.AddToWorklist(Val.Val);
          // Round back down if the original result type was f32.
          if (N->getValueType(0) == MVT::f32) {
            Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);
            DCI.AddToWorklist(Val.Val);
          }
          return Val;
        } else if (N->getOperand(0).getValueType() == MVT::i32) {
          // If the intermediate type is i32, we can avoid the load/store here
          // too.
        }
      }
    }
    break;
  case ISD::STORE:
    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32) {
      SDOperand Val = N->getOperand(1).getOperand(0);
      // FCTIWZ below is built on f64, so widen an f32 input first.
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
        DCI.AddToWorklist(Val.Val);
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);
      DCI.AddToWorklist(Val.Val);

      // STFIWX takes the store's chain (op 0), the converted value, and the
      // store's pointer and source-value operands (ops 2/3).
      Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,
                        N->getOperand(2), N->getOperand(3));
      DCI.AddToWorklist(Val.Val);
      return Val;
    }
    
    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    if (N->getOperand(1).getOpcode() == ISD::BSWAP &&
        N->getOperand(1).Val->hasOneUse() &&
        (N->getOperand(1).getValueType() == MVT::i32 ||
         N->getOperand(1).getValueType() == MVT::i16)) {
      SDOperand BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, BSwapOp);

      // The trailing VT operand records whether this was the i16 or i32 form.
      return DAG.getNode(PPCISD::STBRX, MVT::Other, N->getOperand(0), BSwapOp,
                         N->getOperand(2), N->getOperand(3),
                         DAG.getValueType(N->getOperand(1).getValueType()));
    }
    break;
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    if (N->getOperand(0).getOpcode() == ISD::LOAD &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
      SDOperand Load = N->getOperand(0);
      // Create the byte-swapping load.  LBRX always produces i32 plus a
      // chain; the VT operand records the original load width.
      std::vector<MVT::ValueType> VTs;
      VTs.push_back(MVT::i32);
      VTs.push_back(MVT::Other);
      std::vector<SDOperand> Ops;
      Ops.push_back(Load.getOperand(0));   // Chain
      Ops.push_back(Load.getOperand(1));   // Ptr
      Ops.push_back(Load.getOperand(2));   // SrcValue
      Ops.push_back(DAG.getValueType(N->getValueType(0))); // VT
      SDOperand BSLoad = DAG.getNode(PPCISD::LBRX, VTs, Ops);

      // If this is an i16 load, insert the truncate.  
      SDOperand ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, MVT::i16, BSLoad);
      
      // First, combine the bswap away.  This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away, we give it a bogus result value but a real
      // chain result.  The result value is dead because the bswap is dead.
      DCI.CombineTo(Load.Val, ResVal, BSLoad.getValue(1));
      
      // Return N so it doesn't get rechecked!
      return SDOperand(N, 0);
    }
    
    break;
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    //
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {
      
      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = 0;
      
      SDNode *LHSN = N->getOperand(0).Val;
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if ((*UI)->getOpcode() == PPCISD::VCMPo &&
            (*UI)->getOperand(1) == N->getOperand(1) &&
            (*UI)->getOperand(2) == N->getOperand(2) &&
            (*UI)->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }
      
      // If there is no VCMPo node, or if the flag value has a single use, don't
      // transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;
        
      // Look at the (necessarily single) use of the flag value.  If it has a 
      // chain, this transformation is more complex.  Note that multiple things
      // could use the value result, which we should ignore.
      SDNode *FlagUser = 0;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin(); 
           FlagUser == 0; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        // Check whether this user consumes result #1 (the flag) of VCMPoNode.
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDOperand(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }
      
      // If the user is a MFCR instruction, we know this is safe.  Otherwise we
      // give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFCR)
        return SDOperand(VCMPoNode, 0);
    }
    break;
  }
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFCR: instead, branch directly on CR6.  This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDOperand LHS = N->getOperand(2), RHS = N->getOperand(3);
    int CompareOpc;
    bool isDot;
    
    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
      assert(isDot && "Can't compare against a vector result!");
      
      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, MVT::Other, 
                           N->getOperand(0), N->getOperand(4));
      }
    
      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
      
      // Create the PPCISD altivec 'dot' comparison node.  The predicate
      // intrinsic's operands 2/3 are the vectors being compared; operand 1
      // selects which CR6 bit the predicate tests (switch below).
      std::vector<SDOperand> Ops;
      std::vector<MVT::ValueType> VTs;
      Ops.push_back(LHS.getOperand(2));  // LHS of compare
      Ops.push_back(LHS.getOperand(3));  // RHS of compare
      Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32));
      VTs.push_back(LHS.getOperand(2).getValueType());
      VTs.push_back(MVT::Flag);
      SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops);
      
      // Unpack the result based on how the target uses it.
      unsigned CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::BEQ : PPC::BNE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::BNE : PPC::BEQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::BLT : PPC::BGE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::BGE : PPC::BLT;
        break;
      }

      // Branch directly on CR6, flagged to the comparison above.
      return DAG.getNode(PPCISD::COND_BRANCH, MVT::Other, N->getOperand(0),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         DAG.getConstant(CompOpc, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  }
  
  return SDOperand();
}
02519 
02520 //===----------------------------------------------------------------------===//
02521 // Inline Assembly Support
02522 //===----------------------------------------------------------------------===//
02523 
02524 void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
02525                                                        uint64_t Mask,
02526                                                        uint64_t &KnownZero, 
02527                                                        uint64_t &KnownOne,
02528                                                        unsigned Depth) const {
02529   KnownZero = 0;
02530   KnownOne = 0;
02531   switch (Op.getOpcode()) {
02532   default: break;
02533   case PPCISD::LBRX: {
02534     // lhbrx is known to have the top bits cleared out.
02535     if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16)
02536       KnownZero = 0xFFFF0000;
02537     break;
02538   }
02539   case ISD::INTRINSIC_WO_CHAIN: {
02540     switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) {
02541     default: break;
02542     case Intrinsic::ppc_altivec_vcmpbfp_p:
02543     case Intrinsic::ppc_altivec_vcmpeqfp_p:
02544     case Intrinsic::ppc_altivec_vcmpequb_p:
02545     case Intrinsic::ppc_altivec_vcmpequh_p:
02546     case Intrinsic::ppc_altivec_vcmpequw_p:
02547     case Intrinsic::ppc_altivec_vcmpgefp_p:
02548     case Intrinsic::ppc_altivec_vcmpgtfp_p:
02549     case Intrinsic::ppc_altivec_vcmpgtsb_p:
02550     case Intrinsic::ppc_altivec_vcmpgtsh_p:
02551     case Intrinsic::ppc_altivec_vcmpgtsw_p:
02552     case Intrinsic::ppc_altivec_vcmpgtub_p:
02553     case Intrinsic::ppc_altivec_vcmpgtuh_p:
02554     case Intrinsic::ppc_altivec_vcmpgtuw_p:
02555       KnownZero = ~1U;  // All bits but the low one are known to be zero.
02556       break;
02557     }        
02558   }
02559   }
02560 }
02561 
02562 
02563 /// getConstraintType - Given a constraint letter, return the type of
02564 /// constraint it is for this target.
02565 PPCTargetLowering::ConstraintType 
02566 PPCTargetLowering::getConstraintType(char ConstraintLetter) const {
02567   switch (ConstraintLetter) {
02568   default: break;
02569   case 'b':
02570   case 'r':
02571   case 'f':
02572   case 'v':
02573   case 'y':
02574     return C_RegisterClass;
02575   }  
02576   return TargetLowering::getConstraintType(ConstraintLetter);
02577 }
02578 
02579 
/// getRegClassForInlineAsmConstraint - Map a GCC RS6000 register-class
/// constraint letter to the list of registers inline asm may allocate from.
/// Returns an empty vector for unrecognized constraints.
std::vector<unsigned> PPCTargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {      // GCC RS6000 Constraint Letters
    default: break;  // Unknown constriant letter
    case 'b':   // Base register: any GPR except R0.
      return make_vector<unsigned>(/*no R0*/ PPC::R1 , PPC::R2 , PPC::R3 ,
                                   PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
                                   PPC::R8 , PPC::R9 , PPC::R10, PPC::R11, 
                                   PPC::R12, PPC::R13, PPC::R14, PPC::R15, 
                                   PPC::R16, PPC::R17, PPC::R18, PPC::R19, 
                                   PPC::R20, PPC::R21, PPC::R22, PPC::R23, 
                                   PPC::R24, PPC::R25, PPC::R26, PPC::R27, 
                                   PPC::R28, PPC::R29, PPC::R30, PPC::R31, 
                                   0);
    case 'r':   // Any general-purpose register, R0 included.
      return make_vector<unsigned>(PPC::R0 , PPC::R1 , PPC::R2 , PPC::R3 ,
                                   PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
                                   PPC::R8 , PPC::R9 , PPC::R10, PPC::R11, 
                                   PPC::R12, PPC::R13, PPC::R14, PPC::R15, 
                                   PPC::R16, PPC::R17, PPC::R18, PPC::R19, 
                                   PPC::R20, PPC::R21, PPC::R22, PPC::R23, 
                                   PPC::R24, PPC::R25, PPC::R26, PPC::R27, 
                                   PPC::R28, PPC::R29, PPC::R30, PPC::R31, 
                                   0);
    case 'f':   // Any floating-point register.
      return make_vector<unsigned>(PPC::F0 , PPC::F1 , PPC::F2 , PPC::F3 ,
                                   PPC::F4 , PPC::F5 , PPC::F6 , PPC::F7 ,
                                   PPC::F8 , PPC::F9 , PPC::F10, PPC::F11, 
                                   PPC::F12, PPC::F13, PPC::F14, PPC::F15, 
                                   PPC::F16, PPC::F17, PPC::F18, PPC::F19, 
                                   PPC::F20, PPC::F21, PPC::F22, PPC::F23, 
                                   PPC::F24, PPC::F25, PPC::F26, PPC::F27, 
                                   PPC::F28, PPC::F29, PPC::F30, PPC::F31, 
                                   0);
    case 'v':   // Any AltiVec vector register.
      return make_vector<unsigned>(PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 ,
                                   PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
                                   PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, 
                                   PPC::V12, PPC::V13, PPC::V14, PPC::V15, 
                                   PPC::V16, PPC::V17, PPC::V18, PPC::V19, 
                                   PPC::V20, PPC::V21, PPC::V22, PPC::V23, 
                                   PPC::V24, PPC::V25, PPC::V26, PPC::V27, 
                                   PPC::V28, PPC::V29, PPC::V30, PPC::V31, 
                                   0);
    case 'y':   // Any condition register field.
      return make_vector<unsigned>(PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
                                   PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7,
                                   0);
    }
  }
  
  return std::vector<unsigned>();
}
02635 
02636 // isOperandValidForConstraint
02637 bool PPCTargetLowering::
02638 isOperandValidForConstraint(SDOperand Op, char Letter) {
02639   switch (Letter) {
02640   default: break;
02641   case 'I':
02642   case 'J':
02643   case 'K':
02644   case 'L':
02645   case 'M':
02646   case 'N':
02647   case 'O':
02648   case 'P': {
02649     if (!isa<ConstantSDNode>(Op)) return false;  // Must be an immediate.
02650     unsigned Value = cast<ConstantSDNode>(Op)->getValue();
02651     switch (Letter) {
02652     default: assert(0 && "Unknown constraint letter!");
02653     case 'I':  // "I" is a signed 16-bit constant.
02654       return (short)Value == (int)Value;
02655     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
02656     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
02657       return (short)Value == 0;
02658     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
02659       return (Value >> 16) == 0;
02660     case 'M':  // "M" is a constant that is greater than 31.
02661       return Value > 31;
02662     case 'N':  // "N" is a positive constant that is an exact power of two.
02663       return (int)Value > 0 && isPowerOf2_32(Value);
02664     case 'O':  // "O" is the constant zero. 
02665       return Value == 0;
02666     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
02667       return (short)-Value == (int)-Value;
02668     }
02669     break;
02670   }
02671   }
02672   
02673   // Handle standard constraint letters.
02674   return TargetLowering::isOperandValidForConstraint(Op, Letter);
02675 }
02676 
02677 /// isLegalAddressImmediate - Return true if the integer value can be used
02678 /// as the offset of the target addressing mode.
02679 bool PPCTargetLowering::isLegalAddressImmediate(int64_t V) const {
02680   // PPC allows a sign-extended 16-bit immediate field.
02681   return (V > -(1 << 16) && V < (1 << 16)-1);
02682 }