// Extracted from the LLVM API documentation (doxygen) rendering of this file.
00001 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This implements the TargetLowering class. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "llvm/Target/TargetLowering.h" 00015 #include "llvm/Target/TargetData.h" 00016 #include "llvm/Target/TargetMachine.h" 00017 #include "llvm/Target/MRegisterInfo.h" 00018 #include "llvm/DerivedTypes.h" 00019 #include "llvm/CodeGen/SelectionDAG.h" 00020 #include "llvm/ADT/StringExtras.h" 00021 #include "llvm/Support/MathExtras.h" 00022 using namespace llvm; 00023 00024 TargetLowering::TargetLowering(TargetMachine &tm) 00025 : TM(tm), TD(TM.getTargetData()) { 00026 assert(ISD::BUILTIN_OP_END <= 156 && 00027 "Fixed size array in TargetLowering is not large enough!"); 00028 // All operations default to being supported. 
00029 memset(OpActions, 0, sizeof(OpActions)); 00030 00031 IsLittleEndian = TD->isLittleEndian(); 00032 ShiftAmountTy = SetCCResultTy = PointerTy = getValueType(TD->getIntPtrType()); 00033 ShiftAmtHandling = Undefined; 00034 memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); 00035 memset(TargetDAGCombineArray, 0, 00036 sizeof(TargetDAGCombineArray)/sizeof(TargetDAGCombineArray[0])); 00037 maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; 00038 allowUnalignedMemoryAccesses = false; 00039 UseUnderscoreSetJmpLongJmp = false; 00040 IntDivIsCheap = false; 00041 Pow2DivIsCheap = false; 00042 StackPointerRegisterToSaveRestore = 0; 00043 SchedPreferenceInfo = SchedulingForLatency; 00044 } 00045 00046 TargetLowering::~TargetLowering() {} 00047 00048 /// setValueTypeAction - Set the action for a particular value type. This 00049 /// assumes an action has not already been set for this value type. 00050 static void SetValueTypeAction(MVT::ValueType VT, 00051 TargetLowering::LegalizeAction Action, 00052 TargetLowering &TLI, 00053 MVT::ValueType *TransformToType, 00054 TargetLowering::ValueTypeActionImpl &ValueTypeActions) { 00055 ValueTypeActions.setTypeAction(VT, Action); 00056 if (Action == TargetLowering::Promote) { 00057 MVT::ValueType PromoteTo; 00058 if (VT == MVT::f32) 00059 PromoteTo = MVT::f64; 00060 else { 00061 unsigned LargerReg = VT+1; 00062 while (!TLI.isTypeLegal((MVT::ValueType)LargerReg)) { 00063 ++LargerReg; 00064 assert(MVT::isInteger((MVT::ValueType)LargerReg) && 00065 "Nothing to promote to??"); 00066 } 00067 PromoteTo = (MVT::ValueType)LargerReg; 00068 } 00069 00070 assert(MVT::isInteger(VT) == MVT::isInteger(PromoteTo) && 00071 MVT::isFloatingPoint(VT) == MVT::isFloatingPoint(PromoteTo) && 00072 "Can only promote from int->int or fp->fp!"); 00073 assert(VT < PromoteTo && "Must promote to a larger type!"); 00074 TransformToType[VT] = PromoteTo; 00075 } else if (Action == TargetLowering::Expand) { 00076 assert((VT == 
MVT::Vector || MVT::isInteger(VT)) && VT > MVT::i8 && 00077 "Cannot expand this type: target must support SOME integer reg!"); 00078 // Expand to the next smaller integer type! 00079 TransformToType[VT] = (MVT::ValueType)(VT-1); 00080 } 00081 } 00082 00083 00084 /// computeRegisterProperties - Once all of the register classes are added, 00085 /// this allows us to compute derived properties we expose. 00086 void TargetLowering::computeRegisterProperties() { 00087 assert(MVT::LAST_VALUETYPE <= 32 && 00088 "Too many value types for ValueTypeActions to hold!"); 00089 00090 // Everything defaults to one. 00091 for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) 00092 NumElementsForVT[i] = 1; 00093 00094 // Find the largest integer register class. 00095 unsigned LargestIntReg = MVT::i128; 00096 for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) 00097 assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); 00098 00099 // Every integer value type larger than this largest register takes twice as 00100 // many registers to represent as the previous ValueType. 00101 unsigned ExpandedReg = LargestIntReg; ++LargestIntReg; 00102 for (++ExpandedReg; MVT::isInteger((MVT::ValueType)ExpandedReg);++ExpandedReg) 00103 NumElementsForVT[ExpandedReg] = 2*NumElementsForVT[ExpandedReg-1]; 00104 00105 // Inspect all of the ValueType's possible, deciding how to process them. 00106 for (unsigned IntReg = MVT::i1; IntReg <= MVT::i128; ++IntReg) 00107 // If we are expanding this type, expand it! 00108 if (getNumElements((MVT::ValueType)IntReg) != 1) 00109 SetValueTypeAction((MVT::ValueType)IntReg, Expand, *this, TransformToType, 00110 ValueTypeActions); 00111 else if (!isTypeLegal((MVT::ValueType)IntReg)) 00112 // Otherwise, if we don't have native support, we must promote to a 00113 // larger type. 
00114 SetValueTypeAction((MVT::ValueType)IntReg, Promote, *this, 00115 TransformToType, ValueTypeActions); 00116 else 00117 TransformToType[(MVT::ValueType)IntReg] = (MVT::ValueType)IntReg; 00118 00119 // If the target does not have native support for F32, promote it to F64. 00120 if (!isTypeLegal(MVT::f32)) 00121 SetValueTypeAction(MVT::f32, Promote, *this, 00122 TransformToType, ValueTypeActions); 00123 else 00124 TransformToType[MVT::f32] = MVT::f32; 00125 00126 // Set MVT::Vector to always be Expanded 00127 SetValueTypeAction(MVT::Vector, Expand, *this, TransformToType, 00128 ValueTypeActions); 00129 00130 // Loop over all of the legal vector value types, specifying an identity type 00131 // transformation. 00132 for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; 00133 i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { 00134 if (isTypeLegal((MVT::ValueType)i)) 00135 TransformToType[i] = (MVT::ValueType)i; 00136 } 00137 00138 assert(isTypeLegal(MVT::f64) && "Target does not support FP?"); 00139 TransformToType[MVT::f64] = MVT::f64; 00140 } 00141 00142 const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { 00143 return NULL; 00144 } 00145 00146 /// getPackedTypeBreakdown - Packed types are broken down into some number of 00147 /// legal first class types. For example, <8 x float> maps to 2 MVT::v4f32 00148 /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. 00149 /// 00150 /// This method returns the number and type of the resultant breakdown. 00151 /// 00152 unsigned TargetLowering::getPackedTypeBreakdown(const PackedType *PTy, 00153 MVT::ValueType &PTyElementVT, 00154 MVT::ValueType &PTyLegalElementVT) const { 00155 // Figure out the right, legal destination reg to copy into. 00156 unsigned NumElts = PTy->getNumElements(); 00157 MVT::ValueType EltTy = getValueType(PTy->getElementType()); 00158 00159 unsigned NumVectorRegs = 1; 00160 00161 // Divide the input until we get to a supported size. 
This will always 00162 // end with a scalar if the target doesn't support vectors. 00163 while (NumElts > 1 && !isTypeLegal(getVectorType(EltTy, NumElts))) { 00164 NumElts >>= 1; 00165 NumVectorRegs <<= 1; 00166 } 00167 00168 MVT::ValueType VT; 00169 if (NumElts == 1) { 00170 VT = EltTy; 00171 } else { 00172 VT = getVectorType(EltTy, NumElts); 00173 } 00174 PTyElementVT = VT; 00175 00176 MVT::ValueType DestVT = getTypeToTransformTo(VT); 00177 PTyLegalElementVT = DestVT; 00178 if (DestVT < VT) { 00179 // Value is expanded, e.g. i64 -> i16. 00180 return NumVectorRegs*(MVT::getSizeInBits(VT)/MVT::getSizeInBits(DestVT)); 00181 } else { 00182 // Otherwise, promotion or legal types use the same number of registers as 00183 // the vector decimated to the appropriate level. 00184 return NumVectorRegs; 00185 } 00186 00187 return 1; 00188 } 00189 00190 //===----------------------------------------------------------------------===// 00191 // Optimization Methods 00192 //===----------------------------------------------------------------------===// 00193 00194 /// ShrinkDemandedConstant - Check to see if the specified operand of the 00195 /// specified instruction is a constant integer. If so, check to see if there 00196 /// are any bits set in the constant that are not demanded. If so, shrink the 00197 /// constant and return true. 
/// ShrinkDemandedConstant - If the RHS of an AND/OR/XOR is a constant with set
/// bits outside of Demanded, rebuild the node with the constant masked down to
/// the demanded bits and report the combine via CombineTo.
bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDOperand Op,
                                                        uint64_t Demanded) {
  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch(Op.getOpcode()) {
  default: break;
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
      // Only rebuild if the constant actually has bits outside Demanded;
      // otherwise there is nothing to shrink.
      if ((~Demanded & C->getValue()) != 0) {
        MVT::ValueType VT = Op.getValueType();
        SDOperand New = DAG.getNode(Op.getOpcode(), VT, Op.getOperand(0),
                                    DAG.getConstant(Demanded & C->getValue(),
                                                    VT));
        return CombineTo(Op, New);
      }
    break;
  }
  return false;
}

/// SimplifyDemandedBits - Look at Op.  At this point, we know that only the
/// DemandedMask bits of the result of Op are ever used downstream.  If we can
/// use this information to simplify Op, create a new simplified DAG node and
/// return true, returning the original and new nodes in Old and New. Otherwise,
/// analyze the expression and return a mask of KnownOne and KnownZero bits for
/// the expression (used to simplify the caller).  The KnownZero/One bits may
/// only be accurate for those bits in the DemandedMask.
bool TargetLowering::SimplifyDemandedBits(SDOperand Op, uint64_t DemandedMask,
                                          uint64_t &KnownZero,
                                          uint64_t &KnownOne,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth) const {
  KnownZero = KnownOne = 0;   // Don't know anything.
  // Other users may use these bits.
  if (!Op.Val->hasOneUse()) {
    if (Depth != 0) {
      // If not at the root, Just compute the KnownZero/KnownOne bits to
      // simplify things downstream.
      ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
      return false;
    }
    // If this is the root being simplified, allow it to have multiple uses,
    // just set the DemandedMask to all bits.
    DemandedMask = MVT::getIntVTBitMask(Op.getValueType());
  } else if (DemandedMask == 0) {
    // Not demanding any bits from Op.
    if (Op.getOpcode() != ISD::UNDEF)
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::UNDEF, Op.getValueType()));
    return false;
  } else if (Depth == 6) {        // Limit search depth.
    return false;
  }

  uint64_t KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
  switch (Op.getOpcode()) {
  case ISD::Constant:
    // We know all of the bits for a constant!
    KnownOne = cast<ConstantSDNode>(Op)->getValue() & DemandedMask;
    KnownZero = ~KnownOne & DemandedMask;
    return false;  // Don't fall through, will infinitely loop.
  case ISD::AND:
    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS.  Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      uint64_t LHSZero, LHSOne;
      ComputeMaskedBits(Op.getOperand(0), DemandedMask,
                        LHSZero, LHSOne, Depth+1);
      // If the LHS already has zeros where RHSC does, this and is dead.
      if ((LHSZero & DemandedMask) == (~RHSC->getValue() & DemandedMask))
        return TLO.CombineTo(Op, Op.getOperand(0));
      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & DemandedMask))
        return true;
    }

    if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    // Bits known zero on the RHS need not be demanded from the LHS.
    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownZero,
                             KnownZero2, KnownOne2, TLO, Depth+1))
      return true;
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if ((DemandedMask & ~KnownZero2 & KnownOne)==(DemandedMask & ~KnownZero2))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if ((DemandedMask & ~KnownZero & KnownOne2)==(DemandedMask & ~KnownZero))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if ((DemandedMask & (KnownZero|KnownZero2)) == DemandedMask)
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
    // If the RHS is a constant, see if we can simplify it.
    if (TLO.ShrinkDemandedConstant(Op, DemandedMask & ~KnownZero2))
      return true;

    // Output known-1 bits are only known if set in both the LHS & RHS.
    KnownOne &= KnownOne2;
    // Output known-0 are known to be clear if zero in either the LHS | RHS.
    KnownZero |= KnownZero2;
    break;
  case ISD::OR:
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    // Bits known one on the RHS need not be demanded from the LHS.
    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownOne,
                             KnownZero2, KnownOne2, TLO, Depth+1))
      return true;
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if ((DemandedMask & ~KnownOne2 & KnownZero) == (DemandedMask & ~KnownOne2))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if ((DemandedMask & ~KnownOne & KnownZero2) == (DemandedMask & ~KnownOne))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If all of the potentially set bits on one side are known to be set on
    // the other side, just use the 'other' side.
    if ((DemandedMask & (~KnownZero) & KnownOne2) ==
        (DemandedMask & (~KnownZero)))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if ((DemandedMask & (~KnownZero2) & KnownOne) ==
        (DemandedMask & (~KnownZero2)))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If the RHS is a constant, see if we can simplify it.
    if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
      return true;

    // Output known-0 bits are only known if clear in both the LHS & RHS.
    KnownZero &= KnownZero2;
    // Output known-1 are known to be set if set in either the LHS | RHS.
    KnownOne |= KnownOne2;
    break;
  case ISD::XOR:
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownZero2,
                             KnownOne2, TLO, Depth+1))
      return true;
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if ((DemandedMask & KnownZero) == DemandedMask)
      return TLO.CombineTo(Op, Op.getOperand(0));
    if ((DemandedMask & KnownZero2) == DemandedMask)
      return TLO.CombineTo(Op, Op.getOperand(1));

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
    KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);

    // If all of the unknown bits are known to be zero on one side or the other
    // (but not both) turn this into an *inclusive* or.
    //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (uint64_t UnknownBits = DemandedMask & ~(KnownZeroOut|KnownOneOut))
      if ((UnknownBits & (KnownZero|KnownZero2)) == UnknownBits)
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, Op.getValueType(),
                                                 Op.getOperand(0),
                                                 Op.getOperand(1)));
    // If all of the demanded bits on one side are known, and all of the set
    // bits on that side are also known to be set on the other side, turn this
    // into an AND, as we know the bits will be cleared.
    //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
    if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) { // all known
      if ((KnownOne & KnownOne2) == KnownOne) {
        MVT::ValueType VT = Op.getValueType();
        SDOperand ANDC = TLO.DAG.getConstant(~KnownOne & DemandedMask, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, VT, Op.getOperand(0),
                                                 ANDC));
      }
    }

    // If the RHS is a constant, see if we can simplify it.
    // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
    if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
      return true;

    KnownZero = KnownZeroOut;
    KnownOne = KnownOneOut;
    break;
  case ISD::SETCC:
    // If we know the result of a setcc has the top bits zero, use this info.
    if (getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult)
      KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    break;
  case ISD::SELECT:
    // Operands 1 and 2 are the two select values; operand 0 (the condition)
    // contributes no result bits, so only the values are simplified.
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero2,
                             KnownOne2, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
      return true;

    // Only known if known in both the LHS and RHS.
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    break;
  case ISD::SELECT_CC:
    // Operands 2 and 3 are the two result values (0 and 1 are compared).
    if (SimplifyDemandedBits(Op.getOperand(3), DemandedMask, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero2,
                             KnownOne2, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (TLO.ShrinkDemandedConstant(Op, DemandedMask))
      return true;

    // Only known if known in both the LHS and RHS.
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    break;
  case ISD::SHL:
    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      // Only the bits that end up in the demanded region are needed from the
      // shift input, so shift the demanded mask down before recursing.
      if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask >> SA->getValue(),
                               KnownZero, KnownOne, TLO, Depth+1))
        return true;
      KnownZero <<= SA->getValue();
      KnownOne <<= SA->getValue();
      KnownZero |= (1ULL << SA->getValue())-1;  // low bits known zero.
    }
    break;
  case ISD::SRL:
    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      MVT::ValueType VT = Op.getValueType();
      unsigned ShAmt = SA->getValue();

      // Compute the new bits that are at the top now.
      uint64_t TypeMask = MVT::getIntVTBitMask(VT);
      if (SimplifyDemandedBits(Op.getOperand(0),
                               (DemandedMask << ShAmt) & TypeMask,
                               KnownZero, KnownOne, TLO, Depth+1))
        return true;
      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
      KnownZero &= TypeMask;
      KnownOne &= TypeMask;
      KnownZero >>= ShAmt;
      KnownOne >>= ShAmt;

      uint64_t HighBits = (1ULL << ShAmt)-1;
      HighBits <<= MVT::getSizeInBits(VT) - ShAmt;
      KnownZero |= HighBits;  // High bits known zero.
    }
    break;
  case ISD::SRA:
    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      MVT::ValueType VT = Op.getValueType();
      unsigned ShAmt = SA->getValue();

      // Compute the new bits that are at the top now.
      uint64_t TypeMask = MVT::getIntVTBitMask(VT);

      uint64_t InDemandedMask = (DemandedMask << ShAmt) & TypeMask;

      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
      uint64_t HighBits = (1ULL << ShAmt)-1;
      HighBits <<= MVT::getSizeInBits(VT) - ShAmt;
      if (HighBits & DemandedMask)
        InDemandedMask |= MVT::getIntVTSignBit(VT);

      if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
                               KnownZero, KnownOne, TLO, Depth+1))
        return true;
      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
      KnownZero &= TypeMask;
      KnownOne &= TypeMask;
      KnownZero >>= ShAmt;
      KnownOne >>= ShAmt;

      // Handle the sign bits.
      uint64_t SignBit = MVT::getIntVTSignBit(VT);
      SignBit >>= ShAmt;  // Adjust to where it is now in the mask.

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if ((KnownZero & SignBit) || (HighBits & ~DemandedMask) == HighBits) {
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, VT, Op.getOperand(0),
                                                 Op.getOperand(1)));
      } else if (KnownOne & SignBit) { // New bits are known one.
        KnownOne |= HighBits;
      }
    }
    break;
  case ISD::SIGN_EXTEND_INREG: {
    MVT::ValueType VT = Op.getValueType();
    MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

    // Sign extension.  Compute the demanded bits in the result that are not
    // present in the input.
    uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & DemandedMask;

    // If none of the extended bits are demanded, eliminate the sextinreg.
    if (NewBits == 0)
      return TLO.CombineTo(Op, Op.getOperand(0));

    uint64_t InSignBit = MVT::getIntVTSignBit(EVT);
    // NOTE(review): declared int64_t while every other mask here is uint64_t;
    // appears benign (it is only OR'd and passed to a uint64_t parameter) but
    // uint64_t would be the consistent type.
    int64_t InputDemandedBits = DemandedMask & MVT::getIntVTBitMask(EVT);

    // Since the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InputDemandedBits |= InSignBit;

    if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
                             KnownZero, KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.

    // If the input sign bit is known zero, convert this into a zero extension.
    if (KnownZero & InSignBit)
      return TLO.CombineTo(Op,
                           TLO.DAG.getZeroExtendInReg(Op.getOperand(0), EVT));

    if (KnownOne & InSignBit) {    // Input sign bit known set
      KnownOne |= NewBits;
      KnownZero &= ~NewBits;
    } else {                       // Input sign bit unknown
      KnownZero &= ~NewBits;
      KnownOne &= ~NewBits;
    }
    break;
  }
  case ISD::CTTZ:
  case ISD::CTLZ:
  case ISD::CTPOP: {
    // Bit counts can be at most the bit width, so everything above
    // log2(width)+1 low bits is known zero.
    MVT::ValueType VT = Op.getValueType();
    unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1;
    KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT);
    KnownOne = 0;
    break;
  }
  case ISD::ZEXTLOAD: {
    // Operand 3 of a ZEXTLOAD holds the loaded (narrow) VT; bits above it are
    // zero filled.
    MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(3))->getVT();
    KnownZero |= ~MVT::getIntVTBitMask(VT) & DemandedMask;
    break;
  }
  case ISD::ZERO_EXTEND: {
    uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());

    // If none of the top bits are demanded, convert this into an any_extend.
    uint64_t NewBits = (~InMask) & DemandedMask;
    if (NewBits == 0)
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND,
                                               Op.getValueType(),
                                               Op.getOperand(0)));

    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
                             KnownZero, KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    KnownZero |= NewBits;
    break;
  }
  case ISD::SIGN_EXTEND: {
    MVT::ValueType InVT = Op.getOperand(0).getValueType();
    uint64_t InMask = MVT::getIntVTBitMask(InVT);
    uint64_t InSignBit = MVT::getIntVTSignBit(InVT);
    uint64_t NewBits = (~InMask) & DemandedMask;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (NewBits == 0)
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND,Op.getValueType(),
                                               Op.getOperand(0)));

    // Since some of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    uint64_t InDemandedBits = DemandedMask & InMask;
    InDemandedBits |= InSignBit;

    if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;

    // If the sign bit is known zero, convert this to a zero extend.
    if (KnownZero & InSignBit)
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND,
                                               Op.getValueType(),
                                               Op.getOperand(0)));

    // If the sign bit is known one, the top bits match.
    if (KnownOne & InSignBit) {
      KnownOne |= NewBits;
      KnownZero &= ~NewBits;
    } else {   // Otherwise, top bits aren't known.
      KnownOne &= ~NewBits;
      KnownZero &= ~NewBits;
    }
    break;
  }
  case ISD::ANY_EXTEND: {
    // The extended bits are undefined; only the input bits can be known.
    uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());
    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
                             KnownZero, KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    break;
  }
  case ISD::TRUNCATE: {
    // Simplify the input, using demanded bit information, and compute the known
    // zero/one bits live out.
    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask,
                             KnownZero, KnownOne, TLO, Depth+1))
      return true;

    // If the input is only used by this truncate, see if we can shrink it based
    // on the known demanded bits.
    if (Op.getOperand(0).Val->hasOneUse()) {
      SDOperand In = Op.getOperand(0);
      switch (In.getOpcode()) {
      default: break;
      case ISD::SRL:
        // Shrink SRL by a constant if none of the high bits shifted in are
        // demanded.
        if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
          // HighBits = bits of the wide source that the truncate discards,
          // repositioned to where the SRL moves them.
          uint64_t HighBits = MVT::getIntVTBitMask(In.getValueType());
          HighBits &= ~MVT::getIntVTBitMask(Op.getValueType());
          HighBits >>= ShAmt->getValue();

          if (ShAmt->getValue() < MVT::getSizeInBits(Op.getValueType()) &&
              (DemandedMask & HighBits) == 0) {
            // None of the shifted in bits are needed.  Add a truncate of the
            // shift input, then shift it.
            SDOperand NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE,
                                                 Op.getValueType(),
                                                 In.getOperand(0));
            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL,Op.getValueType(),
                                                   NewTrunc, In.getOperand(1)));
          }
        }
        break;
      }
    }

    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType());
    KnownZero &= OutMask;
    KnownOne &= OutMask;
    break;
  }
  case ISD::AssertZext: {
    // The asserted VT guarantees the bits above it are zero.
    MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    uint64_t InMask = MVT::getIntVTBitMask(VT);
    if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask,
                             KnownZero, KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    KnownZero |= ~InMask & DemandedMask;
    break;
  }
  case ISD::ADD:
  case ISD::SUB:
  case ISD::INTRINSIC_WO_CHAIN:
  case ISD::INTRINSIC_W_CHAIN:
  case ISD::INTRINSIC_VOID:
    // Just use ComputeMaskedBits to compute output bits.
    ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
    break;
  }

  // If we know the value of all of the demanded bits, return this as a
  // constant.
  if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
    return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));

  return false;
}

/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero.  We use
/// this predicate to simplify operations downstream.  Mask is known to be zero
/// for bits that V cannot have.
bool TargetLowering::MaskedValueIsZero(SDOperand Op, uint64_t Mask, 
                                       unsigned Depth) const {
  uint64_t KnownZero, KnownOne;
  ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
  // Every bit in Mask must be provably zero for the predicate to hold.
  return (KnownZero & Mask) == Mask;
}

/// ComputeMaskedBits - Determine which of the bits specified in Mask are
/// known to be either zero or one and return them in the KnownZero/KnownOne
/// bitsets.  This code only analyzes bits in Mask, in order to short-circuit
/// processing.  The two output sets are always disjoint; a bit not present in
/// either set is unknown.  Recursion is cut off at Depth == 6.
void TargetLowering::ComputeMaskedBits(SDOperand Op, uint64_t Mask, 
                                       uint64_t &KnownZero, uint64_t &KnownOne,
                                       unsigned Depth) const {
  KnownZero = KnownOne = 0;   // Don't know anything.
  if (Depth == 6 || Mask == 0)
    return;  // Limit search depth.
  
  uint64_t KnownZero2, KnownOne2;

  switch (Op.getOpcode()) {
  case ISD::Constant:
    // We know all of the bits for a constant!
    KnownOne = cast<ConstantSDNode>(Op)->getValue() & Mask;
    KnownZero = ~KnownOne & Mask;
    return;
  case ISD::AND:
    // If either the LHS or the RHS are Zero, the result is zero.
    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
    // Bits already known zero on the RHS need not be demanded from the LHS.
    Mask &= ~KnownZero;
    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
    
    // Output known-1 bits are only known if set in both the LHS & RHS.
    KnownOne &= KnownOne2;
    // Output known-0 are known to be clear if zero in either the LHS | RHS.
    KnownZero |= KnownZero2;
    return;
  case ISD::OR:
    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
    // Bits already known one on the RHS need not be demanded from the LHS.
    Mask &= ~KnownOne;
    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
    
    // Output known-0 bits are only known if clear in both the LHS & RHS.
    KnownZero &= KnownZero2;
    // Output known-1 are known to be set if set in either the LHS | RHS.
    KnownOne |= KnownOne2;
    return;
  case ISD::XOR: {
    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
    
    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
    KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
    KnownZero = KnownZeroOut;
    return;
  }
  case ISD::SELECT:
    // Operands 1 and 2 are the true/false values; the condition (operand 0)
    // contributes no value bits.
    ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
    
    // Only known if known in both the LHS and RHS.
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    return;
  case ISD::SELECT_CC:
    // Operands 2 and 3 are the true/false values.
    ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
    ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
    
    // Only known if known in both the LHS and RHS.
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    return;
  case ISD::SETCC:
    // If we know the result of a setcc has the top bits zero, use this info.
    if (getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult)
      KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    return;
  case ISD::SHL:
    // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      ComputeMaskedBits(Op.getOperand(0), Mask >> SA->getValue(),
                        KnownZero, KnownOne, Depth+1);
      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
      KnownZero <<= SA->getValue();
      KnownOne  <<= SA->getValue();
      KnownZero |= (1ULL << SA->getValue())-1;  // low bits known zero.
    }
    return;
  case ISD::SRL:
    // (ushr X, C1) & C2 == 0 iff  (-1 >> C1) & C2 == 0
    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      MVT::ValueType VT = Op.getValueType();
      unsigned ShAmt = SA->getValue();

      uint64_t TypeMask = MVT::getIntVTBitMask(VT);
      ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt) & TypeMask,
                        KnownZero, KnownOne, Depth+1);
      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
      KnownZero &= TypeMask;
      KnownOne  &= TypeMask;
      KnownZero >>= ShAmt;
      KnownOne  >>= ShAmt;

      // The top ShAmt bits of the result are filled with zeros.
      uint64_t HighBits = (1ULL << ShAmt)-1;
      HighBits <<= MVT::getSizeInBits(VT)-ShAmt;
      KnownZero |= HighBits;  // High bits known zero.
    }
    return;
  case ISD::SRA:
    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      MVT::ValueType VT = Op.getValueType();
      unsigned ShAmt = SA->getValue();

      // Compute the new bits that are at the top now.
      uint64_t TypeMask = MVT::getIntVTBitMask(VT);

      uint64_t InDemandedMask = (Mask << ShAmt) & TypeMask;
      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
      uint64_t HighBits = (1ULL << ShAmt)-1;
      HighBits <<= MVT::getSizeInBits(VT) - ShAmt;
      if (HighBits & Mask)
        InDemandedMask |= MVT::getIntVTSignBit(VT);
      
      ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
                        Depth+1);
      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
      KnownZero &= TypeMask;
      KnownOne  &= TypeMask;
      KnownZero >>= ShAmt;
      KnownOne  >>= ShAmt;
      
      // Handle the sign bits: if the input sign bit is known, the copies
      // shifted in at the top are known too.
      uint64_t SignBit = MVT::getIntVTSignBit(VT);
      SignBit >>= ShAmt;  // Adjust to where it is now in the mask.
      
      if (KnownZero & SignBit) {       
        KnownZero |= HighBits;  // New bits are known zero.
      } else if (KnownOne & SignBit) {
        KnownOne  |= HighBits;  // New bits are known one.
      }
    }
    return;
  case ISD::SIGN_EXTEND_INREG: {
    MVT::ValueType VT = Op.getValueType();
    MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    
    // Sign extension.  Compute the demanded bits in the result that are not 
    // present in the input.
    uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & Mask;
    
    uint64_t InSignBit = MVT::getIntVTSignBit(EVT);
    int64_t InputDemandedBits = Mask & MVT::getIntVTBitMask(EVT);
    
    // If the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    if (NewBits)
      InputDemandedBits |= InSignBit;
    
    ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
                      KnownZero, KnownOne, Depth+1);
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    
    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.
    if (KnownZero & InSignBit) {          // Input sign bit known clear
      KnownZero |= NewBits;
      KnownOne  &= ~NewBits;
    } else if (KnownOne & InSignBit) {    // Input sign bit known set
      KnownOne  |= NewBits;
      KnownZero &= ~NewBits;
    } else {                              // Input sign bit unknown
      KnownZero &= ~NewBits;
      KnownOne  &= ~NewBits;
    }
    return;
  }
  case ISD::CTTZ:
  case ISD::CTLZ:
  case ISD::CTPOP: {
    // The result of a count operation is at most log2(bitwidth), so all bits
    // above that are known zero.
    MVT::ValueType VT = Op.getValueType();
    unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1;
    KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT);
    KnownOne  = 0;
    return;
  }
  case ISD::ZEXTLOAD: {
    // Bits beyond the loaded (memory) type are zero filled.
    MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(3))->getVT();
    KnownZero |= ~MVT::getIntVTBitMask(VT) & Mask;
    return;
  }
  case ISD::ZERO_EXTEND: {
    uint64_t InMask  = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());
    uint64_t NewBits = (~InMask) & Mask;
    ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, 
                      KnownOne, Depth+1);
    // The extended bits are known zero.
    KnownZero |= NewBits & Mask;
    KnownOne  &= ~NewBits;
    return;
  }
  case ISD::SIGN_EXTEND: {
    MVT::ValueType InVT = Op.getOperand(0).getValueType();
    unsigned InBits    = MVT::getSizeInBits(InVT);
    uint64_t InMask    = MVT::getIntVTBitMask(InVT);
    uint64_t InSignBit = 1ULL << (InBits-1);
    uint64_t NewBits   = (~InMask) & Mask;
    uint64_t InDemandedBits = Mask & InMask;

    // If any of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    if (NewBits & Mask)
      InDemandedBits |= InSignBit;
    
    ComputeMaskedBits(Op.getOperand(0), InDemandedBits, KnownZero, 
                      KnownOne, Depth+1);
    // If the sign bit is known zero or one, the top bits match.
    if (KnownZero & InSignBit) {
      KnownZero |= NewBits;
      KnownOne  &= ~NewBits;
    } else if (KnownOne & InSignBit) {
      KnownOne  |= NewBits;
      KnownZero &= ~NewBits;
    } else {   // Otherwise, top bits aren't known.
      KnownOne  &= ~NewBits;
      KnownZero &= ~NewBits;
    }
    return;
  }
  case ISD::ANY_EXTEND: {
    // The extended bits are unknown; only analyze the input portion.
    MVT::ValueType VT = Op.getOperand(0).getValueType();
    ComputeMaskedBits(Op.getOperand(0), Mask & MVT::getIntVTBitMask(VT),
                      KnownZero, KnownOne, Depth+1);
    return;
  }
  case ISD::TRUNCATE: {
    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType());
    KnownZero &= OutMask;
    KnownOne &= OutMask;
    break;
  }
  case ISD::AssertZext: {
    // AssertZext guarantees the bits beyond VT are zero.
    MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    uint64_t InMask = MVT::getIntVTBitMask(VT);
    ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, 
                      KnownOne, Depth+1);
    KnownZero |= (~InMask) & Mask;
    return;
  }
  case ISD::ADD: {
    // Only the run of low known-zero bits common to both operands survives an
    // add: a carry can disturb everything at and above the lowest unknown bit.
    // For example, 8+(X<<3) is known to have the low 3 bits clear.
    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
    
    // CountTrailingZeros_64(~KnownZero) is the number of low bits known zero
    // in each operand; take the smaller run.
    uint64_t KnownZeroOut = std::min(CountTrailingZeros_64(~KnownZero),
                                     CountTrailingZeros_64(~KnownZero2));
    
    KnownZero = (1ULL << KnownZeroOut) - 1;
    KnownOne = 0;
    return;
  }
  case ISD::SUB: {
    ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0));
    if (!CLHS) return;

    // We know that the top bits of C-X are clear if X contains less bits
    // than C (i.e. no wrap-around can happen).  For example, 20-X is
    // positive if we can prove that X is >= 0 and < 16.
    MVT::ValueType VT = CLHS->getValueType(0);
    if ((CLHS->getValue() & MVT::getIntVTSignBit(VT)) == 0) {  // sign bit clear
      unsigned NLZ = CountLeadingZeros_64(CLHS->getValue()+1);
      uint64_t MaskV = (1ULL << (63-NLZ))-1; // NLZ can't be 64 with no sign bit
      MaskV = ~MaskV & MVT::getIntVTBitMask(VT);
      ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero, KnownOne, Depth+1);

      // If all of the MaskV bits are known to be zero, then we know the output
      // top bits are zero, because we now know that the output is from [0-C].
      if ((KnownZero & MaskV) == MaskV) {
        unsigned NLZ2 = CountLeadingZeros_64(CLHS->getValue());
        KnownZero = ~((1ULL << (64-NLZ2))-1) & Mask;  // Top bits known zero.
        KnownOne = 0;   // No one bits known.
      } else {
        KnownZero = KnownOne = 0;  // Otherwise, nothing known.
      }
    }
    return;
  }
  default:
    // Allow the target to implement this method for its nodes.  NOTE: the
    // intrinsic case labels deliberately live *inside* the if's block so that
    // intrinsic nodes (which have opcodes below BUILTIN_OP_END) also reach the
    // target hook.
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
  case ISD::INTRINSIC_WO_CHAIN:
  case ISD::INTRINSIC_W_CHAIN:
  case ISD::INTRINSIC_VOID:
      computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne);
    }
    return;
  }
}

/// computeMaskedBitsForTargetNode - Determine which of the bits specified 
/// in Mask are known to be either zero or one and return them in the 
/// KnownZero/KnownOne bitsets.
01011 void TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 01012 uint64_t Mask, 01013 uint64_t &KnownZero, 01014 uint64_t &KnownOne, 01015 unsigned Depth) const { 01016 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || 01017 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 01018 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || 01019 Op.getOpcode() == ISD::INTRINSIC_VOID) && 01020 "Should use MaskedValueIsZero if you don't know whether Op" 01021 " is a target node!"); 01022 KnownZero = 0; 01023 KnownOne = 0; 01024 } 01025 01026 /// ComputeNumSignBits - Return the number of times the sign bit of the 01027 /// register is replicated into the other bits. We know that at least 1 bit 01028 /// is always equal to the sign bit (itself), but other cases can give us 01029 /// information. For example, immediately after an "SRA X, 2", we know that 01030 /// the top 3 bits are all equal to each other, so we return 3. 01031 unsigned TargetLowering::ComputeNumSignBits(SDOperand Op, unsigned Depth) const{ 01032 MVT::ValueType VT = Op.getValueType(); 01033 assert(MVT::isInteger(VT) && "Invalid VT!"); 01034 unsigned VTBits = MVT::getSizeInBits(VT); 01035 unsigned Tmp, Tmp2; 01036 01037 if (Depth == 6) 01038 return 1; // Limit search depth. 
01039 01040 switch (Op.getOpcode()) { 01041 default: break; 01042 case ISD::AssertSext: 01043 Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); 01044 return VTBits-Tmp+1; 01045 case ISD::AssertZext: 01046 Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); 01047 return VTBits-Tmp; 01048 01049 case ISD::SEXTLOAD: // '17' bits known 01050 Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(3))->getVT()); 01051 return VTBits-Tmp+1; 01052 case ISD::ZEXTLOAD: // '16' bits known 01053 Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(3))->getVT()); 01054 return VTBits-Tmp; 01055 01056 case ISD::Constant: { 01057 uint64_t Val = cast<ConstantSDNode>(Op)->getValue(); 01058 // If negative, invert the bits, then look at it. 01059 if (Val & MVT::getIntVTSignBit(VT)) 01060 Val = ~Val; 01061 01062 // Shift the bits so they are the leading bits in the int64_t. 01063 Val <<= 64-VTBits; 01064 01065 // Return # leading zeros. We use 'min' here in case Val was zero before 01066 // shifting. We don't want to return '64' as for an i32 "0". 01067 return std::min(VTBits, CountLeadingZeros_64(Val)); 01068 } 01069 01070 case ISD::SIGN_EXTEND: 01071 Tmp = VTBits-MVT::getSizeInBits(Op.getOperand(0).getValueType()); 01072 return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; 01073 01074 case ISD::SIGN_EXTEND_INREG: 01075 // Max of the input and what this extends. 01076 Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); 01077 Tmp = VTBits-Tmp+1; 01078 01079 Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); 01080 return std::max(Tmp, Tmp2); 01081 01082 case ISD::SRA: 01083 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 01084 // SRA X, C -> adds C sign bits. 
01085 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 01086 Tmp += C->getValue(); 01087 if (Tmp > VTBits) Tmp = VTBits; 01088 } 01089 return Tmp; 01090 case ISD::SHL: 01091 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 01092 // shl destroys sign bits. 01093 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 01094 if (C->getValue() >= VTBits || // Bad shift. 01095 C->getValue() >= Tmp) break; // Shifted all sign bits out. 01096 return Tmp - C->getValue(); 01097 } 01098 break; 01099 case ISD::AND: 01100 case ISD::OR: 01101 case ISD::XOR: // NOT is handled here. 01102 // Logical binary ops preserve the number of sign bits. 01103 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 01104 if (Tmp == 1) return 1; // Early out. 01105 Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); 01106 return std::min(Tmp, Tmp2); 01107 01108 case ISD::SELECT: 01109 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 01110 if (Tmp == 1) return 1; // Early out. 01111 Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); 01112 return std::min(Tmp, Tmp2); 01113 01114 case ISD::SETCC: 01115 // If setcc returns 0/-1, all bits are sign bits. 01116 if (getSetCCResultContents() == ZeroOrNegativeOneSetCCResult) 01117 return VTBits; 01118 break; 01119 case ISD::ROTL: 01120 case ISD::ROTR: 01121 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 01122 unsigned RotAmt = C->getValue() & (VTBits-1); 01123 01124 // Handle rotate right by N like a rotate left by 32-N. 01125 if (Op.getOpcode() == ISD::ROTR) 01126 RotAmt = (VTBits-RotAmt) & (VTBits-1); 01127 01128 // If we aren't rotating out all of the known-in sign bits, return the 01129 // number that are left. This handles rotl(sext(x), 1) for example. 01130 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 01131 if (Tmp > RotAmt+1) return Tmp-RotAmt; 01132 } 01133 break; 01134 case ISD::ADD: 01135 // Add can have at most one carry bit. 
Thus we know that the output 01136 // is, at worst, one more bit than the inputs. 01137 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 01138 if (Tmp == 1) return 1; // Early out. 01139 01140 // Special case decrementing a value (ADD X, -1): 01141 if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) 01142 if (CRHS->isAllOnesValue()) { 01143 uint64_t KnownZero, KnownOne; 01144 uint64_t Mask = MVT::getIntVTBitMask(VT); 01145 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); 01146 01147 // If the input is known to be 0 or 1, the output is 0/-1, which is all 01148 // sign bits set. 01149 if ((KnownZero|1) == Mask) 01150 return VTBits; 01151 01152 // If we are subtracting one from a positive number, there is no carry 01153 // out of the result. 01154 if (KnownZero & MVT::getIntVTSignBit(VT)) 01155 return Tmp; 01156 } 01157 01158 Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); 01159 if (Tmp2 == 1) return 1; 01160 return std::min(Tmp, Tmp2)-1; 01161 break; 01162 01163 case ISD::SUB: 01164 Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); 01165 if (Tmp2 == 1) return 1; 01166 01167 // Handle NEG. 01168 if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) 01169 if (CLHS->getValue() == 0) { 01170 uint64_t KnownZero, KnownOne; 01171 uint64_t Mask = MVT::getIntVTBitMask(VT); 01172 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); 01173 // If the input is known to be 0 or 1, the output is 0/-1, which is all 01174 // sign bits set. 01175 if ((KnownZero|1) == Mask) 01176 return VTBits; 01177 01178 // If the input is known to be positive (the sign bit is known clear), 01179 // the output of the NEG has the same number of sign bits as the input. 01180 if (KnownZero & MVT::getIntVTSignBit(VT)) 01181 return Tmp2; 01182 01183 // Otherwise, we treat this like a SUB. 01184 } 01185 01186 // Sub can have at most one carry bit. 
Thus we know that the output 01187 // is, at worst, one more bit than the inputs. 01188 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 01189 if (Tmp == 1) return 1; // Early out. 01190 return std::min(Tmp, Tmp2)-1; 01191 break; 01192 case ISD::TRUNCATE: 01193 // FIXME: it's tricky to do anything useful for this, but it is an important 01194 // case for targets like X86. 01195 break; 01196 } 01197 01198 // Allow the target to implement this method for its nodes. 01199 if (Op.getOpcode() >= ISD::BUILTIN_OP_END || 01200 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 01201 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || 01202 Op.getOpcode() == ISD::INTRINSIC_VOID) { 01203 unsigned NumBits = ComputeNumSignBitsForTargetNode(Op, Depth); 01204 if (NumBits > 1) return NumBits; 01205 } 01206 01207 // Finally, if we can prove that the top bits of the result are 0's or 1's, 01208 // use this information. 01209 uint64_t KnownZero, KnownOne; 01210 uint64_t Mask = MVT::getIntVTBitMask(VT); 01211 ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); 01212 01213 uint64_t SignBit = MVT::getIntVTSignBit(VT); 01214 if (KnownZero & SignBit) { // SignBit is 0 01215 Mask = KnownZero; 01216 } else if (KnownOne & SignBit) { // SignBit is 1; 01217 Mask = KnownOne; 01218 } else { 01219 // Nothing known. 01220 return 1; 01221 } 01222 01223 // Okay, we know that the sign bit in Mask is set. Use CLZ to determine 01224 // the number of identical bits in the top of the input value. 01225 Mask ^= ~0ULL; 01226 Mask <<= 64-VTBits; 01227 // Return # leading zeros. We use 'min' here in case Val was zero before 01228 // shifting. We don't want to return '64' as for an i32 "0". 01229 return std::min(VTBits, CountLeadingZeros_64(Mask)); 01230 } 01231 01232 01233 01234 /// ComputeNumSignBitsForTargetNode - This method can be implemented by 01235 /// targets that want to expose additional information about sign bits to the 01236 /// DAG Combiner. 
01237 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDOperand Op, 01238 unsigned Depth) const { 01239 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || 01240 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 01241 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || 01242 Op.getOpcode() == ISD::INTRINSIC_VOID) && 01243 "Should use ComputeNumSignBits if you don't know whether Op" 01244 " is a target node!"); 01245 return 1; 01246 } 01247 01248 01249 SDOperand TargetLowering:: 01250 PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { 01251 // Default implementation: no optimization. 01252 return SDOperand(); 01253 } 01254 01255 //===----------------------------------------------------------------------===// 01256 // Inline Assembler Implementation Methods 01257 //===----------------------------------------------------------------------===// 01258 01259 TargetLowering::ConstraintType 01260 TargetLowering::getConstraintType(char ConstraintLetter) const { 01261 // FIXME: lots more standard ones to handle. 01262 switch (ConstraintLetter) { 01263 default: return C_Unknown; 01264 case 'r': return C_RegisterClass; 01265 case 'm': // memory 01266 case 'o': // offsetable 01267 case 'V': // not offsetable 01268 return C_Memory; 01269 case 'i': // Simple Integer or Relocatable Constant 01270 case 'n': // Simple Integer 01271 case 's': // Relocatable Constant 01272 case 'I': // Target registers. 01273 case 'J': 01274 case 'K': 01275 case 'L': 01276 case 'M': 01277 case 'N': 01278 case 'O': 01279 case 'P': 01280 return C_Other; 01281 } 01282 } 01283 01284 bool TargetLowering::isOperandValidForConstraint(SDOperand Op, 01285 char ConstraintLetter) { 01286 switch (ConstraintLetter) { 01287 default: return false; 01288 case 'i': // Simple Integer or Relocatable Constant 01289 case 'n': // Simple Integer 01290 case 's': // Relocatable Constant 01291 return true; // FIXME: not right. 
01292 } 01293 } 01294 01295 01296 std::vector<unsigned> TargetLowering:: 01297 getRegClassForInlineAsmConstraint(const std::string &Constraint, 01298 MVT::ValueType VT) const { 01299 return std::vector<unsigned>(); 01300 } 01301 01302 01303 std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: 01304 getRegForInlineAsmConstraint(const std::string &Constraint, 01305 MVT::ValueType VT) const { 01306 if (Constraint[0] != '{') 01307 return std::pair<unsigned, const TargetRegisterClass*>(0, 0); 01308 assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); 01309 01310 // Remove the braces from around the name. 01311 std::string RegName(Constraint.begin()+1, Constraint.end()-1); 01312 01313 // Figure out which register class contains this reg. 01314 const MRegisterInfo *RI = TM.getRegisterInfo(); 01315 for (MRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), 01316 E = RI->regclass_end(); RCI != E; ++RCI) { 01317 const TargetRegisterClass *RC = *RCI; 01318 01319 // If none of the the value types for this register class are valid, we 01320 // can't use it. For example, 64-bit reg classes on 32-bit targets. 
01321 bool isLegal = false; 01322 for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); 01323 I != E; ++I) { 01324 if (isTypeLegal(*I)) { 01325 isLegal = true; 01326 break; 01327 } 01328 } 01329 01330 if (!isLegal) continue; 01331 01332 for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); 01333 I != E; ++I) { 01334 if (StringsEqualNoCase(RegName, RI->get(*I).Name)) 01335 return std::make_pair(*I, RC); 01336 } 01337 } 01338 01339 return std::pair<unsigned, const TargetRegisterClass*>(0, 0); 01340 } 01341 01342 //===----------------------------------------------------------------------===// 01343 // Loop Strength Reduction hooks 01344 //===----------------------------------------------------------------------===// 01345 01346 /// isLegalAddressImmediate - Return true if the integer value or 01347 /// GlobalValue can be used as the offset of the target addressing mode. 01348 bool TargetLowering::isLegalAddressImmediate(int64_t V) const { 01349 return false; 01350 } 01351 bool TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 01352 return false; 01353 } 01354 01355 01356 // Magic for divide replacement 01357 01358 struct ms { 01359 int64_t m; // magic number 01360 int64_t s; // shift amount 01361 }; 01362 01363 struct mu { 01364 uint64_t m; // magic number 01365 int64_t a; // add indicator 01366 int64_t s; // shift amount 01367 }; 01368 01369 /// magic - calculate the magic numbers required to codegen an integer sdiv as 01370 /// a sequence of multiply and shifts. Requires that the divisor not be 0, 1, 01371 /// or -1. 
01372 static ms magic32(int32_t d) { 01373 int32_t p; 01374 uint32_t ad, anc, delta, q1, r1, q2, r2, t; 01375 const uint32_t two31 = 0x80000000U; 01376 struct ms mag; 01377 01378 ad = abs(d); 01379 t = two31 + ((uint32_t)d >> 31); 01380 anc = t - 1 - t%ad; // absolute value of nc 01381 p = 31; // initialize p 01382 q1 = two31/anc; // initialize q1 = 2p/abs(nc) 01383 r1 = two31 - q1*anc; // initialize r1 = rem(2p,abs(nc)) 01384 q2 = two31/ad; // initialize q2 = 2p/abs(d) 01385 r2 = two31 - q2*ad; // initialize r2 = rem(2p,abs(d)) 01386 do { 01387 p = p + 1; 01388 q1 = 2*q1; // update q1 = 2p/abs(nc) 01389 r1 = 2*r1; // update r1 = rem(2p/abs(nc)) 01390 if (r1 >= anc) { // must be unsigned comparison 01391 q1 = q1 + 1; 01392 r1 = r1 - anc; 01393 } 01394 q2 = 2*q2; // update q2 = 2p/abs(d) 01395 r2 = 2*r2; // update r2 = rem(2p/abs(d)) 01396 if (r2 >= ad) { // must be unsigned comparison 01397 q2 = q2 + 1; 01398 r2 = r2 - ad; 01399 } 01400 delta = ad - r2; 01401 } while (q1 < delta || (q1 == delta && r1 == 0)); 01402 01403 mag.m = (int32_t)(q2 + 1); // make sure to sign extend 01404 if (d < 0) mag.m = -mag.m; // resulting magic number 01405 mag.s = p - 32; // resulting shift 01406 return mag; 01407 } 01408 01409 /// magicu - calculate the magic numbers required to codegen an integer udiv as 01410 /// a sequence of multiply, add and shifts. Requires that the divisor not be 0. 
01411 static mu magicu32(uint32_t d) { 01412 int32_t p; 01413 uint32_t nc, delta, q1, r1, q2, r2; 01414 struct mu magu; 01415 magu.a = 0; // initialize "add" indicator 01416 nc = - 1 - (-d)%d; 01417 p = 31; // initialize p 01418 q1 = 0x80000000/nc; // initialize q1 = 2p/nc 01419 r1 = 0x80000000 - q1*nc; // initialize r1 = rem(2p,nc) 01420 q2 = 0x7FFFFFFF/d; // initialize q2 = (2p-1)/d 01421 r2 = 0x7FFFFFFF - q2*d; // initialize r2 = rem((2p-1),d) 01422 do { 01423 p = p + 1; 01424 if (r1 >= nc - r1 ) { 01425 q1 = 2*q1 + 1; // update q1 01426 r1 = 2*r1 - nc; // update r1 01427 } 01428 else { 01429 q1 = 2*q1; // update q1 01430 r1 = 2*r1; // update r1 01431 } 01432 if (r2 + 1 >= d - r2) { 01433 if (q2 >= 0x7FFFFFFF) magu.a = 1; 01434 q2 = 2*q2 + 1; // update q2 01435 r2 = 2*r2 + 1 - d; // update r2 01436 } 01437 else { 01438 if (q2 >= 0x80000000) magu.a = 1; 01439 q2 = 2*q2; // update q2 01440 r2 = 2*r2 + 1; // update r2 01441 } 01442 delta = d - 1 - r2; 01443 } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0))); 01444 magu.m = q2 + 1; // resulting magic number 01445 magu.s = p - 32; // resulting shift 01446 return magu; 01447 } 01448 01449 /// magic - calculate the magic numbers required to codegen an integer sdiv as 01450 /// a sequence of multiply and shifts. Requires that the divisor not be 0, 1, 01451 /// or -1. 01452 static ms magic64(int64_t d) { 01453 int64_t p; 01454 uint64_t ad, anc, delta, q1, r1, q2, r2, t; 01455 const uint64_t two63 = 9223372036854775808ULL; // 2^63 01456 struct ms mag; 01457 01458 ad = d >= 0 ? 
d : -d; 01459 t = two63 + ((uint64_t)d >> 63); 01460 anc = t - 1 - t%ad; // absolute value of nc 01461 p = 63; // initialize p 01462 q1 = two63/anc; // initialize q1 = 2p/abs(nc) 01463 r1 = two63 - q1*anc; // initialize r1 = rem(2p,abs(nc)) 01464 q2 = two63/ad; // initialize q2 = 2p/abs(d) 01465 r2 = two63 - q2*ad; // initialize r2 = rem(2p,abs(d)) 01466 do { 01467 p = p + 1; 01468 q1 = 2*q1; // update q1 = 2p/abs(nc) 01469 r1 = 2*r1; // update r1 = rem(2p/abs(nc)) 01470 if (r1 >= anc) { // must be unsigned comparison 01471 q1 = q1 + 1; 01472 r1 = r1 - anc; 01473 } 01474 q2 = 2*q2; // update q2 = 2p/abs(d) 01475 r2 = 2*r2; // update r2 = rem(2p/abs(d)) 01476 if (r2 >= ad) { // must be unsigned comparison 01477 q2 = q2 + 1; 01478 r2 = r2 - ad; 01479 } 01480 delta = ad - r2; 01481 } while (q1 < delta || (q1 == delta && r1 == 0)); 01482 01483 mag.m = q2 + 1; 01484 if (d < 0) mag.m = -mag.m; // resulting magic number 01485 mag.s = p - 64; // resulting shift 01486 return mag; 01487 } 01488 01489 /// magicu - calculate the magic numbers required to codegen an integer udiv as 01490 /// a sequence of multiply, add and shifts. Requires that the divisor not be 0. 
/// magicu64 - Compute the multiplier/add-indicator/shift triple that lets an
/// unsigned 64-bit divide by 'd' be lowered to a high-multiply (plus optional
/// add) and shifts.  Requires that the divisor not be 0.
static mu magicu64(uint64_t d)
{
  int64_t p;
  uint64_t nc, delta, q1, r1, q2, r2;
  struct mu magu;
  magu.a = 0;               // initialize "add" indicator
  // Unsigned arithmetic throughout, so negating d is well defined.
  nc = - 1 - (-d)%d;
  p = 63;                   // initialize p
  q1 = 0x8000000000000000ull/nc;       // initialize q1 = 2p/nc
  r1 = 0x8000000000000000ull - q1*nc;  // initialize r1 = rem(2p,nc)
  q2 = 0x7FFFFFFFFFFFFFFFull/d;        // initialize q2 = (2p-1)/d
  r2 = 0x7FFFFFFFFFFFFFFFull - q2*d;   // initialize r2 = rem((2p-1),d)
  do {
    p = p + 1;
    if (r1 >= nc - r1 ) {
      q1 = 2*q1 + 1;  // update q1
      r1 = 2*r1 - nc; // update r1
    }
    else {
      q1 = 2*q1; // update q1
      r1 = 2*r1; // update r1
    }
    if (r2 + 1 >= d - r2) {
      if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1;
      q2 = 2*q2 + 1;     // update q2
      r2 = 2*r2 + 1 - d; // update r2
    }
    else {
      if (q2 >= 0x8000000000000000ull) magu.a = 1;
      q2 = 2*q2;     // update q2
      r2 = 2*r2 + 1; // update r2
    }
    delta = d - 1 - r2;
  } while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0)));
  magu.m = q2 + 1; // resulting magic number
  magu.s = p - 64; // resulting shift
  return magu;
}

/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.  See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
/// Any newly created nodes are appended to *Created (if non-null) so the
/// caller can add them to the combiner worklist.  Returns a null SDOperand
/// if this target/type combination cannot be handled.
SDOperand TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, 
                                    std::vector<SDNode*>* Created) const {
  MVT::ValueType VT = N->getValueType(0);
  
  // Check to see if we can do this.
  if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
    return SDOperand();       // BuildSDIV only operates on i32 or i64
  if (!isOperationLegal(ISD::MULHS, VT))
    return SDOperand();       // Make sure the target supports MULHS.
  
  int64_t d = cast<ConstantSDNode>(N->getOperand(1))->getSignExtended();
  ms magics = (VT == MVT::i32) ? magic32(d) : magic64(d);
  
  // Multiply the numerator (operand 0) by the magic value
  SDOperand Q = DAG.getNode(ISD::MULHS, VT, N->getOperand(0),
                            DAG.getConstant(magics.m, VT));
  // If d > 0 and m < 0, add the numerator
  if (d > 0 && magics.m < 0) { 
    Q = DAG.getNode(ISD::ADD, VT, Q, N->getOperand(0));
    if (Created)
      Created->push_back(Q.Val);
  }
  // If d < 0 and m > 0, subtract the numerator.
  if (d < 0 && magics.m > 0) {
    Q = DAG.getNode(ISD::SUB, VT, Q, N->getOperand(0));
    if (Created)
      Created->push_back(Q.Val);
  }
  // Shift right algebraic if shift value is nonzero
  if (magics.s > 0) {
    Q = DAG.getNode(ISD::SRA, VT, Q, 
                    DAG.getConstant(magics.s, getShiftAmountTy()));
    if (Created)
      Created->push_back(Q.Val);
  }
  // Extract the sign bit and add it to the quotient (rounds the quotient
  // toward zero for negative results).
  SDOperand T =
    DAG.getNode(ISD::SRL, VT, Q, DAG.getConstant(MVT::getSizeInBits(VT)-1,
                                                 getShiftAmountTy()));
  if (Created)
    Created->push_back(T.Val);
  return DAG.getNode(ISD::ADD, VT, Q, T);
}

/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.  See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
/// Any newly created nodes are appended to *Created (if non-null) so the
/// caller can add them to the combiner worklist.  Returns a null SDOperand
/// if this target/type combination cannot be handled.
SDOperand TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                    std::vector<SDNode*>* Created) const {
  MVT::ValueType VT = N->getValueType(0);
  
  // Check to see if we can do this.
  if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
    return SDOperand();       // BuildUDIV only operates on i32 or i64
  if (!isOperationLegal(ISD::MULHU, VT))
    return SDOperand();       // Make sure the target supports MULHU.
  
  uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getValue();
  mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d);
  
  // Multiply the numerator (operand 0) by the magic value
  SDOperand Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0),
                            DAG.getConstant(magics.m, VT));
  if (Created)
    Created->push_back(Q.Val);

  if (magics.a == 0) {
    // No add needed: just shift the high product.
    return DAG.getNode(ISD::SRL, VT, Q, 
                       DAG.getConstant(magics.s, getShiftAmountTy()));
  } else {
    // Add indicator set: compute (((N-Q) >> 1) + Q) >> (s-1) to avoid
    // overflow in the intermediate sum.
    SDOperand NPQ = DAG.getNode(ISD::SUB, VT, N->getOperand(0), Q);
    if (Created)
      Created->push_back(NPQ.Val);
    NPQ = DAG.getNode(ISD::SRL, VT, NPQ, 
                      DAG.getConstant(1, getShiftAmountTy()));
    if (Created)
      Created->push_back(NPQ.Val);
    NPQ = DAG.getNode(ISD::ADD, VT, NPQ, Q);
    if (Created)
      Created->push_back(NPQ.Val);
    return DAG.getNode(ISD::SRL, VT, NPQ, 
                       DAG.getConstant(magics.s-1, getShiftAmountTy()));
  }
}