LLVM API Documentation
00001 //===- LoopStrengthReduce.cpp - Strength Reduce GEPs in Loops -------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by Nate Begeman and is distributed under the 00006 // University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This pass performs a strength reduction on array references inside loops that 00011 // have as one or more of their components the loop induction variable. This is 00012 // accomplished by creating a new Value to hold the initial value of the array 00013 // access for the first iteration, and then creating a new GEP instruction in 00014 // the loop to increment the value by the appropriate amount. 00015 // 00016 //===----------------------------------------------------------------------===// 00017 00018 #define DEBUG_TYPE "loop-reduce" 00019 #include "llvm/Transforms/Scalar.h" 00020 #include "llvm/Constants.h" 00021 #include "llvm/Instructions.h" 00022 #include "llvm/Type.h" 00023 #include "llvm/DerivedTypes.h" 00024 #include "llvm/Analysis/Dominators.h" 00025 #include "llvm/Analysis/LoopInfo.h" 00026 #include "llvm/Analysis/ScalarEvolutionExpander.h" 00027 #include "llvm/Support/CFG.h" 00028 #include "llvm/Support/GetElementPtrTypeIterator.h" 00029 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 00030 #include "llvm/Transforms/Utils/Local.h" 00031 #include "llvm/Target/TargetData.h" 00032 #include "llvm/ADT/Statistic.h" 00033 #include "llvm/Support/Debug.h" 00034 #include "llvm/Support/Visibility.h" 00035 #include "llvm/Target/TargetLowering.h" 00036 #include <algorithm> 00037 #include <iostream> 00038 #include <set> 00039 using namespace llvm; 00040 00041 namespace { 00042 Statistic<> NumReduced ("loop-reduce", "Number of GEPs strength reduced"); 00043 Statistic<> NumInserted("loop-reduce", "Number of PHIs inserted"); 00044 Statistic<> NumVariable("loop-reduce","Number of PHIs with variable strides"); 00045 00046 /// IVStrideUse - Keep track of one use of a strided induction variable, where 00047 /// the stride is stored externally. The Offset member keeps track of the 00048 /// offset from the IV, User is the actual user of the operand, and 'Operand' 00049 /// is the operand # of the User that is the use. 00050 struct IVStrideUse { 00051 SCEVHandle Offset; 00052 Instruction *User; 00053 Value *OperandValToReplace; 00054 00055 // isUseOfPostIncrementedValue - True if this should use the 00056 // post-incremented version of this IV, not the preincremented version. 00057 // This can only be set in special cases, such as the terminating setcc 00058 // instruction for a loop or uses dominated by the loop. 00059 bool isUseOfPostIncrementedValue; 00060 00061 IVStrideUse(const SCEVHandle &Offs, Instruction *U, Value *O) 00062 : Offset(Offs), User(U), OperandValToReplace(O), 00063 isUseOfPostIncrementedValue(false) {} 00064 }; 00065 00066 /// IVUsersOfOneStride - This structure keeps track of all instructions that 00067 /// have an operand that is based on the trip count multiplied by some stride. 00068 /// The stride for all of these users is common and kept external to this 00069 /// structure. 00070 struct IVUsersOfOneStride { 00071 /// Users - Keep track of all of the users of this stride as well as the 00072 /// initial value and the operand that uses the IV. 00073 std::vector<IVStrideUse> Users; 00074 00075 void addUser(const SCEVHandle &Offset,Instruction *User, Value *Operand) { 00076 Users.push_back(IVStrideUse(Offset, User, Operand)); 00077 } 00078 }; 00079 00080 /// IVInfo - This structure keeps track of one IV expression inserted during 00081 /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as 00082 /// well as the PHI node and increment value created for rewrite. 00083 struct IVExpr { 00084 SCEVHandle Stride; 00085 SCEVHandle Base; 00086 PHINode *PHI; 00087 Value *IncV; 00088 00089 IVExpr() 00090 : Stride(SCEVUnknown::getIntegerSCEV(0, Type::UIntTy)), 00091 Base (SCEVUnknown::getIntegerSCEV(0, Type::UIntTy)) {} 00092 IVExpr(const SCEVHandle &stride, const SCEVHandle &base, PHINode *phi, 00093 Value *incv) 00094 : Stride(stride), Base(base), PHI(phi), IncV(incv) {} 00095 }; 00096 00097 /// IVsOfOneStride - This structure keeps track of all IV expression inserted 00098 /// during StrengthReduceStridedIVUsers for a particular stride of the IV. 00099 struct IVsOfOneStride { 00100 std::vector<IVExpr> IVs; 00101 00102 void addIV(const SCEVHandle &Stride, const SCEVHandle &Base, PHINode *PHI, 00103 Value *IncV) { 00104 IVs.push_back(IVExpr(Stride, Base, PHI, IncV)); 00105 } 00106 }; 00107 00108 class VISIBILITY_HIDDEN LoopStrengthReduce : public FunctionPass { 00109 LoopInfo *LI; 00110 ETForest *EF; 00111 ScalarEvolution *SE; 00112 const TargetData *TD; 00113 const Type *UIntPtrTy; 00114 bool Changed; 00115 00116 /// IVUsesByStride - Keep track of all uses of induction variables that we 00117 /// are interested in. The key of the map is the stride of the access. 00118 std::map<SCEVHandle, IVUsersOfOneStride> IVUsesByStride; 00119 00120 /// IVsByStride - Keep track of all IVs that have been inserted for a 00121 /// particular stride. 00122 std::map<SCEVHandle, IVsOfOneStride> IVsByStride; 00123 00124 /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable: 00125 /// We use this to iterate over the IVUsesByStride collection without being 00126 /// dependent on random ordering of pointers in the process. 00127 std::vector<SCEVHandle> StrideOrder; 00128 00129 /// CastedValues - As we need to cast values to uintptr_t, this keeps track 00130 /// of the casted version of each value. This is accessed by 00131 /// getCastedVersionOf. 00132 std::map<Value*, Value*> CastedPointers; 00133 00134 /// DeadInsts - Keep track of instructions we may have made dead, so that 00135 /// we can remove them after we are done working. 00136 std::set<Instruction*> DeadInsts; 00137 00138 /// TLI - Keep a pointer of a TargetLowering to consult for determining 00139 /// transformation profitability. 00140 const TargetLowering *TLI; 00141 00142 public: 00143 LoopStrengthReduce(const TargetLowering *tli = NULL) 00144 : TLI(tli) { 00145 } 00146 00147 virtual bool runOnFunction(Function &) { 00148 LI = &getAnalysis<LoopInfo>(); 00149 EF = &getAnalysis<ETForest>(); 00150 SE = &getAnalysis<ScalarEvolution>(); 00151 TD = &getAnalysis<TargetData>(); 00152 UIntPtrTy = TD->getIntPtrType(); 00153 Changed = false; 00154 00155 for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) 00156 runOnLoop(*I); 00157 00158 return Changed; 00159 } 00160 00161 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 00162 // We split critical edges, so we change the CFG. However, we do update 00163 // many analyses if they are around. 00164 AU.addPreservedID(LoopSimplifyID); 00165 AU.addPreserved<LoopInfo>(); 00166 AU.addPreserved<DominatorSet>(); 00167 AU.addPreserved<ETForest>(); 00168 AU.addPreserved<ImmediateDominators>(); 00169 AU.addPreserved<DominanceFrontier>(); 00170 AU.addPreserved<DominatorTree>(); 00171 00172 AU.addRequiredID(LoopSimplifyID); 00173 AU.addRequired<LoopInfo>(); 00174 AU.addRequired<ETForest>(); 00175 AU.addRequired<TargetData>(); 00176 AU.addRequired<ScalarEvolution>(); 00177 } 00178 00179 /// getCastedVersionOf - Return the specified value casted to uintptr_t. 00180 /// 00181 Value *getCastedVersionOf(Value *V); 00182 private: 00183 void runOnLoop(Loop *L); 00184 bool AddUsersIfInteresting(Instruction *I, Loop *L, 00185 std::set<Instruction*> &Processed); 00186 SCEVHandle GetExpressionSCEV(Instruction *E, Loop *L); 00187 00188 void OptimizeIndvars(Loop *L); 00189 00190 unsigned CheckForIVReuse(const SCEVHandle&, IVExpr&, const Type*); 00191 00192 void StrengthReduceStridedIVUsers(const SCEVHandle &Stride, 00193 IVUsersOfOneStride &Uses, 00194 Loop *L, bool isOnlyStride); 00195 void DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts); 00196 }; 00197 RegisterOpt<LoopStrengthReduce> X("loop-reduce", 00198 "Loop Strength Reduction"); 00199 } 00200 00201 FunctionPass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { 00202 return new LoopStrengthReduce(TLI); 00203 } 00204 00205 /// getCastedVersionOf - Return the specified value casted to uintptr_t. 00206 /// 00207 Value *LoopStrengthReduce::getCastedVersionOf(Value *V) { 00208 if (V->getType() == UIntPtrTy) return V; 00209 if (Constant *CB = dyn_cast<Constant>(V)) 00210 return ConstantExpr::getCast(CB, UIntPtrTy); 00211 00212 Value *&New = CastedPointers[V]; 00213 if (New) return New; 00214 00215 New = SCEVExpander::InsertCastOfTo(V, UIntPtrTy); 00216 DeadInsts.insert(cast<Instruction>(New)); 00217 return New; 00218 } 00219 00220 00221 /// DeleteTriviallyDeadInstructions - If any of the instructions is the 00222 /// specified set are trivially dead, delete them and see if this makes any of 00223 /// their operands subsequently dead. 00224 void LoopStrengthReduce:: 00225 DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts) { 00226 while (!Insts.empty()) { 00227 Instruction *I = *Insts.begin(); 00228 Insts.erase(Insts.begin()); 00229 if (isInstructionTriviallyDead(I)) { 00230 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) 00231 if (Instruction *U = dyn_cast<Instruction>(I->getOperand(i))) 00232 Insts.insert(U); 00233 SE->deleteInstructionFromRecords(I); 00234 I->eraseFromParent(); 00235 Changed = true; 00236 } 00237 } 00238 } 00239 00240 00241 /// GetExpressionSCEV - Compute and return the SCEV for the specified 00242 /// instruction. 00243 SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp, Loop *L) { 00244 // Scalar Evolutions doesn't know how to compute SCEV's for GEP instructions. 00245 // If this is a GEP that SE doesn't know about, compute it now and insert it. 00246 // If this is not a GEP, or if we have already done this computation, just let 00247 // SE figure it out. 00248 GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Exp); 00249 if (!GEP || SE->hasSCEV(GEP)) 00250 return SE->getSCEV(Exp); 00251 00252 // Analyze all of the subscripts of this getelementptr instruction, looking 00253 // for uses that are determined by the trip count of L. First, skip all 00254 // operands the are not dependent on the IV. 00255 00256 // Build up the base expression. Insert an LLVM cast of the pointer to 00257 // uintptr_t first. 00258 SCEVHandle GEPVal = SCEVUnknown::get(getCastedVersionOf(GEP->getOperand(0))); 00259 00260 gep_type_iterator GTI = gep_type_begin(GEP); 00261 00262 for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { 00263 // If this is a use of a recurrence that we can analyze, and it comes before 00264 // Op does in the GEP operand list, we will handle this when we process this 00265 // operand. 00266 if (const StructType *STy = dyn_cast<StructType>(*GTI)) { 00267 const StructLayout *SL = TD->getStructLayout(STy); 00268 unsigned Idx = cast<ConstantUInt>(GEP->getOperand(i))->getValue(); 00269 uint64_t Offset = SL->MemberOffsets[Idx]; 00270 GEPVal = SCEVAddExpr::get(GEPVal, 00271 SCEVUnknown::getIntegerSCEV(Offset, UIntPtrTy)); 00272 } else { 00273 Value *OpVal = getCastedVersionOf(GEP->getOperand(i)); 00274 SCEVHandle Idx = SE->getSCEV(OpVal); 00275 00276 uint64_t TypeSize = TD->getTypeSize(GTI.getIndexedType()); 00277 if (TypeSize != 1) 00278 Idx = SCEVMulExpr::get(Idx, 00279 SCEVConstant::get(ConstantUInt::get(UIntPtrTy, 00280 TypeSize))); 00281 GEPVal = SCEVAddExpr::get(GEPVal, Idx); 00282 } 00283 } 00284 00285 SE->setSCEV(GEP, GEPVal); 00286 return GEPVal; 00287 } 00288 00289 /// getSCEVStartAndStride - Compute the start and stride of this expression, 00290 /// returning false if the expression is not a start/stride pair, or true if it 00291 /// is. The stride must be a loop invariant expression, but the start may be 00292 /// a mix of loop invariant and loop variant expressions. 00293 static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, 00294 SCEVHandle &Start, SCEVHandle &Stride) { 00295 SCEVHandle TheAddRec = Start; // Initialize to zero. 00296 00297 // If the outer level is an AddExpr, the operands are all start values except 00298 // for a nested AddRecExpr. 00299 if (SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) { 00300 for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i) 00301 if (SCEVAddRecExpr *AddRec = 00302 dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) { 00303 if (AddRec->getLoop() == L) 00304 TheAddRec = SCEVAddExpr::get(AddRec, TheAddRec); 00305 else 00306 return false; // Nested IV of some sort? 00307 } else { 00308 Start = SCEVAddExpr::get(Start, AE->getOperand(i)); 00309 } 00310 00311 } else if (SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SH)) { 00312 TheAddRec = SH; 00313 } else { 00314 return false; // not analyzable. 00315 } 00316 00317 SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec); 00318 if (!AddRec || AddRec->getLoop() != L) return false; 00319 00320 // FIXME: Generalize to non-affine IV's. 00321 if (!AddRec->isAffine()) return false; 00322 00323 Start = SCEVAddExpr::get(Start, AddRec->getOperand(0)); 00324 00325 if (!isa<SCEVConstant>(AddRec->getOperand(1))) 00326 DEBUG(std::cerr << "[" << L->getHeader()->getName() 00327 << "] Variable stride: " << *AddRec << "\n"); 00328 00329 Stride = AddRec->getOperand(1); 00330 // Check that all constant strides are the unsigned type, we don't want to 00331 // have two IV's one of signed stride 4 and one of unsigned stride 4 to not be 00332 // merged. 00333 assert((!isa<SCEVConstant>(Stride) || Stride->getType()->isUnsigned()) && 00334 "Constants should be canonicalized to unsigned!"); 00335 00336 return true; 00337 } 00338 00339 /// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression 00340 /// and now we need to decide whether the user should use the preinc or post-inc 00341 /// value. If this user should use the post-inc version of the IV, return true. 00342 /// 00343 /// Choosing wrong here can break dominance properties (if we choose to use the 00344 /// post-inc value when we cannot) or it can end up adding extra live-ranges to 00345 /// the loop, resulting in reg-reg copies (if we use the pre-inc value when we 00346 /// should use the post-inc value). 00347 static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, 00348 Loop *L, ETForest *EF, Pass *P) { 00349 // If the user is in the loop, use the preinc value. 00350 if (L->contains(User->getParent())) return false; 00351 00352 BasicBlock *LatchBlock = L->getLoopLatch(); 00353 00354 // Ok, the user is outside of the loop. If it is dominated by the latch 00355 // block, use the post-inc value. 00356 if (EF->dominates(LatchBlock, User->getParent())) 00357 return true; 00358 00359 // There is one case we have to be careful of: PHI nodes. These little guys 00360 // can live in blocks that do not dominate the latch block, but (since their 00361 // uses occur in the predecessor block, not the block the PHI lives in) should 00362 // still use the post-inc value. Check for this case now. 00363 PHINode *PN = dyn_cast<PHINode>(User); 00364 if (!PN) return false; // not a phi, not dominated by latch block. 00365 00366 // Look at all of the uses of IV by the PHI node. If any use corresponds to 00367 // a block that is not dominated by the latch block, give up and use the 00368 // preincremented value. 00369 unsigned NumUses = 0; 00370 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 00371 if (PN->getIncomingValue(i) == IV) { 00372 ++NumUses; 00373 if (!EF->dominates(LatchBlock, PN->getIncomingBlock(i))) 00374 return false; 00375 } 00376 00377 // Okay, all uses of IV by PN are in predecessor blocks that really are 00378 // dominated by the latch block. Split the critical edges and use the 00379 // post-incremented value. 00380 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 00381 if (PN->getIncomingValue(i) == IV) { 00382 SplitCriticalEdge(PN->getIncomingBlock(i), PN->getParent(), P); 00383 if (--NumUses == 0) break; 00384 } 00385 00386 return true; 00387 } 00388 00389 00390 00391 /// AddUsersIfInteresting - Inspect the specified instruction. If it is a 00392 /// reducible SCEV, recursively add its users to the IVUsesByStride set and 00393 /// return true. Otherwise, return false. 00394 bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L, 00395 std::set<Instruction*> &Processed) { 00396 if (!I->getType()->isInteger() && !isa<PointerType>(I->getType())) 00397 return false; // Void and FP expressions cannot be reduced. 00398 if (!Processed.insert(I).second) 00399 return true; // Instruction already handled. 00400 00401 // Get the symbolic expression for this instruction. 00402 SCEVHandle ISE = GetExpressionSCEV(I, L); 00403 if (isa<SCEVCouldNotCompute>(ISE)) return false; 00404 00405 // Get the start and stride for this expression. 00406 SCEVHandle Start = SCEVUnknown::getIntegerSCEV(0, ISE->getType()); 00407 SCEVHandle Stride = Start; 00408 if (!getSCEVStartAndStride(ISE, L, Start, Stride)) 00409 return false; // Non-reducible symbolic expression, bail out. 00410 00411 for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;++UI){ 00412 Instruction *User = cast<Instruction>(*UI); 00413 00414 // Do not infinitely recurse on PHI nodes. 00415 if (isa<PHINode>(User) && Processed.count(User)) 00416 continue; 00417 00418 // If this is an instruction defined in a nested loop, or outside this loop, 00419 // don't recurse into it. 00420 bool AddUserToIVUsers = false; 00421 if (LI->getLoopFor(User->getParent()) != L) { 00422 DEBUG(std::cerr << "FOUND USER in other loop: " << *User 00423 << " OF SCEV: " << *ISE << "\n"); 00424 AddUserToIVUsers = true; 00425 } else if (!AddUsersIfInteresting(User, L, Processed)) { 00426 DEBUG(std::cerr << "FOUND USER: " << *User 00427 << " OF SCEV: " << *ISE << "\n"); 00428 AddUserToIVUsers = true; 00429 } 00430 00431 if (AddUserToIVUsers) { 00432 IVUsersOfOneStride &StrideUses = IVUsesByStride[Stride]; 00433 if (StrideUses.Users.empty()) // First occurance of this stride? 00434 StrideOrder.push_back(Stride); 00435 00436 // Okay, we found a user that we cannot reduce. Analyze the instruction 00437 // and decide what to do with it. If we are a use inside of the loop, use 00438 // the value before incrementation, otherwise use it after incrementation. 00439 if (IVUseShouldUsePostIncValue(User, I, L, EF, this)) { 00440 // The value used will be incremented by the stride more than we are 00441 // expecting, so subtract this off. 00442 SCEVHandle NewStart = SCEV::getMinusSCEV(Start, Stride); 00443 StrideUses.addUser(NewStart, User, I); 00444 StrideUses.Users.back().isUseOfPostIncrementedValue = true; 00445 DEBUG(std::cerr << " USING POSTINC SCEV, START=" << *NewStart<< "\n"); 00446 } else { 00447 StrideUses.addUser(Start, User, I); 00448 } 00449 } 00450 } 00451 return true; 00452 } 00453 00454 namespace { 00455 /// BasedUser - For a particular base value, keep information about how we've 00456 /// partitioned the expression so far. 00457 struct BasedUser { 00458 /// Base - The Base value for the PHI node that needs to be inserted for 00459 /// this use. As the use is processed, information gets moved from this 00460 /// field to the Imm field (below). BasedUser values are sorted by this 00461 /// field. 00462 SCEVHandle Base; 00463 00464 /// Inst - The instruction using the induction variable. 00465 Instruction *Inst; 00466 00467 /// OperandValToReplace - The operand value of Inst to replace with the 00468 /// EmittedBase. 00469 Value *OperandValToReplace; 00470 00471 /// Imm - The immediate value that should be added to the base immediately 00472 /// before Inst, because it will be folded into the imm field of the 00473 /// instruction. 00474 SCEVHandle Imm; 00475 00476 /// EmittedBase - The actual value* to use for the base value of this 00477 /// operation. This is null if we should just use zero so far. 00478 Value *EmittedBase; 00479 00480 // isUseOfPostIncrementedValue - True if this should use the 00481 // post-incremented version of this IV, not the preincremented version. 00482 // This can only be set in special cases, such as the terminating setcc 00483 // instruction for a loop and uses outside the loop that are dominated by 00484 // the loop. 00485 bool isUseOfPostIncrementedValue; 00486 00487 BasedUser(IVStrideUse &IVSU) 00488 : Base(IVSU.Offset), Inst(IVSU.User), 00489 OperandValToReplace(IVSU.OperandValToReplace), 00490 Imm(SCEVUnknown::getIntegerSCEV(0, Base->getType())), EmittedBase(0), 00491 isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue) {} 00492 00493 // Once we rewrite the code to insert the new IVs we want, update the 00494 // operands of Inst to use the new expression 'NewBase', with 'Imm' added 00495 // to it. 00496 void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, 00497 SCEVExpander &Rewriter, Loop *L, 00498 Pass *P); 00499 00500 Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, 00501 SCEVExpander &Rewriter, 00502 Instruction *IP, Loop *L); 00503 void dump() const; 00504 }; 00505 } 00506 00507 void BasedUser::dump() const { 00508 std::cerr << " Base=" << *Base; 00509 std::cerr << " Imm=" << *Imm; 00510 if (EmittedBase) 00511 std::cerr << " EB=" << *EmittedBase; 00512 00513 std::cerr << " Inst: " << *Inst; 00514 } 00515 00516 Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, 00517 SCEVExpander &Rewriter, 00518 Instruction *IP, Loop *L) { 00519 // Figure out where we *really* want to insert this code. In particular, if 00520 // the user is inside of a loop that is nested inside of L, we really don't 00521 // want to insert this expression before the user, we'd rather pull it out as 00522 // many loops as possible. 00523 LoopInfo &LI = Rewriter.getLoopInfo(); 00524 Instruction *BaseInsertPt = IP; 00525 00526 // Figure out the most-nested loop that IP is in. 00527 Loop *InsertLoop = LI.getLoopFor(IP->getParent()); 00528 00529 // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out 00530 // the preheader of the outer-most loop where NewBase is not loop invariant. 00531 while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) { 00532 BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator(); 00533 InsertLoop = InsertLoop->getParentLoop(); 00534 } 00535 00536 // If there is no immediate value, skip the next part. 00537 if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Imm)) 00538 if (SC->getValue()->isNullValue()) 00539 return Rewriter.expandCodeFor(NewBase, BaseInsertPt, 00540 OperandValToReplace->getType()); 00541 00542 Value *Base = Rewriter.expandCodeFor(NewBase, BaseInsertPt); 00543 00544 // Always emit the immediate (if non-zero) into the same block as the user. 00545 SCEVHandle NewValSCEV = SCEVAddExpr::get(SCEVUnknown::get(Base), Imm); 00546 return Rewriter.expandCodeFor(NewValSCEV, IP, 00547 OperandValToReplace->getType()); 00548 } 00549 00550 00551 // Once we rewrite the code to insert the new IVs we want, update the 00552 // operands of Inst to use the new expression 'NewBase', with 'Imm' added 00553 // to it. 00554 void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, 00555 SCEVExpander &Rewriter, 00556 Loop *L, Pass *P) { 00557 if (!isa<PHINode>(Inst)) { 00558 Value *NewVal = InsertCodeForBaseAtPosition(NewBase, Rewriter, Inst, L); 00559 // Replace the use of the operand Value with the new Phi we just created. 00560 Inst->replaceUsesOfWith(OperandValToReplace, NewVal); 00561 DEBUG(std::cerr << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst); 00562 return; 00563 } 00564 00565 // PHI nodes are more complex. We have to insert one copy of the NewBase+Imm 00566 // expression into each operand block that uses it. Note that PHI nodes can 00567 // have multiple entries for the same predecessor. We use a map to make sure 00568 // that a PHI node only has a single Value* for each predecessor (which also 00569 // prevents us from inserting duplicate code in some blocks). 00570 std::map<BasicBlock*, Value*> InsertedCode; 00571 PHINode *PN = cast<PHINode>(Inst); 00572 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 00573 if (PN->getIncomingValue(i) == OperandValToReplace) { 00574 // If this is a critical edge, split the edge so that we do not insert the 00575 // code on all predecessor/successor paths. We do this unless this is the 00576 // canonical backedge for this loop, as this can make some inserted code 00577 // be in an illegal position. 00578 BasicBlock *PHIPred = PN->getIncomingBlock(i); 00579 if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 && 00580 (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) { 00581 00582 // First step, split the critical edge. 00583 SplitCriticalEdge(PHIPred, PN->getParent(), P); 00584 00585 // Next step: move the basic block. In particular, if the PHI node 00586 // is outside of the loop, and PredTI is in the loop, we want to 00587 // move the block to be immediately before the PHI block, not 00588 // immediately after PredTI. 00589 if (L->contains(PHIPred) && !L->contains(PN->getParent())) { 00590 BasicBlock *NewBB = PN->getIncomingBlock(i); 00591 NewBB->moveBefore(PN->getParent()); 00592 } 00593 } 00594 00595 Value *&Code = InsertedCode[PN->getIncomingBlock(i)]; 00596 if (!Code) { 00597 // Insert the code into the end of the predecessor block. 00598 Instruction *InsertPt = PN->getIncomingBlock(i)->getTerminator(); 00599 Code = InsertCodeForBaseAtPosition(NewBase, Rewriter, InsertPt, L); 00600 } 00601 00602 // Replace the use of the operand Value with the new Phi we just created. 00603 PN->setIncomingValue(i, Code); 00604 Rewriter.clear(); 00605 } 00606 } 00607 DEBUG(std::cerr << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst); 00608 } 00609 00610 00611 /// isTargetConstant - Return true if the following can be referenced by the 00612 /// immediate field of a target instruction. 00613 static bool isTargetConstant(const SCEVHandle &V, const TargetLowering *TLI) { 00614 if (SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) { 00615 int64_t V = SC->getValue()->getSExtValue(); 00616 if (TLI) 00617 return TLI->isLegalAddressImmediate(V); 00618 else 00619 // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field. 00620 return (V > -(1 << 16) && V < (1 << 16)-1); 00621 } 00622 00623 if (SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) 00624 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(SU->getValue())) 00625 if (CE->getOpcode() == Instruction::Cast) { 00626 Constant *Op0 = CE->getOperand(0); 00627 if (isa<GlobalValue>(Op0) && 00628 TLI && 00629 TLI->isLegalAddressImmediate(cast<GlobalValue>(Op0))) 00630 return true; 00631 } 00632 return false; 00633 } 00634 00635 /// MoveLoopVariantsToImediateField - Move any subexpressions from Val that are 00636 /// loop varying to the Imm operand. 00637 static void MoveLoopVariantsToImediateField(SCEVHandle &Val, SCEVHandle &Imm, 00638 Loop *L) { 00639 if (Val->isLoopInvariant(L)) return; // Nothing to do. 00640 00641 if (SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { 00642 std::vector<SCEVHandle> NewOps; 00643 NewOps.reserve(SAE->getNumOperands()); 00644 00645 for (unsigned i = 0; i != SAE->getNumOperands(); ++i) 00646 if (!SAE->getOperand(i)->isLoopInvariant(L)) { 00647 // If this is a loop-variant expression, it must stay in the immediate 00648 // field of the expression. 00649 Imm = SCEVAddExpr::get(Imm, SAE->getOperand(i)); 00650 } else { 00651 NewOps.push_back(SAE->getOperand(i)); 00652 } 00653 00654 if (NewOps.empty()) 00655 Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); 00656 else 00657 Val = SCEVAddExpr::get(NewOps); 00658 } else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { 00659 // Try to pull immediates out of the start value of nested addrec's. 00660 SCEVHandle Start = SARE->getStart(); 00661 MoveLoopVariantsToImediateField(Start, Imm, L); 00662 00663 std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); 00664 Ops[0] = Start; 00665 Val = SCEVAddRecExpr::get(Ops, SARE->getLoop()); 00666 } else { 00667 // Otherwise, all of Val is variant, move the whole thing over. 00668 Imm = SCEVAddExpr::get(Imm, Val); 00669 Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); 00670 } 00671 } 00672 00673 00674 /// MoveImmediateValues - Look at Val, and pull out any additions of constants 00675 /// that can fit into the immediate field of instructions in the target. 00676 /// Accumulate these immediate values into the Imm value. 00677 static void MoveImmediateValues(const TargetLowering *TLI, 00678 SCEVHandle &Val, SCEVHandle &Imm, 00679 bool isAddress, Loop *L) { 00680 if (SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { 00681 std::vector<SCEVHandle> NewOps; 00682 NewOps.reserve(SAE->getNumOperands()); 00683 00684 for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { 00685 SCEVHandle NewOp = SAE->getOperand(i); 00686 MoveImmediateValues(TLI, NewOp, Imm, isAddress, L); 00687 00688 if (!NewOp->isLoopInvariant(L)) { 00689 // If this is a loop-variant expression, it must stay in the immediate 00690 // field of the expression. 00691 Imm = SCEVAddExpr::get(Imm, NewOp); 00692 } else { 00693 NewOps.push_back(NewOp); 00694 } 00695 } 00696 00697 if (NewOps.empty()) 00698 Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); 00699 else 00700 Val = SCEVAddExpr::get(NewOps); 00701 return; 00702 } else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { 00703 // Try to pull immediates out of the start value of nested addrec's. 00704 SCEVHandle Start = SARE->getStart(); 00705 MoveImmediateValues(TLI, Start, Imm, isAddress, L); 00706 00707 if (Start != SARE->getStart()) { 00708 std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); 00709 Ops[0] = Start; 00710 Val = SCEVAddRecExpr::get(Ops, SARE->getLoop()); 00711 } 00712 return; 00713 } else if (SCEVMulExpr *SME = dyn_cast<SCEVMulExpr>(Val)) { 00714 // Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field. 00715 if (isAddress && isTargetConstant(SME->getOperand(0), TLI) && 00716 SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) { 00717 00718 SCEVHandle SubImm = SCEVUnknown::getIntegerSCEV(0, Val->getType()); 00719 SCEVHandle NewOp = SME->getOperand(1); 00720 MoveImmediateValues(TLI, NewOp, SubImm, isAddress, L); 00721 00722 // If we extracted something out of the subexpressions, see if we can 00723 // simplify this! 00724 if (NewOp != SME->getOperand(1)) { 00725 // Scale SubImm up by "8". If the result is a target constant, we are 00726 // good. 00727 SubImm = SCEVMulExpr::get(SubImm, SME->getOperand(0)); 00728 if (isTargetConstant(SubImm, TLI)) { 00729 // Accumulate the immediate. 00730 Imm = SCEVAddExpr::get(Imm, SubImm); 00731 00732 // Update what is left of 'Val'. 00733 Val = SCEVMulExpr::get(SME->getOperand(0), NewOp); 00734 return; 00735 } 00736 } 00737 } 00738 } 00739 00740 // Loop-variant expressions must stay in the immediate field of the 00741 // expression. 00742 if ((isAddress && isTargetConstant(Val, TLI)) || 00743 !Val->isLoopInvariant(L)) { 00744 Imm = SCEVAddExpr::get(Imm, Val); 00745 Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); 00746 return; 00747 } 00748 00749 // Otherwise, no immediates to move. 00750 } 00751 00752 00753 /// IncrementAddExprUses - Decompose the specified expression into its added 00754 /// subexpressions, and increment SubExpressionUseCounts for each of these 00755 /// decomposed parts. 00756 static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs, 00757 SCEVHandle Expr) { 00758 if (SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) { 00759 for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j) 00760 SeparateSubExprs(SubExprs, AE->getOperand(j)); 00761 } else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) { 00762 SCEVHandle Zero = SCEVUnknown::getIntegerSCEV(0, Expr->getType()); 00763 if (SARE->getOperand(0) == Zero) { 00764 SubExprs.push_back(Expr); 00765 } else { 00766 // Compute the addrec with zero as its base. 00767 std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); 00768 Ops[0] = Zero; // Start with zero base. 00769 SubExprs.push_back(SCEVAddRecExpr::get(Ops, SARE->getLoop())); 00770 00771 00772 SeparateSubExprs(SubExprs, SARE->getOperand(0)); 00773 } 00774 } else if (!isa<SCEVConstant>(Expr) || 00775 !cast<SCEVConstant>(Expr)->getValue()->isNullValue()) { 00776 // Do not add zero. 00777 SubExprs.push_back(Expr); 00778 } 00779 } 00780 00781 00782 /// RemoveCommonExpressionsFromUseBases - Look through all of the uses in Bases, 00783 /// removing any common subexpressions from it. Anything truly common is 00784 /// removed, accumulated, and returned. This looks for things like (a+b+c) and 00785 /// (a+c+d) -> (a+c). The common expression is *removed* from the Bases. 00786 static SCEVHandle 00787 RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses) { 00788 unsigned NumUses = Uses.size(); 00789 00790 // Only one use? Use its base, regardless of what it is! 00791 SCEVHandle Zero = SCEVUnknown::getIntegerSCEV(0, Uses[0].Base->getType()); 00792 SCEVHandle Result = Zero; 00793 if (NumUses == 1) { 00794 std::swap(Result, Uses[0].Base); 00795 return Result; 00796 } 00797 00798 // To find common subexpressions, count how many of Uses use each expression. 00799 // If any subexpressions are used Uses.size() times, they are common. 00800 std::map<SCEVHandle, unsigned> SubExpressionUseCounts; 00801 00802 // UniqueSubExprs - Keep track of all of the subexpressions we see in the 00803 // order we see them. 00804 std::vector<SCEVHandle> UniqueSubExprs; 00805 00806 std::vector<SCEVHandle> SubExprs; 00807 for (unsigned i = 0; i != NumUses; ++i) { 00808 // If the base is zero (which is common), return zero now, there are no 00809 // CSEs we can find. 00810 if (Uses[i].Base == Zero) return Zero; 00811 00812 // Split the expression into subexprs. 00813 SeparateSubExprs(SubExprs, Uses[i].Base); 00814 // Add one to SubExpressionUseCounts for each subexpr present. 00815 for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) 00816 if (++SubExpressionUseCounts[SubExprs[j]] == 1) 00817 UniqueSubExprs.push_back(SubExprs[j]); 00818 SubExprs.clear(); 00819 } 00820 00821 // Now that we know how many times each is used, build Result. Iterate over 00822 // UniqueSubexprs so that we have a stable ordering. 00823 for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) { 00824 std::map<SCEVHandle, unsigned>::iterator I = 00825 SubExpressionUseCounts.find(UniqueSubExprs[i]); 00826 assert(I != SubExpressionUseCounts.end() && "Entry not found?"); 00827 if (I->second == NumUses) { // Found CSE! 00828 Result = SCEVAddExpr::get(Result, I->first); 00829 } else { 00830 // Remove non-cse's from SubExpressionUseCounts. 00831 SubExpressionUseCounts.erase(I); 00832 } 00833 } 00834 00835 // If we found no CSE's, return now. 00836 if (Result == Zero) return Result; 00837 00838 // Otherwise, remove all of the CSE's we found from each of the base values. 00839 for (unsigned i = 0; i != NumUses; ++i) { 00840 // Split the expression into subexprs. 00841 SeparateSubExprs(SubExprs, Uses[i].Base); 00842 00843 // Remove any common subexpressions. 00844 for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) 00845 if (SubExpressionUseCounts.count(SubExprs[j])) { 00846 SubExprs.erase(SubExprs.begin()+j); 00847 --j; --e; 00848 } 00849 00850 // Finally, the non-shared expressions together. 00851 if (SubExprs.empty()) 00852 Uses[i].Base = Zero; 00853 else 00854 Uses[i].Base = SCEVAddExpr::get(SubExprs); 00855 SubExprs.clear(); 00856 } 00857 00858 return Result; 00859 } 00860 00861 /// isZero - returns true if the scalar evolution expression is zero. 00862 /// 00863 static bool isZero(SCEVHandle &V) { 00864 if (SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) 00865 return SC->getValue()->getRawValue() == 0; 00866 return false; 00867 } 00868 00869 00870 /// CheckForIVReuse - Returns the multiple if the stride is the multiple 00871 /// of a previous stride and it is a legal value for the target addressing 00872 /// mode scale component. This allows the users of this stride to be rewritten 00873 /// as prev iv * factor. It returns 0 if no reuse is possible. 00874 unsigned LoopStrengthReduce::CheckForIVReuse(const SCEVHandle &Stride, 00875 IVExpr &IV, const Type *Ty) { 00876 if (!TLI) return 0; 00877 00878 if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) { 00879 int64_t SInt = SC->getValue()->getSExtValue(); 00880 if (SInt == 1) return 0; 00881 00882 for (TargetLowering::legal_am_scale_iterator 00883 I = TLI->legal_am_scale_begin(), E = TLI->legal_am_scale_end(); 00884 I != E; ++I) { 00885 unsigned Scale = *I; 00886 if (unsigned(abs(SInt)) < Scale || (SInt % Scale) != 0) 00887 continue; 00888 std::map<SCEVHandle, IVsOfOneStride>::iterator SI = 00889 IVsByStride.find(SCEVUnknown::getIntegerSCEV(SInt/Scale, Type::UIntTy)); 00890 if (SI == IVsByStride.end()) 00891 continue; 00892 for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), 00893 IE = SI->second.IVs.end(); II != IE; ++II) 00894 // FIXME: Only handle base == 0 for now. 00895 // Only reuse previous IV if it would not require a type conversion. 00896 if (isZero(II->Base) && 00897 II->Base->getType()->isLosslesslyConvertibleTo(Ty)) { 00898 IV = *II; 00899 return Scale; 00900 } 00901 } 00902 } 00903 00904 return 0; 00905 } 00906 00907 00908 /// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single 00909 /// stride of IV. All of the users may have different starting values, and this 00910 /// may not be the only stride (we know it is if isOnlyStride is true). 00911 void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, 00912 IVUsersOfOneStride &Uses, 00913 Loop *L, 00914 bool isOnlyStride) { 00915 // Transform our list of users and offsets to a bit more complex table. In 00916 // this new vector, each 'BasedUser' contains 'Base' the base of the 00917 // strided accessas well as the old information from Uses. We progressively 00918 // move information from the Base field to the Imm field, until we eventually 00919 // have the full access expression to rewrite the use. 00920 std::vector<BasedUser> UsersToProcess; 00921 UsersToProcess.reserve(Uses.Users.size()); 00922 for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i) { 00923 UsersToProcess.push_back(Uses.Users[i]); 00924 00925 // Move any loop invariant operands from the offset field to the immediate 00926 // field of the use, so that we don't try to use something before it is 00927 // computed. 00928 MoveLoopVariantsToImediateField(UsersToProcess.back().Base, 00929 UsersToProcess.back().Imm, L); 00930 assert(UsersToProcess.back().Base->isLoopInvariant(L) && 00931 "Base value is not loop invariant!"); 00932 } 00933 00934 // We now have a whole bunch of uses of like-strided induction variables, but 00935 // they might all have different bases. We want to emit one PHI node for this 00936 // stride which we fold as many common expressions (between the IVs) into as 00937 // possible. Start by identifying the common expressions in the base values 00938 // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find 00939 // "A+B"), emit it to the preheader, then remove the expression from the 00940 // UsersToProcess base values. 00941 SCEVHandle CommonExprs = 00942 RemoveCommonExpressionsFromUseBases(UsersToProcess); 00943 00944 // Check if it is possible to reuse a IV with stride that is factor of this 00945 // stride. And the multiple is a number that can be encoded in the scale 00946 // field of the target addressing mode. 00947 PHINode *NewPHI = NULL; 00948 Value *IncV = NULL; 00949 IVExpr ReuseIV; 00950 unsigned RewriteFactor = CheckForIVReuse(Stride, ReuseIV, 00951 CommonExprs->getType()); 00952 if (RewriteFactor != 0) { 00953 DEBUG(std::cerr << "BASED ON IV of STRIDE " << *ReuseIV.Stride 00954 << " and BASE " << *ReuseIV.Base << " :\n"); 00955 NewPHI = ReuseIV.PHI; 00956 IncV = ReuseIV.IncV; 00957 } 00958 00959 // Next, figure out what we can represent in the immediate fields of 00960 // instructions. If we can represent anything there, move it to the imm 00961 // fields of the BasedUsers. We do this so that it increases the commonality 00962 // of the remaining uses. 00963 for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { 00964 // If the user is not in the current loop, this means it is using the exit 00965 // value of the IV. Do not put anything in the base, make sure it's all in 00966 // the immediate field to allow as much factoring as possible. 00967 if (!L->contains(UsersToProcess[i].Inst->getParent())) { 00968 UsersToProcess[i].Imm = SCEVAddExpr::get(UsersToProcess[i].Imm, 00969 UsersToProcess[i].Base); 00970 UsersToProcess[i].Base = 00971 SCEVUnknown::getIntegerSCEV(0, UsersToProcess[i].Base->getType()); 00972 } else { 00973 00974 // Addressing modes can be folded into loads and stores. Be careful that 00975 // the store is through the expression, not of the expression though. 00976 bool isAddress = isa<LoadInst>(UsersToProcess[i].Inst); 00977 if (StoreInst *SI = dyn_cast<StoreInst>(UsersToProcess[i].Inst)) 00978 if (SI->getOperand(1) == UsersToProcess[i].OperandValToReplace) 00979 isAddress = true; 00980 00981 MoveImmediateValues(TLI, UsersToProcess[i].Base, UsersToProcess[i].Imm, 00982 isAddress, L); 00983 } 00984 } 00985 00986 // Now that we know what we need to do, insert the PHI node itself. 00987 // 00988 DEBUG(std::cerr << "INSERTING IV of STRIDE " << *Stride << " and BASE " 00989 << *CommonExprs << " :\n"); 00990 00991 SCEVExpander Rewriter(*SE, *LI); 00992 SCEVExpander PreheaderRewriter(*SE, *LI); 00993 00994 BasicBlock *Preheader = L->getLoopPreheader(); 00995 Instruction *PreInsertPt = Preheader->getTerminator(); 00996 Instruction *PhiInsertBefore = L->getHeader()->begin(); 00997 00998 BasicBlock *LatchBlock = L->getLoopLatch(); 00999 01000 const Type *ReplacedTy = CommonExprs->getType(); 01001 01002 // Emit the initial base value into the loop preheader. 01003 Value *CommonBaseV 01004 = PreheaderRewriter.expandCodeFor(CommonExprs, PreInsertPt, 01005 ReplacedTy); 01006 01007 if (RewriteFactor == 0) { 01008 // Create a new Phi for this base, and stick it in the loop header. 01009 NewPHI = new PHINode(ReplacedTy, "iv.", PhiInsertBefore); 01010 ++NumInserted; 01011 01012 // Add common base to the new Phi node. 01013 NewPHI->addIncoming(CommonBaseV, Preheader); 01014 01015 // Insert the stride into the preheader. 01016 Value *StrideV = PreheaderRewriter.expandCodeFor(Stride, PreInsertPt, 01017 ReplacedTy); 01018 if (!isa<ConstantInt>(StrideV)) ++NumVariable; 01019 01020 // Emit the increment of the base value before the terminator of the loop 01021 // latch block, and add it to the Phi node. 01022 SCEVHandle IncExp = SCEVAddExpr::get(SCEVUnknown::get(NewPHI), 01023 SCEVUnknown::get(StrideV)); 01024 01025 IncV = Rewriter.expandCodeFor(IncExp, LatchBlock->getTerminator(), 01026 ReplacedTy); 01027 IncV->setName(NewPHI->getName()+".inc"); 01028 NewPHI->addIncoming(IncV, LatchBlock); 01029 01030 // Remember this in case a later stride is multiple of this. 01031 IVsByStride[Stride].addIV(Stride, CommonExprs, NewPHI, IncV); 01032 } else { 01033 Constant *C = dyn_cast<Constant>(CommonBaseV); 01034 if (!C || 01035 (!C->isNullValue() && 01036 !isTargetConstant(SCEVUnknown::get(CommonBaseV), TLI))) 01037 // We want the common base emitted into the preheader! 01038 CommonBaseV = new CastInst(CommonBaseV, CommonBaseV->getType(), 01039 "commonbase", PreInsertPt); 01040 } 01041 01042 // Sort by the base value, so that all IVs with identical bases are next to 01043 // each other. 01044 while (!UsersToProcess.empty()) { 01045 SCEVHandle Base = UsersToProcess.back().Base; 01046 01047 DEBUG(std::cerr << " INSERTING code for BASE = " << *Base << ":\n"); 01048 01049 // Emit the code for Base into the preheader. 01050 Value *BaseV = PreheaderRewriter.expandCodeFor(Base, PreInsertPt, 01051 ReplacedTy); 01052 01053 // If BaseV is a constant other than 0, make sure that it gets inserted into 01054 // the preheader, instead of being forward substituted into the uses. We do 01055 // this by forcing a noop cast to be inserted into the preheader in this 01056 // case. 01057 if (Constant *C = dyn_cast<Constant>(BaseV)) 01058 if (!C->isNullValue() && !isTargetConstant(Base, TLI)) { 01059 // We want this constant emitted into the preheader! 01060 BaseV = new CastInst(BaseV, BaseV->getType(), "preheaderinsert", 01061 PreInsertPt); 01062 } 01063 01064 // Emit the code to add the immediate offset to the Phi value, just before 01065 // the instructions that we identified as using this stride and base. 01066 unsigned ScanPos = 0; 01067 do { 01068 BasedUser &User = UsersToProcess.back(); 01069 01070 // If this instruction wants to use the post-incremented value, move it 01071 // after the post-inc and use its value instead of the PHI. 01072 Value *RewriteOp = NewPHI; 01073 if (User.isUseOfPostIncrementedValue) { 01074 RewriteOp = IncV; 01075 01076 // If this user is in the loop, make sure it is the last thing in the 01077 // loop to ensure it is dominated by the increment. 01078 if (L->contains(User.Inst->getParent())) 01079 User.Inst->moveBefore(LatchBlock->getTerminator()); 01080 } 01081 if (RewriteOp->getType() != ReplacedTy) 01082 RewriteOp = SCEVExpander::InsertCastOfTo(RewriteOp, ReplacedTy); 01083 01084 SCEVHandle RewriteExpr = SCEVUnknown::get(RewriteOp); 01085 01086 // Clear the SCEVExpander's expression map so that we are guaranteed 01087 // to have the code emitted where we expect it. 01088 Rewriter.clear(); 01089 01090 // If we are reusing the iv, then it must be multiplied by a constant 01091 // factor take advantage of addressing mode scale component. 01092 if (RewriteFactor != 0) { 01093 RewriteExpr = 01094 SCEVMulExpr::get(SCEVUnknown::getIntegerSCEV(RewriteFactor, 01095 RewriteExpr->getType()), 01096 RewriteExpr); 01097 01098 // The common base is emitted in the loop preheader. But since we 01099 // are reusing an IV, it has not been used to initialize the PHI node. 01100 // Add it to the expression used to rewrite the uses. 01101 if (!isa<ConstantInt>(CommonBaseV) || 01102 !cast<ConstantInt>(CommonBaseV)->isNullValue()) 01103 RewriteExpr = SCEVAddExpr::get(RewriteExpr, 01104 SCEVUnknown::get(CommonBaseV)); 01105 } 01106 01107 // Now that we know what we need to do, insert code before User for the 01108 // immediate and any loop-variant expressions. 01109 if (!isa<ConstantInt>(BaseV) || !cast<ConstantInt>(BaseV)->isNullValue()) 01110 // Add BaseV to the PHI value if needed. 01111 RewriteExpr = SCEVAddExpr::get(RewriteExpr, SCEVUnknown::get(BaseV)); 01112 01113 User.RewriteInstructionToUseNewBase(RewriteExpr, Rewriter, L, this); 01114 01115 // Mark old value we replaced as possibly dead, so that it is elminated 01116 // if we just replaced the last use of that value. 01117 DeadInsts.insert(cast<Instruction>(User.OperandValToReplace)); 01118 01119 UsersToProcess.pop_back(); 01120 ++NumReduced; 01121 01122 // If there are any more users to process with the same base, move one of 01123 // them to the end of the list so that we will process it. 01124 if (!UsersToProcess.empty()) { 01125 for (unsigned e = UsersToProcess.size(); ScanPos != e; ++ScanPos) 01126 if (UsersToProcess[ScanPos].Base == Base) { 01127 std::swap(UsersToProcess[ScanPos], UsersToProcess.back()); 01128 break; 01129 } 01130 } 01131 } while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base); 01132 // TODO: Next, find out which base index is the most common, pull it out. 01133 } 01134 01135 // IMPORTANT TODO: Figure out how to partition the IV's with this stride, but 01136 // different starting values, into different PHIs. 01137 } 01138 01139 // OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar 01140 // uses in the loop, look to see if we can eliminate some, in favor of using 01141 // common indvars for the different uses. 01142 void LoopStrengthReduce::OptimizeIndvars(Loop *L) { 01143 // TODO: implement optzns here. 01144 01145 01146 01147 01148 // Finally, get the terminating condition for the loop if possible. If we 01149 // can, we want to change it to use a post-incremented version of its 01150 // induction variable, to allow coalescing the live ranges for the IV into 01151 // one register value. 01152 PHINode *SomePHI = cast<PHINode>(L->getHeader()->begin()); 01153 BasicBlock *Preheader = L->getLoopPreheader(); 01154 BasicBlock *LatchBlock = 01155 SomePHI->getIncomingBlock(SomePHI->getIncomingBlock(0) == Preheader); 01156 BranchInst *TermBr = dyn_cast<BranchInst>(LatchBlock->getTerminator()); 01157 if (!TermBr || TermBr->isUnconditional() || 01158 !isa<SetCondInst>(TermBr->getCondition())) 01159 return; 01160 SetCondInst *Cond = cast<SetCondInst>(TermBr->getCondition()); 01161 01162 // Search IVUsesByStride to find Cond's IVUse if there is one. 01163 IVStrideUse *CondUse = 0; 01164 const SCEVHandle *CondStride = 0; 01165 01166 for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e && !CondUse; 01167 ++Stride) { 01168 std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI = 01169 IVUsesByStride.find(StrideOrder[Stride]); 01170 assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); 01171 01172 for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(), 01173 E = SI->second.Users.end(); UI != E; ++UI) 01174 if (UI->User == Cond) { 01175 CondUse = &*UI; 01176 CondStride = &SI->first; 01177 // NOTE: we could handle setcc instructions with multiple uses here, but 01178 // InstCombine does it as well for simple uses, it's not clear that it 01179 // occurs enough in real life to handle. 01180 break; 01181 } 01182 } 01183 if (!CondUse) return; // setcc doesn't use the IV. 01184 01185 // setcc stride is complex, don't mess with users. 01186 // FIXME: Evaluate whether this is a good idea or not. 01187 if (!isa<SCEVConstant>(*CondStride)) return; 01188 01189 // It's possible for the setcc instruction to be anywhere in the loop, and 01190 // possible for it to have multiple users. If it is not immediately before 01191 // the latch block branch, move it. 01192 if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) { 01193 if (Cond->hasOneUse()) { // Condition has a single use, just move it. 01194 Cond->moveBefore(TermBr); 01195 } else { 01196 // Otherwise, clone the terminating condition and insert into the loopend. 01197 Cond = cast<SetCondInst>(Cond->clone()); 01198 Cond->setName(L->getHeader()->getName() + ".termcond"); 01199 LatchBlock->getInstList().insert(TermBr, Cond); 01200 01201 // Clone the IVUse, as the old use still exists! 01202 IVUsesByStride[*CondStride].addUser(CondUse->Offset, Cond, 01203 CondUse->OperandValToReplace); 01204 CondUse = &IVUsesByStride[*CondStride].Users.back(); 01205 } 01206 } 01207 01208 // If we get to here, we know that we can transform the setcc instruction to 01209 // use the post-incremented version of the IV, allowing us to coalesce the 01210 // live ranges for the IV correctly. 01211 CondUse->Offset = SCEV::getMinusSCEV(CondUse->Offset, *CondStride); 01212 CondUse->isUseOfPostIncrementedValue = true; 01213 } 01214 01215 namespace { 01216 // Constant strides come first which in turns are sorted by their absolute 01217 // values. If absolute values are the same, then positive strides comes first. 01218 // e.g. 01219 // 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X 01220 struct StrideCompare { 01221 bool operator()(const SCEVHandle &LHS, const SCEVHandle &RHS) { 01222 SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS); 01223 SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS); 01224 if (LHSC && RHSC) { 01225 int64_t LV = LHSC->getValue()->getSExtValue(); 01226 int64_t RV = RHSC->getValue()->getSExtValue(); 01227 uint64_t ALV = (LV < 0) ? -LV : LV; 01228 uint64_t ARV = (RV < 0) ? -RV : RV; 01229 if (ALV == ARV) 01230 return LV > RV; 01231 else 01232 return ALV < ARV; 01233 } 01234 return (LHSC && !RHSC); 01235 } 01236 }; 01237 } 01238 01239 void LoopStrengthReduce::runOnLoop(Loop *L) { 01240 // First step, transform all loops nesting inside of this loop. 01241 for (LoopInfo::iterator I = L->begin(), E = L->end(); I != E; ++I) 01242 runOnLoop(*I); 01243 01244 // Next, find all uses of induction variables in this loop, and catagorize 01245 // them by stride. Start by finding all of the PHI nodes in the header for 01246 // this loop. If they are induction variables, inspect their uses. 01247 std::set<Instruction*> Processed; // Don't reprocess instructions. 01248 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) 01249 AddUsersIfInteresting(I, L, Processed); 01250 01251 // If we have nothing to do, return. 01252 if (IVUsesByStride.empty()) return; 01253 01254 // Optimize induction variables. Some indvar uses can be transformed to use 01255 // strides that will be needed for other purposes. A common example of this 01256 // is the exit test for the loop, which can often be rewritten to use the 01257 // computation of some other indvar to decide when to terminate the loop. 01258 OptimizeIndvars(L); 01259 01260 01261 // FIXME: We can widen subreg IV's here for RISC targets. e.g. instead of 01262 // doing computation in byte values, promote to 32-bit values if safe. 01263 01264 // FIXME: Attempt to reuse values across multiple IV's. In particular, we 01265 // could have something like "for(i) { foo(i*8); bar(i*16) }", which should be 01266 // codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC. Need 01267 // to be careful that IV's are all the same type. Only works for intptr_t 01268 // indvars. 01269 01270 // If we only have one stride, we can more aggressively eliminate some things. 01271 bool HasOneStride = IVUsesByStride.size() == 1; 01272 01273 #ifndef NDEBUG 01274 DEBUG(std::cerr << "\nLSR on "); 01275 DEBUG(L->dump()); 01276 #endif 01277 01278 // IVsByStride keeps IVs for one particular loop. 01279 IVsByStride.clear(); 01280 01281 // Sort the StrideOrder so we process larger strides first. 01282 std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare()); 01283 01284 // Note: this processes each stride/type pair individually. All users passed 01285 // into StrengthReduceStridedIVUsers have the same type AND stride. Also, 01286 // node that we iterate over IVUsesByStride indirectly by using StrideOrder. 01287 // This extra layer of indirection makes the ordering of strides deterministic 01288 // - not dependent on map order. 01289 for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) { 01290 std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI = 01291 IVUsesByStride.find(StrideOrder[Stride]); 01292 assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); 01293 StrengthReduceStridedIVUsers(SI->first, SI->second, L, HasOneStride); 01294 } 01295 01296 // Clean up after ourselves 01297 if (!DeadInsts.empty()) { 01298 DeleteTriviallyDeadInstructions(DeadInsts); 01299 01300 BasicBlock::iterator I = L->getHeader()->begin(); 01301 PHINode *PN; 01302 while ((PN = dyn_cast<PHINode>(I))) { 01303 ++I; // Preincrement iterator to avoid invalidating it when deleting PN. 01304 01305 // At this point, we know that we have killed one or more GEP 01306 // instructions. It is worth checking to see if the cann indvar is also 01307 // dead, so that we can remove it as well. The requirements for the cann 01308 // indvar to be considered dead are: 01309 // 1. the cann indvar has one use 01310 // 2. the use is an add instruction 01311 // 3. the add has one use 01312 // 4. the add is used by the cann indvar 01313 // If all four cases above are true, then we can remove both the add and 01314 // the cann indvar. 01315 // FIXME: this needs to eliminate an induction variable even if it's being 01316 // compared against some value to decide loop termination. 01317 if (PN->hasOneUse()) { 01318 BinaryOperator *BO = dyn_cast<BinaryOperator>(*(PN->use_begin())); 01319 if (BO && BO->hasOneUse()) { 01320 if (PN == *(BO->use_begin())) { 01321 DeadInsts.insert(BO); 01322 // Break the cycle, then delete the PHI. 01323 PN->replaceAllUsesWith(UndefValue::get(PN->getType())); 01324 SE->deleteInstructionFromRecords(PN); 01325 PN->eraseFromParent(); 01326 } 01327 } 01328 } 01329 } 01330 DeleteTriviallyDeadInstructions(DeadInsts); 01331 } 01332 01333 CastedPointers.clear(); 01334 IVUsesByStride.clear(); 01335 StrideOrder.clear(); 01336 return; 01337 }