LLVM API Documentation
00001 //===- LoopStrengthReduce.cpp - Strength Reduce GEPs in Loops -------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by Nate Begeman and is distributed under the 00006 // University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This pass performs a strength reduction on array references inside loops that 00011 // have as one or more of their components the loop induction variable. This is 00012 // accomplished by creating a new Value to hold the initial value of the array 00013 // access for the first iteration, and then creating a new GEP instruction in 00014 // the loop to increment the value by the appropriate amount. 00015 // 00016 //===----------------------------------------------------------------------===// 00017 00018 #define DEBUG_TYPE "loop-reduce" 00019 #include "llvm/Transforms/Scalar.h" 00020 #include "llvm/Constants.h" 00021 #include "llvm/Instructions.h" 00022 #include "llvm/Type.h" 00023 #include "llvm/DerivedTypes.h" 00024 #include "llvm/Analysis/Dominators.h" 00025 #include "llvm/Analysis/LoopInfo.h" 00026 #include "llvm/Analysis/ScalarEvolutionExpander.h" 00027 #include "llvm/Support/CFG.h" 00028 #include "llvm/Support/GetElementPtrTypeIterator.h" 00029 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 00030 #include "llvm/Transforms/Utils/Local.h" 00031 #include "llvm/Target/TargetData.h" 00032 #include "llvm/ADT/Statistic.h" 00033 #include "llvm/Support/Debug.h" 00034 #include "llvm/Target/TargetLowering.h" 00035 #include <algorithm> 00036 #include <iostream> 00037 #include <set> 00038 using namespace llvm; 00039 00040 namespace { 00041 Statistic<> NumReduced ("loop-reduce", "Number of GEPs strength reduced"); 00042 Statistic<> NumInserted("loop-reduce", "Number of PHIs inserted"); 00043 Statistic<> NumVariable("loop-reduce","Number of PHIs with variable strides"); 00044 00045 /// IVStrideUse - Keep track of one use of a strided induction variable, where 00046 /// the stride is stored externally. The Offset member keeps track of the 00047 /// offset from the IV, User is the actual user of the operand, and 'Operand' 00048 /// is the operand # of the User that is the use. 00049 struct IVStrideUse { 00050 SCEVHandle Offset; 00051 Instruction *User; 00052 Value *OperandValToReplace; 00053 00054 // isUseOfPostIncrementedValue - True if this should use the 00055 // post-incremented version of this IV, not the preincremented version. 00056 // This can only be set in special cases, such as the terminating setcc 00057 // instruction for a loop or uses dominated by the loop. 00058 bool isUseOfPostIncrementedValue; 00059 00060 IVStrideUse(const SCEVHandle &Offs, Instruction *U, Value *O) 00061 : Offset(Offs), User(U), OperandValToReplace(O), 00062 isUseOfPostIncrementedValue(false) {} 00063 }; 00064 00065 /// IVUsersOfOneStride - This structure keeps track of all instructions that 00066 /// have an operand that is based on the trip count multiplied by some stride. 00067 /// The stride for all of these users is common and kept external to this 00068 /// structure. 00069 struct IVUsersOfOneStride { 00070 /// Users - Keep track of all of the users of this stride as well as the 00071 /// initial value and the operand that uses the IV. 00072 std::vector<IVStrideUse> Users; 00073 00074 void addUser(const SCEVHandle &Offset,Instruction *User, Value *Operand) { 00075 Users.push_back(IVStrideUse(Offset, User, Operand)); 00076 } 00077 }; 00078 00079 /// IVInfo - This structure keeps track of one IV expression inserted during 00080 /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as 00081 /// well as the PHI node and increment value created for rewrite. 00082 struct IVExpr { 00083 SCEVHandle Stride; 00084 SCEVHandle Base; 00085 PHINode *PHI; 00086 Value *IncV; 00087 00088 IVExpr() 00089 : Stride(SCEVUnknown::getIntegerSCEV(0, Type::UIntTy)), 00090 Base (SCEVUnknown::getIntegerSCEV(0, Type::UIntTy)) {} 00091 IVExpr(const SCEVHandle &stride, const SCEVHandle &base, PHINode *phi, 00092 Value *incv) 00093 : Stride(stride), Base(base), PHI(phi), IncV(incv) {} 00094 }; 00095 00096 /// IVsOfOneStride - This structure keeps track of all IV expression inserted 00097 /// during StrengthReduceStridedIVUsers for a particular stride of the IV. 00098 struct IVsOfOneStride { 00099 std::vector<IVExpr> IVs; 00100 00101 void addIV(const SCEVHandle &Stride, const SCEVHandle &Base, PHINode *PHI, 00102 Value *IncV) { 00103 IVs.push_back(IVExpr(Stride, Base, PHI, IncV)); 00104 } 00105 }; 00106 00107 class LoopStrengthReduce : public FunctionPass { 00108 LoopInfo *LI; 00109 ETForest *EF; 00110 ScalarEvolution *SE; 00111 const TargetData *TD; 00112 const Type *UIntPtrTy; 00113 bool Changed; 00114 00115 /// IVUsesByStride - Keep track of all uses of induction variables that we 00116 /// are interested in. The key of the map is the stride of the access. 00117 std::map<SCEVHandle, IVUsersOfOneStride> IVUsesByStride; 00118 00119 /// IVsByStride - Keep track of all IVs that have been inserted for a 00120 /// particular stride. 00121 std::map<SCEVHandle, IVsOfOneStride> IVsByStride; 00122 00123 /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable: 00124 /// We use this to iterate over the IVUsesByStride collection without being 00125 /// dependent on random ordering of pointers in the process. 00126 std::vector<SCEVHandle> StrideOrder; 00127 00128 /// CastedValues - As we need to cast values to uintptr_t, this keeps track 00129 /// of the casted version of each value. This is accessed by 00130 /// getCastedVersionOf. 00131 std::map<Value*, Value*> CastedPointers; 00132 00133 /// DeadInsts - Keep track of instructions we may have made dead, so that 00134 /// we can remove them after we are done working. 00135 std::set<Instruction*> DeadInsts; 00136 00137 /// TLI - Keep a pointer of a TargetLowering to consult for determining 00138 /// transformation profitability. 00139 const TargetLowering *TLI; 00140 00141 public: 00142 LoopStrengthReduce(const TargetLowering *tli = NULL) 00143 : TLI(tli) { 00144 } 00145 00146 virtual bool runOnFunction(Function &) { 00147 LI = &getAnalysis<LoopInfo>(); 00148 EF = &getAnalysis<ETForest>(); 00149 SE = &getAnalysis<ScalarEvolution>(); 00150 TD = &getAnalysis<TargetData>(); 00151 UIntPtrTy = TD->getIntPtrType(); 00152 Changed = false; 00153 00154 for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) 00155 runOnLoop(*I); 00156 00157 return Changed; 00158 } 00159 00160 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 00161 // We split critical edges, so we change the CFG. However, we do update 00162 // many analyses if they are around. 00163 AU.addPreservedID(LoopSimplifyID); 00164 AU.addPreserved<LoopInfo>(); 00165 AU.addPreserved<DominatorSet>(); 00166 AU.addPreserved<ETForest>(); 00167 AU.addPreserved<ImmediateDominators>(); 00168 AU.addPreserved<DominanceFrontier>(); 00169 AU.addPreserved<DominatorTree>(); 00170 00171 AU.addRequiredID(LoopSimplifyID); 00172 AU.addRequired<LoopInfo>(); 00173 AU.addRequired<ETForest>(); 00174 AU.addRequired<TargetData>(); 00175 AU.addRequired<ScalarEvolution>(); 00176 } 00177 00178 /// getCastedVersionOf - Return the specified value casted to uintptr_t. 00179 /// 00180 Value *getCastedVersionOf(Value *V); 00181 private: 00182 void runOnLoop(Loop *L); 00183 bool AddUsersIfInteresting(Instruction *I, Loop *L, 00184 std::set<Instruction*> &Processed); 00185 SCEVHandle GetExpressionSCEV(Instruction *E, Loop *L); 00186 00187 void OptimizeIndvars(Loop *L); 00188 00189 unsigned CheckForIVReuse(const SCEVHandle &Stride, IVExpr &IV); 00190 00191 void StrengthReduceStridedIVUsers(const SCEVHandle &Stride, 00192 IVUsersOfOneStride &Uses, 00193 Loop *L, bool isOnlyStride); 00194 void DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts); 00195 }; 00196 RegisterOpt<LoopStrengthReduce> X("loop-reduce", 00197 "Loop Strength Reduction"); 00198 } 00199 00200 FunctionPass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { 00201 return new LoopStrengthReduce(TLI); 00202 } 00203 00204 /// getCastedVersionOf - Return the specified value casted to uintptr_t. 00205 /// 00206 Value *LoopStrengthReduce::getCastedVersionOf(Value *V) { 00207 if (V->getType() == UIntPtrTy) return V; 00208 if (Constant *CB = dyn_cast<Constant>(V)) 00209 return ConstantExpr::getCast(CB, UIntPtrTy); 00210 00211 Value *&New = CastedPointers[V]; 00212 if (New) return New; 00213 00214 New = SCEVExpander::InsertCastOfTo(V, UIntPtrTy); 00215 DeadInsts.insert(cast<Instruction>(New)); 00216 return New; 00217 } 00218 00219 00220 /// DeleteTriviallyDeadInstructions - If any of the instructions is the 00221 /// specified set are trivially dead, delete them and see if this makes any of 00222 /// their operands subsequently dead. 00223 void LoopStrengthReduce:: 00224 DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts) { 00225 while (!Insts.empty()) { 00226 Instruction *I = *Insts.begin(); 00227 Insts.erase(Insts.begin()); 00228 if (isInstructionTriviallyDead(I)) { 00229 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) 00230 if (Instruction *U = dyn_cast<Instruction>(I->getOperand(i))) 00231 Insts.insert(U); 00232 SE->deleteInstructionFromRecords(I); 00233 I->eraseFromParent(); 00234 Changed = true; 00235 } 00236 } 00237 } 00238 00239 00240 /// GetExpressionSCEV - Compute and return the SCEV for the specified 00241 /// instruction. 00242 SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp, Loop *L) { 00243 // Scalar Evolutions doesn't know how to compute SCEV's for GEP instructions. 00244 // If this is a GEP that SE doesn't know about, compute it now and insert it. 00245 // If this is not a GEP, or if we have already done this computation, just let 00246 // SE figure it out. 00247 GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Exp); 00248 if (!GEP || SE->hasSCEV(GEP)) 00249 return SE->getSCEV(Exp); 00250 00251 // Analyze all of the subscripts of this getelementptr instruction, looking 00252 // for uses that are determined by the trip count of L. First, skip all 00253 // operands the are not dependent on the IV. 00254 00255 // Build up the base expression. Insert an LLVM cast of the pointer to 00256 // uintptr_t first. 00257 SCEVHandle GEPVal = SCEVUnknown::get(getCastedVersionOf(GEP->getOperand(0))); 00258 00259 gep_type_iterator GTI = gep_type_begin(GEP); 00260 00261 for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { 00262 // If this is a use of a recurrence that we can analyze, and it comes before 00263 // Op does in the GEP operand list, we will handle this when we process this 00264 // operand. 00265 if (const StructType *STy = dyn_cast<StructType>(*GTI)) { 00266 const StructLayout *SL = TD->getStructLayout(STy); 00267 unsigned Idx = cast<ConstantUInt>(GEP->getOperand(i))->getValue(); 00268 uint64_t Offset = SL->MemberOffsets[Idx]; 00269 GEPVal = SCEVAddExpr::get(GEPVal, 00270 SCEVUnknown::getIntegerSCEV(Offset, UIntPtrTy)); 00271 } else { 00272 Value *OpVal = getCastedVersionOf(GEP->getOperand(i)); 00273 SCEVHandle Idx = SE->getSCEV(OpVal); 00274 00275 uint64_t TypeSize = TD->getTypeSize(GTI.getIndexedType()); 00276 if (TypeSize != 1) 00277 Idx = SCEVMulExpr::get(Idx, 00278 SCEVConstant::get(ConstantUInt::get(UIntPtrTy, 00279 TypeSize))); 00280 GEPVal = SCEVAddExpr::get(GEPVal, Idx); 00281 } 00282 } 00283 00284 SE->setSCEV(GEP, GEPVal); 00285 return GEPVal; 00286 } 00287 00288 /// getSCEVStartAndStride - Compute the start and stride of this expression, 00289 /// returning false if the expression is not a start/stride pair, or true if it 00290 /// is. The stride must be a loop invariant expression, but the start may be 00291 /// a mix of loop invariant and loop variant expressions. 00292 static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, 00293 SCEVHandle &Start, SCEVHandle &Stride) { 00294 SCEVHandle TheAddRec = Start; // Initialize to zero. 00295 00296 // If the outer level is an AddExpr, the operands are all start values except 00297 // for a nested AddRecExpr. 00298 if (SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) { 00299 for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i) 00300 if (SCEVAddRecExpr *AddRec = 00301 dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) { 00302 if (AddRec->getLoop() == L) 00303 TheAddRec = SCEVAddExpr::get(AddRec, TheAddRec); 00304 else 00305 return false; // Nested IV of some sort? 00306 } else { 00307 Start = SCEVAddExpr::get(Start, AE->getOperand(i)); 00308 } 00309 00310 } else if (SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SH)) { 00311 TheAddRec = SH; 00312 } else { 00313 return false; // not analyzable. 00314 } 00315 00316 SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec); 00317 if (!AddRec || AddRec->getLoop() != L) return false; 00318 00319 // FIXME: Generalize to non-affine IV's. 00320 if (!AddRec->isAffine()) return false; 00321 00322 Start = SCEVAddExpr::get(Start, AddRec->getOperand(0)); 00323 00324 if (!isa<SCEVConstant>(AddRec->getOperand(1))) 00325 DEBUG(std::cerr << "[" << L->getHeader()->getName() 00326 << "] Variable stride: " << *AddRec << "\n"); 00327 00328 Stride = AddRec->getOperand(1); 00329 // Check that all constant strides are the unsigned type, we don't want to 00330 // have two IV's one of signed stride 4 and one of unsigned stride 4 to not be 00331 // merged. 00332 assert((!isa<SCEVConstant>(Stride) || Stride->getType()->isUnsigned()) && 00333 "Constants should be canonicalized to unsigned!"); 00334 00335 return true; 00336 } 00337 00338 /// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression 00339 /// and now we need to decide whether the user should use the preinc or post-inc 00340 /// value. If this user should use the post-inc version of the IV, return true. 00341 /// 00342 /// Choosing wrong here can break dominance properties (if we choose to use the 00343 /// post-inc value when we cannot) or it can end up adding extra live-ranges to 00344 /// the loop, resulting in reg-reg copies (if we use the pre-inc value when we 00345 /// should use the post-inc value). 00346 static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, 00347 Loop *L, ETForest *EF, Pass *P) { 00348 // If the user is in the loop, use the preinc value. 00349 if (L->contains(User->getParent())) return false; 00350 00351 BasicBlock *LatchBlock = L->getLoopLatch(); 00352 00353 // Ok, the user is outside of the loop. If it is dominated by the latch 00354 // block, use the post-inc value. 00355 if (EF->dominates(LatchBlock, User->getParent())) 00356 return true; 00357 00358 // There is one case we have to be careful of: PHI nodes. These little guys 00359 // can live in blocks that do not dominate the latch block, but (since their 00360 // uses occur in the predecessor block, not the block the PHI lives in) should 00361 // still use the post-inc value. Check for this case now. 00362 PHINode *PN = dyn_cast<PHINode>(User); 00363 if (!PN) return false; // not a phi, not dominated by latch block. 00364 00365 // Look at all of the uses of IV by the PHI node. If any use corresponds to 00366 // a block that is not dominated by the latch block, give up and use the 00367 // preincremented value. 00368 unsigned NumUses = 0; 00369 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 00370 if (PN->getIncomingValue(i) == IV) { 00371 ++NumUses; 00372 if (!EF->dominates(LatchBlock, PN->getIncomingBlock(i))) 00373 return false; 00374 } 00375 00376 // Okay, all uses of IV by PN are in predecessor blocks that really are 00377 // dominated by the latch block. Split the critical edges and use the 00378 // post-incremented value. 00379 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 00380 if (PN->getIncomingValue(i) == IV) { 00381 SplitCriticalEdge(PN->getIncomingBlock(i), PN->getParent(), P); 00382 if (--NumUses == 0) break; 00383 } 00384 00385 return true; 00386 } 00387 00388 00389 00390 /// AddUsersIfInteresting - Inspect the specified instruction. If it is a 00391 /// reducible SCEV, recursively add its users to the IVUsesByStride set and 00392 /// return true. Otherwise, return false. 00393 bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L, 00394 std::set<Instruction*> &Processed) { 00395 if (!I->getType()->isInteger() && !isa<PointerType>(I->getType())) 00396 return false; // Void and FP expressions cannot be reduced. 00397 if (!Processed.insert(I).second) 00398 return true; // Instruction already handled. 00399 00400 // Get the symbolic expression for this instruction. 00401 SCEVHandle ISE = GetExpressionSCEV(I, L); 00402 if (isa<SCEVCouldNotCompute>(ISE)) return false; 00403 00404 // Get the start and stride for this expression. 00405 SCEVHandle Start = SCEVUnknown::getIntegerSCEV(0, ISE->getType()); 00406 SCEVHandle Stride = Start; 00407 if (!getSCEVStartAndStride(ISE, L, Start, Stride)) 00408 return false; // Non-reducible symbolic expression, bail out. 00409 00410 for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;++UI){ 00411 Instruction *User = cast<Instruction>(*UI); 00412 00413 // Do not infinitely recurse on PHI nodes. 00414 if (isa<PHINode>(User) && Processed.count(User)) 00415 continue; 00416 00417 // If this is an instruction defined in a nested loop, or outside this loop, 00418 // don't recurse into it. 00419 bool AddUserToIVUsers = false; 00420 if (LI->getLoopFor(User->getParent()) != L) { 00421 DEBUG(std::cerr << "FOUND USER in other loop: " << *User 00422 << " OF SCEV: " << *ISE << "\n"); 00423 AddUserToIVUsers = true; 00424 } else if (!AddUsersIfInteresting(User, L, Processed)) { 00425 DEBUG(std::cerr << "FOUND USER: " << *User 00426 << " OF SCEV: " << *ISE << "\n"); 00427 AddUserToIVUsers = true; 00428 } 00429 00430 if (AddUserToIVUsers) { 00431 IVUsersOfOneStride &StrideUses = IVUsesByStride[Stride]; 00432 if (StrideUses.Users.empty()) // First occurance of this stride? 00433 StrideOrder.push_back(Stride); 00434 00435 // Okay, we found a user that we cannot reduce. Analyze the instruction 00436 // and decide what to do with it. If we are a use inside of the loop, use 00437 // the value before incrementation, otherwise use it after incrementation. 00438 if (IVUseShouldUsePostIncValue(User, I, L, EF, this)) { 00439 // The value used will be incremented by the stride more than we are 00440 // expecting, so subtract this off. 00441 SCEVHandle NewStart = SCEV::getMinusSCEV(Start, Stride); 00442 StrideUses.addUser(NewStart, User, I); 00443 StrideUses.Users.back().isUseOfPostIncrementedValue = true; 00444 DEBUG(std::cerr << " USING POSTINC SCEV, START=" << *NewStart<< "\n"); 00445 } else { 00446 StrideUses.addUser(Start, User, I); 00447 } 00448 } 00449 } 00450 return true; 00451 } 00452 00453 namespace { 00454 /// BasedUser - For a particular base value, keep information about how we've 00455 /// partitioned the expression so far. 00456 struct BasedUser { 00457 /// Base - The Base value for the PHI node that needs to be inserted for 00458 /// this use. As the use is processed, information gets moved from this 00459 /// field to the Imm field (below). BasedUser values are sorted by this 00460 /// field. 00461 SCEVHandle Base; 00462 00463 /// Inst - The instruction using the induction variable. 00464 Instruction *Inst; 00465 00466 /// OperandValToReplace - The operand value of Inst to replace with the 00467 /// EmittedBase. 00468 Value *OperandValToReplace; 00469 00470 /// Imm - The immediate value that should be added to the base immediately 00471 /// before Inst, because it will be folded into the imm field of the 00472 /// instruction. 00473 SCEVHandle Imm; 00474 00475 /// EmittedBase - The actual value* to use for the base value of this 00476 /// operation. This is null if we should just use zero so far. 00477 Value *EmittedBase; 00478 00479 // isUseOfPostIncrementedValue - True if this should use the 00480 // post-incremented version of this IV, not the preincremented version. 00481 // This can only be set in special cases, such as the terminating setcc 00482 // instruction for a loop and uses outside the loop that are dominated by 00483 // the loop. 00484 bool isUseOfPostIncrementedValue; 00485 00486 BasedUser(IVStrideUse &IVSU) 00487 : Base(IVSU.Offset), Inst(IVSU.User), 00488 OperandValToReplace(IVSU.OperandValToReplace), 00489 Imm(SCEVUnknown::getIntegerSCEV(0, Base->getType())), EmittedBase(0), 00490 isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue) {} 00491 00492 // Once we rewrite the code to insert the new IVs we want, update the 00493 // operands of Inst to use the new expression 'NewBase', with 'Imm' added 00494 // to it. 00495 void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, 00496 SCEVExpander &Rewriter, Loop *L, 00497 Pass *P); 00498 00499 Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, 00500 SCEVExpander &Rewriter, 00501 Instruction *IP, Loop *L); 00502 void dump() const; 00503 }; 00504 } 00505 00506 void BasedUser::dump() const { 00507 std::cerr << " Base=" << *Base; 00508 std::cerr << " Imm=" << *Imm; 00509 if (EmittedBase) 00510 std::cerr << " EB=" << *EmittedBase; 00511 00512 std::cerr << " Inst: " << *Inst; 00513 } 00514 00515 Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, 00516 SCEVExpander &Rewriter, 00517 Instruction *IP, Loop *L) { 00518 // Figure out where we *really* want to insert this code. In particular, if 00519 // the user is inside of a loop that is nested inside of L, we really don't 00520 // want to insert this expression before the user, we'd rather pull it out as 00521 // many loops as possible. 00522 LoopInfo &LI = Rewriter.getLoopInfo(); 00523 Instruction *BaseInsertPt = IP; 00524 00525 // Figure out the most-nested loop that IP is in. 00526 Loop *InsertLoop = LI.getLoopFor(IP->getParent()); 00527 00528 // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out 00529 // the preheader of the outer-most loop where NewBase is not loop invariant. 00530 while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) { 00531 BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator(); 00532 InsertLoop = InsertLoop->getParentLoop(); 00533 } 00534 00535 // If there is no immediate value, skip the next part. 00536 if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Imm)) 00537 if (SC->getValue()->isNullValue()) 00538 return Rewriter.expandCodeFor(NewBase, BaseInsertPt, 00539 OperandValToReplace->getType()); 00540 00541 Value *Base = Rewriter.expandCodeFor(NewBase, BaseInsertPt); 00542 00543 // Always emit the immediate (if non-zero) into the same block as the user. 00544 SCEVHandle NewValSCEV = SCEVAddExpr::get(SCEVUnknown::get(Base), Imm); 00545 return Rewriter.expandCodeFor(NewValSCEV, IP, 00546 OperandValToReplace->getType()); 00547 } 00548 00549 00550 // Once we rewrite the code to insert the new IVs we want, update the 00551 // operands of Inst to use the new expression 'NewBase', with 'Imm' added 00552 // to it. 00553 void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, 00554 SCEVExpander &Rewriter, 00555 Loop *L, Pass *P) { 00556 if (!isa<PHINode>(Inst)) { 00557 Value *NewVal = InsertCodeForBaseAtPosition(NewBase, Rewriter, Inst, L); 00558 // Replace the use of the operand Value with the new Phi we just created. 00559 Inst->replaceUsesOfWith(OperandValToReplace, NewVal); 00560 DEBUG(std::cerr << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst); 00561 return; 00562 } 00563 00564 // PHI nodes are more complex. We have to insert one copy of the NewBase+Imm 00565 // expression into each operand block that uses it. Note that PHI nodes can 00566 // have multiple entries for the same predecessor. We use a map to make sure 00567 // that a PHI node only has a single Value* for each predecessor (which also 00568 // prevents us from inserting duplicate code in some blocks). 00569 std::map<BasicBlock*, Value*> InsertedCode; 00570 PHINode *PN = cast<PHINode>(Inst); 00571 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 00572 if (PN->getIncomingValue(i) == OperandValToReplace) { 00573 // If this is a critical edge, split the edge so that we do not insert the 00574 // code on all predecessor/successor paths. We do this unless this is the 00575 // canonical backedge for this loop, as this can make some inserted code 00576 // be in an illegal position. 00577 BasicBlock *PHIPred = PN->getIncomingBlock(i); 00578 if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 && 00579 (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) { 00580 00581 // First step, split the critical edge. 00582 SplitCriticalEdge(PHIPred, PN->getParent(), P); 00583 00584 // Next step: move the basic block. In particular, if the PHI node 00585 // is outside of the loop, and PredTI is in the loop, we want to 00586 // move the block to be immediately before the PHI block, not 00587 // immediately after PredTI. 00588 if (L->contains(PHIPred) && !L->contains(PN->getParent())) { 00589 BasicBlock *NewBB = PN->getIncomingBlock(i); 00590 NewBB->moveBefore(PN->getParent()); 00591 } 00592 } 00593 00594 Value *&Code = InsertedCode[PN->getIncomingBlock(i)]; 00595 if (!Code) { 00596 // Insert the code into the end of the predecessor block. 00597 Instruction *InsertPt = PN->getIncomingBlock(i)->getTerminator(); 00598 Code = InsertCodeForBaseAtPosition(NewBase, Rewriter, InsertPt, L); 00599 } 00600 00601 // Replace the use of the operand Value with the new Phi we just created. 00602 PN->setIncomingValue(i, Code); 00603 Rewriter.clear(); 00604 } 00605 } 00606 DEBUG(std::cerr << " CHANGED: IMM =" << *Imm << " Inst = " << *Inst); 00607 } 00608 00609 00610 /// isTargetConstant - Return true if the following can be referenced by the 00611 /// immediate field of a target instruction. 00612 static bool isTargetConstant(const SCEVHandle &V, const TargetLowering *TLI) { 00613 if (SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) { 00614 int64_t V = SC->getValue()->getSExtValue(); 00615 if (TLI) 00616 return TLI->isLegalAddressImmediate(V); 00617 else 00618 // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field. 00619 return (V > -(1 << 16) && V < (1 << 16)-1); 00620 } 00621 00622 if (SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) 00623 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(SU->getValue())) 00624 if (CE->getOpcode() == Instruction::Cast) { 00625 Constant *Op0 = CE->getOperand(0); 00626 if (isa<GlobalValue>(Op0) && 00627 TLI && 00628 TLI->isLegalAddressImmediate(cast<GlobalValue>(Op0))) 00629 return true; 00630 } 00631 return false; 00632 } 00633 00634 /// MoveLoopVariantsToImediateField - Move any subexpressions from Val that are 00635 /// loop varying to the Imm operand. 00636 static void MoveLoopVariantsToImediateField(SCEVHandle &Val, SCEVHandle &Imm, 00637 Loop *L) { 00638 if (Val->isLoopInvariant(L)) return; // Nothing to do. 00639 00640 if (SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { 00641 std::vector<SCEVHandle> NewOps; 00642 NewOps.reserve(SAE->getNumOperands()); 00643 00644 for (unsigned i = 0; i != SAE->getNumOperands(); ++i) 00645 if (!SAE->getOperand(i)->isLoopInvariant(L)) { 00646 // If this is a loop-variant expression, it must stay in the immediate 00647 // field of the expression. 00648 Imm = SCEVAddExpr::get(Imm, SAE->getOperand(i)); 00649 } else { 00650 NewOps.push_back(SAE->getOperand(i)); 00651 } 00652 00653 if (NewOps.empty()) 00654 Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); 00655 else 00656 Val = SCEVAddExpr::get(NewOps); 00657 } else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { 00658 // Try to pull immediates out of the start value of nested addrec's. 00659 SCEVHandle Start = SARE->getStart(); 00660 MoveLoopVariantsToImediateField(Start, Imm, L); 00661 00662 std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); 00663 Ops[0] = Start; 00664 Val = SCEVAddRecExpr::get(Ops, SARE->getLoop()); 00665 } else { 00666 // Otherwise, all of Val is variant, move the whole thing over. 00667 Imm = SCEVAddExpr::get(Imm, Val); 00668 Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); 00669 } 00670 } 00671 00672 00673 /// MoveImmediateValues - Look at Val, and pull out any additions of constants 00674 /// that can fit into the immediate field of instructions in the target. 00675 /// Accumulate these immediate values into the Imm value. 00676 static void MoveImmediateValues(const TargetLowering *TLI, 00677 SCEVHandle &Val, SCEVHandle &Imm, 00678 bool isAddress, Loop *L) { 00679 if (SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { 00680 std::vector<SCEVHandle> NewOps; 00681 NewOps.reserve(SAE->getNumOperands()); 00682 00683 for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { 00684 SCEVHandle NewOp = SAE->getOperand(i); 00685 MoveImmediateValues(TLI, NewOp, Imm, isAddress, L); 00686 00687 if (!NewOp->isLoopInvariant(L)) { 00688 // If this is a loop-variant expression, it must stay in the immediate 00689 // field of the expression. 00690 Imm = SCEVAddExpr::get(Imm, NewOp); 00691 } else { 00692 NewOps.push_back(NewOp); 00693 } 00694 } 00695 00696 if (NewOps.empty()) 00697 Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); 00698 else 00699 Val = SCEVAddExpr::get(NewOps); 00700 return; 00701 } else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { 00702 // Try to pull immediates out of the start value of nested addrec's. 00703 SCEVHandle Start = SARE->getStart(); 00704 MoveImmediateValues(TLI, Start, Imm, isAddress, L); 00705 00706 if (Start != SARE->getStart()) { 00707 std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); 00708 Ops[0] = Start; 00709 Val = SCEVAddRecExpr::get(Ops, SARE->getLoop()); 00710 } 00711 return; 00712 } else if (SCEVMulExpr *SME = dyn_cast<SCEVMulExpr>(Val)) { 00713 // Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field. 00714 if (isAddress && isTargetConstant(SME->getOperand(0), TLI) && 00715 SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) { 00716 00717 SCEVHandle SubImm = SCEVUnknown::getIntegerSCEV(0, Val->getType()); 00718 SCEVHandle NewOp = SME->getOperand(1); 00719 MoveImmediateValues(TLI, NewOp, SubImm, isAddress, L); 00720 00721 // If we extracted something out of the subexpressions, see if we can 00722 // simplify this! 00723 if (NewOp != SME->getOperand(1)) { 00724 // Scale SubImm up by "8". If the result is a target constant, we are 00725 // good. 00726 SubImm = SCEVMulExpr::get(SubImm, SME->getOperand(0)); 00727 if (isTargetConstant(SubImm, TLI)) { 00728 // Accumulate the immediate. 00729 Imm = SCEVAddExpr::get(Imm, SubImm); 00730 00731 // Update what is left of 'Val'. 00732 Val = SCEVMulExpr::get(SME->getOperand(0), NewOp); 00733 return; 00734 } 00735 } 00736 } 00737 } 00738 00739 // Loop-variant expressions must stay in the immediate field of the 00740 // expression. 00741 if ((isAddress && isTargetConstant(Val, TLI)) || 00742 !Val->isLoopInvariant(L)) { 00743 Imm = SCEVAddExpr::get(Imm, Val); 00744 Val = SCEVUnknown::getIntegerSCEV(0, Val->getType()); 00745 return; 00746 } 00747 00748 // Otherwise, no immediates to move. 00749 } 00750 00751 00752 /// IncrementAddExprUses - Decompose the specified expression into its added 00753 /// subexpressions, and increment SubExpressionUseCounts for each of these 00754 /// decomposed parts. 00755 static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs, 00756 SCEVHandle Expr) { 00757 if (SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) { 00758 for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j) 00759 SeparateSubExprs(SubExprs, AE->getOperand(j)); 00760 } else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) { 00761 SCEVHandle Zero = SCEVUnknown::getIntegerSCEV(0, Expr->getType()); 00762 if (SARE->getOperand(0) == Zero) { 00763 SubExprs.push_back(Expr); 00764 } else { 00765 // Compute the addrec with zero as its base. 00766 std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); 00767 Ops[0] = Zero; // Start with zero base. 00768 SubExprs.push_back(SCEVAddRecExpr::get(Ops, SARE->getLoop())); 00769 00770 00771 SeparateSubExprs(SubExprs, SARE->getOperand(0)); 00772 } 00773 } else if (!isa<SCEVConstant>(Expr) || 00774 !cast<SCEVConstant>(Expr)->getValue()->isNullValue()) { 00775 // Do not add zero. 00776 SubExprs.push_back(Expr); 00777 } 00778 } 00779 00780 00781 /// RemoveCommonExpressionsFromUseBases - Look through all of the uses in Bases, 00782 /// removing any common subexpressions from it. Anything truly common is 00783 /// removed, accumulated, and returned. This looks for things like (a+b+c) and 00784 /// (a+c+d) -> (a+c). The common expression is *removed* from the Bases. 00785 static SCEVHandle 00786 RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses) { 00787 unsigned NumUses = Uses.size(); 00788 00789 // Only one use? Use its base, regardless of what it is! 00790 SCEVHandle Zero = SCEVUnknown::getIntegerSCEV(0, Uses[0].Base->getType()); 00791 SCEVHandle Result = Zero; 00792 if (NumUses == 1) { 00793 std::swap(Result, Uses[0].Base); 00794 return Result; 00795 } 00796 00797 // To find common subexpressions, count how many of Uses use each expression. 00798 // If any subexpressions are used Uses.size() times, they are common. 00799 std::map<SCEVHandle, unsigned> SubExpressionUseCounts; 00800 00801 // UniqueSubExprs - Keep track of all of the subexpressions we see in the 00802 // order we see them. 00803 std::vector<SCEVHandle> UniqueSubExprs; 00804 00805 std::vector<SCEVHandle> SubExprs; 00806 for (unsigned i = 0; i != NumUses; ++i) { 00807 // If the base is zero (which is common), return zero now, there are no 00808 // CSEs we can find. 00809 if (Uses[i].Base == Zero) return Zero; 00810 00811 // Split the expression into subexprs. 00812 SeparateSubExprs(SubExprs, Uses[i].Base); 00813 // Add one to SubExpressionUseCounts for each subexpr present. 00814 for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) 00815 if (++SubExpressionUseCounts[SubExprs[j]] == 1) 00816 UniqueSubExprs.push_back(SubExprs[j]); 00817 SubExprs.clear(); 00818 } 00819 00820 // Now that we know how many times each is used, build Result. Iterate over 00821 // UniqueSubexprs so that we have a stable ordering. 00822 for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) { 00823 std::map<SCEVHandle, unsigned>::iterator I = 00824 SubExpressionUseCounts.find(UniqueSubExprs[i]); 00825 assert(I != SubExpressionUseCounts.end() && "Entry not found?"); 00826 if (I->second == NumUses) { // Found CSE! 00827 Result = SCEVAddExpr::get(Result, I->first); 00828 } else { 00829 // Remove non-cse's from SubExpressionUseCounts. 00830 SubExpressionUseCounts.erase(I); 00831 } 00832 } 00833 00834 // If we found no CSE's, return now. 00835 if (Result == Zero) return Result; 00836 00837 // Otherwise, remove all of the CSE's we found from each of the base values. 00838 for (unsigned i = 0; i != NumUses; ++i) { 00839 // Split the expression into subexprs. 00840 SeparateSubExprs(SubExprs, Uses[i].Base); 00841 00842 // Remove any common subexpressions. 00843 for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) 00844 if (SubExpressionUseCounts.count(SubExprs[j])) { 00845 SubExprs.erase(SubExprs.begin()+j); 00846 --j; --e; 00847 } 00848 00849 // Finally, the non-shared expressions together. 00850 if (SubExprs.empty()) 00851 Uses[i].Base = Zero; 00852 else 00853 Uses[i].Base = SCEVAddExpr::get(SubExprs); 00854 SubExprs.clear(); 00855 } 00856 00857 return Result; 00858 } 00859 00860 /// isZero - returns true if the scalar evolution expression is zero. 00861 /// 00862 static bool isZero(SCEVHandle &V) { 00863 if (SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) 00864 return SC->getValue()->getRawValue() == 0; 00865 return false; 00866 } 00867 00868 00869 /// CheckForIVReuse - Returns the multiple if the stride is the multiple 00870 /// of a previous stride and it is a legal value for the target addressing 00871 /// mode scale component. This allows the users of this stride to be rewritten 00872 /// as prev iv * factor. It returns 0 if no reuse is possible. 00873 unsigned LoopStrengthReduce::CheckForIVReuse(const SCEVHandle &Stride, 00874 IVExpr &IV) { 00875 if (!TLI) return 0; 00876 00877 if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) { 00878 int64_t SInt = SC->getValue()->getSExtValue(); 00879 if (SInt == 1) return 0; 00880 00881 for (TargetLowering::legal_am_scale_iterator 00882 I = TLI->legal_am_scale_begin(), E = TLI->legal_am_scale_end(); 00883 I != E; ++I) { 00884 unsigned Scale = *I; 00885 if (unsigned(abs(SInt)) < Scale || (SInt % Scale) != 0) 00886 continue; 00887 std::map<SCEVHandle, IVsOfOneStride>::iterator SI = 00888 IVsByStride.find(SCEVUnknown::getIntegerSCEV(SInt/Scale, Type::UIntTy)); 00889 if (SI == IVsByStride.end()) 00890 continue; 00891 for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), 00892 IE = SI->second.IVs.end(); II != IE; ++II) 00893 // FIXME: Only handle base == 0 for now. 00894 if (isZero(II->Base)) { 00895 IV = *II; 00896 return Scale; 00897 } 00898 } 00899 } 00900 00901 return 0; 00902 } 00903 00904 00905 /// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single 00906 /// stride of IV. All of the users may have different starting values, and this 00907 /// may not be the only stride (we know it is if isOnlyStride is true). 00908 void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride, 00909 IVUsersOfOneStride &Uses, 00910 Loop *L, 00911 bool isOnlyStride) { 00912 // Transform our list of users and offsets to a bit more complex table. In 00913 // this new vector, each 'BasedUser' contains 'Base' the base of the 00914 // strided accessas well as the old information from Uses. We progressively 00915 // move information from the Base field to the Imm field, until we eventually 00916 // have the full access expression to rewrite the use. 00917 std::vector<BasedUser> UsersToProcess; 00918 UsersToProcess.reserve(Uses.Users.size()); 00919 for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i) { 00920 UsersToProcess.push_back(Uses.Users[i]); 00921 00922 // Move any loop invariant operands from the offset field to the immediate 00923 // field of the use, so that we don't try to use something before it is 00924 // computed. 00925 MoveLoopVariantsToImediateField(UsersToProcess.back().Base, 00926 UsersToProcess.back().Imm, L); 00927 assert(UsersToProcess.back().Base->isLoopInvariant(L) && 00928 "Base value is not loop invariant!"); 00929 } 00930 00931 // Check if it is possible to reuse a IV with stride that is factor of this 00932 // stride. And the multiple is a number that can be encoded in the scale 00933 // field of the target addressing mode. 00934 PHINode *NewPHI = NULL; 00935 Value *IncV = NULL; 00936 IVExpr ReuseIV; 00937 unsigned RewriteFactor = CheckForIVReuse(Stride, ReuseIV); 00938 if (RewriteFactor != 0) { 00939 DEBUG(std::cerr << "BASED ON IV of STRIDE " << *ReuseIV.Stride 00940 << " and BASE " << *ReuseIV.Base << " :\n"); 00941 NewPHI = ReuseIV.PHI; 00942 IncV = ReuseIV.IncV; 00943 } 00944 00945 // We now have a whole bunch of uses of like-strided induction variables, but 00946 // they might all have different bases. We want to emit one PHI node for this 00947 // stride which we fold as many common expressions (between the IVs) into as 00948 // possible. Start by identifying the common expressions in the base values 00949 // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find 00950 // "A+B"), emit it to the preheader, then remove the expression from the 00951 // UsersToProcess base values. 00952 SCEVHandle CommonExprs = 00953 RemoveCommonExpressionsFromUseBases(UsersToProcess); 00954 00955 // Next, figure out what we can represent in the immediate fields of 00956 // instructions. If we can represent anything there, move it to the imm 00957 // fields of the BasedUsers. We do this so that it increases the commonality 00958 // of the remaining uses. 00959 for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { 00960 // If the user is not in the current loop, this means it is using the exit 00961 // value of the IV. Do not put anything in the base, make sure it's all in 00962 // the immediate field to allow as much factoring as possible. 00963 if (!L->contains(UsersToProcess[i].Inst->getParent())) { 00964 UsersToProcess[i].Imm = SCEVAddExpr::get(UsersToProcess[i].Imm, 00965 UsersToProcess[i].Base); 00966 UsersToProcess[i].Base = 00967 SCEVUnknown::getIntegerSCEV(0, UsersToProcess[i].Base->getType()); 00968 } else { 00969 00970 // Addressing modes can be folded into loads and stores. Be careful that 00971 // the store is through the expression, not of the expression though. 00972 bool isAddress = isa<LoadInst>(UsersToProcess[i].Inst); 00973 if (StoreInst *SI = dyn_cast<StoreInst>(UsersToProcess[i].Inst)) 00974 if (SI->getOperand(1) == UsersToProcess[i].OperandValToReplace) 00975 isAddress = true; 00976 00977 MoveImmediateValues(TLI, UsersToProcess[i].Base, UsersToProcess[i].Imm, 00978 isAddress, L); 00979 } 00980 } 00981 00982 // Now that we know what we need to do, insert the PHI node itself. 00983 // 00984 DEBUG(std::cerr << "INSERTING IV of STRIDE " << *Stride << " and BASE " 00985 << *CommonExprs << " :\n"); 00986 00987 SCEVExpander Rewriter(*SE, *LI); 00988 SCEVExpander PreheaderRewriter(*SE, *LI); 00989 00990 BasicBlock *Preheader = L->getLoopPreheader(); 00991 Instruction *PreInsertPt = Preheader->getTerminator(); 00992 Instruction *PhiInsertBefore = L->getHeader()->begin(); 00993 00994 BasicBlock *LatchBlock = L->getLoopLatch(); 00995 00996 const Type *ReplacedTy = CommonExprs->getType(); 00997 00998 // Emit the initial base value into the loop preheader. 00999 Value *CommonBaseV 01000 = PreheaderRewriter.expandCodeFor(CommonExprs, PreInsertPt, 01001 ReplacedTy); 01002 01003 if (RewriteFactor == 0) { 01004 // Create a new Phi for this base, and stick it in the loop header. 01005 NewPHI = new PHINode(ReplacedTy, "iv.", PhiInsertBefore); 01006 ++NumInserted; 01007 01008 // Add common base to the new Phi node. 01009 NewPHI->addIncoming(CommonBaseV, Preheader); 01010 01011 // Insert the stride into the preheader. 01012 Value *StrideV = PreheaderRewriter.expandCodeFor(Stride, PreInsertPt, 01013 ReplacedTy); 01014 if (!isa<ConstantInt>(StrideV)) ++NumVariable; 01015 01016 // Emit the increment of the base value before the terminator of the loop 01017 // latch block, and add it to the Phi node. 01018 SCEVHandle IncExp = SCEVAddExpr::get(SCEVUnknown::get(NewPHI), 01019 SCEVUnknown::get(StrideV)); 01020 01021 IncV = Rewriter.expandCodeFor(IncExp, LatchBlock->getTerminator(), 01022 ReplacedTy); 01023 IncV->setName(NewPHI->getName()+".inc"); 01024 NewPHI->addIncoming(IncV, LatchBlock); 01025 01026 // Remember this in case a later stride is multiple of this. 01027 IVsByStride[Stride].addIV(Stride, CommonExprs, NewPHI, IncV); 01028 } else { 01029 Constant *C = dyn_cast<Constant>(CommonBaseV); 01030 if (!C || 01031 (!C->isNullValue() && 01032 !isTargetConstant(SCEVUnknown::get(CommonBaseV), TLI))) 01033 // We want the common base emitted into the preheader! 01034 CommonBaseV = new CastInst(CommonBaseV, CommonBaseV->getType(), 01035 "commonbase", PreInsertPt); 01036 } 01037 01038 // Sort by the base value, so that all IVs with identical bases are next to 01039 // each other. 01040 while (!UsersToProcess.empty()) { 01041 SCEVHandle Base = UsersToProcess.back().Base; 01042 01043 DEBUG(std::cerr << " INSERTING code for BASE = " << *Base << ":\n"); 01044 01045 // Emit the code for Base into the preheader. 01046 Value *BaseV = PreheaderRewriter.expandCodeFor(Base, PreInsertPt, 01047 ReplacedTy); 01048 01049 // If BaseV is a constant other than 0, make sure that it gets inserted into 01050 // the preheader, instead of being forward substituted into the uses. We do 01051 // this by forcing a noop cast to be inserted into the preheader in this 01052 // case. 01053 if (Constant *C = dyn_cast<Constant>(BaseV)) 01054 if (!C->isNullValue() && !isTargetConstant(Base, TLI)) { 01055 // We want this constant emitted into the preheader! 01056 BaseV = new CastInst(BaseV, BaseV->getType(), "preheaderinsert", 01057 PreInsertPt); 01058 } 01059 01060 // Emit the code to add the immediate offset to the Phi value, just before 01061 // the instructions that we identified as using this stride and base. 01062 unsigned ScanPos = 0; 01063 do { 01064 BasedUser &User = UsersToProcess.back(); 01065 01066 // If this instruction wants to use the post-incremented value, move it 01067 // after the post-inc and use its value instead of the PHI. 01068 Value *RewriteOp = NewPHI; 01069 if (User.isUseOfPostIncrementedValue) { 01070 RewriteOp = IncV; 01071 01072 // If this user is in the loop, make sure it is the last thing in the 01073 // loop to ensure it is dominated by the increment. 01074 if (L->contains(User.Inst->getParent())) 01075 User.Inst->moveBefore(LatchBlock->getTerminator()); 01076 } 01077 SCEVHandle RewriteExpr = SCEVUnknown::get(RewriteOp); 01078 01079 // Clear the SCEVExpander's expression map so that we are guaranteed 01080 // to have the code emitted where we expect it. 01081 Rewriter.clear(); 01082 01083 // If we are reusing the iv, then it must be multiplied by a constant 01084 // factor take advantage of addressing mode scale component. 01085 if (RewriteFactor != 0) { 01086 RewriteExpr = 01087 SCEVMulExpr::get(SCEVUnknown::getIntegerSCEV(RewriteFactor, 01088 RewriteExpr->getType()), 01089 RewriteExpr); 01090 01091 // The common base is emitted in the loop preheader. But since we 01092 // are reusing an IV, it has not been used to initialize the PHI node. 01093 // Add it to the expression used to rewrite the uses. 01094 if (!isa<ConstantInt>(CommonBaseV) || 01095 !cast<ConstantInt>(CommonBaseV)->isNullValue()) 01096 RewriteExpr = SCEVAddExpr::get(RewriteExpr, 01097 SCEVUnknown::get(CommonBaseV)); 01098 } 01099 01100 // Now that we know what we need to do, insert code before User for the 01101 // immediate and any loop-variant expressions. 01102 if (!isa<ConstantInt>(BaseV) || !cast<ConstantInt>(BaseV)->isNullValue()) 01103 // Add BaseV to the PHI value if needed. 01104 RewriteExpr = SCEVAddExpr::get(RewriteExpr, SCEVUnknown::get(BaseV)); 01105 01106 User.RewriteInstructionToUseNewBase(RewriteExpr, Rewriter, L, this); 01107 01108 // Mark old value we replaced as possibly dead, so that it is elminated 01109 // if we just replaced the last use of that value. 01110 DeadInsts.insert(cast<Instruction>(User.OperandValToReplace)); 01111 01112 UsersToProcess.pop_back(); 01113 ++NumReduced; 01114 01115 // If there are any more users to process with the same base, move one of 01116 // them to the end of the list so that we will process it. 01117 if (!UsersToProcess.empty()) { 01118 for (unsigned e = UsersToProcess.size(); ScanPos != e; ++ScanPos) 01119 if (UsersToProcess[ScanPos].Base == Base) { 01120 std::swap(UsersToProcess[ScanPos], UsersToProcess.back()); 01121 break; 01122 } 01123 } 01124 } while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base); 01125 // TODO: Next, find out which base index is the most common, pull it out. 01126 } 01127 01128 // IMPORTANT TODO: Figure out how to partition the IV's with this stride, but 01129 // different starting values, into different PHIs. 01130 } 01131 01132 // OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar 01133 // uses in the loop, look to see if we can eliminate some, in favor of using 01134 // common indvars for the different uses. 01135 void LoopStrengthReduce::OptimizeIndvars(Loop *L) { 01136 // TODO: implement optzns here. 01137 01138 01139 01140 01141 // Finally, get the terminating condition for the loop if possible. If we 01142 // can, we want to change it to use a post-incremented version of its 01143 // induction variable, to allow coalescing the live ranges for the IV into 01144 // one register value. 01145 PHINode *SomePHI = cast<PHINode>(L->getHeader()->begin()); 01146 BasicBlock *Preheader = L->getLoopPreheader(); 01147 BasicBlock *LatchBlock = 01148 SomePHI->getIncomingBlock(SomePHI->getIncomingBlock(0) == Preheader); 01149 BranchInst *TermBr = dyn_cast<BranchInst>(LatchBlock->getTerminator()); 01150 if (!TermBr || TermBr->isUnconditional() || 01151 !isa<SetCondInst>(TermBr->getCondition())) 01152 return; 01153 SetCondInst *Cond = cast<SetCondInst>(TermBr->getCondition()); 01154 01155 // Search IVUsesByStride to find Cond's IVUse if there is one. 01156 IVStrideUse *CondUse = 0; 01157 const SCEVHandle *CondStride = 0; 01158 01159 for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e && !CondUse; 01160 ++Stride) { 01161 std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI = 01162 IVUsesByStride.find(StrideOrder[Stride]); 01163 assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); 01164 01165 for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(), 01166 E = SI->second.Users.end(); UI != E; ++UI) 01167 if (UI->User == Cond) { 01168 CondUse = &*UI; 01169 CondStride = &SI->first; 01170 // NOTE: we could handle setcc instructions with multiple uses here, but 01171 // InstCombine does it as well for simple uses, it's not clear that it 01172 // occurs enough in real life to handle. 01173 break; 01174 } 01175 } 01176 if (!CondUse) return; // setcc doesn't use the IV. 01177 01178 // setcc stride is complex, don't mess with users. 01179 // FIXME: Evaluate whether this is a good idea or not. 01180 if (!isa<SCEVConstant>(*CondStride)) return; 01181 01182 // It's possible for the setcc instruction to be anywhere in the loop, and 01183 // possible for it to have multiple users. If it is not immediately before 01184 // the latch block branch, move it. 01185 if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) { 01186 if (Cond->hasOneUse()) { // Condition has a single use, just move it. 01187 Cond->moveBefore(TermBr); 01188 } else { 01189 // Otherwise, clone the terminating condition and insert into the loopend. 01190 Cond = cast<SetCondInst>(Cond->clone()); 01191 Cond->setName(L->getHeader()->getName() + ".termcond"); 01192 LatchBlock->getInstList().insert(TermBr, Cond); 01193 01194 // Clone the IVUse, as the old use still exists! 01195 IVUsesByStride[*CondStride].addUser(CondUse->Offset, Cond, 01196 CondUse->OperandValToReplace); 01197 CondUse = &IVUsesByStride[*CondStride].Users.back(); 01198 } 01199 } 01200 01201 // If we get to here, we know that we can transform the setcc instruction to 01202 // use the post-incremented version of the IV, allowing us to coalesce the 01203 // live ranges for the IV correctly. 01204 CondUse->Offset = SCEV::getMinusSCEV(CondUse->Offset, *CondStride); 01205 CondUse->isUseOfPostIncrementedValue = true; 01206 } 01207 01208 namespace { 01209 // Constant strides come first which in turns are sorted by their absolute 01210 // values. If absolute values are the same, then positive strides comes first. 01211 // e.g. 01212 // 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X 01213 struct StrideCompare { 01214 bool operator()(const SCEVHandle &LHS, const SCEVHandle &RHS) { 01215 SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS); 01216 SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS); 01217 if (LHSC && RHSC) { 01218 int64_t LV = LHSC->getValue()->getSExtValue(); 01219 int64_t RV = RHSC->getValue()->getSExtValue(); 01220 uint64_t ALV = (LV < 0) ? -LV : LV; 01221 uint64_t ARV = (RV < 0) ? -RV : RV; 01222 if (ALV == ARV) 01223 return LV > RV; 01224 else 01225 return ALV < ARV; 01226 } 01227 return (LHSC && !RHSC); 01228 } 01229 }; 01230 } 01231 01232 void LoopStrengthReduce::runOnLoop(Loop *L) { 01233 // First step, transform all loops nesting inside of this loop. 01234 for (LoopInfo::iterator I = L->begin(), E = L->end(); I != E; ++I) 01235 runOnLoop(*I); 01236 01237 // Next, find all uses of induction variables in this loop, and catagorize 01238 // them by stride. Start by finding all of the PHI nodes in the header for 01239 // this loop. If they are induction variables, inspect their uses. 01240 std::set<Instruction*> Processed; // Don't reprocess instructions. 01241 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) 01242 AddUsersIfInteresting(I, L, Processed); 01243 01244 // If we have nothing to do, return. 01245 if (IVUsesByStride.empty()) return; 01246 01247 // Optimize induction variables. Some indvar uses can be transformed to use 01248 // strides that will be needed for other purposes. A common example of this 01249 // is the exit test for the loop, which can often be rewritten to use the 01250 // computation of some other indvar to decide when to terminate the loop. 01251 OptimizeIndvars(L); 01252 01253 01254 // FIXME: We can widen subreg IV's here for RISC targets. e.g. instead of 01255 // doing computation in byte values, promote to 32-bit values if safe. 01256 01257 // FIXME: Attempt to reuse values across multiple IV's. In particular, we 01258 // could have something like "for(i) { foo(i*8); bar(i*16) }", which should be 01259 // codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC. Need 01260 // to be careful that IV's are all the same type. Only works for intptr_t 01261 // indvars. 01262 01263 // If we only have one stride, we can more aggressively eliminate some things. 01264 bool HasOneStride = IVUsesByStride.size() == 1; 01265 01266 #ifndef NDEBUG 01267 DEBUG(std::cerr << "\nLSR on "); 01268 DEBUG(L->dump()); 01269 #endif 01270 01271 // IVsByStride keeps IVs for one particular loop. 01272 IVsByStride.clear(); 01273 01274 // Sort the StrideOrder so we process larger strides first. 01275 std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare()); 01276 01277 // Note: this processes each stride/type pair individually. All users passed 01278 // into StrengthReduceStridedIVUsers have the same type AND stride. Also, 01279 // node that we iterate over IVUsesByStride indirectly by using StrideOrder. 01280 // This extra layer of indirection makes the ordering of strides deterministic 01281 // - not dependent on map order. 01282 for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) { 01283 std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI = 01284 IVUsesByStride.find(StrideOrder[Stride]); 01285 assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); 01286 StrengthReduceStridedIVUsers(SI->first, SI->second, L, HasOneStride); 01287 } 01288 01289 // Clean up after ourselves 01290 if (!DeadInsts.empty()) { 01291 DeleteTriviallyDeadInstructions(DeadInsts); 01292 01293 BasicBlock::iterator I = L->getHeader()->begin(); 01294 PHINode *PN; 01295 while ((PN = dyn_cast<PHINode>(I))) { 01296 ++I; // Preincrement iterator to avoid invalidating it when deleting PN. 01297 01298 // At this point, we know that we have killed one or more GEP 01299 // instructions. It is worth checking to see if the cann indvar is also 01300 // dead, so that we can remove it as well. The requirements for the cann 01301 // indvar to be considered dead are: 01302 // 1. the cann indvar has one use 01303 // 2. the use is an add instruction 01304 // 3. the add has one use 01305 // 4. the add is used by the cann indvar 01306 // If all four cases above are true, then we can remove both the add and 01307 // the cann indvar. 01308 // FIXME: this needs to eliminate an induction variable even if it's being 01309 // compared against some value to decide loop termination. 01310 if (PN->hasOneUse()) { 01311 BinaryOperator *BO = dyn_cast<BinaryOperator>(*(PN->use_begin())); 01312 if (BO && BO->hasOneUse()) { 01313 if (PN == *(BO->use_begin())) { 01314 DeadInsts.insert(BO); 01315 // Break the cycle, then delete the PHI. 01316 PN->replaceAllUsesWith(UndefValue::get(PN->getType())); 01317 SE->deleteInstructionFromRecords(PN); 01318 PN->eraseFromParent(); 01319 } 01320 } 01321 } 01322 } 01323 DeleteTriviallyDeadInstructions(DeadInsts); 01324 } 01325 01326 CastedPointers.clear(); 01327 IVUsesByStride.clear(); 01328 StrideOrder.clear(); 01329 return; 01330 }