LLVM API Documentation

LoopStrengthReduce.cpp

Go to the documentation of this file.
00001 //===- LoopStrengthReduce.cpp - Strength Reduce GEPs in Loops -------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by Nate Begeman and is distributed under the
00006 // University of Illinois Open Source License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This pass performs a strength reduction on array references inside loops that
00011 // have as one or more of their components the loop induction variable.  This is
00012 // accomplished by creating a new Value to hold the initial value of the array
00013 // access for the first iteration, and then creating a new GEP instruction in
00014 // the loop to increment the value by the appropriate amount.
00015 //
00016 //===----------------------------------------------------------------------===//
00017 
00018 #define DEBUG_TYPE "loop-reduce"
00019 #include "llvm/Transforms/Scalar.h"
00020 #include "llvm/Constants.h"
00021 #include "llvm/Instructions.h"
00022 #include "llvm/Type.h"
00023 #include "llvm/DerivedTypes.h"
00024 #include "llvm/Analysis/Dominators.h"
00025 #include "llvm/Analysis/LoopInfo.h"
00026 #include "llvm/Analysis/ScalarEvolutionExpander.h"
00027 #include "llvm/Support/CFG.h"
00028 #include "llvm/Support/GetElementPtrTypeIterator.h"
00029 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
00030 #include "llvm/Transforms/Utils/Local.h"
00031 #include "llvm/Target/TargetData.h"
00032 #include "llvm/ADT/Statistic.h"
00033 #include "llvm/Support/Debug.h"
00034 #include "llvm/Target/TargetLowering.h"
00035 #include <algorithm>
00036 #include <iostream>
00037 #include <set>
00038 using namespace llvm;
00039 
00040 namespace {
00041   Statistic<> NumReduced ("loop-reduce", "Number of GEPs strength reduced");
00042   Statistic<> NumInserted("loop-reduce", "Number of PHIs inserted");
00043   Statistic<> NumVariable("loop-reduce","Number of PHIs with variable strides");
00044 
00045   /// IVStrideUse - Keep track of one use of a strided induction variable, where
00046   /// the stride is stored externally.  The Offset member keeps track of the 
00047   /// offset from the IV, User is the actual user of the operand, and 'Operand'
00048   /// is the operand # of the User that is the use.
00049   struct IVStrideUse {
00050     SCEVHandle Offset;
00051     Instruction *User;
00052     Value *OperandValToReplace;
00053 
00054     // isUseOfPostIncrementedValue - True if this should use the
00055     // post-incremented version of this IV, not the preincremented version.
00056     // This can only be set in special cases, such as the terminating setcc
00057     // instruction for a loop or uses dominated by the loop.
00058     bool isUseOfPostIncrementedValue;
00059     
00060     IVStrideUse(const SCEVHandle &Offs, Instruction *U, Value *O)
00061       : Offset(Offs), User(U), OperandValToReplace(O),
00062         isUseOfPostIncrementedValue(false) {}
00063   };
00064   
00065   /// IVUsersOfOneStride - This structure keeps track of all instructions that
00066   /// have an operand that is based on the trip count multiplied by some stride.
00067   /// The stride for all of these users is common and kept external to this
00068   /// structure.
00069   struct IVUsersOfOneStride {
00070     /// Users - Keep track of all of the users of this stride as well as the
00071     /// initial value and the operand that uses the IV.
00072     std::vector<IVStrideUse> Users;
00073     
00074     void addUser(const SCEVHandle &Offset,Instruction *User, Value *Operand) {
00075       Users.push_back(IVStrideUse(Offset, User, Operand));
00076     }
00077   };
00078 
00079   /// IVInfo - This structure keeps track of one IV expression inserted during
00080   /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as
00081   /// well as the PHI node and increment value created for rewrite.
00082   struct IVExpr {
00083     SCEVHandle  Stride;
00084     SCEVHandle  Base;
00085     PHINode    *PHI;
00086     Value      *IncV;
00087 
00088     IVExpr()
00089       : Stride(SCEVUnknown::getIntegerSCEV(0, Type::UIntTy)),
00090         Base  (SCEVUnknown::getIntegerSCEV(0, Type::UIntTy)) {}
00091     IVExpr(const SCEVHandle &stride, const SCEVHandle &base, PHINode *phi,
00092            Value *incv)
00093       : Stride(stride), Base(base), PHI(phi), IncV(incv) {}
00094   };
00095 
00096   /// IVsOfOneStride - This structure keeps track of all IV expression inserted
00097   /// during StrengthReduceStridedIVUsers for a particular stride of the IV.
00098   struct IVsOfOneStride {
00099     std::vector<IVExpr> IVs;
00100 
00101     void addIV(const SCEVHandle &Stride, const SCEVHandle &Base, PHINode *PHI,
00102                Value *IncV) {
00103       IVs.push_back(IVExpr(Stride, Base, PHI, IncV));
00104     }
00105   };
00106 
00107   class LoopStrengthReduce : public FunctionPass {
00108     LoopInfo *LI;
00109     ETForest *EF;
00110     ScalarEvolution *SE;
00111     const TargetData *TD;
00112     const Type *UIntPtrTy;
00113     bool Changed;
00114 
00115     /// IVUsesByStride - Keep track of all uses of induction variables that we
00116     /// are interested in.  The key of the map is the stride of the access.
00117     std::map<SCEVHandle, IVUsersOfOneStride> IVUsesByStride;
00118 
00119     /// IVsByStride - Keep track of all IVs that have been inserted for a
00120     /// particular stride.
00121     std::map<SCEVHandle, IVsOfOneStride> IVsByStride;
00122 
00123     /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable:
00124     /// We use this to iterate over the IVUsesByStride collection without being
00125     /// dependent on random ordering of pointers in the process.
00126     std::vector<SCEVHandle> StrideOrder;
00127 
00128     /// CastedValues - As we need to cast values to uintptr_t, this keeps track
00129     /// of the casted version of each value.  This is accessed by
00130     /// getCastedVersionOf.
00131     std::map<Value*, Value*> CastedPointers;
00132 
00133     /// DeadInsts - Keep track of instructions we may have made dead, so that
00134     /// we can remove them after we are done working.
00135     std::set<Instruction*> DeadInsts;
00136 
00137     /// TLI - Keep a pointer of a TargetLowering to consult for determining
00138     /// transformation profitability.
00139     const TargetLowering *TLI;
00140 
00141   public:
00142     LoopStrengthReduce(const TargetLowering *tli = NULL)
00143       : TLI(tli) {
00144     }
00145 
00146     virtual bool runOnFunction(Function &) {
00147       LI = &getAnalysis<LoopInfo>();
00148       EF = &getAnalysis<ETForest>();
00149       SE = &getAnalysis<ScalarEvolution>();
00150       TD = &getAnalysis<TargetData>();
00151       UIntPtrTy = TD->getIntPtrType();
00152       Changed = false;
00153 
00154       for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
00155         runOnLoop(*I);
00156       
00157       return Changed;
00158     }
00159 
00160     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
00161       // We split critical edges, so we change the CFG.  However, we do update
00162       // many analyses if they are around.
00163       AU.addPreservedID(LoopSimplifyID);
00164       AU.addPreserved<LoopInfo>();
00165       AU.addPreserved<DominatorSet>();
00166       AU.addPreserved<ETForest>();
00167       AU.addPreserved<ImmediateDominators>();
00168       AU.addPreserved<DominanceFrontier>();
00169       AU.addPreserved<DominatorTree>();
00170 
00171       AU.addRequiredID(LoopSimplifyID);
00172       AU.addRequired<LoopInfo>();
00173       AU.addRequired<ETForest>();
00174       AU.addRequired<TargetData>();
00175       AU.addRequired<ScalarEvolution>();
00176     }
00177     
00178     /// getCastedVersionOf - Return the specified value casted to uintptr_t.
00179     ///
00180     Value *getCastedVersionOf(Value *V);
00181 private:
00182     void runOnLoop(Loop *L);
00183     bool AddUsersIfInteresting(Instruction *I, Loop *L,
00184                                std::set<Instruction*> &Processed);
00185     SCEVHandle GetExpressionSCEV(Instruction *E, Loop *L);
00186 
00187     void OptimizeIndvars(Loop *L);
00188 
00189     unsigned CheckForIVReuse(const SCEVHandle &Stride, IVExpr &IV);
00190 
00191     void StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
00192                                       IVUsersOfOneStride &Uses,
00193                                       Loop *L, bool isOnlyStride);
00194     void DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts);
00195   };
00196   RegisterOpt<LoopStrengthReduce> X("loop-reduce",
00197                                     "Loop Strength Reduction");
00198 }
00199 
00200 FunctionPass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
00201   return new LoopStrengthReduce(TLI);
00202 }
00203 
00204 /// getCastedVersionOf - Return the specified value casted to uintptr_t.
00205 ///
00206 Value *LoopStrengthReduce::getCastedVersionOf(Value *V) {
00207   if (V->getType() == UIntPtrTy) return V;
00208   if (Constant *CB = dyn_cast<Constant>(V))
00209     return ConstantExpr::getCast(CB, UIntPtrTy);
00210 
00211   Value *&New = CastedPointers[V];
00212   if (New) return New;
00213   
00214   New = SCEVExpander::InsertCastOfTo(V, UIntPtrTy);
00215   DeadInsts.insert(cast<Instruction>(New));
00216   return New;
00217 }
00218 
00219 
00220 /// DeleteTriviallyDeadInstructions - If any of the instructions is the
00221 /// specified set are trivially dead, delete them and see if this makes any of
00222 /// their operands subsequently dead.
00223 void LoopStrengthReduce::
00224 DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts) {
00225   while (!Insts.empty()) {
00226     Instruction *I = *Insts.begin();
00227     Insts.erase(Insts.begin());
00228     if (isInstructionTriviallyDead(I)) {
00229       for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
00230         if (Instruction *U = dyn_cast<Instruction>(I->getOperand(i)))
00231           Insts.insert(U);
00232       SE->deleteInstructionFromRecords(I);
00233       I->eraseFromParent();
00234       Changed = true;
00235     }
00236   }
00237 }
00238 
00239 
00240 /// GetExpressionSCEV - Compute and return the SCEV for the specified
00241 /// instruction.
00242 SCEVHandle LoopStrengthReduce::GetExpressionSCEV(Instruction *Exp, Loop *L) {
00243   // Scalar Evolutions doesn't know how to compute SCEV's for GEP instructions.
00244   // If this is a GEP that SE doesn't know about, compute it now and insert it.
00245   // If this is not a GEP, or if we have already done this computation, just let
00246   // SE figure it out.
00247   GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Exp);
00248   if (!GEP || SE->hasSCEV(GEP))
00249     return SE->getSCEV(Exp);
00250     
00251   // Analyze all of the subscripts of this getelementptr instruction, looking
00252   // for uses that are determined by the trip count of L.  First, skip all
00253   // operands the are not dependent on the IV.
00254 
00255   // Build up the base expression.  Insert an LLVM cast of the pointer to
00256   // uintptr_t first.
00257   SCEVHandle GEPVal = SCEVUnknown::get(getCastedVersionOf(GEP->getOperand(0)));
00258 
00259   gep_type_iterator GTI = gep_type_begin(GEP);
00260   
00261   for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
00262     // If this is a use of a recurrence that we can analyze, and it comes before
00263     // Op does in the GEP operand list, we will handle this when we process this
00264     // operand.
00265     if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
00266       const StructLayout *SL = TD->getStructLayout(STy);
00267       unsigned Idx = cast<ConstantUInt>(GEP->getOperand(i))->getValue();
00268       uint64_t Offset = SL->MemberOffsets[Idx];
00269       GEPVal = SCEVAddExpr::get(GEPVal,
00270                                 SCEVUnknown::getIntegerSCEV(Offset, UIntPtrTy));
00271     } else {
00272       Value *OpVal = getCastedVersionOf(GEP->getOperand(i));
00273       SCEVHandle Idx = SE->getSCEV(OpVal);
00274 
00275       uint64_t TypeSize = TD->getTypeSize(GTI.getIndexedType());
00276       if (TypeSize != 1)
00277         Idx = SCEVMulExpr::get(Idx,
00278                                SCEVConstant::get(ConstantUInt::get(UIntPtrTy,
00279                                                                    TypeSize)));
00280       GEPVal = SCEVAddExpr::get(GEPVal, Idx);
00281     }
00282   }
00283 
00284   SE->setSCEV(GEP, GEPVal);
00285   return GEPVal;
00286 }
00287 
00288 /// getSCEVStartAndStride - Compute the start and stride of this expression,
00289 /// returning false if the expression is not a start/stride pair, or true if it
00290 /// is.  The stride must be a loop invariant expression, but the start may be
00291 /// a mix of loop invariant and loop variant expressions.
00292 static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L,
00293                                   SCEVHandle &Start, SCEVHandle &Stride) {
00294   SCEVHandle TheAddRec = Start;   // Initialize to zero.
00295 
00296   // If the outer level is an AddExpr, the operands are all start values except
00297   // for a nested AddRecExpr.
00298   if (SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) {
00299     for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
00300       if (SCEVAddRecExpr *AddRec =
00301              dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) {
00302         if (AddRec->getLoop() == L)
00303           TheAddRec = SCEVAddExpr::get(AddRec, TheAddRec);
00304         else
00305           return false;  // Nested IV of some sort?
00306       } else {
00307         Start = SCEVAddExpr::get(Start, AE->getOperand(i));
00308       }
00309         
00310   } else if (SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SH)) {
00311     TheAddRec = SH;
00312   } else {
00313     return false;  // not analyzable.
00314   }
00315   
00316   SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec);
00317   if (!AddRec || AddRec->getLoop() != L) return false;
00318   
00319   // FIXME: Generalize to non-affine IV's.
00320   if (!AddRec->isAffine()) return false;
00321 
00322   Start = SCEVAddExpr::get(Start, AddRec->getOperand(0));
00323   
00324   if (!isa<SCEVConstant>(AddRec->getOperand(1)))
00325     DEBUG(std::cerr << "[" << L->getHeader()->getName()
00326                     << "] Variable stride: " << *AddRec << "\n");
00327 
00328   Stride = AddRec->getOperand(1);
00329   // Check that all constant strides are the unsigned type, we don't want to
00330   // have two IV's one of signed stride 4 and one of unsigned stride 4 to not be
00331   // merged.
00332   assert((!isa<SCEVConstant>(Stride) || Stride->getType()->isUnsigned()) &&
00333          "Constants should be canonicalized to unsigned!");
00334 
00335   return true;
00336 }
00337 
00338 /// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
00339 /// and now we need to decide whether the user should use the preinc or post-inc
00340 /// value.  If this user should use the post-inc version of the IV, return true.
00341 ///
00342 /// Choosing wrong here can break dominance properties (if we choose to use the
00343 /// post-inc value when we cannot) or it can end up adding extra live-ranges to
00344 /// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
00345 /// should use the post-inc value).
00346 static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
00347                                        Loop *L, ETForest *EF, Pass *P) {
00348   // If the user is in the loop, use the preinc value.
00349   if (L->contains(User->getParent())) return false;
00350   
00351   BasicBlock *LatchBlock = L->getLoopLatch();
00352   
00353   // Ok, the user is outside of the loop.  If it is dominated by the latch
00354   // block, use the post-inc value.
00355   if (EF->dominates(LatchBlock, User->getParent()))
00356     return true;
00357 
00358   // There is one case we have to be careful of: PHI nodes.  These little guys
00359   // can live in blocks that do not dominate the latch block, but (since their
00360   // uses occur in the predecessor block, not the block the PHI lives in) should
00361   // still use the post-inc value.  Check for this case now.
00362   PHINode *PN = dyn_cast<PHINode>(User);
00363   if (!PN) return false;  // not a phi, not dominated by latch block.
00364   
00365   // Look at all of the uses of IV by the PHI node.  If any use corresponds to
00366   // a block that is not dominated by the latch block, give up and use the
00367   // preincremented value.
00368   unsigned NumUses = 0;
00369   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
00370     if (PN->getIncomingValue(i) == IV) {
00371       ++NumUses;
00372       if (!EF->dominates(LatchBlock, PN->getIncomingBlock(i)))
00373         return false;
00374     }
00375 
00376   // Okay, all uses of IV by PN are in predecessor blocks that really are
00377   // dominated by the latch block.  Split the critical edges and use the
00378   // post-incremented value.
00379   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
00380     if (PN->getIncomingValue(i) == IV) {
00381       SplitCriticalEdge(PN->getIncomingBlock(i), PN->getParent(), P);
00382       if (--NumUses == 0) break;
00383     }
00384   
00385   return true;
00386 }
00387 
00388   
00389 
00390 /// AddUsersIfInteresting - Inspect the specified instruction.  If it is a
00391 /// reducible SCEV, recursively add its users to the IVUsesByStride set and
00392 /// return true.  Otherwise, return false.
00393 bool LoopStrengthReduce::AddUsersIfInteresting(Instruction *I, Loop *L,
00394                                             std::set<Instruction*> &Processed) {
00395   if (!I->getType()->isInteger() && !isa<PointerType>(I->getType()))
00396       return false;   // Void and FP expressions cannot be reduced.
00397   if (!Processed.insert(I).second)
00398     return true;    // Instruction already handled.
00399   
00400   // Get the symbolic expression for this instruction.
00401   SCEVHandle ISE = GetExpressionSCEV(I, L);
00402   if (isa<SCEVCouldNotCompute>(ISE)) return false;
00403   
00404   // Get the start and stride for this expression.
00405   SCEVHandle Start = SCEVUnknown::getIntegerSCEV(0, ISE->getType());
00406   SCEVHandle Stride = Start;
00407   if (!getSCEVStartAndStride(ISE, L, Start, Stride))
00408     return false;  // Non-reducible symbolic expression, bail out.
00409   
00410   for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;++UI){
00411     Instruction *User = cast<Instruction>(*UI);
00412 
00413     // Do not infinitely recurse on PHI nodes.
00414     if (isa<PHINode>(User) && Processed.count(User))
00415       continue;
00416 
00417     // If this is an instruction defined in a nested loop, or outside this loop,
00418     // don't recurse into it.
00419     bool AddUserToIVUsers = false;
00420     if (LI->getLoopFor(User->getParent()) != L) {
00421       DEBUG(std::cerr << "FOUND USER in other loop: " << *User
00422             << "   OF SCEV: " << *ISE << "\n");
00423       AddUserToIVUsers = true;
00424     } else if (!AddUsersIfInteresting(User, L, Processed)) {
00425       DEBUG(std::cerr << "FOUND USER: " << *User
00426             << "   OF SCEV: " << *ISE << "\n");
00427       AddUserToIVUsers = true;
00428     }
00429 
00430     if (AddUserToIVUsers) {
00431       IVUsersOfOneStride &StrideUses = IVUsesByStride[Stride];
00432       if (StrideUses.Users.empty())     // First occurance of this stride?
00433         StrideOrder.push_back(Stride);
00434       
00435       // Okay, we found a user that we cannot reduce.  Analyze the instruction
00436       // and decide what to do with it.  If we are a use inside of the loop, use
00437       // the value before incrementation, otherwise use it after incrementation.
00438       if (IVUseShouldUsePostIncValue(User, I, L, EF, this)) {
00439         // The value used will be incremented by the stride more than we are
00440         // expecting, so subtract this off.
00441         SCEVHandle NewStart = SCEV::getMinusSCEV(Start, Stride);
00442         StrideUses.addUser(NewStart, User, I);
00443         StrideUses.Users.back().isUseOfPostIncrementedValue = true;
00444         DEBUG(std::cerr << "   USING POSTINC SCEV, START=" << *NewStart<< "\n");
00445       } else {        
00446         StrideUses.addUser(Start, User, I);
00447       }
00448     }
00449   }
00450   return true;
00451 }
00452 
00453 namespace {
00454   /// BasedUser - For a particular base value, keep information about how we've
00455   /// partitioned the expression so far.
00456   struct BasedUser {
00457     /// Base - The Base value for the PHI node that needs to be inserted for
00458     /// this use.  As the use is processed, information gets moved from this
00459     /// field to the Imm field (below).  BasedUser values are sorted by this
00460     /// field.
00461     SCEVHandle Base;
00462     
00463     /// Inst - The instruction using the induction variable.
00464     Instruction *Inst;
00465 
00466     /// OperandValToReplace - The operand value of Inst to replace with the
00467     /// EmittedBase.
00468     Value *OperandValToReplace;
00469 
00470     /// Imm - The immediate value that should be added to the base immediately
00471     /// before Inst, because it will be folded into the imm field of the
00472     /// instruction.
00473     SCEVHandle Imm;
00474 
00475     /// EmittedBase - The actual value* to use for the base value of this
00476     /// operation.  This is null if we should just use zero so far.
00477     Value *EmittedBase;
00478 
00479     // isUseOfPostIncrementedValue - True if this should use the
00480     // post-incremented version of this IV, not the preincremented version.
00481     // This can only be set in special cases, such as the terminating setcc
00482     // instruction for a loop and uses outside the loop that are dominated by
00483     // the loop.
00484     bool isUseOfPostIncrementedValue;
00485     
00486     BasedUser(IVStrideUse &IVSU)
00487       : Base(IVSU.Offset), Inst(IVSU.User), 
00488         OperandValToReplace(IVSU.OperandValToReplace), 
00489         Imm(SCEVUnknown::getIntegerSCEV(0, Base->getType())), EmittedBase(0),
00490         isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue) {}
00491 
00492     // Once we rewrite the code to insert the new IVs we want, update the
00493     // operands of Inst to use the new expression 'NewBase', with 'Imm' added
00494     // to it.
00495     void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
00496                                         SCEVExpander &Rewriter, Loop *L,
00497                                         Pass *P);
00498     
00499     Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, 
00500                                        SCEVExpander &Rewriter,
00501                                        Instruction *IP, Loop *L);
00502     void dump() const;
00503   };
00504 }
00505 
00506 void BasedUser::dump() const {
00507   std::cerr << " Base=" << *Base;
00508   std::cerr << " Imm=" << *Imm;
00509   if (EmittedBase)
00510     std::cerr << "  EB=" << *EmittedBase;
00511 
00512   std::cerr << "   Inst: " << *Inst;
00513 }
00514 
00515 Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, 
00516                                               SCEVExpander &Rewriter,
00517                                               Instruction *IP, Loop *L) {
00518   // Figure out where we *really* want to insert this code.  In particular, if
00519   // the user is inside of a loop that is nested inside of L, we really don't
00520   // want to insert this expression before the user, we'd rather pull it out as
00521   // many loops as possible.
00522   LoopInfo &LI = Rewriter.getLoopInfo();
00523   Instruction *BaseInsertPt = IP;
00524   
00525   // Figure out the most-nested loop that IP is in.
00526   Loop *InsertLoop = LI.getLoopFor(IP->getParent());
00527   
00528   // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out
00529   // the preheader of the outer-most loop where NewBase is not loop invariant.
00530   while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) {
00531     BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator();
00532     InsertLoop = InsertLoop->getParentLoop();
00533   }
00534   
00535   // If there is no immediate value, skip the next part.
00536   if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Imm))
00537     if (SC->getValue()->isNullValue())
00538       return Rewriter.expandCodeFor(NewBase, BaseInsertPt,
00539                                     OperandValToReplace->getType());
00540 
00541   Value *Base = Rewriter.expandCodeFor(NewBase, BaseInsertPt);
00542   
00543   // Always emit the immediate (if non-zero) into the same block as the user.
00544   SCEVHandle NewValSCEV = SCEVAddExpr::get(SCEVUnknown::get(Base), Imm);
00545   return Rewriter.expandCodeFor(NewValSCEV, IP,
00546                                 OperandValToReplace->getType());
00547 }
00548 
00549 
00550 // Once we rewrite the code to insert the new IVs we want, update the
00551 // operands of Inst to use the new expression 'NewBase', with 'Imm' added
00552 // to it.
00553 void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase,
00554                                                SCEVExpander &Rewriter,
00555                                                Loop *L, Pass *P) {
00556   if (!isa<PHINode>(Inst)) {
00557     Value *NewVal = InsertCodeForBaseAtPosition(NewBase, Rewriter, Inst, L);
00558     // Replace the use of the operand Value with the new Phi we just created.
00559     Inst->replaceUsesOfWith(OperandValToReplace, NewVal);
00560     DEBUG(std::cerr << "    CHANGED: IMM =" << *Imm << "  Inst = " << *Inst);
00561     return;
00562   }
00563   
00564   // PHI nodes are more complex.  We have to insert one copy of the NewBase+Imm
00565   // expression into each operand block that uses it.  Note that PHI nodes can
00566   // have multiple entries for the same predecessor.  We use a map to make sure
00567   // that a PHI node only has a single Value* for each predecessor (which also
00568   // prevents us from inserting duplicate code in some blocks).
00569   std::map<BasicBlock*, Value*> InsertedCode;
00570   PHINode *PN = cast<PHINode>(Inst);
00571   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
00572     if (PN->getIncomingValue(i) == OperandValToReplace) {
00573       // If this is a critical edge, split the edge so that we do not insert the
00574       // code on all predecessor/successor paths.  We do this unless this is the
00575       // canonical backedge for this loop, as this can make some inserted code
00576       // be in an illegal position.
00577       BasicBlock *PHIPred = PN->getIncomingBlock(i);
00578       if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
00579           (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
00580         
00581         // First step, split the critical edge.
00582         SplitCriticalEdge(PHIPred, PN->getParent(), P);
00583             
00584         // Next step: move the basic block.  In particular, if the PHI node
00585         // is outside of the loop, and PredTI is in the loop, we want to
00586         // move the block to be immediately before the PHI block, not
00587         // immediately after PredTI.
00588         if (L->contains(PHIPred) && !L->contains(PN->getParent())) {
00589           BasicBlock *NewBB = PN->getIncomingBlock(i);
00590           NewBB->moveBefore(PN->getParent());
00591         }
00592       }
00593 
00594       Value *&Code = InsertedCode[PN->getIncomingBlock(i)];
00595       if (!Code) {
00596         // Insert the code into the end of the predecessor block.
00597         Instruction *InsertPt = PN->getIncomingBlock(i)->getTerminator();
00598         Code = InsertCodeForBaseAtPosition(NewBase, Rewriter, InsertPt, L);
00599       }
00600       
00601       // Replace the use of the operand Value with the new Phi we just created.
00602       PN->setIncomingValue(i, Code);
00603       Rewriter.clear();
00604     }
00605   }
00606   DEBUG(std::cerr << "    CHANGED: IMM =" << *Imm << "  Inst = " << *Inst);
00607 }
00608 
00609 
00610 /// isTargetConstant - Return true if the following can be referenced by the
00611 /// immediate field of a target instruction.
00612 static bool isTargetConstant(const SCEVHandle &V, const TargetLowering *TLI) {
00613   if (SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) {
00614     int64_t V = SC->getValue()->getSExtValue();
00615     if (TLI)
00616       return TLI->isLegalAddressImmediate(V);
00617     else
00618       // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field.
00619       return (V > -(1 << 16) && V < (1 << 16)-1);
00620   }
00621 
00622   if (SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V))
00623     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(SU->getValue()))
00624       if (CE->getOpcode() == Instruction::Cast) {
00625         Constant *Op0 = CE->getOperand(0);
00626         if (isa<GlobalValue>(Op0) &&
00627             TLI &&
00628             TLI->isLegalAddressImmediate(cast<GlobalValue>(Op0)))
00629           return true;
00630       }
00631   return false;
00632 }
00633 
00634 /// MoveLoopVariantsToImediateField - Move any subexpressions from Val that are
00635 /// loop varying to the Imm operand.
00636 static void MoveLoopVariantsToImediateField(SCEVHandle &Val, SCEVHandle &Imm,
00637                                             Loop *L) {
00638   if (Val->isLoopInvariant(L)) return;  // Nothing to do.
00639   
00640   if (SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
00641     std::vector<SCEVHandle> NewOps;
00642     NewOps.reserve(SAE->getNumOperands());
00643     
00644     for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
00645       if (!SAE->getOperand(i)->isLoopInvariant(L)) {
00646         // If this is a loop-variant expression, it must stay in the immediate
00647         // field of the expression.
00648         Imm = SCEVAddExpr::get(Imm, SAE->getOperand(i));
00649       } else {
00650         NewOps.push_back(SAE->getOperand(i));
00651       }
00652 
00653     if (NewOps.empty())
00654       Val = SCEVUnknown::getIntegerSCEV(0, Val->getType());
00655     else
00656       Val = SCEVAddExpr::get(NewOps);
00657   } else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
00658     // Try to pull immediates out of the start value of nested addrec's.
00659     SCEVHandle Start = SARE->getStart();
00660     MoveLoopVariantsToImediateField(Start, Imm, L);
00661     
00662     std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
00663     Ops[0] = Start;
00664     Val = SCEVAddRecExpr::get(Ops, SARE->getLoop());
00665   } else {
00666     // Otherwise, all of Val is variant, move the whole thing over.
00667     Imm = SCEVAddExpr::get(Imm, Val);
00668     Val = SCEVUnknown::getIntegerSCEV(0, Val->getType());
00669   }
00670 }
00671 
00672 
00673 /// MoveImmediateValues - Look at Val, and pull out any additions of constants
00674 /// that can fit into the immediate field of instructions in the target.
00675 /// Accumulate these immediate values into the Imm value.
00676 static void MoveImmediateValues(const TargetLowering *TLI,
00677                                 SCEVHandle &Val, SCEVHandle &Imm,
00678                                 bool isAddress, Loop *L) {
00679   if (SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
00680     std::vector<SCEVHandle> NewOps;
00681     NewOps.reserve(SAE->getNumOperands());
00682     
00683     for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
00684       SCEVHandle NewOp = SAE->getOperand(i);
00685       MoveImmediateValues(TLI, NewOp, Imm, isAddress, L);
00686       
00687       if (!NewOp->isLoopInvariant(L)) {
00688         // If this is a loop-variant expression, it must stay in the immediate
00689         // field of the expression.
00690         Imm = SCEVAddExpr::get(Imm, NewOp);
00691       } else {
00692         NewOps.push_back(NewOp);
00693       }
00694     }
00695 
00696     if (NewOps.empty())
00697       Val = SCEVUnknown::getIntegerSCEV(0, Val->getType());
00698     else
00699       Val = SCEVAddExpr::get(NewOps);
00700     return;
00701   } else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {
00702     // Try to pull immediates out of the start value of nested addrec's.
00703     SCEVHandle Start = SARE->getStart();
00704     MoveImmediateValues(TLI, Start, Imm, isAddress, L);
00705     
00706     if (Start != SARE->getStart()) {
00707       std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
00708       Ops[0] = Start;
00709       Val = SCEVAddRecExpr::get(Ops, SARE->getLoop());
00710     }
00711     return;
00712   } else if (SCEVMulExpr *SME = dyn_cast<SCEVMulExpr>(Val)) {
00713     // Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field.
00714     if (isAddress && isTargetConstant(SME->getOperand(0), TLI) &&
00715         SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) {
00716 
00717       SCEVHandle SubImm = SCEVUnknown::getIntegerSCEV(0, Val->getType());
00718       SCEVHandle NewOp = SME->getOperand(1);
00719       MoveImmediateValues(TLI, NewOp, SubImm, isAddress, L);
00720       
00721       // If we extracted something out of the subexpressions, see if we can 
00722       // simplify this!
00723       if (NewOp != SME->getOperand(1)) {
00724         // Scale SubImm up by "8".  If the result is a target constant, we are
00725         // good.
00726         SubImm = SCEVMulExpr::get(SubImm, SME->getOperand(0));
00727         if (isTargetConstant(SubImm, TLI)) {
00728           // Accumulate the immediate.
00729           Imm = SCEVAddExpr::get(Imm, SubImm);
00730           
00731           // Update what is left of 'Val'.
00732           Val = SCEVMulExpr::get(SME->getOperand(0), NewOp);
00733           return;
00734         }
00735       }
00736     }
00737   }
00738 
00739   // Loop-variant expressions must stay in the immediate field of the
00740   // expression.
00741   if ((isAddress && isTargetConstant(Val, TLI)) ||
00742       !Val->isLoopInvariant(L)) {
00743     Imm = SCEVAddExpr::get(Imm, Val);
00744     Val = SCEVUnknown::getIntegerSCEV(0, Val->getType());
00745     return;
00746   }
00747 
00748   // Otherwise, no immediates to move.
00749 }
00750 
00751 
00752 /// IncrementAddExprUses - Decompose the specified expression into its added
00753 /// subexpressions, and increment SubExpressionUseCounts for each of these
00754 /// decomposed parts.
00755 static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs,
00756                              SCEVHandle Expr) {
00757   if (SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {
00758     for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j)
00759       SeparateSubExprs(SubExprs, AE->getOperand(j));
00760   } else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) {
00761     SCEVHandle Zero = SCEVUnknown::getIntegerSCEV(0, Expr->getType());
00762     if (SARE->getOperand(0) == Zero) {
00763       SubExprs.push_back(Expr);
00764     } else {
00765       // Compute the addrec with zero as its base.
00766       std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
00767       Ops[0] = Zero;   // Start with zero base.
00768       SubExprs.push_back(SCEVAddRecExpr::get(Ops, SARE->getLoop()));
00769       
00770 
00771       SeparateSubExprs(SubExprs, SARE->getOperand(0));
00772     }
00773   } else if (!isa<SCEVConstant>(Expr) ||
00774              !cast<SCEVConstant>(Expr)->getValue()->isNullValue()) {
00775     // Do not add zero.
00776     SubExprs.push_back(Expr);
00777   }
00778 }
00779 
00780 
00781 /// RemoveCommonExpressionsFromUseBases - Look through all of the uses in Bases,
00782 /// removing any common subexpressions from it.  Anything truly common is
00783 /// removed, accumulated, and returned.  This looks for things like (a+b+c) and
00784 /// (a+c+d) -> (a+c).  The common expression is *removed* from the Bases.
00785 static SCEVHandle 
00786 RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses) {
00787   unsigned NumUses = Uses.size();
00788 
00789   // Only one use?  Use its base, regardless of what it is!
00790   SCEVHandle Zero = SCEVUnknown::getIntegerSCEV(0, Uses[0].Base->getType());
00791   SCEVHandle Result = Zero;
00792   if (NumUses == 1) {
00793     std::swap(Result, Uses[0].Base);
00794     return Result;
00795   }
00796 
00797   // To find common subexpressions, count how many of Uses use each expression.
00798   // If any subexpressions are used Uses.size() times, they are common.
00799   std::map<SCEVHandle, unsigned> SubExpressionUseCounts;
00800   
00801   // UniqueSubExprs - Keep track of all of the subexpressions we see in the
00802   // order we see them.
00803   std::vector<SCEVHandle> UniqueSubExprs;
00804 
00805   std::vector<SCEVHandle> SubExprs;
00806   for (unsigned i = 0; i != NumUses; ++i) {
00807     // If the base is zero (which is common), return zero now, there are no
00808     // CSEs we can find.
00809     if (Uses[i].Base == Zero) return Zero;
00810 
00811     // Split the expression into subexprs.
00812     SeparateSubExprs(SubExprs, Uses[i].Base);
00813     // Add one to SubExpressionUseCounts for each subexpr present.
00814     for (unsigned j = 0, e = SubExprs.size(); j != e; ++j)
00815       if (++SubExpressionUseCounts[SubExprs[j]] == 1)
00816         UniqueSubExprs.push_back(SubExprs[j]);
00817     SubExprs.clear();
00818   }
00819 
00820   // Now that we know how many times each is used, build Result.  Iterate over
00821   // UniqueSubexprs so that we have a stable ordering.
00822   for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
00823     std::map<SCEVHandle, unsigned>::iterator I = 
00824        SubExpressionUseCounts.find(UniqueSubExprs[i]);
00825     assert(I != SubExpressionUseCounts.end() && "Entry not found?");
00826     if (I->second == NumUses) {  // Found CSE!
00827       Result = SCEVAddExpr::get(Result, I->first);
00828     } else {
00829       // Remove non-cse's from SubExpressionUseCounts.
00830       SubExpressionUseCounts.erase(I);
00831     }
00832   }
00833   
00834   // If we found no CSE's, return now.
00835   if (Result == Zero) return Result;
00836   
00837   // Otherwise, remove all of the CSE's we found from each of the base values.
00838   for (unsigned i = 0; i != NumUses; ++i) {
00839     // Split the expression into subexprs.
00840     SeparateSubExprs(SubExprs, Uses[i].Base);
00841 
00842     // Remove any common subexpressions.
00843     for (unsigned j = 0, e = SubExprs.size(); j != e; ++j)
00844       if (SubExpressionUseCounts.count(SubExprs[j])) {
00845         SubExprs.erase(SubExprs.begin()+j);
00846         --j; --e;
00847       }
00848     
00849     // Finally, the non-shared expressions together.
00850     if (SubExprs.empty())
00851       Uses[i].Base = Zero;
00852     else
00853       Uses[i].Base = SCEVAddExpr::get(SubExprs);
00854     SubExprs.clear();
00855   }
00856  
00857   return Result;
00858 }
00859 
00860 /// isZero - returns true if the scalar evolution expression is zero.
00861 ///
00862 static bool isZero(SCEVHandle &V) {
00863   if (SCEVConstant *SC = dyn_cast<SCEVConstant>(V))
00864     return SC->getValue()->getRawValue() == 0;
00865   return false;
00866 }
00867 
00868 
00869 /// CheckForIVReuse - Returns the multiple if the stride is the multiple
00870 /// of a previous stride and it is a legal value for the target addressing
00871 /// mode scale component. This allows the users of this stride to be rewritten
00872 /// as prev iv * factor. It returns 0 if no reuse is possible.
00873 unsigned LoopStrengthReduce::CheckForIVReuse(const SCEVHandle &Stride,
00874                                              IVExpr &IV) {
00875   if (!TLI) return 0;
00876 
00877   if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
00878     int64_t SInt = SC->getValue()->getSExtValue();
00879     if (SInt == 1) return 0;
00880 
00881     for (TargetLowering::legal_am_scale_iterator
00882            I = TLI->legal_am_scale_begin(), E = TLI->legal_am_scale_end();
00883          I != E; ++I) {
00884       unsigned Scale = *I;
00885       if (unsigned(abs(SInt)) < Scale || (SInt % Scale) != 0)
00886         continue;
00887       std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
00888         IVsByStride.find(SCEVUnknown::getIntegerSCEV(SInt/Scale, Type::UIntTy));
00889       if (SI == IVsByStride.end())
00890         continue;
00891       for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
00892              IE = SI->second.IVs.end(); II != IE; ++II)
00893         // FIXME: Only handle base == 0 for now.
00894         if (isZero(II->Base)) {
00895           IV = *II;
00896           return Scale;
00897         }
00898     }
00899   }
00900 
00901   return 0;
00902 }
00903 
00904 
00905 /// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
00906 /// stride of IV.  All of the users may have different starting values, and this
00907 /// may not be the only stride (we know it is if isOnlyStride is true).
00908 void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
00909                                                       IVUsersOfOneStride &Uses,
00910                                                       Loop *L,
00911                                                       bool isOnlyStride) {
00912   // Transform our list of users and offsets to a bit more complex table.  In
00913   // this new vector, each 'BasedUser' contains 'Base' the base of the
00914   // strided accessas well as the old information from Uses.  We progressively
00915   // move information from the Base field to the Imm field, until we eventually
00916   // have the full access expression to rewrite the use.
00917   std::vector<BasedUser> UsersToProcess;
00918   UsersToProcess.reserve(Uses.Users.size());
00919   for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i) {
00920     UsersToProcess.push_back(Uses.Users[i]);
00921     
00922     // Move any loop invariant operands from the offset field to the immediate
00923     // field of the use, so that we don't try to use something before it is
00924     // computed.
00925     MoveLoopVariantsToImediateField(UsersToProcess.back().Base,
00926                                     UsersToProcess.back().Imm, L);
00927     assert(UsersToProcess.back().Base->isLoopInvariant(L) &&
00928            "Base value is not loop invariant!");
00929   }
00930 
00931   // Check if it is possible to reuse a IV with stride that is factor of this
00932   // stride. And the multiple is a number that can be encoded in the scale
00933   // field of the target addressing mode.
00934   PHINode *NewPHI = NULL;
00935   Value   *IncV   = NULL;
00936   IVExpr   ReuseIV;
00937   unsigned RewriteFactor = CheckForIVReuse(Stride, ReuseIV);
00938   if (RewriteFactor != 0) {
00939     DEBUG(std::cerr << "BASED ON IV of STRIDE " << *ReuseIV.Stride
00940           << " and BASE " << *ReuseIV.Base << " :\n");
00941     NewPHI = ReuseIV.PHI;
00942     IncV   = ReuseIV.IncV;
00943   }
00944 
00945   // We now have a whole bunch of uses of like-strided induction variables, but
00946   // they might all have different bases.  We want to emit one PHI node for this
00947   // stride which we fold as many common expressions (between the IVs) into as
00948   // possible.  Start by identifying the common expressions in the base values 
00949   // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
00950   // "A+B"), emit it to the preheader, then remove the expression from the
00951   // UsersToProcess base values.
00952   SCEVHandle CommonExprs =
00953     RemoveCommonExpressionsFromUseBases(UsersToProcess);
00954   
00955   // Next, figure out what we can represent in the immediate fields of
00956   // instructions.  If we can represent anything there, move it to the imm
00957   // fields of the BasedUsers.  We do this so that it increases the commonality
00958   // of the remaining uses.
00959   for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
00960     // If the user is not in the current loop, this means it is using the exit
00961     // value of the IV.  Do not put anything in the base, make sure it's all in
00962     // the immediate field to allow as much factoring as possible.
00963     if (!L->contains(UsersToProcess[i].Inst->getParent())) {
00964       UsersToProcess[i].Imm = SCEVAddExpr::get(UsersToProcess[i].Imm,
00965                                                UsersToProcess[i].Base);
00966       UsersToProcess[i].Base = 
00967         SCEVUnknown::getIntegerSCEV(0, UsersToProcess[i].Base->getType());
00968     } else {
00969       
00970       // Addressing modes can be folded into loads and stores.  Be careful that
00971       // the store is through the expression, not of the expression though.
00972       bool isAddress = isa<LoadInst>(UsersToProcess[i].Inst);
00973       if (StoreInst *SI = dyn_cast<StoreInst>(UsersToProcess[i].Inst))
00974         if (SI->getOperand(1) == UsersToProcess[i].OperandValToReplace)
00975           isAddress = true;
00976       
00977       MoveImmediateValues(TLI, UsersToProcess[i].Base, UsersToProcess[i].Imm,
00978                           isAddress, L);
00979     }
00980   }
00981 
00982   // Now that we know what we need to do, insert the PHI node itself.
00983   //
00984   DEBUG(std::cerr << "INSERTING IV of STRIDE " << *Stride << " and BASE "
00985         << *CommonExprs << " :\n");
00986 
00987   SCEVExpander Rewriter(*SE, *LI);
00988   SCEVExpander PreheaderRewriter(*SE, *LI);
00989   
00990   BasicBlock  *Preheader = L->getLoopPreheader();
00991   Instruction *PreInsertPt = Preheader->getTerminator();
00992   Instruction *PhiInsertBefore = L->getHeader()->begin();
00993   
00994   BasicBlock *LatchBlock = L->getLoopLatch();
00995 
00996   const Type *ReplacedTy = CommonExprs->getType();
00997 
00998   // Emit the initial base value into the loop preheader.
00999   Value *CommonBaseV
01000     = PreheaderRewriter.expandCodeFor(CommonExprs, PreInsertPt,
01001                                       ReplacedTy);
01002 
01003   if (RewriteFactor == 0) {
01004     // Create a new Phi for this base, and stick it in the loop header.
01005     NewPHI = new PHINode(ReplacedTy, "iv.", PhiInsertBefore);
01006     ++NumInserted;
01007   
01008     // Add common base to the new Phi node.
01009     NewPHI->addIncoming(CommonBaseV, Preheader);
01010 
01011     // Insert the stride into the preheader.
01012     Value *StrideV = PreheaderRewriter.expandCodeFor(Stride, PreInsertPt,
01013                                                      ReplacedTy);
01014     if (!isa<ConstantInt>(StrideV)) ++NumVariable;
01015 
01016     // Emit the increment of the base value before the terminator of the loop
01017     // latch block, and add it to the Phi node.
01018     SCEVHandle IncExp = SCEVAddExpr::get(SCEVUnknown::get(NewPHI),
01019                                          SCEVUnknown::get(StrideV));
01020   
01021     IncV = Rewriter.expandCodeFor(IncExp, LatchBlock->getTerminator(),
01022                                   ReplacedTy);
01023     IncV->setName(NewPHI->getName()+".inc");
01024     NewPHI->addIncoming(IncV, LatchBlock);
01025 
01026     // Remember this in case a later stride is multiple of this.
01027     IVsByStride[Stride].addIV(Stride, CommonExprs, NewPHI, IncV);
01028   } else {
01029     Constant *C = dyn_cast<Constant>(CommonBaseV);
01030     if (!C ||
01031         (!C->isNullValue() &&
01032          !isTargetConstant(SCEVUnknown::get(CommonBaseV), TLI)))
01033       // We want the common base emitted into the preheader!
01034       CommonBaseV = new CastInst(CommonBaseV, CommonBaseV->getType(),
01035                                  "commonbase", PreInsertPt);
01036   }
01037 
01038   // Sort by the base value, so that all IVs with identical bases are next to
01039   // each other.
01040   while (!UsersToProcess.empty()) {
01041     SCEVHandle Base = UsersToProcess.back().Base;
01042 
01043     DEBUG(std::cerr << "  INSERTING code for BASE = " << *Base << ":\n");
01044    
01045     // Emit the code for Base into the preheader.
01046     Value *BaseV = PreheaderRewriter.expandCodeFor(Base, PreInsertPt,
01047                                                    ReplacedTy);
01048     
01049     // If BaseV is a constant other than 0, make sure that it gets inserted into
01050     // the preheader, instead of being forward substituted into the uses.  We do
01051     // this by forcing a noop cast to be inserted into the preheader in this
01052     // case.
01053     if (Constant *C = dyn_cast<Constant>(BaseV))
01054       if (!C->isNullValue() && !isTargetConstant(Base, TLI)) {
01055         // We want this constant emitted into the preheader!
01056         BaseV = new CastInst(BaseV, BaseV->getType(), "preheaderinsert",
01057                              PreInsertPt);       
01058       }
01059     
01060     // Emit the code to add the immediate offset to the Phi value, just before
01061     // the instructions that we identified as using this stride and base.
01062     unsigned ScanPos = 0;
01063     do {
01064       BasedUser &User = UsersToProcess.back();
01065 
01066       // If this instruction wants to use the post-incremented value, move it
01067       // after the post-inc and use its value instead of the PHI.
01068       Value *RewriteOp = NewPHI;
01069       if (User.isUseOfPostIncrementedValue) {
01070         RewriteOp = IncV;
01071 
01072         // If this user is in the loop, make sure it is the last thing in the
01073         // loop to ensure it is dominated by the increment.
01074         if (L->contains(User.Inst->getParent()))
01075           User.Inst->moveBefore(LatchBlock->getTerminator());
01076       }
01077       SCEVHandle RewriteExpr = SCEVUnknown::get(RewriteOp);
01078 
01079       // Clear the SCEVExpander's expression map so that we are guaranteed
01080       // to have the code emitted where we expect it.
01081       Rewriter.clear();
01082 
01083       // If we are reusing the iv, then it must be multiplied by a constant
01084       // factor take advantage of addressing mode scale component.
01085       if (RewriteFactor != 0) {
01086         RewriteExpr =
01087           SCEVMulExpr::get(SCEVUnknown::getIntegerSCEV(RewriteFactor,
01088                                                        RewriteExpr->getType()),
01089                            RewriteExpr);
01090 
01091         // The common base is emitted in the loop preheader. But since we
01092         // are reusing an IV, it has not been used to initialize the PHI node.
01093         // Add it to the expression used to rewrite the uses.
01094         if (!isa<ConstantInt>(CommonBaseV) ||
01095             !cast<ConstantInt>(CommonBaseV)->isNullValue())
01096           RewriteExpr = SCEVAddExpr::get(RewriteExpr,
01097                                          SCEVUnknown::get(CommonBaseV));
01098       }
01099 
01100       // Now that we know what we need to do, insert code before User for the
01101       // immediate and any loop-variant expressions.
01102       if (!isa<ConstantInt>(BaseV) || !cast<ConstantInt>(BaseV)->isNullValue())
01103         // Add BaseV to the PHI value if needed.
01104         RewriteExpr = SCEVAddExpr::get(RewriteExpr, SCEVUnknown::get(BaseV));
01105 
01106       User.RewriteInstructionToUseNewBase(RewriteExpr, Rewriter, L, this);
01107 
01108       // Mark old value we replaced as possibly dead, so that it is elminated
01109       // if we just replaced the last use of that value.
01110       DeadInsts.insert(cast<Instruction>(User.OperandValToReplace));
01111 
01112       UsersToProcess.pop_back();
01113       ++NumReduced;
01114 
01115       // If there are any more users to process with the same base, move one of
01116       // them to the end of the list so that we will process it.
01117       if (!UsersToProcess.empty()) {
01118         for (unsigned e = UsersToProcess.size(); ScanPos != e; ++ScanPos)
01119           if (UsersToProcess[ScanPos].Base == Base) {
01120             std::swap(UsersToProcess[ScanPos], UsersToProcess.back());
01121             break;
01122           }
01123       }
01124     } while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base);
01125     // TODO: Next, find out which base index is the most common, pull it out.
01126   }
01127 
01128   // IMPORTANT TODO: Figure out how to partition the IV's with this stride, but
01129   // different starting values, into different PHIs.
01130 }
01131 
01132 // OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
01133 // uses in the loop, look to see if we can eliminate some, in favor of using
01134 // common indvars for the different uses.
01135 void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
01136   // TODO: implement optzns here.
01137 
01138 
01139 
01140 
01141   // Finally, get the terminating condition for the loop if possible.  If we
01142   // can, we want to change it to use a post-incremented version of its
01143   // induction variable, to allow coalescing the live ranges for the IV into
01144   // one register value.
01145   PHINode *SomePHI = cast<PHINode>(L->getHeader()->begin());
01146   BasicBlock  *Preheader = L->getLoopPreheader();
01147   BasicBlock *LatchBlock =
01148    SomePHI->getIncomingBlock(SomePHI->getIncomingBlock(0) == Preheader);
01149   BranchInst *TermBr = dyn_cast<BranchInst>(LatchBlock->getTerminator());
01150   if (!TermBr || TermBr->isUnconditional() ||
01151       !isa<SetCondInst>(TermBr->getCondition()))
01152     return;
01153   SetCondInst *Cond = cast<SetCondInst>(TermBr->getCondition());
01154 
01155   // Search IVUsesByStride to find Cond's IVUse if there is one.
01156   IVStrideUse *CondUse = 0;
01157   const SCEVHandle *CondStride = 0;
01158 
01159   for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e && !CondUse;
01160        ++Stride) {
01161     std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI = 
01162       IVUsesByStride.find(StrideOrder[Stride]);
01163     assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
01164     
01165     for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
01166            E = SI->second.Users.end(); UI != E; ++UI)
01167       if (UI->User == Cond) {
01168         CondUse = &*UI;
01169         CondStride = &SI->first;
01170         // NOTE: we could handle setcc instructions with multiple uses here, but
01171         // InstCombine does it as well for simple uses, it's not clear that it
01172         // occurs enough in real life to handle.
01173         break;
01174       }
01175   }
01176   if (!CondUse) return;  // setcc doesn't use the IV.
01177 
01178   // setcc stride is complex, don't mess with users.
01179   // FIXME: Evaluate whether this is a good idea or not.
01180   if (!isa<SCEVConstant>(*CondStride)) return;
01181 
01182   // It's possible for the setcc instruction to be anywhere in the loop, and
01183   // possible for it to have multiple users.  If it is not immediately before
01184   // the latch block branch, move it.
01185   if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
01186     if (Cond->hasOneUse()) {   // Condition has a single use, just move it.
01187       Cond->moveBefore(TermBr);
01188     } else {
01189       // Otherwise, clone the terminating condition and insert into the loopend.
01190       Cond = cast<SetCondInst>(Cond->clone());
01191       Cond->setName(L->getHeader()->getName() + ".termcond");
01192       LatchBlock->getInstList().insert(TermBr, Cond);
01193       
01194       // Clone the IVUse, as the old use still exists!
01195       IVUsesByStride[*CondStride].addUser(CondUse->Offset, Cond,
01196                                          CondUse->OperandValToReplace);
01197       CondUse = &IVUsesByStride[*CondStride].Users.back();
01198     }
01199   }
01200 
01201   // If we get to here, we know that we can transform the setcc instruction to
01202   // use the post-incremented version of the IV, allowing us to coalesce the
01203   // live ranges for the IV correctly.
01204   CondUse->Offset = SCEV::getMinusSCEV(CondUse->Offset, *CondStride);
01205   CondUse->isUseOfPostIncrementedValue = true;
01206 }
01207 
01208 namespace {
01209   // Constant strides come first which in turns are sorted by their absolute
01210   // values. If absolute values are the same, then positive strides comes first.
01211   // e.g.
01212   // 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
01213   struct StrideCompare {
01214     bool operator()(const SCEVHandle &LHS, const SCEVHandle &RHS) {
01215       SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
01216       SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
01217       if (LHSC && RHSC) {
01218         int64_t  LV = LHSC->getValue()->getSExtValue();
01219         int64_t  RV = RHSC->getValue()->getSExtValue();
01220         uint64_t ALV = (LV < 0) ? -LV : LV;
01221         uint64_t ARV = (RV < 0) ? -RV : RV;
01222         if (ALV == ARV)
01223           return LV > RV;
01224         else
01225           return ALV < ARV;
01226       }
01227       return (LHSC && !RHSC);
01228     }
01229   };
01230 }
01231 
01232 void LoopStrengthReduce::runOnLoop(Loop *L) {
01233   // First step, transform all loops nesting inside of this loop.
01234   for (LoopInfo::iterator I = L->begin(), E = L->end(); I != E; ++I)
01235     runOnLoop(*I);
01236 
01237   // Next, find all uses of induction variables in this loop, and catagorize
01238   // them by stride.  Start by finding all of the PHI nodes in the header for
01239   // this loop.  If they are induction variables, inspect their uses.
01240   std::set<Instruction*> Processed;   // Don't reprocess instructions.
01241   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
01242     AddUsersIfInteresting(I, L, Processed);
01243 
01244   // If we have nothing to do, return.
01245   if (IVUsesByStride.empty()) return;
01246 
01247   // Optimize induction variables.  Some indvar uses can be transformed to use
01248   // strides that will be needed for other purposes.  A common example of this
01249   // is the exit test for the loop, which can often be rewritten to use the
01250   // computation of some other indvar to decide when to terminate the loop.
01251   OptimizeIndvars(L);
01252 
01253 
01254   // FIXME: We can widen subreg IV's here for RISC targets.  e.g. instead of
01255   // doing computation in byte values, promote to 32-bit values if safe.
01256 
01257   // FIXME: Attempt to reuse values across multiple IV's.  In particular, we
01258   // could have something like "for(i) { foo(i*8); bar(i*16) }", which should be
01259   // codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC.  Need
01260   // to be careful that IV's are all the same type.  Only works for intptr_t
01261   // indvars.
01262 
01263   // If we only have one stride, we can more aggressively eliminate some things.
01264   bool HasOneStride = IVUsesByStride.size() == 1;
01265 
01266 #ifndef NDEBUG
01267   DEBUG(std::cerr << "\nLSR on ");
01268   DEBUG(L->dump());
01269 #endif
01270 
01271   // IVsByStride keeps IVs for one particular loop.
01272   IVsByStride.clear();
01273 
01274   // Sort the StrideOrder so we process larger strides first.
01275   std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare());
01276 
01277   // Note: this processes each stride/type pair individually.  All users passed
01278   // into StrengthReduceStridedIVUsers have the same type AND stride.  Also,
01279   // node that we iterate over IVUsesByStride indirectly by using StrideOrder.
01280   // This extra layer of indirection makes the ordering of strides deterministic
01281   // - not dependent on map order.
01282   for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
01283     std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI = 
01284       IVUsesByStride.find(StrideOrder[Stride]);
01285     assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
01286     StrengthReduceStridedIVUsers(SI->first, SI->second, L, HasOneStride);
01287   }
01288 
01289   // Clean up after ourselves
01290   if (!DeadInsts.empty()) {
01291     DeleteTriviallyDeadInstructions(DeadInsts);
01292 
01293     BasicBlock::iterator I = L->getHeader()->begin();
01294     PHINode *PN;
01295     while ((PN = dyn_cast<PHINode>(I))) {
01296       ++I;  // Preincrement iterator to avoid invalidating it when deleting PN.
01297       
01298       // At this point, we know that we have killed one or more GEP
01299       // instructions.  It is worth checking to see if the cann indvar is also
01300       // dead, so that we can remove it as well.  The requirements for the cann
01301       // indvar to be considered dead are:
01302       // 1. the cann indvar has one use
01303       // 2. the use is an add instruction
01304       // 3. the add has one use
01305       // 4. the add is used by the cann indvar
01306       // If all four cases above are true, then we can remove both the add and
01307       // the cann indvar.
01308       // FIXME: this needs to eliminate an induction variable even if it's being
01309       // compared against some value to decide loop termination.
01310       if (PN->hasOneUse()) {
01311         BinaryOperator *BO = dyn_cast<BinaryOperator>(*(PN->use_begin()));
01312         if (BO && BO->hasOneUse()) {
01313           if (PN == *(BO->use_begin())) {
01314             DeadInsts.insert(BO);
01315             // Break the cycle, then delete the PHI.
01316             PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
01317             SE->deleteInstructionFromRecords(PN);
01318             PN->eraseFromParent();
01319           }
01320         }
01321       }
01322     }
01323     DeleteTriviallyDeadInstructions(DeadInsts);
01324   }
01325 
01326   CastedPointers.clear();
01327   IVUsesByStride.clear();
01328   StrideOrder.clear();
01329   return;
01330 }