LLVM API Documentation

Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

LoadValueNumbering.cpp

Go to the documentation of this file.
00001 //===- LoadValueNumbering.cpp - Load Value #'ing Implementation -*- C++ -*-===//
00002 // 
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 // 
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements a value numbering pass that value numbers load and call
00011 // instructions.  To do this, it finds lexically identical load instructions,
00012 // and uses alias analysis to determine which loads are guaranteed to produce
00013 // the same value.  To value number call instructions, it looks for calls to
00014 // functions that do not write to memory which do not have intervening
00015 // instructions that clobber the memory that is read from.
00016 //
00017 // This pass builds off of another value numbering pass to implement value
00018 // numbering for non-load and non-call instructions.  It uses Alias Analysis so
00019 // that it can disambiguate the load instructions.  The more powerful these base
00020 // analyses are, the more powerful the resultant value numbering will be.
00021 //
00022 //===----------------------------------------------------------------------===//
00023 
00024 #include "llvm/Analysis/LoadValueNumbering.h"
00025 #include "llvm/Constant.h"
00026 #include "llvm/Function.h"
00027 #include "llvm/Instructions.h"
00028 #include "llvm/Pass.h"
00029 #include "llvm/Type.h"
00030 #include "llvm/Analysis/ValueNumbering.h"
00031 #include "llvm/Analysis/AliasAnalysis.h"
00032 #include "llvm/Analysis/Dominators.h"
00033 #include "llvm/Support/CFG.h"
00034 #include "llvm/Target/TargetData.h"
00035 #include <set>
00036 #include <algorithm>
00037 using namespace llvm;
00038 
00039 namespace {
00040   // FIXME: This should not be a FunctionPass.
00041   struct LoadVN : public FunctionPass, public ValueNumbering {
00042     
00043     /// Pass Implementation stuff.  This doesn't do any analysis.
00044     ///
00045     bool runOnFunction(Function &) { return false; }
00046     
00047     /// getAnalysisUsage - Does not modify anything.  It uses Value Numbering
00048     /// and Alias Analysis.
00049     ///
00050     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
00051     
00052     /// getEqualNumberNodes - Return nodes with the same value number as the
00053     /// specified Value.  This fills in the argument vector with any equal
00054     /// values.
00055     ///
00056     virtual void getEqualNumberNodes(Value *V1,
00057                                      std::vector<Value*> &RetVals) const;
00058 
00059     /// deleteValue - This method should be called whenever an LLVM Value is
00060     /// deleted from the program, for example when an instruction is found to be
00061     /// redundant and is eliminated.
00062     ///
00063     virtual void deleteValue(Value *V) {
00064       getAnalysis<AliasAnalysis>().deleteValue(V);
00065     }
00066     
00067     /// copyValue - This method should be used whenever a preexisting value in
00068     /// the program is copied or cloned, introducing a new value.  Note that
00069     /// analysis implementations should tolerate clients that use this method to
00070     /// introduce the same value multiple times: if the analysis already knows
00071     /// about a value, it should ignore the request.
00072     ///
00073     virtual void copyValue(Value *From, Value *To) {
00074       getAnalysis<AliasAnalysis>().copyValue(From, To);
00075     }
00076 
00077     /// getCallEqualNumberNodes - Given a call instruction, find other calls
00078     /// that have the same value number.
00079     void getCallEqualNumberNodes(CallInst *CI,
00080                                  std::vector<Value*> &RetVals) const;
00081   };
00082 
00083   // Register this pass...
00084   RegisterOpt<LoadVN> X("load-vn", "Load Value Numbering");
00085 
00086   // Declare that we implement the ValueNumbering interface
00087   RegisterAnalysisGroup<ValueNumbering, LoadVN> Y;
00088 }
00089 
00090 FunctionPass *llvm::createLoadValueNumberingPass() { return new LoadVN(); }
00091 
00092 
00093 /// getAnalysisUsage - Does not modify anything.  It uses Value Numbering and
00094 /// Alias Analysis.
00095 ///
00096 void LoadVN::getAnalysisUsage(AnalysisUsage &AU) const {
00097   AU.setPreservesAll();
00098   AU.addRequired<AliasAnalysis>();
00099   AU.addRequired<ValueNumbering>();
00100   AU.addRequired<DominatorSet>();
00101   AU.addRequired<TargetData>();
00102 }
00103 
00104 static bool isPathTransparentTo(BasicBlock *CurBlock, BasicBlock *Dom,
00105                                 Value *Ptr, unsigned Size, AliasAnalysis &AA,
00106                                 std::set<BasicBlock*> &Visited,
00107                                 std::map<BasicBlock*, bool> &TransparentBlocks){
00108   // If we have already checked out this path, or if we reached our destination,
00109   // stop searching, returning success.
00110   if (CurBlock == Dom || !Visited.insert(CurBlock).second)
00111     return true;
00112   
00113   // Check whether this block is known transparent or not.
00114   std::map<BasicBlock*, bool>::iterator TBI =
00115     TransparentBlocks.lower_bound(CurBlock);
00116 
00117   if (TBI == TransparentBlocks.end() || TBI->first != CurBlock) {
00118     // If this basic block can modify the memory location, then the path is not
00119     // transparent!
00120     if (AA.canBasicBlockModify(*CurBlock, Ptr, Size)) {
00121       TransparentBlocks.insert(TBI, std::make_pair(CurBlock, false));
00122       return false;
00123     }
00124     TransparentBlocks.insert(TBI, std::make_pair(CurBlock, true));
00125   } else if (!TBI->second)
00126     // This block is known non-transparent, so that path can't be either.
00127     return false;
00128   
00129   // The current block is known to be transparent.  The entire path is
00130   // transparent if all of the predecessors paths to the parent is also
00131   // transparent to the memory location.
00132   for (pred_iterator PI = pred_begin(CurBlock), E = pred_end(CurBlock);
00133        PI != E; ++PI)
00134     if (!isPathTransparentTo(*PI, Dom, Ptr, Size, AA, Visited,
00135                              TransparentBlocks))
00136       return false;
00137   return true;
00138 }
00139 
00140 /// getCallEqualNumberNodes - Given a call instruction, find other calls that
00141 /// have the same value number.
00142 void LoadVN::getCallEqualNumberNodes(CallInst *CI,
00143                                      std::vector<Value*> &RetVals) const {
00144   Function *CF = CI->getCalledFunction();
00145   if (CF == 0) return;  // Indirect call.
00146   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
00147   if (!AA.onlyReadsMemory(CF)) return;  // Nothing we can do.
00148 
00149   // Scan all of the arguments of the function, looking for one that is not
00150   // global.  In particular, we would prefer to have an argument or instruction
00151   // operand to chase the def-use chains of.
00152   Value *Op = CF;
00153   for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i)
00154     if (isa<Argument>(CI->getOperand(i)) ||
00155         isa<Instruction>(CI->getOperand(i))) {
00156       Op = CI->getOperand(i);
00157       break;
00158     }
00159 
00160   // Identify all lexically identical calls in this function.
00161   std::vector<CallInst*> IdenticalCalls;
00162 
00163   Function *CIFunc = CI->getParent()->getParent();
00164   for (Value::use_iterator UI = Op->use_begin(), E = Op->use_end(); UI != E;
00165        ++UI)
00166     if (CallInst *C = dyn_cast<CallInst>(*UI))
00167       if (C->getNumOperands() == CI->getNumOperands() &&
00168           C->getOperand(0) == CI->getOperand(0) &&
00169           C->getParent()->getParent() == CIFunc && C != CI) {
00170         bool AllOperandsEqual = true;
00171         for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i)
00172           if (C->getOperand(i) != CI->getOperand(i)) {
00173             AllOperandsEqual = false;
00174             break;
00175           }
00176 
00177         if (AllOperandsEqual)
00178           IdenticalCalls.push_back(C);
00179       }
00180   
00181   if (IdenticalCalls.empty()) return;
00182 
00183   // Eliminate duplicates, which could occur if we chose a value that is passed
00184   // into a call site multiple times.
00185   std::sort(IdenticalCalls.begin(), IdenticalCalls.end());
00186   IdenticalCalls.erase(std::unique(IdenticalCalls.begin(),IdenticalCalls.end()),
00187                        IdenticalCalls.end());
00188 
00189   // If the call reads memory, we must make sure that there are no stores
00190   // between the calls in question.
00191   //
00192   // FIXME: This should use mod/ref information.  What we really care about it
00193   // whether an intervening instruction could modify memory that is read, not
00194   // ANY memory.
00195   //
00196   if (!AA.doesNotAccessMemory(CF)) {
00197     DominatorSet &DomSetInfo = getAnalysis<DominatorSet>();
00198     BasicBlock *CIBB = CI->getParent();
00199     for (unsigned i = 0; i != IdenticalCalls.size(); ++i) {
00200       CallInst *C = IdenticalCalls[i];
00201       bool CantEqual = false;
00202 
00203       if (DomSetInfo.dominates(CIBB, C->getParent())) {
00204         // FIXME: we currently only handle the case where both calls are in the
00205         // same basic block.
00206         if (CIBB != C->getParent()) {
00207           CantEqual = true;
00208         } else {
00209           Instruction *First = CI, *Second = C;
00210           if (!DomSetInfo.dominates(CI, C))
00211             std::swap(First, Second);
00212           
00213           // Scan the instructions between the calls, checking for stores or
00214           // calls to dangerous functions.
00215           BasicBlock::iterator I = First;
00216           for (++First; I != BasicBlock::iterator(Second); ++I) {
00217             if (isa<StoreInst>(I)) {
00218               // FIXME: We could use mod/ref information to make this much
00219               // better!
00220               CantEqual = true;
00221               break;
00222             } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
00223               if (CI->getCalledFunction() == 0 ||
00224                   !AA.onlyReadsMemory(CI->getCalledFunction())) {
00225                 CantEqual = true;
00226                 break;
00227               }
00228             } else if (I->mayWriteToMemory()) {
00229               CantEqual = true;
00230               break;
00231             }
00232           }
00233         }
00234 
00235       } else if (DomSetInfo.dominates(C->getParent(), CIBB)) {
00236         // FIXME: We could implement this, but we don't for now.
00237         CantEqual = true;
00238       } else {
00239         // FIXME: if one doesn't dominate the other, we can't tell yet.
00240         CantEqual = true;
00241       }
00242 
00243 
00244       if (CantEqual) {
00245         // This call does not produce the same value as the one in the query.
00246         std::swap(IdenticalCalls[i--], IdenticalCalls.back());
00247         IdenticalCalls.pop_back();
00248       }
00249     }
00250   }
00251 
00252   // Any calls that are identical and not destroyed will produce equal values!
00253   for (unsigned i = 0, e = IdenticalCalls.size(); i != e; ++i)
00254     RetVals.push_back(IdenticalCalls[i]);
00255 }
00256 
00257 // getEqualNumberNodes - Return nodes with the same value number as the
00258 // specified Value.  This fills in the argument vector with any equal values.
00259 //
00260 void LoadVN::getEqualNumberNodes(Value *V,
00261                                  std::vector<Value*> &RetVals) const {
00262   // If the alias analysis has any must alias information to share with us, we
00263   // can definitely use it.
00264   if (isa<PointerType>(V->getType()))
00265     getAnalysis<AliasAnalysis>().getMustAliases(V, RetVals);
00266 
00267   if (!isa<LoadInst>(V)) {
00268     if (CallInst *CI = dyn_cast<CallInst>(V))
00269       getCallEqualNumberNodes(CI, RetVals);
00270 
00271     // Not a load instruction?  Just chain to the base value numbering
00272     // implementation to satisfy the request...
00273     assert(&getAnalysis<ValueNumbering>() != (ValueNumbering*)this &&
00274            "getAnalysis() returned this!");
00275 
00276     return getAnalysis<ValueNumbering>().getEqualNumberNodes(V, RetVals);
00277   }
00278 
00279   // Volatile loads cannot be replaced with the value of other loads.
00280   LoadInst *LI = cast<LoadInst>(V);
00281   if (LI->isVolatile())
00282     return getAnalysis<ValueNumbering>().getEqualNumberNodes(V, RetVals);
00283   
00284   // If we have a load instruction, find all of the load and store instructions
00285   // that use the same source operand.  We implement this recursively, because
00286   // there could be a load of a load of a load that are all identical.  We are
00287   // guaranteed that this cannot be an infinite recursion because load
00288   // instructions would have to pass through a PHI node in order for there to be
00289   // a cycle.  The PHI node would be handled by the else case here, breaking the
00290   // infinite recursion.
00291   //
00292   std::vector<Value*> PointerSources;
00293   getEqualNumberNodes(LI->getOperand(0), PointerSources);
00294   PointerSources.push_back(LI->getOperand(0));
00295   
00296   BasicBlock *LoadBB = LI->getParent();
00297   Function *F = LoadBB->getParent();
00298   
00299   // Now that we know the set of equivalent source pointers for the load
00300   // instruction, look to see if there are any load or store candidates that are
00301   // identical.
00302   //
00303   std::map<BasicBlock*, std::vector<LoadInst*> >  CandidateLoads;
00304   std::map<BasicBlock*, std::vector<StoreInst*> > CandidateStores;
00305   std::set<AllocationInst*> Allocations;
00306   
00307   while (!PointerSources.empty()) {
00308     Value *Source = PointerSources.back();
00309     PointerSources.pop_back();                // Get a source pointer...
00310 
00311     if (AllocationInst *AI = dyn_cast<AllocationInst>(Source))
00312       Allocations.insert(AI);
00313     
00314     for (Value::use_iterator UI = Source->use_begin(), UE = Source->use_end();
00315          UI != UE; ++UI)
00316       if (LoadInst *Cand = dyn_cast<LoadInst>(*UI)) {// Is a load of source?
00317         if (Cand->getParent()->getParent() == F &&   // In the same function?
00318             Cand != LI && !Cand->isVolatile())       // Not LI itself?
00319           CandidateLoads[Cand->getParent()].push_back(Cand);     // Got one...
00320       } else if (StoreInst *Cand = dyn_cast<StoreInst>(*UI)) {
00321         if (Cand->getParent()->getParent() == F && !Cand->isVolatile() &&
00322             Cand->getOperand(1) == Source)  // It's a store THROUGH the ptr...
00323           CandidateStores[Cand->getParent()].push_back(Cand);
00324       }
00325   }
00326   
00327   // Get alias analysis & dominators.
00328   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
00329   DominatorSet &DomSetInfo = getAnalysis<DominatorSet>();
00330   Value *LoadPtr = LI->getOperand(0);
00331   // Find out how many bytes of memory are loaded by the load instruction...
00332   unsigned LoadSize = getAnalysis<TargetData>().getTypeSize(LI->getType());
00333 
00334   // Find all of the candidate loads and stores that are in the same block as
00335   // the defining instruction.
00336   std::set<Instruction*> Instrs;
00337   Instrs.insert(CandidateLoads[LoadBB].begin(), CandidateLoads[LoadBB].end());
00338   CandidateLoads.erase(LoadBB);
00339   Instrs.insert(CandidateStores[LoadBB].begin(), CandidateStores[LoadBB].end());
00340   CandidateStores.erase(LoadBB);
00341 
00342   // Figure out if the load is invalidated from the entry of the block it is in
00343   // until the actual instruction.  This scans the block backwards from LI.  If
00344   // we see any candidate load or store instructions, then we know that the
00345   // candidates have the same value # as LI.
00346   bool LoadInvalidatedInBBBefore = false;
00347   for (BasicBlock::iterator I = LI; I != LoadBB->begin(); ) {
00348     --I;
00349     // If this instruction is a candidate load before LI, we know there are no
00350     // invalidating instructions between it and LI, so they have the same value
00351     // number.
00352     if (isa<LoadInst>(I) && Instrs.count(I)) {
00353       RetVals.push_back(I);
00354       Instrs.erase(I);
00355     } else if (AllocationInst *AI = dyn_cast<AllocationInst>(I)) {
00356       // If we run into an allocation of the value being loaded, then the
00357       // contenxt are not initialized.  We can return any value, so we will
00358       // return a zero.
00359       if (Allocations.count(AI)) {
00360         LoadInvalidatedInBBBefore = true;
00361         RetVals.push_back(Constant::getNullValue(LI->getType()));
00362         break;
00363       }
00364     }
00365 
00366     if (AA.getModRefInfo(I, LoadPtr, LoadSize) & AliasAnalysis::Mod) {
00367       // If the invalidating instruction is a store, and its in our candidate
00368       // set, then we can do store-load forwarding: the load has the same value
00369       // # as the stored value.
00370       if (isa<StoreInst>(I) && Instrs.count(I)) {
00371         Instrs.erase(I);
00372         RetVals.push_back(I->getOperand(0));
00373       }
00374 
00375       LoadInvalidatedInBBBefore = true;
00376       break;
00377     }
00378   }
00379 
00380   // Figure out if the load is invalidated between the load and the exit of the
00381   // block it is defined in.  While we are scanning the current basic block, if
00382   // we see any candidate loads, then we know they have the same value # as LI.
00383   //
00384   bool LoadInvalidatedInBBAfter = false;
00385   for (BasicBlock::iterator I = LI->getNext(); I != LoadBB->end(); ++I) {
00386     // If this instruction is a load, then this instruction returns the same
00387     // value as LI.
00388     if (isa<LoadInst>(I) && Instrs.count(I)) {
00389       RetVals.push_back(I);
00390       Instrs.erase(I);
00391     }
00392 
00393     if (AA.getModRefInfo(I, LoadPtr, LoadSize) & AliasAnalysis::Mod) {
00394       LoadInvalidatedInBBAfter = true;
00395       break;
00396     }
00397   }
00398 
00399   // If there is anything left in the Instrs set, it could not possibly equal
00400   // LI.
00401   Instrs.clear();
00402 
00403   // TransparentBlocks - For each basic block the load/store is alive across,
00404   // figure out if the pointer is invalidated or not.  If it is invalidated, the
00405   // boolean is set to false, if it's not it is set to true.  If we don't know
00406   // yet, the entry is not in the map.
00407   std::map<BasicBlock*, bool> TransparentBlocks;
00408 
00409   // Loop over all of the basic blocks that also load the value.  If the value
00410   // is live across the CFG from the source to destination blocks, and if the
00411   // value is not invalidated in either the source or destination blocks, add it
00412   // to the equivalence sets.
00413   for (std::map<BasicBlock*, std::vector<LoadInst*> >::iterator
00414          I = CandidateLoads.begin(), E = CandidateLoads.end(); I != E; ++I) {
00415     bool CantEqual = false;
00416 
00417     // Right now we only can handle cases where one load dominates the other.
00418     // FIXME: generalize this!
00419     BasicBlock *BB1 = I->first, *BB2 = LoadBB;
00420     if (DomSetInfo.dominates(BB1, BB2)) {
00421       // The other load dominates LI.  If the loaded value is killed entering
00422       // the LoadBB block, we know the load is not live.
00423       if (LoadInvalidatedInBBBefore)
00424         CantEqual = true;
00425     } else if (DomSetInfo.dominates(BB2, BB1)) {
00426       std::swap(BB1, BB2);          // Canonicalize
00427       // LI dominates the other load.  If the loaded value is killed exiting
00428       // the LoadBB block, we know the load is not live.
00429       if (LoadInvalidatedInBBAfter)
00430         CantEqual = true;
00431     } else {
00432       // None of these loads can VN the same.
00433       CantEqual = true;
00434     }
00435 
00436     if (!CantEqual) {
00437       // Ok, at this point, we know that BB1 dominates BB2, and that there is
00438       // nothing in the LI block that kills the loaded value.  Check to see if
00439       // the value is live across the CFG.
00440       std::set<BasicBlock*> Visited;
00441       for (pred_iterator PI = pred_begin(BB2), E = pred_end(BB2); PI!=E; ++PI)
00442         if (!isPathTransparentTo(*PI, BB1, LoadPtr, LoadSize, AA,
00443                                  Visited, TransparentBlocks)) {
00444           // None of these loads can VN the same.
00445           CantEqual = true;
00446           break;
00447         }
00448     }
00449 
00450     // If the loads can equal so far, scan the basic block that contains the
00451     // loads under consideration to see if they are invalidated in the block.
00452     // For any loads that are not invalidated, add them to the equivalence
00453     // set!
00454     if (!CantEqual) {
00455       Instrs.insert(I->second.begin(), I->second.end());
00456       if (BB1 == LoadBB) {
00457         // If LI dominates the block in question, check to see if any of the
00458         // loads in this block are invalidated before they are reached.
00459         for (BasicBlock::iterator BBI = I->first->begin(); ; ++BBI) {
00460           if (isa<LoadInst>(BBI) && Instrs.count(BBI)) {
00461             // The load is in the set!
00462             RetVals.push_back(BBI);
00463             Instrs.erase(BBI);
00464             if (Instrs.empty()) break;
00465           } else if (AA.getModRefInfo(BBI, LoadPtr, LoadSize)
00466                              & AliasAnalysis::Mod) {
00467             // If there is a modifying instruction, nothing below it will value
00468             // # the same.
00469             break;
00470           }
00471         }
00472       } else {
00473         // If the block dominates LI, make sure that the loads in the block are
00474         // not invalidated before the block ends.
00475         BasicBlock::iterator BBI = I->first->end();
00476         while (1) {
00477           --BBI;
00478           if (isa<LoadInst>(BBI) && Instrs.count(BBI)) {
00479             // The load is in the set!
00480             RetVals.push_back(BBI);
00481             Instrs.erase(BBI);
00482             if (Instrs.empty()) break;
00483           } else if (AA.getModRefInfo(BBI, LoadPtr, LoadSize)
00484                              & AliasAnalysis::Mod) {
00485             // If there is a modifying instruction, nothing above it will value
00486             // # the same.
00487             break;
00488           }
00489         }
00490       }
00491 
00492       Instrs.clear();
00493     }
00494   }
00495 
00496   // Handle candidate stores.  If the loaded location is clobbered on entrance
00497   // to the LoadBB, no store outside of the LoadBB can value number equal, so
00498   // quick exit.
00499   if (LoadInvalidatedInBBBefore)
00500     return;
00501 
00502   for (std::map<BasicBlock*, std::vector<StoreInst*> >::iterator
00503          I = CandidateStores.begin(), E = CandidateStores.end(); I != E; ++I)
00504     if (DomSetInfo.dominates(I->first, LoadBB)) {
00505       // Check to see if the path from the store to the load is transparent
00506       // w.r.t. the memory location.
00507       bool CantEqual = false;
00508       std::set<BasicBlock*> Visited;
00509       for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
00510            PI != E; ++PI)
00511         if (!isPathTransparentTo(*PI, I->first, LoadPtr, LoadSize, AA,
00512                                  Visited, TransparentBlocks)) {
00513           // None of these stores can VN the same.
00514           CantEqual = true;
00515           break;
00516         }
00517       Visited.clear();
00518       if (!CantEqual) {
00519         // Okay, the path from the store block to the load block is clear, and
00520         // we know that there are no invalidating instructions from the start
00521         // of the load block to the load itself.  Now we just scan the store
00522         // block.
00523 
00524         BasicBlock::iterator BBI = I->first->end();
00525         while (1) {
00526           assert(BBI != I->first->begin() &&
00527                  "There is a store in this block of the pointer, but the store"
00528                  " doesn't mod the address being stored to??  Must be a bug in"
00529                  " the alias analysis implementation!");
00530           --BBI;
00531           if (AA.getModRefInfo(BBI, LoadPtr, LoadSize) & AliasAnalysis::Mod) {
00532             // If the invalidating instruction is one of the candidates,
00533             // then it provides the value the load loads.
00534             if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
00535               if (std::find(I->second.begin(), I->second.end(), SI) !=
00536                   I->second.end())
00537                 RetVals.push_back(SI->getOperand(0));
00538             break;
00539           }
00540         }
00541       }
00542     }
00543 }