LLVM API Documentation

Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

TailDuplication.cpp

Go to the documentation of this file.
00001 //===- TailDuplication.cpp - Simplify CFG through tail duplication --------===//
00002 // 
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 // 
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This pass performs a limited form of tail duplication, intended to simplify
00011 // CFGs by removing some unconditional branches.  This pass is necessary to
00012 // straighten out loops created by the C front-end, but also is capable of
00013 // making other code nicer.  After this pass is run, the CFG simplify pass
00014 // should be run to clean up the mess.
00015 //
00016 // This pass could be enhanced in the future to use profile information to be
00017 // more aggressive.
00018 //
00019 //===----------------------------------------------------------------------===//
00020 
00021 #include "llvm/Transforms/Scalar.h"
00022 #include "llvm/Constant.h"
00023 #include "llvm/Function.h"
00024 #include "llvm/Instructions.h"
00025 #include "llvm/IntrinsicInst.h"
00026 #include "llvm/Pass.h"
00027 #include "llvm/Type.h"
00028 #include "llvm/Support/CFG.h"
00029 #include "llvm/Transforms/Utils/Local.h"
00030 #include "llvm/Support/CommandLine.h"
00031 #include "llvm/Support/Debug.h"
00032 #include "llvm/ADT/Statistic.h"
00033 using namespace llvm;
00034 
00035 namespace {
00036   cl::opt<unsigned>
00037   Threshold("taildup-threshold", cl::desc("Max block size to tail duplicate"),
00038             cl::init(6), cl::Hidden);
00039   Statistic<> NumEliminated("tailduplicate",
00040                             "Number of unconditional branches eliminated");
00041   Statistic<> NumPHINodes("tailduplicate", "Number of phi nodes inserted");
00042 
00043   class TailDup : public FunctionPass {
00044     bool runOnFunction(Function &F);
00045   private:
00046     inline bool shouldEliminateUnconditionalBranch(TerminatorInst *TI);
00047     inline void eliminateUnconditionalBranch(BranchInst *BI);
00048   };
00049   RegisterOpt<TailDup> X("tailduplicate", "Tail Duplication");
00050 }
00051 
00052 // Public interface to the Tail Duplication pass
00053 FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); }
00054 
00055 /// runOnFunction - Top level algorithm - Loop over each unconditional branch in
00056 /// the function, eliminating it if it looks attractive enough.
00057 ///
00058 bool TailDup::runOnFunction(Function &F) {
00059   bool Changed = false;
00060   for (Function::iterator I = F.begin(), E = F.end(); I != E; )
00061     if (shouldEliminateUnconditionalBranch(I->getTerminator())) {
00062       eliminateUnconditionalBranch(cast<BranchInst>(I->getTerminator()));
00063       Changed = true;
00064     } else {
00065       ++I;
00066     }
00067   return Changed;
00068 }
00069 
00070 /// shouldEliminateUnconditionalBranch - Return true if this branch looks
00071 /// attractive to eliminate.  We eliminate the branch if the destination basic
00072 /// block has <= 5 instructions in it, not counting PHI nodes.  In practice,
00073 /// since one of these is a terminator instruction, this means that we will add
00074 /// up to 4 instructions to the new block.
00075 ///
00076 /// We don't count PHI nodes in the count since they will be removed when the
00077 /// contents of the block are copied over.
00078 ///
00079 bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI) {
00080   BranchInst *BI = dyn_cast<BranchInst>(TI);
00081   if (!BI || !BI->isUnconditional()) return false;  // Not an uncond branch!
00082 
00083   BasicBlock *Dest = BI->getSuccessor(0);
00084   if (Dest == BI->getParent()) return false;        // Do not loop infinitely!
00085 
00086   // Do not inline a block if we will just get another branch to the same block!
00087   TerminatorInst *DTI = Dest->getTerminator();
00088   if (BranchInst *DBI = dyn_cast<BranchInst>(DTI))
00089     if (DBI->isUnconditional() && DBI->getSuccessor(0) == Dest)
00090       return false;                                 // Do not loop infinitely!
00091 
00092   // FIXME: DemoteRegToStack cannot yet demote invoke instructions to the stack,
00093   // because doing so would require breaking critical edges.  This should be
00094   // fixed eventually.
00095   if (!DTI->use_empty())
00096     return false;
00097 
00098   // Do not bother working on dead blocks...
00099   pred_iterator PI = pred_begin(Dest), PE = pred_end(Dest);
00100   if (PI == PE && Dest != Dest->getParent()->begin())
00101     return false;   // It's just a dead block, ignore it...
00102 
00103   // Also, do not bother with blocks with only a single predecessor: simplify
00104   // CFG will fold these two blocks together!
00105   ++PI;
00106   if (PI == PE) return false;  // Exactly one predecessor!
00107 
00108   BasicBlock::iterator I = Dest->begin();
00109   while (isa<PHINode>(*I)) ++I;
00110 
00111   for (unsigned Size = 0; I != Dest->end(); ++I) {
00112     if (Size == Threshold) return false;  // The block is too large.
00113     // Only count instructions that are not debugger intrinsics.
00114     if (!isa<DbgInfoIntrinsic>(I)) ++Size;
00115   }
00116 
00117   // Do not tail duplicate a block that has thousands of successors into a block
00118   // with a single successor if the block has many other predecessors.  This can
00119   // cause an N^2 explosion in CFG edges (and PHI node entries), as seen in
00120   // cases that have a large number of indirect gotos.
00121   unsigned NumSuccs = DTI->getNumSuccessors();
00122   if (NumSuccs > 8) {
00123     unsigned TooMany = 128;
00124     if (NumSuccs >= TooMany) return false;
00125     TooMany = TooMany/NumSuccs;
00126     for (; PI != PE; ++PI)
00127       if (TooMany-- == 0) return false;
00128   }
00129 
00130   return true;  
00131 }
00132 
00133 /// FindObviousSharedDomOf - We know there is a branch from SrcBlock to
00134 /// DestBlock, and that SrcBlock is not the only predecessor of DstBlock.  If we
00135 /// can find a predecessor of SrcBlock that is a dominator of both SrcBlock and
00136 /// DstBlock, return it.
00137 static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock,
00138                                           BasicBlock *DstBlock) {
00139   // SrcBlock must have a single predecessor.
00140   pred_iterator PI = pred_begin(SrcBlock), PE = pred_end(SrcBlock);
00141   if (PI == PE || ++PI != PE) return 0;
00142 
00143   BasicBlock *SrcPred = *pred_begin(SrcBlock);
00144   
00145   // Look at the predecessors of DstBlock.  One of them will be SrcBlock.  If
00146   // there is only one other pred, get it, otherwise we can't handle it.
00147   PI = pred_begin(DstBlock); PE = pred_end(DstBlock);
00148   BasicBlock *DstOtherPred = 0;
00149   if (*PI == SrcBlock) {
00150     if (++PI == PE) return 0;
00151     DstOtherPred = *PI;
00152     if (++PI != PE) return 0;
00153   } else {
00154     DstOtherPred = *PI;
00155     if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0;
00156   }
00157 
00158   // We can handle two situations here: "if then" and "if then else" blocks.  An
00159   // 'if then' situation is just where DstOtherPred == SrcPred.
00160   if (DstOtherPred == SrcPred)
00161     return SrcPred;
00162 
00163   // Check to see if we have an "if then else" situation, which means that
00164   // DstOtherPred will have a single predecessor and it will be SrcPred.
00165   PI = pred_begin(DstOtherPred); PE = pred_end(DstOtherPred);
00166   if (PI != PE && *PI == SrcPred) {
00167     if (++PI != PE) return 0;  // Not a single pred.
00168     return SrcPred;  // Otherwise, it's an "if then" situation.  Return the if.
00169   }
00170 
00171   // Otherwise, this is something we can't handle.
00172   return 0;
00173 }
00174 
00175 
00176 /// eliminateUnconditionalBranch - Clone the instructions from the destination
00177 /// block into the source block, eliminating the specified unconditional branch.
00178 /// If the destination block defines values used by successors of the dest
00179 /// block, we may need to insert PHI nodes.
00180 ///
00181 void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
00182   BasicBlock *SourceBlock = Branch->getParent();
00183   BasicBlock *DestBlock = Branch->getSuccessor(0);
00184   assert(SourceBlock != DestBlock && "Our predicate is broken!");
00185 
00186   DEBUG(std::cerr << "TailDuplication[" << SourceBlock->getParent()->getName()
00187                   << "]: Eliminating branch: " << *Branch);
00188 
00189   // See if we can avoid duplicating code by moving it up to a dominator of both
00190   // blocks.
00191   if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) {
00192     DEBUG(std::cerr << "Found shared dominator: " << DomBlock->getName()
00193                     << "\n");
00194 
00195     // If there are non-phi instructions in DestBlock that have no operands
00196     // defined in DestBlock, and if the instruction has no side effects, we can
00197     // move the instruction to DomBlock instead of duplicating it.
00198     BasicBlock::iterator BBI = DestBlock->begin();
00199     while (isa<PHINode>(BBI)) ++BBI;
00200     while (!isa<TerminatorInst>(BBI)) {
00201       Instruction *I = BBI++;
00202       
00203       bool CanHoist = !I->isTrapping() && !I->mayWriteToMemory();
00204       if (CanHoist) {
00205         for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
00206           if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op)))
00207             if (OpI->getParent() == DestBlock ||
00208                 (isa<InvokeInst>(OpI) && OpI->getParent() == DomBlock)) {
00209               CanHoist = false;
00210               break;
00211             }
00212         if (CanHoist) {
00213           // Remove from DestBlock, move right before the term in DomBlock.
00214           DestBlock->getInstList().remove(I);
00215           DomBlock->getInstList().insert(DomBlock->getTerminator(), I);
00216           DEBUG(std::cerr << "Hoisted: " << *I);
00217         }
00218       }
00219     }
00220   }
00221 
00222   // Tail duplication can not update SSA properties correctly if the values
00223   // defined in the duplicated tail are used outside of the tail itself.  For
00224   // this reason, we spill all values that are used outside of the tail to the
00225   // stack.
00226   for (BasicBlock::iterator I = DestBlock->begin(); I != DestBlock->end(); ++I)
00227     for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
00228          ++UI) {
00229       bool ShouldDemote = false;
00230       if (cast<Instruction>(*UI)->getParent() != DestBlock) {
00231         // We must allow our successors to use tail values in their PHI nodes
00232         // (if the incoming value corresponds to the tail block).
00233         if (PHINode *PN = dyn_cast<PHINode>(*UI)) {
00234           for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
00235             if (PN->getIncomingValue(i) == I &&
00236                 PN->getIncomingBlock(i) != DestBlock) {
00237               ShouldDemote = true;
00238               break;
00239             }
00240 
00241         } else {
00242           ShouldDemote = true;
00243         }
00244       } else if (PHINode *PN = dyn_cast<PHINode>(cast<Instruction>(*UI))) {
00245         // If the user of this instruction is a PHI node in the current block,
00246         // which has an entry from another block using the value, spill it.
00247         for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
00248           if (PN->getIncomingValue(i) == I &&
00249               PN->getIncomingBlock(i) != DestBlock) {
00250             ShouldDemote = true;
00251             break;
00252           }
00253       }
00254 
00255       if (ShouldDemote) {
00256         // We found a use outside of the tail.  Create a new stack slot to
00257         // break this inter-block usage pattern.
00258         DemoteRegToStack(*I);
00259         break;
00260       }
00261     }
00262 
00263   // We are going to have to map operands from the original block B to the new
00264   // copy of the block B'.  If there are PHI nodes in the DestBlock, these PHI
00265   // nodes also define part of this mapping.  Loop over these PHI nodes, adding
00266   // them to our mapping.
00267   //
00268   std::map<Value*, Value*> ValueMapping;
00269 
00270   BasicBlock::iterator BI = DestBlock->begin();
00271   bool HadPHINodes = isa<PHINode>(BI);
00272   for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
00273     ValueMapping[PN] = PN->getIncomingValueForBlock(SourceBlock);
00274 
00275   // Clone the non-phi instructions of the dest block into the source block,
00276   // keeping track of the mapping...
00277   //
00278   for (; BI != DestBlock->end(); ++BI) {
00279     Instruction *New = BI->clone();
00280     New->setName(BI->getName());
00281     SourceBlock->getInstList().push_back(New);
00282     ValueMapping[BI] = New;
00283   }
00284 
00285   // Now that we have built the mapping information and cloned all of the
00286   // instructions (giving us a new terminator, among other things), walk the new
00287   // instructions, rewriting references of old instructions to use new
00288   // instructions.
00289   //
00290   BI = Branch; ++BI;  // Get an iterator to the first new instruction
00291   for (; BI != SourceBlock->end(); ++BI)
00292     for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i)
00293       if (Value *Remapped = ValueMapping[BI->getOperand(i)])
00294         BI->setOperand(i, Remapped);
00295 
00296   // Next we check to see if any of the successors of DestBlock had PHI nodes.
00297   // If so, we need to add entries to the PHI nodes for SourceBlock now.
00298   for (succ_iterator SI = succ_begin(DestBlock), SE = succ_end(DestBlock);
00299        SI != SE; ++SI) {
00300     BasicBlock *Succ = *SI;
00301     for (BasicBlock::iterator PNI = Succ->begin(); isa<PHINode>(PNI); ++PNI) {
00302       PHINode *PN = cast<PHINode>(PNI);
00303       // Ok, we have a PHI node.  Figure out what the incoming value was for the
00304       // DestBlock.
00305       Value *IV = PN->getIncomingValueForBlock(DestBlock);
00306       
00307       // Remap the value if necessary...
00308       if (Value *MappedIV = ValueMapping[IV])
00309         IV = MappedIV;
00310       PN->addIncoming(IV, SourceBlock);
00311     }
00312   }
00313 
00314   // Next, remove the old branch instruction, and any PHI node entries that we
00315   // had.
00316   BI = Branch; ++BI;  // Get an iterator to the first new instruction
00317   DestBlock->removePredecessor(SourceBlock); // Remove entries in PHI nodes...
00318   SourceBlock->getInstList().erase(Branch);  // Destroy the uncond branch...
00319 
00320   // Final step: now that we have finished everything up, walk the cloned
00321   // instructions one last time, constant propagating and DCE'ing them, because
00322   // they may not be needed anymore.
00323   //
00324   if (HadPHINodes)
00325     while (BI != SourceBlock->end())
00326       if (!dceInstruction(BI) && !doConstantPropagation(BI))
00327         ++BI;
00328 
00329   ++NumEliminated;  // We just killed a branch!
00330 }