LLVM API Documentation
00001 //===- TailDuplication.cpp - Simplify CFG through tail duplication --------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This pass performs a limited form of tail duplication, intended to simplify 00011 // CFGs by removing some unconditional branches. This pass is necessary to 00012 // straighten out loops created by the C front-end, but also is capable of 00013 // making other code nicer. After this pass is run, the CFG simplify pass 00014 // should be run to clean up the mess. 00015 // 00016 // This pass could be enhanced in the future to use profile information to be 00017 // more aggressive. 00018 // 00019 //===----------------------------------------------------------------------===// 00020 00021 #include "llvm/Transforms/Scalar.h" 00022 #include "llvm/Constant.h" 00023 #include "llvm/Function.h" 00024 #include "llvm/Instructions.h" 00025 #include "llvm/IntrinsicInst.h" 00026 #include "llvm/Pass.h" 00027 #include "llvm/Type.h" 00028 #include "llvm/Support/CFG.h" 00029 #include "llvm/Transforms/Utils/Local.h" 00030 #include "llvm/Support/CommandLine.h" 00031 #include "llvm/Support/Debug.h" 00032 #include "llvm/ADT/Statistic.h" 00033 #include <iostream> 00034 using namespace llvm; 00035 00036 namespace { 00037 cl::opt<unsigned> 00038 Threshold("taildup-threshold", cl::desc("Max block size to tail duplicate"), 00039 cl::init(6), cl::Hidden); 00040 Statistic<> NumEliminated("tailduplicate", 00041 "Number of unconditional branches eliminated"); 00042 Statistic<> NumPHINodes("tailduplicate", "Number of phi nodes inserted"); 00043 00044 class TailDup : public FunctionPass { 00045 bool runOnFunction(Function &F); 00046 private: 00047 inline bool shouldEliminateUnconditionalBranch(TerminatorInst *TI); 00048 inline void eliminateUnconditionalBranch(BranchInst *BI); 00049 }; 00050 RegisterOpt<TailDup> X("tailduplicate", "Tail Duplication"); 00051 } 00052 00053 // Public interface to the Tail Duplication pass 00054 FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); } 00055 00056 /// runOnFunction - Top level algorithm - Loop over each unconditional branch in 00057 /// the function, eliminating it if it looks attractive enough. 00058 /// 00059 bool TailDup::runOnFunction(Function &F) { 00060 bool Changed = false; 00061 for (Function::iterator I = F.begin(), E = F.end(); I != E; ) 00062 if (shouldEliminateUnconditionalBranch(I->getTerminator())) { 00063 eliminateUnconditionalBranch(cast<BranchInst>(I->getTerminator())); 00064 Changed = true; 00065 } else { 00066 ++I; 00067 } 00068 return Changed; 00069 } 00070 00071 /// shouldEliminateUnconditionalBranch - Return true if this branch looks 00072 /// attractive to eliminate. We eliminate the branch if the destination basic 00073 /// block has <= 5 instructions in it, not counting PHI nodes. In practice, 00074 /// since one of these is a terminator instruction, this means that we will add 00075 /// up to 4 instructions to the new block. 00076 /// 00077 /// We don't count PHI nodes in the count since they will be removed when the 00078 /// contents of the block are copied over. 00079 /// 00080 bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI) { 00081 BranchInst *BI = dyn_cast<BranchInst>(TI); 00082 if (!BI || !BI->isUnconditional()) return false; // Not an uncond branch! 00083 00084 BasicBlock *Dest = BI->getSuccessor(0); 00085 if (Dest == BI->getParent()) return false; // Do not loop infinitely! 00086 00087 // Do not inline a block if we will just get another branch to the same block! 00088 TerminatorInst *DTI = Dest->getTerminator(); 00089 if (BranchInst *DBI = dyn_cast<BranchInst>(DTI)) 00090 if (DBI->isUnconditional() && DBI->getSuccessor(0) == Dest) 00091 return false; // Do not loop infinitely! 00092 00093 // FIXME: DemoteRegToStack cannot yet demote invoke instructions to the stack, 00094 // because doing so would require breaking critical edges. This should be 00095 // fixed eventually. 00096 if (!DTI->use_empty()) 00097 return false; 00098 00099 // Do not bother working on dead blocks... 00100 pred_iterator PI = pred_begin(Dest), PE = pred_end(Dest); 00101 if (PI == PE && Dest != Dest->getParent()->begin()) 00102 return false; // It's just a dead block, ignore it... 00103 00104 // Also, do not bother with blocks with only a single predecessor: simplify 00105 // CFG will fold these two blocks together! 00106 ++PI; 00107 if (PI == PE) return false; // Exactly one predecessor! 00108 00109 BasicBlock::iterator I = Dest->begin(); 00110 while (isa<PHINode>(*I)) ++I; 00111 00112 for (unsigned Size = 0; I != Dest->end(); ++I) { 00113 if (Size == Threshold) return false; // The block is too large. 00114 // Only count instructions that are not debugger intrinsics. 00115 if (!isa<DbgInfoIntrinsic>(I)) ++Size; 00116 } 00117 00118 // Do not tail duplicate a block that has thousands of successors into a block 00119 // with a single successor if the block has many other predecessors. This can 00120 // cause an N^2 explosion in CFG edges (and PHI node entries), as seen in 00121 // cases that have a large number of indirect gotos. 00122 unsigned NumSuccs = DTI->getNumSuccessors(); 00123 if (NumSuccs > 8) { 00124 unsigned TooMany = 128; 00125 if (NumSuccs >= TooMany) return false; 00126 TooMany = TooMany/NumSuccs; 00127 for (; PI != PE; ++PI) 00128 if (TooMany-- == 0) return false; 00129 } 00130 00131 return true; 00132 } 00133 00134 /// FindObviousSharedDomOf - We know there is a branch from SrcBlock to 00135 /// DestBlock, and that SrcBlock is not the only predecessor of DstBlock. If we 00136 /// can find a predecessor of SrcBlock that is a dominator of both SrcBlock and 00137 /// DstBlock, return it. 00138 static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock, 00139 BasicBlock *DstBlock) { 00140 // SrcBlock must have a single predecessor. 00141 pred_iterator PI = pred_begin(SrcBlock), PE = pred_end(SrcBlock); 00142 if (PI == PE || ++PI != PE) return 0; 00143 00144 BasicBlock *SrcPred = *pred_begin(SrcBlock); 00145 00146 // Look at the predecessors of DstBlock. One of them will be SrcBlock. If 00147 // there is only one other pred, get it, otherwise we can't handle it. 00148 PI = pred_begin(DstBlock); PE = pred_end(DstBlock); 00149 BasicBlock *DstOtherPred = 0; 00150 if (*PI == SrcBlock) { 00151 if (++PI == PE) return 0; 00152 DstOtherPred = *PI; 00153 if (++PI != PE) return 0; 00154 } else { 00155 DstOtherPred = *PI; 00156 if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0; 00157 } 00158 00159 // We can handle two situations here: "if then" and "if then else" blocks. An 00160 // 'if then' situation is just where DstOtherPred == SrcPred. 00161 if (DstOtherPred == SrcPred) 00162 return SrcPred; 00163 00164 // Check to see if we have an "if then else" situation, which means that 00165 // DstOtherPred will have a single predecessor and it will be SrcPred. 00166 PI = pred_begin(DstOtherPred); PE = pred_end(DstOtherPred); 00167 if (PI != PE && *PI == SrcPred) { 00168 if (++PI != PE) return 0; // Not a single pred. 00169 return SrcPred; // Otherwise, it's an "if then" situation. Return the if. 00170 } 00171 00172 // Otherwise, this is something we can't handle. 00173 return 0; 00174 } 00175 00176 00177 /// eliminateUnconditionalBranch - Clone the instructions from the destination 00178 /// block into the source block, eliminating the specified unconditional branch. 00179 /// If the destination block defines values used by successors of the dest 00180 /// block, we may need to insert PHI nodes. 00181 /// 00182 void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { 00183 BasicBlock *SourceBlock = Branch->getParent(); 00184 BasicBlock *DestBlock = Branch->getSuccessor(0); 00185 assert(SourceBlock != DestBlock && "Our predicate is broken!"); 00186 00187 DEBUG(std::cerr << "TailDuplication[" << SourceBlock->getParent()->getName() 00188 << "]: Eliminating branch: " << *Branch); 00189 00190 // See if we can avoid duplicating code by moving it up to a dominator of both 00191 // blocks. 00192 if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) { 00193 DEBUG(std::cerr << "Found shared dominator: " << DomBlock->getName() 00194 << "\n"); 00195 00196 // If there are non-phi instructions in DestBlock that have no operands 00197 // defined in DestBlock, and if the instruction has no side effects, we can 00198 // move the instruction to DomBlock instead of duplicating it. 00199 BasicBlock::iterator BBI = DestBlock->begin(); 00200 while (isa<PHINode>(BBI)) ++BBI; 00201 while (!isa<TerminatorInst>(BBI)) { 00202 Instruction *I = BBI++; 00203 00204 bool CanHoist = !I->isTrapping() && !I->mayWriteToMemory(); 00205 if (CanHoist) { 00206 for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) 00207 if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op))) 00208 if (OpI->getParent() == DestBlock || 00209 (isa<InvokeInst>(OpI) && OpI->getParent() == DomBlock)) { 00210 CanHoist = false; 00211 break; 00212 } 00213 if (CanHoist) { 00214 // Remove from DestBlock, move right before the term in DomBlock. 00215 DestBlock->getInstList().remove(I); 00216 DomBlock->getInstList().insert(DomBlock->getTerminator(), I); 00217 DEBUG(std::cerr << "Hoisted: " << *I); 00218 } 00219 } 00220 } 00221 } 00222 00223 // Tail duplication can not update SSA properties correctly if the values 00224 // defined in the duplicated tail are used outside of the tail itself. For 00225 // this reason, we spill all values that are used outside of the tail to the 00226 // stack. 00227 for (BasicBlock::iterator I = DestBlock->begin(); I != DestBlock->end(); ++I) 00228 for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; 00229 ++UI) { 00230 bool ShouldDemote = false; 00231 if (cast<Instruction>(*UI)->getParent() != DestBlock) { 00232 // We must allow our successors to use tail values in their PHI nodes 00233 // (if the incoming value corresponds to the tail block). 00234 if (PHINode *PN = dyn_cast<PHINode>(*UI)) { 00235 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 00236 if (PN->getIncomingValue(i) == I && 00237 PN->getIncomingBlock(i) != DestBlock) { 00238 ShouldDemote = true; 00239 break; 00240 } 00241 00242 } else { 00243 ShouldDemote = true; 00244 } 00245 } else if (PHINode *PN = dyn_cast<PHINode>(cast<Instruction>(*UI))) { 00246 // If the user of this instruction is a PHI node in the current block, 00247 // which has an entry from another block using the value, spill it. 00248 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 00249 if (PN->getIncomingValue(i) == I && 00250 PN->getIncomingBlock(i) != DestBlock) { 00251 ShouldDemote = true; 00252 break; 00253 } 00254 } 00255 00256 if (ShouldDemote) { 00257 // We found a use outside of the tail. Create a new stack slot to 00258 // break this inter-block usage pattern. 00259 DemoteRegToStack(*I); 00260 break; 00261 } 00262 } 00263 00264 // We are going to have to map operands from the original block B to the new 00265 // copy of the block B'. If there are PHI nodes in the DestBlock, these PHI 00266 // nodes also define part of this mapping. Loop over these PHI nodes, adding 00267 // them to our mapping. 00268 // 00269 std::map<Value*, Value*> ValueMapping; 00270 00271 BasicBlock::iterator BI = DestBlock->begin(); 00272 bool HadPHINodes = isa<PHINode>(BI); 00273 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) 00274 ValueMapping[PN] = PN->getIncomingValueForBlock(SourceBlock); 00275 00276 // Clone the non-phi instructions of the dest block into the source block, 00277 // keeping track of the mapping... 00278 // 00279 for (; BI != DestBlock->end(); ++BI) { 00280 Instruction *New = BI->clone(); 00281 New->setName(BI->getName()); 00282 SourceBlock->getInstList().push_back(New); 00283 ValueMapping[BI] = New; 00284 } 00285 00286 // Now that we have built the mapping information and cloned all of the 00287 // instructions (giving us a new terminator, among other things), walk the new 00288 // instructions, rewriting references of old instructions to use new 00289 // instructions. 00290 // 00291 BI = Branch; ++BI; // Get an iterator to the first new instruction 00292 for (; BI != SourceBlock->end(); ++BI) 00293 for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) 00294 if (Value *Remapped = ValueMapping[BI->getOperand(i)]) 00295 BI->setOperand(i, Remapped); 00296 00297 // Next we check to see if any of the successors of DestBlock had PHI nodes. 00298 // If so, we need to add entries to the PHI nodes for SourceBlock now. 00299 for (succ_iterator SI = succ_begin(DestBlock), SE = succ_end(DestBlock); 00300 SI != SE; ++SI) { 00301 BasicBlock *Succ = *SI; 00302 for (BasicBlock::iterator PNI = Succ->begin(); isa<PHINode>(PNI); ++PNI) { 00303 PHINode *PN = cast<PHINode>(PNI); 00304 // Ok, we have a PHI node. Figure out what the incoming value was for the 00305 // DestBlock. 00306 Value *IV = PN->getIncomingValueForBlock(DestBlock); 00307 00308 // Remap the value if necessary... 00309 if (Value *MappedIV = ValueMapping[IV]) 00310 IV = MappedIV; 00311 PN->addIncoming(IV, SourceBlock); 00312 } 00313 } 00314 00315 // Next, remove the old branch instruction, and any PHI node entries that we 00316 // had. 00317 BI = Branch; ++BI; // Get an iterator to the first new instruction 00318 DestBlock->removePredecessor(SourceBlock); // Remove entries in PHI nodes... 00319 SourceBlock->getInstList().erase(Branch); // Destroy the uncond branch... 00320 00321 // Final step: now that we have finished everything up, walk the cloned 00322 // instructions one last time, constant propagating and DCE'ing them, because 00323 // they may not be needed anymore. 00324 // 00325 if (HadPHINodes) 00326 while (BI != SourceBlock->end()) 00327 if (!dceInstruction(BI) && !doConstantPropagation(BI)) 00328 ++BI; 00329 00330 ++NumEliminated; // We just killed a branch! 00331 }