LLVM API Documentation
00001 //===- TailDuplication.cpp - Simplify CFG through tail duplication --------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This pass performs a limited form of tail duplication, intended to simplify 00011 // CFGs by removing some unconditional branches. This pass is necessary to 00012 // straighten out loops created by the C front-end, but also is capable of 00013 // making other code nicer. After this pass is run, the CFG simplify pass 00014 // should be run to clean up the mess. 00015 // 00016 // This pass could be enhanced in the future to use profile information to be 00017 // more aggressive. 00018 // 00019 //===----------------------------------------------------------------------===// 00020 00021 #include "llvm/Transforms/Scalar.h" 00022 #include "llvm/Constant.h" 00023 #include "llvm/Function.h" 00024 #include "llvm/Instructions.h" 00025 #include "llvm/IntrinsicInst.h" 00026 #include "llvm/Pass.h" 00027 #include "llvm/Type.h" 00028 #include "llvm/Support/CFG.h" 00029 #include "llvm/Transforms/Utils/Local.h" 00030 #include "llvm/Support/CommandLine.h" 00031 #include "llvm/Support/Debug.h" 00032 #include "llvm/ADT/Statistic.h" 00033 using namespace llvm; 00034 00035 namespace { 00036 cl::opt<unsigned> 00037 Threshold("taildup-threshold", cl::desc("Max block size to tail duplicate"), 00038 cl::init(6), cl::Hidden); 00039 Statistic<> NumEliminated("tailduplicate", 00040 "Number of unconditional branches eliminated"); 00041 Statistic<> NumPHINodes("tailduplicate", "Number of phi nodes inserted"); 00042 00043 class TailDup : public FunctionPass { 00044 bool runOnFunction(Function &F); 00045 private: 00046 inline bool shouldEliminateUnconditionalBranch(TerminatorInst *TI); 00047 inline void eliminateUnconditionalBranch(BranchInst *BI); 00048 }; 00049 RegisterOpt<TailDup> X("tailduplicate", "Tail Duplication"); 00050 } 00051 00052 // Public interface to the Tail Duplication pass 00053 FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); } 00054 00055 /// runOnFunction - Top level algorithm - Loop over each unconditional branch in 00056 /// the function, eliminating it if it looks attractive enough. 00057 /// 00058 bool TailDup::runOnFunction(Function &F) { 00059 bool Changed = false; 00060 for (Function::iterator I = F.begin(), E = F.end(); I != E; ) 00061 if (shouldEliminateUnconditionalBranch(I->getTerminator())) { 00062 eliminateUnconditionalBranch(cast<BranchInst>(I->getTerminator())); 00063 Changed = true; 00064 } else { 00065 ++I; 00066 } 00067 return Changed; 00068 } 00069 00070 /// shouldEliminateUnconditionalBranch - Return true if this branch looks 00071 /// attractive to eliminate. We eliminate the branch if the destination basic 00072 /// block has <= 5 instructions in it, not counting PHI nodes. In practice, 00073 /// since one of these is a terminator instruction, this means that we will add 00074 /// up to 4 instructions to the new block. 00075 /// 00076 /// We don't count PHI nodes in the count since they will be removed when the 00077 /// contents of the block are copied over. 00078 /// 00079 bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI) { 00080 BranchInst *BI = dyn_cast<BranchInst>(TI); 00081 if (!BI || !BI->isUnconditional()) return false; // Not an uncond branch! 00082 00083 BasicBlock *Dest = BI->getSuccessor(0); 00084 if (Dest == BI->getParent()) return false; // Do not loop infinitely! 00085 00086 // Do not inline a block if we will just get another branch to the same block! 00087 TerminatorInst *DTI = Dest->getTerminator(); 00088 if (BranchInst *DBI = dyn_cast<BranchInst>(DTI)) 00089 if (DBI->isUnconditional() && DBI->getSuccessor(0) == Dest) 00090 return false; // Do not loop infinitely! 00091 00092 // FIXME: DemoteRegToStack cannot yet demote invoke instructions to the stack, 00093 // because doing so would require breaking critical edges. This should be 00094 // fixed eventually. 00095 if (!DTI->use_empty()) 00096 return false; 00097 00098 // Do not bother working on dead blocks... 00099 pred_iterator PI = pred_begin(Dest), PE = pred_end(Dest); 00100 if (PI == PE && Dest != Dest->getParent()->begin()) 00101 return false; // It's just a dead block, ignore it... 00102 00103 // Also, do not bother with blocks with only a single predecessor: simplify 00104 // CFG will fold these two blocks together! 00105 ++PI; 00106 if (PI == PE) return false; // Exactly one predecessor! 00107 00108 BasicBlock::iterator I = Dest->begin(); 00109 while (isa<PHINode>(*I)) ++I; 00110 00111 for (unsigned Size = 0; I != Dest->end(); ++I) { 00112 if (Size == Threshold) return false; // The block is too large. 00113 // Only count instructions that are not debugger intrinsics. 00114 if (!isa<DbgInfoIntrinsic>(I)) ++Size; 00115 } 00116 00117 // Do not tail duplicate a block that has thousands of successors into a block 00118 // with a single successor if the block has many other predecessors. This can 00119 // cause an N^2 explosion in CFG edges (and PHI node entries), as seen in 00120 // cases that have a large number of indirect gotos. 00121 unsigned NumSuccs = DTI->getNumSuccessors(); 00122 if (NumSuccs > 8) { 00123 unsigned TooMany = 128; 00124 if (NumSuccs >= TooMany) return false; 00125 TooMany = TooMany/NumSuccs; 00126 for (; PI != PE; ++PI) 00127 if (TooMany-- == 0) return false; 00128 } 00129 00130 return true; 00131 } 00132 00133 /// FindObviousSharedDomOf - We know there is a branch from SrcBlock to 00134 /// DestBlock, and that SrcBlock is not the only predecessor of DstBlock. If we 00135 /// can find a predecessor of SrcBlock that is a dominator of both SrcBlock and 00136 /// DstBlock, return it. 00137 static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock, 00138 BasicBlock *DstBlock) { 00139 // SrcBlock must have a single predecessor. 00140 pred_iterator PI = pred_begin(SrcBlock), PE = pred_end(SrcBlock); 00141 if (PI == PE || ++PI != PE) return 0; 00142 00143 BasicBlock *SrcPred = *pred_begin(SrcBlock); 00144 00145 // Look at the predecessors of DstBlock. One of them will be SrcBlock. If 00146 // there is only one other pred, get it, otherwise we can't handle it. 00147 PI = pred_begin(DstBlock); PE = pred_end(DstBlock); 00148 BasicBlock *DstOtherPred = 0; 00149 if (*PI == SrcBlock) { 00150 if (++PI == PE) return 0; 00151 DstOtherPred = *PI; 00152 if (++PI != PE) return 0; 00153 } else { 00154 DstOtherPred = *PI; 00155 if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0; 00156 } 00157 00158 // We can handle two situations here: "if then" and "if then else" blocks. An 00159 // 'if then' situation is just where DstOtherPred == SrcPred. 00160 if (DstOtherPred == SrcPred) 00161 return SrcPred; 00162 00163 // Check to see if we have an "if then else" situation, which means that 00164 // DstOtherPred will have a single predecessor and it will be SrcPred. 00165 PI = pred_begin(DstOtherPred); PE = pred_end(DstOtherPred); 00166 if (PI != PE && *PI == SrcPred) { 00167 if (++PI != PE) return 0; // Not a single pred. 00168 return SrcPred; // Otherwise, it's an "if then" situation. Return the if. 00169 } 00170 00171 // Otherwise, this is something we can't handle. 00172 return 0; 00173 } 00174 00175 00176 /// eliminateUnconditionalBranch - Clone the instructions from the destination 00177 /// block into the source block, eliminating the specified unconditional branch. 00178 /// If the destination block defines values used by successors of the dest 00179 /// block, we may need to insert PHI nodes. 00180 /// 00181 void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { 00182 BasicBlock *SourceBlock = Branch->getParent(); 00183 BasicBlock *DestBlock = Branch->getSuccessor(0); 00184 assert(SourceBlock != DestBlock && "Our predicate is broken!"); 00185 00186 DEBUG(std::cerr << "TailDuplication[" << SourceBlock->getParent()->getName() 00187 << "]: Eliminating branch: " << *Branch); 00188 00189 // See if we can avoid duplicating code by moving it up to a dominator of both 00190 // blocks. 00191 if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) { 00192 DEBUG(std::cerr << "Found shared dominator: " << DomBlock->getName() 00193 << "\n"); 00194 00195 // If there are non-phi instructions in DestBlock that have no operands 00196 // defined in DestBlock, and if the instruction has no side effects, we can 00197 // move the instruction to DomBlock instead of duplicating it. 00198 BasicBlock::iterator BBI = DestBlock->begin(); 00199 while (isa<PHINode>(BBI)) ++BBI; 00200 while (!isa<TerminatorInst>(BBI)) { 00201 Instruction *I = BBI++; 00202 00203 bool CanHoist = !I->isTrapping() && !I->mayWriteToMemory(); 00204 if (CanHoist) { 00205 for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) 00206 if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op))) 00207 if (OpI->getParent() == DestBlock || 00208 (isa<InvokeInst>(OpI) && OpI->getParent() == DomBlock)) { 00209 CanHoist = false; 00210 break; 00211 } 00212 if (CanHoist) { 00213 // Remove from DestBlock, move right before the term in DomBlock. 00214 DestBlock->getInstList().remove(I); 00215 DomBlock->getInstList().insert(DomBlock->getTerminator(), I); 00216 DEBUG(std::cerr << "Hoisted: " << *I); 00217 } 00218 } 00219 } 00220 } 00221 00222 // Tail duplication can not update SSA properties correctly if the values 00223 // defined in the duplicated tail are used outside of the tail itself. For 00224 // this reason, we spill all values that are used outside of the tail to the 00225 // stack. 00226 for (BasicBlock::iterator I = DestBlock->begin(); I != DestBlock->end(); ++I) 00227 for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; 00228 ++UI) { 00229 bool ShouldDemote = false; 00230 if (cast<Instruction>(*UI)->getParent() != DestBlock) { 00231 // We must allow our successors to use tail values in their PHI nodes 00232 // (if the incoming value corresponds to the tail block). 00233 if (PHINode *PN = dyn_cast<PHINode>(*UI)) { 00234 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 00235 if (PN->getIncomingValue(i) == I && 00236 PN->getIncomingBlock(i) != DestBlock) { 00237 ShouldDemote = true; 00238 break; 00239 } 00240 00241 } else { 00242 ShouldDemote = true; 00243 } 00244 } else if (PHINode *PN = dyn_cast<PHINode>(cast<Instruction>(*UI))) { 00245 // If the user of this instruction is a PHI node in the current block, 00246 // which has an entry from another block using the value, spill it. 00247 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 00248 if (PN->getIncomingValue(i) == I && 00249 PN->getIncomingBlock(i) != DestBlock) { 00250 ShouldDemote = true; 00251 break; 00252 } 00253 } 00254 00255 if (ShouldDemote) { 00256 // We found a use outside of the tail. Create a new stack slot to 00257 // break this inter-block usage pattern. 00258 DemoteRegToStack(*I); 00259 break; 00260 } 00261 } 00262 00263 // We are going to have to map operands from the original block B to the new 00264 // copy of the block B'. If there are PHI nodes in the DestBlock, these PHI 00265 // nodes also define part of this mapping. Loop over these PHI nodes, adding 00266 // them to our mapping. 00267 // 00268 std::map<Value*, Value*> ValueMapping; 00269 00270 BasicBlock::iterator BI = DestBlock->begin(); 00271 bool HadPHINodes = isa<PHINode>(BI); 00272 for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) 00273 ValueMapping[PN] = PN->getIncomingValueForBlock(SourceBlock); 00274 00275 // Clone the non-phi instructions of the dest block into the source block, 00276 // keeping track of the mapping... 00277 // 00278 for (; BI != DestBlock->end(); ++BI) { 00279 Instruction *New = BI->clone(); 00280 New->setName(BI->getName()); 00281 SourceBlock->getInstList().push_back(New); 00282 ValueMapping[BI] = New; 00283 } 00284 00285 // Now that we have built the mapping information and cloned all of the 00286 // instructions (giving us a new terminator, among other things), walk the new 00287 // instructions, rewriting references of old instructions to use new 00288 // instructions. 00289 // 00290 BI = Branch; ++BI; // Get an iterator to the first new instruction 00291 for (; BI != SourceBlock->end(); ++BI) 00292 for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) 00293 if (Value *Remapped = ValueMapping[BI->getOperand(i)]) 00294 BI->setOperand(i, Remapped); 00295 00296 // Next we check to see if any of the successors of DestBlock had PHI nodes. 00297 // If so, we need to add entries to the PHI nodes for SourceBlock now. 00298 for (succ_iterator SI = succ_begin(DestBlock), SE = succ_end(DestBlock); 00299 SI != SE; ++SI) { 00300 BasicBlock *Succ = *SI; 00301 for (BasicBlock::iterator PNI = Succ->begin(); isa<PHINode>(PNI); ++PNI) { 00302 PHINode *PN = cast<PHINode>(PNI); 00303 // Ok, we have a PHI node. Figure out what the incoming value was for the 00304 // DestBlock. 00305 Value *IV = PN->getIncomingValueForBlock(DestBlock); 00306 00307 // Remap the value if necessary... 00308 if (Value *MappedIV = ValueMapping[IV]) 00309 IV = MappedIV; 00310 PN->addIncoming(IV, SourceBlock); 00311 } 00312 } 00313 00314 // Next, remove the old branch instruction, and any PHI node entries that we 00315 // had. 00316 BI = Branch; ++BI; // Get an iterator to the first new instruction 00317 DestBlock->removePredecessor(SourceBlock); // Remove entries in PHI nodes... 00318 SourceBlock->getInstList().erase(Branch); // Destroy the uncond branch... 00319 00320 // Final step: now that we have finished everything up, walk the cloned 00321 // instructions one last time, constant propagating and DCE'ing them, because 00322 // they may not be needed anymore. 00323 // 00324 if (HadPHINodes) 00325 while (BI != SourceBlock->end()) 00326 if (!dceInstruction(BI) && !doConstantPropagation(BI)) 00327 ++BI; 00328 00329 ++NumEliminated; // We just killed a branch! 00330 }