LLVM API Documentation
00001 //===-- ModuloScheduling.cpp - ModuloScheduling ----------------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This ModuloScheduling pass is based on the Swing Modulo Scheduling 00011 // algorithm. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #define DEBUG_TYPE "ModuloSched" 00016 00017 #include "ModuloScheduling.h" 00018 #include "llvm/Constants.h" 00019 #include "llvm/Instructions.h" 00020 #include "llvm/Function.h" 00021 #include "llvm/CodeGen/MachineFunction.h" 00022 #include "llvm/CodeGen/Passes.h" 00023 #include "llvm/Support/CFG.h" 00024 #include "llvm/Target/TargetSchedInfo.h" 00025 #include "llvm/Support/Debug.h" 00026 #include "llvm/Support/GraphWriter.h" 00027 #include "llvm/ADT/SCCIterator.h" 00028 #include "llvm/ADT/StringExtras.h" 00029 #include "llvm/ADT/Statistic.h" 00030 #include "llvm/Support/Timer.h" 00031 #include <cmath> 00032 #include <algorithm> 00033 #include <fstream> 00034 #include <sstream> 00035 #include <utility> 00036 #include <vector> 00037 #include "../MachineCodeForInstruction.h" 00038 #include "../SparcV9TmpInstr.h" 00039 #include "../SparcV9Internals.h" 00040 #include "../SparcV9RegisterInfo.h" 00041 using namespace llvm; 00042 00043 /// Create ModuloSchedulingPass 00044 /// 00045 FunctionPass *llvm::createModuloSchedulingPass(TargetMachine & targ) { 00046 DEBUG(std::cerr << "Created ModuloSchedulingPass\n"); 00047 return new ModuloSchedulingPass(targ); 00048 } 00049 00050 00051 //Graph Traits for printing out the dependence graph 00052 template<typename GraphType> 00053 static void WriteGraphToFile(std::ostream &O, const std::string &GraphName, 00054 const GraphType >) { 00055 std::string Filename = GraphName + ".dot"; 00056 O << "Writing '" << Filename << "'..."; 00057 std::ofstream F(Filename.c_str()); 00058 00059 if (F.good()) 00060 WriteGraph(F, GT); 00061 else 00062 O << " error opening file for writing!"; 00063 O << "\n"; 00064 }; 00065 00066 00067 #if 1 00068 #define TIME_REGION(VARNAME, DESC) \ 00069 NamedRegionTimer VARNAME(DESC) 00070 #else 00071 #define TIME_REGION(VARNAME, DESC) 00072 #endif 00073 00074 00075 //Graph Traits for printing out the dependence graph 00076 namespace llvm { 00077 00078 //Loop statistics 00079 Statistic<> ValidLoops("modulosched-validLoops", "Number of candidate loops modulo-scheduled"); 00080 Statistic<> JumboBB("modulosched-jumboBB", "Basic Blocks with more then 100 instructions"); 00081 Statistic<> LoopsWithCalls("modulosched-loopCalls", "Loops with calls"); 00082 Statistic<> LoopsWithCondMov("modulosched-loopCondMov", "Loops with conditional moves"); 00083 Statistic<> InvalidLoops("modulosched-invalidLoops", "Loops with unknown trip counts or loop invariant trip counts"); 00084 Statistic<> SingleBBLoops("modulosched-singeBBLoops", "Number of single basic block loops"); 00085 00086 //Scheduling Statistics 00087 Statistic<> MSLoops("modulosched-schedLoops", "Number of loops successfully modulo-scheduled"); 00088 Statistic<> NoSched("modulosched-noSched", "No schedule"); 00089 Statistic<> SameStage("modulosched-sameStage", "Max stage is 0"); 00090 Statistic<> ResourceConstraint("modulosched-resourceConstraint", "Loops constrained by resources"); 00091 Statistic<> RecurrenceConstraint("modulosched-recurrenceConstraint", "Loops constrained by recurrences"); 00092 Statistic<> FinalIISum("modulosched-finalIISum", "Sum of all final II"); 00093 Statistic<> IISum("modulosched-IISum", "Sum of all theoretical II"); 00094 00095 template<> 00096 struct DOTGraphTraits<MSchedGraph*> : public DefaultDOTGraphTraits { 00097 static std::string getGraphName(MSchedGraph *F) { 00098 return "Dependence Graph"; 00099 } 00100 00101 static std::string getNodeLabel(MSchedGraphNode *Node, MSchedGraph *Graph) { 00102 if (Node->getInst()) { 00103 std::stringstream ss; 00104 ss << *(Node->getInst()); 00105 return ss.str(); //((MachineInstr*)Node->getInst()); 00106 } 00107 else 00108 return "No Inst"; 00109 } 00110 static std::string getEdgeSourceLabel(MSchedGraphNode *Node, 00111 MSchedGraphNode::succ_iterator I) { 00112 //Label each edge with the type of dependence 00113 std::string edgelabel = ""; 00114 switch (I.getEdge().getDepOrderType()) { 00115 00116 case MSchedGraphEdge::TrueDep: 00117 edgelabel = "True"; 00118 break; 00119 00120 case MSchedGraphEdge::AntiDep: 00121 edgelabel = "Anti"; 00122 break; 00123 00124 case MSchedGraphEdge::OutputDep: 00125 edgelabel = "Output"; 00126 break; 00127 00128 default: 00129 edgelabel = "Unknown"; 00130 break; 00131 } 00132 00133 //FIXME 00134 int iteDiff = I.getEdge().getIteDiff(); 00135 std::string intStr = "(IteDiff: "; 00136 intStr += itostr(iteDiff); 00137 00138 intStr += ")"; 00139 edgelabel += intStr; 00140 00141 return edgelabel; 00142 } 00143 }; 00144 } 00145 00146 00147 #include <unistd.h> 00148 00149 /// ModuloScheduling::runOnFunction - main transformation entry point 00150 /// The Swing Modulo Schedule algorithm has three basic steps: 00151 /// 1) Computation and Analysis of the dependence graph 00152 /// 2) Ordering of the nodes 00153 /// 3) Scheduling 00154 /// 00155 bool ModuloSchedulingPass::runOnFunction(Function &F) { 00156 alarm(100); 00157 00158 bool Changed = false; 00159 int numMS = 0; 00160 00161 DEBUG(std::cerr << "Creating ModuloSchedGraph for each valid BasicBlock in " + F.getName() + "\n"); 00162 00163 //Get MachineFunction 00164 MachineFunction &MF = MachineFunction::get(&F); 00165 00166 DependenceAnalyzer &DA = getAnalysis<DependenceAnalyzer>(); 00167 00168 00169 //Worklist 00170 std::vector<MachineBasicBlock*> Worklist; 00171 00172 //Iterate over BasicBlocks and put them into our worklist if they are valid 00173 for (MachineFunction::iterator BI = MF.begin(); BI != MF.end(); ++BI) 00174 if(MachineBBisValid(BI)) { 00175 if(BI->size() < 100) { 00176 Worklist.push_back(&*BI); 00177 ++ValidLoops; 00178 } 00179 else 00180 ++JumboBB; 00181 00182 } 00183 00184 defaultInst = 0; 00185 00186 DEBUG(if(Worklist.size() == 0) std::cerr << "No single basic block loops in function to ModuloSchedule\n"); 00187 00188 //Iterate over the worklist and perform scheduling 00189 for(std::vector<MachineBasicBlock*>::iterator BI = Worklist.begin(), 00190 BE = Worklist.end(); BI != BE; ++BI) { 00191 00192 //Print out BB for debugging 00193 DEBUG(std::cerr << "BB Size: " << (*BI)->size() << "\n"); 00194 DEBUG(std::cerr << "ModuloScheduling BB: \n"; (*BI)->print(std::cerr)); 00195 00196 //Print out LLVM BB 00197 DEBUG(std::cerr << "ModuloScheduling LLVMBB: \n"; (*BI)->getBasicBlock()->print(std::cerr)); 00198 00199 //Catch the odd case where we only have TmpInstructions and no real Value*s 00200 if(!CreateDefMap(*BI)) { 00201 //Clear out our maps for the next basic block that is processed 00202 nodeToAttributesMap.clear(); 00203 partialOrder.clear(); 00204 recurrenceList.clear(); 00205 FinalNodeOrder.clear(); 00206 schedule.clear(); 00207 defMap.clear(); 00208 continue; 00209 } 00210 00211 MSchedGraph *MSG = new MSchedGraph(*BI, target, indVarInstrs[*BI], DA, machineTollvm[*BI]); 00212 00213 //Write Graph out to file 00214 DEBUG(WriteGraphToFile(std::cerr, F.getName(), MSG)); 00215 DEBUG(MSG->print(std::cerr)); 00216 00217 //Calculate Resource II 00218 int ResMII = calculateResMII(*BI); 00219 00220 //Calculate Recurrence II 00221 int RecMII = calculateRecMII(MSG, ResMII); 00222 00223 DEBUG(std::cerr << "Number of reccurrences found: " << recurrenceList.size() << "\n"); 00224 00225 //Our starting initiation interval is the maximum of RecMII and ResMII 00226 if(RecMII < ResMII) 00227 ++RecurrenceConstraint; 00228 else 00229 ++ResourceConstraint; 00230 00231 II = std::max(RecMII, ResMII); 00232 int mII = II; 00233 00234 //Print out II, RecMII, and ResMII 00235 DEBUG(std::cerr << "II starts out as " << II << " ( RecMII=" << RecMII << " and ResMII=" << ResMII << ")\n"); 00236 00237 //Dump node properties if in debug mode 00238 DEBUG(for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), 00239 E = nodeToAttributesMap.end(); I !=E; ++I) { 00240 std::cerr << "Node: " << *(I->first) << " ASAP: " << I->second.ASAP << " ALAP: " 00241 << I->second.ALAP << " MOB: " << I->second.MOB << " Depth: " << I->second.depth 00242 << " Height: " << I->second.height << "\n"; 00243 }); 00244 00245 //Calculate Node Properties 00246 calculateNodeAttributes(MSG, ResMII); 00247 00248 //Dump node properties if in debug mode 00249 DEBUG(for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), 00250 E = nodeToAttributesMap.end(); I !=E; ++I) { 00251 std::cerr << "Node: " << *(I->first) << " ASAP: " << I->second.ASAP << " ALAP: " 00252 << I->second.ALAP << " MOB: " << I->second.MOB << " Depth: " << I->second.depth 00253 << " Height: " << I->second.height << "\n"; 00254 }); 00255 00256 //Put nodes in order to schedule them 00257 computePartialOrder(); 00258 00259 //Dump out partial order 00260 DEBUG(for(std::vector<std::set<MSchedGraphNode*> >::iterator I = partialOrder.begin(), 00261 E = partialOrder.end(); I !=E; ++I) { 00262 std::cerr << "Start set in PO\n"; 00263 for(std::set<MSchedGraphNode*>::iterator J = I->begin(), JE = I->end(); J != JE; ++J) 00264 std::cerr << "PO:" << **J << "\n"; 00265 }); 00266 00267 //Place nodes in final order 00268 orderNodes(); 00269 00270 //Dump out order of nodes 00271 DEBUG(for(std::vector<MSchedGraphNode*>::iterator I = FinalNodeOrder.begin(), E = FinalNodeOrder.end(); I != E; ++I) { 00272 std::cerr << "FO:" << **I << "\n"; 00273 }); 00274 00275 //Finally schedule nodes 00276 bool haveSched = computeSchedule(*BI, MSG); 00277 00278 //Print out final schedule 00279 DEBUG(schedule.print(std::cerr)); 00280 00281 //Final scheduling step is to reconstruct the loop only if we actual have 00282 //stage > 0 00283 if(haveSched) { 00284 reconstructLoop(*BI); 00285 ++MSLoops; 00286 Changed = true; 00287 FinalIISum += II; 00288 IISum += mII; 00289 00290 if(schedule.getMaxStage() == 0) 00291 ++SameStage; 00292 } 00293 else { 00294 ++NoSched; 00295 } 00296 00297 //Clear out our maps for the next basic block that is processed 00298 nodeToAttributesMap.clear(); 00299 partialOrder.clear(); 00300 recurrenceList.clear(); 00301 FinalNodeOrder.clear(); 00302 schedule.clear(); 00303 defMap.clear(); 00304 //Clean up. Nuke old MachineBB and llvmBB 00305 //BasicBlock *llvmBB = (BasicBlock*) (*BI)->getBasicBlock(); 00306 //Function *parent = (Function*) llvmBB->getParent(); 00307 //Should't std::find work?? 00308 //parent->getBasicBlockList().erase(std::find(parent->getBasicBlockList().begin(), parent->getBasicBlockList().end(), *llvmBB)); 00309 //parent->getBasicBlockList().erase(llvmBB); 00310 00311 //delete(llvmBB); 00312 //delete(*BI); 00313 } 00314 00315 alarm(0); 00316 return Changed; 00317 } 00318 00319 bool ModuloSchedulingPass::CreateDefMap(MachineBasicBlock *BI) { 00320 defaultInst = 0; 00321 00322 for(MachineBasicBlock::iterator I = BI->begin(), E = BI->end(); I != E; ++I) { 00323 for(unsigned opNum = 0; opNum < I->getNumOperands(); ++opNum) { 00324 const MachineOperand &mOp = I->getOperand(opNum); 00325 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) { 00326 //assert if this is the second def we have seen 00327 //DEBUG(std::cerr << "Putting " << *(mOp.getVRegValue()) << " into map\n"); 00328 //assert(!defMap.count(mOp.getVRegValue()) && "Def already in the map"); 00329 if(defMap.count(mOp.getVRegValue())) 00330 return false; 00331 00332 defMap[mOp.getVRegValue()] = &*I; 00333 } 00334 00335 //See if we can use this Value* as our defaultInst 00336 if(!defaultInst && mOp.getType() == MachineOperand::MO_VirtualRegister) { 00337 Value *V = mOp.getVRegValue(); 00338 if(!isa<TmpInstruction>(V) && !isa<Argument>(V) && !isa<Constant>(V) && !isa<PHINode>(V)) 00339 defaultInst = (Instruction*) V; 00340 } 00341 } 00342 } 00343 00344 if(!defaultInst) 00345 return false; 00346 00347 return true; 00348 00349 } 00350 /// This function checks if a Machine Basic Block is valid for modulo 00351 /// scheduling. This means that it has no control flow (if/else or 00352 /// calls) in the block. Currently ModuloScheduling only works on 00353 /// single basic block loops. 00354 bool ModuloSchedulingPass::MachineBBisValid(const MachineBasicBlock *BI) { 00355 00356 bool isLoop = false; 00357 00358 //Check first if its a valid loop 00359 for(succ_const_iterator I = succ_begin(BI->getBasicBlock()), 00360 E = succ_end(BI->getBasicBlock()); I != E; ++I) { 00361 if (*I == BI->getBasicBlock()) // has single block loop 00362 isLoop = true; 00363 } 00364 00365 if(!isLoop) 00366 return false; 00367 00368 //Check that we have a conditional branch (avoiding MS infinite loops) 00369 if(BranchInst *b = dyn_cast<BranchInst>(((BasicBlock*) BI->getBasicBlock())->getTerminator())) 00370 if(b->isUnconditional()) 00371 return false; 00372 00373 //Check size of our basic block.. make sure we have more then just the terminator in it 00374 if(BI->getBasicBlock()->size() == 1) 00375 return false; 00376 00377 //Increase number of single basic block loops for stats 00378 ++SingleBBLoops; 00379 00380 //Get Target machine instruction info 00381 const TargetInstrInfo *TMI = target.getInstrInfo(); 00382 00383 //Check each instruction and look for calls, keep map to get index later 00384 std::map<const MachineInstr*, unsigned> indexMap; 00385 00386 unsigned count = 0; 00387 for(MachineBasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) { 00388 //Get opcode to check instruction type 00389 MachineOpCode OC = I->getOpcode(); 00390 00391 //Look for calls 00392 if(TMI->isCall(OC)) { 00393 ++LoopsWithCalls; 00394 return false; 00395 } 00396 00397 //Look for conditional move 00398 if(OC == V9::MOVRZr || OC == V9::MOVRZi || OC == V9::MOVRLEZr || OC == V9::MOVRLEZi 00399 || OC == V9::MOVRLZr || OC == V9::MOVRLZi || OC == V9::MOVRNZr || OC == V9::MOVRNZi 00400 || OC == V9::MOVRGZr || OC == V9::MOVRGZi || OC == V9::MOVRGEZr 00401 || OC == V9::MOVRGEZi || OC == V9::MOVLEr || OC == V9::MOVLEi || OC == V9::MOVLEUr 00402 || OC == V9::MOVLEUi || OC == V9::MOVFLEr || OC == V9::MOVFLEi 00403 || OC == V9::MOVNEr || OC == V9::MOVNEi || OC == V9::MOVNEGr || OC == V9::MOVNEGi 00404 || OC == V9::MOVFNEr || OC == V9::MOVFNEi || OC == V9::MOVGr || OC == V9::MOVGi) { 00405 ++LoopsWithCondMov; 00406 return false; 00407 } 00408 00409 indexMap[I] = count; 00410 00411 if(TMI->isNop(OC)) 00412 continue; 00413 00414 ++count; 00415 } 00416 00417 //Apply a simple pattern match to make sure this loop can be modulo scheduled 00418 //This means only loops with a branch associated to the iteration count 00419 00420 //Get the branch 00421 BranchInst *b = dyn_cast<BranchInst>(((BasicBlock*) BI->getBasicBlock())->getTerminator()); 00422 00423 //Get the condition for the branch (we already checked if it was conditional) 00424 Value *cond = b->getCondition(); 00425 00426 DEBUG(std::cerr << "Condition: " << *cond << "\n"); 00427 00428 //List of instructions associated with induction variable 00429 std::set<Instruction*> indVar; 00430 std::vector<Instruction*> stack; 00431 00432 BasicBlock *BB = (BasicBlock*) BI->getBasicBlock(); 00433 00434 //Add branch 00435 indVar.insert(b); 00436 00437 if(Instruction *I = dyn_cast<Instruction>(cond)) 00438 if(I->getParent() == BB) { 00439 if (!assocIndVar(I, indVar, stack, BB)) { 00440 ++InvalidLoops; 00441 return false; 00442 } 00443 } 00444 else { 00445 ++InvalidLoops; 00446 return false; 00447 } 00448 else { 00449 ++InvalidLoops; 00450 return false; 00451 } 00452 //The indVar set must be >= 3 instructions for this loop to match (FIX ME!) 00453 if(indVar.size() < 3 ) 00454 return false; 00455 00456 //Dump out instructions associate with indvar for debug reasons 00457 DEBUG(for(std::set<Instruction*>::iterator N = indVar.begin(), NE = indVar.end(); N != NE; ++N) { 00458 std::cerr << **N << "\n"; 00459 }); 00460 00461 //Create map of machine instr to llvm instr 00462 std::map<MachineInstr*, Instruction*> mllvm; 00463 for(BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { 00464 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(I); 00465 for (unsigned j = 0; j < tempMvec.size(); j++) { 00466 mllvm[tempMvec[j]] = I; 00467 } 00468 } 00469 00470 //Convert list of LLVM Instructions to list of Machine instructions 00471 std::map<const MachineInstr*, unsigned> mIndVar; 00472 for(std::set<Instruction*>::iterator N = indVar.begin(), NE = indVar.end(); N != NE; ++N) { 00473 00474 //If we have a load, we can't handle this loop because there is no way to preserve dependences 00475 //between loads and stores 00476 if(isa<LoadInst>(*N)) 00477 return false; 00478 00479 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(*N); 00480 for (unsigned j = 0; j < tempMvec.size(); j++) { 00481 MachineOpCode OC = (tempMvec[j])->getOpcode(); 00482 if(TMI->isNop(OC)) 00483 continue; 00484 if(!indexMap.count(tempMvec[j])) 00485 continue; 00486 mIndVar[(MachineInstr*) tempMvec[j]] = indexMap[(MachineInstr*) tempMvec[j]]; 00487 DEBUG(std::cerr << *(tempMvec[j]) << " at index " << indexMap[(MachineInstr*) tempMvec[j]] << "\n"); 00488 } 00489 } 00490 00491 //Must have some guts to the loop body (more then 1 instr, dont count nops in size) 00492 if(mIndVar.size() >= (BI->size()-3)) 00493 return false; 00494 00495 //Put into a map for future access 00496 indVarInstrs[BI] = mIndVar; 00497 machineTollvm[BI] = mllvm; 00498 return true; 00499 } 00500 00501 bool ModuloSchedulingPass::assocIndVar(Instruction *I, std::set<Instruction*> &indVar, 00502 std::vector<Instruction*> &stack, BasicBlock *BB) { 00503 00504 stack.push_back(I); 00505 00506 //If this is a phi node, check if its the canonical indvar 00507 if(PHINode *PN = dyn_cast<PHINode>(I)) { 00508 if (Instruction *Inc = 00509 dyn_cast<Instruction>(PN->getIncomingValueForBlock(BB))) 00510 if (Inc->getOpcode() == Instruction::Add && Inc->getOperand(0) == PN) 00511 if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1))) 00512 if (CI->equalsInt(1)) { 00513 //We have found the indvar, so add the stack, and inc instruction to the set 00514 indVar.insert(stack.begin(), stack.end()); 00515 indVar.insert(Inc); 00516 stack.pop_back(); 00517 return true; 00518 } 00519 return false; 00520 } 00521 else { 00522 //Loop over each of the instructions operands, check if they are an instruction and in this BB 00523 for(unsigned i = 0; i < I->getNumOperands(); ++i) { 00524 if(Instruction *N = dyn_cast<Instruction>(I->getOperand(i))) { 00525 if(N->getParent() == BB) 00526 if(!assocIndVar(N, indVar, stack, BB)) 00527 return false; 00528 } 00529 } 00530 } 00531 00532 stack.pop_back(); 00533 return true; 00534 } 00535 00536 //ResMII is calculated by determining the usage count for each resource 00537 //and using the maximum. 00538 //FIXME: In future there should be a way to get alternative resources 00539 //for each instruction 00540 int ModuloSchedulingPass::calculateResMII(const MachineBasicBlock *BI) { 00541 00542 TIME_REGION(X, "calculateResMII"); 00543 00544 const TargetInstrInfo *mii = target.getInstrInfo(); 00545 const TargetSchedInfo *msi = target.getSchedInfo(); 00546 00547 int ResMII = 0; 00548 00549 //Map to keep track of usage count of each resource 00550 std::map<unsigned, unsigned> resourceUsageCount; 00551 00552 for(MachineBasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) { 00553 00554 //Get resource usage for this instruction 00555 InstrRUsage rUsage = msi->getInstrRUsage(I->getOpcode()); 00556 std::vector<std::vector<resourceId_t> > resources = rUsage.resourcesByCycle; 00557 00558 //Loop over resources in each cycle and increments their usage count 00559 for(unsigned i=0; i < resources.size(); ++i) 00560 for(unsigned j=0; j < resources[i].size(); ++j) { 00561 if(!resourceUsageCount.count(resources[i][j])) { 00562 resourceUsageCount[resources[i][j]] = 1; 00563 } 00564 else { 00565 resourceUsageCount[resources[i][j]] = resourceUsageCount[resources[i][j]] + 1; 00566 } 00567 } 00568 } 00569 00570 //Find maximum usage count 00571 00572 //Get max number of instructions that can be issued at once. (FIXME) 00573 int issueSlots = msi->maxNumIssueTotal; 00574 00575 for(std::map<unsigned,unsigned>::iterator RB = resourceUsageCount.begin(), RE = resourceUsageCount.end(); RB != RE; ++RB) { 00576 00577 //Get the total number of the resources in our cpu 00578 int resourceNum = CPUResource::getCPUResource(RB->first)->maxNumUsers; 00579 00580 //Get total usage count for this resources 00581 unsigned usageCount = RB->second; 00582 00583 //Divide the usage count by either the max number we can issue or the number of 00584 //resources (whichever is its upper bound) 00585 double finalUsageCount; 00586 DEBUG(std::cerr << "Resource Num: " << RB->first << " Usage: " << usageCount << " TotalNum: " << resourceNum << "\n"); 00587 00588 if( resourceNum <= issueSlots) 00589 finalUsageCount = ceil(1.0 * usageCount / resourceNum); 00590 else 00591 finalUsageCount = ceil(1.0 * usageCount / issueSlots); 00592 00593 00594 //Only keep track of the max 00595 ResMII = std::max( (int) finalUsageCount, ResMII); 00596 00597 } 00598 00599 return ResMII; 00600 00601 } 00602 00603 /// calculateRecMII - Calculates the value of the highest recurrence 00604 /// By value we mean the total latency 00605 int ModuloSchedulingPass::calculateRecMII(MSchedGraph *graph, int MII) { 00606 /*std::vector<MSchedGraphNode*> vNodes; 00607 //Loop over all nodes in the graph 00608 for(MSchedGraph::iterator I = graph->begin(), E = graph->end(); I != E; ++I) { 00609 findAllReccurrences(I->second, vNodes, MII); 00610 vNodes.clear(); 00611 }*/ 00612 00613 TIME_REGION(X, "calculateRecMII"); 00614 00615 findAllCircuits(graph, MII); 00616 int RecMII = 0; 00617 00618 for(std::set<std::pair<int, std::vector<MSchedGraphNode*> > >::iterator I = recurrenceList.begin(), E=recurrenceList.end(); I !=E; ++I) { 00619 RecMII = std::max(RecMII, I->first); 00620 } 00621 00622 return MII; 00623 } 00624 00625 /// calculateNodeAttributes - The following properties are calculated for 00626 /// each node in the dependence graph: ASAP, ALAP, Depth, Height, and 00627 /// MOB. 00628 void ModuloSchedulingPass::calculateNodeAttributes(MSchedGraph *graph, int MII) { 00629 00630 TIME_REGION(X, "calculateNodeAttributes"); 00631 00632 assert(nodeToAttributesMap.empty() && "Node attribute map was not cleared"); 00633 00634 //Loop over the nodes and add them to the map 00635 for(MSchedGraph::iterator I = graph->begin(), E = graph->end(); I != E; ++I) { 00636 00637 DEBUG(std::cerr << "Inserting node into attribute map: " << *I->second << "\n"); 00638 00639 //Assert if its already in the map 00640 assert(nodeToAttributesMap.count(I->second) == 0 && 00641 "Node attributes are already in the map"); 00642 00643 //Put into the map with default attribute values 00644 nodeToAttributesMap[I->second] = MSNodeAttributes(); 00645 } 00646 00647 //Create set to deal with reccurrences 00648 std::set<MSchedGraphNode*> visitedNodes; 00649 00650 //Now Loop over map and calculate the node attributes 00651 for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) { 00652 calculateASAP(I->first, MII, (MSchedGraphNode*) 0); 00653 visitedNodes.clear(); 00654 } 00655 00656 int maxASAP = findMaxASAP(); 00657 //Calculate ALAP which depends on ASAP being totally calculated 00658 for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) { 00659 calculateALAP(I->first, MII, maxASAP, (MSchedGraphNode*) 0); 00660 visitedNodes.clear(); 00661 } 00662 00663 //Calculate MOB which depends on ASAP being totally calculated, also do depth and height 00664 for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) { 00665 (I->second).MOB = std::max(0,(I->second).ALAP - (I->second).ASAP); 00666 00667 DEBUG(std::cerr << "MOB: " << (I->second).MOB << " (" << *(I->first) << ")\n"); 00668 calculateDepth(I->first, (MSchedGraphNode*) 0); 00669 calculateHeight(I->first, (MSchedGraphNode*) 0); 00670 } 00671 00672 00673 } 00674 00675 /// ignoreEdge - Checks to see if this edge of a recurrence should be ignored or not 00676 bool ModuloSchedulingPass::ignoreEdge(MSchedGraphNode *srcNode, MSchedGraphNode *destNode) { 00677 if(destNode == 0 || srcNode ==0) 00678 return false; 00679 00680 bool findEdge = edgesToIgnore.count(std::make_pair(srcNode, destNode->getInEdgeNum(srcNode))); 00681 00682 DEBUG(std::cerr << "Ignoring edge? from: " << *srcNode << " to " << *destNode << "\n"); 00683 00684 return findEdge; 00685 } 00686 00687 00688 /// calculateASAP - Calculates the 00689 int ModuloSchedulingPass::calculateASAP(MSchedGraphNode *node, int MII, MSchedGraphNode *destNode) { 00690 00691 DEBUG(std::cerr << "Calculating ASAP for " << *node << "\n"); 00692 00693 //Get current node attributes 00694 MSNodeAttributes &attributes = nodeToAttributesMap.find(node)->second; 00695 00696 if(attributes.ASAP != -1) 00697 return attributes.ASAP; 00698 00699 int maxPredValue = 0; 00700 00701 //Iterate over all of the predecessors and find max 00702 for(MSchedGraphNode::pred_iterator P = node->pred_begin(), E = node->pred_end(); P != E; ++P) { 00703 00704 //Only process if we are not ignoring the edge 00705 if(!ignoreEdge(*P, node)) { 00706 int predASAP = -1; 00707 predASAP = calculateASAP(*P, MII, node); 00708 00709 assert(predASAP != -1 && "ASAP has not been calculated"); 00710 int iteDiff = node->getInEdge(*P).getIteDiff(); 00711 00712 int currentPredValue = predASAP + (*P)->getLatency() - (iteDiff * MII); 00713 DEBUG(std::cerr << "pred ASAP: " << predASAP << ", iteDiff: " << iteDiff << ", PredLatency: " << (*P)->getLatency() << ", Current ASAP pred: " << currentPredValue << "\n"); 00714 maxPredValue = std::max(maxPredValue, currentPredValue); 00715 } 00716 } 00717 00718 attributes.ASAP = maxPredValue; 00719 00720 DEBUG(std::cerr << "ASAP: " << attributes.ASAP << " (" << *node << ")\n"); 00721 00722 return maxPredValue; 00723 } 00724 00725 00726 int ModuloSchedulingPass::calculateALAP(MSchedGraphNode *node, int MII, 00727 int maxASAP, MSchedGraphNode *srcNode) { 00728 00729 DEBUG(std::cerr << "Calculating ALAP for " << *node << "\n"); 00730 00731 MSNodeAttributes &attributes = nodeToAttributesMap.find(node)->second; 00732 00733 if(attributes.ALAP != -1) 00734 return attributes.ALAP; 00735 00736 if(node->hasSuccessors()) { 00737 00738 //Trying to deal with the issue where the node has successors, but 00739 //we are ignoring all of the edges to them. So this is my hack for 00740 //now.. there is probably a more elegant way of doing this (FIXME) 00741 bool processedOneEdge = false; 00742 00743 //FIXME, set to something high to start 00744 int minSuccValue = 9999999; 00745 00746 //Iterate over all of the predecessors and fine max 00747 for(MSchedGraphNode::succ_iterator P = node->succ_begin(), 00748 E = node->succ_end(); P != E; ++P) { 00749 00750 //Only process if we are not ignoring the edge 00751 if(!ignoreEdge(node, *P)) { 00752 processedOneEdge = true; 00753 int succALAP = -1; 00754 succALAP = calculateALAP(*P, MII, maxASAP, node); 00755 00756 assert(succALAP != -1 && "Successors ALAP should have been caclulated"); 00757 00758 int iteDiff = P.getEdge().getIteDiff(); 00759 00760 int currentSuccValue = succALAP - node->getLatency() + iteDiff * MII; 00761 00762 DEBUG(std::cerr << "succ ALAP: " << succALAP << ", iteDiff: " << iteDiff << ", SuccLatency: " << (*P)->getLatency() << ", Current ALAP succ: " << currentSuccValue << "\n"); 00763 00764 minSuccValue = std::min(minSuccValue, currentSuccValue); 00765 } 00766 } 00767 00768 if(processedOneEdge) 00769 attributes.ALAP = minSuccValue; 00770 00771 else 00772 attributes.ALAP = maxASAP; 00773 } 00774 else 00775 attributes.ALAP = maxASAP; 00776 00777 DEBUG(std::cerr << "ALAP: " << attributes.ALAP << " (" << *node << ")\n"); 00778 00779 if(attributes.ALAP < 0) 00780 attributes.ALAP = 0; 00781 00782 return attributes.ALAP; 00783 } 00784 00785 int ModuloSchedulingPass::findMaxASAP() { 00786 int maxASAP = 0; 00787 00788 for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), 00789 E = nodeToAttributesMap.end(); I != E; ++I) 00790 maxASAP = std::max(maxASAP, I->second.ASAP); 00791 return maxASAP; 00792 } 00793 00794 00795 int ModuloSchedulingPass::calculateHeight(MSchedGraphNode *node,MSchedGraphNode *srcNode) { 00796 00797 MSNodeAttributes &attributes = nodeToAttributesMap.find(node)->second; 00798 00799 if(attributes.height != -1) 00800 return attributes.height; 00801 00802 int maxHeight = 0; 00803 00804 //Iterate over all of the predecessors and find max 00805 for(MSchedGraphNode::succ_iterator P = node->succ_begin(), 00806 E = node->succ_end(); P != E; ++P) { 00807 00808 00809 if(!ignoreEdge(node, *P)) { 00810 int succHeight = calculateHeight(*P, node); 00811 00812 assert(succHeight != -1 && "Successors Height should have been caclulated"); 00813 00814 int currentHeight = succHeight + node->getLatency(); 00815 maxHeight = std::max(maxHeight, currentHeight); 00816 } 00817 } 00818 attributes.height = maxHeight; 00819 DEBUG(std::cerr << "Height: " << attributes.height << " (" << *node << ")\n"); 00820 return maxHeight; 00821 } 00822 00823 00824 int ModuloSchedulingPass::calculateDepth(MSchedGraphNode *node, 00825 MSchedGraphNode *destNode) { 00826 00827 MSNodeAttributes &attributes = nodeToAttributesMap.find(node)->second; 00828 00829 if(attributes.depth != -1) 00830 return attributes.depth; 00831 00832 int maxDepth = 0; 00833 00834 //Iterate over all of the predecessors and fine max 00835 for(MSchedGraphNode::pred_iterator P = node->pred_begin(), E = node->pred_end(); P != E; ++P) { 00836 00837 if(!ignoreEdge(*P, node)) { 00838 int predDepth = -1; 00839 predDepth = calculateDepth(*P, node); 00840 00841 assert(predDepth != -1 && "Predecessors ASAP should have been caclulated"); 00842 00843 int currentDepth = predDepth + (*P)->getLatency(); 00844 maxDepth = std::max(maxDepth, currentDepth); 00845 } 00846 } 00847 attributes.depth = maxDepth; 00848 00849 DEBUG(std::cerr << "Depth: " << attributes.depth << " (" << *node << "*)\n"); 00850 return maxDepth; 00851 } 00852 00853 00854 00855 void ModuloSchedulingPass::addReccurrence(std::vector<MSchedGraphNode*> &recurrence, int II, MSchedGraphNode *srcBENode, MSchedGraphNode *destBENode) { 00856 //Check to make sure that this recurrence is unique 00857 bool same = false; 00858 00859 00860 //Loop over all recurrences already in our list 00861 for(std::set<std::pair<int, std::vector<MSchedGraphNode*> > >::iterator R = recurrenceList.begin(), RE = recurrenceList.end(); R != RE; ++R) { 00862 00863 bool all_same = true; 00864 //First compare size 00865 if(R->second.size() == recurrence.size()) { 00866 00867 for(std::vector<MSchedGraphNode*>::const_iterator node = R->second.begin(), end = R->second.end(); node != end; ++node) { 00868 if(std::find(recurrence.begin(), recurrence.end(), *node) == recurrence.end()) { 00869 all_same = all_same && false; 00870 break; 00871 } 00872 else 00873 all_same = all_same && true; 00874 } 00875 if(all_same) { 00876 same = true; 00877 break; 00878 } 00879 } 00880 } 00881 00882 if(!same) { 00883 srcBENode = recurrence.back(); 00884 destBENode = recurrence.front(); 00885 00886 //FIXME 00887 if(destBENode->getInEdge(srcBENode).getIteDiff() == 0) { 00888 //DEBUG(std::cerr << "NOT A BACKEDGE\n"); 00889 //find actual backedge HACK HACK 00890 for(unsigned i=0; i< recurrence.size()-1; ++i) { 00891 if(recurrence[i+1]->getInEdge(recurrence[i]).getIteDiff() == 1) { 00892 srcBENode = recurrence[i]; 00893 destBENode = recurrence[i+1]; 00894 break; 00895 } 00896 00897 } 00898 00899 } 00900 DEBUG(std::cerr << "Back Edge to Remove: " << *srcBENode << " to " << *destBENode << "\n"); 00901 edgesToIgnore.insert(std::make_pair(srcBENode, destBENode->getInEdgeNum(srcBENode))); 00902 recurrenceList.insert(std::make_pair(II, recurrence)); 00903 } 00904 00905 } 00906 00907 int CircCount; 00908 00909 void ModuloSchedulingPass::unblock(MSchedGraphNode *u, std::set<MSchedGraphNode*> &blocked, 00910 std::map<MSchedGraphNode*, std::set<MSchedGraphNode*> > &B) { 00911 00912 //Unblock u 00913 DEBUG(std::cerr << "Unblocking: " << *u << "\n"); 00914 blocked.erase(u); 00915 00916 //std::set<MSchedGraphNode*> toErase; 00917 while (!B[u].empty()) { 00918 MSchedGraphNode *W = *B[u].begin(); 00919 B[u].erase(W); 00920 //toErase.insert(*W); 00921 DEBUG(std::cerr << "Removed: " << *W << "from B-List\n"); 00922 if(blocked.count(W)) 00923 unblock(W, blocked, B); 00924 } 00925 00926 } 00927 00928 bool ModuloSchedulingPass::circuit(MSchedGraphNode *v, std::vector<MSchedGraphNode*> &stack, 00929 std::set<MSchedGraphNode*> &blocked, std::vector<MSchedGraphNode*> &SCC, 00930 MSchedGraphNode *s, std::map<MSchedGraphNode*, std::set<MSchedGraphNode*> > &B, 00931 int II, std::map<MSchedGraphNode*, MSchedGraphNode*> &newNodes) { 00932 bool f = false; 00933 00934 DEBUG(std::cerr << "Finding Circuits Starting with: ( " << v << ")"<< *v << "\n"); 00935 00936 //Push node onto the stack 00937 stack.push_back(v); 00938 00939 //block this node 00940 blocked.insert(v); 00941 00942 //Loop over all successors of node v that are in the scc, create Adjaceny list 00943 std::set<MSchedGraphNode*> AkV; 00944 for(MSchedGraphNode::succ_iterator I = v->succ_begin(), E = v->succ_end(); I != E; ++I) { 00945 if((std::find(SCC.begin(), SCC.end(), *I) != SCC.end())) { 00946 AkV.insert(*I); 00947 } 00948 } 00949 00950 for(std::set<MSchedGraphNode*>::iterator I = AkV.begin(), E = AkV.end(); I != E; ++I) { 00951 if(*I == s) { 00952 //We have a circuit, so add it to our list 00953 addRecc(stack, newNodes); 00954 f = true; 00955 } 00956 else if(!blocked.count(*I)) { 00957 if(circuit(*I, stack, blocked, SCC, s, B, II, newNodes)) 00958 f = true; 00959 } 00960 else 00961 DEBUG(std::cerr << "Blocked: " << **I << "\n"); 00962 } 00963 00964 00965 if(f) { 00966 unblock(v, blocked, B); 00967 } 00968 else { 00969 for(std::set<MSchedGraphNode*>::iterator I = AkV.begin(), E = AkV.end(); I != E; ++I) 00970 B[*I].insert(v); 00971 00972 } 00973 00974 //Pop v 00975 stack.pop_back(); 00976 00977 return f; 00978 00979 } 00980 00981 void ModuloSchedulingPass::addRecc(std::vector<MSchedGraphNode*> &stack, std::map<MSchedGraphNode*, MSchedGraphNode*> &newNodes) { 00982 std::vector<MSchedGraphNode*> recc; 00983 //Dump recurrence for now 00984 DEBUG(std::cerr << "Starting Recc\n"); 00985 00986 int totalDelay = 0; 00987 int totalDistance = 0; 00988 MSchedGraphNode *lastN = 0; 00989 MSchedGraphNode *start = 0; 00990 MSchedGraphNode *end = 0; 00991 00992 //Loop over recurrence, get delay and distance 00993 for(std::vector<MSchedGraphNode*>::iterator N = stack.begin(), NE = stack.end(); N != NE; ++N) { 00994 DEBUG(std::cerr << **N << "\n"); 00995 totalDelay += (*N)->getLatency(); 00996 if(lastN) { 00997 int iteDiff = (*N)->getInEdge(lastN).getIteDiff(); 00998 totalDistance += iteDiff; 00999 01000 if(iteDiff > 0) { 01001 start = lastN; 01002 end = *N; 01003 } 01004 } 01005 //Get the original node 01006 lastN = *N; 01007 recc.push_back(newNodes[*N]); 01008 01009 01010 } 01011 01012 //Get the loop edge 01013 totalDistance += lastN->getIteDiff(*stack.begin()); 01014 01015 DEBUG(std::cerr << "End Recc\n"); 01016 CircCount++; 01017 01018 if(start && end) { 01019 //Insert reccurrence into the list 01020 DEBUG(std::cerr << "Ignore Edge from!!: " << *start << " to " << *end << "\n"); 01021 edgesToIgnore.insert(std::make_pair(newNodes[start], (newNodes[end])->getInEdgeNum(newNodes[start]))); 01022 } 01023 else { 01024 //Insert reccurrence into the list 01025 DEBUG(std::cerr << "Ignore Edge from: " << *lastN << " to " << **stack.begin() << "\n"); 01026 edgesToIgnore.insert(std::make_pair(newNodes[lastN], newNodes[(*stack.begin())]->getInEdgeNum(newNodes[lastN]))); 01027 01028 } 01029 //Adjust II until we get close to the inequality delay - II*distance <= 0 01030 int RecMII = II; //Starting value 01031 int value = totalDelay-(RecMII * totalDistance); 01032 int lastII = II; 01033 while(value < 0) { 01034 01035 lastII = RecMII; 01036 RecMII--; 01037 value = totalDelay-(RecMII * totalDistance); 01038 } 01039 01040 recurrenceList.insert(std::make_pair(lastII, recc)); 01041 01042 } 01043 01044 void ModuloSchedulingPass::addSCC(std::vector<MSchedGraphNode*> &SCC, std::map<MSchedGraphNode*, MSchedGraphNode*> &newNodes) { 01045 01046 int totalDelay = 0; 01047 int totalDistance = 0; 01048 std::vector<MSchedGraphNode*> recc; 01049 MSchedGraphNode *start = 0; 01050 MSchedGraphNode *end = 0; 01051 01052 //Loop over recurrence, get delay and distance 01053 for(std::vector<MSchedGraphNode*>::iterator N = SCC.begin(), NE = SCC.end(); N != NE; ++N) { 01054 DEBUG(std::cerr << **N << "\n"); 01055 totalDelay += (*N)->getLatency(); 01056 01057 for(unsigned i = 0; i < (*N)->succ_size(); ++i) { 01058 MSchedGraphEdge *edge = (*N)->getSuccessor(i); 01059 if(find(SCC.begin(), SCC.end(), edge->getDest()) != SCC.end()) { 01060 totalDistance += edge->getIteDiff(); 01061 if(edge->getIteDiff() > 0) 01062 if(!start && !end) { 01063 start = *N; 01064 end = edge->getDest(); 01065 } 01066 01067 } 01068 } 01069 01070 01071 //Get the original node 01072 recc.push_back(newNodes[*N]); 01073 01074 01075 } 01076 01077 DEBUG(std::cerr << "End Recc\n"); 01078 CircCount++; 01079 01080 assert( (start && end) && "Must have start and end node to ignore edge for SCC"); 01081 01082 if(start && end) { 01083 //Insert reccurrence into the list 01084 DEBUG(std::cerr << "Ignore Edge from!!: " << *start << " to " << *end << "\n"); 01085 edgesToIgnore.insert(std::make_pair(newNodes[start], (newNodes[end])->getInEdgeNum(newNodes[start]))); 01086 } 01087 01088 int lastII = totalDelay / totalDistance; 01089 01090 01091 recurrenceList.insert(std::make_pair(lastII, recc)); 01092 01093 } 01094 01095 void ModuloSchedulingPass::findAllCircuits(MSchedGraph *g, int II) { 01096 01097 CircCount = 0; 01098 01099 //Keep old to new node mapping information 01100 std::map<MSchedGraphNode*, MSchedGraphNode*> newNodes; 01101 01102 //copy the graph 01103 MSchedGraph *MSG = new MSchedGraph(*g, newNodes); 01104 01105 DEBUG(std::cerr << "Finding All Circuits\n"); 01106 01107 //Set of blocked nodes 01108 std::set<MSchedGraphNode*> blocked; 01109 01110 //Stack holding current circuit 01111 std::vector<MSchedGraphNode*> stack; 01112 01113 //Map for B Lists 01114 std::map<MSchedGraphNode*, std::set<MSchedGraphNode*> > B; 01115 01116 //current node 01117 MSchedGraphNode *s; 01118 01119 01120 //Iterate over the graph until its down to one node or empty 01121 while(MSG->size() > 1) { 01122 01123 //Write Graph out to file 01124 //WriteGraphToFile(std::cerr, "Graph" + utostr(MSG->size()), MSG); 01125 01126 DEBUG(std::cerr << "Graph Size: " << MSG->size() << "\n"); 01127 DEBUG(std::cerr << "Finding strong component Vk with least vertex\n"); 01128 01129 //Iterate over all the SCCs in the graph 01130 std::set<MSchedGraphNode*> Visited; 01131 std::vector<MSchedGraphNode*> Vk; 01132 MSchedGraphNode* s = 0; 01133 int numEdges = 0; 01134 01135 //Find scc with the least vertex 01136 for (MSchedGraph::iterator GI = MSG->begin(), E = MSG->end(); GI != E; ++GI) 01137 if (Visited.insert(GI->second).second) { 01138 for (scc_iterator<MSchedGraphNode*> SCCI = scc_begin(GI->second), 01139 E = scc_end(GI->second); SCCI != E; ++SCCI) { 01140 std::vector<MSchedGraphNode*> &nextSCC = *SCCI; 01141 01142 if (Visited.insert(nextSCC[0]).second) { 01143 Visited.insert(nextSCC.begin()+1, nextSCC.end()); 01144 01145 if(nextSCC.size() > 1) { 01146 std::cerr << "SCC size: " << nextSCC.size() << "\n"; 01147 01148 for(unsigned i = 0; i < nextSCC.size(); ++i) { 01149 //Loop over successor and see if in scc, then count edge 01150 MSchedGraphNode *node = nextSCC[i]; 01151 for(MSchedGraphNode::succ_iterator S = node->succ_begin(), SE = node->succ_end(); S != SE; ++S) { 01152 if(find(nextSCC.begin(), nextSCC.end(), *S) != nextSCC.end()) 01153 numEdges++; 01154 } 01155 } 01156 std::cerr << "Num Edges: " << numEdges << "\n"; 01157 } 01158 01159 //Ignore self loops 01160 if(nextSCC.size() > 1) { 01161 01162 //Get least vertex in Vk 01163 if(!s) { 01164 s = nextSCC[0]; 01165 Vk = nextSCC; 01166 } 01167 01168 for(unsigned i = 0; i < nextSCC.size(); ++i) { 01169 if(nextSCC[i] < s) { 01170 s = nextSCC[i]; 01171 Vk = nextSCC; 01172 } 01173 } 01174 } 01175 } 01176 } 01177 } 01178 01179 01180 01181 //Process SCC 01182 DEBUG(for(std::vector<MSchedGraphNode*>::iterator N = Vk.begin(), NE = Vk.end(); 01183 N != NE; ++N) { std::cerr << *((*N)->getInst()); }); 01184 01185 //Iterate over all nodes in this scc 01186 for(std::vector<MSchedGraphNode*>::iterator N = Vk.begin(), NE = Vk.end(); 01187 N != NE; ++N) { 01188 blocked.erase(*N); 01189 B[*N].clear(); 01190 } 01191 if(Vk.size() > 1) { 01192 if(numEdges < 98) 01193 circuit(s, stack, blocked, Vk, s, B, II, newNodes); 01194 else 01195 addSCC(Vk, newNodes); 01196 01197 //Delete nodes from the graph 01198 //Find all nodes up to s and delete them 01199 std::vector<MSchedGraphNode*> nodesToRemove; 01200 nodesToRemove.push_back(s); 01201 for(MSchedGraph::iterator N = MSG->begin(), NE = MSG->end(); N != NE; ++N) { 01202 if(N->second < s ) 01203 nodesToRemove.push_back(N->second); 01204 } 01205 for(std::vector<MSchedGraphNode*>::iterator N = nodesToRemove.begin(), NE = nodesToRemove.end(); N != NE; ++N) { 01206 DEBUG(std::cerr << "Deleting Node: " << **N << "\n"); 01207 MSG->deleteNode(*N); 01208 } 01209 } 01210 else 01211 break; 01212 } 01213 DEBUG(std::cerr << "Num Circuits found: " << CircCount << "\n"); 01214 } 01215 01216 01217 void ModuloSchedulingPass::findAllReccurrences(MSchedGraphNode *node, 01218 std::vector<MSchedGraphNode*> &visitedNodes, 01219 int II) { 01220 01221 01222 if(std::find(visitedNodes.begin(), visitedNodes.end(), node) != visitedNodes.end()) { 01223 std::vector<MSchedGraphNode*> recurrence; 01224 bool first = true; 01225 int delay = 0; 01226 int distance = 0; 01227 int RecMII = II; //Starting value 01228 MSchedGraphNode *last = node; 01229 MSchedGraphNode *srcBackEdge = 0; 01230 MSchedGraphNode *destBackEdge = 0; 01231 01232 01233 01234 for(std::vector<MSchedGraphNode*>::iterator I = visitedNodes.begin(), E = visitedNodes.end(); 01235 I !=E; ++I) { 01236 01237 if(*I == node) 01238 first = false; 01239 if(first) 01240 continue; 01241 01242 delay = delay + (*I)->getLatency(); 01243 01244 if(*I != node) { 01245 int diff = (*I)->getInEdge(last).getIteDiff(); 01246 distance += diff; 01247 if(diff > 0) { 01248 srcBackEdge = last; 01249 destBackEdge = *I; 01250 } 01251 } 01252 01253 recurrence.push_back(*I); 01254 last = *I; 01255 } 01256 01257 01258 01259 //Get final distance calc 01260 distance += node->getInEdge(last).getIteDiff(); 01261 DEBUG(std::cerr << "Reccurrence Distance: " << distance << "\n"); 01262 01263 //Adjust II until we get close to the inequality delay - II*distance <= 0 01264 01265 int value = delay-(RecMII * distance); 01266 int lastII = II; 01267 while(value <= 0) { 01268 01269 lastII = RecMII; 01270 RecMII--; 01271 value = delay-(RecMII * distance); 01272 } 01273 01274 01275 DEBUG(std::cerr << "Final II for this recurrence: " << lastII << "\n"); 01276 addReccurrence(recurrence, lastII, srcBackEdge, destBackEdge); 01277 assert(distance != 0 && "Recurrence distance should not be zero"); 01278 return; 01279 } 01280 01281 unsigned count = 0; 01282 for(MSchedGraphNode::succ_iterator I = node->succ_begin(), E = node->succ_end(); I != E; ++I) { 01283 visitedNodes.push_back(node); 01284 //if(!edgesToIgnore.count(std::make_pair(node, count))) 01285 findAllReccurrences(*I, visitedNodes, II); 01286 visitedNodes.pop_back(); 01287 count++; 01288 } 01289 } 01290 01291 void ModuloSchedulingPass::searchPath(MSchedGraphNode *node, 01292 std::vector<MSchedGraphNode*> &path, 01293 std::set<MSchedGraphNode*> &nodesToAdd, 01294 std::set<MSchedGraphNode*> &new_reccurrence) { 01295 //Push node onto the path 01296 path.push_back(node); 01297 01298 //Loop over all successors and see if there is a path from this node to 01299 //a recurrence in the partial order, if so.. add all nodes to be added to recc 01300 for(MSchedGraphNode::succ_iterator S = node->succ_begin(), SE = node->succ_end(); S != SE; 01301 ++S) { 01302 01303 //Check if we should ignore this edge first 01304 if(ignoreEdge(node,*S)) 01305 continue; 01306 01307 //check if successor is in this recurrence, we will get to it eventually 01308 if(new_reccurrence.count(*S)) 01309 continue; 01310 01311 //If this node exists in a recurrence already in the partial 01312 //order, then add all nodes in the path to the set of nodes to add 01313 //Check if its already in our partial order, if not add it to the 01314 //final vector 01315 bool found = false; 01316 for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(), 01317 PE = partialOrder.end(); PO != PE; ++PO) { 01318 01319 if(PO->count(*S)) { 01320 found = true; 01321 break; 01322 } 01323 } 01324 01325 if(!found) { 01326 nodesToAdd.insert(*S); 01327 searchPath(*S, path, nodesToAdd, new_reccurrence); 01328 } 01329 } 01330 01331 //Pop Node off the path 01332 path.pop_back(); 01333 } 01334 01335 void ModuloSchedulingPass::pathToRecc(MSchedGraphNode *node, 01336 std::vector<MSchedGraphNode*> &path, 01337 std::set<MSchedGraphNode*> &poSet, 01338 std::set<MSchedGraphNode*> &lastNodes) { 01339 //Push node onto the path 01340 path.push_back(node); 01341 01342 DEBUG(std::cerr << "Current node: " << *node << "\n"); 01343 01344 //Loop over all successors and see if there is a path from this node to 01345 //a recurrence in the partial order, if so.. add all nodes to be added to recc 01346 for(MSchedGraphNode::succ_iterator S = node->succ_begin(), SE = node->succ_end(); S != SE; 01347 ++S) { 01348 DEBUG(std::cerr << "Succ:" << **S << "\n"); 01349 //Check if we should ignore this edge first 01350 if(ignoreEdge(node,*S)) 01351 continue; 01352 01353 if(poSet.count(*S)) { 01354 DEBUG(std::cerr << "Found path to recc from no pred\n"); 01355 //Loop over path, if it exists in lastNodes, then add to poset, and remove from lastNodes 01356 for(std::vector<MSchedGraphNode*>::iterator I = path.begin(), IE = path.end(); I != IE; ++I) { 01357 if(lastNodes.count(*I)) { 01358 DEBUG(std::cerr << "Inserting node into recc: " << **I << "\n"); 01359 poSet.insert(*I); 01360 lastNodes.erase(*I); 01361 } 01362 } 01363 } 01364 else 01365 pathToRecc(*S, path, poSet, lastNodes); 01366 } 01367 01368 //Pop Node off the path 01369 path.pop_back(); 01370 } 01371 01372 void ModuloSchedulingPass::computePartialOrder() { 01373 01374 TIME_REGION(X, "calculatePartialOrder"); 01375 01376 DEBUG(std::cerr << "Computing Partial Order\n"); 01377 01378 //Only push BA branches onto the final node order, we put other 01379 //branches after it FIXME: Should we really be pushing branches on 01380 //it a specific order instead of relying on BA being there? 01381 01382 std::vector<MSchedGraphNode*> branches; 01383 01384 //Steps to add a recurrence to the partial order 1) Find reccurrence 01385 //with the highest RecMII. Add it to the partial order. 2) For each 01386 //recurrence with decreasing RecMII, add it to the partial order 01387 //along with any nodes that connect this recurrence to recurrences 01388 //already in the partial order 01389 for(std::set<std::pair<int, std::vector<MSchedGraphNode*> > >::reverse_iterator 01390 I = recurrenceList.rbegin(), E=recurrenceList.rend(); I !=E; ++I) { 01391 01392 std::set<MSchedGraphNode*> new_recurrence; 01393 01394 //Loop through recurrence and remove any nodes already in the partial order 01395 for(std::vector<MSchedGraphNode*>::const_iterator N = I->second.begin(), 01396 NE = I->second.end(); N != NE; ++N) { 01397 01398 bool found = false; 01399 for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(), 01400 PE = partialOrder.end(); PO != PE; ++PO) { 01401 if(PO->count(*N)) 01402 found = true; 01403 } 01404 01405 //Check if its a branch, and remove to handle special 01406 if(!found) { 01407 if((*N)->isBranch() && !(*N)->hasPredecessors()) { 01408 branches.push_back(*N); 01409 } 01410 else 01411 new_recurrence.insert(*N); 01412 } 01413 01414 } 01415 01416 01417 if(new_recurrence.size() > 0) { 01418 01419 std::vector<MSchedGraphNode*> path; 01420 std::set<MSchedGraphNode*> nodesToAdd; 01421 01422 //Dump recc we are dealing with (minus nodes already in PO) 01423 DEBUG(std::cerr << "Recc: "); 01424 DEBUG(for(std::set<MSchedGraphNode*>::iterator R = new_recurrence.begin(), RE = new_recurrence.end(); R != RE; ++R) { std::cerr << **R ; }); 01425 01426 //Add nodes that connect this recurrence to recurrences in the partial path 01427 for(std::set<MSchedGraphNode*>::iterator N = new_recurrence.begin(), 01428 NE = new_recurrence.end(); N != NE; ++N) 01429 searchPath(*N, path, nodesToAdd, new_recurrence); 01430 01431 //Add nodes to this recurrence if they are not already in the partial order 01432 for(std::set<MSchedGraphNode*>::iterator N = nodesToAdd.begin(), NE = nodesToAdd.end(); 01433 N != NE; ++N) { 01434 bool found = false; 01435 for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(), 01436 PE = partialOrder.end(); PO != PE; ++PO) { 01437 if(PO->count(*N)) 01438 found = true; 01439 } 01440 if(!found) { 01441 assert("FOUND CONNECTOR"); 01442 new_recurrence.insert(*N); 01443 } 01444 } 01445 01446 partialOrder.push_back(new_recurrence); 01447 01448 01449 //Dump out partial order 01450 DEBUG(for(std::vector<std::set<MSchedGraphNode*> >::iterator I = partialOrder.begin(), 01451 E = partialOrder.end(); I !=E; ++I) { 01452 std::cerr << "Start set in PO\n"; 01453 for(std::set<MSchedGraphNode*>::iterator J = I->begin(), JE = I->end(); J != JE; ++J) 01454 std::cerr << "PO:" << **J << "\n"; 01455 }); 01456 01457 } 01458 } 01459 01460 //Add any nodes that are not already in the partial order 01461 //Add them in a set, one set per connected component 01462 std::set<MSchedGraphNode*> lastNodes; 01463 std::set<MSchedGraphNode*> noPredNodes; 01464 for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), 01465 E = nodeToAttributesMap.end(); I != E; ++I) { 01466 01467 bool found = false; 01468 01469 //Check if its already in our partial order, if not add it to the final vector 01470 for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(), 01471 PE = partialOrder.end(); PO != PE; ++PO) { 01472 if(PO->count(I->first)) 01473 found = true; 01474 } 01475 if(!found) 01476 lastNodes.insert(I->first); 01477 } 01478 01479 //For each node w/out preds, see if there is a path to one of the 01480 //recurrences, and if so add them to that current recc 01481 /*for(std::set<MSchedGraphNode*>::iterator N = noPredNodes.begin(), NE = noPredNodes.end(); 01482 N != NE; ++N) { 01483 DEBUG(std::cerr << "No Pred Path from: " << **N << "\n"); 01484 for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(), 01485 PE = partialOrder.end(); PO != PE; ++PO) { 01486 std::vector<MSchedGraphNode*> path; 01487 pathToRecc(*N, path, *PO, lastNodes); 01488 } 01489 }*/ 01490 01491 01492 //Break up remaining nodes that are not in the partial order 01493 ///into their connected compoenents 01494 while(lastNodes.size() > 0) { 01495 std::set<MSchedGraphNode*> ccSet; 01496 connectedComponentSet(*(lastNodes.begin()),ccSet, lastNodes); 01497 if(ccSet.size() > 0) 01498 partialOrder.push_back(ccSet); 01499 } 01500 01501 01502 //Clean up branches by putting them in final order 01503 assert(branches.size() == 0 && "We should not have any branches in our graph"); 01504 } 01505 01506 01507 void ModuloSchedulingPass::connectedComponentSet(MSchedGraphNode *node, std::set<MSchedGraphNode*> &ccSet, std::set<MSchedGraphNode*> &lastNodes) { 01508 01509 //Add to final set 01510 if( !ccSet.count(node) && lastNodes.count(node)) { 01511 lastNodes.erase(node); 01512 ccSet.insert(node); 01513 } 01514 else 01515 return; 01516 01517 //Loop over successors and recurse if we have not seen this node before 01518 for(MSchedGraphNode::succ_iterator node_succ = node->succ_begin(), end=node->succ_end(); node_succ != end; ++node_succ) { 01519 connectedComponentSet(*node_succ, ccSet, lastNodes); 01520 } 01521 01522 } 01523 01524 void ModuloSchedulingPass::predIntersect(std::set<MSchedGraphNode*> &CurrentSet, std::set<MSchedGraphNode*> &IntersectResult) { 01525 01526 for(unsigned j=0; j < FinalNodeOrder.size(); ++j) { 01527 for(MSchedGraphNode::pred_iterator P = FinalNodeOrder[j]->pred_begin(), 01528 E = FinalNodeOrder[j]->pred_end(); P != E; ++P) { 01529 01530 //Check if we are supposed to ignore this edge or not 01531 if(ignoreEdge(*P,FinalNodeOrder[j])) 01532 continue; 01533 01534 if(CurrentSet.count(*P)) 01535 if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), *P) == FinalNodeOrder.end()) 01536 IntersectResult.insert(*P); 01537 } 01538 } 01539 } 01540 01541 01542 01543 01544 01545 void ModuloSchedulingPass::succIntersect(std::set<MSchedGraphNode*> &CurrentSet, std::set<MSchedGraphNode*> &IntersectResult) { 01546 01547 for(unsigned j=0; j < FinalNodeOrder.size(); ++j) { 01548 for(MSchedGraphNode::succ_iterator P = FinalNodeOrder[j]->succ_begin(), 01549 E = FinalNodeOrder[j]->succ_end(); P != E; ++P) { 01550 01551 //Check if we are supposed to ignore this edge or not 01552 if(ignoreEdge(FinalNodeOrder[j],*P)) 01553 continue; 01554 01555 if(CurrentSet.count(*P)) 01556 if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), *P) == FinalNodeOrder.end()) 01557 IntersectResult.insert(*P); 01558 } 01559 } 01560 } 01561 01562 void dumpIntersection(std::set<MSchedGraphNode*> &IntersectCurrent) { 01563 std::cerr << "Intersection ("; 01564 for(std::set<MSchedGraphNode*>::iterator I = IntersectCurrent.begin(), E = IntersectCurrent.end(); I != E; ++I) 01565 std::cerr << **I << ", "; 01566 std::cerr << ")\n"; 01567 } 01568 01569 01570 01571 void ModuloSchedulingPass::orderNodes() { 01572 01573 TIME_REGION(X, "orderNodes"); 01574 01575 int BOTTOM_UP = 0; 01576 int TOP_DOWN = 1; 01577 01578 //Set default order 01579 int order = BOTTOM_UP; 01580 01581 01582 //Loop over all the sets and place them in the final node order 01583 for(std::vector<std::set<MSchedGraphNode*> >::iterator CurrentSet = partialOrder.begin(), E= partialOrder.end(); CurrentSet != E; ++CurrentSet) { 01584 01585 DEBUG(std::cerr << "Processing set in S\n"); 01586 DEBUG(dumpIntersection(*CurrentSet)); 01587 01588 //Result of intersection 01589 std::set<MSchedGraphNode*> IntersectCurrent; 01590 01591 predIntersect(*CurrentSet, IntersectCurrent); 01592 01593 //If the intersection of predecessor and current set is not empty 01594 //sort nodes bottom up 01595 if(IntersectCurrent.size() != 0) { 01596 DEBUG(std::cerr << "Final Node Order Predecessors and Current Set interesection is NOT empty\n"); 01597 order = BOTTOM_UP; 01598 } 01599 //If empty, use successors 01600 else { 01601 DEBUG(std::cerr << "Final Node Order Predecessors and Current Set interesection is empty\n"); 01602 01603 succIntersect(*CurrentSet, IntersectCurrent); 01604 01605 //sort top-down 01606 if(IntersectCurrent.size() != 0) { 01607 DEBUG(std::cerr << "Final Node Order Successors and Current Set interesection is NOT empty\n"); 01608 order = TOP_DOWN; 01609 } 01610 else { 01611 DEBUG(std::cerr << "Final Node Order Successors and Current Set interesection is empty\n"); 01612 //Find node with max ASAP in current Set 01613 MSchedGraphNode *node; 01614 int maxASAP = 0; 01615 DEBUG(std::cerr << "Using current set of size " << CurrentSet->size() << "to find max ASAP\n"); 01616 for(std::set<MSchedGraphNode*>::iterator J = CurrentSet->begin(), JE = CurrentSet->end(); J != JE; ++J) { 01617 //Get node attributes 01618 MSNodeAttributes nodeAttr= nodeToAttributesMap.find(*J)->second; 01619 //assert(nodeAttr != nodeToAttributesMap.end() && "Node not in attributes map!"); 01620 01621 if(maxASAP <= nodeAttr.ASAP) { 01622 maxASAP = nodeAttr.ASAP; 01623 node = *J; 01624 } 01625 } 01626 assert(node != 0 && "In node ordering node should not be null"); 01627 IntersectCurrent.insert(node); 01628 order = BOTTOM_UP; 01629 } 01630 } 01631 01632 //Repeat until all nodes are put into the final order from current set 01633 while(IntersectCurrent.size() > 0) { 01634 01635 if(order == TOP_DOWN) { 01636 DEBUG(std::cerr << "Order is TOP DOWN\n"); 01637 01638 while(IntersectCurrent.size() > 0) { 01639 DEBUG(std::cerr << "Intersection is not empty, so find heighest height\n"); 01640 01641 int MOB = 0; 01642 int height = 0; 01643 MSchedGraphNode *highestHeightNode = *(IntersectCurrent.begin()); 01644 01645 //Find node in intersection with highest heigh and lowest MOB 01646 for(std::set<MSchedGraphNode*>::iterator I = IntersectCurrent.begin(), 01647 E = IntersectCurrent.end(); I != E; ++I) { 01648 01649 //Get current nodes properties 01650 MSNodeAttributes nodeAttr= nodeToAttributesMap.find(*I)->second; 01651 01652 if(height < nodeAttr.height) { 01653 highestHeightNode = *I; 01654 height = nodeAttr.height; 01655 MOB = nodeAttr.MOB; 01656 } 01657 else if(height == nodeAttr.height) { 01658 if(MOB > nodeAttr.height) { 01659 highestHeightNode = *I; 01660 height = nodeAttr.height; 01661 MOB = nodeAttr.MOB; 01662 } 01663 } 01664 } 01665 01666 //Append our node with greatest height to the NodeOrder 01667 if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), highestHeightNode) == FinalNodeOrder.end()) { 01668 DEBUG(std::cerr << "Adding node to Final Order: " << *highestHeightNode << "\n"); 01669 FinalNodeOrder.push_back(highestHeightNode); 01670 } 01671 01672 //Remove V from IntersectOrder 01673 IntersectCurrent.erase(std::find(IntersectCurrent.begin(), 01674 IntersectCurrent.end(), highestHeightNode)); 01675 01676 01677 //Intersect V's successors with CurrentSet 01678 for(MSchedGraphNode::succ_iterator P = highestHeightNode->succ_begin(), 01679 E = highestHeightNode->succ_end(); P != E; ++P) { 01680 //if(lower_bound(CurrentSet->begin(), 01681 // CurrentSet->end(), *P) != CurrentSet->end()) { 01682 if(std::find(CurrentSet->begin(), CurrentSet->end(), *P) != CurrentSet->end()) { 01683 if(ignoreEdge(highestHeightNode, *P)) 01684 continue; 01685 //If not already in Intersect, add 01686 if(!IntersectCurrent.count(*P)) 01687 IntersectCurrent.insert(*P); 01688 } 01689 } 01690 } //End while loop over Intersect Size 01691 01692 //Change direction 01693 order = BOTTOM_UP; 01694 01695 //Reset Intersect to reflect changes in OrderNodes 01696 IntersectCurrent.clear(); 01697 predIntersect(*CurrentSet, IntersectCurrent); 01698 01699 } //End If TOP_DOWN 01700 01701 //Begin if BOTTOM_UP 01702 else { 01703 DEBUG(std::cerr << "Order is BOTTOM UP\n"); 01704 while(IntersectCurrent.size() > 0) { 01705 DEBUG(std::cerr << "Intersection of size " << IntersectCurrent.size() << ", finding highest depth\n"); 01706 01707 //dump intersection 01708 DEBUG(dumpIntersection(IntersectCurrent)); 01709 //Get node with highest depth, if a tie, use one with lowest 01710 //MOB 01711 int MOB = 0; 01712 int depth = 0; 01713 MSchedGraphNode *highestDepthNode = *(IntersectCurrent.begin()); 01714 01715 for(std::set<MSchedGraphNode*>::iterator I = IntersectCurrent.begin(), 01716 E = IntersectCurrent.end(); I != E; ++I) { 01717 //Find node attribute in graph 01718 MSNodeAttributes nodeAttr= nodeToAttributesMap.find(*I)->second; 01719 01720 if(depth < nodeAttr.depth) { 01721 highestDepthNode = *I; 01722 depth = nodeAttr.depth; 01723 MOB = nodeAttr.MOB; 01724 } 01725 else if(depth == nodeAttr.depth) { 01726 if(MOB > nodeAttr.MOB) { 01727 highestDepthNode = *I; 01728 depth = nodeAttr.depth; 01729 MOB = nodeAttr.MOB; 01730 } 01731 } 01732 } 01733 01734 01735 01736 //Append highest depth node to the NodeOrder 01737 if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), highestDepthNode) == FinalNodeOrder.end()) { 01738 DEBUG(std::cerr << "Adding node to Final Order: " << *highestDepthNode << "\n"); 01739 FinalNodeOrder.push_back(highestDepthNode); 01740 } 01741 //Remove heightestDepthNode from IntersectOrder 01742 IntersectCurrent.erase(highestDepthNode); 01743 01744 01745 //Intersect heightDepthNode's pred with CurrentSet 01746 for(MSchedGraphNode::pred_iterator P = highestDepthNode->pred_begin(), 01747 E = highestDepthNode->pred_end(); P != E; ++P) { 01748 if(CurrentSet->count(*P)) { 01749 if(ignoreEdge(*P, highestDepthNode)) 01750 continue; 01751 01752 //If not already in Intersect, add 01753 if(!IntersectCurrent.count(*P)) 01754 IntersectCurrent.insert(*P); 01755 } 01756 } 01757 01758 } //End while loop over Intersect Size 01759 01760 //Change order 01761 order = TOP_DOWN; 01762 01763 //Reset IntersectCurrent to reflect changes in OrderNodes 01764 IntersectCurrent.clear(); 01765 succIntersect(*CurrentSet, IntersectCurrent); 01766 } //End if BOTTOM_DOWN 01767 01768 DEBUG(std::cerr << "Current Intersection Size: " << IntersectCurrent.size() << "\n"); 01769 } 01770 //End Wrapping while loop 01771 DEBUG(std::cerr << "Ending Size of Current Set: " << CurrentSet->size() << "\n"); 01772 }//End for over all sets of nodes 01773 01774 //FIXME: As the algorithm stands it will NEVER add an instruction such as ba (with no 01775 //data dependencies) to the final order. We add this manually. It will always be 01776 //in the last set of S since its not part of a recurrence 01777 //Loop over all the sets and place them in the final node order 01778 std::vector<std::set<MSchedGraphNode*> > ::reverse_iterator LastSet = partialOrder.rbegin(); 01779 for(std::set<MSchedGraphNode*>::iterator CurrentNode = LastSet->begin(), LastNode = LastSet->end(); 01780 CurrentNode != LastNode; ++CurrentNode) { 01781 if((*CurrentNode)->getInst()->getOpcode() == V9::BA) 01782 FinalNodeOrder.push_back(*CurrentNode); 01783 } 01784 //Return final Order 01785 //return FinalNodeOrder; 01786 } 01787 01788 bool ModuloSchedulingPass::computeSchedule(const MachineBasicBlock *BB, MSchedGraph *MSG) { 01789 01790 TIME_REGION(X, "computeSchedule"); 01791 01792 bool success = false; 01793 01794 //FIXME: Should be set to max II of the original loop 01795 //Cap II in order to prevent infinite loop 01796 int capII = MSG->totalDelay(); 01797 01798 while(!success) { 01799 01800 //Keep track of branches, but do not insert into the schedule 01801 std::vector<MSchedGraphNode*> branches; 01802 01803 //Loop over the final node order and process each node 01804 for(std::vector<MSchedGraphNode*>::iterator I = FinalNodeOrder.begin(), 01805 E = FinalNodeOrder.end(); I != E; ++I) { 01806 01807 //CalculateEarly and Late start 01808 bool initialLSVal = false; 01809 bool initialESVal = false; 01810 int EarlyStart = 0; 01811 int LateStart = 0; 01812 bool hasSucc = false; 01813 bool hasPred = false; 01814 bool sched; 01815 01816 if((*I)->isBranch()) 01817 if((*I)->hasPredecessors()) 01818 sched = true; 01819 else 01820 sched = false; 01821 else 01822 sched = true; 01823 01824 if(sched) { 01825 //Loop over nodes in the schedule and determine if they are predecessors 01826 //or successors of the node we are trying to schedule 01827 for(MSSchedule::schedule_iterator nodesByCycle = schedule.begin(), nodesByCycleEnd = schedule.end(); 01828 nodesByCycle != nodesByCycleEnd; ++nodesByCycle) { 01829 01830 //For this cycle, get the vector of nodes schedule and loop over it 01831 for(std::vector<MSchedGraphNode*>::iterator schedNode = nodesByCycle->second.begin(), SNE = nodesByCycle->second.end(); schedNode != SNE; ++schedNode) { 01832 01833 if((*I)->isPredecessor(*schedNode)) { 01834 int diff = (*I)->getInEdge(*schedNode).getIteDiff(); 01835 int ES_Temp = nodesByCycle->first + (*schedNode)->getLatency() - diff * II; 01836 DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << nodesByCycle->first << "\n"); 01837 DEBUG(std::cerr << "Temp EarlyStart: " << ES_Temp << " Prev EarlyStart: " << EarlyStart << "\n"); 01838 if(initialESVal) 01839 EarlyStart = std::max(EarlyStart, ES_Temp); 01840 else { 01841 EarlyStart = ES_Temp; 01842 initialESVal = true; 01843 } 01844 hasPred = true; 01845 } 01846 if((*I)->isSuccessor(*schedNode)) { 01847 int diff = (*schedNode)->getInEdge(*I).getIteDiff(); 01848 int LS_Temp = nodesByCycle->first - (*I)->getLatency() + diff * II; 01849 DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << nodesByCycle->first << "\n"); 01850 DEBUG(std::cerr << "Temp LateStart: " << LS_Temp << " Prev LateStart: " << LateStart << "\n"); 01851 if(initialLSVal) 01852 LateStart = std::min(LateStart, LS_Temp); 01853 else { 01854 LateStart = LS_Temp; 01855 initialLSVal = true; 01856 } 01857 hasSucc = true; 01858 } 01859 } 01860 } 01861 } 01862 else { 01863 branches.push_back(*I); 01864 continue; 01865 } 01866 01867 //Check if this node is a pred or succ to a branch, and restrict its placement 01868 //even though the branch is not in the schedule 01869 /*int count = branches.size(); 01870 for(std::vector<MSchedGraphNode*>::iterator B = branches.begin(), BE = branches.end(); 01871 B != BE; ++B) { 01872 if((*I)->isPredecessor(*B)) { 01873 int diff = (*I)->getInEdge(*B).getIteDiff(); 01874 int ES_Temp = (II+count-1) + (*B)->getLatency() - diff * II; 01875 DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << (II+count)-1 << "\n"); 01876 DEBUG(std::cerr << "Temp EarlyStart: " << ES_Temp << " Prev EarlyStart: " << EarlyStart << "\n"); 01877 EarlyStart = std::max(EarlyStart, ES_Temp); 01878 hasPred = true; 01879 } 01880 01881 if((*I)->isSuccessor(*B)) { 01882 int diff = (*B)->getInEdge(*I).getIteDiff(); 01883 int LS_Temp = (II+count-1) - (*I)->getLatency() + diff * II; 01884 DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << (II+count-1) << "\n"); 01885 DEBUG(std::cerr << "Temp LateStart: " << LS_Temp << " Prev LateStart: " << LateStart << "\n"); 01886 LateStart = std::min(LateStart, LS_Temp); 01887 hasSucc = true; 01888 } 01889 01890 count--; 01891 }*/ 01892 01893 //Check if the node has no pred or successors and set Early Start to its ASAP 01894 if(!hasSucc && !hasPred) 01895 EarlyStart = nodeToAttributesMap.find(*I)->second.ASAP; 01896 01897 DEBUG(std::cerr << "Has Successors: " << hasSucc << ", Has Pred: " << hasPred << "\n"); 01898 DEBUG(std::cerr << "EarlyStart: " << EarlyStart << ", LateStart: " << LateStart << "\n"); 01899 01900 //Now, try to schedule this node depending upon its pred and successor in the schedule 01901 //already 01902 if(!hasSucc && hasPred) 01903 success = scheduleNode(*I, EarlyStart, (EarlyStart + II -1)); 01904 else if(!hasPred && hasSucc) 01905 success = scheduleNode(*I, LateStart, (LateStart - II +1)); 01906 else if(hasPred && hasSucc) { 01907 if(EarlyStart > LateStart) { 01908 success = false; 01909 //LateStart = EarlyStart; 01910 DEBUG(std::cerr << "Early Start can not be later then the late start cycle, schedule fails\n"); 01911 } 01912 else 01913 success = scheduleNode(*I, EarlyStart, std::min(LateStart, (EarlyStart + II -1))); 01914 } 01915 else 01916 success = scheduleNode(*I, EarlyStart, EarlyStart + II - 1); 01917 01918 if(!success) { 01919 ++II; 01920 schedule.clear(); 01921 break; 01922 } 01923 01924 } 01925 01926 if(success) { 01927 DEBUG(std::cerr << "Constructing Schedule Kernel\n"); 01928 success = schedule.constructKernel(II, branches, indVarInstrs[BB]); 01929 DEBUG(std::cerr << "Done Constructing Schedule Kernel\n"); 01930 if(!success) { 01931 ++II; 01932 schedule.clear(); 01933 } 01934 DEBUG(std::cerr << "Final II: " << II << "\n"); 01935 } 01936 01937 01938 if(II >= capII) { 01939 DEBUG(std::cerr << "Maximum II reached, giving up\n"); 01940 return false; 01941 } 01942 01943 assert(II < capII && "The II should not exceed the original loop number of cycles"); 01944 } 01945 return true; 01946 } 01947 01948 01949 bool ModuloSchedulingPass::scheduleNode(MSchedGraphNode *node, 01950 int start, int end) { 01951 bool success = false; 01952 01953 DEBUG(std::cerr << *node << " (Start Cycle: " << start << ", End Cycle: " << end << ")\n"); 01954 01955 //Make sure start and end are not negative 01956 //if(start < 0) { 01957 //start = 0; 01958 01959 //} 01960 //if(end < 0) 01961 //end = 0; 01962 01963 bool forward = true; 01964 if(start > end) 01965 forward = false; 01966 01967 bool increaseSC = true; 01968 int cycle = start ; 01969 01970 01971 while(increaseSC) { 01972 01973 increaseSC = false; 01974 01975 increaseSC = schedule.insert(node, cycle, II); 01976 01977 if(!increaseSC) 01978 return true; 01979 01980 //Increment cycle to try again 01981 if(forward) { 01982 ++cycle; 01983 DEBUG(std::cerr << "Increase cycle: " << cycle << "\n"); 01984 if(cycle > end) 01985 return false; 01986 } 01987 else { 01988 --cycle; 01989 DEBUG(std::cerr << "Decrease cycle: " << cycle << "\n"); 01990 if(cycle < end) 01991 return false; 01992 } 01993 } 01994 01995 return success; 01996 } 01997 01998 void ModuloSchedulingPass::writePrologues(std::vector<MachineBasicBlock *> &prologues, MachineBasicBlock *origBB, std::vector<BasicBlock*> &llvm_prologues, std::map<const Value*, std::pair<const MachineInstr*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues, std::map<Value*, MachineBasicBlock*> &newValLocation) { 01999 02000 //Keep a map to easily know whats in the kernel 02001 std::map<int, std::set<const MachineInstr*> > inKernel; 02002 int maxStageCount = 0; 02003 02004 //Keep a map of new values we consumed in case they need to be added back 02005 std::map<Value*, std::map<int, Value*> > consumedValues; 02006 02007 MSchedGraphNode *branch = 0; 02008 MSchedGraphNode *BAbranch = 0; 02009 02010 DEBUG(schedule.print(std::cerr)); 02011 02012 std::vector<MSchedGraphNode*> branches; 02013 02014 for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { 02015 maxStageCount = std::max(maxStageCount, I->second); 02016 02017 //Put int the map so we know what instructions in each stage are in the kernel 02018 DEBUG(std::cerr << "Inserting instruction " << *(I->first) << " into map at stage " << I->second << "\n"); 02019 inKernel[I->second].insert(I->first); 02020 } 02021 02022 //Get target information to look at machine operands 02023 const TargetInstrInfo *mii = target.getInstrInfo(); 02024 02025 //Now write the prologues 02026 for(int i = 0; i < maxStageCount; ++i) { 02027 BasicBlock *llvmBB = new BasicBlock("PROLOGUE", (Function*) (origBB->getBasicBlock()->getParent())); 02028 MachineBasicBlock *machineBB = new MachineBasicBlock(llvmBB); 02029 02030 DEBUG(std::cerr << "i=" << i << "\n"); 02031 for(int j = i; j >= 0; --j) { 02032 for(MachineBasicBlock::const_iterator MI = origBB->begin(), ME = origBB->end(); ME != MI; ++MI) { 02033 if(inKernel[j].count(&*MI)) { 02034 MachineInstr *instClone = MI->clone(); 02035 machineBB->push_back(instClone); 02036 02037 //If its a branch, insert a nop 02038 if(mii->isBranch(instClone->getOpcode())) 02039 BuildMI(machineBB, V9::NOP, 0); 02040 02041 02042 DEBUG(std::cerr << "Cloning: " << *MI << "\n"); 02043 02044 //After cloning, we may need to save the value that this instruction defines 02045 for(unsigned opNum=0; opNum < MI->getNumOperands(); ++opNum) { 02046 Instruction *tmp; 02047 02048 //get machine operand 02049 MachineOperand &mOp = instClone->getOperand(opNum); 02050 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) { 02051 02052 //Check if this is a value we should save 02053 if(valuesToSave.count(mOp.getVRegValue())) { 02054 //Save copy in tmpInstruction 02055 tmp = new TmpInstruction(mOp.getVRegValue()); 02056 02057 //Add TmpInstruction to safe LLVM Instruction MCFI 02058 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 02059 tempMvec.addTemp((Value*) tmp); 02060 02061 DEBUG(std::cerr << "Value: " << *(mOp.getVRegValue()) << " New Value: " << *tmp << " Stage: " << i << "\n"); 02062 02063 newValues[mOp.getVRegValue()][i]= tmp; 02064 newValLocation[tmp] = machineBB; 02065 02066 DEBUG(std::cerr << "Machine Instr Operands: " << *(mOp.getVRegValue()) << ", 0, " << *tmp << "\n"); 02067 02068 //Create machine instruction and put int machineBB 02069 MachineInstr *saveValue; 02070 if(mOp.getVRegValue()->getType() == Type::FloatTy) 02071 saveValue = BuildMI(machineBB, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); 02072 else if(mOp.getVRegValue()->getType() == Type::DoubleTy) 02073 saveValue = BuildMI(machineBB, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); 02074 else 02075 saveValue = BuildMI(machineBB, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); 02076 02077 02078 DEBUG(std::cerr << "Created new machine instr: " << *saveValue << "\n"); 02079 } 02080 } 02081 02082 //We may also need to update the value that we use if its from an earlier prologue 02083 if(j != 0) { 02084 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) { 02085 if(newValues.count(mOp.getVRegValue())) { 02086 if(newValues[mOp.getVRegValue()].count(i-1)) { 02087 Value *oldV = mOp.getVRegValue(); 02088 DEBUG(std::cerr << "Replaced this value: " << mOp.getVRegValue() << " With:" << (newValues[mOp.getVRegValue()][i-1]) << "\n"); 02089 //Update the operand with the right value 02090 mOp.setValueReg(newValues[mOp.getVRegValue()][i-1]); 02091 02092 //Remove this value since we have consumed it 02093 //NOTE: Should this only be done if j != maxStage? 02094 consumedValues[oldV][i-1] = (newValues[oldV][i-1]); 02095 DEBUG(std::cerr << "Deleted value: " << consumedValues[oldV][i-1] << "\n"); 02096 newValues[oldV].erase(i-1); 02097 } 02098 } 02099 else 02100 if(consumedValues.count(mOp.getVRegValue())) 02101 assert(!consumedValues[mOp.getVRegValue()].count(i-1) && "Found a case where we need the value"); 02102 } 02103 } 02104 } 02105 } 02106 } 02107 } 02108 02109 MachineFunction *F = (((MachineBasicBlock*)origBB)->getParent()); 02110 MachineFunction::BasicBlockListType &BL = F->getBasicBlockList(); 02111 MachineFunction::BasicBlockListType::iterator BLI = origBB; 02112 assert(BLI != BL.end() && "Must find original BB in machine function\n"); 02113 BL.insert(BLI,machineBB); 02114 prologues.push_back(machineBB); 02115 llvm_prologues.push_back(llvmBB); 02116 } 02117 } 02118 02119 void ModuloSchedulingPass::writeEpilogues(std::vector<MachineBasicBlock *> &epilogues, const MachineBasicBlock *origBB, std::vector<BasicBlock*> &llvm_epilogues, std::map<const Value*, std::pair<const MachineInstr*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues,std::map<Value*, MachineBasicBlock*> &newValLocation, std::map<Value*, std::map<int, Value*> > &kernelPHIs ) { 02120 02121 std::map<int, std::set<const MachineInstr*> > inKernel; 02122 02123 for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { 02124 02125 //Ignore the branch, we will handle this separately 02126 //if(I->first->isBranch()) 02127 //continue; 02128 02129 //Put int the map so we know what instructions in each stage are in the kernel 02130 inKernel[I->second].insert(I->first); 02131 } 02132 02133 std::map<Value*, Value*> valPHIs; 02134 02135 //some debug stuff, will remove later 02136 DEBUG(for(std::map<Value*, std::map<int, Value*> >::iterator V = newValues.begin(), E = newValues.end(); V !=E; ++V) { 02137 std::cerr << "Old Value: " << *(V->first) << "\n"; 02138 for(std::map<int, Value*>::iterator I = V->second.begin(), IE = V->second.end(); I != IE; ++I) 02139 std::cerr << "Stage: " << I->first << " Value: " << *(I->second) << "\n"; 02140 }); 02141 02142 //some debug stuff, will remove later 02143 DEBUG(for(std::map<Value*, std::map<int, Value*> >::iterator V = kernelPHIs.begin(), E = kernelPHIs.end(); V !=E; ++V) { 02144 std::cerr << "Old Value: " << *(V->first) << "\n"; 02145 for(std::map<int, Value*>::iterator I = V->second.begin(), IE = V->second.end(); I != IE; ++I) 02146 std::cerr << "Stage: " << I->first << " Value: " << *(I->second) << "\n"; 02147 }); 02148 02149 //Now write the epilogues 02150 for(int i = schedule.getMaxStage()-1; i >= 0; --i) { 02151 BasicBlock *llvmBB = new BasicBlock("EPILOGUE", (Function*) (origBB->getBasicBlock()->getParent())); 02152 MachineBasicBlock *machineBB = new MachineBasicBlock(llvmBB); 02153 02154 DEBUG(std::cerr << " Epilogue #: " << i << "\n"); 02155 02156 02157 std::map<Value*, int> inEpilogue; 02158 02159 for(MachineBasicBlock::const_iterator MI = origBB->begin(), ME = origBB->end(); ME != MI; ++MI) { 02160 for(int j=schedule.getMaxStage(); j > i; --j) { 02161 if(inKernel[j].count(&*MI)) { 02162 DEBUG(std::cerr << "Cloning instruction " << *MI << "\n"); 02163 MachineInstr *clone = MI->clone(); 02164 02165 //Update operands that need to use the result from the phi 02166 for(unsigned opNum=0; opNum < clone->getNumOperands(); ++opNum) { 02167 //get machine operand 02168 const MachineOperand &mOp = clone->getOperand(opNum); 02169 02170 if((mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse())) { 02171 02172 DEBUG(std::cerr << "Writing PHI for " << (mOp.getVRegValue()) << "\n"); 02173 02174 //If this is the last instructions for the max iterations ago, don't update operands 02175 if(inEpilogue.count(mOp.getVRegValue())) 02176 if(inEpilogue[mOp.getVRegValue()] == i) 02177 continue; 02178 02179 //Quickly write appropriate phis for this operand 02180 if(newValues.count(mOp.getVRegValue())) { 02181 if(newValues[mOp.getVRegValue()].count(i)) { 02182 Instruction *tmp = new TmpInstruction(newValues[mOp.getVRegValue()][i]); 02183 02184 //Get machine code for this instruction 02185 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 02186 tempMvec.addTemp((Value*) tmp); 02187 02188 //assert of no kernelPHI for this value 02189 assert(kernelPHIs[mOp.getVRegValue()][i] !=0 && "Must have final kernel phi to construct epilogue phi"); 02190 02191 MachineInstr *saveValue = BuildMI(machineBB, V9::PHI, 3).addReg(newValues[mOp.getVRegValue()][i]).addReg(kernelPHIs[mOp.getVRegValue()][i]).addRegDef(tmp); 02192 DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n"); 02193 valPHIs[mOp.getVRegValue()] = tmp; 02194 } 02195 } 02196 02197 if(valPHIs.count(mOp.getVRegValue())) { 02198 //Update the operand in the cloned instruction 02199 clone->getOperand(opNum).setValueReg(valPHIs[mOp.getVRegValue()]); 02200 } 02201 } 02202 else if((mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef())) { 02203 inEpilogue[mOp.getVRegValue()] = i; 02204 } 02205 } 02206 machineBB->push_back(clone); 02207 } 02208 } 02209 } 02210 02211 MachineFunction *F = (((MachineBasicBlock*)origBB)->getParent()); 02212 MachineFunction::BasicBlockListType &BL = F->getBasicBlockList(); 02213 MachineFunction::BasicBlockListType::iterator BLI = (MachineBasicBlock*) origBB; 02214 assert(BLI != BL.end() && "Must find original BB in machine function\n"); 02215 BL.insert(BLI,machineBB); 02216 epilogues.push_back(machineBB); 02217 llvm_epilogues.push_back(llvmBB); 02218 02219 DEBUG(std::cerr << "EPILOGUE #" << i << "\n"); 02220 DEBUG(machineBB->print(std::cerr)); 02221 } 02222 } 02223 02224 void ModuloSchedulingPass::writeKernel(BasicBlock *llvmBB, MachineBasicBlock *machineBB, std::map<const Value*, std::pair<const MachineInstr*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues, std::map<Value*, MachineBasicBlock*> &newValLocation, std::map<Value*, std::map<int, Value*> > &kernelPHIs) { 02225 02226 //Keep track of operands that are read and saved from a previous iteration. The new clone 02227 //instruction will use the result of the phi instead. 02228 std::map<Value*, Value*> finalPHIValue; 02229 std::map<Value*, Value*> kernelValue; 02230 02231 //Branches are a special case 02232 std::vector<MachineInstr*> branches; 02233 02234 //Get target information to look at machine operands 02235 const TargetInstrInfo *mii = target.getInstrInfo(); 02236 02237 //Create TmpInstructions for the final phis 02238 for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { 02239 02240 DEBUG(std::cerr << "Stage: " << I->second << " Inst: " << *(I->first) << "\n";); 02241 02242 //Clone instruction 02243 const MachineInstr *inst = I->first; 02244 MachineInstr *instClone = inst->clone(); 02245 02246 //Insert into machine basic block 02247 machineBB->push_back(instClone); 02248 02249 if(mii->isBranch(instClone->getOpcode())) 02250 BuildMI(machineBB, V9::NOP, 0); 02251 02252 DEBUG(std::cerr << "Cloned Inst: " << *instClone << "\n"); 02253 02254 //Loop over Machine Operands 02255 for(unsigned i=0; i < inst->getNumOperands(); ++i) { 02256 //get machine operand 02257 const MachineOperand &mOp = inst->getOperand(i); 02258 02259 if(I->second != 0) { 02260 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) { 02261 02262 //Check to see where this operand is defined if this instruction is from max stage 02263 if(I->second == schedule.getMaxStage()) { 02264 DEBUG(std::cerr << "VREG: " << *(mOp.getVRegValue()) << "\n"); 02265 } 02266 02267 //If its in the value saved, we need to create a temp instruction and use that instead 02268 if(valuesToSave.count(mOp.getVRegValue())) { 02269 02270 //Check if we already have a final PHI value for this 02271 if(!finalPHIValue.count(mOp.getVRegValue())) { 02272 //Only create phi if the operand def is from a stage before this one 02273 if(schedule.defPreviousStage(mOp.getVRegValue(), I->second)) { 02274 TmpInstruction *tmp = new TmpInstruction(mOp.getVRegValue()); 02275 02276 //Get machine code for this instruction 02277 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 02278 tempMvec.addTemp((Value*) tmp); 02279 02280 //Update the operand in the cloned instruction 02281 instClone->getOperand(i).setValueReg(tmp); 02282 02283 //save this as our final phi 02284 finalPHIValue[mOp.getVRegValue()] = tmp; 02285 newValLocation[tmp] = machineBB; 02286 } 02287 } 02288 else { 02289 //Use the previous final phi value 02290 instClone->getOperand(i).setValueReg(finalPHIValue[mOp.getVRegValue()]); 02291 } 02292 } 02293 } 02294 } 02295 if(I->second != schedule.getMaxStage()) { 02296 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) { 02297 if(valuesToSave.count(mOp.getVRegValue())) { 02298 02299 TmpInstruction *tmp = new TmpInstruction(mOp.getVRegValue()); 02300 02301 //Get machine code for this instruction 02302 MachineCodeForInstruction & tempVec = MachineCodeForInstruction::get(defaultInst); 02303 tempVec.addTemp((Value*) tmp); 02304 02305 //Create new machine instr and put in MBB 02306 MachineInstr *saveValue; 02307 if(mOp.getVRegValue()->getType() == Type::FloatTy) 02308 saveValue = BuildMI(machineBB, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); 02309 else if(mOp.getVRegValue()->getType() == Type::DoubleTy) 02310 saveValue = BuildMI(machineBB, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); 02311 else 02312 saveValue = BuildMI(machineBB, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); 02313 02314 02315 //Save for future cleanup 02316 kernelValue[mOp.getVRegValue()] = tmp; 02317 newValLocation[tmp] = machineBB; 02318 kernelPHIs[mOp.getVRegValue()][schedule.getMaxStage()-1] = tmp; 02319 } 02320 } 02321 } 02322 } 02323 02324 } 02325 02326 //Add branches 02327 for(std::vector<MachineInstr*>::iterator I = branches.begin(), E = branches.end(); I != E; ++I) { 02328 machineBB->push_back(*I); 02329 BuildMI(machineBB, V9::NOP, 0); 02330 } 02331 02332 02333 DEBUG(std::cerr << "KERNEL before PHIs\n"); 02334 DEBUG(machineBB->print(std::cerr)); 02335 02336 02337 //Loop over each value we need to generate phis for 02338 for(std::map<Value*, std::map<int, Value*> >::iterator V = newValues.begin(), 02339 E = newValues.end(); V != E; ++V) { 02340 02341 02342 DEBUG(std::cerr << "Writing phi for" << *(V->first)); 02343 DEBUG(std::cerr << "\nMap of Value* for this phi\n"); 02344 DEBUG(for(std::map<int, Value*>::iterator I = V->second.begin(), 02345 IE = V->second.end(); I != IE; ++I) { 02346 std::cerr << "Stage: " << I->first; 02347 std::cerr << " Value: " << *(I->second) << "\n"; 02348 }); 02349 02350 //If we only have one current iteration live, its safe to set lastPhi = to kernel value 02351 if(V->second.size() == 1) { 02352 assert(kernelValue[V->first] != 0 && "Kernel value* must exist to create phi"); 02353 MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(),V9::PHI, 3).addReg(V->second.begin()->second).addReg(kernelValue[V->first]).addRegDef(finalPHIValue[V->first]); 02354 DEBUG(std::cerr << "Resulting PHI (one live): " << *saveValue << "\n"); 02355 kernelPHIs[V->first][V->second.begin()->first] = kernelValue[V->first]; 02356 DEBUG(std::cerr << "Put kernel phi in at stage: " << schedule.getMaxStage()-1 << " (map stage = " << V->second.begin()->first << ")\n"); 02357 } 02358 else { 02359 02360 //Keep track of last phi created. 02361 Instruction *lastPhi = 0; 02362 02363 unsigned count = 1; 02364 //Loop over the the map backwards to generate phis 02365 for(std::map<int, Value*>::reverse_iterator I = V->second.rbegin(), IE = V->second.rend(); 02366 I != IE; ++I) { 02367 02368 if(count < (V->second).size()) { 02369 if(lastPhi == 0) { 02370 lastPhi = new TmpInstruction(I->second); 02371 02372 //Get machine code for this instruction 02373 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 02374 tempMvec.addTemp((Value*) lastPhi); 02375 02376 MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(), V9::PHI, 3).addReg(kernelValue[V->first]).addReg(I->second).addRegDef(lastPhi); 02377 DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n"); 02378 newValLocation[lastPhi] = machineBB; 02379 } 02380 else { 02381 Instruction *tmp = new TmpInstruction(I->second); 02382 02383 //Get machine code for this instruction 02384 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 02385 tempMvec.addTemp((Value*) tmp); 02386 02387 02388 MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(), V9::PHI, 3).addReg(lastPhi).addReg(I->second).addRegDef(tmp); 02389 DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n"); 02390 lastPhi = tmp; 02391 kernelPHIs[V->first][I->first] = lastPhi; 02392 newValLocation[lastPhi] = machineBB; 02393 } 02394 } 02395 //Final phi value 02396 else { 02397 //The resulting value must be the Value* we created earlier 02398 assert(lastPhi != 0 && "Last phi is NULL!\n"); 02399 MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(), V9::PHI, 3).addReg(lastPhi).addReg(I->second).addRegDef(finalPHIValue[V->first]); 02400 DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n"); 02401 kernelPHIs[V->first][I->first] = finalPHIValue[V->first]; 02402 } 02403 02404 ++count; 02405 } 02406 02407 } 02408 } 02409 02410 DEBUG(std::cerr << "KERNEL after PHIs\n"); 02411 DEBUG(machineBB->print(std::cerr)); 02412 } 02413 02414 02415 void ModuloSchedulingPass::removePHIs(const MachineBasicBlock *origBB, std::vector<MachineBasicBlock *> &prologues, std::vector<MachineBasicBlock *> &epilogues, MachineBasicBlock *kernelBB, std::map<Value*, MachineBasicBlock*> &newValLocation) { 02416 02417 //Worklist to delete things 02418 std::vector<std::pair<MachineBasicBlock*, MachineBasicBlock::iterator> > worklist; 02419 02420 //Worklist of TmpInstructions that need to be added to a MCFI 02421 std::vector<Instruction*> addToMCFI; 02422 02423 //Worklist to add OR instructions to end of kernel so not to invalidate the iterator 02424 //std::vector<std::pair<Instruction*, Value*> > newORs; 02425 02426 const TargetInstrInfo *TMI = target.getInstrInfo(); 02427 02428 //Start with the kernel and for each phi insert a copy for the phi def and for each arg 02429 for(MachineBasicBlock::iterator I = kernelBB->begin(), E = kernelBB->end(); I != E; ++I) { 02430 02431 DEBUG(std::cerr << "Looking at Instr: " << *I << "\n"); 02432 //Get op code and check if its a phi 02433 if(I->getOpcode() == V9::PHI) { 02434 02435 DEBUG(std::cerr << "Replacing PHI: " << *I << "\n"); 02436 Instruction *tmp = 0; 02437 02438 for(unsigned i = 0; i < I->getNumOperands(); ++i) { 02439 //Get Operand 02440 const MachineOperand &mOp = I->getOperand(i); 02441 assert(mOp.getType() == MachineOperand::MO_VirtualRegister && "Should be a Value*\n"); 02442 02443 if(!tmp) { 02444 tmp = new TmpInstruction(mOp.getVRegValue()); 02445 addToMCFI.push_back(tmp); 02446 } 02447 02448 //Now for all our arguments we read, OR to the new TmpInstruction that we created 02449 if(mOp.isUse()) { 02450 DEBUG(std::cerr << "Use: " << mOp << "\n"); 02451 //Place a copy at the end of its BB but before the branches 02452 assert(newValLocation.count(mOp.getVRegValue()) && "We must know where this value is located\n"); 02453 //Reverse iterate to find the branches, we can safely assume no instructions have been 02454 //put in the nop positions 02455 for(MachineBasicBlock::iterator inst = --(newValLocation[mOp.getVRegValue()])->end(), endBB = (newValLocation[mOp.getVRegValue()])->begin(); inst != endBB; --inst) { 02456 MachineOpCode opc = inst->getOpcode(); 02457 if(TMI->isBranch(opc) || TMI->isNop(opc)) 02458 continue; 02459 else { 02460 if(mOp.getVRegValue()->getType() == Type::FloatTy) 02461 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); 02462 else if(mOp.getVRegValue()->getType() == Type::DoubleTy) 02463 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); 02464 else 02465 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); 02466 02467 break; 02468 } 02469 02470 } 02471 02472 } 02473 else { 02474 //Remove the phi and replace it with an OR 02475 DEBUG(std::cerr << "Def: " << mOp << "\n"); 02476 //newORs.push_back(std::make_pair(tmp, mOp.getVRegValue())); 02477 if(tmp->getType() == Type::FloatTy) 02478 BuildMI(*kernelBB, I, V9::FMOVS, 3).addReg(tmp).addRegDef(mOp.getVRegValue()); 02479 else if(tmp->getType() == Type::DoubleTy) 02480 BuildMI(*kernelBB, I, V9::FMOVD, 3).addReg(tmp).addRegDef(mOp.getVRegValue()); 02481 else 02482 BuildMI(*kernelBB, I, V9::ORr, 3).addReg(tmp).addImm(0).addRegDef(mOp.getVRegValue()); 02483 02484 02485 worklist.push_back(std::make_pair(kernelBB, I)); 02486 } 02487 02488 } 02489 02490 } 02491 02492 02493 } 02494 02495 //Add TmpInstructions to some MCFI 02496 if(addToMCFI.size() > 0) { 02497 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 02498 for(unsigned x = 0; x < addToMCFI.size(); ++x) { 02499 tempMvec.addTemp(addToMCFI[x]); 02500 } 02501 addToMCFI.clear(); 02502 } 02503 02504 02505 //Remove phis from epilogue 02506 for(std::vector<MachineBasicBlock*>::iterator MB = epilogues.begin(), ME = epilogues.end(); MB != ME; ++MB) { 02507 for(MachineBasicBlock::iterator I = (*MB)->begin(), E = (*MB)->end(); I != E; ++I) { 02508 02509 DEBUG(std::cerr << "Looking at Instr: " << *I << "\n"); 02510 //Get op code and check if its a phi 02511 if(I->getOpcode() == V9::PHI) { 02512 Instruction *tmp = 0; 02513 02514 for(unsigned i = 0; i < I->getNumOperands(); ++i) { 02515 //Get Operand 02516 const MachineOperand &mOp = I->getOperand(i); 02517 assert(mOp.getType() == MachineOperand::MO_VirtualRegister && "Should be a Value*\n"); 02518 02519 if(!tmp) { 02520 tmp = new TmpInstruction(mOp.getVRegValue()); 02521 addToMCFI.push_back(tmp); 02522 } 02523 02524 //Now for all our arguments we read, OR to the new TmpInstruction that we created 02525 if(mOp.isUse()) { 02526 DEBUG(std::cerr << "Use: " << mOp << "\n"); 02527 //Place a copy at the end of its BB but before the branches 02528 assert(newValLocation.count(mOp.getVRegValue()) && "We must know where this value is located\n"); 02529 //Reverse iterate to find the branches, we can safely assume no instructions have been 02530 //put in the nop positions 02531 for(MachineBasicBlock::iterator inst = --(newValLocation[mOp.getVRegValue()])->end(), endBB = (newValLocation[mOp.getVRegValue()])->begin(); inst != endBB; --inst) { 02532 MachineOpCode opc = inst->getOpcode(); 02533 if(TMI->isBranch(opc) || TMI->isNop(opc)) 02534 continue; 02535 else { 02536 if(mOp.getVRegValue()->getType() == Type::FloatTy) 02537 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); 02538 else if(mOp.getVRegValue()->getType() == Type::DoubleTy) 02539 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); 02540 else 02541 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); 02542 02543 02544 break; 02545 } 02546 02547 } 02548 02549 } 02550 else { 02551 //Remove the phi and replace it with an OR 02552 DEBUG(std::cerr << "Def: " << mOp << "\n"); 02553 if(tmp->getType() == Type::FloatTy) 02554 BuildMI(**MB, I, V9::FMOVS, 3).addReg(tmp).addRegDef(mOp.getVRegValue()); 02555 else if(tmp->getType() == Type::DoubleTy) 02556 BuildMI(**MB, I, V9::FMOVD, 3).addReg(tmp).addRegDef(mOp.getVRegValue()); 02557 else 02558 BuildMI(**MB, I, V9::ORr, 3).addReg(tmp).addImm(0).addRegDef(mOp.getVRegValue()); 02559 02560 worklist.push_back(std::make_pair(*MB,I)); 02561 } 02562 02563 } 02564 } 02565 02566 02567 } 02568 } 02569 02570 02571 if(addToMCFI.size() > 0) { 02572 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 02573 for(unsigned x = 0; x < addToMCFI.size(); ++x) { 02574 tempMvec.addTemp(addToMCFI[x]); 02575 } 02576 addToMCFI.clear(); 02577 } 02578 02579 //Delete the phis 02580 for(std::vector<std::pair<MachineBasicBlock*, MachineBasicBlock::iterator> >::iterator I = worklist.begin(), E = worklist.end(); I != E; ++I) { 02581 02582 DEBUG(std::cerr << "Deleting PHI " << *I->second << "\n"); 02583 I->first->erase(I->second); 02584 02585 } 02586 02587 02588 assert((addToMCFI.size() == 0) && "We should have added all TmpInstructions to some MachineCodeForInstruction"); 02589 } 02590 02591 02592 void ModuloSchedulingPass::reconstructLoop(MachineBasicBlock *BB) { 02593 02594 TIME_REGION(X, "reconstructLoop"); 02595 02596 02597 DEBUG(std::cerr << "Reconstructing Loop\n"); 02598 02599 //First find the value *'s that we need to "save" 02600 std::map<const Value*, std::pair<const MachineInstr*, int> > valuesToSave; 02601 02602 //Keep track of instructions we have already seen and their stage because 02603 //we don't want to "save" values if they are used in the kernel immediately 02604 std::map<const MachineInstr*, int> lastInstrs; 02605 std::map<const Value*, int> phiUses; 02606 02607 //Loop over kernel and only look at instructions from a stage > 0 02608 //Look at its operands and save values *'s that are read 02609 for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { 02610 02611 if(I->second !=0) { 02612 //For this instruction, get the Value*'s that it reads and put them into the set. 02613 //Assert if there is an operand of another type that we need to save 02614 const MachineInstr *inst = I->first; 02615 lastInstrs[inst] = I->second; 02616 02617 for(unsigned i=0; i < inst->getNumOperands(); ++i) { 02618 //get machine operand 02619 const MachineOperand &mOp = inst->getOperand(i); 02620 02621 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) { 02622 //find the value in the map 02623 if (const Value* srcI = mOp.getVRegValue()) { 02624 02625 if(isa<Constant>(srcI) || isa<Argument>(srcI)) 02626 continue; 02627 02628 //Before we declare this Value* one that we should save 02629 //make sure its def is not of the same stage as this instruction 02630 //because it will be consumed before its used 02631 Instruction *defInst = (Instruction*) srcI; 02632 02633 //Should we save this value? 02634 bool save = true; 02635 02636 //Continue if not in the def map, loop invariant code does not need to be saved 02637 if(!defMap.count(srcI)) 02638 continue; 02639 02640 MachineInstr *defInstr = defMap[srcI]; 02641 02642 02643 if(lastInstrs.count(defInstr)) { 02644 if(lastInstrs[defInstr] == I->second) { 02645 save = false; 02646 02647 } 02648 } 02649 02650 if(save) { 02651 assert(!phiUses.count(srcI) && "Did not expect to see phi use twice"); 02652 if(isa<PHINode>(srcI)) 02653 phiUses[srcI] = I->second; 02654 02655 valuesToSave[srcI] = std::make_pair(I->first, i); 02656 02657 } 02658 } 02659 } 02660 else if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) { 02661 if (const Value* destI = mOp.getVRegValue()) { 02662 if(!isa<PHINode>(destI)) 02663 continue; 02664 if(phiUses.count(destI)) { 02665 if(phiUses[destI] == I->second) { 02666 //remove from save list 02667 valuesToSave.erase(destI); 02668 } 02669 } 02670 } 02671 } 02672 02673 if(mOp.getType() != MachineOperand::MO_VirtualRegister && mOp.isUse()) { 02674 assert("Our assumption is wrong. We have another type of register that needs to be saved\n"); 02675 } 02676 } 02677 } 02678 } 02679 02680 //The new loop will consist of one or more prologues, the kernel, and one or more epilogues. 02681 02682 //Map to keep track of old to new values 02683 std::map<Value*, std::map<int, Value*> > newValues; 02684 02685 //Map to keep track of old to new values in kernel 02686 std::map<Value*, std::map<int, Value*> > kernelPHIs; 02687 02688 //Another map to keep track of what machine basic blocks these new value*s are in since 02689 //they have no llvm instruction equivalent 02690 std::map<Value*, MachineBasicBlock*> newValLocation; 02691 02692 std::vector<MachineBasicBlock*> prologues; 02693 std::vector<BasicBlock*> llvm_prologues; 02694 02695 02696 //Write prologue 02697 if(schedule.getMaxStage() != 0) 02698 writePrologues(prologues, BB, llvm_prologues, valuesToSave, newValues, newValLocation); 02699 02700 //Print out epilogues and prologue 02701 DEBUG(for(std::vector<MachineBasicBlock*>::iterator I = prologues.begin(), E = prologues.end(); 02702 I != E; ++I) { 02703 std::cerr << "PROLOGUE\n"; 02704 (*I)->print(std::cerr); 02705 }); 02706 02707 BasicBlock *llvmKernelBB = new BasicBlock("Kernel", (Function*) (BB->getBasicBlock()->getParent())); 02708 MachineBasicBlock *machineKernelBB = new MachineBasicBlock(llvmKernelBB); 02709 02710 MachineFunction *F = (((MachineBasicBlock*)BB)->getParent()); 02711 MachineFunction::BasicBlockListType &BL = F->getBasicBlockList(); 02712 MachineFunction::BasicBlockListType::iterator BLI = BB; 02713 assert(BLI != BL.end() && "Must find original BB in machine function\n"); 02714 BL.insert(BLI,machineKernelBB); 02715 02716 //(((MachineBasicBlock*)BB)->getParent())->getBasicBlockList().push_back(machineKernelBB); 02717 writeKernel(llvmKernelBB, machineKernelBB, valuesToSave, newValues, newValLocation, kernelPHIs); 02718 02719 02720 std::vector<MachineBasicBlock*> epilogues; 02721 std::vector<BasicBlock*> llvm_epilogues; 02722 02723 //Write epilogues 02724 if(schedule.getMaxStage() != 0) 02725 writeEpilogues(epilogues, BB, llvm_epilogues, valuesToSave, newValues, newValLocation, kernelPHIs); 02726 02727 02728 //Fix our branches 02729 fixBranches(prologues, llvm_prologues, machineKernelBB, llvmKernelBB, epilogues, llvm_epilogues, BB); 02730 02731 //Remove phis 02732 removePHIs(BB, prologues, epilogues, machineKernelBB, newValLocation); 02733 02734 //Print out epilogues and prologue 02735 DEBUG(for(std::vector<MachineBasicBlock*>::iterator I = prologues.begin(), E = prologues.end(); 02736 I != E; ++I) { 02737 std::cerr << "PROLOGUE\n"; 02738 (*I)->print(std::cerr); 02739 }); 02740 02741 DEBUG(std::cerr << "KERNEL\n"); 02742 DEBUG(machineKernelBB->print(std::cerr)); 02743 02744 DEBUG(for(std::vector<MachineBasicBlock*>::iterator I = epilogues.begin(), E = epilogues.end(); 02745 I != E; ++I) { 02746 std::cerr << "EPILOGUE\n"; 02747 (*I)->print(std::cerr); 02748 }); 02749 02750 02751 DEBUG(std::cerr << "New Machine Function" << "\n"); 02752 DEBUG(std::cerr << BB->getParent() << "\n"); 02753 02754 02755 } 02756 02757 void ModuloSchedulingPass::fixBranches(std::vector<MachineBasicBlock *> &prologues, std::vector<BasicBlock*> &llvm_prologues, MachineBasicBlock *machineKernelBB, BasicBlock *llvmKernelBB, std::vector<MachineBasicBlock *> &epilogues, std::vector<BasicBlock*> &llvm_epilogues, MachineBasicBlock *BB) { 02758 02759 const TargetInstrInfo *TMI = target.getInstrInfo(); 02760 02761 if(schedule.getMaxStage() != 0) { 02762 //Fix prologue branches 02763 for(unsigned I = 0; I < prologues.size(); ++I) { 02764 02765 //Find terminator since getFirstTerminator does not work! 02766 for(MachineBasicBlock::reverse_iterator mInst = prologues[I]->rbegin(), mInstEnd = prologues[I]->rend(); mInst != mInstEnd; ++mInst) { 02767 MachineOpCode OC = mInst->getOpcode(); 02768 //If its a branch update its branchto 02769 if(TMI->isBranch(OC)) { 02770 for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) { 02771 MachineOperand &mOp = mInst->getOperand(opNum); 02772 if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) { 02773 //Check if we are branching to the kernel, if not branch to epilogue 02774 if(mOp.getVRegValue() == BB->getBasicBlock()) { 02775 if(I == prologues.size()-1) 02776 mOp.setValueReg(llvmKernelBB); 02777 else 02778 mOp.setValueReg(llvm_prologues[I+1]); 02779 } 02780 else { 02781 mOp.setValueReg(llvm_epilogues[(llvm_epilogues.size()-1-I)]); 02782 } 02783 } 02784 } 02785 02786 DEBUG(std::cerr << "New Prologue Branch: " << *mInst << "\n"); 02787 } 02788 } 02789 02790 02791 //Update llvm basic block with our new branch instr 02792 DEBUG(std::cerr << BB->getBasicBlock()->getTerminator() << "\n"); 02793 const BranchInst *branchVal = dyn_cast<BranchInst>(BB->getBasicBlock()->getTerminator()); 02794 02795 if(I == prologues.size()-1) { 02796 TerminatorInst *newBranch = new BranchInst(llvmKernelBB, 02797 llvm_epilogues[(llvm_epilogues.size()-1-I)], 02798 branchVal->getCondition(), 02799 llvm_prologues[I]); 02800 } 02801 else 02802 TerminatorInst *newBranch = new BranchInst(llvm_prologues[I+1], 02803 llvm_epilogues[(llvm_epilogues.size()-1-I)], 02804 branchVal->getCondition(), 02805 llvm_prologues[I]); 02806 02807 } 02808 } 02809 02810 Value *origBranchExit = 0; 02811 02812 //Fix up kernel machine branches 02813 for(MachineBasicBlock::reverse_iterator mInst = machineKernelBB->rbegin(), mInstEnd = machineKernelBB->rend(); mInst != mInstEnd; ++mInst) { 02814 MachineOpCode OC = mInst->getOpcode(); 02815 if(TMI->isBranch(OC)) { 02816 for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) { 02817 MachineOperand &mOp = mInst->getOperand(opNum); 02818 02819 if(mOp.getType() == MachineOperand::MO_PCRelativeDisp) { 02820 if(mOp.getVRegValue() == BB->getBasicBlock()) 02821 mOp.setValueReg(llvmKernelBB); 02822 else 02823 if(llvm_epilogues.size() > 0) { 02824 assert(origBranchExit == 0 && "There should only be one branch out of the loop"); 02825 02826 origBranchExit = mOp.getVRegValue(); 02827 mOp.setValueReg(llvm_epilogues[0]); 02828 } 02829 else 02830 origBranchExit = mOp.getVRegValue(); 02831 } 02832 } 02833 } 02834 } 02835 02836 //Update kernelLLVM branches 02837 const BranchInst *branchVal = dyn_cast<BranchInst>(BB->getBasicBlock()->getTerminator()); 02838 02839 assert(origBranchExit != 0 && "We must have the original bb the kernel exits to!"); 02840 02841 if(epilogues.size() > 0) { 02842 TerminatorInst *newBranch = new BranchInst(llvmKernelBB, 02843 llvm_epilogues[0], 02844 branchVal->getCondition(), 02845 llvmKernelBB); 02846 } 02847 else { 02848 BasicBlock *origBBExit = dyn_cast<BasicBlock>(origBranchExit); 02849 assert(origBBExit !=0 && "Original exit basic block must be set"); 02850 TerminatorInst *newBranch = new BranchInst(llvmKernelBB, 02851 origBBExit, 02852 branchVal->getCondition(), 02853 llvmKernelBB); 02854 } 02855 02856 if(schedule.getMaxStage() != 0) { 02857 //Lastly add unconditional branches for the epilogues 02858 for(unsigned I = 0; I < epilogues.size(); ++I) { 02859 02860 //Now since we don't have fall throughs, add a unconditional branch to the next prologue 02861 if(I != epilogues.size()-1) { 02862 BuildMI(epilogues[I], V9::BA, 1).addPCDisp(llvm_epilogues[I+1]); 02863 //Add unconditional branch to end of epilogue 02864 TerminatorInst *newBranch = new BranchInst(llvm_epilogues[I+1], 02865 llvm_epilogues[I]); 02866 02867 } 02868 else { 02869 BuildMI(epilogues[I], V9::BA, 1).addPCDisp(origBranchExit); 02870 02871 02872 //Update last epilogue exit branch 02873 BranchInst *branchVal = (BranchInst*) dyn_cast<BranchInst>(BB->getBasicBlock()->getTerminator()); 02874 //Find where we are supposed to branch to 02875 BasicBlock *nextBlock = 0; 02876 for(unsigned j=0; j <branchVal->getNumSuccessors(); ++j) { 02877 if(branchVal->getSuccessor(j) != BB->getBasicBlock()) 02878 nextBlock = branchVal->getSuccessor(j); 02879 } 02880 02881 assert((nextBlock != 0) && "Next block should not be null!"); 02882 TerminatorInst *newBranch = new BranchInst(nextBlock, llvm_epilogues[I]); 02883 } 02884 //Add one more nop! 02885 BuildMI(epilogues[I], V9::NOP, 0); 02886 02887 } 02888 } 02889 02890 //FIX UP Machine BB entry!! 02891 //We are looking at the predecesor of our loop basic block and we want to change its ba instruction 02892 02893 02894 //Find all llvm basic blocks that branch to the loop entry and change to our first prologue. 02895 const BasicBlock *llvmBB = BB->getBasicBlock(); 02896 02897 std::vector<const BasicBlock*>Preds (pred_begin(llvmBB), pred_end(llvmBB)); 02898 02899 //for(pred_const_iterator P = pred_begin(llvmBB), PE = pred_end(llvmBB); P != PE; ++PE) { 02900 for(std::vector<const BasicBlock*>::iterator P = Preds.begin(), PE = Preds.end(); P != PE; ++P) { 02901 if(*P == llvmBB) 02902 continue; 02903 else { 02904 DEBUG(std::cerr << "Found our entry BB\n"); 02905 //Get the Terminator instruction for this basic block and print it out 02906 DEBUG(std::cerr << *((*P)->getTerminator()) << "\n"); 02907 //Update the terminator 02908 TerminatorInst *term = ((BasicBlock*)*P)->getTerminator(); 02909 for(unsigned i=0; i < term->getNumSuccessors(); ++i) { 02910 if(term->getSuccessor(i) == llvmBB) { 02911 DEBUG(std::cerr << "Replacing successor bb\n"); 02912 if(llvm_prologues.size() > 0) { 02913 term->setSuccessor(i, llvm_prologues[0]); 02914 //Also update its corresponding machine instruction 02915 MachineCodeForInstruction & tempMvec = 02916 MachineCodeForInstruction::get(term); 02917 for (unsigned j = 0; j < tempMvec.size(); j++) { 02918 MachineInstr *temp = tempMvec[j]; 02919 MachineOpCode opc = temp->getOpcode(); 02920 if(TMI->isBranch(opc)) { 02921 DEBUG(std::cerr << *temp << "\n"); 02922 //Update branch 02923 for(unsigned opNum = 0; opNum < temp->getNumOperands(); ++opNum) { 02924 MachineOperand &mOp = temp->getOperand(opNum); 02925 if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) { 02926 if(mOp.getVRegValue() == llvmBB) 02927 mOp.setValueReg(llvm_prologues[0]); 02928 } 02929 } 02930 } 02931 } 02932 } 02933 else { 02934 term->setSuccessor(i, llvmKernelBB); 02935 //Also update its corresponding machine instruction 02936 MachineCodeForInstruction & tempMvec = 02937 MachineCodeForInstruction::get(term); 02938 for (unsigned j = 0; j < tempMvec.size(); j++) { 02939 MachineInstr *temp = tempMvec[j]; 02940 MachineOpCode opc = temp->getOpcode(); 02941 if(TMI->isBranch(opc)) { 02942 DEBUG(std::cerr << *temp << "\n"); 02943 //Update branch 02944 for(unsigned opNum = 0; opNum < temp->getNumOperands(); ++opNum) { 02945 MachineOperand &mOp = temp->getOperand(opNum); 02946 if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) { 02947 if(mOp.getVRegValue() == llvmBB) 02948 mOp.setValueReg(llvmKernelBB); 02949 } 02950 } 02951 } 02952 } 02953 } 02954 } 02955 } 02956 break; 02957 } 02958 } 02959 02960 02961 //BB->getParent()->getBasicBlockList().erase(BB); 02962 02963 } 02964