LLVM API Documentation
00001 //===-- ModuloScheduling.cpp - ModuloScheduling ----------------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This ModuloScheduling pass is based on the Swing Modulo Scheduling 00011 // algorithm. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #define DEBUG_TYPE "ModuloSched" 00016 00017 #include "ModuloScheduling.h" 00018 #include "llvm/Instructions.h" 00019 #include "llvm/Function.h" 00020 #include "llvm/CodeGen/MachineFunction.h" 00021 #include "llvm/CodeGen/Passes.h" 00022 #include "llvm/Support/CFG.h" 00023 #include "llvm/Target/TargetSchedInfo.h" 00024 #include "llvm/Support/Debug.h" 00025 #include "llvm/Support/GraphWriter.h" 00026 #include "llvm/ADT/StringExtras.h" 00027 #include "llvm/ADT/Statistic.h" 00028 #include <cmath> 00029 #include <algorithm> 00030 #include <fstream> 00031 #include <sstream> 00032 #include <utility> 00033 #include <vector> 00034 #include "../MachineCodeForInstruction.h" 00035 #include "../SparcV9TmpInstr.h" 00036 #include "../SparcV9Internals.h" 00037 #include "../SparcV9RegisterInfo.h" 00038 using namespace llvm; 00039 00040 /// Create ModuloSchedulingPass 00041 /// 00042 FunctionPass *llvm::createModuloSchedulingPass(TargetMachine & targ) { 00043 DEBUG(std::cerr << "Created ModuloSchedulingPass\n"); 00044 return new ModuloSchedulingPass(targ); 00045 } 00046 00047 00048 //Graph Traits for printing out the dependence graph 00049 template<typename GraphType> 00050 static void WriteGraphToFile(std::ostream &O, const std::string &GraphName, 00051 const GraphType >) { 00052 std::string Filename = GraphName + ".dot"; 00053 O << "Writing '" << Filename << "'..."; 00054 std::ofstream F(Filename.c_str()); 00055 00056 if (F.good()) 00057 WriteGraph(F, GT); 00058 else 00059 O << " error opening file for writing!"; 00060 O << "\n"; 00061 }; 00062 00063 //Graph Traits for printing out the dependence graph 00064 namespace llvm { 00065 Statistic<> ValidLoops("modulosched-validLoops", "Number of candidate loops modulo-scheduled"); 00066 Statistic<> MSLoops("modulosched-schedLoops", "Number of loops successfully modulo-scheduled"); 00067 Statistic<> IncreasedII("modulosched-increasedII", "Number of times we had to increase II"); 00068 00069 template<> 00070 struct DOTGraphTraits<MSchedGraph*> : public DefaultDOTGraphTraits { 00071 static std::string getGraphName(MSchedGraph *F) { 00072 return "Dependence Graph"; 00073 } 00074 00075 static std::string getNodeLabel(MSchedGraphNode *Node, MSchedGraph *Graph) { 00076 if (Node->getInst()) { 00077 std::stringstream ss; 00078 ss << *(Node->getInst()); 00079 return ss.str(); //((MachineInstr*)Node->getInst()); 00080 } 00081 else 00082 return "No Inst"; 00083 } 00084 static std::string getEdgeSourceLabel(MSchedGraphNode *Node, 00085 MSchedGraphNode::succ_iterator I) { 00086 //Label each edge with the type of dependence 00087 std::string edgelabel = ""; 00088 switch (I.getEdge().getDepOrderType()) { 00089 00090 case MSchedGraphEdge::TrueDep: 00091 edgelabel = "True"; 00092 break; 00093 00094 case MSchedGraphEdge::AntiDep: 00095 edgelabel = "Anti"; 00096 break; 00097 00098 case MSchedGraphEdge::OutputDep: 00099 edgelabel = "Output"; 00100 break; 00101 00102 default: 00103 edgelabel = "Unknown"; 00104 break; 00105 } 00106 00107 //FIXME 00108 int iteDiff = I.getEdge().getIteDiff(); 00109 std::string intStr = "(IteDiff: "; 00110 intStr += itostr(iteDiff); 00111 00112 intStr += ")"; 00113 edgelabel += intStr; 00114 00115 return edgelabel; 00116 } 00117 }; 00118 } 00119 00120 /// ModuloScheduling::runOnFunction - main transformation entry point 00121 /// The Swing Modulo Schedule algorithm has three basic steps: 00122 /// 1) Computation and Analysis of the dependence graph 00123 /// 2) Ordering of the nodes 00124 /// 3) Scheduling 00125 /// 00126 bool ModuloSchedulingPass::runOnFunction(Function &F) { 00127 00128 bool Changed = false; 00129 int numMS = 0; 00130 00131 DEBUG(std::cerr << "Creating ModuloSchedGraph for each valid BasicBlock in " + F.getName() + "\n"); 00132 00133 //Get MachineFunction 00134 MachineFunction &MF = MachineFunction::get(&F); 00135 00136 00137 //Worklist 00138 std::vector<MachineBasicBlock*> Worklist; 00139 00140 //Iterate over BasicBlocks and put them into our worklist if they are valid 00141 for (MachineFunction::iterator BI = MF.begin(); BI != MF.end(); ++BI) 00142 if(MachineBBisValid(BI)) { 00143 Worklist.push_back(&*BI); 00144 ++ValidLoops; 00145 } 00146 00147 defaultInst = 0; 00148 00149 DEBUG(if(Worklist.size() == 0) std::cerr << "No single basic block loops in function to ModuloSchedule\n"); 00150 00151 //Iterate over the worklist and perform scheduling 00152 for(std::vector<MachineBasicBlock*>::iterator BI = Worklist.begin(), 00153 BE = Worklist.end(); BI != BE; ++BI) { 00154 00155 CreateDefMap(*BI); 00156 00157 MSchedGraph *MSG = new MSchedGraph(*BI, target); 00158 00159 //Write Graph out to file 00160 DEBUG(WriteGraphToFile(std::cerr, F.getName(), MSG)); 00161 00162 //Print out BB for debugging 00163 DEBUG(std::cerr << "ModuloScheduling BB: \n"; (*BI)->print(std::cerr)); 00164 00165 //Calculate Resource II 00166 int ResMII = calculateResMII(*BI); 00167 00168 //Calculate Recurrence II 00169 int RecMII = calculateRecMII(MSG, ResMII); 00170 00171 //Our starting initiation interval is the maximum of RecMII and ResMII 00172 II = std::max(RecMII, ResMII); 00173 00174 //Print out II, RecMII, and ResMII 00175 DEBUG(std::cerr << "II starts out as " << II << " ( RecMII=" << RecMII << " and ResMII=" << ResMII << ")\n"); 00176 00177 //Dump node properties if in debug mode 00178 DEBUG(for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), 00179 E = nodeToAttributesMap.end(); I !=E; ++I) { 00180 std::cerr << "Node: " << *(I->first) << " ASAP: " << I->second.ASAP << " ALAP: " 00181 << I->second.ALAP << " MOB: " << I->second.MOB << " Depth: " << I->second.depth 00182 << " Height: " << I->second.height << "\n"; 00183 }); 00184 00185 //Calculate Node Properties 00186 calculateNodeAttributes(MSG, ResMII); 00187 00188 //Dump node properties if in debug mode 00189 DEBUG(for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), 00190 E = nodeToAttributesMap.end(); I !=E; ++I) { 00191 std::cerr << "Node: " << *(I->first) << " ASAP: " << I->second.ASAP << " ALAP: " 00192 << I->second.ALAP << " MOB: " << I->second.MOB << " Depth: " << I->second.depth 00193 << " Height: " << I->second.height << "\n"; 00194 }); 00195 00196 //Put nodes in order to schedule them 00197 computePartialOrder(); 00198 00199 //Dump out partial order 00200 DEBUG(for(std::vector<std::set<MSchedGraphNode*> >::iterator I = partialOrder.begin(), 00201 E = partialOrder.end(); I !=E; ++I) { 00202 std::cerr << "Start set in PO\n"; 00203 for(std::set<MSchedGraphNode*>::iterator J = I->begin(), JE = I->end(); J != JE; ++J) 00204 std::cerr << "PO:" << **J << "\n"; 00205 }); 00206 00207 //Place nodes in final order 00208 orderNodes(); 00209 00210 //Dump out order of nodes 00211 DEBUG(for(std::vector<MSchedGraphNode*>::iterator I = FinalNodeOrder.begin(), E = FinalNodeOrder.end(); I != E; ++I) { 00212 std::cerr << "FO:" << **I << "\n"; 00213 }); 00214 00215 //Finally schedule nodes 00216 bool haveSched = computeSchedule(); 00217 00218 //Print out final schedule 00219 DEBUG(schedule.print(std::cerr)); 00220 00221 //Final scheduling step is to reconstruct the loop only if we actual have 00222 //stage > 0 00223 if(schedule.getMaxStage() != 0 && haveSched) { 00224 reconstructLoop(*BI); 00225 ++MSLoops; 00226 Changed = true; 00227 } 00228 else 00229 DEBUG(std::cerr << "Max stage is 0, so no change in loop or reached cap\n"); 00230 00231 //Clear out our maps for the next basic block that is processed 00232 nodeToAttributesMap.clear(); 00233 partialOrder.clear(); 00234 recurrenceList.clear(); 00235 FinalNodeOrder.clear(); 00236 schedule.clear(); 00237 defMap.clear(); 00238 //Clean up. Nuke old MachineBB and llvmBB 00239 //BasicBlock *llvmBB = (BasicBlock*) (*BI)->getBasicBlock(); 00240 //Function *parent = (Function*) llvmBB->getParent(); 00241 //Should't std::find work?? 00242 //parent->getBasicBlockList().erase(std::find(parent->getBasicBlockList().begin(), parent->getBasicBlockList().end(), *llvmBB)); 00243 //parent->getBasicBlockList().erase(llvmBB); 00244 00245 //delete(llvmBB); 00246 //delete(*BI); 00247 } 00248 00249 return Changed; 00250 } 00251 00252 void ModuloSchedulingPass::CreateDefMap(MachineBasicBlock *BI) { 00253 defaultInst = 0; 00254 00255 for(MachineBasicBlock::iterator I = BI->begin(), E = BI->end(); I != E; ++I) { 00256 for(unsigned opNum = 0; opNum < I->getNumOperands(); ++opNum) { 00257 const MachineOperand &mOp = I->getOperand(opNum); 00258 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) { 00259 //assert if this is the second def we have seen 00260 DEBUG(std::cerr << "Putting " << *(mOp.getVRegValue()) << " into map\n"); 00261 assert(!defMap.count(mOp.getVRegValue()) && "Def already in the map"); 00262 00263 defMap[mOp.getVRegValue()] = &*I; 00264 } 00265 00266 //See if we can use this Value* as our defaultInst 00267 if(!defaultInst && mOp.getType() == MachineOperand::MO_VirtualRegister) { 00268 Value *V = mOp.getVRegValue(); 00269 if(!isa<TmpInstruction>(V) && !isa<Argument>(V) && !isa<Constant>(V) && !isa<PHINode>(V)) 00270 defaultInst = (Instruction*) V; 00271 } 00272 } 00273 } 00274 00275 assert(defaultInst && "We must have a default instruction to use as our main point to add to machine code for instruction\n"); 00276 00277 } 00278 /// This function checks if a Machine Basic Block is valid for modulo 00279 /// scheduling. This means that it has no control flow (if/else or 00280 /// calls) in the block. Currently ModuloScheduling only works on 00281 /// single basic block loops. 00282 bool ModuloSchedulingPass::MachineBBisValid(const MachineBasicBlock *BI) { 00283 00284 bool isLoop = false; 00285 00286 //Check first if its a valid loop 00287 for(succ_const_iterator I = succ_begin(BI->getBasicBlock()), 00288 E = succ_end(BI->getBasicBlock()); I != E; ++I) { 00289 if (*I == BI->getBasicBlock()) // has single block loop 00290 isLoop = true; 00291 } 00292 00293 if(!isLoop) 00294 return false; 00295 00296 //Check that we have a conditional branch (avoiding MS infinite loops) 00297 if(BranchInst *b = dyn_cast<BranchInst>(((BasicBlock*) BI->getBasicBlock())->getTerminator())) 00298 if(b->isUnconditional()) 00299 return false; 00300 00301 //Check size of our basic block.. make sure we have more then just the terminator in it 00302 if(BI->getBasicBlock()->size() == 1) 00303 return false; 00304 00305 //Get Target machine instruction info 00306 const TargetInstrInfo *TMI = target.getInstrInfo(); 00307 00308 //Check each instruction and look for calls 00309 for(MachineBasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) { 00310 //Get opcode to check instruction type 00311 MachineOpCode OC = I->getOpcode(); 00312 if(TMI->isCall(OC)) 00313 return false; 00314 } 00315 return true; 00316 } 00317 00318 //ResMII is calculated by determining the usage count for each resource 00319 //and using the maximum. 00320 //FIXME: In future there should be a way to get alternative resources 00321 //for each instruction 00322 int ModuloSchedulingPass::calculateResMII(const MachineBasicBlock *BI) { 00323 00324 const TargetInstrInfo *mii = target.getInstrInfo(); 00325 const TargetSchedInfo *msi = target.getSchedInfo(); 00326 00327 int ResMII = 0; 00328 00329 //Map to keep track of usage count of each resource 00330 std::map<unsigned, unsigned> resourceUsageCount; 00331 00332 for(MachineBasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) { 00333 00334 //Get resource usage for this instruction 00335 InstrRUsage rUsage = msi->getInstrRUsage(I->getOpcode()); 00336 std::vector<std::vector<resourceId_t> > resources = rUsage.resourcesByCycle; 00337 00338 //Loop over resources in each cycle and increments their usage count 00339 for(unsigned i=0; i < resources.size(); ++i) 00340 for(unsigned j=0; j < resources[i].size(); ++j) { 00341 if( resourceUsageCount.find(resources[i][j]) == resourceUsageCount.end()) { 00342 resourceUsageCount[resources[i][j]] = 1; 00343 } 00344 else { 00345 resourceUsageCount[resources[i][j]] = resourceUsageCount[resources[i][j]] + 1; 00346 } 00347 } 00348 } 00349 00350 //Find maximum usage count 00351 00352 //Get max number of instructions that can be issued at once. (FIXME) 00353 int issueSlots = msi->maxNumIssueTotal; 00354 00355 for(std::map<unsigned,unsigned>::iterator RB = resourceUsageCount.begin(), RE = resourceUsageCount.end(); RB != RE; ++RB) { 00356 00357 //Get the total number of the resources in our cpu 00358 int resourceNum = CPUResource::getCPUResource(RB->first)->maxNumUsers; 00359 00360 //Get total usage count for this resources 00361 unsigned usageCount = RB->second; 00362 00363 //Divide the usage count by either the max number we can issue or the number of 00364 //resources (whichever is its upper bound) 00365 double finalUsageCount; 00366 if( resourceNum <= issueSlots) 00367 finalUsageCount = ceil(1.0 * usageCount / resourceNum); 00368 else 00369 finalUsageCount = ceil(1.0 * usageCount / issueSlots); 00370 00371 00372 //Only keep track of the max 00373 ResMII = std::max( (int) finalUsageCount, ResMII); 00374 00375 } 00376 00377 return ResMII; 00378 00379 } 00380 00381 /// calculateRecMII - Calculates the value of the highest recurrence 00382 /// By value we mean the total latency 00383 int ModuloSchedulingPass::calculateRecMII(MSchedGraph *graph, int MII) { 00384 std::vector<MSchedGraphNode*> vNodes; 00385 //Loop over all nodes in the graph 00386 for(MSchedGraph::iterator I = graph->begin(), E = graph->end(); I != E; ++I) { 00387 findAllReccurrences(I->second, vNodes, MII); 00388 vNodes.clear(); 00389 } 00390 00391 int RecMII = 0; 00392 00393 for(std::set<std::pair<int, std::vector<MSchedGraphNode*> > >::iterator I = recurrenceList.begin(), E=recurrenceList.end(); I !=E; ++I) { 00394 DEBUG(for(std::vector<MSchedGraphNode*>::const_iterator N = I->second.begin(), NE = I->second.end(); N != NE; ++N) { 00395 std::cerr << **N << "\n"; 00396 }); 00397 RecMII = std::max(RecMII, I->first); 00398 } 00399 00400 return MII; 00401 } 00402 00403 /// calculateNodeAttributes - The following properties are calculated for 00404 /// each node in the dependence graph: ASAP, ALAP, Depth, Height, and 00405 /// MOB. 00406 void ModuloSchedulingPass::calculateNodeAttributes(MSchedGraph *graph, int MII) { 00407 00408 assert(nodeToAttributesMap.empty() && "Node attribute map was not cleared"); 00409 00410 //Loop over the nodes and add them to the map 00411 for(MSchedGraph::iterator I = graph->begin(), E = graph->end(); I != E; ++I) { 00412 00413 DEBUG(std::cerr << "Inserting node into attribute map: " << *I->second << "\n"); 00414 00415 //Assert if its already in the map 00416 assert(nodeToAttributesMap.count(I->second) == 0 && 00417 "Node attributes are already in the map"); 00418 00419 //Put into the map with default attribute values 00420 nodeToAttributesMap[I->second] = MSNodeAttributes(); 00421 } 00422 00423 //Create set to deal with reccurrences 00424 std::set<MSchedGraphNode*> visitedNodes; 00425 00426 //Now Loop over map and calculate the node attributes 00427 for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) { 00428 calculateASAP(I->first, MII, (MSchedGraphNode*) 0); 00429 visitedNodes.clear(); 00430 } 00431 00432 int maxASAP = findMaxASAP(); 00433 //Calculate ALAP which depends on ASAP being totally calculated 00434 for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) { 00435 calculateALAP(I->first, MII, maxASAP, (MSchedGraphNode*) 0); 00436 visitedNodes.clear(); 00437 } 00438 00439 //Calculate MOB which depends on ASAP being totally calculated, also do depth and height 00440 for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) { 00441 (I->second).MOB = std::max(0,(I->second).ALAP - (I->second).ASAP); 00442 00443 DEBUG(std::cerr << "MOB: " << (I->second).MOB << " (" << *(I->first) << ")\n"); 00444 calculateDepth(I->first, (MSchedGraphNode*) 0); 00445 calculateHeight(I->first, (MSchedGraphNode*) 0); 00446 } 00447 00448 00449 } 00450 00451 /// ignoreEdge - Checks to see if this edge of a recurrence should be ignored or not 00452 bool ModuloSchedulingPass::ignoreEdge(MSchedGraphNode *srcNode, MSchedGraphNode *destNode) { 00453 if(destNode == 0 || srcNode ==0) 00454 return false; 00455 00456 bool findEdge = edgesToIgnore.count(std::make_pair(srcNode, destNode->getInEdgeNum(srcNode))); 00457 00458 return findEdge; 00459 } 00460 00461 00462 /// calculateASAP - Calculates the 00463 int ModuloSchedulingPass::calculateASAP(MSchedGraphNode *node, int MII, MSchedGraphNode *destNode) { 00464 00465 DEBUG(std::cerr << "Calculating ASAP for " << *node << "\n"); 00466 00467 //Get current node attributes 00468 MSNodeAttributes &attributes = nodeToAttributesMap.find(node)->second; 00469 00470 if(attributes.ASAP != -1) 00471 return attributes.ASAP; 00472 00473 int maxPredValue = 0; 00474 00475 //Iterate over all of the predecessors and find max 00476 for(MSchedGraphNode::pred_iterator P = node->pred_begin(), E = node->pred_end(); P != E; ++P) { 00477 00478 //Only process if we are not ignoring the edge 00479 if(!ignoreEdge(*P, node)) { 00480 int predASAP = -1; 00481 predASAP = calculateASAP(*P, MII, node); 00482 00483 assert(predASAP != -1 && "ASAP has not been calculated"); 00484 int iteDiff = node->getInEdge(*P).getIteDiff(); 00485 00486 int currentPredValue = predASAP + (*P)->getLatency() - (iteDiff * MII); 00487 DEBUG(std::cerr << "pred ASAP: " << predASAP << ", iteDiff: " << iteDiff << ", PredLatency: " << (*P)->getLatency() << ", Current ASAP pred: " << currentPredValue << "\n"); 00488 maxPredValue = std::max(maxPredValue, currentPredValue); 00489 } 00490 } 00491 00492 attributes.ASAP = maxPredValue; 00493 00494 DEBUG(std::cerr << "ASAP: " << attributes.ASAP << " (" << *node << ")\n"); 00495 00496 return maxPredValue; 00497 } 00498 00499 00500 int ModuloSchedulingPass::calculateALAP(MSchedGraphNode *node, int MII, 00501 int maxASAP, MSchedGraphNode *srcNode) { 00502 00503 DEBUG(std::cerr << "Calculating ALAP for " << *node << "\n"); 00504 00505 MSNodeAttributes &attributes = nodeToAttributesMap.find(node)->second; 00506 00507 if(attributes.ALAP != -1) 00508 return attributes.ALAP; 00509 00510 if(node->hasSuccessors()) { 00511 00512 //Trying to deal with the issue where the node has successors, but 00513 //we are ignoring all of the edges to them. So this is my hack for 00514 //now.. there is probably a more elegant way of doing this (FIXME) 00515 bool processedOneEdge = false; 00516 00517 //FIXME, set to something high to start 00518 int minSuccValue = 9999999; 00519 00520 //Iterate over all of the predecessors and fine max 00521 for(MSchedGraphNode::succ_iterator P = node->succ_begin(), 00522 E = node->succ_end(); P != E; ++P) { 00523 00524 //Only process if we are not ignoring the edge 00525 if(!ignoreEdge(node, *P)) { 00526 processedOneEdge = true; 00527 int succALAP = -1; 00528 succALAP = calculateALAP(*P, MII, maxASAP, node); 00529 00530 assert(succALAP != -1 && "Successors ALAP should have been caclulated"); 00531 00532 int iteDiff = P.getEdge().getIteDiff(); 00533 00534 int currentSuccValue = succALAP - node->getLatency() + iteDiff * MII; 00535 00536 DEBUG(std::cerr << "succ ALAP: " << succALAP << ", iteDiff: " << iteDiff << ", SuccLatency: " << (*P)->getLatency() << ", Current ALAP succ: " << currentSuccValue << "\n"); 00537 00538 minSuccValue = std::min(minSuccValue, currentSuccValue); 00539 } 00540 } 00541 00542 if(processedOneEdge) 00543 attributes.ALAP = minSuccValue; 00544 00545 else 00546 attributes.ALAP = maxASAP; 00547 } 00548 else 00549 attributes.ALAP = maxASAP; 00550 00551 DEBUG(std::cerr << "ALAP: " << attributes.ALAP << " (" << *node << ")\n"); 00552 00553 if(attributes.ALAP < 0) 00554 attributes.ALAP = 0; 00555 00556 return attributes.ALAP; 00557 } 00558 00559 int ModuloSchedulingPass::findMaxASAP() { 00560 int maxASAP = 0; 00561 00562 for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), 00563 E = nodeToAttributesMap.end(); I != E; ++I) 00564 maxASAP = std::max(maxASAP, I->second.ASAP); 00565 return maxASAP; 00566 } 00567 00568 00569 int ModuloSchedulingPass::calculateHeight(MSchedGraphNode *node,MSchedGraphNode *srcNode) { 00570 00571 MSNodeAttributes &attributes = nodeToAttributesMap.find(node)->second; 00572 00573 if(attributes.height != -1) 00574 return attributes.height; 00575 00576 int maxHeight = 0; 00577 00578 //Iterate over all of the predecessors and find max 00579 for(MSchedGraphNode::succ_iterator P = node->succ_begin(), 00580 E = node->succ_end(); P != E; ++P) { 00581 00582 00583 if(!ignoreEdge(node, *P)) { 00584 int succHeight = calculateHeight(*P, node); 00585 00586 assert(succHeight != -1 && "Successors Height should have been caclulated"); 00587 00588 int currentHeight = succHeight + node->getLatency(); 00589 maxHeight = std::max(maxHeight, currentHeight); 00590 } 00591 } 00592 attributes.height = maxHeight; 00593 DEBUG(std::cerr << "Height: " << attributes.height << " (" << *node << ")\n"); 00594 return maxHeight; 00595 } 00596 00597 00598 int ModuloSchedulingPass::calculateDepth(MSchedGraphNode *node, 00599 MSchedGraphNode *destNode) { 00600 00601 MSNodeAttributes &attributes = nodeToAttributesMap.find(node)->second; 00602 00603 if(attributes.depth != -1) 00604 return attributes.depth; 00605 00606 int maxDepth = 0; 00607 00608 //Iterate over all of the predecessors and fine max 00609 for(MSchedGraphNode::pred_iterator P = node->pred_begin(), E = node->pred_end(); P != E; ++P) { 00610 00611 if(!ignoreEdge(*P, node)) { 00612 int predDepth = -1; 00613 predDepth = calculateDepth(*P, node); 00614 00615 assert(predDepth != -1 && "Predecessors ASAP should have been caclulated"); 00616 00617 int currentDepth = predDepth + (*P)->getLatency(); 00618 maxDepth = std::max(maxDepth, currentDepth); 00619 } 00620 } 00621 attributes.depth = maxDepth; 00622 00623 DEBUG(std::cerr << "Depth: " << attributes.depth << " (" << *node << "*)\n"); 00624 return maxDepth; 00625 } 00626 00627 00628 00629 void ModuloSchedulingPass::addReccurrence(std::vector<MSchedGraphNode*> &recurrence, int II, MSchedGraphNode *srcBENode, MSchedGraphNode *destBENode) { 00630 //Check to make sure that this recurrence is unique 00631 bool same = false; 00632 00633 00634 //Loop over all recurrences already in our list 00635 for(std::set<std::pair<int, std::vector<MSchedGraphNode*> > >::iterator R = recurrenceList.begin(), RE = recurrenceList.end(); R != RE; ++R) { 00636 00637 bool all_same = true; 00638 //First compare size 00639 if(R->second.size() == recurrence.size()) { 00640 00641 for(std::vector<MSchedGraphNode*>::const_iterator node = R->second.begin(), end = R->second.end(); node != end; ++node) { 00642 if(std::find(recurrence.begin(), recurrence.end(), *node) == recurrence.end()) { 00643 all_same = all_same && false; 00644 break; 00645 } 00646 else 00647 all_same = all_same && true; 00648 } 00649 if(all_same) { 00650 same = true; 00651 break; 00652 } 00653 } 00654 } 00655 00656 if(!same) { 00657 srcBENode = recurrence.back(); 00658 destBENode = recurrence.front(); 00659 00660 //FIXME 00661 if(destBENode->getInEdge(srcBENode).getIteDiff() == 0) { 00662 //DEBUG(std::cerr << "NOT A BACKEDGE\n"); 00663 //find actual backedge HACK HACK 00664 for(unsigned i=0; i< recurrence.size()-1; ++i) { 00665 if(recurrence[i+1]->getInEdge(recurrence[i]).getIteDiff() == 1) { 00666 srcBENode = recurrence[i]; 00667 destBENode = recurrence[i+1]; 00668 break; 00669 } 00670 00671 } 00672 00673 } 00674 DEBUG(std::cerr << "Back Edge to Remove: " << *srcBENode << " to " << *destBENode << "\n"); 00675 edgesToIgnore.insert(std::make_pair(srcBENode, destBENode->getInEdgeNum(srcBENode))); 00676 recurrenceList.insert(std::make_pair(II, recurrence)); 00677 } 00678 00679 } 00680 00681 void ModuloSchedulingPass::findAllReccurrences(MSchedGraphNode *node, 00682 std::vector<MSchedGraphNode*> &visitedNodes, 00683 int II) { 00684 00685 if(std::find(visitedNodes.begin(), visitedNodes.end(), node) != visitedNodes.end()) { 00686 std::vector<MSchedGraphNode*> recurrence; 00687 bool first = true; 00688 int delay = 0; 00689 int distance = 0; 00690 int RecMII = II; //Starting value 00691 MSchedGraphNode *last = node; 00692 MSchedGraphNode *srcBackEdge = 0; 00693 MSchedGraphNode *destBackEdge = 0; 00694 00695 00696 00697 for(std::vector<MSchedGraphNode*>::iterator I = visitedNodes.begin(), E = visitedNodes.end(); 00698 I !=E; ++I) { 00699 00700 if(*I == node) 00701 first = false; 00702 if(first) 00703 continue; 00704 00705 delay = delay + (*I)->getLatency(); 00706 00707 if(*I != node) { 00708 int diff = (*I)->getInEdge(last).getIteDiff(); 00709 distance += diff; 00710 if(diff > 0) { 00711 srcBackEdge = last; 00712 destBackEdge = *I; 00713 } 00714 } 00715 00716 recurrence.push_back(*I); 00717 last = *I; 00718 } 00719 00720 00721 00722 //Get final distance calc 00723 distance += node->getInEdge(last).getIteDiff(); 00724 DEBUG(std::cerr << "Reccurrence Distance: " << distance << "\n"); 00725 00726 //Adjust II until we get close to the inequality delay - II*distance <= 0 00727 00728 int value = delay-(RecMII * distance); 00729 int lastII = II; 00730 while(value <= 0) { 00731 00732 lastII = RecMII; 00733 RecMII--; 00734 value = delay-(RecMII * distance); 00735 } 00736 00737 00738 DEBUG(std::cerr << "Final II for this recurrence: " << lastII << "\n"); 00739 addReccurrence(recurrence, lastII, srcBackEdge, destBackEdge); 00740 assert(distance != 0 && "Recurrence distance should not be zero"); 00741 return; 00742 } 00743 00744 for(MSchedGraphNode::succ_iterator I = node->succ_begin(), E = node->succ_end(); I != E; ++I) { 00745 visitedNodes.push_back(node); 00746 findAllReccurrences(*I, visitedNodes, II); 00747 visitedNodes.pop_back(); 00748 } 00749 } 00750 00751 00752 00753 00754 00755 void ModuloSchedulingPass::computePartialOrder() { 00756 00757 //Only push BA branches onto the final node order, we put other branches after it 00758 //FIXME: Should we really be pushing branches on it a specific order instead of relying 00759 //on BA being there? 00760 std::vector<MSchedGraphNode*> branches; 00761 00762 //Loop over all recurrences and add to our partial order 00763 //be sure to remove nodes that are already in the partial order in 00764 //a different recurrence and don't add empty recurrences. 00765 for(std::set<std::pair<int, std::vector<MSchedGraphNode*> > >::reverse_iterator I = recurrenceList.rbegin(), E=recurrenceList.rend(); I !=E; ++I) { 00766 00767 //Add nodes that connect this recurrence to the previous recurrence 00768 00769 //If this is the first recurrence in the partial order, add all predecessors 00770 for(std::vector<MSchedGraphNode*>::const_iterator N = I->second.begin(), NE = I->second.end(); N != NE; ++N) { 00771 00772 } 00773 00774 00775 std::set<MSchedGraphNode*> new_recurrence; 00776 //Loop through recurrence and remove any nodes already in the partial order 00777 for(std::vector<MSchedGraphNode*>::const_iterator N = I->second.begin(), NE = I->second.end(); N != NE; ++N) { 00778 bool found = false; 00779 for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(), PE = partialOrder.end(); PO != PE; ++PO) { 00780 if(PO->count(*N)) 00781 found = true; 00782 } 00783 if(!found) { 00784 if((*N)->isBranch()) { 00785 branches.push_back(*N); 00786 } 00787 else 00788 new_recurrence.insert(*N); 00789 } 00790 if(partialOrder.size() == 0) 00791 //For each predecessors, add it to this recurrence ONLY if it is not already in it 00792 for(MSchedGraphNode::pred_iterator P = (*N)->pred_begin(), 00793 PE = (*N)->pred_end(); P != PE; ++P) { 00794 00795 //Check if we are supposed to ignore this edge or not 00796 if(!ignoreEdge(*P, *N)) 00797 //Check if already in this recurrence 00798 if(std::find(I->second.begin(), I->second.end(), *P) == I->second.end()) { 00799 //Also need to check if in partial order 00800 bool predFound = false; 00801 for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(), PEND = partialOrder.end(); PO != PEND; ++PO) { 00802 if(PO->count(*P)) 00803 predFound = true; 00804 } 00805 00806 if(!predFound) 00807 if(!new_recurrence.count(*P)) { 00808 if((*P)->isBranch()) { 00809 branches.push_back(*P); 00810 } 00811 else 00812 new_recurrence.insert(*P); 00813 00814 } 00815 } 00816 } 00817 } 00818 00819 if(new_recurrence.size() > 0) 00820 partialOrder.push_back(new_recurrence); 00821 } 00822 00823 //Add any nodes that are not already in the partial order 00824 //Add them in a set, one set per connected component 00825 std::set<MSchedGraphNode*> lastNodes; 00826 for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) { 00827 bool found = false; 00828 //Check if its already in our partial order, if not add it to the final vector 00829 for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(), PE = partialOrder.end(); PO != PE; ++PO) { 00830 if(PO->count(I->first)) 00831 found = true; 00832 } 00833 if(!found) { 00834 if(I->first->isBranch()) { 00835 if(std::find(branches.begin(), branches.end(), I->first) == branches.end()) 00836 branches.push_back(I->first); 00837 } 00838 else 00839 lastNodes.insert(I->first); 00840 } 00841 } 00842 00843 //Break up remaining nodes that are not in the partial order 00844 //into their connected compoenents 00845 while(lastNodes.size() > 0) { 00846 std::set<MSchedGraphNode*> ccSet; 00847 connectedComponentSet(*(lastNodes.begin()),ccSet, lastNodes); 00848 if(ccSet.size() > 0) 00849 partialOrder.push_back(ccSet); 00850 } 00851 //if(lastNodes.size() > 0) 00852 //partialOrder.push_back(lastNodes); 00853 00854 //Clean up branches by putting them in final order 00855 std::map<unsigned, MSchedGraphNode*> branchOrder; 00856 for(std::vector<MSchedGraphNode*>::iterator I = branches.begin(), E = branches.end(); I != E; ++I) 00857 branchOrder[(*I)->getIndex()] = *I; 00858 00859 for(std::map<unsigned, MSchedGraphNode*>::reverse_iterator I = branchOrder.rbegin(), E = branchOrder.rend(); I != E; ++I) 00860 FinalNodeOrder.push_back(I->second); 00861 00862 } 00863 00864 00865 void ModuloSchedulingPass::connectedComponentSet(MSchedGraphNode *node, std::set<MSchedGraphNode*> &ccSet, std::set<MSchedGraphNode*> &lastNodes) { 00866 00867 //Add to final set 00868 if( !ccSet.count(node) && lastNodes.count(node)) { 00869 lastNodes.erase(node); 00870 if(node->isBranch()) 00871 FinalNodeOrder.push_back(node); 00872 else 00873 ccSet.insert(node); 00874 } 00875 else 00876 return; 00877 00878 //Loop over successors and recurse if we have not seen this node before 00879 for(MSchedGraphNode::succ_iterator node_succ = node->succ_begin(), end=node->succ_end(); node_succ != end; ++node_succ) { 00880 connectedComponentSet(*node_succ, ccSet, lastNodes); 00881 } 00882 00883 } 00884 00885 void ModuloSchedulingPass::predIntersect(std::set<MSchedGraphNode*> &CurrentSet, std::set<MSchedGraphNode*> &IntersectResult) { 00886 00887 for(unsigned j=0; j < FinalNodeOrder.size(); ++j) { 00888 for(MSchedGraphNode::pred_iterator P = FinalNodeOrder[j]->pred_begin(), 00889 E = FinalNodeOrder[j]->pred_end(); P != E; ++P) { 00890 00891 //Check if we are supposed to ignore this edge or not 00892 if(ignoreEdge(*P,FinalNodeOrder[j])) 00893 continue; 00894 00895 if(CurrentSet.count(*P)) 00896 if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), *P) == FinalNodeOrder.end()) 00897 IntersectResult.insert(*P); 00898 } 00899 } 00900 } 00901 00902 00903 00904 00905 00906 void ModuloSchedulingPass::succIntersect(std::set<MSchedGraphNode*> &CurrentSet, std::set<MSchedGraphNode*> &IntersectResult) { 00907 00908 for(unsigned j=0; j < FinalNodeOrder.size(); ++j) { 00909 for(MSchedGraphNode::succ_iterator P = FinalNodeOrder[j]->succ_begin(), 00910 E = FinalNodeOrder[j]->succ_end(); P != E; ++P) { 00911 00912 //Check if we are supposed to ignore this edge or not 00913 if(ignoreEdge(FinalNodeOrder[j],*P)) 00914 continue; 00915 00916 if(CurrentSet.count(*P)) 00917 if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), *P) == FinalNodeOrder.end()) 00918 IntersectResult.insert(*P); 00919 } 00920 } 00921 } 00922 00923 void dumpIntersection(std::set<MSchedGraphNode*> &IntersectCurrent) { 00924 std::cerr << "Intersection ("; 00925 for(std::set<MSchedGraphNode*>::iterator I = IntersectCurrent.begin(), E = IntersectCurrent.end(); I != E; ++I) 00926 std::cerr << **I << ", "; 00927 std::cerr << ")\n"; 00928 } 00929 00930 00931 00932 void ModuloSchedulingPass::orderNodes() { 00933 00934 int BOTTOM_UP = 0; 00935 int TOP_DOWN = 1; 00936 00937 //Set default order 00938 int order = BOTTOM_UP; 00939 00940 00941 //Loop over all the sets and place them in the final node order 00942 for(std::vector<std::set<MSchedGraphNode*> >::iterator CurrentSet = partialOrder.begin(), E= partialOrder.end(); CurrentSet != E; ++CurrentSet) { 00943 00944 DEBUG(std::cerr << "Processing set in S\n"); 00945 DEBUG(dumpIntersection(*CurrentSet)); 00946 00947 //Result of intersection 00948 std::set<MSchedGraphNode*> IntersectCurrent; 00949 00950 predIntersect(*CurrentSet, IntersectCurrent); 00951 00952 //If the intersection of predecessor and current set is not empty 00953 //sort nodes bottom up 00954 if(IntersectCurrent.size() != 0) { 00955 DEBUG(std::cerr << "Final Node Order Predecessors and Current Set interesection is NOT empty\n"); 00956 order = BOTTOM_UP; 00957 } 00958 //If empty, use successors 00959 else { 00960 DEBUG(std::cerr << "Final Node Order Predecessors and Current Set interesection is empty\n"); 00961 00962 succIntersect(*CurrentSet, IntersectCurrent); 00963 00964 //sort top-down 00965 if(IntersectCurrent.size() != 0) { 00966 DEBUG(std::cerr << "Final Node Order Successors and Current Set interesection is NOT empty\n"); 00967 order = TOP_DOWN; 00968 } 00969 else { 00970 DEBUG(std::cerr << "Final Node Order Successors and Current Set interesection is empty\n"); 00971 //Find node with max ASAP in current Set 00972 MSchedGraphNode *node; 00973 int maxASAP = 0; 00974 DEBUG(std::cerr << "Using current set of size " << CurrentSet->size() << "to find max ASAP\n"); 00975 for(std::set<MSchedGraphNode*>::iterator J = CurrentSet->begin(), JE = CurrentSet->end(); J != JE; ++J) { 00976 //Get node attributes 00977 MSNodeAttributes nodeAttr= nodeToAttributesMap.find(*J)->second; 00978 //assert(nodeAttr != nodeToAttributesMap.end() && "Node not in attributes map!"); 00979 00980 if(maxASAP <= nodeAttr.ASAP) { 00981 maxASAP = nodeAttr.ASAP; 00982 node = *J; 00983 } 00984 } 00985 assert(node != 0 && "In node ordering node should not be null"); 00986 IntersectCurrent.insert(node); 00987 order = BOTTOM_UP; 00988 } 00989 } 00990 00991 //Repeat until all nodes are put into the final order from current set 00992 while(IntersectCurrent.size() > 0) { 00993 00994 if(order == TOP_DOWN) { 00995 DEBUG(std::cerr << "Order is TOP DOWN\n"); 00996 00997 while(IntersectCurrent.size() > 0) { 00998 DEBUG(std::cerr << "Intersection is not empty, so find heighest height\n"); 00999 01000 int MOB = 0; 01001 int height = 0; 01002 MSchedGraphNode *highestHeightNode = *(IntersectCurrent.begin()); 01003 01004 //Find node in intersection with highest heigh and lowest MOB 01005 for(std::set<MSchedGraphNode*>::iterator I = IntersectCurrent.begin(), 01006 E = IntersectCurrent.end(); I != E; ++I) { 01007 01008 //Get current nodes properties 01009 MSNodeAttributes nodeAttr= nodeToAttributesMap.find(*I)->second; 01010 01011 if(height < nodeAttr.height) { 01012 highestHeightNode = *I; 01013 height = nodeAttr.height; 01014 MOB = nodeAttr.MOB; 01015 } 01016 else if(height == nodeAttr.height) { 01017 if(MOB > nodeAttr.height) { 01018 highestHeightNode = *I; 01019 height = nodeAttr.height; 01020 MOB = nodeAttr.MOB; 01021 } 01022 } 01023 } 01024 01025 //Append our node with greatest height to the NodeOrder 01026 if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), highestHeightNode) == FinalNodeOrder.end()) { 01027 DEBUG(std::cerr << "Adding node to Final Order: " << *highestHeightNode << "\n"); 01028 FinalNodeOrder.push_back(highestHeightNode); 01029 } 01030 01031 //Remove V from IntersectOrder 01032 IntersectCurrent.erase(std::find(IntersectCurrent.begin(), 01033 IntersectCurrent.end(), highestHeightNode)); 01034 01035 01036 //Intersect V's successors with CurrentSet 01037 for(MSchedGraphNode::succ_iterator P = highestHeightNode->succ_begin(), 01038 E = highestHeightNode->succ_end(); P != E; ++P) { 01039 //if(lower_bound(CurrentSet->begin(), 01040 // CurrentSet->end(), *P) != CurrentSet->end()) { 01041 if(std::find(CurrentSet->begin(), CurrentSet->end(), *P) != CurrentSet->end()) { 01042 if(ignoreEdge(highestHeightNode, *P)) 01043 continue; 01044 //If not already in Intersect, add 01045 if(!IntersectCurrent.count(*P)) 01046 IntersectCurrent.insert(*P); 01047 } 01048 } 01049 } //End while loop over Intersect Size 01050 01051 //Change direction 01052 order = BOTTOM_UP; 01053 01054 //Reset Intersect to reflect changes in OrderNodes 01055 IntersectCurrent.clear(); 01056 predIntersect(*CurrentSet, IntersectCurrent); 01057 01058 } //End If TOP_DOWN 01059 01060 //Begin if BOTTOM_UP 01061 else { 01062 DEBUG(std::cerr << "Order is BOTTOM UP\n"); 01063 while(IntersectCurrent.size() > 0) { 01064 DEBUG(std::cerr << "Intersection of size " << IntersectCurrent.size() << ", finding highest depth\n"); 01065 01066 //dump intersection 01067 DEBUG(dumpIntersection(IntersectCurrent)); 01068 //Get node with highest depth, if a tie, use one with lowest 01069 //MOB 01070 int MOB = 0; 01071 int depth = 0; 01072 MSchedGraphNode *highestDepthNode = *(IntersectCurrent.begin()); 01073 01074 for(std::set<MSchedGraphNode*>::iterator I = IntersectCurrent.begin(), 01075 E = IntersectCurrent.end(); I != E; ++I) { 01076 //Find node attribute in graph 01077 MSNodeAttributes nodeAttr= nodeToAttributesMap.find(*I)->second; 01078 01079 if(depth < nodeAttr.depth) { 01080 highestDepthNode = *I; 01081 depth = nodeAttr.depth; 01082 MOB = nodeAttr.MOB; 01083 } 01084 else if(depth == nodeAttr.depth) { 01085 if(MOB > nodeAttr.MOB) { 01086 highestDepthNode = *I; 01087 depth = nodeAttr.depth; 01088 MOB = nodeAttr.MOB; 01089 } 01090 } 01091 } 01092 01093 01094 01095 //Append highest depth node to the NodeOrder 01096 if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), highestDepthNode) == FinalNodeOrder.end()) { 01097 DEBUG(std::cerr << "Adding node to Final Order: " << *highestDepthNode << "\n"); 01098 FinalNodeOrder.push_back(highestDepthNode); 01099 } 01100 //Remove heightestDepthNode from IntersectOrder 01101 IntersectCurrent.erase(highestDepthNode); 01102 01103 01104 //Intersect heightDepthNode's pred with CurrentSet 01105 for(MSchedGraphNode::pred_iterator P = highestDepthNode->pred_begin(), 01106 E = highestDepthNode->pred_end(); P != E; ++P) { 01107 if(CurrentSet->count(*P)) { 01108 if(ignoreEdge(*P, highestDepthNode)) 01109 continue; 01110 01111 //If not already in Intersect, add 01112 if(!IntersectCurrent.count(*P)) 01113 IntersectCurrent.insert(*P); 01114 } 01115 } 01116 01117 } //End while loop over Intersect Size 01118 01119 //Change order 01120 order = TOP_DOWN; 01121 01122 //Reset IntersectCurrent to reflect changes in OrderNodes 01123 IntersectCurrent.clear(); 01124 succIntersect(*CurrentSet, IntersectCurrent); 01125 } //End if BOTTOM_DOWN 01126 01127 DEBUG(std::cerr << "Current Intersection Size: " << IntersectCurrent.size() << "\n"); 01128 } 01129 //End Wrapping while loop 01130 DEBUG(std::cerr << "Ending Size of Current Set: " << CurrentSet->size() << "\n"); 01131 }//End for over all sets of nodes 01132 01133 //FIXME: As the algorithm stands it will NEVER add an instruction such as ba (with no 01134 //data dependencies) to the final order. We add this manually. It will always be 01135 //in the last set of S since its not part of a recurrence 01136 //Loop over all the sets and place them in the final node order 01137 std::vector<std::set<MSchedGraphNode*> > ::reverse_iterator LastSet = partialOrder.rbegin(); 01138 for(std::set<MSchedGraphNode*>::iterator CurrentNode = LastSet->begin(), LastNode = LastSet->end(); 01139 CurrentNode != LastNode; ++CurrentNode) { 01140 if((*CurrentNode)->getInst()->getOpcode() == V9::BA) 01141 FinalNodeOrder.push_back(*CurrentNode); 01142 } 01143 //Return final Order 01144 //return FinalNodeOrder; 01145 } 01146 01147 bool ModuloSchedulingPass::computeSchedule() { 01148 01149 bool success = false; 01150 01151 //FIXME: Should be set to max II of the original loop 01152 //Cap II in order to prevent infinite loop 01153 int capII = 100; 01154 01155 while(!success) { 01156 01157 int branchES = II - 1; 01158 int branchLS = II - 1; 01159 bool lastBranch = true; 01160 01161 //Loop over the final node order and process each node 01162 for(std::vector<MSchedGraphNode*>::iterator I = FinalNodeOrder.begin(), 01163 E = FinalNodeOrder.end(); I != E; ++I) { 01164 01165 //CalculateEarly and Late start 01166 int EarlyStart = -1; 01167 int LateStart = 99999; //Set to something higher then we would ever expect (FIXME) 01168 bool hasSucc = false; 01169 bool hasPred = false; 01170 01171 if(!(*I)->isBranch()) { 01172 //Loop over nodes in the schedule and determine if they are predecessors 01173 //or successors of the node we are trying to schedule 01174 for(MSSchedule::schedule_iterator nodesByCycle = schedule.begin(), nodesByCycleEnd = schedule.end(); 01175 nodesByCycle != nodesByCycleEnd; ++nodesByCycle) { 01176 01177 //For this cycle, get the vector of nodes schedule and loop over it 01178 for(std::vector<MSchedGraphNode*>::iterator schedNode = nodesByCycle->second.begin(), SNE = nodesByCycle->second.end(); schedNode != SNE; ++schedNode) { 01179 01180 if((*I)->isPredecessor(*schedNode)) { 01181 if(!ignoreEdge(*schedNode, *I)) { 01182 int diff = (*I)->getInEdge(*schedNode).getIteDiff(); 01183 int ES_Temp = nodesByCycle->first + (*schedNode)->getLatency() - diff * II; 01184 DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << nodesByCycle->first << "\n"); 01185 DEBUG(std::cerr << "Temp EarlyStart: " << ES_Temp << " Prev EarlyStart: " << EarlyStart << "\n"); 01186 EarlyStart = std::max(EarlyStart, ES_Temp); 01187 hasPred = true; 01188 } 01189 } 01190 if((*I)->isSuccessor(*schedNode)) { 01191 if(!ignoreEdge(*I,*schedNode)) { 01192 int diff = (*schedNode)->getInEdge(*I).getIteDiff(); 01193 int LS_Temp = nodesByCycle->first - (*I)->getLatency() + diff * II; 01194 DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << nodesByCycle->first << "\n"); 01195 DEBUG(std::cerr << "Temp LateStart: " << LS_Temp << " Prev LateStart: " << LateStart << "\n"); 01196 LateStart = std::min(LateStart, LS_Temp); 01197 hasSucc = true; 01198 } 01199 } 01200 } 01201 } 01202 } 01203 else { 01204 if(lastBranch) { 01205 EarlyStart = branchES; 01206 LateStart = branchLS; 01207 lastBranch = false; 01208 --branchES; 01209 branchLS = 0; 01210 } 01211 else { 01212 EarlyStart = branchES; 01213 LateStart = branchLS; 01214 assert( (EarlyStart >= 0) && (LateStart >=0) && "EarlyStart and LateStart must be greater then 0"); 01215 --branchES; 01216 } 01217 hasPred = 1; 01218 hasSucc = 1; 01219 } 01220 01221 01222 DEBUG(std::cerr << "Has Successors: " << hasSucc << ", Has Pred: " << hasPred << "\n"); 01223 DEBUG(std::cerr << "EarlyStart: " << EarlyStart << ", LateStart: " << LateStart << "\n"); 01224 01225 //Check if the node has no pred or successors and set Early Start to its ASAP 01226 if(!hasSucc && !hasPred) 01227 EarlyStart = nodeToAttributesMap.find(*I)->second.ASAP; 01228 01229 //Now, try to schedule this node depending upon its pred and successor in the schedule 01230 //already 01231 if(!hasSucc && hasPred) 01232 success = scheduleNode(*I, EarlyStart, (EarlyStart + II -1)); 01233 else if(!hasPred && hasSucc) 01234 success = scheduleNode(*I, LateStart, (LateStart - II +1)); 01235 else if(hasPred && hasSucc) 01236 success = scheduleNode(*I, EarlyStart, std::min(LateStart, (EarlyStart + II -1))); 01237 else 01238 success = scheduleNode(*I, EarlyStart, EarlyStart + II - 1); 01239 01240 if(!success) { 01241 ++IncreasedII; 01242 ++II; 01243 schedule.clear(); 01244 break; 01245 } 01246 01247 } 01248 01249 if(success) { 01250 DEBUG(std::cerr << "Constructing Schedule Kernel\n"); 01251 success = schedule.constructKernel(II); 01252 DEBUG(std::cerr << "Done Constructing Schedule Kernel\n"); 01253 if(!success) { 01254 ++IncreasedII; 01255 ++II; 01256 schedule.clear(); 01257 } 01258 } 01259 01260 if(II >= capII) 01261 return false; 01262 01263 assert(II < capII && "The II should not exceed the original loop number of cycles"); 01264 } 01265 return true; 01266 } 01267 01268 01269 bool ModuloSchedulingPass::scheduleNode(MSchedGraphNode *node, 01270 int start, int end) { 01271 bool success = false; 01272 01273 DEBUG(std::cerr << *node << " (Start Cycle: " << start << ", End Cycle: " << end << ")\n"); 01274 01275 //Make sure start and end are not negative 01276 if(start < 0) { 01277 start = 0; 01278 01279 } 01280 if(end < 0) 01281 end = 0; 01282 01283 bool forward = true; 01284 if(start > end) 01285 forward = false; 01286 01287 bool increaseSC = true; 01288 int cycle = start ; 01289 01290 01291 while(increaseSC) { 01292 01293 increaseSC = false; 01294 01295 increaseSC = schedule.insert(node, cycle); 01296 01297 if(!increaseSC) 01298 return true; 01299 01300 //Increment cycle to try again 01301 if(forward) { 01302 ++cycle; 01303 DEBUG(std::cerr << "Increase cycle: " << cycle << "\n"); 01304 if(cycle > end) 01305 return false; 01306 } 01307 else { 01308 --cycle; 01309 DEBUG(std::cerr << "Decrease cycle: " << cycle << "\n"); 01310 if(cycle < end) 01311 return false; 01312 } 01313 } 01314 01315 return success; 01316 } 01317 01318 void ModuloSchedulingPass::writePrologues(std::vector<MachineBasicBlock *> &prologues, MachineBasicBlock *origBB, std::vector<BasicBlock*> &llvm_prologues, std::map<const Value*, std::pair<const MSchedGraphNode*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues, std::map<Value*, MachineBasicBlock*> &newValLocation) { 01319 01320 //Keep a map to easily know whats in the kernel 01321 std::map<int, std::set<const MachineInstr*> > inKernel; 01322 int maxStageCount = 0; 01323 01324 //Keep a map of new values we consumed in case they need to be added back 01325 std::map<Value*, std::map<int, Value*> > consumedValues; 01326 01327 MSchedGraphNode *branch = 0; 01328 MSchedGraphNode *BAbranch = 0; 01329 01330 schedule.print(std::cerr); 01331 01332 std::vector<MSchedGraphNode*> branches; 01333 01334 for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { 01335 maxStageCount = std::max(maxStageCount, I->second); 01336 01337 //Ignore the branch, we will handle this separately 01338 if(I->first->isBranch()) { 01339 branches.push_back(I->first); 01340 continue; 01341 } 01342 01343 //Put int the map so we know what instructions in each stage are in the kernel 01344 DEBUG(std::cerr << "Inserting instruction " << *(I->first->getInst()) << " into map at stage " << I->second << "\n"); 01345 inKernel[I->second].insert(I->first->getInst()); 01346 } 01347 01348 //Get target information to look at machine operands 01349 const TargetInstrInfo *mii = target.getInstrInfo(); 01350 01351 //Now write the prologues 01352 for(int i = 0; i < maxStageCount; ++i) { 01353 BasicBlock *llvmBB = new BasicBlock("PROLOGUE", (Function*) (origBB->getBasicBlock()->getParent())); 01354 MachineBasicBlock *machineBB = new MachineBasicBlock(llvmBB); 01355 01356 DEBUG(std::cerr << "i=" << i << "\n"); 01357 for(int j = 0; j <= i; ++j) { 01358 for(MachineBasicBlock::const_iterator MI = origBB->begin(), ME = origBB->end(); ME != MI; ++MI) { 01359 if(inKernel[j].count(&*MI)) { 01360 MachineInstr *instClone = MI->clone(); 01361 machineBB->push_back(instClone); 01362 01363 DEBUG(std::cerr << "Cloning: " << *MI << "\n"); 01364 01365 Instruction *tmp; 01366 01367 //After cloning, we may need to save the value that this instruction defines 01368 for(unsigned opNum=0; opNum < MI->getNumOperands(); ++opNum) { 01369 //get machine operand 01370 MachineOperand &mOp = instClone->getOperand(opNum); 01371 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) { 01372 01373 //Check if this is a value we should save 01374 if(valuesToSave.count(mOp.getVRegValue())) { 01375 //Save copy in tmpInstruction 01376 tmp = new TmpInstruction(mOp.getVRegValue()); 01377 01378 //Add TmpInstruction to safe LLVM Instruction MCFI 01379 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 01380 tempMvec.addTemp((Value*) tmp); 01381 01382 DEBUG(std::cerr << "Value: " << *(mOp.getVRegValue()) << " New Value: " << *tmp << " Stage: " << i << "\n"); 01383 01384 newValues[mOp.getVRegValue()][i]= tmp; 01385 newValLocation[tmp] = machineBB; 01386 01387 DEBUG(std::cerr << "Machine Instr Operands: " << *(mOp.getVRegValue()) << ", 0, " << *tmp << "\n"); 01388 01389 //Create machine instruction and put int machineBB 01390 MachineInstr *saveValue = BuildMI(machineBB, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); 01391 01392 DEBUG(std::cerr << "Created new machine instr: " << *saveValue << "\n"); 01393 } 01394 } 01395 01396 //We may also need to update the value that we use if its from an earlier prologue 01397 if(j != 0) { 01398 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) { 01399 if(newValues.count(mOp.getVRegValue())) { 01400 if(newValues[mOp.getVRegValue()].count(i-1)) { 01401 Value *oldV = mOp.getVRegValue(); 01402 DEBUG(std::cerr << "Replaced this value: " << mOp.getVRegValue() << " With:" << (newValues[mOp.getVRegValue()][i-1]) << "\n"); 01403 //Update the operand with the right value 01404 mOp.setValueReg(newValues[mOp.getVRegValue()][i-1]); 01405 01406 //Remove this value since we have consumed it 01407 //NOTE: Should this only be done if j != maxStage? 01408 consumedValues[oldV][i-1] = (newValues[oldV][i-1]); 01409 DEBUG(std::cerr << "Deleted value: " << consumedValues[oldV][i-1] << "\n"); 01410 newValues[oldV].erase(i-1); 01411 } 01412 } 01413 else 01414 if(consumedValues.count(mOp.getVRegValue())) 01415 assert(!consumedValues[mOp.getVRegValue()].count(i-1) && "Found a case where we need the value"); 01416 } 01417 } 01418 } 01419 } 01420 } 01421 } 01422 01423 01424 for(std::vector<MSchedGraphNode*>::iterator BR = branches.begin(), BE = branches.end(); BR != BE; ++BR) { 01425 01426 //Stick in branch at the end 01427 machineBB->push_back((*BR)->getInst()->clone()); 01428 01429 //Add nop 01430 BuildMI(machineBB, V9::NOP, 0); 01431 } 01432 01433 01434 (((MachineBasicBlock*)origBB)->getParent())->getBasicBlockList().push_back(machineBB); 01435 prologues.push_back(machineBB); 01436 llvm_prologues.push_back(llvmBB); 01437 } 01438 } 01439 01440 void ModuloSchedulingPass::writeEpilogues(std::vector<MachineBasicBlock *> &epilogues, const MachineBasicBlock *origBB, std::vector<BasicBlock*> &llvm_epilogues, std::map<const Value*, std::pair<const MSchedGraphNode*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues,std::map<Value*, MachineBasicBlock*> &newValLocation, std::map<Value*, std::map<int, Value*> > &kernelPHIs ) { 01441 01442 std::map<int, std::set<const MachineInstr*> > inKernel; 01443 01444 for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { 01445 01446 //Ignore the branch, we will handle this separately 01447 if(I->first->isBranch()) 01448 continue; 01449 01450 //Put int the map so we know what instructions in each stage are in the kernel 01451 inKernel[I->second].insert(I->first->getInst()); 01452 } 01453 01454 std::map<Value*, Value*> valPHIs; 01455 01456 //some debug stuff, will remove later 01457 DEBUG(for(std::map<Value*, std::map<int, Value*> >::iterator V = newValues.begin(), E = newValues.end(); V !=E; ++V) { 01458 std::cerr << "Old Value: " << *(V->first) << "\n"; 01459 for(std::map<int, Value*>::iterator I = V->second.begin(), IE = V->second.end(); I != IE; ++I) 01460 std::cerr << "Stage: " << I->first << " Value: " << *(I->second) << "\n"; 01461 }); 01462 01463 //some debug stuff, will remove later 01464 DEBUG(for(std::map<Value*, std::map<int, Value*> >::iterator V = kernelPHIs.begin(), E = kernelPHIs.end(); V !=E; ++V) { 01465 std::cerr << "Old Value: " << *(V->first) << "\n"; 01466 for(std::map<int, Value*>::iterator I = V->second.begin(), IE = V->second.end(); I != IE; ++I) 01467 std::cerr << "Stage: " << I->first << " Value: " << *(I->second) << "\n"; 01468 }); 01469 01470 //Now write the epilogues 01471 for(int i = schedule.getMaxStage()-1; i >= 0; --i) { 01472 BasicBlock *llvmBB = new BasicBlock("EPILOGUE", (Function*) (origBB->getBasicBlock()->getParent())); 01473 MachineBasicBlock *machineBB = new MachineBasicBlock(llvmBB); 01474 01475 DEBUG(std::cerr << " Epilogue #: " << i << "\n"); 01476 01477 01478 std::map<Value*, int> inEpilogue; 01479 01480 for(MachineBasicBlock::const_iterator MI = origBB->begin(), ME = origBB->end(); ME != MI; ++MI) { 01481 for(int j=schedule.getMaxStage(); j > i; --j) { 01482 if(inKernel[j].count(&*MI)) { 01483 DEBUG(std::cerr << "Cloning instruction " << *MI << "\n"); 01484 MachineInstr *clone = MI->clone(); 01485 01486 //Update operands that need to use the result from the phi 01487 for(unsigned opNum=0; opNum < clone->getNumOperands(); ++opNum) { 01488 //get machine operand 01489 const MachineOperand &mOp = clone->getOperand(opNum); 01490 01491 if((mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse())) { 01492 01493 DEBUG(std::cerr << "Writing PHI for " << *(mOp.getVRegValue()) << "\n"); 01494 01495 //If this is the last instructions for the max iterations ago, don't update operands 01496 if(inEpilogue.count(mOp.getVRegValue())) 01497 if(inEpilogue[mOp.getVRegValue()] == i) 01498 continue; 01499 01500 //Quickly write appropriate phis for this operand 01501 if(newValues.count(mOp.getVRegValue())) { 01502 if(newValues[mOp.getVRegValue()].count(i)) { 01503 Instruction *tmp = new TmpInstruction(newValues[mOp.getVRegValue()][i]); 01504 01505 //Get machine code for this instruction 01506 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 01507 tempMvec.addTemp((Value*) tmp); 01508 01509 MachineInstr *saveValue = BuildMI(machineBB, V9::PHI, 3).addReg(newValues[mOp.getVRegValue()][i]).addReg(kernelPHIs[mOp.getVRegValue()][i]).addRegDef(tmp); 01510 DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n"); 01511 valPHIs[mOp.getVRegValue()] = tmp; 01512 } 01513 } 01514 01515 if(valPHIs.count(mOp.getVRegValue())) { 01516 //Update the operand in the cloned instruction 01517 clone->getOperand(opNum).setValueReg(valPHIs[mOp.getVRegValue()]); 01518 } 01519 } 01520 else if((mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef())) { 01521 inEpilogue[mOp.getVRegValue()] = i; 01522 } 01523 } 01524 machineBB->push_back(clone); 01525 } 01526 } 01527 } 01528 01529 (((MachineBasicBlock*)origBB)->getParent())->getBasicBlockList().push_back(machineBB); 01530 epilogues.push_back(machineBB); 01531 llvm_epilogues.push_back(llvmBB); 01532 01533 DEBUG(std::cerr << "EPILOGUE #" << i << "\n"); 01534 DEBUG(machineBB->print(std::cerr)); 01535 } 01536 } 01537 01538 void ModuloSchedulingPass::writeKernel(BasicBlock *llvmBB, MachineBasicBlock *machineBB, std::map<const Value*, std::pair<const MSchedGraphNode*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues, std::map<Value*, MachineBasicBlock*> &newValLocation, std::map<Value*, std::map<int, Value*> > &kernelPHIs) { 01539 01540 //Keep track of operands that are read and saved from a previous iteration. The new clone 01541 //instruction will use the result of the phi instead. 01542 std::map<Value*, Value*> finalPHIValue; 01543 std::map<Value*, Value*> kernelValue; 01544 01545 //Branches are a special case 01546 std::vector<MachineInstr*> branches; 01547 01548 //Create TmpInstructions for the final phis 01549 for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { 01550 01551 DEBUG(std::cerr << "Stage: " << I->second << " Inst: " << *(I->first->getInst()) << "\n";); 01552 01553 if(I->first->isBranch()) { 01554 //Clone instruction 01555 const MachineInstr *inst = I->first->getInst(); 01556 MachineInstr *instClone = inst->clone(); 01557 branches.push_back(instClone); 01558 } 01559 01560 //Clone instruction 01561 const MachineInstr *inst = I->first->getInst(); 01562 MachineInstr *instClone = inst->clone(); 01563 01564 //Insert into machine basic block 01565 machineBB->push_back(instClone); 01566 01567 DEBUG(std::cerr << "Cloned Inst: " << *instClone << "\n"); 01568 01569 //Loop over Machine Operands 01570 for(unsigned i=0; i < inst->getNumOperands(); ++i) { 01571 //get machine operand 01572 const MachineOperand &mOp = inst->getOperand(i); 01573 01574 if(I->second != 0) { 01575 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) { 01576 01577 //Check to see where this operand is defined if this instruction is from max stage 01578 if(I->second == schedule.getMaxStage()) { 01579 DEBUG(std::cerr << "VREG: " << *(mOp.getVRegValue()) << "\n"); 01580 } 01581 01582 //If its in the value saved, we need to create a temp instruction and use that instead 01583 if(valuesToSave.count(mOp.getVRegValue())) { 01584 01585 //Check if we already have a final PHI value for this 01586 if(!finalPHIValue.count(mOp.getVRegValue())) { 01587 TmpInstruction *tmp = new TmpInstruction(mOp.getVRegValue()); 01588 01589 //Get machine code for this instruction 01590 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 01591 tempMvec.addTemp((Value*) tmp); 01592 01593 //Update the operand in the cloned instruction 01594 instClone->getOperand(i).setValueReg(tmp); 01595 01596 //save this as our final phi 01597 finalPHIValue[mOp.getVRegValue()] = tmp; 01598 newValLocation[tmp] = machineBB; 01599 } 01600 else { 01601 //Use the previous final phi value 01602 instClone->getOperand(i).setValueReg(finalPHIValue[mOp.getVRegValue()]); 01603 } 01604 } 01605 } 01606 } 01607 if(I->second != schedule.getMaxStage()) { 01608 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) { 01609 if(valuesToSave.count(mOp.getVRegValue())) { 01610 01611 TmpInstruction *tmp = new TmpInstruction(mOp.getVRegValue()); 01612 01613 //Get machine code for this instruction 01614 MachineCodeForInstruction & tempVec = MachineCodeForInstruction::get(defaultInst); 01615 tempVec.addTemp((Value*) tmp); 01616 01617 //Create new machine instr and put in MBB 01618 MachineInstr *saveValue = BuildMI(machineBB, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); 01619 01620 //Save for future cleanup 01621 kernelValue[mOp.getVRegValue()] = tmp; 01622 newValLocation[tmp] = machineBB; 01623 kernelPHIs[mOp.getVRegValue()][schedule.getMaxStage()-1] = tmp; 01624 } 01625 } 01626 } 01627 } 01628 01629 } 01630 01631 //Add branches 01632 for(std::vector<MachineInstr*>::iterator I = branches.begin(), E = branches.end(); I != E; ++I) { 01633 machineBB->push_back(*I); 01634 BuildMI(machineBB, V9::NOP, 0); 01635 } 01636 01637 01638 DEBUG(std::cerr << "KERNEL before PHIs\n"); 01639 DEBUG(machineBB->print(std::cerr)); 01640 01641 01642 //Loop over each value we need to generate phis for 01643 for(std::map<Value*, std::map<int, Value*> >::iterator V = newValues.begin(), 01644 E = newValues.end(); V != E; ++V) { 01645 01646 01647 DEBUG(std::cerr << "Writing phi for" << *(V->first)); 01648 DEBUG(std::cerr << "\nMap of Value* for this phi\n"); 01649 DEBUG(for(std::map<int, Value*>::iterator I = V->second.begin(), 01650 IE = V->second.end(); I != IE; ++I) { 01651 std::cerr << "Stage: " << I->first; 01652 std::cerr << " Value: " << *(I->second) << "\n"; 01653 }); 01654 01655 //If we only have one current iteration live, its safe to set lastPhi = to kernel value 01656 if(V->second.size() == 1) { 01657 assert(kernelValue[V->first] != 0 && "Kernel value* must exist to create phi"); 01658 MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(),V9::PHI, 3).addReg(V->second.begin()->second).addReg(kernelValue[V->first]).addRegDef(finalPHIValue[V->first]); 01659 DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n"); 01660 kernelPHIs[V->first][schedule.getMaxStage()-1] = kernelValue[V->first]; 01661 } 01662 else { 01663 01664 //Keep track of last phi created. 01665 Instruction *lastPhi = 0; 01666 01667 unsigned count = 1; 01668 //Loop over the the map backwards to generate phis 01669 for(std::map<int, Value*>::reverse_iterator I = V->second.rbegin(), IE = V->second.rend(); 01670 I != IE; ++I) { 01671 01672 if(count < (V->second).size()) { 01673 if(lastPhi == 0) { 01674 lastPhi = new TmpInstruction(I->second); 01675 01676 //Get machine code for this instruction 01677 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 01678 tempMvec.addTemp((Value*) lastPhi); 01679 01680 MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(), V9::PHI, 3).addReg(kernelValue[V->first]).addReg(I->second).addRegDef(lastPhi); 01681 DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n"); 01682 newValLocation[lastPhi] = machineBB; 01683 } 01684 else { 01685 Instruction *tmp = new TmpInstruction(I->second); 01686 01687 //Get machine code for this instruction 01688 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 01689 tempMvec.addTemp((Value*) tmp); 01690 01691 01692 MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(), V9::PHI, 3).addReg(lastPhi).addReg(I->second).addRegDef(tmp); 01693 DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n"); 01694 lastPhi = tmp; 01695 kernelPHIs[V->first][I->first] = lastPhi; 01696 newValLocation[lastPhi] = machineBB; 01697 } 01698 } 01699 //Final phi value 01700 else { 01701 //The resulting value must be the Value* we created earlier 01702 assert(lastPhi != 0 && "Last phi is NULL!\n"); 01703 MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(), V9::PHI, 3).addReg(lastPhi).addReg(I->second).addRegDef(finalPHIValue[V->first]); 01704 DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n"); 01705 kernelPHIs[V->first][I->first] = finalPHIValue[V->first]; 01706 } 01707 01708 ++count; 01709 } 01710 01711 } 01712 } 01713 01714 DEBUG(std::cerr << "KERNEL after PHIs\n"); 01715 DEBUG(machineBB->print(std::cerr)); 01716 } 01717 01718 01719 void ModuloSchedulingPass::removePHIs(const MachineBasicBlock *origBB, std::vector<MachineBasicBlock *> &prologues, std::vector<MachineBasicBlock *> &epilogues, MachineBasicBlock *kernelBB, std::map<Value*, MachineBasicBlock*> &newValLocation) { 01720 01721 //Worklist to delete things 01722 std::vector<std::pair<MachineBasicBlock*, MachineBasicBlock::iterator> > worklist; 01723 01724 //Worklist of TmpInstructions that need to be added to a MCFI 01725 std::vector<Instruction*> addToMCFI; 01726 01727 //Worklist to add OR instructions to end of kernel so not to invalidate the iterator 01728 //std::vector<std::pair<Instruction*, Value*> > newORs; 01729 01730 const TargetInstrInfo *TMI = target.getInstrInfo(); 01731 01732 //Start with the kernel and for each phi insert a copy for the phi def and for each arg 01733 for(MachineBasicBlock::iterator I = kernelBB->begin(), E = kernelBB->end(); I != E; ++I) { 01734 01735 DEBUG(std::cerr << "Looking at Instr: " << *I << "\n"); 01736 //Get op code and check if its a phi 01737 if(I->getOpcode() == V9::PHI) { 01738 01739 DEBUG(std::cerr << "Replacing PHI: " << *I << "\n"); 01740 Instruction *tmp = 0; 01741 01742 for(unsigned i = 0; i < I->getNumOperands(); ++i) { 01743 //Get Operand 01744 const MachineOperand &mOp = I->getOperand(i); 01745 assert(mOp.getType() == MachineOperand::MO_VirtualRegister && "Should be a Value*\n"); 01746 01747 if(!tmp) { 01748 tmp = new TmpInstruction(mOp.getVRegValue()); 01749 addToMCFI.push_back(tmp); 01750 } 01751 01752 //Now for all our arguments we read, OR to the new TmpInstruction that we created 01753 if(mOp.isUse()) { 01754 DEBUG(std::cerr << "Use: " << mOp << "\n"); 01755 //Place a copy at the end of its BB but before the branches 01756 assert(newValLocation.count(mOp.getVRegValue()) && "We must know where this value is located\n"); 01757 //Reverse iterate to find the branches, we can safely assume no instructions have been 01758 //put in the nop positions 01759 for(MachineBasicBlock::iterator inst = --(newValLocation[mOp.getVRegValue()])->end(), endBB = (newValLocation[mOp.getVRegValue()])->begin(); inst != endBB; --inst) { 01760 MachineOpCode opc = inst->getOpcode(); 01761 if(TMI->isBranch(opc) || TMI->isNop(opc)) 01762 continue; 01763 else { 01764 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); 01765 break; 01766 } 01767 01768 } 01769 01770 } 01771 else { 01772 //Remove the phi and replace it with an OR 01773 DEBUG(std::cerr << "Def: " << mOp << "\n"); 01774 //newORs.push_back(std::make_pair(tmp, mOp.getVRegValue())); 01775 BuildMI(*kernelBB, I, V9::ORr, 3).addReg(tmp).addImm(0).addRegDef(mOp.getVRegValue()); 01776 worklist.push_back(std::make_pair(kernelBB, I)); 01777 } 01778 01779 } 01780 01781 } 01782 01783 01784 } 01785 01786 //Add TmpInstructions to some MCFI 01787 if(addToMCFI.size() > 0) { 01788 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 01789 for(unsigned x = 0; x < addToMCFI.size(); ++x) { 01790 tempMvec.addTemp(addToMCFI[x]); 01791 } 01792 addToMCFI.clear(); 01793 } 01794 01795 01796 //Remove phis from epilogue 01797 for(std::vector<MachineBasicBlock*>::iterator MB = epilogues.begin(), ME = epilogues.end(); MB != ME; ++MB) { 01798 for(MachineBasicBlock::iterator I = (*MB)->begin(), E = (*MB)->end(); I != E; ++I) { 01799 01800 DEBUG(std::cerr << "Looking at Instr: " << *I << "\n"); 01801 //Get op code and check if its a phi 01802 if(I->getOpcode() == V9::PHI) { 01803 Instruction *tmp = 0; 01804 01805 for(unsigned i = 0; i < I->getNumOperands(); ++i) { 01806 //Get Operand 01807 const MachineOperand &mOp = I->getOperand(i); 01808 assert(mOp.getType() == MachineOperand::MO_VirtualRegister && "Should be a Value*\n"); 01809 01810 if(!tmp) { 01811 tmp = new TmpInstruction(mOp.getVRegValue()); 01812 addToMCFI.push_back(tmp); 01813 } 01814 01815 //Now for all our arguments we read, OR to the new TmpInstruction that we created 01816 if(mOp.isUse()) { 01817 DEBUG(std::cerr << "Use: " << mOp << "\n"); 01818 //Place a copy at the end of its BB but before the branches 01819 assert(newValLocation.count(mOp.getVRegValue()) && "We must know where this value is located\n"); 01820 //Reverse iterate to find the branches, we can safely assume no instructions have been 01821 //put in the nop positions 01822 for(MachineBasicBlock::iterator inst = --(newValLocation[mOp.getVRegValue()])->end(), endBB = (newValLocation[mOp.getVRegValue()])->begin(); inst != endBB; --inst) { 01823 MachineOpCode opc = inst->getOpcode(); 01824 if(TMI->isBranch(opc) || TMI->isNop(opc)) 01825 continue; 01826 else { 01827 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); 01828 break; 01829 } 01830 01831 } 01832 01833 } 01834 else { 01835 //Remove the phi and replace it with an OR 01836 DEBUG(std::cerr << "Def: " << mOp << "\n"); 01837 BuildMI(**MB, I, V9::ORr, 3).addReg(tmp).addImm(0).addRegDef(mOp.getVRegValue()); 01838 worklist.push_back(std::make_pair(*MB,I)); 01839 } 01840 01841 } 01842 } 01843 01844 01845 } 01846 } 01847 01848 01849 if(addToMCFI.size() > 0) { 01850 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 01851 for(unsigned x = 0; x < addToMCFI.size(); ++x) { 01852 tempMvec.addTemp(addToMCFI[x]); 01853 } 01854 addToMCFI.clear(); 01855 } 01856 01857 //Delete the phis 01858 for(std::vector<std::pair<MachineBasicBlock*, MachineBasicBlock::iterator> >::iterator I = worklist.begin(), E = worklist.end(); I != E; ++I) { 01859 01860 DEBUG(std::cerr << "Deleting PHI " << *I->second << "\n"); 01861 I->first->erase(I->second); 01862 01863 } 01864 01865 01866 assert((addToMCFI.size() == 0) && "We should have added all TmpInstructions to some MachineCodeForInstruction"); 01867 } 01868 01869 01870 void ModuloSchedulingPass::reconstructLoop(MachineBasicBlock *BB) { 01871 01872 DEBUG(std::cerr << "Reconstructing Loop\n"); 01873 01874 //First find the value *'s that we need to "save" 01875 std::map<const Value*, std::pair<const MSchedGraphNode*, int> > valuesToSave; 01876 01877 //Keep track of instructions we have already seen and their stage because 01878 //we don't want to "save" values if they are used in the kernel immediately 01879 std::map<const MachineInstr*, int> lastInstrs; 01880 01881 //Loop over kernel and only look at instructions from a stage > 0 01882 //Look at its operands and save values *'s that are read 01883 for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { 01884 01885 if(I->second !=0) { 01886 //For this instruction, get the Value*'s that it reads and put them into the set. 01887 //Assert if there is an operand of another type that we need to save 01888 const MachineInstr *inst = I->first->getInst(); 01889 lastInstrs[inst] = I->second; 01890 01891 for(unsigned i=0; i < inst->getNumOperands(); ++i) { 01892 //get machine operand 01893 const MachineOperand &mOp = inst->getOperand(i); 01894 01895 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) { 01896 //find the value in the map 01897 if (const Value* srcI = mOp.getVRegValue()) { 01898 01899 if(isa<Constant>(srcI) || isa<Argument>(srcI) || isa<PHINode>(srcI)) 01900 continue; 01901 01902 //Before we declare this Value* one that we should save 01903 //make sure its def is not of the same stage as this instruction 01904 //because it will be consumed before its used 01905 Instruction *defInst = (Instruction*) srcI; 01906 01907 //Should we save this value? 01908 bool save = true; 01909 01910 //Continue if not in the def map, loop invariant code does not need to be saved 01911 if(!defMap.count(srcI)) 01912 continue; 01913 01914 MachineInstr *defInstr = defMap[srcI]; 01915 01916 01917 if(lastInstrs.count(defInstr)) { 01918 if(lastInstrs[defInstr] == I->second) { 01919 save = false; 01920 01921 } 01922 } 01923 01924 if(save) 01925 valuesToSave[srcI] = std::make_pair(I->first, i); 01926 } 01927 } 01928 01929 if(mOp.getType() != MachineOperand::MO_VirtualRegister && mOp.isUse()) { 01930 assert("Our assumption is wrong. We have another type of register that needs to be saved\n"); 01931 } 01932 } 01933 } 01934 } 01935 01936 //The new loop will consist of one or more prologues, the kernel, and one or more epilogues. 01937 01938 //Map to keep track of old to new values 01939 std::map<Value*, std::map<int, Value*> > newValues; 01940 01941 //Map to keep track of old to new values in kernel 01942 std::map<Value*, std::map<int, Value*> > kernelPHIs; 01943 01944 //Another map to keep track of what machine basic blocks these new value*s are in since 01945 //they have no llvm instruction equivalent 01946 std::map<Value*, MachineBasicBlock*> newValLocation; 01947 01948 std::vector<MachineBasicBlock*> prologues; 01949 std::vector<BasicBlock*> llvm_prologues; 01950 01951 01952 //Write prologue 01953 writePrologues(prologues, BB, llvm_prologues, valuesToSave, newValues, newValLocation); 01954 01955 //Print out epilogues and prologue 01956 DEBUG(for(std::vector<MachineBasicBlock*>::iterator I = prologues.begin(), E = prologues.end(); 01957 I != E; ++I) { 01958 std::cerr << "PROLOGUE\n"; 01959 (*I)->print(std::cerr); 01960 }); 01961 01962 BasicBlock *llvmKernelBB = new BasicBlock("Kernel", (Function*) (BB->getBasicBlock()->getParent())); 01963 MachineBasicBlock *machineKernelBB = new MachineBasicBlock(llvmKernelBB); 01964 (((MachineBasicBlock*)BB)->getParent())->getBasicBlockList().push_back(machineKernelBB); 01965 writeKernel(llvmKernelBB, machineKernelBB, valuesToSave, newValues, newValLocation, kernelPHIs); 01966 01967 01968 std::vector<MachineBasicBlock*> epilogues; 01969 std::vector<BasicBlock*> llvm_epilogues; 01970 01971 //Write epilogues 01972 writeEpilogues(epilogues, BB, llvm_epilogues, valuesToSave, newValues, newValLocation, kernelPHIs); 01973 01974 01975 //Fix our branches 01976 fixBranches(prologues, llvm_prologues, machineKernelBB, llvmKernelBB, epilogues, llvm_epilogues, BB); 01977 01978 //Remove phis 01979 removePHIs(BB, prologues, epilogues, machineKernelBB, newValLocation); 01980 01981 //Print out epilogues and prologue 01982 DEBUG(for(std::vector<MachineBasicBlock*>::iterator I = prologues.begin(), E = prologues.end(); 01983 I != E; ++I) { 01984 std::cerr << "PROLOGUE\n"; 01985 (*I)->print(std::cerr); 01986 }); 01987 01988 DEBUG(std::cerr << "KERNEL\n"); 01989 DEBUG(machineKernelBB->print(std::cerr)); 01990 01991 DEBUG(for(std::vector<MachineBasicBlock*>::iterator I = epilogues.begin(), E = epilogues.end(); 01992 I != E; ++I) { 01993 std::cerr << "EPILOGUE\n"; 01994 (*I)->print(std::cerr); 01995 }); 01996 01997 01998 DEBUG(std::cerr << "New Machine Function" << "\n"); 01999 DEBUG(std::cerr << BB->getParent() << "\n"); 02000 02001 02002 } 02003 02004 void ModuloSchedulingPass::fixBranches(std::vector<MachineBasicBlock *> &prologues, std::vector<BasicBlock*> &llvm_prologues, MachineBasicBlock *machineKernelBB, BasicBlock *llvmKernelBB, std::vector<MachineBasicBlock *> &epilogues, std::vector<BasicBlock*> &llvm_epilogues, MachineBasicBlock *BB) { 02005 02006 const TargetInstrInfo *TMI = target.getInstrInfo(); 02007 02008 //Fix prologue branches 02009 for(unsigned I = 0; I < prologues.size(); ++I) { 02010 02011 //Find terminator since getFirstTerminator does not work! 02012 for(MachineBasicBlock::reverse_iterator mInst = prologues[I]->rbegin(), mInstEnd = prologues[I]->rend(); mInst != mInstEnd; ++mInst) { 02013 MachineOpCode OC = mInst->getOpcode(); 02014 //If its a branch update its branchto 02015 if(TMI->isBranch(OC)) { 02016 for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) { 02017 MachineOperand &mOp = mInst->getOperand(opNum); 02018 if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) { 02019 //Check if we are branching to the kernel, if not branch to epilogue 02020 if(mOp.getVRegValue() == BB->getBasicBlock()) { 02021 if(I == prologues.size()-1) 02022 mOp.setValueReg(llvmKernelBB); 02023 else 02024 mOp.setValueReg(llvm_prologues[I+1]); 02025 } 02026 else { 02027 mOp.setValueReg(llvm_epilogues[(llvm_epilogues.size()-1-I)]); 02028 } 02029 } 02030 } 02031 02032 DEBUG(std::cerr << "New Prologue Branch: " << *mInst << "\n"); 02033 } 02034 } 02035 02036 02037 //Update llvm basic block with our new branch instr 02038 DEBUG(std::cerr << BB->getBasicBlock()->getTerminator() << "\n"); 02039 const BranchInst *branchVal = dyn_cast<BranchInst>(BB->getBasicBlock()->getTerminator()); 02040 02041 if(I == prologues.size()-1) { 02042 TerminatorInst *newBranch = new BranchInst(llvmKernelBB, 02043 llvm_epilogues[(llvm_epilogues.size()-1-I)], 02044 branchVal->getCondition(), 02045 llvm_prologues[I]); 02046 } 02047 else 02048 TerminatorInst *newBranch = new BranchInst(llvm_prologues[I+1], 02049 llvm_epilogues[(llvm_epilogues.size()-1-I)], 02050 branchVal->getCondition(), 02051 llvm_prologues[I]); 02052 02053 } 02054 02055 Value *origBranchExit = 0; 02056 02057 //Fix up kernel machine branches 02058 for(MachineBasicBlock::reverse_iterator mInst = machineKernelBB->rbegin(), mInstEnd = machineKernelBB->rend(); mInst != mInstEnd; ++mInst) { 02059 MachineOpCode OC = mInst->getOpcode(); 02060 if(TMI->isBranch(OC)) { 02061 for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) { 02062 MachineOperand &mOp = mInst->getOperand(opNum); 02063 02064 if(mOp.getType() == MachineOperand::MO_PCRelativeDisp) { 02065 if(mOp.getVRegValue() == BB->getBasicBlock()) 02066 mOp.setValueReg(llvmKernelBB); 02067 else 02068 if(llvm_epilogues.size() > 0) { 02069 assert(origBranchExit == 0 && "There should only be one branch out of the loop"); 02070 02071 origBranchExit = mOp.getVRegValue(); 02072 mOp.setValueReg(llvm_epilogues[0]); 02073 } 02074 } 02075 } 02076 } 02077 } 02078 02079 //Update kernelLLVM branches 02080 const BranchInst *branchVal = dyn_cast<BranchInst>(BB->getBasicBlock()->getTerminator()); 02081 02082 assert(llvm_epilogues.size() != 0 && "We must have epilogues!"); 02083 02084 TerminatorInst *newBranch = new BranchInst(llvmKernelBB, 02085 llvm_epilogues[0], 02086 branchVal->getCondition(), 02087 llvmKernelBB); 02088 02089 02090 //Lastly add unconditional branches for the epilogues 02091 for(unsigned I = 0; I < epilogues.size(); ++I) { 02092 02093 //Now since we don't have fall throughs, add a unconditional branch to the next prologue 02094 if(I != epilogues.size()-1) { 02095 BuildMI(epilogues[I], V9::BA, 1).addPCDisp(llvm_epilogues[I+1]); 02096 //Add unconditional branch to end of epilogue 02097 TerminatorInst *newBranch = new BranchInst(llvm_epilogues[I+1], 02098 llvm_epilogues[I]); 02099 02100 } 02101 else { 02102 BuildMI(epilogues[I], V9::BA, 1).addPCDisp(origBranchExit); 02103 02104 02105 //Update last epilogue exit branch 02106 BranchInst *branchVal = (BranchInst*) dyn_cast<BranchInst>(BB->getBasicBlock()->getTerminator()); 02107 //Find where we are supposed to branch to 02108 BasicBlock *nextBlock = 0; 02109 for(unsigned j=0; j <branchVal->getNumSuccessors(); ++j) { 02110 if(branchVal->getSuccessor(j) != BB->getBasicBlock()) 02111 nextBlock = branchVal->getSuccessor(j); 02112 } 02113 02114 assert((nextBlock != 0) && "Next block should not be null!"); 02115 TerminatorInst *newBranch = new BranchInst(nextBlock, llvm_epilogues[I]); 02116 } 02117 //Add one more nop! 02118 BuildMI(epilogues[I], V9::NOP, 0); 02119 02120 } 02121 02122 //FIX UP Machine BB entry!! 02123 //We are looking at the predecesor of our loop basic block and we want to change its ba instruction 02124 02125 02126 //Find all llvm basic blocks that branch to the loop entry and change to our first prologue. 02127 const BasicBlock *llvmBB = BB->getBasicBlock(); 02128 02129 std::vector<const BasicBlock*>Preds (pred_begin(llvmBB), pred_end(llvmBB)); 02130 02131 //for(pred_const_iterator P = pred_begin(llvmBB), PE = pred_end(llvmBB); P != PE; ++PE) { 02132 for(std::vector<const BasicBlock*>::iterator P = Preds.begin(), PE = Preds.end(); P != PE; ++P) { 02133 if(*P == llvmBB) 02134 continue; 02135 else { 02136 DEBUG(std::cerr << "Found our entry BB\n"); 02137 //Get the Terminator instruction for this basic block and print it out 02138 DEBUG(std::cerr << *((*P)->getTerminator()) << "\n"); 02139 //Update the terminator 02140 TerminatorInst *term = ((BasicBlock*)*P)->getTerminator(); 02141 for(unsigned i=0; i < term->getNumSuccessors(); ++i) { 02142 if(term->getSuccessor(i) == llvmBB) { 02143 DEBUG(std::cerr << "Replacing successor bb\n"); 02144 if(llvm_prologues.size() > 0) { 02145 term->setSuccessor(i, llvm_prologues[0]); 02146 //Also update its corresponding machine instruction 02147 MachineCodeForInstruction & tempMvec = 02148 MachineCodeForInstruction::get(term); 02149 for (unsigned j = 0; j < tempMvec.size(); j++) { 02150 MachineInstr *temp = tempMvec[j]; 02151 MachineOpCode opc = temp->getOpcode(); 02152 if(TMI->isBranch(opc)) { 02153 DEBUG(std::cerr << *temp << "\n"); 02154 //Update branch 02155 for(unsigned opNum = 0; opNum < temp->getNumOperands(); ++opNum) { 02156 MachineOperand &mOp = temp->getOperand(opNum); 02157 if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) { 02158 mOp.setValueReg(llvm_prologues[0]); 02159 } 02160 } 02161 } 02162 } 02163 } 02164 else { 02165 term->setSuccessor(i, llvmKernelBB); 02166 //Also update its corresponding machine instruction 02167 MachineCodeForInstruction & tempMvec = 02168 MachineCodeForInstruction::get(term); 02169 for (unsigned j = 0; j < tempMvec.size(); j++) { 02170 MachineInstr *temp = tempMvec[j]; 02171 MachineOpCode opc = temp->getOpcode(); 02172 if(TMI->isBranch(opc)) { 02173 DEBUG(std::cerr << *temp << "\n"); 02174 //Update branch 02175 for(unsigned opNum = 0; opNum < temp->getNumOperands(); ++opNum) { 02176 MachineOperand &mOp = temp->getOperand(opNum); 02177 if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) { 02178 mOp.setValueReg(llvmKernelBB); 02179 } 02180 } 02181 } 02182 } 02183 } 02184 } 02185 } 02186 break; 02187 } 02188 } 02189 02190 02191 //BB->getParent()->getBasicBlockList().erase(BB); 02192 02193 } 02194