LLVM API Documentation
00001 //===-- ModuloSchedulingSuperBlock.cpp - ModuloScheduling--------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This ModuloScheduling pass is based on the Swing Modulo Scheduling 00011 // algorithm, but has been extended to support SuperBlocks (multiple 00012 // basic block, single entry, multipl exit loops). 00013 // 00014 //===----------------------------------------------------------------------===// 00015 00016 #define DEBUG_TYPE "ModuloSchedSB" 00017 00018 #include "DependenceAnalyzer.h" 00019 #include "ModuloSchedulingSuperBlock.h" 00020 #include "llvm/Constants.h" 00021 #include "llvm/ADT/Statistic.h" 00022 #include "llvm/CodeGen/MachineFunction.h" 00023 #include "llvm/CodeGen/Passes.h" 00024 #include "llvm/Support/CFG.h" 00025 #include "llvm/Support/Debug.h" 00026 #include "llvm/Support/GraphWriter.h" 00027 #include "llvm/Support/Timer.h" 00028 #include "llvm/ADT/StringExtras.h" 00029 #include "llvm/ADT/SCCIterator.h" 00030 #include "llvm/Instructions.h" 00031 #include "../MachineCodeForInstruction.h" 00032 #include "../SparcV9RegisterInfo.h" 00033 #include "../SparcV9Internals.h" 00034 #include "../SparcV9TmpInstr.h" 00035 #include <fstream> 00036 #include <sstream> 00037 #include <cmath> 00038 #include <utility> 00039 00040 using namespace llvm; 00041 /// Create ModuloSchedulingSBPass 00042 /// 00043 FunctionPass *llvm::createModuloSchedulingSBPass(TargetMachine & targ) { 00044 DEBUG(std::cerr << "Created ModuloSchedulingSBPass\n"); 00045 return new ModuloSchedulingSBPass(targ); 00046 } 00047 00048 00049 #if 1 00050 #define TIME_REGION(VARNAME, DESC) \ 00051 NamedRegionTimer VARNAME(DESC) 00052 #else 00053 #define TIME_REGION(VARNAME, DESC) 00054 #endif 00055 00056 00057 //Graph Traits for printing out the dependence graph 00058 template<typename GraphType> 00059 static void WriteGraphToFileSB(std::ostream &O, const std::string &GraphName, 00060 const GraphType >) { 00061 std::string Filename = GraphName + ".dot"; 00062 O << "Writing '" << Filename << "'..."; 00063 std::ofstream F(Filename.c_str()); 00064 00065 if (F.good()) 00066 WriteGraph(F, GT); 00067 else 00068 O << " error opening file for writing!"; 00069 O << "\n"; 00070 }; 00071 00072 namespace llvm { 00073 Statistic<> NumLoops("moduloschedSB-numLoops", "Total Number of Loops"); 00074 Statistic<> NumSB("moduloschedSB-numSuperBlocks", "Total Number of SuperBlocks"); 00075 Statistic<> BBWithCalls("modulosched-BBCalls", "Basic Blocks rejected due to calls"); 00076 Statistic<> BBWithCondMov("modulosched-loopCondMov", 00077 "Basic Blocks rejected due to conditional moves"); 00078 Statistic<> SBResourceConstraint("modulosched-resourceConstraint", 00079 "Loops constrained by resources"); 00080 Statistic<> SBRecurrenceConstraint("modulosched-recurrenceConstraint", 00081 "Loops constrained by recurrences"); 00082 Statistic<> SBFinalIISum("modulosched-finalIISum", "Sum of all final II"); 00083 Statistic<> SBIISum("modulosched-IISum", "Sum of all theoretical II"); 00084 Statistic<> SBMSLoops("modulosched-schedLoops", "Number of loops successfully modulo-scheduled"); 00085 Statistic<> SBNoSched("modulosched-noSched", "No schedule"); 00086 Statistic<> SBSameStage("modulosched-sameStage", "Max stage is 0"); 00087 Statistic<> SBBLoops("modulosched-SBBLoops", "Number single basic block loops"); 00088 Statistic<> SBInvalid("modulosched-SBInvalid", "Number invalid superblock loops"); 00089 Statistic<> SBValid("modulosched-SBValid", "Number valid superblock loops"); 00090 Statistic<> SBSize("modulosched-SBSize", "Total size of all valid superblocks"); 00091 00092 template<> 00093 struct DOTGraphTraits<MSchedGraphSB*> : public DefaultDOTGraphTraits { 00094 static std::string getGraphName(MSchedGraphSB *F) { 00095 return "Dependence Graph"; 00096 } 00097 00098 static std::string getNodeLabel(MSchedGraphSBNode *Node, MSchedGraphSB *Graph) { 00099 if(!Node->isPredicate()) { 00100 if (Node->getInst()) { 00101 std::stringstream ss; 00102 ss << *(Node->getInst()); 00103 return ss.str(); //((MachineInstr*)Node->getInst()); 00104 } 00105 else 00106 return "No Inst"; 00107 } 00108 else 00109 return "Pred Node"; 00110 } 00111 static std::string getEdgeSourceLabel(MSchedGraphSBNode *Node, 00112 MSchedGraphSBNode::succ_iterator I) { 00113 //Label each edge with the type of dependence 00114 std::string edgelabel = ""; 00115 switch (I.getEdge().getDepOrderType()) { 00116 00117 case MSchedGraphSBEdge::TrueDep: 00118 edgelabel = "True"; 00119 break; 00120 00121 case MSchedGraphSBEdge::AntiDep: 00122 edgelabel = "Anti"; 00123 break; 00124 00125 case MSchedGraphSBEdge::OutputDep: 00126 edgelabel = "Output"; 00127 break; 00128 00129 case MSchedGraphSBEdge::NonDataDep: 00130 edgelabel = "Pred"; 00131 break; 00132 00133 default: 00134 edgelabel = "Unknown"; 00135 break; 00136 } 00137 00138 //FIXME 00139 int iteDiff = I.getEdge().getIteDiff(); 00140 std::string intStr = "(IteDiff: "; 00141 intStr += itostr(iteDiff); 00142 00143 intStr += ")"; 00144 edgelabel += intStr; 00145 00146 return edgelabel; 00147 } 00148 }; 00149 00150 bool ModuloSchedulingSBPass::runOnFunction(Function &F) { 00151 bool Changed = false; 00152 00153 //Get MachineFunction 00154 MachineFunction &MF = MachineFunction::get(&F); 00155 00156 //Get Loop Info & Dependence Anaysis info 00157 LoopInfo &LI = getAnalysis<LoopInfo>(); 00158 DependenceAnalyzer &DA = getAnalysis<DependenceAnalyzer>(); 00159 00160 //Worklist of superblocks 00161 std::vector<std::vector<const MachineBasicBlock*> > Worklist; 00162 FindSuperBlocks(F, LI, Worklist); 00163 00164 DEBUG(if(Worklist.size() == 0) std::cerr << "No superblocks in function to ModuloSchedule\n"); 00165 00166 //Loop over worklist and ModuloSchedule each SuperBlock 00167 for(std::vector<std::vector<const MachineBasicBlock*> >::iterator SB = Worklist.begin(), 00168 SBE = Worklist.end(); SB != SBE; ++SB) { 00169 00170 //Print out Superblock 00171 DEBUG(std::cerr << "ModuloScheduling SB: \n"; 00172 for(std::vector<const MachineBasicBlock*>::const_iterator BI = SB->begin(), 00173 BE = SB->end(); BI != BE; ++BI) { 00174 (*BI)->print(std::cerr);}); 00175 00176 if(!CreateDefMap(*SB)) { 00177 defaultInst = 0; 00178 defMap.clear(); 00179 continue; 00180 } 00181 00182 MSchedGraphSB *MSG = new MSchedGraphSB(*SB, target, indVarInstrs[*SB], DA, 00183 machineTollvm[*SB]); 00184 00185 //Write Graph out to file 00186 DEBUG(WriteGraphToFileSB(std::cerr, F.getName(), MSG)); 00187 00188 //Calculate Resource II 00189 int ResMII = calculateResMII(*SB); 00190 00191 //Calculate Recurrence II 00192 int RecMII = calculateRecMII(MSG, ResMII); 00193 00194 DEBUG(std::cerr << "Number of reccurrences found: " << recurrenceList.size() << "\n"); 00195 00196 //Our starting initiation interval is the maximum of RecMII and ResMII 00197 if(RecMII < ResMII) 00198 ++SBRecurrenceConstraint; 00199 else 00200 ++SBResourceConstraint; 00201 00202 II = std::max(RecMII, ResMII); 00203 int mII = II; 00204 00205 00206 //Print out II, RecMII, and ResMII 00207 DEBUG(std::cerr << "II starts out as " << II << " ( RecMII=" << RecMII << " and ResMII=" << ResMII << ")\n"); 00208 00209 //Calculate Node Properties 00210 calculateNodeAttributes(MSG, ResMII); 00211 00212 //Dump node properties if in debug mode 00213 DEBUG(for(std::map<MSchedGraphSBNode*, MSNodeSBAttributes>::iterator I = nodeToAttributesMap.begin(), 00214 E = nodeToAttributesMap.end(); I !=E; ++I) { 00215 std::cerr << "Node: " << *(I->first) << " ASAP: " << I->second.ASAP << " ALAP: " 00216 << I->second.ALAP << " MOB: " << I->second.MOB << " Depth: " << I->second.depth 00217 << " Height: " << I->second.height << "\n"; 00218 }); 00219 00220 00221 //Put nodes in order to schedule them 00222 computePartialOrder(); 00223 00224 //Dump out partial order 00225 DEBUG(for(std::vector<std::set<MSchedGraphSBNode*> >::iterator I = partialOrder.begin(), 00226 E = partialOrder.end(); I !=E; ++I) { 00227 std::cerr << "Start set in PO\n"; 00228 for(std::set<MSchedGraphSBNode*>::iterator J = I->begin(), JE = I->end(); J != JE; ++J) 00229 std::cerr << "PO:" << **J << "\n"; 00230 }); 00231 00232 //Place nodes in final order 00233 orderNodes(); 00234 00235 //Dump out order of nodes 00236 DEBUG(for(std::vector<MSchedGraphSBNode*>::iterator I = FinalNodeOrder.begin(), E = FinalNodeOrder.end(); I != E; ++I) { 00237 std::cerr << "FO:" << **I << "\n"; 00238 }); 00239 00240 00241 //Finally schedule nodes 00242 bool haveSched = computeSchedule(*SB, MSG); 00243 00244 //Print out final schedule 00245 DEBUG(schedule.print(std::cerr)); 00246 00247 //Final scheduling step is to reconstruct the loop only if we actual have 00248 //stage > 0 00249 if(haveSched) { 00250 //schedule.printSchedule(std::cerr); 00251 reconstructLoop(*SB); 00252 ++SBMSLoops; 00253 //Changed = true; 00254 SBIISum += mII; 00255 SBFinalIISum += II; 00256 00257 if(schedule.getMaxStage() == 0) 00258 ++SBSameStage; 00259 } 00260 else 00261 ++SBNoSched; 00262 00263 //Clear out our maps for the next basic block that is processed 00264 nodeToAttributesMap.clear(); 00265 partialOrder.clear(); 00266 recurrenceList.clear(); 00267 FinalNodeOrder.clear(); 00268 schedule.clear(); 00269 defMap.clear(); 00270 00271 } 00272 return Changed; 00273 } 00274 00275 void ModuloSchedulingSBPass::FindSuperBlocks(Function &F, LoopInfo &LI, 00276 std::vector<std::vector<const MachineBasicBlock*> > &Worklist) { 00277 00278 //Get MachineFunction 00279 MachineFunction &MF = MachineFunction::get(&F); 00280 00281 //Map of LLVM BB to machine BB 00282 std::map<BasicBlock*, MachineBasicBlock*> bbMap; 00283 00284 for (MachineFunction::iterator BI = MF.begin(); BI != MF.end(); ++BI) { 00285 BasicBlock *llvmBB = (BasicBlock*) BI->getBasicBlock(); 00286 assert(!bbMap.count(llvmBB) && "LLVM BB already in map!"); 00287 bbMap[llvmBB] = &*BI; 00288 } 00289 00290 //Iterate over the loops, and find super blocks 00291 for(LoopInfo::iterator LB = LI.begin(), LE = LI.end(); LB != LE; ++LB) { 00292 Loop *L = *LB; 00293 ++NumLoops; 00294 00295 //If loop is not single entry, try the next one 00296 if(!L->getLoopPreheader()) 00297 continue; 00298 00299 //Check size of this loop, we don't want SBB loops 00300 if(L->getBlocks().size() == 1) 00301 continue; 00302 00303 //Check if this loop contains no sub loops 00304 if(L->getSubLoops().size() == 0) { 00305 00306 std::vector<const MachineBasicBlock*> superBlock; 00307 00308 //Get Loop Headers 00309 BasicBlock *header = L->getHeader(); 00310 00311 //Follow the header and make sure each BB only has one entry and is valid 00312 BasicBlock *current = header; 00313 assert(bbMap.count(current) && "LLVM BB must have corresponding Machine BB\n"); 00314 MachineBasicBlock *currentMBB = bbMap[header]; 00315 bool done = false; 00316 bool success = true; 00317 unsigned offset = 0; 00318 std::map<const MachineInstr*, unsigned> indexMap; 00319 00320 while(!done) { 00321 //Loop over successors of this BB, they should be in the 00322 //loop block and be valid 00323 BasicBlock *next = 0; 00324 for(succ_iterator I = succ_begin(current), E = succ_end(current); 00325 I != E; ++I) { 00326 if(L->contains(*I)) { 00327 if(!next) 00328 next = *I; 00329 else { 00330 done = true; 00331 success = false; 00332 break; 00333 } 00334 } 00335 } 00336 00337 if(success) { 00338 superBlock.push_back(currentMBB); 00339 if(next == header) 00340 done = true; 00341 else if(!next->getSinglePredecessor()) { 00342 done = true; 00343 success = false; 00344 } 00345 else { 00346 //Check that the next BB only has one entry 00347 current = next; 00348 assert(bbMap.count(current) && "LLVM BB must have corresponding Machine BB"); 00349 currentMBB = bbMap[current]; 00350 } 00351 } 00352 } 00353 00354 00355 00356 00357 00358 if(success) { 00359 ++NumSB; 00360 00361 //Loop over all the blocks in the superblock 00362 for(std::vector<const MachineBasicBlock*>::iterator currentMBB = superBlock.begin(), MBBEnd = superBlock.end(); currentMBB != MBBEnd; ++currentMBB) { 00363 if(!MachineBBisValid(*currentMBB, indexMap, offset)) { 00364 success = false; 00365 break; 00366 } 00367 } 00368 } 00369 00370 if(success) { 00371 if(getIndVar(superBlock, bbMap, indexMap)) { 00372 ++SBValid; 00373 Worklist.push_back(superBlock); 00374 SBSize += superBlock.size(); 00375 } 00376 else 00377 ++SBInvalid; 00378 } 00379 } 00380 } 00381 } 00382 00383 00384 bool ModuloSchedulingSBPass::getIndVar(std::vector<const MachineBasicBlock*> &superBlock, std::map<BasicBlock*, MachineBasicBlock*> &bbMap, 00385 std::map<const MachineInstr*, unsigned> &indexMap) { 00386 //See if we can get induction var instructions 00387 std::set<const BasicBlock*> llvmSuperBlock; 00388 00389 for(unsigned i =0; i < superBlock.size(); ++i) 00390 llvmSuperBlock.insert(superBlock[i]->getBasicBlock()); 00391 00392 //Get Target machine instruction info 00393 const TargetInstrInfo *TMI = target.getInstrInfo(); 00394 00395 //Get the loop back branch 00396 BranchInst *b = dyn_cast<BranchInst>(((BasicBlock*) (superBlock[superBlock.size()-1])->getBasicBlock())->getTerminator()); 00397 std::set<Instruction*> indVar; 00398 00399 if(b->isConditional()) { 00400 //Get the condition for the branch 00401 Value *cond = b->getCondition(); 00402 00403 DEBUG(std::cerr << "Condition: " << *cond << "\n"); 00404 00405 //List of instructions associated with induction variable 00406 std::vector<Instruction*> stack; 00407 00408 //Add branch 00409 indVar.insert(b); 00410 00411 if(Instruction *I = dyn_cast<Instruction>(cond)) 00412 if(bbMap.count(I->getParent())) { 00413 if (!assocIndVar(I, indVar, stack, bbMap, superBlock[(superBlock.size()-1)]->getBasicBlock(), llvmSuperBlock)) 00414 return false; 00415 } 00416 else 00417 return false; 00418 else 00419 return false; 00420 } 00421 else { 00422 indVar.insert(b); 00423 } 00424 00425 //Dump out instructions associate with indvar for debug reasons 00426 DEBUG(for(std::set<Instruction*>::iterator N = indVar.begin(), NE = indVar.end(); 00427 N != NE; ++N) { 00428 std::cerr << **N << "\n"; 00429 }); 00430 00431 //Create map of machine instr to llvm instr 00432 std::map<MachineInstr*, Instruction*> mllvm; 00433 for(std::vector<const MachineBasicBlock*>::iterator MBB = superBlock.begin(), MBE = superBlock.end(); MBB != MBE; ++MBB) { 00434 BasicBlock *BB = (BasicBlock*) (*MBB)->getBasicBlock(); 00435 for(BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { 00436 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(I); 00437 for (unsigned j = 0; j < tempMvec.size(); j++) { 00438 mllvm[tempMvec[j]] = I; 00439 } 00440 } 00441 } 00442 00443 //Convert list of LLVM Instructions to list of Machine instructions 00444 std::map<const MachineInstr*, unsigned> mIndVar; 00445 for(std::set<Instruction*>::iterator N = indVar.begin(), 00446 NE = indVar.end(); N != NE; ++N) { 00447 00448 //If we have a load, we can't handle this loop because 00449 //there is no way to preserve dependences between loads 00450 //and stores 00451 if(isa<LoadInst>(*N)) 00452 return false; 00453 00454 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(*N); 00455 for (unsigned j = 0; j < tempMvec.size(); j++) { 00456 MachineOpCode OC = (tempMvec[j])->getOpcode(); 00457 if(TMI->isNop(OC)) 00458 continue; 00459 if(!indexMap.count(tempMvec[j])) 00460 continue; 00461 mIndVar[(MachineInstr*) tempMvec[j]] = indexMap[(MachineInstr*) tempMvec[j]]; 00462 DEBUG(std::cerr << *(tempMvec[j]) << " at index " << indexMap[(MachineInstr*) tempMvec[j]] << "\n"); 00463 } 00464 } 00465 00466 //Put into a map for future access 00467 indVarInstrs[superBlock] = mIndVar; 00468 machineTollvm[superBlock] = mllvm; 00469 00470 return true; 00471 00472 } 00473 00474 bool ModuloSchedulingSBPass::assocIndVar(Instruction *I, 00475 std::set<Instruction*> &indVar, 00476 std::vector<Instruction*> &stack, 00477 std::map<BasicBlock*, MachineBasicBlock*> &bbMap, 00478 const BasicBlock *last, std::set<const BasicBlock*> &llvmSuperBlock) { 00479 00480 stack.push_back(I); 00481 00482 //If this is a phi node, check if its the canonical indvar 00483 if(PHINode *PN = dyn_cast<PHINode>(I)) { 00484 if(llvmSuperBlock.count(PN->getParent())) { 00485 if (Instruction *Inc = 00486 dyn_cast<Instruction>(PN->getIncomingValueForBlock(last))) 00487 if (Inc->getOpcode() == Instruction::Add && Inc->getOperand(0) == PN) 00488 if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1))) 00489 if (CI->equalsInt(1)) { 00490 //We have found the indvar, so add the stack, and inc instruction to the set 00491 indVar.insert(stack.begin(), stack.end()); 00492 indVar.insert(Inc); 00493 stack.pop_back(); 00494 return true; 00495 } 00496 return false; 00497 } 00498 } 00499 else { 00500 //Loop over each of the instructions operands, check if they are an instruction and in this BB 00501 for(unsigned i = 0; i < I->getNumOperands(); ++i) { 00502 if(Instruction *N = dyn_cast<Instruction>(I->getOperand(i))) { 00503 if(bbMap.count(N->getParent())) 00504 if(!assocIndVar(N, indVar, stack, bbMap, last, llvmSuperBlock)) 00505 return false; 00506 } 00507 } 00508 } 00509 00510 stack.pop_back(); 00511 return true; 00512 } 00513 00514 00515 /// This function checks if a Machine Basic Block is valid for modulo 00516 /// scheduling. This means that it has no control flow (if/else or 00517 /// calls) in the block. Currently ModuloScheduling only works on 00518 /// single basic block loops. 00519 bool ModuloSchedulingSBPass::MachineBBisValid(const MachineBasicBlock *BI, 00520 std::map<const MachineInstr*, unsigned> &indexMap, 00521 unsigned &offset) { 00522 00523 //Check size of our basic block.. make sure we have more then just the terminator in it 00524 if(BI->getBasicBlock()->size() == 1) 00525 return false; 00526 00527 //Get Target machine instruction info 00528 const TargetInstrInfo *TMI = target.getInstrInfo(); 00529 00530 unsigned count = 0; 00531 for(MachineBasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) { 00532 //Get opcode to check instruction type 00533 MachineOpCode OC = I->getOpcode(); 00534 00535 //Look for calls 00536 if(TMI->isCall(OC)) { 00537 ++BBWithCalls; 00538 return false; 00539 } 00540 00541 //Look for conditional move 00542 if(OC == V9::MOVRZr || OC == V9::MOVRZi || OC == V9::MOVRLEZr || OC == V9::MOVRLEZi 00543 || OC == V9::MOVRLZr || OC == V9::MOVRLZi || OC == V9::MOVRNZr || OC == V9::MOVRNZi 00544 || OC == V9::MOVRGZr || OC == V9::MOVRGZi || OC == V9::MOVRGEZr 00545 || OC == V9::MOVRGEZi || OC == V9::MOVLEr || OC == V9::MOVLEi || OC == V9::MOVLEUr 00546 || OC == V9::MOVLEUi || OC == V9::MOVFLEr || OC == V9::MOVFLEi 00547 || OC == V9::MOVNEr || OC == V9::MOVNEi || OC == V9::MOVNEGr || OC == V9::MOVNEGi 00548 || OC == V9::MOVFNEr || OC == V9::MOVFNEi) { 00549 ++BBWithCondMov; 00550 return false; 00551 } 00552 00553 indexMap[I] = count + offset; 00554 00555 if(TMI->isNop(OC)) 00556 continue; 00557 00558 ++count; 00559 } 00560 00561 offset += count; 00562 00563 return true; 00564 } 00565 } 00566 00567 bool ModuloSchedulingSBPass::CreateDefMap(std::vector<const MachineBasicBlock*> &SB) { 00568 defaultInst = 0; 00569 00570 for(std::vector<const MachineBasicBlock*>::iterator BI = SB.begin(), 00571 BE = SB.end(); BI != BE; ++BI) { 00572 00573 for(MachineBasicBlock::const_iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) { 00574 for(unsigned opNum = 0; opNum < I->getNumOperands(); ++opNum) { 00575 const MachineOperand &mOp = I->getOperand(opNum); 00576 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) { 00577 Value *V = mOp.getVRegValue(); 00578 //assert if this is the second def we have seen 00579 if(defMap.count(V) && isa<PHINode>(V)) 00580 DEBUG(std::cerr << "FIXME: Dup def for phi!\n"); 00581 else { 00582 //assert(!defMap.count(V) && "Def already in the map"); 00583 if(defMap.count(V)) 00584 return false; 00585 defMap[V] = (MachineInstr*) &*I; 00586 } 00587 } 00588 00589 //See if we can use this Value* as our defaultInst 00590 if(!defaultInst && mOp.getType() == MachineOperand::MO_VirtualRegister) { 00591 Value *V = mOp.getVRegValue(); 00592 if(!isa<TmpInstruction>(V) && !isa<Argument>(V) && !isa<Constant>(V) && !isa<PHINode>(V)) 00593 defaultInst = (Instruction*) V; 00594 } 00595 } 00596 } 00597 } 00598 00599 if(!defaultInst) 00600 return false; 00601 00602 return true; 00603 00604 } 00605 00606 00607 //ResMII is calculated by determining the usage count for each resource 00608 //and using the maximum. 00609 //FIXME: In future there should be a way to get alternative resources 00610 //for each instruction 00611 int ModuloSchedulingSBPass::calculateResMII(std::vector<const MachineBasicBlock*> &superBlock) { 00612 00613 TIME_REGION(X, "calculateResMII"); 00614 00615 const TargetInstrInfo *mii = target.getInstrInfo(); 00616 const TargetSchedInfo *msi = target.getSchedInfo(); 00617 00618 int ResMII = 0; 00619 00620 //Map to keep track of usage count of each resource 00621 std::map<unsigned, unsigned> resourceUsageCount; 00622 00623 for(std::vector<const MachineBasicBlock*>::iterator BI = superBlock.begin(), BE = superBlock.end(); BI != BE; ++BI) { 00624 for(MachineBasicBlock::const_iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) { 00625 00626 //Get resource usage for this instruction 00627 InstrRUsage rUsage = msi->getInstrRUsage(I->getOpcode()); 00628 std::vector<std::vector<resourceId_t> > resources = rUsage.resourcesByCycle; 00629 00630 //Loop over resources in each cycle and increments their usage count 00631 for(unsigned i=0; i < resources.size(); ++i) 00632 for(unsigned j=0; j < resources[i].size(); ++j) { 00633 if(!resourceUsageCount.count(resources[i][j])) { 00634 resourceUsageCount[resources[i][j]] = 1; 00635 } 00636 else { 00637 resourceUsageCount[resources[i][j]] = resourceUsageCount[resources[i][j]] + 1; 00638 } 00639 } 00640 } 00641 } 00642 00643 //Find maximum usage count 00644 00645 //Get max number of instructions that can be issued at once. (FIXME) 00646 int issueSlots = msi->maxNumIssueTotal; 00647 00648 for(std::map<unsigned,unsigned>::iterator RB = resourceUsageCount.begin(), RE = resourceUsageCount.end(); RB != RE; ++RB) { 00649 00650 //Get the total number of the resources in our cpu 00651 int resourceNum = CPUResource::getCPUResource(RB->first)->maxNumUsers; 00652 00653 //Get total usage count for this resources 00654 unsigned usageCount = RB->second; 00655 00656 //Divide the usage count by either the max number we can issue or the number of 00657 //resources (whichever is its upper bound) 00658 double finalUsageCount; 00659 DEBUG(std::cerr << "Resource Num: " << RB->first << " Usage: " << usageCount << " TotalNum: " << resourceNum << "\n"); 00660 00661 if( resourceNum <= issueSlots) 00662 finalUsageCount = ceil(1.0 * usageCount / resourceNum); 00663 else 00664 finalUsageCount = ceil(1.0 * usageCount / issueSlots); 00665 00666 00667 //Only keep track of the max 00668 ResMII = std::max( (int) finalUsageCount, ResMII); 00669 00670 } 00671 00672 return ResMII; 00673 00674 } 00675 00676 /// calculateRecMII - Calculates the value of the highest recurrence 00677 /// By value we mean the total latency/distance 00678 int ModuloSchedulingSBPass::calculateRecMII(MSchedGraphSB *graph, int MII) { 00679 00680 TIME_REGION(X, "calculateRecMII"); 00681 00682 findAllCircuits(graph, MII); 00683 int RecMII = 0; 00684 00685 for(std::set<std::pair<int, std::vector<MSchedGraphSBNode*> > >::iterator I = recurrenceList.begin(), E=recurrenceList.end(); I !=E; ++I) { 00686 RecMII = std::max(RecMII, I->first); 00687 } 00688 00689 return MII; 00690 } 00691 00692 int CircCountSB; 00693 00694 void ModuloSchedulingSBPass::unblock(MSchedGraphSBNode *u, std::set<MSchedGraphSBNode*> &blocked, 00695 std::map<MSchedGraphSBNode*, std::set<MSchedGraphSBNode*> > &B) { 00696 00697 //Unblock u 00698 DEBUG(std::cerr << "Unblocking: " << *u << "\n"); 00699 blocked.erase(u); 00700 00701 //std::set<MSchedGraphSBNode*> toErase; 00702 while (!B[u].empty()) { 00703 MSchedGraphSBNode *W = *B[u].begin(); 00704 B[u].erase(W); 00705 //toErase.insert(*W); 00706 DEBUG(std::cerr << "Removed: " << *W << "from B-List\n"); 00707 if(blocked.count(W)) 00708 unblock(W, blocked, B); 00709 } 00710 00711 } 00712 00713 void ModuloSchedulingSBPass::addSCC(std::vector<MSchedGraphSBNode*> &SCC, std::map<MSchedGraphSBNode*, MSchedGraphSBNode*> &newNodes) { 00714 00715 int totalDelay = 0; 00716 int totalDistance = 0; 00717 std::vector<MSchedGraphSBNode*> recc; 00718 MSchedGraphSBNode *start = 0; 00719 MSchedGraphSBNode *end = 0; 00720 00721 //Loop over recurrence, get delay and distance 00722 for(std::vector<MSchedGraphSBNode*>::iterator N = SCC.begin(), NE = SCC.end(); N != NE; ++N) { 00723 DEBUG(std::cerr << **N << "\n"); 00724 totalDelay += (*N)->getLatency(); 00725 00726 for(unsigned i = 0; i < (*N)->succ_size(); ++i) { 00727 MSchedGraphSBEdge *edge = (*N)->getSuccessor(i); 00728 if(find(SCC.begin(), SCC.end(), edge->getDest()) != SCC.end()) { 00729 totalDistance += edge->getIteDiff(); 00730 if(edge->getIteDiff() > 0) 00731 if(!start && !end) { 00732 start = *N; 00733 end = edge->getDest(); 00734 } 00735 00736 } 00737 } 00738 00739 00740 //Get the original node 00741 recc.push_back(newNodes[*N]); 00742 00743 00744 } 00745 00746 DEBUG(std::cerr << "End Recc\n"); 00747 00748 00749 assert( (start && end) && "Must have start and end node to ignore edge for SCC"); 00750 00751 if(start && end) { 00752 //Insert reccurrence into the list 00753 DEBUG(std::cerr << "Ignore Edge from!!: " << *start << " to " << *end << "\n"); 00754 edgesToIgnore.insert(std::make_pair(newNodes[start], (newNodes[end])->getInEdgeNum(newNodes[start]))); 00755 } 00756 00757 int lastII = totalDelay / totalDistance; 00758 00759 00760 recurrenceList.insert(std::make_pair(lastII, recc)); 00761 00762 } 00763 00764 bool ModuloSchedulingSBPass::circuit(MSchedGraphSBNode *v, std::vector<MSchedGraphSBNode*> &stack, 00765 std::set<MSchedGraphSBNode*> &blocked, std::vector<MSchedGraphSBNode*> &SCC, 00766 MSchedGraphSBNode *s, std::map<MSchedGraphSBNode*, std::set<MSchedGraphSBNode*> > &B, 00767 int II, std::map<MSchedGraphSBNode*, MSchedGraphSBNode*> &newNodes) { 00768 bool f = false; 00769 00770 DEBUG(std::cerr << "Finding Circuits Starting with: ( " << v << ")"<< *v << "\n"); 00771 00772 //Push node onto the stack 00773 stack.push_back(v); 00774 00775 //block this node 00776 blocked.insert(v); 00777 00778 //Loop over all successors of node v that are in the scc, create Adjaceny list 00779 std::set<MSchedGraphSBNode*> AkV; 00780 for(MSchedGraphSBNode::succ_iterator I = v->succ_begin(), E = v->succ_end(); I != E; ++I) { 00781 if((std::find(SCC.begin(), SCC.end(), *I) != SCC.end())) { 00782 AkV.insert(*I); 00783 } 00784 } 00785 00786 for(std::set<MSchedGraphSBNode*>::iterator I = AkV.begin(), E = AkV.end(); I != E; ++I) { 00787 if(*I == s) { 00788 //We have a circuit, so add it to our list 00789 addRecc(stack, newNodes); 00790 f = true; 00791 } 00792 else if(!blocked.count(*I)) { 00793 if(circuit(*I, stack, blocked, SCC, s, B, II, newNodes)) 00794 f = true; 00795 } 00796 else 00797 DEBUG(std::cerr << "Blocked: " << **I << "\n"); 00798 } 00799 00800 00801 if(f) { 00802 unblock(v, blocked, B); 00803 } 00804 else { 00805 for(std::set<MSchedGraphSBNode*>::iterator I = AkV.begin(), E = AkV.end(); I != E; ++I) 00806 B[*I].insert(v); 00807 00808 } 00809 00810 //Pop v 00811 stack.pop_back(); 00812 00813 return f; 00814 00815 } 00816 00817 void ModuloSchedulingSBPass::addRecc(std::vector<MSchedGraphSBNode*> &stack, std::map<MSchedGraphSBNode*, MSchedGraphSBNode*> &newNodes) { 00818 std::vector<MSchedGraphSBNode*> recc; 00819 //Dump recurrence for now 00820 DEBUG(std::cerr << "Starting Recc\n"); 00821 00822 int totalDelay = 0; 00823 int totalDistance = 0; 00824 MSchedGraphSBNode *lastN = 0; 00825 MSchedGraphSBNode *start = 0; 00826 MSchedGraphSBNode *end = 0; 00827 00828 //Loop over recurrence, get delay and distance 00829 for(std::vector<MSchedGraphSBNode*>::iterator N = stack.begin(), NE = stack.end(); N != NE; ++N) { 00830 DEBUG(std::cerr << **N << "\n"); 00831 totalDelay += (*N)->getLatency(); 00832 if(lastN) { 00833 int iteDiff = (*N)->getInEdge(lastN).getIteDiff(); 00834 totalDistance += iteDiff; 00835 00836 if(iteDiff > 0) { 00837 start = lastN; 00838 end = *N; 00839 } 00840 } 00841 //Get the original node 00842 lastN = *N; 00843 recc.push_back(newNodes[*N]); 00844 00845 00846 } 00847 00848 //Get the loop edge 00849 totalDistance += lastN->getIteDiff(*stack.begin()); 00850 00851 DEBUG(std::cerr << "End Recc\n"); 00852 CircCountSB++; 00853 00854 if(start && end) { 00855 //Insert reccurrence into the list 00856 DEBUG(std::cerr << "Ignore Edge from!!: " << *start << " to " << *end << "\n"); 00857 edgesToIgnore.insert(std::make_pair(newNodes[start], (newNodes[end])->getInEdgeNum(newNodes[start]))); 00858 } 00859 else { 00860 //Insert reccurrence into the list 00861 DEBUG(std::cerr << "Ignore Edge from: " << *lastN << " to " << **stack.begin() << "\n"); 00862 edgesToIgnore.insert(std::make_pair(newNodes[lastN], newNodes[(*stack.begin())]->getInEdgeNum(newNodes[lastN]))); 00863 00864 } 00865 //Adjust II until we get close to the inequality delay - II*distance <= 0 00866 int RecMII = II; //Starting value 00867 int value = totalDelay-(RecMII * totalDistance); 00868 int lastII = II; 00869 while(value < 0) { 00870 00871 lastII = RecMII; 00872 RecMII--; 00873 value = totalDelay-(RecMII * totalDistance); 00874 } 00875 00876 recurrenceList.insert(std::make_pair(lastII, recc)); 00877 00878 } 00879 00880 00881 void ModuloSchedulingSBPass::findAllCircuits(MSchedGraphSB *g, int II) { 00882 00883 CircCountSB = 0; 00884 00885 //Keep old to new node mapping information 00886 std::map<MSchedGraphSBNode*, MSchedGraphSBNode*> newNodes; 00887 00888 //copy the graph 00889 MSchedGraphSB *MSG = new MSchedGraphSB(*g, newNodes); 00890 00891 DEBUG(std::cerr << "Finding All Circuits\n"); 00892 00893 //Set of blocked nodes 00894 std::set<MSchedGraphSBNode*> blocked; 00895 00896 //Stack holding current circuit 00897 std::vector<MSchedGraphSBNode*> stack; 00898 00899 //Map for B Lists 00900 std::map<MSchedGraphSBNode*, std::set<MSchedGraphSBNode*> > B; 00901 00902 //current node 00903 MSchedGraphSBNode *s; 00904 00905 00906 //Iterate over the graph until its down to one node or empty 00907 while(MSG->size() > 1) { 00908 00909 //Write Graph out to file 00910 //WriteGraphToFile(std::cerr, "Graph" + utostr(MSG->size()), MSG); 00911 00912 DEBUG(std::cerr << "Graph Size: " << MSG->size() << "\n"); 00913 DEBUG(std::cerr << "Finding strong component Vk with least vertex\n"); 00914 00915 //Iterate over all the SCCs in the graph 00916 std::set<MSchedGraphSBNode*> Visited; 00917 std::vector<MSchedGraphSBNode*> Vk; 00918 MSchedGraphSBNode* s = 0; 00919 int numEdges = 0; 00920 00921 //Find scc with the least vertex 00922 for (MSchedGraphSB::iterator GI = MSG->begin(), E = MSG->end(); GI != E; ++GI) 00923 if (Visited.insert(GI->second).second) { 00924 for (scc_iterator<MSchedGraphSBNode*> SCCI = scc_begin(GI->second), 00925 E = scc_end(GI->second); SCCI != E; ++SCCI) { 00926 std::vector<MSchedGraphSBNode*> &nextSCC = *SCCI; 00927 00928 if (Visited.insert(nextSCC[0]).second) { 00929 Visited.insert(nextSCC.begin()+1, nextSCC.end()); 00930 00931 if(nextSCC.size() > 1) { 00932 DEBUG(std::cerr << "SCC size: " << nextSCC.size() << "\n"); 00933 00934 for(unsigned i = 0; i < nextSCC.size(); ++i) { 00935 //Loop over successor and see if in scc, then count edge 00936 MSchedGraphSBNode *node = nextSCC[i]; 00937 for(MSchedGraphSBNode::succ_iterator S = node->succ_begin(), SE = node->succ_end(); S != SE; ++S) { 00938 if(find(nextSCC.begin(), nextSCC.end(), *S) != nextSCC.end()) 00939 numEdges++; 00940 } 00941 } 00942 DEBUG(std::cerr << "Num Edges: " << numEdges << "\n"); 00943 } 00944 00945 //Ignore self loops 00946 if(nextSCC.size() > 1) { 00947 00948 //Get least vertex in Vk 00949 if(!s) { 00950 s = nextSCC[0]; 00951 Vk = nextSCC; 00952 } 00953 00954 for(unsigned i = 0; i < nextSCC.size(); ++i) { 00955 if(nextSCC[i] < s) { 00956 s = nextSCC[i]; 00957 Vk = nextSCC; 00958 } 00959 } 00960 } 00961 } 00962 } 00963 } 00964 00965 00966 00967 //Process SCC 00968 DEBUG(for(std::vector<MSchedGraphSBNode*>::iterator N = Vk.begin(), NE = Vk.end(); 00969 N != NE; ++N) { std::cerr << *((*N)->getInst()); }); 00970 00971 //Iterate over all nodes in this scc 00972 for(std::vector<MSchedGraphSBNode*>::iterator N = Vk.begin(), NE = Vk.end(); 00973 N != NE; ++N) { 00974 blocked.erase(*N); 00975 B[*N].clear(); 00976 } 00977 if(Vk.size() > 1) { 00978 if(numEdges < 98) 00979 circuit(s, stack, blocked, Vk, s, B, II, newNodes); 00980 else 00981 addSCC(Vk, newNodes); 00982 00983 00984 //Delete nodes from the graph 00985 //Find all nodes up to s and delete them 00986 std::vector<MSchedGraphSBNode*> nodesToRemove; 00987 nodesToRemove.push_back(s); 00988 for(MSchedGraphSB::iterator N = MSG->begin(), NE = MSG->end(); N != NE; ++N) { 00989 if(N->second < s ) 00990 nodesToRemove.push_back(N->second); 00991 } 00992 for(std::vector<MSchedGraphSBNode*>::iterator N = nodesToRemove.begin(), NE = nodesToRemove.end(); N != NE; ++N) { 00993 DEBUG(std::cerr << "Deleting Node: " << **N << "\n"); 00994 MSG->deleteNode(*N); 00995 } 00996 } 00997 else 00998 break; 00999 } 01000 DEBUG(std::cerr << "Num Circuits found: " << CircCountSB << "\n"); 01001 } 01002 /// calculateNodeAttributes - The following properties are calculated for 01003 /// each node in the dependence graph: ASAP, ALAP, Depth, Height, and 01004 /// MOB. 01005 void ModuloSchedulingSBPass::calculateNodeAttributes(MSchedGraphSB *graph, int MII) { 01006 01007 TIME_REGION(X, "calculateNodeAttributes"); 01008 01009 assert(nodeToAttributesMap.empty() && "Node attribute map was not cleared"); 01010 01011 //Loop over the nodes and add them to the map 01012 for(MSchedGraphSB::iterator I = graph->begin(), E = graph->end(); I != E; ++I) { 01013 01014 DEBUG(std::cerr << "Inserting node into attribute map: " << *I->second << "\n"); 01015 01016 //Assert if its already in the map 01017 assert(nodeToAttributesMap.count(I->second) == 0 && 01018 "Node attributes are already in the map"); 01019 01020 //Put into the map with default attribute values 01021 nodeToAttributesMap[I->second] = MSNodeSBAttributes(); 01022 } 01023 01024 //Create set to deal with reccurrences 01025 std::set<MSchedGraphSBNode*> visitedNodes; 01026 01027 //Now Loop over map and calculate the node attributes 01028 for(std::map<MSchedGraphSBNode*, MSNodeSBAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) { 01029 calculateASAP(I->first, MII, (MSchedGraphSBNode*) 0); 01030 visitedNodes.clear(); 01031 } 01032 01033 int maxASAP = findMaxASAP(); 01034 //Calculate ALAP which depends on ASAP being totally calculated 01035 for(std::map<MSchedGraphSBNode*, MSNodeSBAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) { 01036 calculateALAP(I->first, MII, maxASAP, (MSchedGraphSBNode*) 0); 01037 visitedNodes.clear(); 01038 } 01039 01040 //Calculate MOB which depends on ASAP being totally calculated, also do depth and height 01041 for(std::map<MSchedGraphSBNode*, MSNodeSBAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) { 01042 (I->second).MOB = std::max(0,(I->second).ALAP - (I->second).ASAP); 01043 01044 DEBUG(std::cerr << "MOB: " << (I->second).MOB << " (" << *(I->first) << ")\n"); 01045 calculateDepth(I->first, (MSchedGraphSBNode*) 0); 01046 calculateHeight(I->first, (MSchedGraphSBNode*) 0); 01047 } 01048 01049 01050 } 01051 01052 /// ignoreEdge - Checks to see if this edge of a recurrence should be ignored or not 01053 bool ModuloSchedulingSBPass::ignoreEdge(MSchedGraphSBNode *srcNode, MSchedGraphSBNode *destNode) { 01054 if(destNode == 0 || srcNode ==0) 01055 return false; 01056 01057 bool findEdge = edgesToIgnore.count(std::make_pair(srcNode, destNode->getInEdgeNum(srcNode))); 01058 01059 DEBUG(std::cerr << "Ignoring edge? from: " << *srcNode << " to " << *destNode << "\n"); 01060 01061 return findEdge; 01062 } 01063 01064 01065 /// calculateASAP - Calculates the 01066 int ModuloSchedulingSBPass::calculateASAP(MSchedGraphSBNode *node, int MII, MSchedGraphSBNode *destNode) { 01067 01068 DEBUG(std::cerr << "Calculating ASAP for " << *node << "\n"); 01069 01070 //Get current node attributes 01071 MSNodeSBAttributes &attributes = nodeToAttributesMap.find(node)->second; 01072 01073 if(attributes.ASAP != -1) 01074 return attributes.ASAP; 01075 01076 int maxPredValue = 0; 01077 01078 //Iterate over all of the predecessors and find max 01079 for(MSchedGraphSBNode::pred_iterator P = node->pred_begin(), E = node->pred_end(); P != E; ++P) { 01080 01081 //Only process if we are not ignoring the edge 01082 if(!ignoreEdge(*P, node)) { 01083 int predASAP = -1; 01084 predASAP = calculateASAP(*P, MII, node); 01085 01086 assert(predASAP != -1 && "ASAP has not been calculated"); 01087 int iteDiff = node->getInEdge(*P).getIteDiff(); 01088 01089 int currentPredValue = predASAP + (*P)->getLatency() - (iteDiff * MII); 01090 DEBUG(std::cerr << "pred ASAP: " << predASAP << ", iteDiff: " << iteDiff << ", PredLatency: " << (*P)->getLatency() << ", Current ASAP pred: " << currentPredValue << "\n"); 01091 maxPredValue = std::max(maxPredValue, currentPredValue); 01092 } 01093 } 01094 01095 attributes.ASAP = maxPredValue; 01096 01097 DEBUG(std::cerr << "ASAP: " << attributes.ASAP << " (" << *node << ")\n"); 01098 01099 return maxPredValue; 01100 } 01101 01102 01103 int ModuloSchedulingSBPass::calculateALAP(MSchedGraphSBNode *node, int MII, 01104 int maxASAP, MSchedGraphSBNode *srcNode) { 01105 01106 DEBUG(std::cerr << "Calculating ALAP for " << *node << "\n"); 01107 01108 MSNodeSBAttributes &attributes = nodeToAttributesMap.find(node)->second; 01109 01110 if(attributes.ALAP != -1) 01111 return attributes.ALAP; 01112 01113 if(node->hasSuccessors()) { 01114 01115 //Trying to deal with the issue where the node has successors, but 01116 //we are ignoring all of the edges to them. So this is my hack for 01117 //now.. there is probably a more elegant way of doing this (FIXME) 01118 bool processedOneEdge = false; 01119 01120 //FIXME, set to something high to start 01121 int minSuccValue = 9999999; 01122 01123 //Iterate over all of the predecessors and fine max 01124 for(MSchedGraphSBNode::succ_iterator P = node->succ_begin(), 01125 E = node->succ_end(); P != E; ++P) { 01126 01127 //Only process if we are not ignoring the edge 01128 if(!ignoreEdge(node, *P)) { 01129 processedOneEdge = true; 01130 int succALAP = -1; 01131 succALAP = calculateALAP(*P, MII, maxASAP, node); 01132 01133 assert(succALAP != -1 && "Successors ALAP should have been caclulated"); 01134 01135 int iteDiff = P.getEdge().getIteDiff(); 01136 01137 int currentSuccValue = succALAP - node->getLatency() + iteDiff * MII; 01138 01139 DEBUG(std::cerr << "succ ALAP: " << succALAP << ", iteDiff: " << iteDiff << ", SuccLatency: " << (*P)->getLatency() << ", Current ALAP succ: " << currentSuccValue << "\n"); 01140 01141 minSuccValue = std::min(minSuccValue, currentSuccValue); 01142 } 01143 } 01144 01145 if(processedOneEdge) 01146 attributes.ALAP = minSuccValue; 01147 01148 else 01149 attributes.ALAP = maxASAP; 01150 } 01151 else 01152 attributes.ALAP = maxASAP; 01153 01154 DEBUG(std::cerr << "ALAP: " << attributes.ALAP << " (" << *node << ")\n"); 01155 01156 if(attributes.ALAP < 0) 01157 attributes.ALAP = 0; 01158 01159 return attributes.ALAP; 01160 } 01161 01162 int ModuloSchedulingSBPass::findMaxASAP() { 01163 int maxASAP = 0; 01164 01165 for(std::map<MSchedGraphSBNode*, MSNodeSBAttributes>::iterator I = nodeToAttributesMap.begin(), 01166 E = nodeToAttributesMap.end(); I != E; ++I) 01167 maxASAP = std::max(maxASAP, I->second.ASAP); 01168 return maxASAP; 01169 } 01170 01171 01172 int ModuloSchedulingSBPass::calculateHeight(MSchedGraphSBNode *node,MSchedGraphSBNode *srcNode) { 01173 01174 MSNodeSBAttributes &attributes = nodeToAttributesMap.find(node)->second; 01175 01176 if(attributes.height != -1) 01177 return attributes.height; 01178 01179 int maxHeight = 0; 01180 01181 //Iterate over all of the predecessors and find max 01182 for(MSchedGraphSBNode::succ_iterator P = node->succ_begin(), 01183 E = node->succ_end(); P != E; ++P) { 01184 01185 01186 if(!ignoreEdge(node, *P)) { 01187 int succHeight = calculateHeight(*P, node); 01188 01189 assert(succHeight != -1 && "Successors Height should have been caclulated"); 01190 01191 int currentHeight = succHeight + node->getLatency(); 01192 maxHeight = std::max(maxHeight, currentHeight); 01193 } 01194 } 01195 attributes.height = maxHeight; 01196 DEBUG(std::cerr << "Height: " << attributes.height << " (" << *node << ")\n"); 01197 return maxHeight; 01198 } 01199 01200 01201 int ModuloSchedulingSBPass::calculateDepth(MSchedGraphSBNode *node, 01202 MSchedGraphSBNode *destNode) { 01203 01204 MSNodeSBAttributes &attributes = nodeToAttributesMap.find(node)->second; 01205 01206 if(attributes.depth != -1) 01207 return attributes.depth; 01208 01209 int maxDepth = 0; 01210 01211 //Iterate over all of the predecessors and fine max 01212 for(MSchedGraphSBNode::pred_iterator P = node->pred_begin(), E = node->pred_end(); P != E; ++P) { 01213 01214 if(!ignoreEdge(*P, node)) { 01215 int predDepth = -1; 01216 predDepth = calculateDepth(*P, node); 01217 01218 assert(predDepth != -1 && "Predecessors ASAP should have been caclulated"); 01219 01220 int currentDepth = predDepth + (*P)->getLatency(); 01221 maxDepth = std::max(maxDepth, currentDepth); 01222 } 01223 } 01224 attributes.depth = maxDepth; 01225 01226 DEBUG(std::cerr << "Depth: " << attributes.depth << " (" << *node << "*)\n"); 01227 return maxDepth; 01228 } 01229 01230 void ModuloSchedulingSBPass::computePartialOrder() { 01231 01232 TIME_REGION(X, "calculatePartialOrder"); 01233 01234 DEBUG(std::cerr << "Computing Partial Order\n"); 01235 01236 //Steps to add a recurrence to the partial order 1) Find reccurrence 01237 //with the highest RecMII. Add it to the partial order. 2) For each 01238 //recurrence with decreasing RecMII, add it to the partial order 01239 //along with any nodes that connect this recurrence to recurrences 01240 //already in the partial order 01241 for(std::set<std::pair<int, std::vector<MSchedGraphSBNode*> > >::reverse_iterator 01242 I = recurrenceList.rbegin(), E=recurrenceList.rend(); I !=E; ++I) { 01243 01244 std::set<MSchedGraphSBNode*> new_recurrence; 01245 01246 //Loop through recurrence and remove any nodes already in the partial order 01247 for(std::vector<MSchedGraphSBNode*>::const_iterator N = I->second.begin(), 01248 NE = I->second.end(); N != NE; ++N) { 01249 01250 bool found = false; 01251 for(std::vector<std::set<MSchedGraphSBNode*> >::iterator PO = partialOrder.begin(), 01252 PE = partialOrder.end(); PO != PE; ++PO) { 01253 if(PO->count(*N)) 01254 found = true; 01255 } 01256 01257 //Check if its a branch, and remove to handle special 01258 if(!found) { 01259 new_recurrence.insert(*N); 01260 } 01261 01262 } 01263 01264 01265 if(new_recurrence.size() > 0) { 01266 01267 std::vector<MSchedGraphSBNode*> path; 01268 std::set<MSchedGraphSBNode*> nodesToAdd; 01269 01270 //Dump recc we are dealing with (minus nodes already in PO) 01271 DEBUG(std::cerr << "Recc: "); 01272 DEBUG(for(std::set<MSchedGraphSBNode*>::iterator R = new_recurrence.begin(), RE = new_recurrence.end(); R != RE; ++R) { std::cerr << **R ; }); 01273 01274 //Add nodes that connect this recurrence to recurrences in the partial path 01275 for(std::set<MSchedGraphSBNode*>::iterator N = new_recurrence.begin(), 01276 NE = new_recurrence.end(); N != NE; ++N) 01277 searchPath(*N, path, nodesToAdd, new_recurrence); 01278 01279 //Add nodes to this recurrence if they are not already in the partial order 01280 for(std::set<MSchedGraphSBNode*>::iterator N = nodesToAdd.begin(), NE = nodesToAdd.end(); 01281 N != NE; ++N) { 01282 bool found = false; 01283 for(std::vector<std::set<MSchedGraphSBNode*> >::iterator PO = partialOrder.begin(), 01284 PE = partialOrder.end(); PO != PE; ++PO) { 01285 if(PO->count(*N)) 01286 found = true; 01287 } 01288 if(!found) { 01289 assert("FOUND CONNECTOR"); 01290 new_recurrence.insert(*N); 01291 } 01292 } 01293 01294 partialOrder.push_back(new_recurrence); 01295 } 01296 } 01297 01298 //Add any nodes that are not already in the partial order 01299 //Add them in a set, one set per connected component 01300 std::set<MSchedGraphSBNode*> lastNodes; 01301 std::set<MSchedGraphSBNode*> noPredNodes; 01302 for(std::map<MSchedGraphSBNode*, MSNodeSBAttributes>::iterator I = nodeToAttributesMap.begin(), 01303 E = nodeToAttributesMap.end(); I != E; ++I) { 01304 01305 bool found = false; 01306 01307 //Check if its already in our partial order, if not add it to the final vector 01308 for(std::vector<std::set<MSchedGraphSBNode*> >::iterator PO = partialOrder.begin(), 01309 PE = partialOrder.end(); PO != PE; ++PO) { 01310 if(PO->count(I->first)) 01311 found = true; 01312 } 01313 if(!found) 01314 lastNodes.insert(I->first); 01315 } 01316 01317 //For each node w/out preds, see if there is a path to one of the 01318 //recurrences, and if so add them to that current recc 01319 /*for(std::set<MSchedGraphSBNode*>::iterator N = noPredNodes.begin(), NE = noPredNodes.end(); 01320 N != NE; ++N) { 01321 DEBUG(std::cerr << "No Pred Path from: " << **N << "\n"); 01322 for(std::vector<std::set<MSchedGraphSBNode*> >::iterator PO = partialOrder.begin(), 01323 PE = partialOrder.end(); PO != PE; ++PO) { 01324 std::vector<MSchedGraphSBNode*> path; 01325 pathToRecc(*N, path, *PO, lastNodes); 01326 } 01327 }*/ 01328 01329 01330 //Break up remaining nodes that are not in the partial order 01331 ///into their connected compoenents 01332 while(lastNodes.size() > 0) { 01333 std::set<MSchedGraphSBNode*> ccSet; 01334 connectedComponentSet(*(lastNodes.begin()),ccSet, lastNodes); 01335 if(ccSet.size() > 0) 01336 partialOrder.push_back(ccSet); 01337 } 01338 01339 } 01340 01341 void ModuloSchedulingSBPass::connectedComponentSet(MSchedGraphSBNode *node, std::set<MSchedGraphSBNode*> &ccSet, std::set<MSchedGraphSBNode*> &lastNodes) { 01342 01343 //Add to final set 01344 if( !ccSet.count(node) && lastNodes.count(node)) { 01345 lastNodes.erase(node); 01346 ccSet.insert(node); 01347 } 01348 else 01349 return; 01350 01351 //Loop over successors and recurse if we have not seen this node before 01352 for(MSchedGraphSBNode::succ_iterator node_succ = node->succ_begin(), end=node->succ_end(); node_succ != end; ++node_succ) { 01353 connectedComponentSet(*node_succ, ccSet, lastNodes); 01354 } 01355 01356 } 01357 01358 void ModuloSchedulingSBPass::searchPath(MSchedGraphSBNode *node, 01359 std::vector<MSchedGraphSBNode*> &path, 01360 std::set<MSchedGraphSBNode*> &nodesToAdd, 01361 std::set<MSchedGraphSBNode*> &new_reccurrence) { 01362 //Push node onto the path 01363 path.push_back(node); 01364 01365 //Loop over all successors and see if there is a path from this node to 01366 //a recurrence in the partial order, if so.. add all nodes to be added to recc 01367 for(MSchedGraphSBNode::succ_iterator S = node->succ_begin(), SE = node->succ_end(); S != SE; 01368 ++S) { 01369 01370 //Check if we should ignore this edge first 01371 if(ignoreEdge(node,*S)) 01372 continue; 01373 01374 //check if successor is in this recurrence, we will get to it eventually 01375 if(new_reccurrence.count(*S)) 01376 continue; 01377 01378 //If this node exists in a recurrence already in the partial 01379 //order, then add all nodes in the path to the set of nodes to add 01380 //Check if its already in our partial order, if not add it to the 01381 //final vector 01382 bool found = false; 01383 for(std::vector<std::set<MSchedGraphSBNode*> >::iterator PO = partialOrder.begin(), 01384 PE = partialOrder.end(); PO != PE; ++PO) { 01385 01386 if(PO->count(*S)) { 01387 found = true; 01388 break; 01389 } 01390 } 01391 01392 if(!found) { 01393 nodesToAdd.insert(*S); 01394 searchPath(*S, path, nodesToAdd, new_reccurrence); 01395 } 01396 } 01397 01398 //Pop Node off the path 01399 path.pop_back(); 01400 } 01401 01402 void dumpIntersection(std::set<MSchedGraphSBNode*> &IntersectCurrent) { 01403 std::cerr << "Intersection ("; 01404 for(std::set<MSchedGraphSBNode*>::iterator I = IntersectCurrent.begin(), E = IntersectCurrent.end(); I != E; ++I) 01405 std::cerr << **I << ", "; 01406 std::cerr << ")\n"; 01407 } 01408 01409 void ModuloSchedulingSBPass::orderNodes() { 01410 01411 TIME_REGION(X, "orderNodes"); 01412 01413 int BOTTOM_UP = 0; 01414 int TOP_DOWN = 1; 01415 01416 //Set default order 01417 int order = BOTTOM_UP; 01418 01419 //Loop over and find all pred nodes and schedule them first 01420 /*for(std::vector<std::set<MSchedGraphSBNode*> >::iterator CurrentSet = partialOrder.begin(), E= partialOrder.end(); CurrentSet != E; ++CurrentSet) { 01421 for(std::set<MSchedGraphSBNode*>::iterator N = CurrentSet->begin(), NE = CurrentSet->end(); N != NE; ++N) 01422 if((*N)->isPredicate()) { 01423 FinalNodeOrder.push_back(*N); 01424 CurrentSet->erase(*N); 01425 } 01426 }*/ 01427 01428 01429 01430 //Loop over all the sets and place them in the final node order 01431 for(std::vector<std::set<MSchedGraphSBNode*> >::iterator CurrentSet = partialOrder.begin(), E= partialOrder.end(); CurrentSet != E; ++CurrentSet) { 01432 01433 DEBUG(std::cerr << "Processing set in S\n"); 01434 DEBUG(dumpIntersection(*CurrentSet)); 01435 01436 //Result of intersection 01437 std::set<MSchedGraphSBNode*> IntersectCurrent; 01438 01439 predIntersect(*CurrentSet, IntersectCurrent); 01440 01441 //If the intersection of predecessor and current set is not empty 01442 //sort nodes bottom up 01443 if(IntersectCurrent.size() != 0) { 01444 DEBUG(std::cerr << "Final Node Order Predecessors and Current Set interesection is NOT empty\n"); 01445 order = BOTTOM_UP; 01446 } 01447 //If empty, use successors 01448 else { 01449 DEBUG(std::cerr << "Final Node Order Predecessors and Current Set interesection is empty\n"); 01450 01451 succIntersect(*CurrentSet, IntersectCurrent); 01452 01453 //sort top-down 01454 if(IntersectCurrent.size() != 0) { 01455 DEBUG(std::cerr << "Final Node Order Successors and Current Set interesection is NOT empty\n"); 01456 order = TOP_DOWN; 01457 } 01458 else { 01459 DEBUG(std::cerr << "Final Node Order Successors and Current Set interesection is empty\n"); 01460 //Find node with max ASAP in current Set 01461 MSchedGraphSBNode *node; 01462 int maxASAP = 0; 01463 DEBUG(std::cerr << "Using current set of size " << CurrentSet->size() << "to find max ASAP\n"); 01464 for(std::set<MSchedGraphSBNode*>::iterator J = CurrentSet->begin(), JE = CurrentSet->end(); J != JE; ++J) { 01465 //Get node attributes 01466 MSNodeSBAttributes nodeAttr= nodeToAttributesMap.find(*J)->second; 01467 //assert(nodeAttr != nodeToAttributesMap.end() && "Node not in attributes map!"); 01468 01469 if(maxASAP <= nodeAttr.ASAP) { 01470 maxASAP = nodeAttr.ASAP; 01471 node = *J; 01472 } 01473 } 01474 assert(node != 0 && "In node ordering node should not be null"); 01475 IntersectCurrent.insert(node); 01476 order = BOTTOM_UP; 01477 } 01478 } 01479 01480 //Repeat until all nodes are put into the final order from current set 01481 while(IntersectCurrent.size() > 0) { 01482 01483 if(order == TOP_DOWN) { 01484 DEBUG(std::cerr << "Order is TOP DOWN\n"); 01485 01486 while(IntersectCurrent.size() > 0) { 01487 DEBUG(std::cerr << "Intersection is not empty, so find heighest height\n"); 01488 01489 int MOB = 0; 01490 int height = 0; 01491 MSchedGraphSBNode *highestHeightNode = *(IntersectCurrent.begin()); 01492 01493 //Find node in intersection with highest heigh and lowest MOB 01494 for(std::set<MSchedGraphSBNode*>::iterator I = IntersectCurrent.begin(), 01495 E = IntersectCurrent.end(); I != E; ++I) { 01496 01497 //Get current nodes properties 01498 MSNodeSBAttributes nodeAttr= nodeToAttributesMap.find(*I)->second; 01499 01500 if(height < nodeAttr.height) { 01501 highestHeightNode = *I; 01502 height = nodeAttr.height; 01503 MOB = nodeAttr.MOB; 01504 } 01505 else if(height == nodeAttr.height) { 01506 if(MOB > nodeAttr.height) { 01507 highestHeightNode = *I; 01508 height = nodeAttr.height; 01509 MOB = nodeAttr.MOB; 01510 } 01511 } 01512 } 01513 01514 //Append our node with greatest height to the NodeOrder 01515 if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), highestHeightNode) == FinalNodeOrder.end()) { 01516 DEBUG(std::cerr << "Adding node to Final Order: " << *highestHeightNode << "\n"); 01517 FinalNodeOrder.push_back(highestHeightNode); 01518 } 01519 01520 //Remove V from IntersectOrder 01521 IntersectCurrent.erase(std::find(IntersectCurrent.begin(), 01522 IntersectCurrent.end(), highestHeightNode)); 01523 01524 01525 //Intersect V's successors with CurrentSet 01526 for(MSchedGraphSBNode::succ_iterator P = highestHeightNode->succ_begin(), 01527 E = highestHeightNode->succ_end(); P != E; ++P) { 01528 //if(lower_bound(CurrentSet->begin(), 01529 // CurrentSet->end(), *P) != CurrentSet->end()) { 01530 if(std::find(CurrentSet->begin(), CurrentSet->end(), *P) != CurrentSet->end()) { 01531 if(ignoreEdge(highestHeightNode, *P)) 01532 continue; 01533 //If not already in Intersect, add 01534 if(!IntersectCurrent.count(*P)) 01535 IntersectCurrent.insert(*P); 01536 } 01537 } 01538 } //End while loop over Intersect Size 01539 01540 //Change direction 01541 order = BOTTOM_UP; 01542 01543 //Reset Intersect to reflect changes in OrderNodes 01544 IntersectCurrent.clear(); 01545 predIntersect(*CurrentSet, IntersectCurrent); 01546 01547 } //End If TOP_DOWN 01548 01549 //Begin if BOTTOM_UP 01550 else { 01551 DEBUG(std::cerr << "Order is BOTTOM UP\n"); 01552 while(IntersectCurrent.size() > 0) { 01553 DEBUG(std::cerr << "Intersection of size " << IntersectCurrent.size() << ", finding highest depth\n"); 01554 01555 //dump intersection 01556 DEBUG(dumpIntersection(IntersectCurrent)); 01557 //Get node with highest depth, if a tie, use one with lowest 01558 //MOB 01559 int MOB = 0; 01560 int depth = 0; 01561 MSchedGraphSBNode *highestDepthNode = *(IntersectCurrent.begin()); 01562 01563 for(std::set<MSchedGraphSBNode*>::iterator I = IntersectCurrent.begin(), 01564 E = IntersectCurrent.end(); I != E; ++I) { 01565 //Find node attribute in graph 01566 MSNodeSBAttributes nodeAttr= nodeToAttributesMap.find(*I)->second; 01567 01568 if(depth < nodeAttr.depth) { 01569 highestDepthNode = *I; 01570 depth = nodeAttr.depth; 01571 MOB = nodeAttr.MOB; 01572 } 01573 else if(depth == nodeAttr.depth) { 01574 if(MOB > nodeAttr.MOB) { 01575 highestDepthNode = *I; 01576 depth = nodeAttr.depth; 01577 MOB = nodeAttr.MOB; 01578 } 01579 } 01580 } 01581 01582 01583 01584 //Append highest depth node to the NodeOrder 01585 if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), highestDepthNode) == FinalNodeOrder.end()) { 01586 DEBUG(std::cerr << "Adding node to Final Order: " << *highestDepthNode << "\n"); 01587 FinalNodeOrder.push_back(highestDepthNode); 01588 } 01589 //Remove heightestDepthNode from IntersectOrder 01590 IntersectCurrent.erase(highestDepthNode); 01591 01592 01593 //Intersect heightDepthNode's pred with CurrentSet 01594 for(MSchedGraphSBNode::pred_iterator P = highestDepthNode->pred_begin(), 01595 E = highestDepthNode->pred_end(); P != E; ++P) { 01596 if(CurrentSet->count(*P)) { 01597 if(ignoreEdge(*P, highestDepthNode)) 01598 continue; 01599 01600 //If not already in Intersect, add 01601 if(!IntersectCurrent.count(*P)) 01602 IntersectCurrent.insert(*P); 01603 } 01604 } 01605 01606 } //End while loop over Intersect Size 01607 01608 //Change order 01609 order = TOP_DOWN; 01610 01611 //Reset IntersectCurrent to reflect changes in OrderNodes 01612 IntersectCurrent.clear(); 01613 succIntersect(*CurrentSet, IntersectCurrent); 01614 } //End if BOTTOM_DOWN 01615 01616 DEBUG(std::cerr << "Current Intersection Size: " << IntersectCurrent.size() << "\n"); 01617 } 01618 //End Wrapping while loop 01619 DEBUG(std::cerr << "Ending Size of Current Set: " << CurrentSet->size() << "\n"); 01620 }//End for over all sets of nodes 01621 01622 //FIXME: As the algorithm stands it will NEVER add an instruction such as ba (with no 01623 //data dependencies) to the final order. We add this manually. It will always be 01624 //in the last set of S since its not part of a recurrence 01625 //Loop over all the sets and place them in the final node order 01626 std::vector<std::set<MSchedGraphSBNode*> > ::reverse_iterator LastSet = partialOrder.rbegin(); 01627 for(std::set<MSchedGraphSBNode*>::iterator CurrentNode = LastSet->begin(), LastNode = LastSet->end(); 01628 CurrentNode != LastNode; ++CurrentNode) { 01629 if((*CurrentNode)->getInst()->getOpcode() == V9::BA) 01630 FinalNodeOrder.push_back(*CurrentNode); 01631 } 01632 //Return final Order 01633 //return FinalNodeOrder; 01634 } 01635 01636 01637 void ModuloSchedulingSBPass::predIntersect(std::set<MSchedGraphSBNode*> &CurrentSet, std::set<MSchedGraphSBNode*> &IntersectResult) { 01638 01639 for(unsigned j=0; j < FinalNodeOrder.size(); ++j) { 01640 for(MSchedGraphSBNode::pred_iterator P = FinalNodeOrder[j]->pred_begin(), 01641 E = FinalNodeOrder[j]->pred_end(); P != E; ++P) { 01642 01643 //Check if we are supposed to ignore this edge or not 01644 if(ignoreEdge(*P,FinalNodeOrder[j])) 01645 continue; 01646 01647 if(CurrentSet.count(*P)) 01648 if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), *P) == FinalNodeOrder.end()) 01649 IntersectResult.insert(*P); 01650 } 01651 } 01652 } 01653 01654 void ModuloSchedulingSBPass::succIntersect(std::set<MSchedGraphSBNode*> &CurrentSet, std::set<MSchedGraphSBNode*> &IntersectResult) { 01655 01656 for(unsigned j=0; j < FinalNodeOrder.size(); ++j) { 01657 for(MSchedGraphSBNode::succ_iterator P = FinalNodeOrder[j]->succ_begin(), 01658 E = FinalNodeOrder[j]->succ_end(); P != E; ++P) { 01659 01660 //Check if we are supposed to ignore this edge or not 01661 if(ignoreEdge(FinalNodeOrder[j],*P)) 01662 continue; 01663 01664 if(CurrentSet.count(*P)) 01665 if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), *P) == FinalNodeOrder.end()) 01666 IntersectResult.insert(*P); 01667 } 01668 } 01669 } 01670 01671 01672 01673 bool ModuloSchedulingSBPass::computeSchedule(std::vector<const MachineBasicBlock*> &SB, MSchedGraphSB *MSG) { 01674 01675 TIME_REGION(X, "computeSchedule"); 01676 01677 bool success = false; 01678 01679 //FIXME: Should be set to max II of the original loop 01680 //Cap II in order to prevent infinite loop 01681 int capII = MSG->totalDelay(); 01682 01683 while(!success) { 01684 01685 //Keep track of branches, but do not insert into the schedule 01686 std::vector<MSchedGraphSBNode*> branches; 01687 01688 //Loop over the final node order and process each node 01689 for(std::vector<MSchedGraphSBNode*>::iterator I = FinalNodeOrder.begin(), 01690 E = FinalNodeOrder.end(); I != E; ++I) { 01691 01692 //CalculateEarly and Late start 01693 bool initialLSVal = false; 01694 bool initialESVal = false; 01695 int EarlyStart = 0; 01696 int LateStart = 0; 01697 bool hasSucc = false; 01698 bool hasPred = false; 01699 bool sched; 01700 01701 if((*I)->isBranch()) 01702 if((*I)->hasPredecessors()) 01703 sched = true; 01704 else 01705 sched = false; 01706 else 01707 sched = true; 01708 01709 if(sched) { 01710 //Loop over nodes in the schedule and determine if they are predecessors 01711 //or successors of the node we are trying to schedule 01712 for(MSScheduleSB::schedule_iterator nodesByCycle = schedule.begin(), nodesByCycleEnd = schedule.end(); 01713 nodesByCycle != nodesByCycleEnd; ++nodesByCycle) { 01714 01715 //For this cycle, get the vector of nodes schedule and loop over it 01716 for(std::vector<MSchedGraphSBNode*>::iterator schedNode = nodesByCycle->second.begin(), SNE = nodesByCycle->second.end(); schedNode != SNE; ++schedNode) { 01717 01718 if((*I)->isPredecessor(*schedNode)) { 01719 int diff = (*I)->getInEdge(*schedNode).getIteDiff(); 01720 int ES_Temp = nodesByCycle->first + (*schedNode)->getLatency() - diff * II; 01721 DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << nodesByCycle->first << "\n"); 01722 DEBUG(std::cerr << "Temp EarlyStart: " << ES_Temp << " Prev EarlyStart: " << EarlyStart << "\n"); 01723 if(initialESVal) 01724 EarlyStart = std::max(EarlyStart, ES_Temp); 01725 else { 01726 EarlyStart = ES_Temp; 01727 initialESVal = true; 01728 } 01729 hasPred = true; 01730 } 01731 if((*I)->isSuccessor(*schedNode)) { 01732 int diff = (*schedNode)->getInEdge(*I).getIteDiff(); 01733 int LS_Temp = nodesByCycle->first - (*I)->getLatency() + diff * II; 01734 DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << nodesByCycle->first << "\n"); 01735 DEBUG(std::cerr << "Temp LateStart: " << LS_Temp << " Prev LateStart: " << LateStart << "\n"); 01736 if(initialLSVal) 01737 LateStart = std::min(LateStart, LS_Temp); 01738 else { 01739 LateStart = LS_Temp; 01740 initialLSVal = true; 01741 } 01742 hasSucc = true; 01743 } 01744 } 01745 } 01746 } 01747 else { 01748 branches.push_back(*I); 01749 continue; 01750 } 01751 01752 //Check if the node has no pred or successors and set Early Start to its ASAP 01753 if(!hasSucc && !hasPred) 01754 EarlyStart = nodeToAttributesMap.find(*I)->second.ASAP; 01755 01756 DEBUG(std::cerr << "Has Successors: " << hasSucc << ", Has Pred: " << hasPred << "\n"); 01757 DEBUG(std::cerr << "EarlyStart: " << EarlyStart << ", LateStart: " << LateStart << "\n"); 01758 01759 //Now, try to schedule this node depending upon its pred and successor in the schedule 01760 //already 01761 if(!hasSucc && hasPred) 01762 success = scheduleNode(*I, EarlyStart, (EarlyStart + II -1)); 01763 else if(!hasPred && hasSucc) 01764 success = scheduleNode(*I, LateStart, (LateStart - II +1)); 01765 else if(hasPred && hasSucc) { 01766 if(EarlyStart > LateStart) { 01767 success = false; 01768 //LateStart = EarlyStart; 01769 DEBUG(std::cerr << "Early Start can not be later then the late start cycle, schedule fails\n"); 01770 } 01771 else 01772 success = scheduleNode(*I, EarlyStart, std::min(LateStart, (EarlyStart + II -1))); 01773 } 01774 else 01775 success = scheduleNode(*I, EarlyStart, EarlyStart + II - 1); 01776 01777 if(!success) { 01778 ++II; 01779 schedule.clear(); 01780 break; 01781 } 01782 01783 } 01784 01785 if(success) { 01786 DEBUG(std::cerr << "Constructing Schedule Kernel\n"); 01787 success = schedule.constructKernel(II, branches, indVarInstrs[SB]); 01788 DEBUG(std::cerr << "Done Constructing Schedule Kernel\n"); 01789 if(!success) { 01790 ++II; 01791 schedule.clear(); 01792 } 01793 DEBUG(std::cerr << "Final II: " << II << "\n"); 01794 01795 } 01796 01797 if(II >= capII) { 01798 DEBUG(std::cerr << "Maximum II reached, giving up\n"); 01799 return false; 01800 } 01801 01802 assert(II < capII && "The II should not exceed the original loop number of cycles"); 01803 } 01804 return true; 01805 } 01806 01807 01808 bool ModuloSchedulingSBPass::scheduleNode(MSchedGraphSBNode *node, 01809 int start, int end) { 01810 bool success = false; 01811 01812 DEBUG(std::cerr << *node << " (Start Cycle: " << start << ", End Cycle: " << end << ")\n"); 01813 01814 //Make sure start and end are not negative 01815 //if(start < 0) { 01816 //start = 0; 01817 01818 //} 01819 //if(end < 0) 01820 //end = 0; 01821 01822 bool forward = true; 01823 if(start > end) 01824 forward = false; 01825 01826 bool increaseSC = true; 01827 int cycle = start ; 01828 01829 01830 while(increaseSC) { 01831 01832 increaseSC = false; 01833 01834 increaseSC = schedule.insert(node, cycle, II); 01835 01836 if(!increaseSC) 01837 return true; 01838 01839 //Increment cycle to try again 01840 if(forward) { 01841 ++cycle; 01842 DEBUG(std::cerr << "Increase cycle: " << cycle << "\n"); 01843 if(cycle > end) 01844 return false; 01845 } 01846 else { 01847 --cycle; 01848 DEBUG(std::cerr << "Decrease cycle: " << cycle << "\n"); 01849 if(cycle < end) 01850 return false; 01851 } 01852 } 01853 01854 return success; 01855 } 01856 01857 void ModuloSchedulingSBPass::reconstructLoop(std::vector<const MachineBasicBlock*> &SB) { 01858 01859 TIME_REGION(X, "reconstructLoop"); 01860 01861 01862 DEBUG(std::cerr << "Reconstructing Loop\n"); 01863 01864 //First find the value *'s that we need to "save" 01865 std::map<const Value*, std::pair<const MachineInstr*, int> > valuesToSave; 01866 01867 //Keep track of instructions we have already seen and their stage because 01868 //we don't want to "save" values if they are used in the kernel immediately 01869 std::map<const MachineInstr*, int> lastInstrs; 01870 01871 01872 std::set<MachineBasicBlock*> seenBranchesBB; 01873 const TargetInstrInfo *MTI = target.getInstrInfo(); 01874 std::map<MachineBasicBlock*, std::vector<std::pair<MachineInstr*, int> > > instrsMovedDown; 01875 std::map<MachineBasicBlock*, int> branchStage; 01876 01877 //Loop over kernel and only look at instructions from a stage > 0 01878 //Look at its operands and save values *'s that are read 01879 for(MSScheduleSB::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { 01880 01881 if(I->second !=0) { 01882 //For this instruction, get the Value*'s that it reads and put them into the set. 01883 //Assert if there is an operand of another type that we need to save 01884 const MachineInstr *inst = I->first; 01885 lastInstrs[inst] = I->second; 01886 01887 for(unsigned i=0; i < inst->getNumOperands(); ++i) { 01888 //get machine operand 01889 const MachineOperand &mOp = inst->getOperand(i); 01890 01891 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) { 01892 //find the value in the map 01893 if (const Value* srcI = mOp.getVRegValue()) { 01894 01895 if(isa<Constant>(srcI) || isa<Argument>(srcI)) 01896 continue; 01897 01898 //Before we declare this Value* one that we should save 01899 //make sure its def is not of the same stage as this instruction 01900 //because it will be consumed before its used 01901 Instruction *defInst = (Instruction*) srcI; 01902 01903 //Should we save this value? 01904 bool save = true; 01905 01906 //Continue if not in the def map, loop invariant code does not need to be saved 01907 if(!defMap.count(srcI)) 01908 continue; 01909 01910 MachineInstr *defInstr = defMap[srcI]; 01911 01912 01913 if(lastInstrs.count(defInstr)) { 01914 if(lastInstrs[defInstr] == I->second) { 01915 save = false; 01916 01917 } 01918 } 01919 01920 if(save) 01921 valuesToSave[srcI] = std::make_pair(I->first, i); 01922 } 01923 } 01924 01925 if(mOp.getType() != MachineOperand::MO_VirtualRegister && mOp.isUse()) { 01926 assert("Our assumption is wrong. We have another type of register that needs to be saved\n"); 01927 } 01928 } 01929 } 01930 01931 01932 //Do a check to see if instruction was moved below its original branch 01933 if(MTI->isBranch(I->first->getOpcode())) { 01934 seenBranchesBB.insert(I->first->getParent()); 01935 branchStage[I->first->getParent()] = I->second; 01936 } 01937 else { 01938 instrsMovedDown[I->first->getParent()].push_back(std::make_pair(I->first, I->second)); 01939 //assert(seenBranchesBB.count(I->first->getParent()) && "Instruction moved below branch\n"); 01940 } 01941 01942 } 01943 01944 //The new loop will consist of one or more prologues, the kernel, and one or more epilogues. 01945 01946 //Map to keep track of old to new values 01947 std::map<Value*, std::map<int, Value*> > newValues; 01948 01949 //Map to keep track of old to new values in kernel 01950 std::map<Value*, std::map<int, Value*> > kernelPHIs; 01951 01952 //Another map to keep track of what machine basic blocks these new value*s are in since 01953 //they have no llvm instruction equivalent 01954 std::map<Value*, MachineBasicBlock*> newValLocation; 01955 01956 std::vector<std::vector<MachineBasicBlock*> > prologues; 01957 std::vector<std::vector<BasicBlock*> > llvm_prologues; 01958 01959 //Map to keep track of where the inner branches go 01960 std::map<const MachineBasicBlock*, Value*> sideExits; 01961 01962 01963 //Write prologue 01964 if(schedule.getMaxStage() != 0) 01965 writePrologues(prologues, SB, llvm_prologues, valuesToSave, newValues, newValLocation); 01966 01967 std::vector<BasicBlock*> llvmKernelBBs; 01968 std::vector<MachineBasicBlock*> machineKernelBBs; 01969 Function *parent = (Function*) SB[0]->getBasicBlock()->getParent(); 01970 01971 for(unsigned i = 0; i < SB.size(); ++i) { 01972 llvmKernelBBs.push_back(new BasicBlock("Kernel", parent)); 01973 01974 machineKernelBBs.push_back(new MachineBasicBlock(llvmKernelBBs[i])); 01975 (((MachineBasicBlock*)SB[0])->getParent())->getBasicBlockList().push_back(machineKernelBBs[i]); 01976 } 01977 01978 writeKernel(llvmKernelBBs, machineKernelBBs, valuesToSave, newValues, newValLocation, kernelPHIs); 01979 01980 01981 std::vector<std::vector<MachineBasicBlock*> > epilogues; 01982 std::vector<std::vector<BasicBlock*> > llvm_epilogues; 01983 01984 //Write epilogues 01985 if(schedule.getMaxStage() != 0) 01986 writeEpilogues(epilogues, SB, llvm_epilogues, valuesToSave, newValues, newValLocation, kernelPHIs); 01987 01988 01989 //Fix our branches 01990 fixBranches(prologues, llvm_prologues, machineKernelBBs, llvmKernelBBs, epilogues, llvm_epilogues, SB, sideExits); 01991 01992 //Print out epilogues and prologue 01993 DEBUG(for(std::vector<std::vector<MachineBasicBlock*> >::iterator PI = prologues.begin(), PE = prologues.end(); 01994 PI != PE; ++PI) { 01995 std::cerr << "PROLOGUE\n"; 01996 for(std::vector<MachineBasicBlock*>::iterator I = PI->begin(), E = PI->end(); I != E; ++I) 01997 (*I)->print(std::cerr); 01998 }); 01999 02000 DEBUG(std::cerr << "KERNEL\n"); 02001 DEBUG(for(std::vector<MachineBasicBlock*>::iterator I = machineKernelBBs.begin(), E = machineKernelBBs.end(); I != E; ++I) { (*I)->print(std::cerr);}); 02002 02003 DEBUG(for(std::vector<std::vector<MachineBasicBlock*> >::iterator EI = epilogues.begin(), EE = epilogues.end(); 02004 EI != EE; ++EI) { 02005 std::cerr << "EPILOGUE\n"; 02006 for(std::vector<MachineBasicBlock*>::iterator I = EI->begin(), E = EI->end(); I != E; ++I) 02007 (*I)->print(std::cerr); 02008 }); 02009 02010 02011 //Remove phis 02012 removePHIs(SB, prologues, epilogues, machineKernelBBs, newValLocation); 02013 02014 //Print out epilogues and prologue 02015 DEBUG(for(std::vector<std::vector<MachineBasicBlock*> >::iterator PI = prologues.begin(), PE = prologues.end(); 02016 PI != PE; ++PI) { 02017 std::cerr << "PROLOGUE\n"; 02018 for(std::vector<MachineBasicBlock*>::iterator I = PI->begin(), E = PI->end(); I != E; ++I) 02019 (*I)->print(std::cerr); 02020 }); 02021 02022 DEBUG(std::cerr << "KERNEL\n"); 02023 DEBUG(for(std::vector<MachineBasicBlock*>::iterator I = machineKernelBBs.begin(), E = machineKernelBBs.end(); I != E; ++I) { (*I)->print(std::cerr);}); 02024 02025 DEBUG(for(std::vector<std::vector<MachineBasicBlock*> >::iterator EI = epilogues.begin(), EE = epilogues.end(); 02026 EI != EE; ++EI) { 02027 std::cerr << "EPILOGUE\n"; 02028 for(std::vector<MachineBasicBlock*>::iterator I = EI->begin(), E = EI->end(); I != E; ++I) 02029 (*I)->print(std::cerr); 02030 }); 02031 02032 writeSideExits(prologues, llvm_prologues, epilogues, llvm_epilogues, sideExits, instrsMovedDown, SB, machineKernelBBs, branchStage); 02033 02034 02035 DEBUG(std::cerr << "New Machine Function" << "\n"); 02036 } 02037 02038 02039 void ModuloSchedulingSBPass::fixBranches(std::vector<std::vector<MachineBasicBlock*> > &prologues, std::vector<std::vector<BasicBlock*> > &llvm_prologues, std::vector<MachineBasicBlock*> &machineKernelBB, std::vector<BasicBlock*> &llvmKernelBB, std::vector<std::vector<MachineBasicBlock*> > &epilogues, std::vector<std::vector<BasicBlock*> > &llvm_epilogues, std::vector<const MachineBasicBlock*> &SB, std::map<const MachineBasicBlock*, Value*> &sideExits) { 02040 02041 const TargetInstrInfo *TMI = target.getInstrInfo(); 02042 02043 //Get exit BB 02044 BasicBlock *last = (BasicBlock*) SB[SB.size()-1]->getBasicBlock(); 02045 BasicBlock *kernel_exit = 0; 02046 bool sawFirst = false; 02047 02048 for(succ_iterator I = succ_begin(last), 02049 E = succ_end(last); I != E; ++I) { 02050 if (*I != SB[0]->getBasicBlock()) { 02051 kernel_exit = *I; 02052 break; 02053 } 02054 else 02055 sawFirst = true; 02056 } 02057 if(!kernel_exit && sawFirst) { 02058 kernel_exit = (BasicBlock*) SB[0]->getBasicBlock(); 02059 } 02060 02061 assert(kernel_exit && "Kernel Exit can not be null"); 02062 02063 if(schedule.getMaxStage() != 0) { 02064 //Fix prologue branches 02065 for(unsigned i = 0; i < prologues.size(); ++i) { 02066 02067 for(unsigned j = 0; j < prologues[i].size(); ++j) { 02068 02069 MachineBasicBlock *currentMBB = prologues[i][j]; 02070 02071 //Find terminator since getFirstTerminator does not work! 02072 for(MachineBasicBlock::reverse_iterator mInst = currentMBB->rbegin(), mInstEnd = currentMBB->rend(); mInst != mInstEnd; ++mInst) { 02073 MachineOpCode OC = mInst->getOpcode(); 02074 //If its a branch update its branchto 02075 if(TMI->isBranch(OC)) { 02076 for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) { 02077 MachineOperand &mOp = mInst->getOperand(opNum); 02078 if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) { 02079 //Check if we are branching to the kernel, if not branch to epilogue 02080 if(mOp.getVRegValue() == SB[0]->getBasicBlock()) { 02081 if(i >= prologues.size()-1) 02082 mOp.setValueReg(llvmKernelBB[0]); 02083 else 02084 mOp.setValueReg(llvm_prologues[i+1][0]); 02085 } 02086 else if( (mOp.getVRegValue() == kernel_exit) && (j == prologues[i].size()-1)) { 02087 mOp.setValueReg(llvm_epilogues[i][0]); 02088 } 02089 else if(mOp.getVRegValue() == SB[j+1]->getBasicBlock()) { 02090 mOp.setValueReg(llvm_prologues[i][j+1]); 02091 } 02092 02093 } 02094 } 02095 02096 DEBUG(std::cerr << "New Prologue Branch: " << *mInst << "\n"); 02097 } 02098 } 02099 02100 //Update llvm basic block with our new branch instr 02101 DEBUG(std::cerr << SB[i]->getBasicBlock()->getTerminator() << "\n"); 02102 02103 const BranchInst *branchVal = dyn_cast<BranchInst>(SB[i]->getBasicBlock()->getTerminator()); 02104 02105 //Check for inner branch 02106 if(j < prologues[i].size()-1) { 02107 //Find our side exit LLVM basic block 02108 BasicBlock *sideExit = 0; 02109 for(unsigned s = 0; s < branchVal->getNumSuccessors(); ++s) { 02110 if(branchVal->getSuccessor(s) != SB[i+1]->getBasicBlock()) 02111 sideExit = branchVal->getSuccessor(s); 02112 } 02113 assert(sideExit && "Must have side exit llvm basic block"); 02114 TerminatorInst *newBranch = new BranchInst(sideExit, 02115 llvm_prologues[i][j+1], 02116 branchVal->getCondition(), 02117 llvm_prologues[i][j]); 02118 } 02119 else { 02120 //If last prologue 02121 if(i == prologues.size()-1) { 02122 TerminatorInst *newBranch = new BranchInst(llvmKernelBB[0], 02123 llvm_epilogues[i][0], 02124 branchVal->getCondition(), 02125 llvm_prologues[i][j]); 02126 } 02127 else { 02128 TerminatorInst *newBranch = new BranchInst(llvm_prologues[i+1][0], 02129 llvm_epilogues[i][0], 02130 branchVal->getCondition(), 02131 llvm_prologues[i][j]); 02132 } 02133 } 02134 } 02135 } 02136 } 02137 02138 //Fix up kernel machine branches 02139 for(unsigned i = 0; i < machineKernelBB.size(); ++i) { 02140 MachineBasicBlock *currentMBB = machineKernelBB[i]; 02141 02142 for(MachineBasicBlock::reverse_iterator mInst = currentMBB->rbegin(), mInstEnd = currentMBB->rend(); mInst != mInstEnd; ++mInst) { 02143 MachineOpCode OC = mInst->getOpcode(); 02144 if(TMI->isBranch(OC)) { 02145 for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) { 02146 MachineOperand &mOp = mInst->getOperand(opNum); 02147 02148 if(mOp.getType() == MachineOperand::MO_PCRelativeDisp) { 02149 //Deal with inner kernel branches 02150 if(i < machineKernelBB.size()-1) { 02151 if(mOp.getVRegValue() == SB[i+1]->getBasicBlock()) 02152 mOp.setValueReg(llvmKernelBB[i+1]); 02153 //Side exit! 02154 else { 02155 sideExits[SB[i]] = mOp.getVRegValue(); 02156 } 02157 } 02158 else { 02159 if(mOp.getVRegValue() == SB[0]->getBasicBlock()) 02160 mOp.setValueReg(llvmKernelBB[0]); 02161 else { 02162 if(llvm_epilogues.size() > 0) 02163 mOp.setValueReg(llvm_epilogues[0][0]); 02164 } 02165 } 02166 } 02167 } 02168 } 02169 } 02170 02171 //Update kernelLLVM branches 02172 const BranchInst *branchVal = dyn_cast<BranchInst>(SB[0]->getBasicBlock()->getTerminator()); 02173 02174 //deal with inner branch 02175 if(i < machineKernelBB.size()-1) { 02176 02177 //Find our side exit LLVM basic block 02178 BasicBlock *sideExit = 0; 02179 for(unsigned s = 0; s < branchVal->getNumSuccessors(); ++s) { 02180 if(branchVal->getSuccessor(s) != SB[i+1]->getBasicBlock()) 02181 sideExit = branchVal->getSuccessor(s); 02182 } 02183 assert(sideExit && "Must have side exit llvm basic block"); 02184 TerminatorInst *newBranch = new BranchInst(sideExit, 02185 llvmKernelBB[i+1], 02186 branchVal->getCondition(), 02187 llvmKernelBB[i]); 02188 } 02189 else { 02190 //Deal with outter branches 02191 if(epilogues.size() > 0) { 02192 TerminatorInst *newBranch = new BranchInst(llvmKernelBB[0], 02193 llvm_epilogues[0][0], 02194 branchVal->getCondition(), 02195 llvmKernelBB[i]); 02196 } 02197 else { 02198 TerminatorInst *newBranch = new BranchInst(llvmKernelBB[0], 02199 kernel_exit, 02200 branchVal->getCondition(), 02201 llvmKernelBB[i]); 02202 } 02203 } 02204 } 02205 02206 if(schedule.getMaxStage() != 0) { 02207 02208 //Lastly add unconditional branches for the epilogues 02209 for(unsigned i = 0; i < epilogues.size(); ++i) { 02210 02211 for(unsigned j=0; j < epilogues[i].size(); ++j) { 02212 //Now since we don't have fall throughs, add a unconditional 02213 //branch to the next prologue 02214 02215 //Before adding these, we need to check if the epilogue already has 02216 //a branch in it 02217 bool hasBranch = false; 02218 /*if(j < epilogues[i].size()-1) { 02219 MachineBasicBlock *currentMBB = epilogues[i][j]; 02220 for(MachineBasicBlock::reverse_iterator mInst = currentMBB->rbegin(), mInstEnd = currentMBB->rend(); mInst != mInstEnd; ++mInst) { 02221 02222 MachineOpCode OC = mInst->getOpcode(); 02223 02224 //If its a branch update its branchto 02225 if(TMI->isBranch(OC)) { 02226 hasBranch = true; 02227 for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) { 02228 MachineOperand &mOp = mInst->getOperand(opNum); 02229 if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) { 02230 02231 if(mOp.getVRegValue() != sideExits[SB[j]]) { 02232 mOp.setValueReg(llvm_epilogues[i][j+1]); 02233 } 02234 02235 } 02236 } 02237 02238 02239 DEBUG(std::cerr << "New Epilogue Branch: " << *mInst << "\n"); 02240 } 02241 } 02242 if(hasBranch) { 02243 const BranchInst *branchVal = dyn_cast<BranchInst>(SB[j]->getBasicBlock()->getTerminator()); 02244 TerminatorInst *newBranch = new BranchInst((BasicBlock*)sideExits[SB[j]], 02245 llvm_epilogues[i][j+1], 02246 branchVal->getCondition(), 02247 llvm_epilogues[i][j]); 02248 } 02249 }*/ 02250 02251 if(!hasBranch) { 02252 02253 //Handle inner branches 02254 if(j < epilogues[i].size()-1) { 02255 BuildMI(epilogues[i][j], V9::BA, 1).addPCDisp(llvm_epilogues[i][j+1]); 02256 TerminatorInst *newBranch = new BranchInst(llvm_epilogues[i][j+1], 02257 llvm_epilogues[i][j]); 02258 } 02259 else { 02260 02261 //Check if this is the last epilogue 02262 if(i != epilogues.size()-1) { 02263 BuildMI(epilogues[i][j], V9::BA, 1).addPCDisp(llvm_epilogues[i+1][0]); 02264 //Add unconditional branch to end of epilogue 02265 TerminatorInst *newBranch = new BranchInst(llvm_epilogues[i+1][0], 02266 llvm_epilogues[i][j]); 02267 02268 } 02269 else { 02270 BuildMI(epilogues[i][j], V9::BA, 1).addPCDisp(kernel_exit); 02271 TerminatorInst *newBranch = new BranchInst(kernel_exit, llvm_epilogues[i][j]); 02272 } 02273 } 02274 02275 //Add one more nop! 02276 BuildMI(epilogues[i][j], V9::NOP, 0); 02277 02278 } 02279 } 02280 } 02281 } 02282 02283 //Find all llvm basic blocks that branch to the loop entry and 02284 //change to our first prologue. 02285 const BasicBlock *llvmBB = SB[0]->getBasicBlock(); 02286 02287 std::vector<const BasicBlock*>Preds (pred_begin(llvmBB), pred_end(llvmBB)); 02288 02289 for(std::vector<const BasicBlock*>::iterator P = Preds.begin(), 02290 PE = Preds.end(); P != PE; ++P) { 02291 if(*P == SB[SB.size()-1]->getBasicBlock()) 02292 continue; 02293 else { 02294 DEBUG(std::cerr << "Found our entry BB\n"); 02295 DEBUG((*P)->print(std::cerr)); 02296 //Get the Terminator instruction for this basic block and print it out 02297 //DEBUG(std::cerr << *((*P)->getTerminator()) << "\n"); 02298 02299 //Update the terminator 02300 TerminatorInst *term = ((BasicBlock*)*P)->getTerminator(); 02301 for(unsigned i=0; i < term->getNumSuccessors(); ++i) { 02302 if(term->getSuccessor(i) == llvmBB) { 02303 DEBUG(std::cerr << "Replacing successor bb\n"); 02304 if(llvm_prologues.size() > 0) { 02305 term->setSuccessor(i, llvm_prologues[0][0]); 02306 02307 DEBUG(std::cerr << "New Term" << *((*P)->getTerminator()) << "\n"); 02308 02309 //Also update its corresponding machine instruction 02310 MachineCodeForInstruction & tempMvec = 02311 MachineCodeForInstruction::get(term); 02312 for (unsigned j = 0; j < tempMvec.size(); j++) { 02313 MachineInstr *temp = tempMvec[j]; 02314 MachineOpCode opc = temp->getOpcode(); 02315 if(TMI->isBranch(opc)) { 02316 DEBUG(std::cerr << *temp << "\n"); 02317 //Update branch 02318 for(unsigned opNum = 0; opNum < temp->getNumOperands(); ++opNum) { 02319 MachineOperand &mOp = temp->getOperand(opNum); 02320 if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) { 02321 if(mOp.getVRegValue() == llvmBB) 02322 mOp.setValueReg(llvm_prologues[0][0]); 02323 } 02324 } 02325 } 02326 } 02327 } 02328 else { 02329 term->setSuccessor(i, llvmKernelBB[0]); 02330 02331 //Also update its corresponding machine instruction 02332 MachineCodeForInstruction & tempMvec = 02333 MachineCodeForInstruction::get(term); 02334 for(unsigned j = 0; j < tempMvec.size(); j++) { 02335 MachineInstr *temp = tempMvec[j]; 02336 MachineOpCode opc = temp->getOpcode(); 02337 if(TMI->isBranch(opc)) { 02338 DEBUG(std::cerr << *temp << "\n"); 02339 //Update branch 02340 for(unsigned opNum = 0; opNum < temp->getNumOperands(); ++opNum) { 02341 MachineOperand &mOp = temp->getOperand(opNum); 02342 if(mOp.getType() == MachineOperand::MO_PCRelativeDisp) { 02343 if(mOp.getVRegValue() == llvmBB) 02344 mOp.setValueReg(llvmKernelBB[0]); 02345 } 02346 } 02347 } 02348 } 02349 } 02350 } 02351 } 02352 break; 02353 } 02354 } 02355 02356 } 02357 02358 02359 void ModuloSchedulingSBPass::writePrologues(std::vector<std::vector<MachineBasicBlock *> > &prologues, std::vector<const MachineBasicBlock*> &origSB, std::vector<std::vector<BasicBlock*> > &llvm_prologues, std::map<const Value*, std::pair<const MachineInstr*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues, std::map<Value*, MachineBasicBlock*> &newValLocation) { 02360 02361 //Keep a map to easily know whats in the kernel 02362 std::map<int, std::set<const MachineInstr*> > inKernel; 02363 int maxStageCount = 0; 02364 02365 //Keep a map of new values we consumed in case they need to be added back 02366 std::map<Value*, std::map<int, Value*> > consumedValues; 02367 02368 DEBUG(schedule.print(std::cerr)); 02369 02370 for(MSScheduleSB::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { 02371 maxStageCount = std::max(maxStageCount, I->second); 02372 02373 //Put int the map so we know what instructions in each stage are in the kernel 02374 DEBUG(std::cerr << "Inserting instruction " << *(I->first) << " into map at stage " << I->second << "\n"); 02375 inKernel[I->second].insert(I->first); 02376 } 02377 02378 //Get target information to look at machine operands 02379 const TargetInstrInfo *mii = target.getInstrInfo(); 02380 02381 //Now write the prologues 02382 for(int i = 0; i < maxStageCount; ++i) { 02383 std::vector<MachineBasicBlock*> current_prologue; 02384 std::vector<BasicBlock*> current_llvm_prologue; 02385 02386 for(std::vector<const MachineBasicBlock*>::iterator MB = origSB.begin(), 02387 MBE = origSB.end(); MB != MBE; ++MB) { 02388 const MachineBasicBlock *MBB = *MB; 02389 //Create new llvm and machine bb 02390 BasicBlock *llvmBB = new BasicBlock("PROLOGUE", (Function*) (MBB->getBasicBlock()->getParent())); 02391 MachineBasicBlock *machineBB = new MachineBasicBlock(llvmBB); 02392 02393 DEBUG(std::cerr << "i=" << i << "\n"); 02394 02395 for(int j = i; j >= 0; --j) { 02396 //iterate over instructions in original bb 02397 for(MachineBasicBlock::const_iterator MI = MBB->begin(), 02398 ME = MBB->end(); ME != MI; ++MI) { 02399 if(inKernel[j].count(&*MI)) { 02400 MachineInstr *instClone = MI->clone(); 02401 machineBB->push_back(instClone); 02402 02403 //If its a branch, insert a nop 02404 if(mii->isBranch(instClone->getOpcode())) 02405 BuildMI(machineBB, V9::NOP, 0); 02406 02407 02408 DEBUG(std::cerr << "Cloning: " << *MI << "\n"); 02409 02410 //After cloning, we may need to save the value that this instruction defines 02411 for(unsigned opNum=0; opNum < MI->getNumOperands(); ++opNum) { 02412 Instruction *tmp; 02413 02414 //get machine operand 02415 MachineOperand &mOp = instClone->getOperand(opNum); 02416 if(mOp.getType() == MachineOperand::MO_VirtualRegister 02417 && mOp.isDef()) { 02418 02419 //Check if this is a value we should save 02420 if(valuesToSave.count(mOp.getVRegValue())) { 02421 //Save copy in tmpInstruction 02422 tmp = new TmpInstruction(mOp.getVRegValue()); 02423 02424 //Add TmpInstruction to safe LLVM Instruction MCFI 02425 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 02426 tempMvec.addTemp((Value*) tmp); 02427 02428 DEBUG(std::cerr << "Value: " << *(mOp.getVRegValue()) 02429 << " New Value: " << *tmp << " Stage: " << i << "\n"); 02430 02431 newValues[mOp.getVRegValue()][i]= tmp; 02432 newValLocation[tmp] = machineBB; 02433 02434 DEBUG(std::cerr << "Machine Instr Operands: " 02435 << *(mOp.getVRegValue()) << ", 0, " << *tmp << "\n"); 02436 02437 //Create machine instruction and put int machineBB 02438 MachineInstr *saveValue; 02439 if(mOp.getVRegValue()->getType() == Type::FloatTy) 02440 saveValue = BuildMI(machineBB, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); 02441 else if(mOp.getVRegValue()->getType() == Type::DoubleTy) 02442 saveValue = BuildMI(machineBB, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); 02443 else 02444 saveValue = BuildMI(machineBB, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); 02445 02446 02447 DEBUG(std::cerr << "Created new machine instr: " << *saveValue << "\n"); 02448 } 02449 } 02450 02451 //We may also need to update the value that we use if 02452 //its from an earlier prologue 02453 if(j != 0) { 02454 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) { 02455 if(newValues.count(mOp.getVRegValue())) { 02456 if(newValues[mOp.getVRegValue()].count(i-1)) { 02457 Value *oldV = mOp.getVRegValue(); 02458 DEBUG(std::cerr << "Replaced this value: " << mOp.getVRegValue() << " With:" << (newValues[mOp.getVRegValue()][i-1]) << "\n"); 02459 //Update the operand with the right value 02460 mOp.setValueReg(newValues[mOp.getVRegValue()][i-1]); 02461 02462 //Remove this value since we have consumed it 02463 //NOTE: Should this only be done if j != maxStage? 02464 consumedValues[oldV][i-1] = (newValues[oldV][i-1]); 02465 DEBUG(std::cerr << "Deleted value: " << consumedValues[oldV][i-1] << "\n"); 02466 newValues[oldV].erase(i-1); 02467 } 02468 } 02469 else 02470 if(consumedValues.count(mOp.getVRegValue())) 02471 assert(!consumedValues[mOp.getVRegValue()].count(i-1) && "Found a case where we need the value"); 02472 } 02473 } 02474 } 02475 } 02476 } 02477 } 02478 (((MachineBasicBlock*)MBB)->getParent())->getBasicBlockList().push_back(machineBB); 02479 current_prologue.push_back(machineBB); 02480 current_llvm_prologue.push_back(llvmBB); 02481 } 02482 prologues.push_back(current_prologue); 02483 llvm_prologues.push_back(current_llvm_prologue); 02484 02485 } 02486 } 02487 02488 02489 void ModuloSchedulingSBPass::writeEpilogues(std::vector<std::vector<MachineBasicBlock*> > &epilogues, std::vector<const MachineBasicBlock*> &origSB, std::vector<std::vector<BasicBlock*> > &llvm_epilogues, std::map<const Value*, std::pair<const MachineInstr*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues,std::map<Value*, MachineBasicBlock*> &newValLocation, std::map<Value*, std::map<int, Value*> > &kernelPHIs ) { 02490 02491 std::map<int, std::set<const MachineInstr*> > inKernel; 02492 const TargetInstrInfo *MTI = target.getInstrInfo(); 02493 02494 for(MSScheduleSB::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { 02495 02496 //Put int the map so we know what instructions in each stage are in the kernel 02497 inKernel[I->second].insert(I->first); 02498 } 02499 02500 std::map<Value*, Value*> valPHIs; 02501 02502 //some debug stuff, will remove later 02503 DEBUG(for(std::map<Value*, std::map<int, Value*> >::iterator V = newValues.begin(), E = newValues.end(); V !=E; ++V) { 02504 std::cerr << "Old Value: " << *(V->first) << "\n"; 02505 for(std::map<int, Value*>::iterator I = V->second.begin(), IE = V->second.end(); I != IE; ++I) 02506 std::cerr << "Stage: " << I->first << " Value: " << *(I->second) << "\n"; 02507 }); 02508 02509 02510 //Now write the epilogues 02511 for(int i = schedule.getMaxStage()-1; i >= 0; --i) { 02512 std::vector<MachineBasicBlock*> current_epilogue; 02513 std::vector<BasicBlock*> current_llvm_epilogue; 02514 02515 for(std::vector<const MachineBasicBlock*>::iterator MB = origSB.begin(), MBE = origSB.end(); MB != MBE; ++MB) { 02516 const MachineBasicBlock *MBB = *MB; 02517 02518 BasicBlock *llvmBB = new BasicBlock("EPILOGUE", (Function*) (MBB->getBasicBlock()->getParent())); 02519 MachineBasicBlock *machineBB = new MachineBasicBlock(llvmBB); 02520 02521 DEBUG(std::cerr << " Epilogue #: " << i << "\n"); 02522 02523 std::map<Value*, int> inEpilogue; 02524 02525 for(MachineBasicBlock::const_iterator MI = MBB->begin(), ME = MBB->end(); ME != MI; ++MI) { 02526 for(int j=schedule.getMaxStage(); j > i; --j) { 02527 if(inKernel[j].count(&*MI)) { 02528 DEBUG(std::cerr << "Cloning instruction " << *MI << "\n"); 02529 MachineInstr *clone = MI->clone(); 02530 02531 //Update operands that need to use the result from the phi 02532 for(unsigned opNum=0; opNum < clone->getNumOperands(); ++opNum) { 02533 //get machine operand 02534 const MachineOperand &mOp = clone->getOperand(opNum); 02535 02536 if((mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse())) { 02537 02538 DEBUG(std::cerr << "Writing PHI for " << (mOp.getVRegValue()) << "\n"); 02539 02540 //If this is the last instructions for the max iterations ago, don't update operands 02541 if(inEpilogue.count(mOp.getVRegValue())) 02542 if(inEpilogue[mOp.getVRegValue()] == i) 02543 continue; 02544 02545 //Quickly write appropriate phis for this operand 02546 if(newValues.count(mOp.getVRegValue())) { 02547 if(newValues[mOp.getVRegValue()].count(i)) { 02548 Instruction *tmp = new TmpInstruction(newValues[mOp.getVRegValue()][i]); 02549 02550 //Get machine code for this instruction 02551 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 02552 tempMvec.addTemp((Value*) tmp); 02553 02554 //assert of no kernelPHI for this value 02555 assert(kernelPHIs[mOp.getVRegValue()][i] !=0 && "Must have final kernel phi to construct epilogue phi"); 02556 02557 MachineInstr *saveValue = BuildMI(machineBB, V9::PHI, 3).addReg(newValues[mOp.getVRegValue()][i]).addReg(kernelPHIs[mOp.getVRegValue()][i]).addRegDef(tmp); 02558 DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n"); 02559 valPHIs[mOp.getVRegValue()] = tmp; 02560 } 02561 } 02562 02563 if(valPHIs.count(mOp.getVRegValue())) { 02564 //Update the operand in the cloned instruction 02565 clone->getOperand(opNum).setValueReg(valPHIs[mOp.getVRegValue()]); 02566 } 02567 } 02568 else if((mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef())) { 02569 inEpilogue[mOp.getVRegValue()] = i; 02570 } 02571 02572 } 02573 machineBB->push_back(clone); 02574 //if(MTI->isBranch(clone->getOpcode())) 02575 //BuildMI(machineBB, V9::NOP, 0); 02576 } 02577 } 02578 } 02579 (((MachineBasicBlock*)MBB)->getParent())->getBasicBlockList().push_back(machineBB); 02580 current_epilogue.push_back(machineBB); 02581 current_llvm_epilogue.push_back(llvmBB); 02582 } 02583 02584 DEBUG(std::cerr << "EPILOGUE #" << i << "\n"); 02585 DEBUG(for(std::vector<MachineBasicBlock*>::iterator B = current_epilogue.begin(), BE = current_epilogue.end(); B != BE; ++B) { 02586 (*B)->print(std::cerr);}); 02587 02588 epilogues.push_back(current_epilogue); 02589 llvm_epilogues.push_back(current_llvm_epilogue); 02590 } 02591 } 02592 02593 void ModuloSchedulingSBPass::writeKernel(std::vector<BasicBlock*> &llvmBB, std::vector<MachineBasicBlock*> &machineBB, std::map<const Value*, std::pair<const MachineInstr*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues, std::map<Value*, MachineBasicBlock*> &newValLocation, std::map<Value*, std::map<int, Value*> > &kernelPHIs) { 02594 02595 //Keep track of operands that are read and saved from a previous iteration. The new clone 02596 //instruction will use the result of the phi instead. 02597 std::map<Value*, Value*> finalPHIValue; 02598 std::map<Value*, Value*> kernelValue; 02599 02600 //Branches are a special case 02601 std::vector<MachineInstr*> branches; 02602 02603 //Get target information to look at machine operands 02604 const TargetInstrInfo *mii = target.getInstrInfo(); 02605 unsigned index = 0; 02606 int numBr = 0; 02607 bool seenBranch = false; 02608 02609 //Create TmpInstructions for the final phis 02610 for(MSScheduleSB::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) { 02611 02612 DEBUG(std::cerr << "Stage: " << I->second << " Inst: " << *(I->first) << "\n";); 02613 02614 //Clone instruction 02615 const MachineInstr *inst = I->first; 02616 MachineInstr *instClone = inst->clone(); 02617 02618 if(seenBranch && !mii->isBranch(instClone->getOpcode())) { 02619 index++; 02620 seenBranch = false; 02621 numBr = 0; 02622 } 02623 else if(seenBranch && (numBr == 2)) { 02624 index++; 02625 numBr = 0; 02626 } 02627 02628 //Insert into machine basic block 02629 assert(index < machineBB.size() && "Must have a valid index into kernel MBBs"); 02630 machineBB[index]->push_back(instClone); 02631 02632 if(mii->isBranch(instClone->getOpcode())) { 02633 BuildMI(machineBB[index], V9::NOP, 0); 02634 02635 seenBranch = true; 02636 numBr++; 02637 } 02638 02639 DEBUG(std::cerr << "Cloned Inst: " << *instClone << "\n"); 02640 02641 //Loop over Machine Operands 02642 for(unsigned i=0; i < inst->getNumOperands(); ++i) { 02643 //get machine operand 02644 const MachineOperand &mOp = inst->getOperand(i); 02645 02646 if(I->second != 0) { 02647 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) { 02648 02649 //Check to see where this operand is defined if this instruction is from max stage 02650 if(I->second == schedule.getMaxStage()) { 02651 DEBUG(std::cerr << "VREG: " << *(mOp.getVRegValue()) << "\n"); 02652 } 02653 02654 //If its in the value saved, we need to create a temp instruction and use that instead 02655 if(valuesToSave.count(mOp.getVRegValue())) { 02656 02657 //Check if we already have a final PHI value for this 02658 if(!finalPHIValue.count(mOp.getVRegValue())) { 02659 //Only create phi if the operand def is from a stage before this one 02660 if(schedule.defPreviousStage(mOp.getVRegValue(), I->second)) { 02661 TmpInstruction *tmp = new TmpInstruction(mOp.getVRegValue()); 02662 02663 //Get machine code for this instruction 02664 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 02665 tempMvec.addTemp((Value*) tmp); 02666 02667 //Update the operand in the cloned instruction 02668 instClone->getOperand(i).setValueReg(tmp); 02669 02670 //save this as our final phi 02671 finalPHIValue[mOp.getVRegValue()] = tmp; 02672 newValLocation[tmp] = machineBB[index]; 02673 } 02674 } 02675 else { 02676 //Use the previous final phi value 02677 instClone->getOperand(i).setValueReg(finalPHIValue[mOp.getVRegValue()]); 02678 } 02679 } 02680 } 02681 } 02682 if(I->second != schedule.getMaxStage()) { 02683 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) { 02684 if(valuesToSave.count(mOp.getVRegValue())) { 02685 02686 TmpInstruction *tmp = new TmpInstruction(mOp.getVRegValue()); 02687 02688 //Get machine code for this instruction 02689 MachineCodeForInstruction & tempVec = MachineCodeForInstruction::get(defaultInst); 02690 tempVec.addTemp((Value*) tmp); 02691 02692 //Create new machine instr and put in MBB 02693 MachineInstr *saveValue; 02694 if(mOp.getVRegValue()->getType() == Type::FloatTy) 02695 saveValue = BuildMI(machineBB[index], V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); 02696 else if(mOp.getVRegValue()->getType() == Type::DoubleTy) 02697 saveValue = BuildMI(machineBB[index], V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); 02698 else 02699 saveValue = BuildMI(machineBB[index], V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); 02700 02701 02702 //Save for future cleanup 02703 kernelValue[mOp.getVRegValue()] = tmp; 02704 newValLocation[tmp] = machineBB[index]; 02705 kernelPHIs[mOp.getVRegValue()][schedule.getMaxStage()-1] = tmp; 02706 } 02707 } 02708 } 02709 } 02710 02711 } 02712 02713 //Loop over each value we need to generate phis for 02714 for(std::map<Value*, std::map<int, Value*> >::iterator V = newValues.begin(), 02715 E = newValues.end(); V != E; ++V) { 02716 02717 02718 DEBUG(std::cerr << "Writing phi for" << *(V->first)); 02719 DEBUG(std::cerr << "\nMap of Value* for this phi\n"); 02720 DEBUG(for(std::map<int, Value*>::iterator I = V->second.begin(), 02721 IE = V->second.end(); I != IE; ++I) { 02722 std::cerr << "Stage: " << I->first; 02723 std::cerr << " Value: " << *(I->second) << "\n"; 02724 }); 02725 02726 //If we only have one current iteration live, its safe to set 02727 //lastPhi = to kernel value 02728 if(V->second.size() == 1) { 02729 assert(kernelValue[V->first] != 0 && "Kernel value* must exist to create phi"); 02730 MachineInstr *saveValue = BuildMI(*machineBB[0], machineBB[0]->begin(),V9::PHI, 3).addReg(V->second.begin()->second).addReg(kernelValue[V->first]).addRegDef(finalPHIValue[V->first]); 02731 DEBUG(std::cerr << "Resulting PHI (one live): " << *saveValue << "\n"); 02732 kernelPHIs[V->first][V->second.begin()->first] = kernelValue[V->first]; 02733 DEBUG(std::cerr << "Put kernel phi in at stage: " << schedule.getMaxStage()-1 << " (map stage = " << V->second.begin()->first << ")\n"); 02734 } 02735 else { 02736 02737 //Keep track of last phi created. 02738 Instruction *lastPhi = 0; 02739 02740 unsigned count = 1; 02741 //Loop over the the map backwards to generate phis 02742 for(std::map<int, Value*>::reverse_iterator I = V->second.rbegin(), IE = V->second.rend(); 02743 I != IE; ++I) { 02744 02745 if(count < (V->second).size()) { 02746 if(lastPhi == 0) { 02747 lastPhi = new TmpInstruction(I->second); 02748 02749 //Get machine code for this instruction 02750 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 02751 tempMvec.addTemp((Value*) lastPhi); 02752 02753 MachineInstr *saveValue = BuildMI(*machineBB[0], machineBB[0]->begin(), V9::PHI, 3).addReg(kernelValue[V->first]).addReg(I->second).addRegDef(lastPhi); 02754 DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n"); 02755 newValLocation[lastPhi] = machineBB[0]; 02756 } 02757 else { 02758 Instruction *tmp = new TmpInstruction(I->second); 02759 02760 //Get machine code for this instruction 02761 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 02762 tempMvec.addTemp((Value*) tmp); 02763 02764 02765 MachineInstr *saveValue = BuildMI(*machineBB[0], machineBB[0]->begin(), V9::PHI, 3).addReg(lastPhi).addReg(I->second).addRegDef(tmp); 02766 DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n"); 02767 lastPhi = tmp; 02768 kernelPHIs[V->first][I->first] = lastPhi; 02769 newValLocation[lastPhi] = machineBB[0]; 02770 } 02771 } 02772 //Final phi value 02773 else { 02774 //The resulting value must be the Value* we created earlier 02775 assert(lastPhi != 0 && "Last phi is NULL!\n"); 02776 MachineInstr *saveValue = BuildMI(*machineBB[0], machineBB[0]->begin(), V9::PHI, 3).addReg(lastPhi).addReg(I->second).addRegDef(finalPHIValue[V->first]); 02777 DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n"); 02778 kernelPHIs[V->first][I->first] = finalPHIValue[V->first]; 02779 } 02780 02781 ++count; 02782 } 02783 02784 } 02785 } 02786 } 02787 02788 02789 void ModuloSchedulingSBPass::removePHIs(std::vector<const MachineBasicBlock*> &SB, std::vector<std::vector<MachineBasicBlock*> > &prologues, std::vector<std::vector<MachineBasicBlock*> > &epilogues, std::vector<MachineBasicBlock*> &kernelBB, std::map<Value*, MachineBasicBlock*> &newValLocation) { 02790 02791 //Worklist to delete things 02792 std::vector<std::pair<MachineBasicBlock*, MachineBasicBlock::iterator> > worklist; 02793 02794 //Worklist of TmpInstructions that need to be added to a MCFI 02795 std::vector<Instruction*> addToMCFI; 02796 02797 //Worklist to add OR instructions to end of kernel so not to invalidate the iterator 02798 //std::vector<std::pair<Instruction*, Value*> > newORs; 02799 02800 const TargetInstrInfo *TMI = target.getInstrInfo(); 02801 02802 //Start with the kernel and for each phi insert a copy for the phi 02803 //def and for each arg 02804 //phis are only in the first BB in the kernel 02805 for(MachineBasicBlock::iterator I = kernelBB[0]->begin(), E = kernelBB[0]->end(); 02806 I != E; ++I) { 02807 02808 DEBUG(std::cerr << "Looking at Instr: " << *I << "\n"); 02809 02810 //Get op code and check if its a phi 02811 if(I->getOpcode() == V9::PHI) { 02812 02813 DEBUG(std::cerr << "Replacing PHI: " << *I << "\n"); 02814 Instruction *tmp = 0; 02815 02816 for(unsigned i = 0; i < I->getNumOperands(); ++i) { 02817 02818 //Get Operand 02819 const MachineOperand &mOp = I->getOperand(i); 02820 assert(mOp.getType() == MachineOperand::MO_VirtualRegister 02821 && "Should be a Value*\n"); 02822 02823 if(!tmp) { 02824 tmp = new TmpInstruction(mOp.getVRegValue()); 02825 addToMCFI.push_back(tmp); 02826 } 02827 02828 //Now for all our arguments we read, OR to the new 02829 //TmpInstruction that we created 02830 if(mOp.isUse()) { 02831 DEBUG(std::cerr << "Use: " << mOp << "\n"); 02832 //Place a copy at the end of its BB but before the branches 02833 assert(newValLocation.count(mOp.getVRegValue()) && "We must know where this value is located\n"); 02834 //Reverse iterate to find the branches, we can safely assume no instructions have been 02835 //put in the nop positions 02836 for(MachineBasicBlock::iterator inst = --(newValLocation[mOp.getVRegValue()])->end(), endBB = (newValLocation[mOp.getVRegValue()])->begin(); inst != endBB; --inst) { 02837 MachineOpCode opc = inst->getOpcode(); 02838 if(TMI->isBranch(opc) || TMI->isNop(opc)) 02839 continue; 02840 else { 02841 if(mOp.getVRegValue()->getType() == Type::FloatTy) 02842 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); 02843 else if(mOp.getVRegValue()->getType() == Type::DoubleTy) 02844 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); 02845 else 02846 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); 02847 02848 break; 02849 } 02850 02851 } 02852 02853 } 02854 else { 02855 //Remove the phi and replace it with an OR 02856 DEBUG(std::cerr << "Def: " << mOp << "\n"); 02857 //newORs.push_back(std::make_pair(tmp, mOp.getVRegValue())); 02858 if(tmp->getType() == Type::FloatTy) 02859 BuildMI(*kernelBB[0], I, V9::FMOVS, 3).addReg(tmp).addRegDef(mOp.getVRegValue()); 02860 else if(tmp->getType() == Type::DoubleTy) 02861 BuildMI(*kernelBB[0], I, V9::FMOVD, 3).addReg(tmp).addRegDef(mOp.getVRegValue()); 02862 else 02863 BuildMI(*kernelBB[0], I, V9::ORr, 3).addReg(tmp).addImm(0).addRegDef(mOp.getVRegValue()); 02864 02865 02866 worklist.push_back(std::make_pair(kernelBB[0], I)); 02867 } 02868 02869 } 02870 02871 } 02872 02873 02874 } 02875 02876 //Add TmpInstructions to some MCFI 02877 if(addToMCFI.size() > 0) { 02878 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 02879 for(unsigned x = 0; x < addToMCFI.size(); ++x) { 02880 tempMvec.addTemp(addToMCFI[x]); 02881 } 02882 addToMCFI.clear(); 02883 } 02884 02885 02886 //Remove phis from epilogue 02887 for(std::vector<std::vector<MachineBasicBlock*> >::iterator MB = epilogues.begin(), 02888 ME = epilogues.end(); MB != ME; ++MB) { 02889 02890 for(std::vector<MachineBasicBlock*>::iterator currentMBB = MB->begin(), currentME = MB->end(); currentMBB != currentME; ++currentMBB) { 02891 02892 for(MachineBasicBlock::iterator I = (*currentMBB)->begin(), 02893 E = (*currentMBB)->end(); I != E; ++I) { 02894 02895 DEBUG(std::cerr << "Looking at Instr: " << *I << "\n"); 02896 //Get op code and check if its a phi 02897 if(I->getOpcode() == V9::PHI) { 02898 Instruction *tmp = 0; 02899 02900 for(unsigned i = 0; i < I->getNumOperands(); ++i) { 02901 //Get Operand 02902 const MachineOperand &mOp = I->getOperand(i); 02903 assert(mOp.getType() == MachineOperand::MO_VirtualRegister && "Should be a Value*\n"); 02904 02905 if(!tmp) { 02906 tmp = new TmpInstruction(mOp.getVRegValue()); 02907 addToMCFI.push_back(tmp); 02908 } 02909 02910 //Now for all our arguments we read, OR to the new TmpInstruction that we created 02911 if(mOp.isUse()) { 02912 DEBUG(std::cerr << "Use: " << mOp << "\n"); 02913 //Place a copy at the end of its BB but before the branches 02914 assert(newValLocation.count(mOp.getVRegValue()) && "We must know where this value is located\n"); 02915 //Reverse iterate to find the branches, we can safely assume no instructions have been 02916 //put in the nop positions 02917 for(MachineBasicBlock::iterator inst = --(newValLocation[mOp.getVRegValue()])->end(), endBB = (newValLocation[mOp.getVRegValue()])->begin(); inst != endBB; --inst) { 02918 MachineOpCode opc = inst->getOpcode(); 02919 if(TMI->isBranch(opc) || TMI->isNop(opc)) 02920 continue; 02921 else { 02922 if(mOp.getVRegValue()->getType() == Type::FloatTy) 02923 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); 02924 else if(mOp.getVRegValue()->getType() == Type::DoubleTy) 02925 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp); 02926 else 02927 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp); 02928 02929 02930 break; 02931 } 02932 02933 } 02934 02935 } 02936 else { 02937 //Remove the phi and replace it with an OR 02938 DEBUG(std::cerr << "Def: " << mOp << "\n"); 02939 if(tmp->getType() == Type::FloatTy) 02940 BuildMI(**currentMBB, I, V9::FMOVS, 3).addReg(tmp).addRegDef(mOp.getVRegValue()); 02941 else if(tmp->getType() == Type::DoubleTy) 02942 BuildMI(**currentMBB, I, V9::FMOVD, 3).addReg(tmp).addRegDef(mOp.getVRegValue()); 02943 else 02944 BuildMI(**currentMBB, I, V9::ORr, 3).addReg(tmp).addImm(0).addRegDef(mOp.getVRegValue()); 02945 02946 worklist.push_back(std::make_pair(*currentMBB,I)); 02947 } 02948 } 02949 } 02950 } 02951 } 02952 } 02953 02954 02955 if(addToMCFI.size() > 0) { 02956 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst); 02957 for(unsigned x = 0; x < addToMCFI.size(); ++x) { 02958 tempMvec.addTemp(addToMCFI[x]); 02959 } 02960 addToMCFI.clear(); 02961 } 02962 02963 //Delete the phis 02964 for(std::vector<std::pair<MachineBasicBlock*, MachineBasicBlock::iterator> >::iterator I = worklist.begin(), E = worklist.end(); I != E; ++I) { 02965 DEBUG(std::cerr << "Deleting PHI " << *I->second << "\n"); 02966 I->first->erase(I->second); 02967 02968 } 02969 02970 02971 assert((addToMCFI.size() == 0) && "We should have added all TmpInstructions to some MachineCodeForInstruction"); 02972 } 02973 02974 02975 02976 02977 void ModuloSchedulingSBPass::writeSideExits(std::vector<std::vector<MachineBasicBlock *> > &prologues, std::vector<std::vector<BasicBlock*> > &llvm_prologues, std::vector<std::vector<MachineBasicBlock *> > &epilogues, std::vector<std::vector<BasicBlock*> > &llvm_epilogues, std::map<const MachineBasicBlock*, Value*> &sideExits, std::map<MachineBasicBlock*, std::vector<std::pair<MachineInstr*, int> > > &instrsMovedDown, std::vector<const MachineBasicBlock*> &SB, std::vector<MachineBasicBlock*> &kernelMBBs, std::map<MachineBasicBlock*, int> branchStage) { 02978 02979 const TargetInstrInfo *TMI = target.getInstrInfo(); 02980 02981 //Repeat for each side exit 02982 for(unsigned sideExitNum = 0; sideExitNum < SB.size()-1; ++sideExitNum) { 02983 02984 std::vector<std::vector<BasicBlock*> > side_llvm_epilogues; 02985 std::vector<std::vector<MachineBasicBlock*> > side_epilogues; 02986 MachineBasicBlock* sideMBB; 02987 BasicBlock* sideBB; 02988 02989 //Create side exit blocks 02990 //Get the LLVM basic block 02991 BasicBlock *bb = (BasicBlock*) SB[sideExitNum]->getBasicBlock(); 02992 MachineBasicBlock *mbb = (MachineBasicBlock*) SB[sideExitNum]; 02993 02994 int stage = branchStage[mbb]; 02995 02996 //Create new basic blocks for our side exit instructios that were moved below the branch 02997 sideBB = new BasicBlock("SideExit", (Function*) bb->getParent()); 02998 sideMBB = new MachineBasicBlock(sideBB); 02999 (((MachineBasicBlock*)SB[0])->getParent())->getBasicBlockList().push_back(sideMBB); 03000 03001 03002 if(instrsMovedDown.count(mbb)) { 03003 for(std::vector<std::pair<MachineInstr*, int> >::iterator I = instrsMovedDown[mbb].begin(), E = instrsMovedDown[mbb].end(); I != E; ++I) { 03004 if(branchStage[mbb] == I->second) 03005 sideMBB->push_back((I->first)->clone()); 03006 } 03007 03008 //Add unconditional branches to original exits 03009 BuildMI(sideMBB, V9::BA, 1).addPCDisp(sideExits[mbb]); 03010 BuildMI(sideMBB, V9::NOP, 0); 03011 03012 //Add unconditioal branch to llvm BB 03013 BasicBlock *extBB = dyn_cast<BasicBlock>(sideExits[mbb]); 03014 assert(extBB && "Side exit basicblock can not be null"); 03015 TerminatorInst *newBranch = new BranchInst(extBB, sideBB); 03016 } 03017 03018 //Clone epilogues and update their branches, one cloned epilogue set per side exit 03019 //only clone epilogues that are from a greater stage! 03020 for(unsigned i = 0; i < epilogues.size()-stage; ++i) { 03021 std::vector<MachineBasicBlock*> MB = epilogues[i]; 03022 03023 std::vector<MachineBasicBlock*> newEp; 03024 std::vector<BasicBlock*> newLLVMEp; 03025 03026 for(std::vector<MachineBasicBlock*>::iterator currentMBB = MB.begin(), 03027 lastMBB = MB.end(); currentMBB != lastMBB; ++currentMBB) { 03028 BasicBlock *tmpBB = new BasicBlock("SideEpilogue", (Function*) (*currentMBB)->getBasicBlock()->getParent()); 03029 MachineBasicBlock *tmp = new MachineBasicBlock(tmpBB); 03030 03031 //Clone instructions and insert into new MBB 03032 for(MachineBasicBlock::iterator I = (*currentMBB)->begin(), 03033 E = (*currentMBB)->end(); I != E; ++I) { 03034 03035 MachineInstr *clone = I->clone(); 03036 if(clone->getOpcode() == V9::BA && (currentMBB+1 == lastMBB)) { 03037 //update branch to side exit 03038 for(unsigned i = 0; i < clone->getNumOperands(); ++i) { 03039 MachineOperand &mOp = clone->getOperand(i); 03040 if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) { 03041 mOp.setValueReg(sideBB); 03042 } 03043 } 03044 } 03045 03046 tmp->push_back(clone); 03047 03048 } 03049 03050 //Add llvm branch 03051 TerminatorInst *newBranch = new BranchInst(sideBB, tmpBB); 03052 03053 newEp.push_back(tmp); 03054 (((MachineBasicBlock*)SB[0])->getParent())->getBasicBlockList().push_back(tmp); 03055 03056 newLLVMEp.push_back(tmpBB); 03057 03058 } 03059 side_llvm_epilogues.push_back(newLLVMEp); 03060 side_epilogues.push_back(newEp); 03061 } 03062 03063 //Now stich up all the branches 03064 03065 //Loop over prologues, and if its an inner branch and branches to our original side exit 03066 //then have it branch to the appropriate epilogue first (if it exists) 03067 for(unsigned P = 0; P < prologues.size(); ++P) { 03068 03069 //Get BB side exit we are dealing with 03070 MachineBasicBlock *currentMBB = prologues[P][sideExitNum]; 03071 if(P >= (unsigned) stage) { 03072 //Iterate backwards of machine instructions to find the branch we need to update 03073 for(MachineBasicBlock::reverse_iterator mInst = currentMBB->rbegin(), mInstEnd = currentMBB->rend(); mInst != mInstEnd; ++mInst) { 03074 MachineOpCode OC = mInst->getOpcode(); 03075 03076 //If its a branch update its branchto 03077 if(TMI->isBranch(OC)) { 03078 for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) { 03079 MachineOperand &mOp = mInst->getOperand(opNum); 03080 if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) { 03081 //Check if we branch to side exit 03082 if(mOp.getVRegValue() == sideExits[mbb]) { 03083 mOp.setValueReg(side_llvm_epilogues[P][0]); 03084 } 03085 } 03086 } 03087 DEBUG(std::cerr << "New Prologue Branch: " << *mInst << "\n"); 03088 } 03089 } 03090 03091 //Update llvm branch 03092 TerminatorInst *branchVal = ((BasicBlock*) currentMBB->getBasicBlock())->getTerminator(); 03093 DEBUG(std::cerr << *branchVal << "\n"); 03094 03095 for(unsigned i=0; i < branchVal->getNumSuccessors(); ++i) { 03096 if(branchVal->getSuccessor(i) == sideExits[mbb]) { 03097 DEBUG(std::cerr << "Replacing successor bb\n"); 03098 branchVal->setSuccessor(i, side_llvm_epilogues[P][0]); 03099 } 03100 } 03101 } 03102 else { 03103 //must add BA branch because another prologue or kernel has the actual side exit branch 03104 //Add unconditional branches to original exits 03105 assert( (sideExitNum+1) < prologues[P].size() && "must have valid prologue to branch to"); 03106 BuildMI(prologues[P][sideExitNum], V9::BA, 1).addPCDisp((BasicBlock*)(prologues[P][sideExitNum+1])->getBasicBlock()); 03107 BuildMI(prologues[P][sideExitNum], V9::NOP, 0); 03108 03109 TerminatorInst *newBranch = new BranchInst((BasicBlock*) (prologues[P][sideExitNum+1])->getBasicBlock(), (BasicBlock*) (prologues[P][sideExitNum])->getBasicBlock()); 03110 03111 } 03112 } 03113 03114 03115 //Update side exits in kernel 03116 MachineBasicBlock *currentMBB = kernelMBBs[sideExitNum]; 03117 //Iterate backwards of machine instructions to find the branch we need to update 03118 for(MachineBasicBlock::reverse_iterator mInst = currentMBB->rbegin(), mInstEnd = currentMBB->rend(); mInst != mInstEnd; ++mInst) { 03119 MachineOpCode OC = mInst->getOpcode(); 03120 03121 //If its a branch update its branchto 03122 if(TMI->isBranch(OC)) { 03123 for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) { 03124 MachineOperand &mOp = mInst->getOperand(opNum); 03125 if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) { 03126 //Check if we branch to side exit 03127 if(mOp.getVRegValue() == sideExits[mbb]) { 03128 if(side_llvm_epilogues.size() > 0) 03129 mOp.setValueReg(side_llvm_epilogues[0][0]); 03130 else 03131 mOp.setValueReg(sideBB); 03132 } 03133 } 03134 } 03135 DEBUG(std::cerr << "New Prologue Branch: " << *mInst << "\n"); 03136 } 03137 } 03138 03139 //Update llvm branch 03140 //Update llvm branch 03141 TerminatorInst *branchVal = ((BasicBlock*)currentMBB->getBasicBlock())->getTerminator(); 03142 DEBUG(std::cerr << *branchVal << "\n"); 03143 03144 for(unsigned i=0; i < branchVal->getNumSuccessors(); ++i) { 03145 if(branchVal->getSuccessor(i) == sideExits[mbb]) { 03146 DEBUG(std::cerr << "Replacing successor bb\n"); 03147 if(side_llvm_epilogues.size() > 0) 03148 branchVal->setSuccessor(i, side_llvm_epilogues[0][0]); 03149 else 03150 branchVal->setSuccessor(i, sideBB); 03151 } 03152 } 03153 } 03154 } 03155