LLVM API Documentation

ModuloScheduling.cpp

Go to the documentation of this file.
00001 //===-- ModuloScheduling.cpp - ModuloScheduling  ----------------*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 //  This ModuloScheduling pass is based on the Swing Modulo Scheduling
00011 //  algorithm.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #define DEBUG_TYPE "ModuloSched"
00016 
00017 #include "ModuloScheduling.h"
00018 #include "llvm/Constants.h"
00019 #include "llvm/Instructions.h"
00020 #include "llvm/Function.h"
00021 #include "llvm/CodeGen/MachineFunction.h"
00022 #include "llvm/CodeGen/Passes.h"
00023 #include "llvm/Support/CFG.h"
00024 #include "llvm/Target/TargetSchedInfo.h"
00025 #include "llvm/Support/Debug.h"
00026 #include "llvm/Support/GraphWriter.h"
00027 #include "llvm/ADT/SCCIterator.h"
00028 #include "llvm/ADT/StringExtras.h"
00029 #include "llvm/ADT/Statistic.h"
00030 #include "llvm/Support/Timer.h"
00031 #include <cmath>
00032 #include <algorithm>
00033 #include <fstream>
00034 #include <sstream>
00035 #include <utility>
00036 #include <vector>
00037 #include "../MachineCodeForInstruction.h"
00038 #include "../SparcV9TmpInstr.h"
00039 #include "../SparcV9Internals.h"
00040 #include "../SparcV9RegisterInfo.h"
00041 using namespace llvm;
00042 
00043 /// Create ModuloSchedulingPass
00044 ///
00045 FunctionPass *llvm::createModuloSchedulingPass(TargetMachine & targ) {
00046   DEBUG(std::cerr << "Created ModuloSchedulingPass\n");
00047   return new ModuloSchedulingPass(targ);
00048 }
00049 
00050 
00051 //Graph Traits for printing out the dependence graph
00052 template<typename GraphType>
00053 static void WriteGraphToFile(std::ostream &O, const std::string &GraphName,
00054                              const GraphType &GT) {
00055   std::string Filename = GraphName + ".dot";
00056   O << "Writing '" << Filename << "'...";
00057   std::ofstream F(Filename.c_str());
00058 
00059   if (F.good())
00060     WriteGraph(F, GT);
00061   else
00062     O << "  error opening file for writing!";
00063   O << "\n";
00064 };
00065 
00066 
00067 #if 1
00068 #define TIME_REGION(VARNAME, DESC) \
00069    NamedRegionTimer VARNAME(DESC)
00070 #else
00071 #define TIME_REGION(VARNAME, DESC)
00072 #endif
00073 
00074 
00075 //Graph Traits for printing out the dependence graph
00076 namespace llvm {
00077 
00078   //Loop statistics
00079   Statistic<> ValidLoops("modulosched-validLoops", "Number of candidate loops modulo-scheduled");
00080   Statistic<> JumboBB("modulosched-jumboBB", "Basic Blocks with more then 100 instructions");
00081   Statistic<> LoopsWithCalls("modulosched-loopCalls", "Loops with calls");
00082   Statistic<> LoopsWithCondMov("modulosched-loopCondMov", "Loops with conditional moves");
00083   Statistic<> InvalidLoops("modulosched-invalidLoops", "Loops with unknown trip counts or loop invariant trip counts");
00084   Statistic<> SingleBBLoops("modulosched-singeBBLoops", "Number of single basic block loops");
00085 
00086   //Scheduling Statistics
00087   Statistic<> MSLoops("modulosched-schedLoops", "Number of loops successfully modulo-scheduled");
00088   Statistic<> NoSched("modulosched-noSched", "No schedule");
00089   Statistic<> SameStage("modulosched-sameStage", "Max stage is 0");
00090   Statistic<> ResourceConstraint("modulosched-resourceConstraint", "Loops constrained by resources");
00091   Statistic<> RecurrenceConstraint("modulosched-recurrenceConstraint", "Loops constrained by recurrences");
00092    Statistic<> FinalIISum("modulosched-finalIISum", "Sum of all final II");
00093   Statistic<> IISum("modulosched-IISum", "Sum of all theoretical II");
00094 
00095   template<>
00096   struct DOTGraphTraits<MSchedGraph*> : public DefaultDOTGraphTraits {
00097     static std::string getGraphName(MSchedGraph *F) {
00098       return "Dependence Graph";
00099     }
00100 
00101     static std::string getNodeLabel(MSchedGraphNode *Node, MSchedGraph *Graph) {
00102       if (Node->getInst()) {
00103         std::stringstream ss;
00104         ss << *(Node->getInst());
00105         return ss.str(); //((MachineInstr*)Node->getInst());
00106       }
00107       else
00108         return "No Inst";
00109     }
00110     static std::string getEdgeSourceLabel(MSchedGraphNode *Node,
00111                                           MSchedGraphNode::succ_iterator I) {
00112       //Label each edge with the type of dependence
00113       std::string edgelabel = "";
00114       switch (I.getEdge().getDepOrderType()) {
00115 
00116       case MSchedGraphEdge::TrueDep:
00117         edgelabel = "True";
00118         break;
00119 
00120       case MSchedGraphEdge::AntiDep:
00121         edgelabel =  "Anti";
00122         break;
00123 
00124       case MSchedGraphEdge::OutputDep:
00125         edgelabel = "Output";
00126         break;
00127 
00128       default:
00129         edgelabel = "Unknown";
00130         break;
00131       }
00132 
00133       //FIXME
00134       int iteDiff = I.getEdge().getIteDiff();
00135       std::string intStr = "(IteDiff: ";
00136       intStr += itostr(iteDiff);
00137 
00138       intStr += ")";
00139       edgelabel += intStr;
00140 
00141       return edgelabel;
00142     }
00143   };
00144 }
00145 
00146 
00147 #include <unistd.h>
00148 
00149 /// ModuloScheduling::runOnFunction - main transformation entry point
00150 /// The Swing Modulo Schedule algorithm has three basic steps:
00151 /// 1) Computation and Analysis of the dependence graph
00152 /// 2) Ordering of the nodes
00153 /// 3) Scheduling
00154 ///
00155 bool ModuloSchedulingPass::runOnFunction(Function &F) {
00156   alarm(100);
00157 
00158   bool Changed = false;
00159   int numMS = 0;
00160 
00161   DEBUG(std::cerr << "Creating ModuloSchedGraph for each valid BasicBlock in " + F.getName() + "\n");
00162 
00163   //Get MachineFunction
00164   MachineFunction &MF = MachineFunction::get(&F);
00165 
00166   DependenceAnalyzer &DA = getAnalysis<DependenceAnalyzer>();
00167 
00168 
00169   //Worklist
00170   std::vector<MachineBasicBlock*> Worklist;
00171 
00172   //Iterate over BasicBlocks and put them into our worklist if they are valid
00173   for (MachineFunction::iterator BI = MF.begin(); BI != MF.end(); ++BI)
00174     if(MachineBBisValid(BI)) {
00175       if(BI->size() < 100) {
00176         Worklist.push_back(&*BI);
00177         ++ValidLoops;
00178       }
00179       else
00180         ++JumboBB;
00181 
00182     }
00183 
00184   defaultInst = 0;
00185 
00186   DEBUG(if(Worklist.size() == 0) std::cerr << "No single basic block loops in function to ModuloSchedule\n");
00187 
00188   //Iterate over the worklist and perform scheduling
00189   for(std::vector<MachineBasicBlock*>::iterator BI = Worklist.begin(),
00190         BE = Worklist.end(); BI != BE; ++BI) {
00191 
00192     //Print out BB for debugging
00193     DEBUG(std::cerr << "BB Size: " << (*BI)->size() << "\n");
00194     DEBUG(std::cerr << "ModuloScheduling BB: \n"; (*BI)->print(std::cerr));
00195 
00196     //Print out LLVM BB
00197     DEBUG(std::cerr << "ModuloScheduling LLVMBB: \n"; (*BI)->getBasicBlock()->print(std::cerr));
00198 
00199     //Catch the odd case where we only have TmpInstructions and no real Value*s
00200     if(!CreateDefMap(*BI)) {
00201       //Clear out our maps for the next basic block that is processed
00202       nodeToAttributesMap.clear();
00203       partialOrder.clear();
00204       recurrenceList.clear();
00205       FinalNodeOrder.clear();
00206       schedule.clear();
00207       defMap.clear();
00208       continue;
00209     }
00210 
00211     MSchedGraph *MSG = new MSchedGraph(*BI, target, indVarInstrs[*BI], DA, machineTollvm[*BI]);
00212 
00213     //Write Graph out to file
00214     DEBUG(WriteGraphToFile(std::cerr, F.getName(), MSG));
00215     DEBUG(MSG->print(std::cerr));
00216 
00217     //Calculate Resource II
00218     int ResMII = calculateResMII(*BI);
00219 
00220     //Calculate Recurrence II
00221     int RecMII = calculateRecMII(MSG, ResMII);
00222 
00223     DEBUG(std::cerr << "Number of reccurrences found: " << recurrenceList.size() << "\n");
00224 
00225     //Our starting initiation interval is the maximum of RecMII and ResMII
00226     if(RecMII < ResMII)
00227       ++RecurrenceConstraint;
00228     else
00229       ++ResourceConstraint;
00230 
00231     II = std::max(RecMII, ResMII);
00232     int mII = II;
00233 
00234     //Print out II, RecMII, and ResMII
00235     DEBUG(std::cerr << "II starts out as " << II << " ( RecMII=" << RecMII << " and ResMII=" << ResMII << ")\n");
00236 
00237     //Dump node properties if in debug mode
00238     DEBUG(for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I =  nodeToAttributesMap.begin(),
00239                 E = nodeToAttributesMap.end(); I !=E; ++I) {
00240             std::cerr << "Node: " << *(I->first) << " ASAP: " << I->second.ASAP << " ALAP: "
00241                       << I->second.ALAP << " MOB: " << I->second.MOB << " Depth: " << I->second.depth
00242                       << " Height: " << I->second.height << "\n";
00243           });
00244 
00245     //Calculate Node Properties
00246     calculateNodeAttributes(MSG, ResMII);
00247 
00248     //Dump node properties if in debug mode
00249     DEBUG(for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I =  nodeToAttributesMap.begin(),
00250                 E = nodeToAttributesMap.end(); I !=E; ++I) {
00251             std::cerr << "Node: " << *(I->first) << " ASAP: " << I->second.ASAP << " ALAP: "
00252                       << I->second.ALAP << " MOB: " << I->second.MOB << " Depth: " << I->second.depth
00253                       << " Height: " << I->second.height << "\n";
00254           });
00255 
00256     //Put nodes in order to schedule them
00257     computePartialOrder();
00258 
00259     //Dump out partial order
00260     DEBUG(for(std::vector<std::set<MSchedGraphNode*> >::iterator I = partialOrder.begin(),
00261                 E = partialOrder.end(); I !=E; ++I) {
00262             std::cerr << "Start set in PO\n";
00263             for(std::set<MSchedGraphNode*>::iterator J = I->begin(), JE = I->end(); J != JE; ++J)
00264               std::cerr << "PO:" << **J << "\n";
00265           });
00266 
00267     //Place nodes in final order
00268     orderNodes();
00269 
00270     //Dump out order of nodes
00271     DEBUG(for(std::vector<MSchedGraphNode*>::iterator I = FinalNodeOrder.begin(), E = FinalNodeOrder.end(); I != E; ++I) {
00272             std::cerr << "FO:" << **I << "\n";
00273           });
00274 
00275     //Finally schedule nodes
00276     bool haveSched = computeSchedule(*BI, MSG);
00277 
00278     //Print out final schedule
00279     DEBUG(schedule.print(std::cerr));
00280 
00281     //Final scheduling step is to reconstruct the loop only if we actual have
00282     //stage > 0
00283     if(haveSched) {
00284       reconstructLoop(*BI);
00285       ++MSLoops;
00286       Changed = true;
00287       FinalIISum += II;
00288       IISum += mII;
00289 
00290       if(schedule.getMaxStage() == 0)
00291         ++SameStage;
00292     }
00293     else {
00294       ++NoSched;
00295     }
00296 
00297     //Clear out our maps for the next basic block that is processed
00298     nodeToAttributesMap.clear();
00299     partialOrder.clear();
00300     recurrenceList.clear();
00301     FinalNodeOrder.clear();
00302     schedule.clear();
00303     defMap.clear();
00304     //Clean up. Nuke old MachineBB and llvmBB
00305     //BasicBlock *llvmBB = (BasicBlock*) (*BI)->getBasicBlock();
00306     //Function *parent = (Function*) llvmBB->getParent();
00307     //Should't std::find work??
00308     //parent->getBasicBlockList().erase(std::find(parent->getBasicBlockList().begin(), parent->getBasicBlockList().end(), *llvmBB));
00309     //parent->getBasicBlockList().erase(llvmBB);
00310 
00311     //delete(llvmBB);
00312     //delete(*BI);
00313   }
00314 
00315   alarm(0);
00316   return Changed;
00317 }
00318 
00319 bool ModuloSchedulingPass::CreateDefMap(MachineBasicBlock *BI) {
00320   defaultInst = 0;
00321 
00322   for(MachineBasicBlock::iterator I = BI->begin(), E = BI->end(); I != E; ++I) {
00323     for(unsigned opNum = 0; opNum < I->getNumOperands(); ++opNum) {
00324       const MachineOperand &mOp = I->getOperand(opNum);
00325       if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) {
00326         //assert if this is the second def we have seen
00327         //DEBUG(std::cerr << "Putting " << *(mOp.getVRegValue()) << " into map\n");
00328         //assert(!defMap.count(mOp.getVRegValue()) && "Def already in the map");
00329         if(defMap.count(mOp.getVRegValue()))
00330           return false;
00331 
00332         defMap[mOp.getVRegValue()] = &*I;
00333       }
00334 
00335       //See if we can use this Value* as our defaultInst
00336       if(!defaultInst && mOp.getType() == MachineOperand::MO_VirtualRegister) {
00337         Value *V = mOp.getVRegValue();
00338         if(!isa<TmpInstruction>(V) && !isa<Argument>(V) && !isa<Constant>(V) && !isa<PHINode>(V))
00339           defaultInst = (Instruction*) V;
00340       }
00341     }
00342   }
00343 
00344   if(!defaultInst)
00345     return false;
00346 
00347   return true;
00348 
00349 }
00350 /// This function checks if a Machine Basic Block is valid for modulo
00351 /// scheduling. This means that it has no control flow (if/else or
00352 /// calls) in the block.  Currently ModuloScheduling only works on
00353 /// single basic block loops.
00354 bool ModuloSchedulingPass::MachineBBisValid(const MachineBasicBlock *BI) {
00355 
00356   bool isLoop = false;
00357 
00358   //Check first if its a valid loop
00359   for(succ_const_iterator I = succ_begin(BI->getBasicBlock()),
00360         E = succ_end(BI->getBasicBlock()); I != E; ++I) {
00361     if (*I == BI->getBasicBlock())    // has single block loop
00362       isLoop = true;
00363   }
00364 
00365   if(!isLoop)
00366     return false;
00367 
00368   //Check that we have a conditional branch (avoiding MS infinite loops)
00369   if(BranchInst *b = dyn_cast<BranchInst>(((BasicBlock*) BI->getBasicBlock())->getTerminator()))
00370     if(b->isUnconditional())
00371       return false;
00372 
00373   //Check size of our basic block.. make sure we have more then just the terminator in it
00374   if(BI->getBasicBlock()->size() == 1)
00375     return false;
00376 
00377   //Increase number of single basic block loops for stats
00378   ++SingleBBLoops;
00379 
00380   //Get Target machine instruction info
00381   const TargetInstrInfo *TMI = target.getInstrInfo();
00382 
00383   //Check each instruction and look for calls, keep map to get index later
00384   std::map<const MachineInstr*, unsigned> indexMap;
00385 
00386   unsigned count = 0;
00387   for(MachineBasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) {
00388     //Get opcode to check instruction type
00389     MachineOpCode OC = I->getOpcode();
00390 
00391     //Look for calls
00392     if(TMI->isCall(OC)) {
00393       ++LoopsWithCalls;
00394       return false;
00395     }
00396 
00397     //Look for conditional move
00398     if(OC == V9::MOVRZr || OC == V9::MOVRZi || OC == V9::MOVRLEZr || OC == V9::MOVRLEZi
00399        || OC == V9::MOVRLZr || OC == V9::MOVRLZi || OC == V9::MOVRNZr || OC == V9::MOVRNZi
00400        || OC == V9::MOVRGZr || OC == V9::MOVRGZi || OC == V9::MOVRGEZr
00401        || OC == V9::MOVRGEZi || OC == V9::MOVLEr || OC == V9::MOVLEi || OC == V9::MOVLEUr
00402        || OC == V9::MOVLEUi || OC == V9::MOVFLEr || OC == V9::MOVFLEi
00403        || OC == V9::MOVNEr || OC == V9::MOVNEi || OC == V9::MOVNEGr || OC == V9::MOVNEGi
00404        || OC == V9::MOVFNEr || OC == V9::MOVFNEi || OC == V9::MOVGr || OC == V9::MOVGi) {
00405       ++LoopsWithCondMov;
00406       return false;
00407     }
00408 
00409     indexMap[I] = count;
00410 
00411     if(TMI->isNop(OC))
00412       continue;
00413 
00414     ++count;
00415   }
00416 
00417   //Apply a simple pattern match to make sure this loop can be modulo scheduled
00418   //This means only loops with a branch associated to the iteration count
00419 
00420   //Get the branch
00421   BranchInst *b = dyn_cast<BranchInst>(((BasicBlock*) BI->getBasicBlock())->getTerminator());
00422 
00423   //Get the condition for the branch (we already checked if it was conditional)
00424   Value *cond = b->getCondition();
00425 
00426   DEBUG(std::cerr << "Condition: " << *cond << "\n");
00427 
00428   //List of instructions associated with induction variable
00429   std::set<Instruction*> indVar;
00430   std::vector<Instruction*> stack;
00431 
00432   BasicBlock *BB = (BasicBlock*) BI->getBasicBlock();
00433 
00434   //Add branch
00435   indVar.insert(b);
00436 
00437   if(Instruction *I = dyn_cast<Instruction>(cond))
00438     if(I->getParent() == BB) {
00439       if (!assocIndVar(I, indVar, stack, BB)) {
00440         ++InvalidLoops;
00441         return false;
00442       }
00443     }
00444     else {
00445       ++InvalidLoops;
00446       return false;
00447     }
00448   else {
00449     ++InvalidLoops;
00450     return false;
00451   }
00452   //The indVar set must be >= 3 instructions for this loop to match (FIX ME!)
00453   if(indVar.size() < 3 )
00454     return false;
00455 
00456   //Dump out instructions associate with indvar for debug reasons
00457   DEBUG(for(std::set<Instruction*>::iterator N = indVar.begin(), NE = indVar.end(); N != NE; ++N) {
00458           std::cerr << **N << "\n";
00459         });
00460 
00461   //Create map of machine instr to llvm instr
00462   std::map<MachineInstr*, Instruction*> mllvm;
00463   for(BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
00464     MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(I);
00465     for (unsigned j = 0; j < tempMvec.size(); j++) {
00466       mllvm[tempMvec[j]] = I;
00467     }
00468   }
00469 
00470   //Convert list of LLVM Instructions to list of Machine instructions
00471   std::map<const MachineInstr*, unsigned> mIndVar;
00472   for(std::set<Instruction*>::iterator N = indVar.begin(), NE = indVar.end(); N != NE; ++N) {
00473 
00474     //If we have a load, we can't handle this loop because there is no way to preserve dependences
00475     //between loads and stores
00476     if(isa<LoadInst>(*N))
00477       return false;
00478 
00479     MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(*N);
00480     for (unsigned j = 0; j < tempMvec.size(); j++) {
00481       MachineOpCode OC = (tempMvec[j])->getOpcode();
00482       if(TMI->isNop(OC))
00483         continue;
00484       if(!indexMap.count(tempMvec[j]))
00485         continue;
00486       mIndVar[(MachineInstr*) tempMvec[j]] = indexMap[(MachineInstr*) tempMvec[j]];
00487       DEBUG(std::cerr << *(tempMvec[j]) << " at index " << indexMap[(MachineInstr*) tempMvec[j]] << "\n");
00488     }
00489   }
00490 
00491    //Must have some guts to the loop body (more then 1 instr, dont count nops in size)
00492   if(mIndVar.size() >= (BI->size()-3))
00493     return false;
00494 
00495   //Put into a map for future access
00496   indVarInstrs[BI] = mIndVar;
00497   machineTollvm[BI] = mllvm;
00498   return true;
00499 }
00500 
00501 bool ModuloSchedulingPass::assocIndVar(Instruction *I, std::set<Instruction*> &indVar,
00502                                        std::vector<Instruction*> &stack, BasicBlock *BB) {
00503 
00504   stack.push_back(I);
00505 
00506   //If this is a phi node, check if its the canonical indvar
00507   if(PHINode *PN = dyn_cast<PHINode>(I)) {
00508     if (Instruction *Inc =
00509         dyn_cast<Instruction>(PN->getIncomingValueForBlock(BB)))
00510       if (Inc->getOpcode() == Instruction::Add && Inc->getOperand(0) == PN)
00511         if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1)))
00512           if (CI->equalsInt(1)) {
00513             //We have found the indvar, so add the stack, and inc instruction to the set
00514             indVar.insert(stack.begin(), stack.end());
00515             indVar.insert(Inc);
00516             stack.pop_back();
00517             return true;
00518           }
00519     return false;
00520   }
00521   else {
00522     //Loop over each of the instructions operands, check if they are an instruction and in this BB
00523     for(unsigned i = 0; i < I->getNumOperands(); ++i) {
00524       if(Instruction *N =  dyn_cast<Instruction>(I->getOperand(i))) {
00525         if(N->getParent() == BB)
00526           if(!assocIndVar(N, indVar, stack, BB))
00527             return false;
00528       }
00529     }
00530   }
00531 
00532   stack.pop_back();
00533   return true;
00534 }
00535 
00536 //ResMII is calculated by determining the usage count for each resource
00537 //and using the maximum.
00538 //FIXME: In future there should be a way to get alternative resources
00539 //for each instruction
00540 int ModuloSchedulingPass::calculateResMII(const MachineBasicBlock *BI) {
00541 
00542   TIME_REGION(X, "calculateResMII");
00543 
00544   const TargetInstrInfo *mii = target.getInstrInfo();
00545   const TargetSchedInfo *msi = target.getSchedInfo();
00546 
00547   int ResMII = 0;
00548 
00549   //Map to keep track of usage count of each resource
00550   std::map<unsigned, unsigned> resourceUsageCount;
00551 
00552   for(MachineBasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) {
00553 
00554     //Get resource usage for this instruction
00555     InstrRUsage rUsage = msi->getInstrRUsage(I->getOpcode());
00556     std::vector<std::vector<resourceId_t> > resources = rUsage.resourcesByCycle;
00557 
00558     //Loop over resources in each cycle and increments their usage count
00559     for(unsigned i=0; i < resources.size(); ++i)
00560       for(unsigned j=0; j < resources[i].size(); ++j) {
00561         if(!resourceUsageCount.count(resources[i][j])) {
00562           resourceUsageCount[resources[i][j]] = 1;
00563         }
00564         else {
00565           resourceUsageCount[resources[i][j]] =  resourceUsageCount[resources[i][j]] + 1;
00566         }
00567       }
00568   }
00569 
00570   //Find maximum usage count
00571 
00572   //Get max number of instructions that can be issued at once. (FIXME)
00573   int issueSlots = msi->maxNumIssueTotal;
00574 
00575   for(std::map<unsigned,unsigned>::iterator RB = resourceUsageCount.begin(), RE = resourceUsageCount.end(); RB != RE; ++RB) {
00576 
00577     //Get the total number of the resources in our cpu
00578     int resourceNum = CPUResource::getCPUResource(RB->first)->maxNumUsers;
00579 
00580     //Get total usage count for this resources
00581     unsigned usageCount = RB->second;
00582 
00583     //Divide the usage count by either the max number we can issue or the number of
00584     //resources (whichever is its upper bound)
00585     double finalUsageCount;
00586     DEBUG(std::cerr << "Resource Num: " << RB->first << " Usage: " << usageCount << " TotalNum: " << resourceNum << "\n");
00587 
00588     if( resourceNum <= issueSlots)
00589       finalUsageCount = ceil(1.0 * usageCount / resourceNum);
00590     else
00591       finalUsageCount = ceil(1.0 * usageCount / issueSlots);
00592 
00593 
00594     //Only keep track of the max
00595     ResMII = std::max( (int) finalUsageCount, ResMII);
00596 
00597   }
00598 
00599   return ResMII;
00600 
00601 }
00602 
00603 /// calculateRecMII - Calculates the value of the highest recurrence
00604 /// By value we mean the total latency
00605 int ModuloSchedulingPass::calculateRecMII(MSchedGraph *graph, int MII) {
00606   /*std::vector<MSchedGraphNode*> vNodes;
00607   //Loop over all nodes in the graph
00608   for(MSchedGraph::iterator I = graph->begin(), E = graph->end(); I != E; ++I) {
00609     findAllReccurrences(I->second, vNodes, MII);
00610     vNodes.clear();
00611   }*/
00612 
00613   TIME_REGION(X, "calculateRecMII");
00614 
00615   findAllCircuits(graph, MII);
00616   int RecMII = 0;
00617 
00618  for(std::set<std::pair<int, std::vector<MSchedGraphNode*> > >::iterator I = recurrenceList.begin(), E=recurrenceList.end(); I !=E; ++I) {
00619     RecMII = std::max(RecMII, I->first);
00620   }
00621 
00622   return MII;
00623 }
00624 
00625 /// calculateNodeAttributes - The following properties are calculated for
00626 /// each node in the dependence graph: ASAP, ALAP, Depth, Height, and
00627 /// MOB.
00628 void ModuloSchedulingPass::calculateNodeAttributes(MSchedGraph *graph, int MII) {
00629 
00630   TIME_REGION(X, "calculateNodeAttributes");
00631 
00632   assert(nodeToAttributesMap.empty() && "Node attribute map was not cleared");
00633 
00634   //Loop over the nodes and add them to the map
00635   for(MSchedGraph::iterator I = graph->begin(), E = graph->end(); I != E; ++I) {
00636 
00637     DEBUG(std::cerr << "Inserting node into attribute map: " << *I->second << "\n");
00638 
00639     //Assert if its already in the map
00640     assert(nodeToAttributesMap.count(I->second) == 0 &&
00641            "Node attributes are already in the map");
00642 
00643     //Put into the map with default attribute values
00644     nodeToAttributesMap[I->second] = MSNodeAttributes();
00645   }
00646 
00647   //Create set to deal with reccurrences
00648   std::set<MSchedGraphNode*> visitedNodes;
00649 
00650   //Now Loop over map and calculate the node attributes
00651   for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) {
00652     calculateASAP(I->first, MII, (MSchedGraphNode*) 0);
00653     visitedNodes.clear();
00654   }
00655 
00656   int maxASAP = findMaxASAP();
00657   //Calculate ALAP which depends on ASAP being totally calculated
00658   for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) {
00659     calculateALAP(I->first, MII, maxASAP, (MSchedGraphNode*) 0);
00660     visitedNodes.clear();
00661   }
00662 
00663   //Calculate MOB which depends on ASAP being totally calculated, also do depth and height
00664   for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) {
00665     (I->second).MOB = std::max(0,(I->second).ALAP - (I->second).ASAP);
00666 
00667     DEBUG(std::cerr << "MOB: " << (I->second).MOB << " (" << *(I->first) << ")\n");
00668     calculateDepth(I->first, (MSchedGraphNode*) 0);
00669     calculateHeight(I->first, (MSchedGraphNode*) 0);
00670   }
00671 
00672 
00673 }
00674 
00675 /// ignoreEdge - Checks to see if this edge of a recurrence should be ignored or not
00676 bool ModuloSchedulingPass::ignoreEdge(MSchedGraphNode *srcNode, MSchedGraphNode *destNode) {
00677   if(destNode == 0 || srcNode ==0)
00678     return false;
00679 
00680   bool findEdge = edgesToIgnore.count(std::make_pair(srcNode, destNode->getInEdgeNum(srcNode)));
00681 
00682   DEBUG(std::cerr << "Ignoring edge? from: " << *srcNode << " to " << *destNode << "\n");
00683 
00684   return findEdge;
00685 }
00686 
00687 
00688 /// calculateASAP - Calculates the
00689 int  ModuloSchedulingPass::calculateASAP(MSchedGraphNode *node, int MII, MSchedGraphNode *destNode) {
00690 
00691   DEBUG(std::cerr << "Calculating ASAP for " << *node << "\n");
00692 
00693   //Get current node attributes
00694   MSNodeAttributes &attributes = nodeToAttributesMap.find(node)->second;
00695 
00696   if(attributes.ASAP != -1)
00697     return attributes.ASAP;
00698 
00699   int maxPredValue = 0;
00700 
00701   //Iterate over all of the predecessors and find max
00702   for(MSchedGraphNode::pred_iterator P = node->pred_begin(), E = node->pred_end(); P != E; ++P) {
00703 
00704     //Only process if we are not ignoring the edge
00705     if(!ignoreEdge(*P, node)) {
00706       int predASAP = -1;
00707       predASAP = calculateASAP(*P, MII, node);
00708 
00709       assert(predASAP != -1 && "ASAP has not been calculated");
00710       int iteDiff = node->getInEdge(*P).getIteDiff();
00711 
00712       int currentPredValue = predASAP + (*P)->getLatency() - (iteDiff * MII);
00713       DEBUG(std::cerr << "pred ASAP: " << predASAP << ", iteDiff: " << iteDiff << ", PredLatency: " << (*P)->getLatency() << ", Current ASAP pred: " << currentPredValue << "\n");
00714       maxPredValue = std::max(maxPredValue, currentPredValue);
00715     }
00716   }
00717 
00718   attributes.ASAP = maxPredValue;
00719 
00720   DEBUG(std::cerr << "ASAP: " << attributes.ASAP << " (" << *node << ")\n");
00721 
00722   return maxPredValue;
00723 }
00724 
00725 
00726 int ModuloSchedulingPass::calculateALAP(MSchedGraphNode *node, int MII,
00727                                         int maxASAP, MSchedGraphNode *srcNode) {
00728 
00729   DEBUG(std::cerr << "Calculating ALAP for " << *node << "\n");
00730 
00731   MSNodeAttributes &attributes = nodeToAttributesMap.find(node)->second;
00732 
00733   if(attributes.ALAP != -1)
00734     return attributes.ALAP;
00735 
00736   if(node->hasSuccessors()) {
00737 
00738     //Trying to deal with the issue where the node has successors, but
00739     //we are ignoring all of the edges to them. So this is my hack for
00740     //now.. there is probably a more elegant way of doing this (FIXME)
00741     bool processedOneEdge = false;
00742 
00743     //FIXME, set to something high to start
00744     int minSuccValue = 9999999;
00745 
00746     //Iterate over all of the predecessors and fine max
00747     for(MSchedGraphNode::succ_iterator P = node->succ_begin(),
00748           E = node->succ_end(); P != E; ++P) {
00749 
00750       //Only process if we are not ignoring the edge
00751       if(!ignoreEdge(node, *P)) {
00752         processedOneEdge = true;
00753         int succALAP = -1;
00754         succALAP = calculateALAP(*P, MII, maxASAP, node);
00755 
00756         assert(succALAP != -1 && "Successors ALAP should have been caclulated");
00757 
00758         int iteDiff = P.getEdge().getIteDiff();
00759 
00760         int currentSuccValue = succALAP - node->getLatency() + iteDiff * MII;
00761 
00762         DEBUG(std::cerr << "succ ALAP: " << succALAP << ", iteDiff: " << iteDiff << ", SuccLatency: " << (*P)->getLatency() << ", Current ALAP succ: " << currentSuccValue << "\n");
00763 
00764         minSuccValue = std::min(minSuccValue, currentSuccValue);
00765       }
00766     }
00767 
00768     if(processedOneEdge)
00769         attributes.ALAP = minSuccValue;
00770 
00771     else
00772       attributes.ALAP = maxASAP;
00773   }
00774   else
00775     attributes.ALAP = maxASAP;
00776 
00777   DEBUG(std::cerr << "ALAP: " << attributes.ALAP << " (" << *node << ")\n");
00778 
00779   if(attributes.ALAP < 0)
00780     attributes.ALAP = 0;
00781 
00782   return attributes.ALAP;
00783 }
00784 
00785 int ModuloSchedulingPass::findMaxASAP() {
00786   int maxASAP = 0;
00787 
00788   for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(),
00789         E = nodeToAttributesMap.end(); I != E; ++I)
00790     maxASAP = std::max(maxASAP, I->second.ASAP);
00791   return maxASAP;
00792 }
00793 
00794 
00795 int ModuloSchedulingPass::calculateHeight(MSchedGraphNode *node,MSchedGraphNode *srcNode) {
00796 
00797   MSNodeAttributes &attributes = nodeToAttributesMap.find(node)->second;
00798 
00799   if(attributes.height != -1)
00800     return attributes.height;
00801 
00802   int maxHeight = 0;
00803 
00804   //Iterate over all of the predecessors and find max
00805   for(MSchedGraphNode::succ_iterator P = node->succ_begin(),
00806         E = node->succ_end(); P != E; ++P) {
00807 
00808 
00809     if(!ignoreEdge(node, *P)) {
00810       int succHeight = calculateHeight(*P, node);
00811 
00812       assert(succHeight != -1 && "Successors Height should have been caclulated");
00813 
00814       int currentHeight = succHeight + node->getLatency();
00815       maxHeight = std::max(maxHeight, currentHeight);
00816     }
00817   }
00818   attributes.height = maxHeight;
00819   DEBUG(std::cerr << "Height: " << attributes.height << " (" << *node << ")\n");
00820   return maxHeight;
00821 }
00822 
00823 
00824 int ModuloSchedulingPass::calculateDepth(MSchedGraphNode *node,
00825                                           MSchedGraphNode *destNode) {
00826 
00827   MSNodeAttributes &attributes = nodeToAttributesMap.find(node)->second;
00828 
00829   if(attributes.depth != -1)
00830     return attributes.depth;
00831 
00832   int maxDepth = 0;
00833 
00834   //Iterate over all of the predecessors and fine max
00835   for(MSchedGraphNode::pred_iterator P = node->pred_begin(), E = node->pred_end(); P != E; ++P) {
00836 
00837     if(!ignoreEdge(*P, node)) {
00838       int predDepth = -1;
00839       predDepth = calculateDepth(*P, node);
00840 
00841       assert(predDepth != -1 && "Predecessors ASAP should have been caclulated");
00842 
00843       int currentDepth = predDepth + (*P)->getLatency();
00844       maxDepth = std::max(maxDepth, currentDepth);
00845     }
00846   }
00847   attributes.depth = maxDepth;
00848 
00849   DEBUG(std::cerr << "Depth: " << attributes.depth << " (" << *node << "*)\n");
00850   return maxDepth;
00851 }
00852 
00853 
00854 
00855 void ModuloSchedulingPass::addReccurrence(std::vector<MSchedGraphNode*> &recurrence, int II, MSchedGraphNode *srcBENode, MSchedGraphNode *destBENode) {
00856   //Check to make sure that this recurrence is unique
00857   bool same = false;
00858 
00859 
00860   //Loop over all recurrences already in our list
00861   for(std::set<std::pair<int, std::vector<MSchedGraphNode*> > >::iterator R = recurrenceList.begin(), RE = recurrenceList.end(); R != RE; ++R) {
00862 
00863     bool all_same = true;
00864      //First compare size
00865     if(R->second.size() == recurrence.size()) {
00866 
00867       for(std::vector<MSchedGraphNode*>::const_iterator node = R->second.begin(), end = R->second.end(); node != end; ++node) {
00868         if(std::find(recurrence.begin(), recurrence.end(), *node) == recurrence.end()) {
00869           all_same = all_same && false;
00870           break;
00871         }
00872         else
00873           all_same = all_same && true;
00874       }
00875       if(all_same) {
00876         same = true;
00877         break;
00878       }
00879     }
00880   }
00881 
00882   if(!same) {
00883     srcBENode = recurrence.back();
00884     destBENode = recurrence.front();
00885 
00886     //FIXME
00887     if(destBENode->getInEdge(srcBENode).getIteDiff() == 0) {
00888       //DEBUG(std::cerr << "NOT A BACKEDGE\n");
00889       //find actual backedge HACK HACK
00890       for(unsigned i=0; i< recurrence.size()-1; ++i) {
00891         if(recurrence[i+1]->getInEdge(recurrence[i]).getIteDiff() == 1) {
00892           srcBENode = recurrence[i];
00893           destBENode = recurrence[i+1];
00894           break;
00895         }
00896 
00897       }
00898 
00899     }
00900     DEBUG(std::cerr << "Back Edge to Remove: " << *srcBENode << " to " << *destBENode << "\n");
00901     edgesToIgnore.insert(std::make_pair(srcBENode, destBENode->getInEdgeNum(srcBENode)));
00902     recurrenceList.insert(std::make_pair(II, recurrence));
00903   }
00904 
00905 }
00906 
00907 int CircCount;
00908 
00909 void ModuloSchedulingPass::unblock(MSchedGraphNode *u, std::set<MSchedGraphNode*> &blocked,
00910              std::map<MSchedGraphNode*, std::set<MSchedGraphNode*> > &B) {
00911 
00912   //Unblock u
00913   DEBUG(std::cerr << "Unblocking: " << *u << "\n");
00914   blocked.erase(u);
00915 
00916   //std::set<MSchedGraphNode*> toErase;
00917   while (!B[u].empty()) {
00918     MSchedGraphNode *W = *B[u].begin();
00919     B[u].erase(W);
00920     //toErase.insert(*W);
00921     DEBUG(std::cerr << "Removed: " << *W << "from B-List\n");
00922     if(blocked.count(W))
00923       unblock(W, blocked, B);
00924   }
00925 
00926 }
00927 
00928 bool ModuloSchedulingPass::circuit(MSchedGraphNode *v, std::vector<MSchedGraphNode*> &stack,
00929              std::set<MSchedGraphNode*> &blocked, std::vector<MSchedGraphNode*> &SCC,
00930              MSchedGraphNode *s, std::map<MSchedGraphNode*, std::set<MSchedGraphNode*> > &B,
00931                                    int II, std::map<MSchedGraphNode*, MSchedGraphNode*> &newNodes) {
00932   bool f = false;
00933 
00934   DEBUG(std::cerr << "Finding Circuits Starting with: ( " << v << ")"<< *v << "\n");
00935 
00936   //Push node onto the stack
00937   stack.push_back(v);
00938 
00939   //block this node
00940   blocked.insert(v);
00941 
00942   //Loop over all successors of node v that are in the scc, create Adjaceny list
00943   std::set<MSchedGraphNode*> AkV;
00944   for(MSchedGraphNode::succ_iterator I = v->succ_begin(), E = v->succ_end(); I != E; ++I) {
00945     if((std::find(SCC.begin(), SCC.end(), *I) != SCC.end())) {
00946       AkV.insert(*I);
00947     }
00948   }
00949 
00950   for(std::set<MSchedGraphNode*>::iterator I = AkV.begin(), E = AkV.end(); I != E; ++I) {
00951     if(*I == s) {
00952       //We have a circuit, so add it to our list
00953       addRecc(stack, newNodes);
00954       f = true;
00955     }
00956     else if(!blocked.count(*I)) {
00957       if(circuit(*I, stack, blocked, SCC, s, B, II, newNodes))
00958         f = true;
00959     }
00960     else
00961       DEBUG(std::cerr << "Blocked: " << **I << "\n");
00962   }
00963 
00964 
00965   if(f) {
00966     unblock(v, blocked, B);
00967   }
00968   else {
00969     for(std::set<MSchedGraphNode*>::iterator I = AkV.begin(), E = AkV.end(); I != E; ++I)
00970       B[*I].insert(v);
00971 
00972   }
00973 
00974   //Pop v
00975   stack.pop_back();
00976 
00977   return f;
00978 
00979 }
00980 
00981 void ModuloSchedulingPass::addRecc(std::vector<MSchedGraphNode*> &stack, std::map<MSchedGraphNode*, MSchedGraphNode*> &newNodes) {
00982   std::vector<MSchedGraphNode*> recc;
00983   //Dump recurrence for now
00984   DEBUG(std::cerr << "Starting Recc\n");
00985 
00986   int totalDelay = 0;
00987   int totalDistance = 0;
00988   MSchedGraphNode *lastN = 0;
00989   MSchedGraphNode *start = 0;
00990   MSchedGraphNode *end = 0;
00991 
00992   //Loop over recurrence, get delay and distance
00993   for(std::vector<MSchedGraphNode*>::iterator N = stack.begin(), NE = stack.end(); N != NE; ++N) {
00994     DEBUG(std::cerr << **N << "\n");
00995     totalDelay += (*N)->getLatency();
00996     if(lastN) {
00997       int iteDiff = (*N)->getInEdge(lastN).getIteDiff();
00998       totalDistance += iteDiff;
00999 
01000       if(iteDiff > 0) {
01001         start = lastN;
01002         end = *N;
01003       }
01004     }
01005     //Get the original node
01006     lastN = *N;
01007     recc.push_back(newNodes[*N]);
01008 
01009 
01010   }
01011 
01012   //Get the loop edge
01013   totalDistance += lastN->getIteDiff(*stack.begin());
01014 
01015   DEBUG(std::cerr << "End Recc\n");
01016   CircCount++;
01017 
01018   if(start && end) {
01019     //Insert reccurrence into the list
01020     DEBUG(std::cerr << "Ignore Edge from!!: " << *start << " to " << *end << "\n");
01021     edgesToIgnore.insert(std::make_pair(newNodes[start], (newNodes[end])->getInEdgeNum(newNodes[start])));
01022   }
01023   else {
01024     //Insert reccurrence into the list
01025     DEBUG(std::cerr << "Ignore Edge from: " << *lastN << " to " << **stack.begin() << "\n");
01026     edgesToIgnore.insert(std::make_pair(newNodes[lastN], newNodes[(*stack.begin())]->getInEdgeNum(newNodes[lastN])));
01027 
01028   }
01029   //Adjust II until we get close to the inequality delay - II*distance <= 0
01030   int RecMII = II; //Starting value
01031   int value = totalDelay-(RecMII * totalDistance);
01032   int lastII = II;
01033   while(value < 0) {
01034 
01035     lastII = RecMII;
01036     RecMII--;
01037     value = totalDelay-(RecMII * totalDistance);
01038   }
01039 
01040   recurrenceList.insert(std::make_pair(lastII, recc));
01041 
01042 }
01043 
01044 void ModuloSchedulingPass::addSCC(std::vector<MSchedGraphNode*> &SCC, std::map<MSchedGraphNode*, MSchedGraphNode*> &newNodes) {
01045 
01046   int totalDelay = 0;
01047   int totalDistance = 0;
01048   std::vector<MSchedGraphNode*> recc;
01049   MSchedGraphNode *start = 0;
01050   MSchedGraphNode *end = 0;
01051 
01052   //Loop over recurrence, get delay and distance
01053   for(std::vector<MSchedGraphNode*>::iterator N = SCC.begin(), NE = SCC.end(); N != NE; ++N) {
01054     DEBUG(std::cerr << **N << "\n");
01055     totalDelay += (*N)->getLatency();
01056 
01057     for(unsigned i = 0; i < (*N)->succ_size(); ++i) {
01058       MSchedGraphEdge *edge = (*N)->getSuccessor(i);
01059       if(find(SCC.begin(), SCC.end(), edge->getDest()) != SCC.end()) {
01060         totalDistance += edge->getIteDiff();
01061         if(edge->getIteDiff() > 0)
01062           if(!start && !end) {
01063             start = *N;
01064             end = edge->getDest();
01065           }
01066 
01067       }
01068     }
01069 
01070 
01071     //Get the original node
01072     recc.push_back(newNodes[*N]);
01073 
01074 
01075   }
01076 
01077   DEBUG(std::cerr << "End Recc\n");
01078   CircCount++;
01079 
01080   assert( (start && end) && "Must have start and end node to ignore edge for SCC");
01081 
01082   if(start && end) {
01083     //Insert reccurrence into the list
01084     DEBUG(std::cerr << "Ignore Edge from!!: " << *start << " to " << *end << "\n");
01085     edgesToIgnore.insert(std::make_pair(newNodes[start], (newNodes[end])->getInEdgeNum(newNodes[start])));
01086   }
01087 
01088   int lastII = totalDelay / totalDistance;
01089 
01090 
01091   recurrenceList.insert(std::make_pair(lastII, recc));
01092 
01093 }
01094 
01095 void ModuloSchedulingPass::findAllCircuits(MSchedGraph *g, int II) {
01096 
01097   CircCount = 0;
01098 
01099   //Keep old to new node mapping information
01100   std::map<MSchedGraphNode*, MSchedGraphNode*> newNodes;
01101 
01102   //copy the graph
01103   MSchedGraph *MSG = new MSchedGraph(*g, newNodes);
01104 
01105   DEBUG(std::cerr << "Finding All Circuits\n");
01106 
01107   //Set of blocked nodes
01108   std::set<MSchedGraphNode*> blocked;
01109 
01110   //Stack holding current circuit
01111   std::vector<MSchedGraphNode*> stack;
01112 
01113   //Map for B Lists
01114   std::map<MSchedGraphNode*, std::set<MSchedGraphNode*> > B;
01115 
01116   //current node
01117   MSchedGraphNode *s;
01118 
01119 
01120   //Iterate over the graph until its down to one node or empty
01121   while(MSG->size() > 1) {
01122 
01123     //Write Graph out to file
01124     //WriteGraphToFile(std::cerr, "Graph" + utostr(MSG->size()), MSG);
01125 
01126     DEBUG(std::cerr << "Graph Size: " << MSG->size() << "\n");
01127     DEBUG(std::cerr << "Finding strong component Vk with least vertex\n");
01128 
01129     //Iterate over all the SCCs in the graph
01130     std::set<MSchedGraphNode*> Visited;
01131     std::vector<MSchedGraphNode*> Vk;
01132     MSchedGraphNode* s = 0;
01133     int numEdges = 0;
01134 
01135     //Find scc with the least vertex
01136     for (MSchedGraph::iterator GI = MSG->begin(), E = MSG->end(); GI != E; ++GI)
01137       if (Visited.insert(GI->second).second) {
01138         for (scc_iterator<MSchedGraphNode*> SCCI = scc_begin(GI->second),
01139                E = scc_end(GI->second); SCCI != E; ++SCCI) {
01140           std::vector<MSchedGraphNode*> &nextSCC = *SCCI;
01141 
01142           if (Visited.insert(nextSCC[0]).second) {
01143             Visited.insert(nextSCC.begin()+1, nextSCC.end());
01144 
01145             if(nextSCC.size() > 1) {
01146               std::cerr << "SCC size: " << nextSCC.size() << "\n";
01147 
01148               for(unsigned i = 0; i < nextSCC.size(); ++i) {
01149                 //Loop over successor and see if in scc, then count edge
01150                 MSchedGraphNode *node = nextSCC[i];
01151                 for(MSchedGraphNode::succ_iterator S = node->succ_begin(), SE = node->succ_end(); S != SE; ++S) {
01152                   if(find(nextSCC.begin(), nextSCC.end(), *S) != nextSCC.end())
01153                     numEdges++;
01154                 }
01155               }
01156               std::cerr << "Num Edges: " << numEdges << "\n";
01157             }
01158 
01159             //Ignore self loops
01160             if(nextSCC.size() > 1) {
01161 
01162               //Get least vertex in Vk
01163               if(!s) {
01164                 s = nextSCC[0];
01165                 Vk = nextSCC;
01166               }
01167 
01168               for(unsigned i = 0; i < nextSCC.size(); ++i) {
01169                 if(nextSCC[i] < s) {
01170                   s = nextSCC[i];
01171                   Vk = nextSCC;
01172                 }
01173               }
01174             }
01175           }
01176         }
01177       }
01178 
01179 
01180 
01181     //Process SCC
01182     DEBUG(for(std::vector<MSchedGraphNode*>::iterator N = Vk.begin(), NE = Vk.end();
01183               N != NE; ++N) { std::cerr << *((*N)->getInst()); });
01184 
01185     //Iterate over all nodes in this scc
01186     for(std::vector<MSchedGraphNode*>::iterator N = Vk.begin(), NE = Vk.end();
01187         N != NE; ++N) {
01188       blocked.erase(*N);
01189       B[*N].clear();
01190     }
01191     if(Vk.size() > 1) {
01192       if(numEdges < 98)
01193         circuit(s, stack, blocked, Vk, s, B, II, newNodes);
01194       else
01195         addSCC(Vk, newNodes);
01196 
01197       //Delete nodes from the graph
01198       //Find all nodes up to s and delete them
01199       std::vector<MSchedGraphNode*> nodesToRemove;
01200       nodesToRemove.push_back(s);
01201       for(MSchedGraph::iterator N = MSG->begin(), NE = MSG->end(); N != NE; ++N) {
01202         if(N->second < s )
01203             nodesToRemove.push_back(N->second);
01204       }
01205       for(std::vector<MSchedGraphNode*>::iterator N = nodesToRemove.begin(), NE = nodesToRemove.end(); N != NE; ++N) {
01206         DEBUG(std::cerr << "Deleting Node: " << **N << "\n");
01207         MSG->deleteNode(*N);
01208       }
01209     }
01210     else
01211       break;
01212   }
01213   DEBUG(std::cerr << "Num Circuits found: " << CircCount << "\n");
01214 }
01215 
01216 
01217 void ModuloSchedulingPass::findAllReccurrences(MSchedGraphNode *node,
01218                                                std::vector<MSchedGraphNode*> &visitedNodes,
01219                                                int II) {
01220 
01221 
01222   if(std::find(visitedNodes.begin(), visitedNodes.end(), node) != visitedNodes.end()) {
01223     std::vector<MSchedGraphNode*> recurrence;
01224     bool first = true;
01225     int delay = 0;
01226     int distance = 0;
01227     int RecMII = II; //Starting value
01228     MSchedGraphNode *last = node;
01229     MSchedGraphNode *srcBackEdge = 0;
01230     MSchedGraphNode *destBackEdge = 0;
01231 
01232 
01233 
01234     for(std::vector<MSchedGraphNode*>::iterator I = visitedNodes.begin(), E = visitedNodes.end();
01235         I !=E; ++I) {
01236 
01237       if(*I == node)
01238         first = false;
01239       if(first)
01240         continue;
01241 
01242       delay = delay + (*I)->getLatency();
01243 
01244       if(*I != node) {
01245         int diff = (*I)->getInEdge(last).getIteDiff();
01246         distance += diff;
01247         if(diff > 0) {
01248           srcBackEdge = last;
01249           destBackEdge = *I;
01250         }
01251       }
01252 
01253       recurrence.push_back(*I);
01254       last = *I;
01255     }
01256 
01257 
01258 
01259     //Get final distance calc
01260     distance += node->getInEdge(last).getIteDiff();
01261     DEBUG(std::cerr << "Reccurrence Distance: " << distance << "\n");
01262 
01263     //Adjust II until we get close to the inequality delay - II*distance <= 0
01264 
01265     int value = delay-(RecMII * distance);
01266     int lastII = II;
01267     while(value <= 0) {
01268 
01269       lastII = RecMII;
01270       RecMII--;
01271       value = delay-(RecMII * distance);
01272     }
01273 
01274 
01275     DEBUG(std::cerr << "Final II for this recurrence: " << lastII << "\n");
01276     addReccurrence(recurrence, lastII, srcBackEdge, destBackEdge);
01277     assert(distance != 0 && "Recurrence distance should not be zero");
01278     return;
01279   }
01280 
01281   unsigned count = 0;
01282   for(MSchedGraphNode::succ_iterator I = node->succ_begin(), E = node->succ_end(); I != E; ++I) {
01283     visitedNodes.push_back(node);
01284     //if(!edgesToIgnore.count(std::make_pair(node, count)))
01285     findAllReccurrences(*I, visitedNodes, II);
01286     visitedNodes.pop_back();
01287     count++;
01288   }
01289 }
01290 
01291 void ModuloSchedulingPass::searchPath(MSchedGraphNode *node,
01292                                       std::vector<MSchedGraphNode*> &path,
01293                                       std::set<MSchedGraphNode*> &nodesToAdd,
01294                                      std::set<MSchedGraphNode*> &new_reccurrence) {
01295   //Push node onto the path
01296   path.push_back(node);
01297 
01298   //Loop over all successors and see if there is a path from this node to
01299   //a recurrence in the partial order, if so.. add all nodes to be added to recc
01300   for(MSchedGraphNode::succ_iterator S = node->succ_begin(), SE = node->succ_end(); S != SE;
01301       ++S) {
01302 
01303     //Check if we should ignore this edge first
01304     if(ignoreEdge(node,*S))
01305       continue;
01306 
01307     //check if successor is in this recurrence, we will get to it eventually
01308     if(new_reccurrence.count(*S))
01309       continue;
01310 
01311     //If this node exists in a recurrence already in the partial
01312     //order, then add all nodes in the path to the set of nodes to add
01313      //Check if its already in our partial order, if not add it to the
01314      //final vector
01315     bool found = false;
01316     for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(),
01317           PE = partialOrder.end(); PO != PE; ++PO) {
01318 
01319       if(PO->count(*S)) {
01320         found = true;
01321         break;
01322       }
01323     }
01324 
01325     if(!found) {
01326       nodesToAdd.insert(*S);
01327       searchPath(*S, path, nodesToAdd, new_reccurrence);
01328     }
01329   }
01330 
01331   //Pop Node off the path
01332   path.pop_back();
01333 }
01334 
01335 void ModuloSchedulingPass::pathToRecc(MSchedGraphNode *node,
01336                                       std::vector<MSchedGraphNode*> &path,
01337                                       std::set<MSchedGraphNode*> &poSet,
01338                                       std::set<MSchedGraphNode*> &lastNodes) {
01339   //Push node onto the path
01340   path.push_back(node);
01341 
01342   DEBUG(std::cerr << "Current node: " << *node << "\n");
01343 
01344   //Loop over all successors and see if there is a path from this node to
01345   //a recurrence in the partial order, if so.. add all nodes to be added to recc
01346   for(MSchedGraphNode::succ_iterator S = node->succ_begin(), SE = node->succ_end(); S != SE;
01347       ++S) {
01348     DEBUG(std::cerr << "Succ:" << **S << "\n");
01349     //Check if we should ignore this edge first
01350     if(ignoreEdge(node,*S))
01351       continue;
01352 
01353     if(poSet.count(*S)) {
01354       DEBUG(std::cerr << "Found path to recc from no pred\n");
01355       //Loop over path, if it exists in lastNodes, then add to poset, and remove from lastNodes
01356       for(std::vector<MSchedGraphNode*>::iterator I = path.begin(), IE = path.end(); I != IE; ++I) {
01357         if(lastNodes.count(*I)) {
01358           DEBUG(std::cerr << "Inserting node into recc: " << **I << "\n");
01359           poSet.insert(*I);
01360           lastNodes.erase(*I);
01361         }
01362       }
01363     }
01364     else
01365       pathToRecc(*S, path, poSet, lastNodes);
01366   }
01367 
01368   //Pop Node off the path
01369   path.pop_back();
01370 }
01371 
01372 void ModuloSchedulingPass::computePartialOrder() {
01373 
01374   TIME_REGION(X, "calculatePartialOrder");
01375 
01376   DEBUG(std::cerr << "Computing Partial Order\n");
01377 
01378   //Only push BA branches onto the final node order, we put other
01379   //branches after it FIXME: Should we really be pushing branches on
01380   //it a specific order instead of relying on BA being there?
01381 
01382   std::vector<MSchedGraphNode*> branches;
01383 
01384   //Steps to add a recurrence to the partial order 1) Find reccurrence
01385   //with the highest RecMII. Add it to the partial order.  2) For each
01386   //recurrence with decreasing RecMII, add it to the partial order
01387   //along with any nodes that connect this recurrence to recurrences
01388   //already in the partial order
01389   for(std::set<std::pair<int, std::vector<MSchedGraphNode*> > >::reverse_iterator
01390         I = recurrenceList.rbegin(), E=recurrenceList.rend(); I !=E; ++I) {
01391 
01392     std::set<MSchedGraphNode*> new_recurrence;
01393 
01394     //Loop through recurrence and remove any nodes already in the partial order
01395     for(std::vector<MSchedGraphNode*>::const_iterator N = I->second.begin(),
01396           NE = I->second.end(); N != NE; ++N) {
01397 
01398       bool found = false;
01399       for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(),
01400             PE = partialOrder.end(); PO != PE; ++PO) {
01401         if(PO->count(*N))
01402           found = true;
01403       }
01404 
01405       //Check if its a branch, and remove to handle special
01406       if(!found) {
01407         if((*N)->isBranch() && !(*N)->hasPredecessors()) {
01408           branches.push_back(*N);
01409         }
01410         else
01411           new_recurrence.insert(*N);
01412       }
01413 
01414     }
01415 
01416 
01417     if(new_recurrence.size() > 0) {
01418 
01419       std::vector<MSchedGraphNode*> path;
01420       std::set<MSchedGraphNode*> nodesToAdd;
01421 
01422       //Dump recc we are dealing with (minus nodes already in PO)
01423       DEBUG(std::cerr << "Recc: ");
01424       DEBUG(for(std::set<MSchedGraphNode*>::iterator R = new_recurrence.begin(), RE = new_recurrence.end(); R != RE; ++R) { std::cerr << **R ; });
01425 
01426       //Add nodes that connect this recurrence to recurrences in the partial path
01427       for(std::set<MSchedGraphNode*>::iterator N = new_recurrence.begin(),
01428           NE = new_recurrence.end(); N != NE; ++N)
01429         searchPath(*N, path, nodesToAdd, new_recurrence);
01430 
01431       //Add nodes to this recurrence if they are not already in the partial order
01432       for(std::set<MSchedGraphNode*>::iterator N = nodesToAdd.begin(), NE = nodesToAdd.end();
01433           N != NE; ++N) {
01434         bool found = false;
01435         for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(),
01436               PE = partialOrder.end(); PO != PE; ++PO) {
01437           if(PO->count(*N))
01438             found = true;
01439         }
01440         if(!found) {
01441           assert("FOUND CONNECTOR");
01442           new_recurrence.insert(*N);
01443         }
01444       }
01445 
01446       partialOrder.push_back(new_recurrence);
01447 
01448 
01449       //Dump out partial order
01450       DEBUG(for(std::vector<std::set<MSchedGraphNode*> >::iterator I = partialOrder.begin(),
01451                   E = partialOrder.end(); I !=E; ++I) {
01452               std::cerr << "Start set in PO\n";
01453               for(std::set<MSchedGraphNode*>::iterator J = I->begin(), JE = I->end(); J != JE; ++J)
01454                 std::cerr << "PO:" << **J << "\n";
01455             });
01456 
01457     }
01458   }
01459 
01460   //Add any nodes that are not already in the partial order
01461   //Add them in a set, one set per connected component
01462   std::set<MSchedGraphNode*> lastNodes;
01463   std::set<MSchedGraphNode*> noPredNodes;
01464   for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I = nodeToAttributesMap.begin(),
01465         E = nodeToAttributesMap.end(); I != E; ++I) {
01466 
01467     bool found = false;
01468 
01469     //Check if its already in our partial order, if not add it to the final vector
01470     for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(),
01471           PE = partialOrder.end(); PO != PE; ++PO) {
01472       if(PO->count(I->first))
01473         found = true;
01474     }
01475     if(!found)
01476       lastNodes.insert(I->first);
01477   }
01478 
01479   //For each node w/out preds, see if there is a path to one of the
01480   //recurrences, and if so add them to that current recc
01481   /*for(std::set<MSchedGraphNode*>::iterator N = noPredNodes.begin(), NE = noPredNodes.end();
01482       N != NE; ++N) {
01483     DEBUG(std::cerr << "No Pred Path from: " << **N << "\n");
01484     for(std::vector<std::set<MSchedGraphNode*> >::iterator PO = partialOrder.begin(),
01485           PE = partialOrder.end(); PO != PE; ++PO) {
01486       std::vector<MSchedGraphNode*> path;
01487       pathToRecc(*N, path, *PO, lastNodes);
01488     }
01489     }*/
01490 
01491 
01492   //Break up remaining nodes that are not in the partial order
01493   ///into their connected compoenents
01494     while(lastNodes.size() > 0) {
01495       std::set<MSchedGraphNode*> ccSet;
01496       connectedComponentSet(*(lastNodes.begin()),ccSet, lastNodes);
01497       if(ccSet.size() > 0)
01498         partialOrder.push_back(ccSet);
01499     }
01500 
01501 
01502   //Clean up branches by putting them in final order
01503     assert(branches.size() == 0 && "We should not have any branches in our graph");
01504 }
01505 
01506 
01507 void ModuloSchedulingPass::connectedComponentSet(MSchedGraphNode *node, std::set<MSchedGraphNode*> &ccSet, std::set<MSchedGraphNode*> &lastNodes) {
01508 
01509 //Add to final set
01510   if( !ccSet.count(node) && lastNodes.count(node)) {
01511     lastNodes.erase(node);
01512     ccSet.insert(node);
01513   }
01514   else
01515     return;
01516 
01517   //Loop over successors and recurse if we have not seen this node before
01518   for(MSchedGraphNode::succ_iterator node_succ = node->succ_begin(), end=node->succ_end(); node_succ != end; ++node_succ) {
01519     connectedComponentSet(*node_succ, ccSet, lastNodes);
01520   }
01521 
01522 }
01523 
01524 void ModuloSchedulingPass::predIntersect(std::set<MSchedGraphNode*> &CurrentSet, std::set<MSchedGraphNode*> &IntersectResult) {
01525 
01526   for(unsigned j=0; j < FinalNodeOrder.size(); ++j) {
01527     for(MSchedGraphNode::pred_iterator P = FinalNodeOrder[j]->pred_begin(),
01528           E = FinalNodeOrder[j]->pred_end(); P != E; ++P) {
01529 
01530       //Check if we are supposed to ignore this edge or not
01531       if(ignoreEdge(*P,FinalNodeOrder[j]))
01532         continue;
01533 
01534       if(CurrentSet.count(*P))
01535         if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), *P) == FinalNodeOrder.end())
01536           IntersectResult.insert(*P);
01537     }
01538   }
01539 }
01540 
01541 
01542 
01543 
01544 
01545 void ModuloSchedulingPass::succIntersect(std::set<MSchedGraphNode*> &CurrentSet, std::set<MSchedGraphNode*> &IntersectResult) {
01546 
01547   for(unsigned j=0; j < FinalNodeOrder.size(); ++j) {
01548     for(MSchedGraphNode::succ_iterator P = FinalNodeOrder[j]->succ_begin(),
01549           E = FinalNodeOrder[j]->succ_end(); P != E; ++P) {
01550 
01551       //Check if we are supposed to ignore this edge or not
01552       if(ignoreEdge(FinalNodeOrder[j],*P))
01553         continue;
01554 
01555       if(CurrentSet.count(*P))
01556         if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), *P) == FinalNodeOrder.end())
01557           IntersectResult.insert(*P);
01558     }
01559   }
01560 }
01561 
01562 void dumpIntersection(std::set<MSchedGraphNode*> &IntersectCurrent) {
01563   std::cerr << "Intersection (";
01564   for(std::set<MSchedGraphNode*>::iterator I = IntersectCurrent.begin(), E = IntersectCurrent.end(); I != E; ++I)
01565     std::cerr << **I << ", ";
01566   std::cerr << ")\n";
01567 }
01568 
01569 
01570 
01571 void ModuloSchedulingPass::orderNodes() {
01572 
01573   TIME_REGION(X, "orderNodes");
01574 
01575   int BOTTOM_UP = 0;
01576   int TOP_DOWN = 1;
01577 
01578   //Set default order
01579   int order = BOTTOM_UP;
01580 
01581 
01582   //Loop over all the sets and place them in the final node order
01583   for(std::vector<std::set<MSchedGraphNode*> >::iterator CurrentSet = partialOrder.begin(), E= partialOrder.end(); CurrentSet != E; ++CurrentSet) {
01584 
01585     DEBUG(std::cerr << "Processing set in S\n");
01586     DEBUG(dumpIntersection(*CurrentSet));
01587 
01588     //Result of intersection
01589     std::set<MSchedGraphNode*> IntersectCurrent;
01590 
01591     predIntersect(*CurrentSet, IntersectCurrent);
01592 
01593     //If the intersection of predecessor and current set is not empty
01594     //sort nodes bottom up
01595     if(IntersectCurrent.size() != 0) {
01596       DEBUG(std::cerr << "Final Node Order Predecessors and Current Set interesection is NOT empty\n");
01597       order = BOTTOM_UP;
01598     }
01599     //If empty, use successors
01600     else {
01601       DEBUG(std::cerr << "Final Node Order Predecessors and Current Set interesection is empty\n");
01602 
01603       succIntersect(*CurrentSet, IntersectCurrent);
01604 
01605       //sort top-down
01606       if(IntersectCurrent.size() != 0) {
01607          DEBUG(std::cerr << "Final Node Order Successors and Current Set interesection is NOT empty\n");
01608         order = TOP_DOWN;
01609       }
01610       else {
01611         DEBUG(std::cerr << "Final Node Order Successors and Current Set interesection is empty\n");
01612         //Find node with max ASAP in current Set
01613         MSchedGraphNode *node;
01614         int maxASAP = 0;
01615         DEBUG(std::cerr << "Using current set of size " << CurrentSet->size() << "to find max ASAP\n");
01616         for(std::set<MSchedGraphNode*>::iterator J = CurrentSet->begin(), JE = CurrentSet->end(); J != JE; ++J) {
01617           //Get node attributes
01618           MSNodeAttributes nodeAttr= nodeToAttributesMap.find(*J)->second;
01619           //assert(nodeAttr != nodeToAttributesMap.end() && "Node not in attributes map!");
01620 
01621           if(maxASAP <= nodeAttr.ASAP) {
01622             maxASAP = nodeAttr.ASAP;
01623             node = *J;
01624           }
01625         }
01626         assert(node != 0 && "In node ordering node should not be null");
01627         IntersectCurrent.insert(node);
01628         order = BOTTOM_UP;
01629       }
01630     }
01631 
01632     //Repeat until all nodes are put into the final order from current set
01633     while(IntersectCurrent.size() > 0) {
01634 
01635       if(order == TOP_DOWN) {
01636         DEBUG(std::cerr << "Order is TOP DOWN\n");
01637 
01638         while(IntersectCurrent.size() > 0) {
01639           DEBUG(std::cerr << "Intersection is not empty, so find heighest height\n");
01640 
01641           int MOB = 0;
01642           int height = 0;
01643           MSchedGraphNode *highestHeightNode = *(IntersectCurrent.begin());
01644 
01645           //Find node in intersection with highest heigh and lowest MOB
01646           for(std::set<MSchedGraphNode*>::iterator I = IntersectCurrent.begin(),
01647                 E = IntersectCurrent.end(); I != E; ++I) {
01648 
01649             //Get current nodes properties
01650             MSNodeAttributes nodeAttr= nodeToAttributesMap.find(*I)->second;
01651 
01652             if(height < nodeAttr.height) {
01653               highestHeightNode = *I;
01654               height = nodeAttr.height;
01655               MOB = nodeAttr.MOB;
01656             }
01657             else if(height ==  nodeAttr.height) {
01658               if(MOB > nodeAttr.height) {
01659                 highestHeightNode = *I;
01660                 height =  nodeAttr.height;
01661                 MOB = nodeAttr.MOB;
01662               }
01663             }
01664           }
01665 
01666           //Append our node with greatest height to the NodeOrder
01667           if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), highestHeightNode) == FinalNodeOrder.end()) {
01668             DEBUG(std::cerr << "Adding node to Final Order: " << *highestHeightNode << "\n");
01669             FinalNodeOrder.push_back(highestHeightNode);
01670           }
01671 
01672           //Remove V from IntersectOrder
01673           IntersectCurrent.erase(std::find(IntersectCurrent.begin(),
01674                                       IntersectCurrent.end(), highestHeightNode));
01675 
01676 
01677           //Intersect V's successors with CurrentSet
01678           for(MSchedGraphNode::succ_iterator P = highestHeightNode->succ_begin(),
01679                 E = highestHeightNode->succ_end(); P != E; ++P) {
01680             //if(lower_bound(CurrentSet->begin(),
01681             //     CurrentSet->end(), *P) != CurrentSet->end()) {
01682             if(std::find(CurrentSet->begin(), CurrentSet->end(), *P) != CurrentSet->end()) {
01683               if(ignoreEdge(highestHeightNode, *P))
01684                 continue;
01685               //If not already in Intersect, add
01686               if(!IntersectCurrent.count(*P))
01687                 IntersectCurrent.insert(*P);
01688             }
01689           }
01690         } //End while loop over Intersect Size
01691 
01692         //Change direction
01693         order = BOTTOM_UP;
01694 
01695         //Reset Intersect to reflect changes in OrderNodes
01696         IntersectCurrent.clear();
01697         predIntersect(*CurrentSet, IntersectCurrent);
01698 
01699       } //End If TOP_DOWN
01700 
01701         //Begin if BOTTOM_UP
01702       else {
01703         DEBUG(std::cerr << "Order is BOTTOM UP\n");
01704         while(IntersectCurrent.size() > 0) {
01705           DEBUG(std::cerr << "Intersection of size " << IntersectCurrent.size() << ", finding highest depth\n");
01706 
01707           //dump intersection
01708           DEBUG(dumpIntersection(IntersectCurrent));
01709           //Get node with highest depth, if a tie, use one with lowest
01710           //MOB
01711           int MOB = 0;
01712           int depth = 0;
01713           MSchedGraphNode *highestDepthNode = *(IntersectCurrent.begin());
01714 
01715           for(std::set<MSchedGraphNode*>::iterator I = IntersectCurrent.begin(),
01716                 E = IntersectCurrent.end(); I != E; ++I) {
01717             //Find node attribute in graph
01718             MSNodeAttributes nodeAttr= nodeToAttributesMap.find(*I)->second;
01719 
01720             if(depth < nodeAttr.depth) {
01721               highestDepthNode = *I;
01722               depth = nodeAttr.depth;
01723               MOB = nodeAttr.MOB;
01724             }
01725             else if(depth == nodeAttr.depth) {
01726               if(MOB > nodeAttr.MOB) {
01727                 highestDepthNode = *I;
01728                 depth = nodeAttr.depth;
01729                 MOB = nodeAttr.MOB;
01730               }
01731             }
01732           }
01733 
01734 
01735 
01736           //Append highest depth node to the NodeOrder
01737            if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), highestDepthNode) == FinalNodeOrder.end()) {
01738              DEBUG(std::cerr << "Adding node to Final Order: " << *highestDepthNode << "\n");
01739              FinalNodeOrder.push_back(highestDepthNode);
01740            }
01741           //Remove heightestDepthNode from IntersectOrder
01742            IntersectCurrent.erase(highestDepthNode);
01743 
01744 
01745           //Intersect heightDepthNode's pred with CurrentSet
01746           for(MSchedGraphNode::pred_iterator P = highestDepthNode->pred_begin(),
01747                 E = highestDepthNode->pred_end(); P != E; ++P) {
01748             if(CurrentSet->count(*P)) {
01749               if(ignoreEdge(*P, highestDepthNode))
01750                 continue;
01751 
01752             //If not already in Intersect, add
01753             if(!IntersectCurrent.count(*P))
01754               IntersectCurrent.insert(*P);
01755             }
01756           }
01757 
01758         } //End while loop over Intersect Size
01759 
01760           //Change order
01761         order = TOP_DOWN;
01762 
01763         //Reset IntersectCurrent to reflect changes in OrderNodes
01764         IntersectCurrent.clear();
01765         succIntersect(*CurrentSet, IntersectCurrent);
01766         } //End if BOTTOM_DOWN
01767 
01768       DEBUG(std::cerr << "Current Intersection Size: " << IntersectCurrent.size() << "\n");
01769     }
01770     //End Wrapping while loop
01771     DEBUG(std::cerr << "Ending Size of Current Set: " << CurrentSet->size() << "\n");
01772   }//End for over all sets of nodes
01773 
01774   //FIXME: As the algorithm stands it will NEVER add an instruction such as ba (with no
01775   //data dependencies) to the final order. We add this manually. It will always be
01776   //in the last set of S since its not part of a recurrence
01777     //Loop over all the sets and place them in the final node order
01778   std::vector<std::set<MSchedGraphNode*> > ::reverse_iterator LastSet = partialOrder.rbegin();
01779   for(std::set<MSchedGraphNode*>::iterator CurrentNode = LastSet->begin(), LastNode = LastSet->end();
01780       CurrentNode != LastNode; ++CurrentNode) {
01781     if((*CurrentNode)->getInst()->getOpcode() == V9::BA)
01782       FinalNodeOrder.push_back(*CurrentNode);
01783   }
01784   //Return final Order
01785   //return FinalNodeOrder;
01786 }
01787 
01788 bool ModuloSchedulingPass::computeSchedule(const MachineBasicBlock *BB, MSchedGraph *MSG) {
01789 
01790   TIME_REGION(X, "computeSchedule");
01791 
01792   bool success = false;
01793 
01794   //FIXME: Should be set to max II of the original loop
01795   //Cap II in order to prevent infinite loop
01796   int capII = MSG->totalDelay();
01797 
01798   while(!success) {
01799 
01800     //Keep track of branches, but do not insert into the schedule
01801     std::vector<MSchedGraphNode*> branches;
01802 
01803     //Loop over the final node order and process each node
01804     for(std::vector<MSchedGraphNode*>::iterator I = FinalNodeOrder.begin(),
01805           E = FinalNodeOrder.end(); I != E; ++I) {
01806 
01807       //CalculateEarly and Late start
01808       bool initialLSVal = false;
01809       bool initialESVal = false;
01810       int EarlyStart = 0;
01811       int LateStart = 0;
01812       bool hasSucc = false;
01813       bool hasPred = false;
01814       bool sched;
01815 
01816       if((*I)->isBranch())
01817         if((*I)->hasPredecessors())
01818           sched = true;
01819         else
01820           sched = false;
01821       else
01822         sched = true;
01823 
01824       if(sched) {
01825         //Loop over nodes in the schedule and determine if they are predecessors
01826         //or successors of the node we are trying to schedule
01827         for(MSSchedule::schedule_iterator nodesByCycle = schedule.begin(), nodesByCycleEnd = schedule.end();
01828             nodesByCycle != nodesByCycleEnd; ++nodesByCycle) {
01829 
01830           //For this cycle, get the vector of nodes schedule and loop over it
01831           for(std::vector<MSchedGraphNode*>::iterator schedNode = nodesByCycle->second.begin(), SNE = nodesByCycle->second.end(); schedNode != SNE; ++schedNode) {
01832 
01833             if((*I)->isPredecessor(*schedNode)) {
01834               int diff = (*I)->getInEdge(*schedNode).getIteDiff();
01835               int ES_Temp = nodesByCycle->first + (*schedNode)->getLatency() - diff * II;
01836               DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << nodesByCycle->first << "\n");
01837               DEBUG(std::cerr << "Temp EarlyStart: " << ES_Temp << " Prev EarlyStart: " << EarlyStart << "\n");
01838               if(initialESVal)
01839                 EarlyStart = std::max(EarlyStart, ES_Temp);
01840               else {
01841                 EarlyStart = ES_Temp;
01842                 initialESVal = true;
01843               }
01844               hasPred = true;
01845             }
01846             if((*I)->isSuccessor(*schedNode)) {
01847               int diff = (*schedNode)->getInEdge(*I).getIteDiff();
01848               int LS_Temp = nodesByCycle->first - (*I)->getLatency() + diff * II;
01849               DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << nodesByCycle->first << "\n");
01850               DEBUG(std::cerr << "Temp LateStart: " << LS_Temp << " Prev LateStart: " << LateStart << "\n");
01851               if(initialLSVal)
01852                 LateStart = std::min(LateStart, LS_Temp);
01853               else {
01854                 LateStart = LS_Temp;
01855                 initialLSVal = true;
01856               }
01857               hasSucc = true;
01858             }
01859           }
01860         }
01861       }
01862       else {
01863         branches.push_back(*I);
01864         continue;
01865       }
01866 
01867       //Check if this node is a pred or succ to a branch, and restrict its placement
01868       //even though the branch is not in the schedule
01869       /*int count = branches.size();
01870       for(std::vector<MSchedGraphNode*>::iterator B = branches.begin(), BE = branches.end();
01871           B != BE; ++B) {
01872         if((*I)->isPredecessor(*B)) {
01873           int diff = (*I)->getInEdge(*B).getIteDiff();
01874           int ES_Temp = (II+count-1) + (*B)->getLatency() - diff * II;
01875           DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << (II+count)-1 << "\n");
01876           DEBUG(std::cerr << "Temp EarlyStart: " << ES_Temp << " Prev EarlyStart: " << EarlyStart << "\n");
01877           EarlyStart = std::max(EarlyStart, ES_Temp);
01878           hasPred = true;
01879         }
01880 
01881         if((*I)->isSuccessor(*B)) {
01882           int diff = (*B)->getInEdge(*I).getIteDiff();
01883           int LS_Temp = (II+count-1) - (*I)->getLatency() + diff * II;
01884           DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << (II+count-1) << "\n");
01885           DEBUG(std::cerr << "Temp LateStart: " << LS_Temp << " Prev LateStart: " << LateStart << "\n");
01886           LateStart = std::min(LateStart, LS_Temp);
01887           hasSucc = true;
01888         }
01889 
01890         count--;
01891       }*/
01892 
01893       //Check if the node has no pred or successors and set Early Start to its ASAP
01894       if(!hasSucc && !hasPred)
01895         EarlyStart = nodeToAttributesMap.find(*I)->second.ASAP;
01896 
01897       DEBUG(std::cerr << "Has Successors: " << hasSucc << ", Has Pred: " << hasPred << "\n");
01898       DEBUG(std::cerr << "EarlyStart: " << EarlyStart << ", LateStart: " << LateStart << "\n");
01899 
01900       //Now, try to schedule this node depending upon its pred and successor in the schedule
01901       //already
01902       if(!hasSucc && hasPred)
01903         success = scheduleNode(*I, EarlyStart, (EarlyStart + II -1));
01904       else if(!hasPred && hasSucc)
01905         success = scheduleNode(*I, LateStart, (LateStart - II +1));
01906       else if(hasPred && hasSucc) {
01907         if(EarlyStart > LateStart) {
01908         success = false;
01909           //LateStart = EarlyStart;
01910           DEBUG(std::cerr << "Early Start can not be later then the late start cycle, schedule fails\n");
01911         }
01912         else
01913           success = scheduleNode(*I, EarlyStart, std::min(LateStart, (EarlyStart + II -1)));
01914       }
01915       else
01916         success = scheduleNode(*I, EarlyStart, EarlyStart + II - 1);
01917 
01918       if(!success) {
01919         ++II;
01920         schedule.clear();
01921         break;
01922       }
01923 
01924     }
01925 
01926     if(success) {
01927       DEBUG(std::cerr << "Constructing Schedule Kernel\n");
01928       success = schedule.constructKernel(II, branches, indVarInstrs[BB]);
01929       DEBUG(std::cerr << "Done Constructing Schedule Kernel\n");
01930       if(!success) {
01931         ++II;
01932         schedule.clear();
01933       }
01934       DEBUG(std::cerr << "Final II: " << II << "\n");
01935     }
01936 
01937 
01938     if(II >= capII) {
01939       DEBUG(std::cerr << "Maximum II reached, giving up\n");
01940       return false;
01941     }
01942 
01943     assert(II < capII && "The II should not exceed the original loop number of cycles");
01944   }
01945   return true;
01946 }
01947 
01948 
01949 bool ModuloSchedulingPass::scheduleNode(MSchedGraphNode *node,
01950                                       int start, int end) {
01951   bool success = false;
01952 
01953   DEBUG(std::cerr << *node << " (Start Cycle: " << start << ", End Cycle: " << end << ")\n");
01954 
01955   //Make sure start and end are not negative
01956   //if(start < 0) {
01957   //start = 0;
01958 
01959   //}
01960   //if(end < 0)
01961   //end = 0;
01962 
01963   bool forward = true;
01964   if(start > end)
01965     forward = false;
01966 
01967   bool increaseSC = true;
01968   int cycle = start ;
01969 
01970 
01971   while(increaseSC) {
01972 
01973     increaseSC = false;
01974 
01975     increaseSC = schedule.insert(node, cycle, II);
01976 
01977     if(!increaseSC)
01978       return true;
01979 
01980     //Increment cycle to try again
01981     if(forward) {
01982       ++cycle;
01983       DEBUG(std::cerr << "Increase cycle: " << cycle << "\n");
01984       if(cycle > end)
01985         return false;
01986     }
01987     else {
01988       --cycle;
01989       DEBUG(std::cerr << "Decrease cycle: " << cycle << "\n");
01990       if(cycle < end)
01991         return false;
01992     }
01993   }
01994 
01995   return success;
01996 }
01997 
01998 void ModuloSchedulingPass::writePrologues(std::vector<MachineBasicBlock *> &prologues, MachineBasicBlock *origBB, std::vector<BasicBlock*> &llvm_prologues, std::map<const Value*, std::pair<const MachineInstr*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues, std::map<Value*, MachineBasicBlock*> &newValLocation) {
01999 
02000   //Keep a map to easily know whats in the kernel
02001   std::map<int, std::set<const MachineInstr*> > inKernel;
02002   int maxStageCount = 0;
02003 
02004   //Keep a map of new values we consumed in case they need to be added back
02005   std::map<Value*, std::map<int, Value*> > consumedValues;
02006 
02007   MSchedGraphNode *branch = 0;
02008   MSchedGraphNode *BAbranch = 0;
02009 
02010   DEBUG(schedule.print(std::cerr));
02011 
02012   std::vector<MSchedGraphNode*> branches;
02013 
02014   for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) {
02015     maxStageCount = std::max(maxStageCount, I->second);
02016 
02017     //Put int the map so we know what instructions in each stage are in the kernel
02018     DEBUG(std::cerr << "Inserting instruction " << *(I->first) << " into map at stage " << I->second << "\n");
02019     inKernel[I->second].insert(I->first);
02020   }
02021 
02022   //Get target information to look at machine operands
02023   const TargetInstrInfo *mii = target.getInstrInfo();
02024 
02025  //Now write the prologues
02026   for(int i = 0; i < maxStageCount; ++i) {
02027     BasicBlock *llvmBB = new BasicBlock("PROLOGUE", (Function*) (origBB->getBasicBlock()->getParent()));
02028     MachineBasicBlock *machineBB = new MachineBasicBlock(llvmBB);
02029 
02030     DEBUG(std::cerr << "i=" << i << "\n");
02031     for(int j = i; j >= 0; --j) {
02032       for(MachineBasicBlock::const_iterator MI = origBB->begin(), ME = origBB->end(); ME != MI; ++MI) {
02033         if(inKernel[j].count(&*MI)) {
02034           MachineInstr *instClone = MI->clone();
02035           machineBB->push_back(instClone);
02036 
02037           //If its a branch, insert a nop
02038           if(mii->isBranch(instClone->getOpcode()))
02039             BuildMI(machineBB, V9::NOP, 0);
02040 
02041 
02042           DEBUG(std::cerr << "Cloning: " << *MI << "\n");
02043 
02044           //After cloning, we may need to save the value that this instruction defines
02045           for(unsigned opNum=0; opNum < MI->getNumOperands(); ++opNum) {
02046             Instruction *tmp;
02047 
02048             //get machine operand
02049             MachineOperand &mOp = instClone->getOperand(opNum);
02050             if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) {
02051 
02052               //Check if this is a value we should save
02053               if(valuesToSave.count(mOp.getVRegValue())) {
02054                 //Save copy in tmpInstruction
02055                 tmp = new TmpInstruction(mOp.getVRegValue());
02056 
02057                 //Add TmpInstruction to safe LLVM Instruction MCFI
02058                 MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
02059                 tempMvec.addTemp((Value*) tmp);
02060 
02061                 DEBUG(std::cerr << "Value: " << *(mOp.getVRegValue()) << " New Value: " << *tmp << " Stage: " << i << "\n");
02062 
02063                 newValues[mOp.getVRegValue()][i]= tmp;
02064                 newValLocation[tmp] = machineBB;
02065 
02066                 DEBUG(std::cerr << "Machine Instr Operands: " << *(mOp.getVRegValue()) << ", 0, " << *tmp << "\n");
02067 
02068                 //Create machine instruction and put int machineBB
02069                 MachineInstr *saveValue;
02070                 if(mOp.getVRegValue()->getType() == Type::FloatTy)
02071                   saveValue = BuildMI(machineBB, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
02072                 else if(mOp.getVRegValue()->getType() == Type::DoubleTy)
02073                   saveValue = BuildMI(machineBB, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
02074                 else
02075                   saveValue = BuildMI(machineBB, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp);
02076 
02077 
02078                 DEBUG(std::cerr << "Created new machine instr: " << *saveValue << "\n");
02079               }
02080             }
02081 
02082             //We may also need to update the value that we use if its from an earlier prologue
02083             if(j != 0) {
02084               if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) {
02085                 if(newValues.count(mOp.getVRegValue())) {
02086                   if(newValues[mOp.getVRegValue()].count(i-1)) {
02087                     Value *oldV =  mOp.getVRegValue();
02088                     DEBUG(std::cerr << "Replaced this value: " << mOp.getVRegValue() << " With:" << (newValues[mOp.getVRegValue()][i-1]) << "\n");
02089                     //Update the operand with the right value
02090                     mOp.setValueReg(newValues[mOp.getVRegValue()][i-1]);
02091 
02092                     //Remove this value since we have consumed it
02093                     //NOTE: Should this only be done if j != maxStage?
02094                     consumedValues[oldV][i-1] = (newValues[oldV][i-1]);
02095                     DEBUG(std::cerr << "Deleted value: " << consumedValues[oldV][i-1] << "\n");
02096                     newValues[oldV].erase(i-1);
02097                   }
02098                 }
02099                 else
02100                   if(consumedValues.count(mOp.getVRegValue()))
02101                     assert(!consumedValues[mOp.getVRegValue()].count(i-1) && "Found a case where we need the value");
02102               }
02103             }
02104           }
02105         }
02106       }
02107     }
02108 
02109     MachineFunction *F = (((MachineBasicBlock*)origBB)->getParent());
02110     MachineFunction::BasicBlockListType &BL = F->getBasicBlockList();
02111     MachineFunction::BasicBlockListType::iterator BLI = origBB;
02112     assert(BLI != BL.end() && "Must find original BB in machine function\n");
02113     BL.insert(BLI,machineBB);
02114     prologues.push_back(machineBB);
02115     llvm_prologues.push_back(llvmBB);
02116   }
02117 }
02118 
02119 void ModuloSchedulingPass::writeEpilogues(std::vector<MachineBasicBlock *> &epilogues, const MachineBasicBlock *origBB, std::vector<BasicBlock*> &llvm_epilogues, std::map<const Value*, std::pair<const MachineInstr*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues,std::map<Value*, MachineBasicBlock*> &newValLocation, std::map<Value*, std::map<int, Value*> > &kernelPHIs ) {
02120 
02121   std::map<int, std::set<const MachineInstr*> > inKernel;
02122 
02123   for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) {
02124 
02125     //Ignore the branch, we will handle this separately
02126     //if(I->first->isBranch())
02127     //continue;
02128 
02129     //Put int the map so we know what instructions in each stage are in the kernel
02130     inKernel[I->second].insert(I->first);
02131   }
02132 
02133   std::map<Value*, Value*> valPHIs;
02134 
02135   //some debug stuff, will remove later
02136   DEBUG(for(std::map<Value*, std::map<int, Value*> >::iterator V = newValues.begin(), E = newValues.end(); V !=E; ++V) {
02137     std::cerr << "Old Value: " << *(V->first) << "\n";
02138     for(std::map<int, Value*>::iterator I = V->second.begin(), IE = V->second.end(); I != IE; ++I)
02139       std::cerr << "Stage: " << I->first << " Value: " << *(I->second) << "\n";
02140   });
02141 
02142   //some debug stuff, will remove later
02143   DEBUG(for(std::map<Value*, std::map<int, Value*> >::iterator V = kernelPHIs.begin(), E = kernelPHIs.end(); V !=E; ++V) {
02144     std::cerr << "Old Value: " << *(V->first) << "\n";
02145     for(std::map<int, Value*>::iterator I = V->second.begin(), IE = V->second.end(); I != IE; ++I)
02146       std::cerr << "Stage: " << I->first << " Value: " << *(I->second) << "\n";
02147   });
02148 
02149   //Now write the epilogues
02150   for(int i = schedule.getMaxStage()-1; i >= 0; --i) {
02151     BasicBlock *llvmBB = new BasicBlock("EPILOGUE", (Function*) (origBB->getBasicBlock()->getParent()));
02152     MachineBasicBlock *machineBB = new MachineBasicBlock(llvmBB);
02153 
02154     DEBUG(std::cerr << " Epilogue #: " << i << "\n");
02155 
02156 
02157     std::map<Value*, int> inEpilogue;
02158 
02159      for(MachineBasicBlock::const_iterator MI = origBB->begin(), ME = origBB->end(); ME != MI; ++MI) {
02160       for(int j=schedule.getMaxStage(); j > i; --j) {
02161         if(inKernel[j].count(&*MI)) {
02162           DEBUG(std::cerr << "Cloning instruction " << *MI << "\n");
02163           MachineInstr *clone = MI->clone();
02164 
02165           //Update operands that need to use the result from the phi
02166           for(unsigned opNum=0; opNum < clone->getNumOperands(); ++opNum) {
02167             //get machine operand
02168             const MachineOperand &mOp = clone->getOperand(opNum);
02169 
02170             if((mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse())) {
02171 
02172               DEBUG(std::cerr << "Writing PHI for " << (mOp.getVRegValue()) << "\n");
02173 
02174               //If this is the last instructions for the max iterations ago, don't update operands
02175               if(inEpilogue.count(mOp.getVRegValue()))
02176                 if(inEpilogue[mOp.getVRegValue()] == i)
02177                   continue;
02178 
02179               //Quickly write appropriate phis for this operand
02180               if(newValues.count(mOp.getVRegValue())) {
02181                 if(newValues[mOp.getVRegValue()].count(i)) {
02182                   Instruction *tmp = new TmpInstruction(newValues[mOp.getVRegValue()][i]);
02183 
02184                   //Get machine code for this instruction
02185                   MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
02186                   tempMvec.addTemp((Value*) tmp);
02187 
02188                   //assert of no kernelPHI for this value
02189                   assert(kernelPHIs[mOp.getVRegValue()][i] !=0 && "Must have final kernel phi to construct epilogue phi");
02190 
02191                   MachineInstr *saveValue = BuildMI(machineBB, V9::PHI, 3).addReg(newValues[mOp.getVRegValue()][i]).addReg(kernelPHIs[mOp.getVRegValue()][i]).addRegDef(tmp);
02192                   DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n");
02193                   valPHIs[mOp.getVRegValue()] = tmp;
02194                 }
02195               }
02196 
02197               if(valPHIs.count(mOp.getVRegValue())) {
02198                 //Update the operand in the cloned instruction
02199                 clone->getOperand(opNum).setValueReg(valPHIs[mOp.getVRegValue()]);
02200               }
02201             }
02202             else if((mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef())) {
02203               inEpilogue[mOp.getVRegValue()] = i;
02204             }
02205           }
02206           machineBB->push_back(clone);
02207         }
02208       }
02209      }
02210 
02211      MachineFunction *F = (((MachineBasicBlock*)origBB)->getParent());
02212      MachineFunction::BasicBlockListType &BL = F->getBasicBlockList();
02213      MachineFunction::BasicBlockListType::iterator BLI = (MachineBasicBlock*) origBB;
02214      assert(BLI != BL.end() && "Must find original BB in machine function\n");
02215      BL.insert(BLI,machineBB);
02216      epilogues.push_back(machineBB);
02217      llvm_epilogues.push_back(llvmBB);
02218 
02219      DEBUG(std::cerr << "EPILOGUE #" << i << "\n");
02220      DEBUG(machineBB->print(std::cerr));
02221   }
02222 }
02223 
02224 void ModuloSchedulingPass::writeKernel(BasicBlock *llvmBB, MachineBasicBlock *machineBB, std::map<const Value*, std::pair<const MachineInstr*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues, std::map<Value*, MachineBasicBlock*> &newValLocation, std::map<Value*, std::map<int, Value*> > &kernelPHIs) {
02225 
02226   //Keep track of operands that are read and saved from a previous iteration. The new clone
02227   //instruction will use the result of the phi instead.
02228   std::map<Value*, Value*> finalPHIValue;
02229   std::map<Value*, Value*> kernelValue;
02230 
02231   //Branches are a special case
02232   std::vector<MachineInstr*> branches;
02233 
02234   //Get target information to look at machine operands
02235   const TargetInstrInfo *mii = target.getInstrInfo();
02236 
02237   //Create TmpInstructions for the final phis
02238   for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) {
02239 
02240    DEBUG(std::cerr << "Stage: " << I->second << " Inst: " << *(I->first) << "\n";);
02241 
02242    //Clone instruction
02243    const MachineInstr *inst = I->first;
02244    MachineInstr *instClone = inst->clone();
02245 
02246    //Insert into machine basic block
02247    machineBB->push_back(instClone);
02248 
02249    if(mii->isBranch(instClone->getOpcode()))
02250      BuildMI(machineBB, V9::NOP, 0);
02251 
02252    DEBUG(std::cerr <<  "Cloned Inst: " << *instClone << "\n");
02253 
02254    //Loop over Machine Operands
02255    for(unsigned i=0; i < inst->getNumOperands(); ++i) {
02256      //get machine operand
02257      const MachineOperand &mOp = inst->getOperand(i);
02258 
02259      if(I->second != 0) {
02260        if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) {
02261 
02262          //Check to see where this operand is defined if this instruction is from max stage
02263          if(I->second == schedule.getMaxStage()) {
02264            DEBUG(std::cerr << "VREG: " << *(mOp.getVRegValue()) << "\n");
02265          }
02266 
02267          //If its in the value saved, we need to create a temp instruction and use that instead
02268          if(valuesToSave.count(mOp.getVRegValue())) {
02269 
02270            //Check if we already have a final PHI value for this
02271            if(!finalPHIValue.count(mOp.getVRegValue())) {
02272              //Only create phi if the operand def is from a stage before this one
02273              if(schedule.defPreviousStage(mOp.getVRegValue(), I->second)) {
02274              TmpInstruction *tmp = new TmpInstruction(mOp.getVRegValue());
02275 
02276              //Get machine code for this instruction
02277              MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
02278              tempMvec.addTemp((Value*) tmp);
02279 
02280              //Update the operand in the cloned instruction
02281              instClone->getOperand(i).setValueReg(tmp);
02282 
02283              //save this as our final phi
02284              finalPHIValue[mOp.getVRegValue()] = tmp;
02285              newValLocation[tmp] = machineBB;
02286              }
02287            }
02288            else {
02289              //Use the previous final phi value
02290              instClone->getOperand(i).setValueReg(finalPHIValue[mOp.getVRegValue()]);
02291            }
02292          }
02293        }
02294      }
02295      if(I->second != schedule.getMaxStage()) {
02296        if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) {
02297          if(valuesToSave.count(mOp.getVRegValue())) {
02298 
02299            TmpInstruction *tmp = new TmpInstruction(mOp.getVRegValue());
02300 
02301            //Get machine code for this instruction
02302            MachineCodeForInstruction & tempVec = MachineCodeForInstruction::get(defaultInst);
02303            tempVec.addTemp((Value*) tmp);
02304 
02305            //Create new machine instr and put in MBB
02306            MachineInstr *saveValue;
02307            if(mOp.getVRegValue()->getType() == Type::FloatTy)
02308              saveValue = BuildMI(machineBB, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
02309            else if(mOp.getVRegValue()->getType() == Type::DoubleTy)
02310              saveValue = BuildMI(machineBB, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
02311            else
02312              saveValue = BuildMI(machineBB, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp);
02313 
02314 
02315            //Save for future cleanup
02316            kernelValue[mOp.getVRegValue()] = tmp;
02317            newValLocation[tmp] = machineBB;
02318            kernelPHIs[mOp.getVRegValue()][schedule.getMaxStage()-1] = tmp;
02319          }
02320        }
02321      }
02322    }
02323 
02324  }
02325 
02326  //Add branches
02327  for(std::vector<MachineInstr*>::iterator I = branches.begin(), E = branches.end(); I != E; ++I) {
02328    machineBB->push_back(*I);
02329    BuildMI(machineBB, V9::NOP, 0);
02330  }
02331 
02332 
02333   DEBUG(std::cerr << "KERNEL before PHIs\n");
02334   DEBUG(machineBB->print(std::cerr));
02335 
02336 
02337  //Loop over each value we need to generate phis for
02338  for(std::map<Value*, std::map<int, Value*> >::iterator V = newValues.begin(),
02339        E = newValues.end(); V != E; ++V) {
02340 
02341 
02342    DEBUG(std::cerr << "Writing phi for" << *(V->first));
02343    DEBUG(std::cerr << "\nMap of Value* for this phi\n");
02344    DEBUG(for(std::map<int, Value*>::iterator I = V->second.begin(),
02345                IE = V->second.end(); I != IE; ++I) {
02346      std::cerr << "Stage: " << I->first;
02347      std::cerr << " Value: " << *(I->second) << "\n";
02348    });
02349 
02350    //If we only have one current iteration live, its safe to set lastPhi = to kernel value
02351    if(V->second.size() == 1) {
02352      assert(kernelValue[V->first] != 0 && "Kernel value* must exist to create phi");
02353      MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(),V9::PHI, 3).addReg(V->second.begin()->second).addReg(kernelValue[V->first]).addRegDef(finalPHIValue[V->first]);
02354      DEBUG(std::cerr << "Resulting PHI (one live): " << *saveValue << "\n");
02355      kernelPHIs[V->first][V->second.begin()->first] = kernelValue[V->first];
02356      DEBUG(std::cerr << "Put kernel phi in at stage: " << schedule.getMaxStage()-1 << " (map stage = " << V->second.begin()->first << ")\n");
02357     }
02358    else {
02359 
02360      //Keep track of last phi created.
02361      Instruction *lastPhi = 0;
02362 
02363      unsigned count = 1;
02364      //Loop over the the map backwards to generate phis
02365      for(std::map<int, Value*>::reverse_iterator I = V->second.rbegin(), IE = V->second.rend();
02366          I != IE; ++I) {
02367 
02368        if(count < (V->second).size()) {
02369          if(lastPhi == 0) {
02370            lastPhi = new TmpInstruction(I->second);
02371 
02372            //Get machine code for this instruction
02373            MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
02374            tempMvec.addTemp((Value*) lastPhi);
02375 
02376            MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(), V9::PHI, 3).addReg(kernelValue[V->first]).addReg(I->second).addRegDef(lastPhi);
02377            DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n");
02378            newValLocation[lastPhi] = machineBB;
02379          }
02380          else {
02381            Instruction *tmp = new TmpInstruction(I->second);
02382 
02383            //Get machine code for this instruction
02384            MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
02385            tempMvec.addTemp((Value*) tmp);
02386 
02387 
02388            MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(), V9::PHI, 3).addReg(lastPhi).addReg(I->second).addRegDef(tmp);
02389            DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n");
02390            lastPhi = tmp;
02391            kernelPHIs[V->first][I->first] = lastPhi;
02392            newValLocation[lastPhi] = machineBB;
02393          }
02394        }
02395        //Final phi value
02396        else {
02397          //The resulting value must be the Value* we created earlier
02398          assert(lastPhi != 0 && "Last phi is NULL!\n");
02399          MachineInstr *saveValue = BuildMI(*machineBB, machineBB->begin(), V9::PHI, 3).addReg(lastPhi).addReg(I->second).addRegDef(finalPHIValue[V->first]);
02400          DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n");
02401          kernelPHIs[V->first][I->first] = finalPHIValue[V->first];
02402        }
02403 
02404        ++count;
02405      }
02406 
02407    }
02408  }
02409 
02410   DEBUG(std::cerr << "KERNEL after PHIs\n");
02411   DEBUG(machineBB->print(std::cerr));
02412 }
02413 
02414 
02415 void ModuloSchedulingPass::removePHIs(const MachineBasicBlock *origBB, std::vector<MachineBasicBlock *> &prologues, std::vector<MachineBasicBlock *> &epilogues, MachineBasicBlock *kernelBB, std::map<Value*, MachineBasicBlock*> &newValLocation) {
02416 
02417   //Worklist to delete things
02418   std::vector<std::pair<MachineBasicBlock*, MachineBasicBlock::iterator> > worklist;
02419 
02420   //Worklist of TmpInstructions that need to be added to a MCFI
02421   std::vector<Instruction*> addToMCFI;
02422 
02423   //Worklist to add OR instructions to end of kernel so not to invalidate the iterator
02424   //std::vector<std::pair<Instruction*, Value*> > newORs;
02425 
02426   const TargetInstrInfo *TMI = target.getInstrInfo();
02427 
02428   //Start with the kernel and for each phi insert a copy for the phi def and for each arg
02429   for(MachineBasicBlock::iterator I = kernelBB->begin(), E = kernelBB->end(); I != E; ++I) {
02430 
02431     DEBUG(std::cerr << "Looking at Instr: " << *I << "\n");
02432     //Get op code and check if its a phi
02433     if(I->getOpcode() == V9::PHI) {
02434 
02435       DEBUG(std::cerr << "Replacing PHI: " << *I << "\n");
02436       Instruction *tmp = 0;
02437 
02438       for(unsigned i = 0; i < I->getNumOperands(); ++i) {
02439         //Get Operand
02440         const MachineOperand &mOp = I->getOperand(i);
02441         assert(mOp.getType() == MachineOperand::MO_VirtualRegister && "Should be a Value*\n");
02442 
02443         if(!tmp) {
02444           tmp = new TmpInstruction(mOp.getVRegValue());
02445           addToMCFI.push_back(tmp);
02446         }
02447 
02448         //Now for all our arguments we read, OR to the new TmpInstruction that we created
02449         if(mOp.isUse()) {
02450           DEBUG(std::cerr << "Use: " << mOp << "\n");
02451           //Place a copy at the end of its BB but before the branches
02452           assert(newValLocation.count(mOp.getVRegValue()) && "We must know where this value is located\n");
02453           //Reverse iterate to find the branches, we can safely assume no instructions have been
02454           //put in the nop positions
02455           for(MachineBasicBlock::iterator inst = --(newValLocation[mOp.getVRegValue()])->end(), endBB = (newValLocation[mOp.getVRegValue()])->begin(); inst != endBB; --inst) {
02456             MachineOpCode opc = inst->getOpcode();
02457             if(TMI->isBranch(opc) || TMI->isNop(opc))
02458               continue;
02459             else {
02460               if(mOp.getVRegValue()->getType() == Type::FloatTy)
02461                 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
02462               else if(mOp.getVRegValue()->getType() == Type::DoubleTy)
02463                 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
02464               else
02465                 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp);
02466 
02467               break;
02468             }
02469 
02470           }
02471 
02472         }
02473         else {
02474           //Remove the phi and replace it with an OR
02475           DEBUG(std::cerr << "Def: " << mOp << "\n");
02476           //newORs.push_back(std::make_pair(tmp, mOp.getVRegValue()));
02477           if(tmp->getType() == Type::FloatTy)
02478             BuildMI(*kernelBB, I, V9::FMOVS, 3).addReg(tmp).addRegDef(mOp.getVRegValue());
02479           else if(tmp->getType() == Type::DoubleTy)
02480             BuildMI(*kernelBB, I, V9::FMOVD, 3).addReg(tmp).addRegDef(mOp.getVRegValue());
02481           else
02482             BuildMI(*kernelBB, I, V9::ORr, 3).addReg(tmp).addImm(0).addRegDef(mOp.getVRegValue());
02483 
02484 
02485           worklist.push_back(std::make_pair(kernelBB, I));
02486         }
02487 
02488       }
02489 
02490     }
02491 
02492 
02493   }
02494 
02495   //Add TmpInstructions to some MCFI
02496   if(addToMCFI.size() > 0) {
02497     MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
02498     for(unsigned x = 0; x < addToMCFI.size(); ++x) {
02499       tempMvec.addTemp(addToMCFI[x]);
02500     }
02501     addToMCFI.clear();
02502   }
02503 
02504 
02505   //Remove phis from epilogue
02506   for(std::vector<MachineBasicBlock*>::iterator MB = epilogues.begin(), ME = epilogues.end(); MB != ME; ++MB) {
02507     for(MachineBasicBlock::iterator I = (*MB)->begin(), E = (*MB)->end(); I != E; ++I) {
02508 
02509       DEBUG(std::cerr << "Looking at Instr: " << *I << "\n");
02510       //Get op code and check if its a phi
02511       if(I->getOpcode() == V9::PHI) {
02512         Instruction *tmp = 0;
02513 
02514         for(unsigned i = 0; i < I->getNumOperands(); ++i) {
02515           //Get Operand
02516           const MachineOperand &mOp = I->getOperand(i);
02517           assert(mOp.getType() == MachineOperand::MO_VirtualRegister && "Should be a Value*\n");
02518 
02519           if(!tmp) {
02520             tmp = new TmpInstruction(mOp.getVRegValue());
02521             addToMCFI.push_back(tmp);
02522           }
02523 
02524           //Now for all our arguments we read, OR to the new TmpInstruction that we created
02525           if(mOp.isUse()) {
02526             DEBUG(std::cerr << "Use: " << mOp << "\n");
02527             //Place a copy at the end of its BB but before the branches
02528             assert(newValLocation.count(mOp.getVRegValue()) && "We must know where this value is located\n");
02529             //Reverse iterate to find the branches, we can safely assume no instructions have been
02530             //put in the nop positions
02531             for(MachineBasicBlock::iterator inst = --(newValLocation[mOp.getVRegValue()])->end(), endBB = (newValLocation[mOp.getVRegValue()])->begin(); inst != endBB; --inst) {
02532               MachineOpCode opc = inst->getOpcode();
02533               if(TMI->isBranch(opc) || TMI->isNop(opc))
02534                 continue;
02535               else {
02536                 if(mOp.getVRegValue()->getType() == Type::FloatTy)
02537                   BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
02538                 else if(mOp.getVRegValue()->getType() == Type::DoubleTy)
02539                   BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
02540                 else
02541                   BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp);
02542 
02543 
02544                 break;
02545               }
02546 
02547             }
02548 
02549           }
02550           else {
02551             //Remove the phi and replace it with an OR
02552             DEBUG(std::cerr << "Def: " << mOp << "\n");
02553              if(tmp->getType() == Type::FloatTy)
02554                BuildMI(**MB, I, V9::FMOVS, 3).addReg(tmp).addRegDef(mOp.getVRegValue());
02555              else if(tmp->getType() == Type::DoubleTy)
02556                BuildMI(**MB, I, V9::FMOVD, 3).addReg(tmp).addRegDef(mOp.getVRegValue());
02557              else
02558                BuildMI(**MB, I, V9::ORr, 3).addReg(tmp).addImm(0).addRegDef(mOp.getVRegValue());
02559 
02560             worklist.push_back(std::make_pair(*MB,I));
02561           }
02562 
02563         }
02564       }
02565 
02566 
02567     }
02568   }
02569 
02570 
02571   if(addToMCFI.size() > 0) {
02572     MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
02573     for(unsigned x = 0; x < addToMCFI.size(); ++x) {
02574       tempMvec.addTemp(addToMCFI[x]);
02575     }
02576     addToMCFI.clear();
02577   }
02578 
02579     //Delete the phis
02580   for(std::vector<std::pair<MachineBasicBlock*, MachineBasicBlock::iterator> >::iterator I =  worklist.begin(), E = worklist.end(); I != E; ++I) {
02581 
02582     DEBUG(std::cerr << "Deleting PHI " << *I->second << "\n");
02583     I->first->erase(I->second);
02584 
02585   }
02586 
02587 
02588   assert((addToMCFI.size() == 0) && "We should have added all TmpInstructions to some MachineCodeForInstruction");
02589 }
02590 
02591 
02592 void ModuloSchedulingPass::reconstructLoop(MachineBasicBlock *BB) {
02593 
02594   TIME_REGION(X, "reconstructLoop");
02595 
02596 
02597   DEBUG(std::cerr << "Reconstructing Loop\n");
02598 
02599   //First find the value *'s that we need to "save"
02600   std::map<const Value*, std::pair<const MachineInstr*, int> > valuesToSave;
02601 
02602   //Keep track of instructions we have already seen and their stage because
02603   //we don't want to "save" values if they are used in the kernel immediately
02604   std::map<const MachineInstr*, int> lastInstrs;
02605   std::map<const Value*, int> phiUses;
02606 
02607   //Loop over kernel and only look at instructions from a stage > 0
02608   //Look at its operands and save values *'s that are read
02609   for(MSSchedule::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) {
02610 
02611     if(I->second !=0) {
02612       //For this instruction, get the Value*'s that it reads and put them into the set.
02613       //Assert if there is an operand of another type that we need to save
02614       const MachineInstr *inst = I->first;
02615       lastInstrs[inst] = I->second;
02616 
02617       for(unsigned i=0; i < inst->getNumOperands(); ++i) {
02618         //get machine operand
02619         const MachineOperand &mOp = inst->getOperand(i);
02620 
02621         if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) {
02622           //find the value in the map
02623           if (const Value* srcI = mOp.getVRegValue()) {
02624 
02625             if(isa<Constant>(srcI) || isa<Argument>(srcI))
02626               continue;
02627 
02628             //Before we declare this Value* one that we should save
02629             //make sure its def is not of the same stage as this instruction
02630             //because it will be consumed before its used
02631             Instruction *defInst = (Instruction*) srcI;
02632 
02633             //Should we save this value?
02634             bool save = true;
02635 
02636             //Continue if not in the def map, loop invariant code does not need to be saved
02637             if(!defMap.count(srcI))
02638               continue;
02639 
02640             MachineInstr *defInstr = defMap[srcI];
02641 
02642 
02643             if(lastInstrs.count(defInstr)) {
02644               if(lastInstrs[defInstr] == I->second) {
02645                 save = false;
02646 
02647               }
02648             }
02649 
02650             if(save) {
02651               assert(!phiUses.count(srcI) && "Did not expect to see phi use twice");
02652               if(isa<PHINode>(srcI))
02653                 phiUses[srcI] = I->second;
02654 
02655               valuesToSave[srcI] = std::make_pair(I->first, i);
02656 
02657             }
02658           }
02659         }
02660         else if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) {
02661           if (const Value* destI = mOp.getVRegValue()) {
02662             if(!isa<PHINode>(destI))
02663               continue;
02664             if(phiUses.count(destI)) {
02665               if(phiUses[destI] == I->second) {
02666                 //remove from save list
02667                 valuesToSave.erase(destI);
02668               }
02669             }
02670           }
02671         }
02672 
02673         if(mOp.getType() != MachineOperand::MO_VirtualRegister && mOp.isUse()) {
02674           assert("Our assumption is wrong. We have another type of register that needs to be saved\n");
02675         }
02676       }
02677     }
02678   }
02679 
02680   //The new loop will consist of one or more prologues, the kernel, and one or more epilogues.
02681 
02682   //Map to keep track of old to new values
02683   std::map<Value*, std::map<int, Value*> > newValues;
02684 
02685   //Map to keep track of old to new values in kernel
02686   std::map<Value*, std::map<int, Value*> > kernelPHIs;
02687 
02688   //Another map to keep track of what machine basic blocks these new value*s are in since
02689   //they have no llvm instruction equivalent
02690   std::map<Value*, MachineBasicBlock*> newValLocation;
02691 
02692   std::vector<MachineBasicBlock*> prologues;
02693   std::vector<BasicBlock*> llvm_prologues;
02694 
02695 
02696   //Write prologue
02697   if(schedule.getMaxStage() != 0)
02698     writePrologues(prologues, BB, llvm_prologues, valuesToSave, newValues, newValLocation);
02699 
02700   //Print out epilogues and prologue
02701   DEBUG(for(std::vector<MachineBasicBlock*>::iterator I = prologues.begin(), E = prologues.end();
02702       I != E; ++I) {
02703     std::cerr << "PROLOGUE\n";
02704     (*I)->print(std::cerr);
02705   });
02706 
02707   BasicBlock *llvmKernelBB = new BasicBlock("Kernel", (Function*) (BB->getBasicBlock()->getParent()));
02708   MachineBasicBlock *machineKernelBB = new MachineBasicBlock(llvmKernelBB);
02709 
02710   MachineFunction *F = (((MachineBasicBlock*)BB)->getParent());
02711   MachineFunction::BasicBlockListType &BL = F->getBasicBlockList();
02712   MachineFunction::BasicBlockListType::iterator BLI = BB;
02713   assert(BLI != BL.end() && "Must find original BB in machine function\n");
02714   BL.insert(BLI,machineKernelBB);
02715 
02716   //(((MachineBasicBlock*)BB)->getParent())->getBasicBlockList().push_back(machineKernelBB);
02717   writeKernel(llvmKernelBB, machineKernelBB, valuesToSave, newValues, newValLocation, kernelPHIs);
02718 
02719 
02720   std::vector<MachineBasicBlock*> epilogues;
02721   std::vector<BasicBlock*> llvm_epilogues;
02722 
02723   //Write epilogues
02724   if(schedule.getMaxStage() != 0)
02725     writeEpilogues(epilogues, BB, llvm_epilogues, valuesToSave, newValues, newValLocation, kernelPHIs);
02726 
02727 
02728   //Fix our branches
02729   fixBranches(prologues, llvm_prologues, machineKernelBB, llvmKernelBB, epilogues, llvm_epilogues, BB);
02730 
02731   //Remove phis
02732   removePHIs(BB, prologues, epilogues, machineKernelBB, newValLocation);
02733 
02734   //Print out epilogues and prologue
02735   DEBUG(for(std::vector<MachineBasicBlock*>::iterator I = prologues.begin(), E = prologues.end();
02736       I != E; ++I) {
02737     std::cerr << "PROLOGUE\n";
02738     (*I)->print(std::cerr);
02739   });
02740 
02741   DEBUG(std::cerr << "KERNEL\n");
02742   DEBUG(machineKernelBB->print(std::cerr));
02743 
02744   DEBUG(for(std::vector<MachineBasicBlock*>::iterator I = epilogues.begin(), E = epilogues.end();
02745       I != E; ++I) {
02746     std::cerr << "EPILOGUE\n";
02747     (*I)->print(std::cerr);
02748   });
02749 
02750 
02751   DEBUG(std::cerr << "New Machine Function" << "\n");
02752   DEBUG(std::cerr << BB->getParent() << "\n");
02753 
02754 
02755 }
02756 
02757 void ModuloSchedulingPass::fixBranches(std::vector<MachineBasicBlock *> &prologues, std::vector<BasicBlock*> &llvm_prologues, MachineBasicBlock *machineKernelBB, BasicBlock *llvmKernelBB, std::vector<MachineBasicBlock *> &epilogues, std::vector<BasicBlock*> &llvm_epilogues, MachineBasicBlock *BB) {
02758 
02759   const TargetInstrInfo *TMI = target.getInstrInfo();
02760 
02761   if(schedule.getMaxStage() != 0) {
02762     //Fix prologue branches
02763     for(unsigned I = 0; I <  prologues.size(); ++I) {
02764 
02765       //Find terminator since getFirstTerminator does not work!
02766       for(MachineBasicBlock::reverse_iterator mInst = prologues[I]->rbegin(), mInstEnd = prologues[I]->rend(); mInst != mInstEnd; ++mInst) {
02767         MachineOpCode OC = mInst->getOpcode();
02768         //If its a branch update its branchto
02769         if(TMI->isBranch(OC)) {
02770           for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) {
02771             MachineOperand &mOp = mInst->getOperand(opNum);
02772             if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) {
02773               //Check if we are branching to the kernel, if not branch to epilogue
02774               if(mOp.getVRegValue() == BB->getBasicBlock()) {
02775                 if(I == prologues.size()-1)
02776                   mOp.setValueReg(llvmKernelBB);
02777                 else
02778                   mOp.setValueReg(llvm_prologues[I+1]);
02779               }
02780               else {
02781                 mOp.setValueReg(llvm_epilogues[(llvm_epilogues.size()-1-I)]);
02782               }
02783             }
02784           }
02785 
02786           DEBUG(std::cerr << "New Prologue Branch: " << *mInst << "\n");
02787         }
02788       }
02789 
02790 
02791       //Update llvm basic block with our new branch instr
02792       DEBUG(std::cerr << BB->getBasicBlock()->getTerminator() << "\n");
02793       const BranchInst *branchVal = dyn_cast<BranchInst>(BB->getBasicBlock()->getTerminator());
02794 
02795       if(I == prologues.size()-1) {
02796         TerminatorInst *newBranch = new BranchInst(llvmKernelBB,
02797                                                    llvm_epilogues[(llvm_epilogues.size()-1-I)],
02798                                                    branchVal->getCondition(),
02799                                                    llvm_prologues[I]);
02800       }
02801       else
02802         TerminatorInst *newBranch = new BranchInst(llvm_prologues[I+1],
02803                                                    llvm_epilogues[(llvm_epilogues.size()-1-I)],
02804                                                    branchVal->getCondition(),
02805                                                    llvm_prologues[I]);
02806 
02807     }
02808   }
02809 
02810   Value *origBranchExit = 0;
02811 
02812   //Fix up kernel machine branches
02813   for(MachineBasicBlock::reverse_iterator mInst = machineKernelBB->rbegin(), mInstEnd = machineKernelBB->rend(); mInst != mInstEnd; ++mInst) {
02814     MachineOpCode OC = mInst->getOpcode();
02815     if(TMI->isBranch(OC)) {
02816       for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) {
02817         MachineOperand &mOp = mInst->getOperand(opNum);
02818 
02819         if(mOp.getType() == MachineOperand::MO_PCRelativeDisp) {
02820           if(mOp.getVRegValue() == BB->getBasicBlock())
02821             mOp.setValueReg(llvmKernelBB);
02822           else
02823             if(llvm_epilogues.size() > 0) {
02824               assert(origBranchExit == 0 && "There should only be one branch out of the loop");
02825 
02826               origBranchExit = mOp.getVRegValue();
02827               mOp.setValueReg(llvm_epilogues[0]);
02828             }
02829             else
02830               origBranchExit = mOp.getVRegValue();
02831         }
02832       }
02833     }
02834   }
02835 
02836   //Update kernelLLVM branches
02837   const BranchInst *branchVal = dyn_cast<BranchInst>(BB->getBasicBlock()->getTerminator());
02838 
02839   assert(origBranchExit != 0 && "We must have the original bb the kernel exits to!");
02840 
02841   if(epilogues.size() > 0) {
02842     TerminatorInst *newBranch = new BranchInst(llvmKernelBB,
02843                                                llvm_epilogues[0],
02844                                                branchVal->getCondition(),
02845                                                llvmKernelBB);
02846   }
02847   else {
02848     BasicBlock *origBBExit = dyn_cast<BasicBlock>(origBranchExit);
02849     assert(origBBExit !=0 && "Original exit basic block must be set");
02850     TerminatorInst *newBranch = new BranchInst(llvmKernelBB,
02851                                                origBBExit,
02852                                                branchVal->getCondition(),
02853                                                llvmKernelBB);
02854   }
02855 
02856   if(schedule.getMaxStage() != 0) {
02857    //Lastly add unconditional branches for the epilogues
02858    for(unsigned I = 0; I <  epilogues.size(); ++I) {
02859 
02860     //Now since we don't have fall throughs, add a unconditional branch to the next prologue
02861      if(I != epilogues.size()-1) {
02862        BuildMI(epilogues[I], V9::BA, 1).addPCDisp(llvm_epilogues[I+1]);
02863        //Add unconditional branch to end of epilogue
02864        TerminatorInst *newBranch = new BranchInst(llvm_epilogues[I+1],
02865                                                   llvm_epilogues[I]);
02866 
02867      }
02868      else {
02869        BuildMI(epilogues[I], V9::BA, 1).addPCDisp(origBranchExit);
02870 
02871 
02872        //Update last epilogue exit branch
02873        BranchInst *branchVal = (BranchInst*) dyn_cast<BranchInst>(BB->getBasicBlock()->getTerminator());
02874        //Find where we are supposed to branch to
02875        BasicBlock *nextBlock = 0;
02876        for(unsigned j=0; j <branchVal->getNumSuccessors(); ++j) {
02877          if(branchVal->getSuccessor(j) != BB->getBasicBlock())
02878            nextBlock = branchVal->getSuccessor(j);
02879        }
02880 
02881        assert((nextBlock != 0) && "Next block should not be null!");
02882        TerminatorInst *newBranch = new BranchInst(nextBlock, llvm_epilogues[I]);
02883      }
02884      //Add one more nop!
02885      BuildMI(epilogues[I], V9::NOP, 0);
02886 
02887    }
02888   }
02889 
02890    //FIX UP Machine BB entry!!
02891    //We are looking at the predecesor of our loop basic block and we want to change its ba instruction
02892 
02893 
02894    //Find all llvm basic blocks that branch to the loop entry and change to our first prologue.
02895    const BasicBlock *llvmBB = BB->getBasicBlock();
02896 
02897    std::vector<const BasicBlock*>Preds (pred_begin(llvmBB), pred_end(llvmBB));
02898 
02899    //for(pred_const_iterator P = pred_begin(llvmBB), PE = pred_end(llvmBB); P != PE; ++PE) {
02900    for(std::vector<const BasicBlock*>::iterator P = Preds.begin(), PE = Preds.end(); P != PE; ++P) {
02901      if(*P == llvmBB)
02902        continue;
02903      else {
02904        DEBUG(std::cerr << "Found our entry BB\n");
02905        //Get the Terminator instruction for this basic block and print it out
02906        DEBUG(std::cerr << *((*P)->getTerminator()) << "\n");
02907        //Update the terminator
02908        TerminatorInst *term = ((BasicBlock*)*P)->getTerminator();
02909        for(unsigned i=0; i < term->getNumSuccessors(); ++i) {
02910          if(term->getSuccessor(i) == llvmBB) {
02911            DEBUG(std::cerr << "Replacing successor bb\n");
02912            if(llvm_prologues.size() > 0) {
02913              term->setSuccessor(i, llvm_prologues[0]);
02914              //Also update its corresponding machine instruction
02915              MachineCodeForInstruction & tempMvec =
02916                MachineCodeForInstruction::get(term);
02917              for (unsigned j = 0; j < tempMvec.size(); j++) {
02918                MachineInstr *temp = tempMvec[j];
02919                MachineOpCode opc = temp->getOpcode();
02920                if(TMI->isBranch(opc)) {
02921                  DEBUG(std::cerr << *temp << "\n");
02922                  //Update branch
02923                  for(unsigned opNum = 0; opNum < temp->getNumOperands(); ++opNum) {
02924                    MachineOperand &mOp = temp->getOperand(opNum);
02925                    if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) {
02926                      if(mOp.getVRegValue() == llvmBB)
02927                        mOp.setValueReg(llvm_prologues[0]);
02928                    }
02929                  }
02930                }
02931              }
02932            }
02933            else {
02934              term->setSuccessor(i, llvmKernelBB);
02935            //Also update its corresponding machine instruction
02936              MachineCodeForInstruction & tempMvec =
02937                MachineCodeForInstruction::get(term);
02938              for (unsigned j = 0; j < tempMvec.size(); j++) {
02939                MachineInstr *temp = tempMvec[j];
02940                MachineOpCode opc = temp->getOpcode();
02941                if(TMI->isBranch(opc)) {
02942                  DEBUG(std::cerr << *temp << "\n");
02943                  //Update branch
02944                  for(unsigned opNum = 0; opNum < temp->getNumOperands(); ++opNum) {
02945                    MachineOperand &mOp = temp->getOperand(opNum);
02946                    if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) {
02947                      if(mOp.getVRegValue() == llvmBB)
02948                        mOp.setValueReg(llvmKernelBB);
02949                    }
02950                  }
02951                }
02952              }
02953            }
02954          }
02955        }
02956        break;
02957      }
02958    }
02959 
02960 
02961   //BB->getParent()->getBasicBlockList().erase(BB);
02962 
02963 }
02964