LLVM API Documentation

ModuloSchedulingSuperBlock.cpp

Go to the documentation of this file.
00001 //===-- ModuloSchedulingSuperBlock.cpp - ModuloScheduling--------*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 //  This ModuloScheduling pass is based on the Swing Modulo Scheduling
00011 //  algorithm, but has been extended to support SuperBlocks (multiple
00012 //  basic block, single entry, multipl exit loops).
00013 //
00014 //===----------------------------------------------------------------------===//
00015 
00016 #define DEBUG_TYPE "ModuloSchedSB"
00017 
00018 #include "DependenceAnalyzer.h"
00019 #include "ModuloSchedulingSuperBlock.h"
00020 #include "llvm/Constants.h"
00021 #include "llvm/ADT/Statistic.h"
00022 #include "llvm/CodeGen/MachineFunction.h"
00023 #include "llvm/CodeGen/Passes.h"
00024 #include "llvm/Support/CFG.h"
00025 #include "llvm/Support/Debug.h"
00026 #include "llvm/Support/GraphWriter.h"
00027 #include "llvm/Support/Timer.h"
00028 #include "llvm/ADT/StringExtras.h"
00029 #include "llvm/ADT/SCCIterator.h"
00030 #include "llvm/Instructions.h"
00031 #include "../MachineCodeForInstruction.h"
00032 #include "../SparcV9RegisterInfo.h"
00033 #include "../SparcV9Internals.h"
00034 #include "../SparcV9TmpInstr.h"
00035 #include <fstream>
00036 #include <sstream>
00037 #include <cmath>
00038 #include <utility>
00039 
00040 using namespace llvm;
00041 /// Create ModuloSchedulingSBPass
00042 ///
00043 FunctionPass *llvm::createModuloSchedulingSBPass(TargetMachine & targ) {
00044   DEBUG(std::cerr << "Created ModuloSchedulingSBPass\n");
00045   return new ModuloSchedulingSBPass(targ);
00046 }
00047 
00048 
00049 #if 1
00050 #define TIME_REGION(VARNAME, DESC) \
00051    NamedRegionTimer VARNAME(DESC)
00052 #else
00053 #define TIME_REGION(VARNAME, DESC)
00054 #endif
00055 
00056 
00057 //Graph Traits for printing out the dependence graph
00058 template<typename GraphType>
00059 static void WriteGraphToFileSB(std::ostream &O, const std::string &GraphName,
00060                              const GraphType &GT) {
00061   std::string Filename = GraphName + ".dot";
00062   O << "Writing '" << Filename << "'...";
00063   std::ofstream F(Filename.c_str());
00064 
00065   if (F.good())
00066     WriteGraph(F, GT);
00067   else
00068     O << "  error opening file for writing!";
00069   O << "\n";
00070 };
00071 
00072 namespace llvm {
00073   Statistic<> NumLoops("moduloschedSB-numLoops", "Total Number of Loops");
00074   Statistic<> NumSB("moduloschedSB-numSuperBlocks", "Total Number of SuperBlocks");
00075   Statistic<> BBWithCalls("modulosched-BBCalls", "Basic Blocks rejected due to calls");
00076   Statistic<> BBWithCondMov("modulosched-loopCondMov",
00077                             "Basic Blocks rejected due to conditional moves");
00078   Statistic<> SBResourceConstraint("modulosched-resourceConstraint",
00079                                  "Loops constrained by resources");
00080   Statistic<> SBRecurrenceConstraint("modulosched-recurrenceConstraint",
00081                                    "Loops constrained by recurrences");
00082   Statistic<> SBFinalIISum("modulosched-finalIISum", "Sum of all final II");
00083   Statistic<> SBIISum("modulosched-IISum", "Sum of all theoretical II");
00084   Statistic<> SBMSLoops("modulosched-schedLoops", "Number of loops successfully modulo-scheduled");
00085   Statistic<> SBNoSched("modulosched-noSched", "No schedule");
00086   Statistic<> SBSameStage("modulosched-sameStage", "Max stage is 0");
00087   Statistic<> SBBLoops("modulosched-SBBLoops", "Number single basic block loops");
00088   Statistic<> SBInvalid("modulosched-SBInvalid", "Number invalid superblock loops");
00089   Statistic<> SBValid("modulosched-SBValid", "Number valid superblock loops");
00090  Statistic<> SBSize("modulosched-SBSize", "Total size of all valid superblocks");
00091 
00092   template<>
00093   struct DOTGraphTraits<MSchedGraphSB*> : public DefaultDOTGraphTraits {
00094     static std::string getGraphName(MSchedGraphSB *F) {
00095       return "Dependence Graph";
00096     }
00097 
00098     static std::string getNodeLabel(MSchedGraphSBNode *Node, MSchedGraphSB *Graph) {
00099       if(!Node->isPredicate()) {
00100         if (Node->getInst()) {
00101           std::stringstream ss;
00102           ss << *(Node->getInst());
00103           return ss.str(); //((MachineInstr*)Node->getInst());
00104         }
00105         else
00106           return "No Inst";
00107       }
00108       else
00109         return "Pred Node";
00110     }
00111     static std::string getEdgeSourceLabel(MSchedGraphSBNode *Node,
00112                                           MSchedGraphSBNode::succ_iterator I) {
00113       //Label each edge with the type of dependence
00114       std::string edgelabel = "";
00115       switch (I.getEdge().getDepOrderType()) {
00116 
00117       case MSchedGraphSBEdge::TrueDep:
00118         edgelabel = "True";
00119         break;
00120 
00121       case MSchedGraphSBEdge::AntiDep:
00122         edgelabel =  "Anti";
00123         break;
00124 
00125       case MSchedGraphSBEdge::OutputDep:
00126         edgelabel = "Output";
00127         break;
00128 
00129       case MSchedGraphSBEdge::NonDataDep:
00130         edgelabel = "Pred";
00131         break;
00132 
00133       default:
00134         edgelabel = "Unknown";
00135         break;
00136       }
00137 
00138       //FIXME
00139       int iteDiff = I.getEdge().getIteDiff();
00140       std::string intStr = "(IteDiff: ";
00141       intStr += itostr(iteDiff);
00142 
00143       intStr += ")";
00144       edgelabel += intStr;
00145 
00146       return edgelabel;
00147     }
00148   };
00149 
00150   bool ModuloSchedulingSBPass::runOnFunction(Function &F) {
00151     bool Changed = false;
00152 
00153     //Get MachineFunction
00154     MachineFunction &MF = MachineFunction::get(&F);
00155 
00156     //Get Loop Info & Dependence Anaysis info
00157     LoopInfo &LI = getAnalysis<LoopInfo>();
00158     DependenceAnalyzer &DA = getAnalysis<DependenceAnalyzer>();
00159 
00160     //Worklist of superblocks
00161     std::vector<std::vector<const MachineBasicBlock*> > Worklist;
00162     FindSuperBlocks(F, LI, Worklist);
00163 
00164     DEBUG(if(Worklist.size() == 0) std::cerr << "No superblocks in function to ModuloSchedule\n");
00165 
00166     //Loop over worklist and ModuloSchedule each SuperBlock
00167     for(std::vector<std::vector<const MachineBasicBlock*> >::iterator SB = Worklist.begin(),
00168           SBE = Worklist.end(); SB != SBE; ++SB) {
00169 
00170       //Print out Superblock
00171       DEBUG(std::cerr << "ModuloScheduling SB: \n";
00172             for(std::vector<const MachineBasicBlock*>::const_iterator BI = SB->begin(),
00173                   BE = SB->end(); BI != BE; ++BI) {
00174               (*BI)->print(std::cerr);});
00175 
00176       if(!CreateDefMap(*SB)) {
00177         defaultInst = 0;
00178         defMap.clear();
00179         continue;
00180       }
00181 
00182       MSchedGraphSB *MSG = new MSchedGraphSB(*SB, target, indVarInstrs[*SB], DA,
00183                                          machineTollvm[*SB]);
00184 
00185       //Write Graph out to file
00186       DEBUG(WriteGraphToFileSB(std::cerr, F.getName(), MSG));
00187 
00188       //Calculate Resource II
00189       int ResMII = calculateResMII(*SB);
00190 
00191       //Calculate Recurrence II
00192       int RecMII = calculateRecMII(MSG, ResMII);
00193 
00194       DEBUG(std::cerr << "Number of reccurrences found: " << recurrenceList.size() << "\n");
00195 
00196       //Our starting initiation interval is the maximum of RecMII and ResMII
00197       if(RecMII < ResMII)
00198         ++SBRecurrenceConstraint;
00199       else
00200         ++SBResourceConstraint;
00201 
00202       II = std::max(RecMII, ResMII);
00203       int mII = II;
00204 
00205 
00206       //Print out II, RecMII, and ResMII
00207       DEBUG(std::cerr << "II starts out as " << II << " ( RecMII=" << RecMII << " and ResMII=" << ResMII << ")\n");
00208 
00209       //Calculate Node Properties
00210       calculateNodeAttributes(MSG, ResMII);
00211 
00212       //Dump node properties if in debug mode
00213       DEBUG(for(std::map<MSchedGraphSBNode*, MSNodeSBAttributes>::iterator I =  nodeToAttributesMap.begin(),
00214                   E = nodeToAttributesMap.end(); I !=E; ++I) {
00215               std::cerr << "Node: " << *(I->first) << " ASAP: " << I->second.ASAP << " ALAP: "
00216                         << I->second.ALAP << " MOB: " << I->second.MOB << " Depth: " << I->second.depth
00217                         << " Height: " << I->second.height << "\n";
00218             });
00219 
00220 
00221       //Put nodes in order to schedule them
00222       computePartialOrder();
00223 
00224       //Dump out partial order
00225       DEBUG(for(std::vector<std::set<MSchedGraphSBNode*> >::iterator I = partialOrder.begin(),
00226                   E = partialOrder.end(); I !=E; ++I) {
00227               std::cerr << "Start set in PO\n";
00228               for(std::set<MSchedGraphSBNode*>::iterator J = I->begin(), JE = I->end(); J != JE; ++J)
00229                 std::cerr << "PO:" << **J << "\n";
00230             });
00231 
00232       //Place nodes in final order
00233       orderNodes();
00234 
00235       //Dump out order of nodes
00236       DEBUG(for(std::vector<MSchedGraphSBNode*>::iterator I = FinalNodeOrder.begin(), E = FinalNodeOrder.end(); I != E; ++I) {
00237               std::cerr << "FO:" << **I << "\n";
00238             });
00239 
00240 
00241       //Finally schedule nodes
00242       bool haveSched = computeSchedule(*SB, MSG);
00243 
00244       //Print out final schedule
00245       DEBUG(schedule.print(std::cerr));
00246 
00247       //Final scheduling step is to reconstruct the loop only if we actual have
00248       //stage > 0
00249       if(haveSched) {
00250         //schedule.printSchedule(std::cerr);
00251         reconstructLoop(*SB);
00252         ++SBMSLoops;
00253         //Changed = true;
00254         SBIISum += mII;
00255         SBFinalIISum += II;
00256 
00257       if(schedule.getMaxStage() == 0)
00258         ++SBSameStage;
00259       }
00260       else
00261         ++SBNoSched;
00262 
00263       //Clear out our maps for the next basic block that is processed
00264       nodeToAttributesMap.clear();
00265       partialOrder.clear();
00266       recurrenceList.clear();
00267       FinalNodeOrder.clear();
00268       schedule.clear();
00269       defMap.clear();
00270 
00271     }
00272     return Changed;
00273   }
00274 
00275   void ModuloSchedulingSBPass::FindSuperBlocks(Function &F, LoopInfo &LI,
00276                       std::vector<std::vector<const MachineBasicBlock*> > &Worklist) {
00277 
00278     //Get MachineFunction
00279     MachineFunction &MF = MachineFunction::get(&F);
00280 
00281     //Map of LLVM BB to machine BB
00282     std::map<BasicBlock*, MachineBasicBlock*> bbMap;
00283 
00284     for (MachineFunction::iterator BI = MF.begin(); BI != MF.end(); ++BI) {
00285       BasicBlock *llvmBB = (BasicBlock*) BI->getBasicBlock();
00286       assert(!bbMap.count(llvmBB) && "LLVM BB already in map!");
00287       bbMap[llvmBB] = &*BI;
00288     }
00289 
00290     //Iterate over the loops, and find super blocks
00291     for(LoopInfo::iterator LB = LI.begin(), LE = LI.end(); LB != LE; ++LB) {
00292       Loop *L = *LB;
00293       ++NumLoops;
00294 
00295       //If loop is not single entry, try the next one
00296       if(!L->getLoopPreheader())
00297         continue;
00298 
00299       //Check size of this loop, we don't want SBB loops
00300       if(L->getBlocks().size() == 1)
00301         continue;
00302 
00303       //Check if this loop contains no sub loops
00304       if(L->getSubLoops().size() == 0) {
00305 
00306         std::vector<const MachineBasicBlock*> superBlock;
00307 
00308         //Get Loop Headers
00309         BasicBlock *header = L->getHeader();
00310 
00311         //Follow the header and make sure each BB only has one entry and is valid
00312         BasicBlock *current = header;
00313         assert(bbMap.count(current) && "LLVM BB must have corresponding Machine BB\n");
00314         MachineBasicBlock *currentMBB = bbMap[header];
00315         bool done = false;
00316         bool success = true;
00317         unsigned offset = 0;
00318         std::map<const MachineInstr*, unsigned> indexMap;
00319 
00320         while(!done) {
00321           //Loop over successors of this BB, they should be in the
00322           //loop block and be valid
00323           BasicBlock *next = 0;
00324           for(succ_iterator I = succ_begin(current), E = succ_end(current);
00325               I != E; ++I) {
00326             if(L->contains(*I)) {
00327               if(!next)
00328                 next = *I;
00329               else {
00330                 done = true;
00331                 success = false;
00332                 break;
00333               }
00334             }
00335           }
00336 
00337           if(success) {
00338             superBlock.push_back(currentMBB);
00339             if(next == header)
00340               done = true;
00341             else if(!next->getSinglePredecessor()) {
00342               done = true;
00343               success = false;
00344             }
00345             else {
00346               //Check that the next BB only has one entry
00347               current = next;
00348               assert(bbMap.count(current) && "LLVM BB must have corresponding Machine BB");
00349               currentMBB = bbMap[current];
00350             }
00351           }
00352         }
00353 
00354 
00355 
00356 
00357 
00358         if(success) {
00359           ++NumSB;
00360 
00361           //Loop over all the blocks in the superblock
00362           for(std::vector<const MachineBasicBlock*>::iterator currentMBB = superBlock.begin(), MBBEnd = superBlock.end(); currentMBB != MBBEnd; ++currentMBB) {
00363             if(!MachineBBisValid(*currentMBB, indexMap, offset)) {
00364               success = false;
00365               break;
00366             }
00367           }
00368         }
00369 
00370         if(success) {
00371           if(getIndVar(superBlock, bbMap, indexMap)) {
00372             ++SBValid;
00373             Worklist.push_back(superBlock);
00374             SBSize += superBlock.size();
00375           }
00376           else
00377             ++SBInvalid;
00378         }
00379       }
00380     }
00381   }
00382 
00383 
00384   bool ModuloSchedulingSBPass::getIndVar(std::vector<const MachineBasicBlock*> &superBlock, std::map<BasicBlock*, MachineBasicBlock*> &bbMap,
00385                                   std::map<const MachineInstr*, unsigned> &indexMap) {
00386     //See if we can get induction var instructions
00387     std::set<const BasicBlock*> llvmSuperBlock;
00388 
00389     for(unsigned i =0; i < superBlock.size(); ++i)
00390       llvmSuperBlock.insert(superBlock[i]->getBasicBlock());
00391 
00392     //Get Target machine instruction info
00393     const TargetInstrInfo *TMI = target.getInstrInfo();
00394 
00395     //Get the loop back branch
00396     BranchInst *b = dyn_cast<BranchInst>(((BasicBlock*) (superBlock[superBlock.size()-1])->getBasicBlock())->getTerminator());
00397      std::set<Instruction*> indVar;
00398 
00399     if(b->isConditional()) {
00400       //Get the condition for the branch
00401       Value *cond = b->getCondition();
00402 
00403       DEBUG(std::cerr << "Condition: " << *cond << "\n");
00404 
00405       //List of instructions associated with induction variable
00406       std::vector<Instruction*> stack;
00407 
00408       //Add branch
00409       indVar.insert(b);
00410 
00411       if(Instruction *I = dyn_cast<Instruction>(cond))
00412         if(bbMap.count(I->getParent())) {
00413           if (!assocIndVar(I, indVar, stack, bbMap, superBlock[(superBlock.size()-1)]->getBasicBlock(), llvmSuperBlock))
00414             return false;
00415         }
00416         else
00417           return false;
00418       else
00419         return false;
00420     }
00421     else {
00422       indVar.insert(b);
00423     }
00424 
00425     //Dump out instructions associate with indvar for debug reasons
00426     DEBUG(for(std::set<Instruction*>::iterator N = indVar.begin(), NE = indVar.end();
00427               N != NE; ++N) {
00428             std::cerr << **N << "\n";
00429           });
00430 
00431     //Create map of machine instr to llvm instr
00432     std::map<MachineInstr*, Instruction*> mllvm;
00433     for(std::vector<const MachineBasicBlock*>::iterator MBB = superBlock.begin(), MBE = superBlock.end(); MBB != MBE; ++MBB) {
00434       BasicBlock *BB = (BasicBlock*) (*MBB)->getBasicBlock();
00435       for(BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
00436         MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(I);
00437         for (unsigned j = 0; j < tempMvec.size(); j++) {
00438           mllvm[tempMvec[j]] = I;
00439         }
00440       }
00441     }
00442 
00443       //Convert list of LLVM Instructions to list of Machine instructions
00444       std::map<const MachineInstr*, unsigned> mIndVar;
00445       for(std::set<Instruction*>::iterator N = indVar.begin(),
00446             NE = indVar.end(); N != NE; ++N) {
00447 
00448         //If we have a load, we can't handle this loop because
00449         //there is no way to preserve dependences between loads
00450         //and stores
00451         if(isa<LoadInst>(*N))
00452           return false;
00453 
00454         MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(*N);
00455         for (unsigned j = 0; j < tempMvec.size(); j++) {
00456           MachineOpCode OC = (tempMvec[j])->getOpcode();
00457           if(TMI->isNop(OC))
00458             continue;
00459           if(!indexMap.count(tempMvec[j]))
00460             continue;
00461           mIndVar[(MachineInstr*) tempMvec[j]] = indexMap[(MachineInstr*) tempMvec[j]];
00462           DEBUG(std::cerr << *(tempMvec[j]) << " at index " << indexMap[(MachineInstr*) tempMvec[j]] << "\n");
00463         }
00464       }
00465 
00466       //Put into a map for future access
00467       indVarInstrs[superBlock] = mIndVar;
00468       machineTollvm[superBlock] = mllvm;
00469 
00470       return true;
00471 
00472   }
00473 
00474   bool ModuloSchedulingSBPass::assocIndVar(Instruction *I,
00475                                            std::set<Instruction*> &indVar,
00476                                            std::vector<Instruction*> &stack,
00477                                        std::map<BasicBlock*, MachineBasicBlock*> &bbMap,
00478                                            const BasicBlock *last, std::set<const BasicBlock*> &llvmSuperBlock) {
00479 
00480     stack.push_back(I);
00481 
00482     //If this is a phi node, check if its the canonical indvar
00483     if(PHINode *PN = dyn_cast<PHINode>(I)) {
00484       if(llvmSuperBlock.count(PN->getParent())) {
00485         if (Instruction *Inc =
00486             dyn_cast<Instruction>(PN->getIncomingValueForBlock(last)))
00487           if (Inc->getOpcode() == Instruction::Add && Inc->getOperand(0) == PN)
00488             if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1)))
00489               if (CI->equalsInt(1)) {
00490                 //We have found the indvar, so add the stack, and inc instruction to the set
00491                 indVar.insert(stack.begin(), stack.end());
00492                 indVar.insert(Inc);
00493                 stack.pop_back();
00494                 return true;
00495               }
00496         return false;
00497       }
00498     }
00499     else {
00500       //Loop over each of the instructions operands, check if they are an instruction and in this BB
00501       for(unsigned i = 0; i < I->getNumOperands(); ++i) {
00502         if(Instruction *N =  dyn_cast<Instruction>(I->getOperand(i))) {
00503           if(bbMap.count(N->getParent()))
00504             if(!assocIndVar(N, indVar, stack, bbMap, last, llvmSuperBlock))
00505               return false;
00506         }
00507       }
00508     }
00509 
00510     stack.pop_back();
00511     return true;
00512   }
00513 
00514 
00515   /// This function checks if a Machine Basic Block is valid for modulo
00516   /// scheduling. This means that it has no control flow (if/else or
00517   /// calls) in the block.  Currently ModuloScheduling only works on
00518   /// single basic block loops.
00519   bool ModuloSchedulingSBPass::MachineBBisValid(const MachineBasicBlock *BI,
00520                         std::map<const MachineInstr*, unsigned> &indexMap,
00521                                                 unsigned &offset) {
00522 
00523     //Check size of our basic block.. make sure we have more then just the terminator in it
00524     if(BI->getBasicBlock()->size() == 1)
00525       return false;
00526 
00527     //Get Target machine instruction info
00528     const TargetInstrInfo *TMI = target.getInstrInfo();
00529 
00530     unsigned count = 0;
00531     for(MachineBasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) {
00532       //Get opcode to check instruction type
00533       MachineOpCode OC = I->getOpcode();
00534 
00535       //Look for calls
00536       if(TMI->isCall(OC)) {
00537         ++BBWithCalls;
00538         return false;
00539       }
00540 
00541       //Look for conditional move
00542       if(OC == V9::MOVRZr || OC == V9::MOVRZi || OC == V9::MOVRLEZr || OC == V9::MOVRLEZi
00543          || OC == V9::MOVRLZr || OC == V9::MOVRLZi || OC == V9::MOVRNZr || OC == V9::MOVRNZi
00544          || OC == V9::MOVRGZr || OC == V9::MOVRGZi || OC == V9::MOVRGEZr
00545          || OC == V9::MOVRGEZi || OC == V9::MOVLEr || OC == V9::MOVLEi || OC == V9::MOVLEUr
00546          || OC == V9::MOVLEUi || OC == V9::MOVFLEr || OC == V9::MOVFLEi
00547          || OC == V9::MOVNEr || OC == V9::MOVNEi || OC == V9::MOVNEGr || OC == V9::MOVNEGi
00548          || OC == V9::MOVFNEr || OC == V9::MOVFNEi) {
00549         ++BBWithCondMov;
00550         return false;
00551       }
00552 
00553       indexMap[I] = count + offset;
00554 
00555       if(TMI->isNop(OC))
00556         continue;
00557 
00558       ++count;
00559     }
00560 
00561     offset += count;
00562 
00563     return true;
00564   }
00565 }
00566 
00567 bool ModuloSchedulingSBPass::CreateDefMap(std::vector<const MachineBasicBlock*> &SB) {
00568   defaultInst = 0;
00569 
00570   for(std::vector<const MachineBasicBlock*>::iterator BI = SB.begin(),
00571         BE = SB.end(); BI != BE; ++BI) {
00572 
00573     for(MachineBasicBlock::const_iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) {
00574       for(unsigned opNum = 0; opNum < I->getNumOperands(); ++opNum) {
00575         const MachineOperand &mOp = I->getOperand(opNum);
00576         if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) {
00577           Value *V = mOp.getVRegValue();
00578           //assert if this is the second def we have seen
00579           if(defMap.count(V) && isa<PHINode>(V))
00580             DEBUG(std::cerr << "FIXME: Dup def for phi!\n");
00581           else {
00582             //assert(!defMap.count(V) && "Def already in the map");
00583             if(defMap.count(V))
00584               return false;
00585             defMap[V] = (MachineInstr*) &*I;
00586           }
00587         }
00588 
00589         //See if we can use this Value* as our defaultInst
00590         if(!defaultInst && mOp.getType() == MachineOperand::MO_VirtualRegister) {
00591           Value *V = mOp.getVRegValue();
00592           if(!isa<TmpInstruction>(V) && !isa<Argument>(V) && !isa<Constant>(V) && !isa<PHINode>(V))
00593             defaultInst = (Instruction*) V;
00594         }
00595       }
00596     }
00597   }
00598 
00599   if(!defaultInst)
00600     return false;
00601 
00602   return true;
00603 
00604 }
00605 
00606 
00607 //ResMII is calculated by determining the usage count for each resource
00608 //and using the maximum.
00609 //FIXME: In future there should be a way to get alternative resources
00610 //for each instruction
00611 int ModuloSchedulingSBPass::calculateResMII(std::vector<const MachineBasicBlock*> &superBlock) {
00612 
00613   TIME_REGION(X, "calculateResMII");
00614 
00615   const TargetInstrInfo *mii = target.getInstrInfo();
00616   const TargetSchedInfo *msi = target.getSchedInfo();
00617 
00618   int ResMII = 0;
00619 
00620   //Map to keep track of usage count of each resource
00621   std::map<unsigned, unsigned> resourceUsageCount;
00622 
00623   for(std::vector<const MachineBasicBlock*>::iterator BI = superBlock.begin(), BE = superBlock.end(); BI != BE; ++BI) {
00624     for(MachineBasicBlock::const_iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) {
00625 
00626       //Get resource usage for this instruction
00627       InstrRUsage rUsage = msi->getInstrRUsage(I->getOpcode());
00628       std::vector<std::vector<resourceId_t> > resources = rUsage.resourcesByCycle;
00629 
00630       //Loop over resources in each cycle and increments their usage count
00631       for(unsigned i=0; i < resources.size(); ++i)
00632         for(unsigned j=0; j < resources[i].size(); ++j) {
00633           if(!resourceUsageCount.count(resources[i][j])) {
00634             resourceUsageCount[resources[i][j]] = 1;
00635           }
00636           else {
00637             resourceUsageCount[resources[i][j]] =  resourceUsageCount[resources[i][j]] + 1;
00638           }
00639         }
00640     }
00641   }
00642 
00643   //Find maximum usage count
00644 
00645   //Get max number of instructions that can be issued at once. (FIXME)
00646   int issueSlots = msi->maxNumIssueTotal;
00647 
00648   for(std::map<unsigned,unsigned>::iterator RB = resourceUsageCount.begin(), RE = resourceUsageCount.end(); RB != RE; ++RB) {
00649 
00650     //Get the total number of the resources in our cpu
00651     int resourceNum = CPUResource::getCPUResource(RB->first)->maxNumUsers;
00652 
00653     //Get total usage count for this resources
00654     unsigned usageCount = RB->second;
00655 
00656     //Divide the usage count by either the max number we can issue or the number of
00657     //resources (whichever is its upper bound)
00658     double finalUsageCount;
00659     DEBUG(std::cerr << "Resource Num: " << RB->first << " Usage: " << usageCount << " TotalNum: " << resourceNum << "\n");
00660 
00661     if( resourceNum <= issueSlots)
00662       finalUsageCount = ceil(1.0 * usageCount / resourceNum);
00663     else
00664       finalUsageCount = ceil(1.0 * usageCount / issueSlots);
00665 
00666 
00667     //Only keep track of the max
00668     ResMII = std::max( (int) finalUsageCount, ResMII);
00669 
00670   }
00671 
00672   return ResMII;
00673 
00674 }
00675 
00676 /// calculateRecMII - Calculates the value of the highest recurrence
00677 /// By value we mean the total latency/distance
00678 int ModuloSchedulingSBPass::calculateRecMII(MSchedGraphSB *graph, int MII) {
00679 
00680   TIME_REGION(X, "calculateRecMII");
00681 
00682   findAllCircuits(graph, MII);
00683   int RecMII = 0;
00684 
00685   for(std::set<std::pair<int, std::vector<MSchedGraphSBNode*> > >::iterator I = recurrenceList.begin(), E=recurrenceList.end(); I !=E; ++I) {
00686     RecMII = std::max(RecMII, I->first);
00687   }
00688 
00689   return MII;
00690 }
00691 
00692 int CircCountSB;
00693 
00694 void ModuloSchedulingSBPass::unblock(MSchedGraphSBNode *u, std::set<MSchedGraphSBNode*> &blocked,
00695              std::map<MSchedGraphSBNode*, std::set<MSchedGraphSBNode*> > &B) {
00696 
00697   //Unblock u
00698   DEBUG(std::cerr << "Unblocking: " << *u << "\n");
00699   blocked.erase(u);
00700 
00701   //std::set<MSchedGraphSBNode*> toErase;
00702   while (!B[u].empty()) {
00703     MSchedGraphSBNode *W = *B[u].begin();
00704     B[u].erase(W);
00705     //toErase.insert(*W);
00706     DEBUG(std::cerr << "Removed: " << *W << "from B-List\n");
00707     if(blocked.count(W))
00708       unblock(W, blocked, B);
00709   }
00710 
00711 }
00712 
00713 void ModuloSchedulingSBPass::addSCC(std::vector<MSchedGraphSBNode*> &SCC, std::map<MSchedGraphSBNode*, MSchedGraphSBNode*> &newNodes) {
00714 
00715   int totalDelay = 0;
00716   int totalDistance = 0;
00717   std::vector<MSchedGraphSBNode*> recc;
00718   MSchedGraphSBNode *start = 0;
00719   MSchedGraphSBNode *end = 0;
00720 
00721   //Loop over recurrence, get delay and distance
00722   for(std::vector<MSchedGraphSBNode*>::iterator N = SCC.begin(), NE = SCC.end(); N != NE; ++N) {
00723     DEBUG(std::cerr << **N << "\n");
00724     totalDelay += (*N)->getLatency();
00725 
00726     for(unsigned i = 0; i < (*N)->succ_size(); ++i) {
00727       MSchedGraphSBEdge *edge = (*N)->getSuccessor(i);
00728       if(find(SCC.begin(), SCC.end(), edge->getDest()) != SCC.end()) {
00729         totalDistance += edge->getIteDiff();
00730         if(edge->getIteDiff() > 0)
00731           if(!start && !end) {
00732             start = *N;
00733             end = edge->getDest();
00734           }
00735 
00736       }
00737     }
00738 
00739 
00740     //Get the original node
00741     recc.push_back(newNodes[*N]);
00742 
00743 
00744   }
00745 
00746   DEBUG(std::cerr << "End Recc\n");
00747 
00748 
00749   assert( (start && end) && "Must have start and end node to ignore edge for SCC");
00750 
00751   if(start && end) {
00752     //Insert reccurrence into the list
00753     DEBUG(std::cerr << "Ignore Edge from!!: " << *start << " to " << *end << "\n");
00754     edgesToIgnore.insert(std::make_pair(newNodes[start], (newNodes[end])->getInEdgeNum(newNodes[start])));
00755   }
00756 
00757   int lastII = totalDelay / totalDistance;
00758 
00759 
00760   recurrenceList.insert(std::make_pair(lastII, recc));
00761 
00762 }
00763 
00764 bool ModuloSchedulingSBPass::circuit(MSchedGraphSBNode *v, std::vector<MSchedGraphSBNode*> &stack,
00765              std::set<MSchedGraphSBNode*> &blocked, std::vector<MSchedGraphSBNode*> &SCC,
00766              MSchedGraphSBNode *s, std::map<MSchedGraphSBNode*, std::set<MSchedGraphSBNode*> > &B,
00767                                    int II, std::map<MSchedGraphSBNode*, MSchedGraphSBNode*> &newNodes) {
00768   bool f = false;
00769 
00770   DEBUG(std::cerr << "Finding Circuits Starting with: ( " << v << ")"<< *v << "\n");
00771 
00772   //Push node onto the stack
00773   stack.push_back(v);
00774 
00775   //block this node
00776   blocked.insert(v);
00777 
00778   //Loop over all successors of node v that are in the scc, create Adjaceny list
00779   std::set<MSchedGraphSBNode*> AkV;
00780   for(MSchedGraphSBNode::succ_iterator I = v->succ_begin(), E = v->succ_end(); I != E; ++I) {
00781     if((std::find(SCC.begin(), SCC.end(), *I) != SCC.end())) {
00782       AkV.insert(*I);
00783     }
00784   }
00785 
00786   for(std::set<MSchedGraphSBNode*>::iterator I = AkV.begin(), E = AkV.end(); I != E; ++I) {
00787     if(*I == s) {
00788       //We have a circuit, so add it to our list
00789       addRecc(stack, newNodes);
00790       f = true;
00791     }
00792     else if(!blocked.count(*I)) {
00793       if(circuit(*I, stack, blocked, SCC, s, B, II, newNodes))
00794         f = true;
00795     }
00796     else
00797       DEBUG(std::cerr << "Blocked: " << **I << "\n");
00798   }
00799 
00800 
00801   if(f) {
00802     unblock(v, blocked, B);
00803   }
00804   else {
00805     for(std::set<MSchedGraphSBNode*>::iterator I = AkV.begin(), E = AkV.end(); I != E; ++I)
00806       B[*I].insert(v);
00807 
00808   }
00809 
00810   //Pop v
00811   stack.pop_back();
00812 
00813   return f;
00814 
00815 }
00816 
00817 void ModuloSchedulingSBPass::addRecc(std::vector<MSchedGraphSBNode*> &stack, std::map<MSchedGraphSBNode*, MSchedGraphSBNode*> &newNodes) {
00818   std::vector<MSchedGraphSBNode*> recc;
00819   //Dump recurrence for now
00820   DEBUG(std::cerr << "Starting Recc\n");
00821 
00822   int totalDelay = 0;
00823   int totalDistance = 0;
00824   MSchedGraphSBNode *lastN = 0;
00825   MSchedGraphSBNode *start = 0;
00826   MSchedGraphSBNode *end = 0;
00827 
00828   //Loop over recurrence, get delay and distance
00829   for(std::vector<MSchedGraphSBNode*>::iterator N = stack.begin(), NE = stack.end(); N != NE; ++N) {
00830     DEBUG(std::cerr << **N << "\n");
00831     totalDelay += (*N)->getLatency();
00832     if(lastN) {
00833       int iteDiff = (*N)->getInEdge(lastN).getIteDiff();
00834       totalDistance += iteDiff;
00835 
00836       if(iteDiff > 0) {
00837         start = lastN;
00838         end = *N;
00839       }
00840     }
00841     //Get the original node
00842     lastN = *N;
00843     recc.push_back(newNodes[*N]);
00844 
00845 
00846   }
00847 
00848   //Get the loop edge
00849   totalDistance += lastN->getIteDiff(*stack.begin());
00850 
00851   DEBUG(std::cerr << "End Recc\n");
00852   CircCountSB++;
00853 
00854   if(start && end) {
00855     //Insert reccurrence into the list
00856     DEBUG(std::cerr << "Ignore Edge from!!: " << *start << " to " << *end << "\n");
00857     edgesToIgnore.insert(std::make_pair(newNodes[start], (newNodes[end])->getInEdgeNum(newNodes[start])));
00858   }
00859   else {
00860     //Insert reccurrence into the list
00861     DEBUG(std::cerr << "Ignore Edge from: " << *lastN << " to " << **stack.begin() << "\n");
00862     edgesToIgnore.insert(std::make_pair(newNodes[lastN], newNodes[(*stack.begin())]->getInEdgeNum(newNodes[lastN])));
00863 
00864   }
00865   //Adjust II until we get close to the inequality delay - II*distance <= 0
00866   int RecMII = II; //Starting value
00867   int value = totalDelay-(RecMII * totalDistance);
00868   int lastII = II;
00869   while(value < 0) {
00870 
00871     lastII = RecMII;
00872     RecMII--;
00873     value = totalDelay-(RecMII * totalDistance);
00874   }
00875 
00876   recurrenceList.insert(std::make_pair(lastII, recc));
00877 
00878 }
00879 
00880 
00881 void ModuloSchedulingSBPass::findAllCircuits(MSchedGraphSB *g, int II) {
00882 
00883   CircCountSB = 0;
00884 
00885   //Keep old to new node mapping information
00886   std::map<MSchedGraphSBNode*, MSchedGraphSBNode*> newNodes;
00887 
00888   //copy the graph
00889   MSchedGraphSB *MSG = new MSchedGraphSB(*g, newNodes);
00890 
00891   DEBUG(std::cerr << "Finding All Circuits\n");
00892 
00893   //Set of blocked nodes
00894   std::set<MSchedGraphSBNode*> blocked;
00895 
00896   //Stack holding current circuit
00897   std::vector<MSchedGraphSBNode*> stack;
00898 
00899   //Map for B Lists
00900   std::map<MSchedGraphSBNode*, std::set<MSchedGraphSBNode*> > B;
00901 
00902   //current node
00903   MSchedGraphSBNode *s;
00904 
00905 
00906   //Iterate over the graph until its down to one node or empty
00907   while(MSG->size() > 1) {
00908 
00909     //Write Graph out to file
00910     //WriteGraphToFile(std::cerr, "Graph" + utostr(MSG->size()), MSG);
00911 
00912     DEBUG(std::cerr << "Graph Size: " << MSG->size() << "\n");
00913     DEBUG(std::cerr << "Finding strong component Vk with least vertex\n");
00914 
00915     //Iterate over all the SCCs in the graph
00916     std::set<MSchedGraphSBNode*> Visited;
00917     std::vector<MSchedGraphSBNode*> Vk;
00918     MSchedGraphSBNode* s = 0;
00919     int numEdges = 0;
00920 
00921     //Find scc with the least vertex
00922     for (MSchedGraphSB::iterator GI = MSG->begin(), E = MSG->end(); GI != E; ++GI)
00923       if (Visited.insert(GI->second).second) {
00924         for (scc_iterator<MSchedGraphSBNode*> SCCI = scc_begin(GI->second),
00925                E = scc_end(GI->second); SCCI != E; ++SCCI) {
00926           std::vector<MSchedGraphSBNode*> &nextSCC = *SCCI;
00927 
00928           if (Visited.insert(nextSCC[0]).second) {
00929             Visited.insert(nextSCC.begin()+1, nextSCC.end());
00930 
00931             if(nextSCC.size() > 1) {
00932               DEBUG(std::cerr << "SCC size: " << nextSCC.size() << "\n");
00933 
00934               for(unsigned i = 0; i < nextSCC.size(); ++i) {
00935                 //Loop over successor and see if in scc, then count edge
00936                 MSchedGraphSBNode *node = nextSCC[i];
00937                 for(MSchedGraphSBNode::succ_iterator S = node->succ_begin(), SE = node->succ_end(); S != SE; ++S) {
00938                   if(find(nextSCC.begin(), nextSCC.end(), *S) != nextSCC.end())
00939                     numEdges++;
00940                 }
00941               }
00942               DEBUG(std::cerr << "Num Edges: " << numEdges << "\n");
00943             }
00944 
00945             //Ignore self loops
00946             if(nextSCC.size() > 1) {
00947 
00948               //Get least vertex in Vk
00949               if(!s) {
00950                 s = nextSCC[0];
00951                 Vk = nextSCC;
00952               }
00953 
00954               for(unsigned i = 0; i < nextSCC.size(); ++i) {
00955                 if(nextSCC[i] < s) {
00956                   s = nextSCC[i];
00957                   Vk = nextSCC;
00958                 }
00959               }
00960             }
00961           }
00962         }
00963       }
00964 
00965 
00966 
00967     //Process SCC
00968     DEBUG(for(std::vector<MSchedGraphSBNode*>::iterator N = Vk.begin(), NE = Vk.end();
00969               N != NE; ++N) { std::cerr << *((*N)->getInst()); });
00970 
00971     //Iterate over all nodes in this scc
00972     for(std::vector<MSchedGraphSBNode*>::iterator N = Vk.begin(), NE = Vk.end();
00973         N != NE; ++N) {
00974       blocked.erase(*N);
00975       B[*N].clear();
00976     }
00977     if(Vk.size() > 1) {
00978       if(numEdges < 98)
00979         circuit(s, stack, blocked, Vk, s, B, II, newNodes);
00980       else
00981         addSCC(Vk, newNodes);
00982 
00983 
00984       //Delete nodes from the graph
00985       //Find all nodes up to s and delete them
00986       std::vector<MSchedGraphSBNode*> nodesToRemove;
00987       nodesToRemove.push_back(s);
00988       for(MSchedGraphSB::iterator N = MSG->begin(), NE = MSG->end(); N != NE; ++N) {
00989         if(N->second < s )
00990             nodesToRemove.push_back(N->second);
00991       }
00992       for(std::vector<MSchedGraphSBNode*>::iterator N = nodesToRemove.begin(), NE = nodesToRemove.end(); N != NE; ++N) {
00993         DEBUG(std::cerr << "Deleting Node: " << **N << "\n");
00994         MSG->deleteNode(*N);
00995       }
00996     }
00997     else
00998       break;
00999   }
01000   DEBUG(std::cerr << "Num Circuits found: " << CircCountSB << "\n");
01001 }
01002 /// calculateNodeAttributes - The following properties are calculated for
01003 /// each node in the dependence graph: ASAP, ALAP, Depth, Height, and
01004 /// MOB.
01005 void ModuloSchedulingSBPass::calculateNodeAttributes(MSchedGraphSB *graph, int MII) {
01006 
01007   TIME_REGION(X, "calculateNodeAttributes");
01008 
01009   assert(nodeToAttributesMap.empty() && "Node attribute map was not cleared");
01010 
01011   //Loop over the nodes and add them to the map
01012   for(MSchedGraphSB::iterator I = graph->begin(), E = graph->end(); I != E; ++I) {
01013 
01014     DEBUG(std::cerr << "Inserting node into attribute map: " << *I->second << "\n");
01015 
01016     //Assert if its already in the map
01017     assert(nodeToAttributesMap.count(I->second) == 0 &&
01018            "Node attributes are already in the map");
01019 
01020     //Put into the map with default attribute values
01021     nodeToAttributesMap[I->second] = MSNodeSBAttributes();
01022   }
01023 
01024   //Create set to deal with reccurrences
01025   std::set<MSchedGraphSBNode*> visitedNodes;
01026 
01027   //Now Loop over map and calculate the node attributes
01028   for(std::map<MSchedGraphSBNode*, MSNodeSBAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) {
01029     calculateASAP(I->first, MII, (MSchedGraphSBNode*) 0);
01030     visitedNodes.clear();
01031   }
01032 
01033   int maxASAP = findMaxASAP();
01034   //Calculate ALAP which depends on ASAP being totally calculated
01035   for(std::map<MSchedGraphSBNode*, MSNodeSBAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) {
01036     calculateALAP(I->first, MII, maxASAP, (MSchedGraphSBNode*) 0);
01037     visitedNodes.clear();
01038   }
01039 
01040   //Calculate MOB which depends on ASAP being totally calculated, also do depth and height
01041   for(std::map<MSchedGraphSBNode*, MSNodeSBAttributes>::iterator I = nodeToAttributesMap.begin(), E = nodeToAttributesMap.end(); I != E; ++I) {
01042     (I->second).MOB = std::max(0,(I->second).ALAP - (I->second).ASAP);
01043 
01044     DEBUG(std::cerr << "MOB: " << (I->second).MOB << " (" << *(I->first) << ")\n");
01045     calculateDepth(I->first, (MSchedGraphSBNode*) 0);
01046     calculateHeight(I->first, (MSchedGraphSBNode*) 0);
01047   }
01048 
01049 
01050 }
01051 
01052 /// ignoreEdge - Checks to see if this edge of a recurrence should be ignored or not
01053 bool ModuloSchedulingSBPass::ignoreEdge(MSchedGraphSBNode *srcNode, MSchedGraphSBNode *destNode) {
01054   if(destNode == 0 || srcNode ==0)
01055     return false;
01056 
01057   bool findEdge = edgesToIgnore.count(std::make_pair(srcNode, destNode->getInEdgeNum(srcNode)));
01058 
01059   DEBUG(std::cerr << "Ignoring edge? from: " << *srcNode << " to " << *destNode << "\n");
01060 
01061   return findEdge;
01062 }
01063 
01064 
01065 /// calculateASAP - Calculates the
01066 int  ModuloSchedulingSBPass::calculateASAP(MSchedGraphSBNode *node, int MII, MSchedGraphSBNode *destNode) {
01067 
01068   DEBUG(std::cerr << "Calculating ASAP for " << *node << "\n");
01069 
01070   //Get current node attributes
01071   MSNodeSBAttributes &attributes = nodeToAttributesMap.find(node)->second;
01072 
01073   if(attributes.ASAP != -1)
01074     return attributes.ASAP;
01075 
01076   int maxPredValue = 0;
01077 
01078   //Iterate over all of the predecessors and find max
01079   for(MSchedGraphSBNode::pred_iterator P = node->pred_begin(), E = node->pred_end(); P != E; ++P) {
01080 
01081     //Only process if we are not ignoring the edge
01082     if(!ignoreEdge(*P, node)) {
01083       int predASAP = -1;
01084       predASAP = calculateASAP(*P, MII, node);
01085 
01086       assert(predASAP != -1 && "ASAP has not been calculated");
01087       int iteDiff = node->getInEdge(*P).getIteDiff();
01088 
01089       int currentPredValue = predASAP + (*P)->getLatency() - (iteDiff * MII);
01090       DEBUG(std::cerr << "pred ASAP: " << predASAP << ", iteDiff: " << iteDiff << ", PredLatency: " << (*P)->getLatency() << ", Current ASAP pred: " << currentPredValue << "\n");
01091       maxPredValue = std::max(maxPredValue, currentPredValue);
01092     }
01093   }
01094 
01095   attributes.ASAP = maxPredValue;
01096 
01097   DEBUG(std::cerr << "ASAP: " << attributes.ASAP << " (" << *node << ")\n");
01098 
01099   return maxPredValue;
01100 }
01101 
01102 
01103 int ModuloSchedulingSBPass::calculateALAP(MSchedGraphSBNode *node, int MII,
01104                                         int maxASAP, MSchedGraphSBNode *srcNode) {
01105 
01106   DEBUG(std::cerr << "Calculating ALAP for " << *node << "\n");
01107 
01108   MSNodeSBAttributes &attributes = nodeToAttributesMap.find(node)->second;
01109 
01110   if(attributes.ALAP != -1)
01111     return attributes.ALAP;
01112 
01113   if(node->hasSuccessors()) {
01114 
01115     //Trying to deal with the issue where the node has successors, but
01116     //we are ignoring all of the edges to them. So this is my hack for
01117     //now.. there is probably a more elegant way of doing this (FIXME)
01118     bool processedOneEdge = false;
01119 
01120     //FIXME, set to something high to start
01121     int minSuccValue = 9999999;
01122 
01123     //Iterate over all of the predecessors and fine max
01124     for(MSchedGraphSBNode::succ_iterator P = node->succ_begin(),
01125           E = node->succ_end(); P != E; ++P) {
01126 
01127       //Only process if we are not ignoring the edge
01128       if(!ignoreEdge(node, *P)) {
01129         processedOneEdge = true;
01130         int succALAP = -1;
01131         succALAP = calculateALAP(*P, MII, maxASAP, node);
01132 
01133         assert(succALAP != -1 && "Successors ALAP should have been caclulated");
01134 
01135         int iteDiff = P.getEdge().getIteDiff();
01136 
01137         int currentSuccValue = succALAP - node->getLatency() + iteDiff * MII;
01138 
01139         DEBUG(std::cerr << "succ ALAP: " << succALAP << ", iteDiff: " << iteDiff << ", SuccLatency: " << (*P)->getLatency() << ", Current ALAP succ: " << currentSuccValue << "\n");
01140 
01141         minSuccValue = std::min(minSuccValue, currentSuccValue);
01142       }
01143     }
01144 
01145     if(processedOneEdge)
01146         attributes.ALAP = minSuccValue;
01147 
01148     else
01149       attributes.ALAP = maxASAP;
01150   }
01151   else
01152     attributes.ALAP = maxASAP;
01153 
01154   DEBUG(std::cerr << "ALAP: " << attributes.ALAP << " (" << *node << ")\n");
01155 
01156   if(attributes.ALAP < 0)
01157     attributes.ALAP = 0;
01158 
01159   return attributes.ALAP;
01160 }
01161 
01162 int ModuloSchedulingSBPass::findMaxASAP() {
01163   int maxASAP = 0;
01164 
01165   for(std::map<MSchedGraphSBNode*, MSNodeSBAttributes>::iterator I = nodeToAttributesMap.begin(),
01166         E = nodeToAttributesMap.end(); I != E; ++I)
01167     maxASAP = std::max(maxASAP, I->second.ASAP);
01168   return maxASAP;
01169 }
01170 
01171 
01172 int ModuloSchedulingSBPass::calculateHeight(MSchedGraphSBNode *node,MSchedGraphSBNode *srcNode) {
01173 
01174   MSNodeSBAttributes &attributes = nodeToAttributesMap.find(node)->second;
01175 
01176   if(attributes.height != -1)
01177     return attributes.height;
01178 
01179   int maxHeight = 0;
01180 
01181   //Iterate over all of the predecessors and find max
01182   for(MSchedGraphSBNode::succ_iterator P = node->succ_begin(),
01183         E = node->succ_end(); P != E; ++P) {
01184 
01185 
01186     if(!ignoreEdge(node, *P)) {
01187       int succHeight = calculateHeight(*P, node);
01188 
01189       assert(succHeight != -1 && "Successors Height should have been caclulated");
01190 
01191       int currentHeight = succHeight + node->getLatency();
01192       maxHeight = std::max(maxHeight, currentHeight);
01193     }
01194   }
01195   attributes.height = maxHeight;
01196   DEBUG(std::cerr << "Height: " << attributes.height << " (" << *node << ")\n");
01197   return maxHeight;
01198 }
01199 
01200 
01201 int ModuloSchedulingSBPass::calculateDepth(MSchedGraphSBNode *node,
01202                                           MSchedGraphSBNode *destNode) {
01203 
01204   MSNodeSBAttributes &attributes = nodeToAttributesMap.find(node)->second;
01205 
01206   if(attributes.depth != -1)
01207     return attributes.depth;
01208 
01209   int maxDepth = 0;
01210 
01211   //Iterate over all of the predecessors and fine max
01212   for(MSchedGraphSBNode::pred_iterator P = node->pred_begin(), E = node->pred_end(); P != E; ++P) {
01213 
01214     if(!ignoreEdge(*P, node)) {
01215       int predDepth = -1;
01216       predDepth = calculateDepth(*P, node);
01217 
01218       assert(predDepth != -1 && "Predecessors ASAP should have been caclulated");
01219 
01220       int currentDepth = predDepth + (*P)->getLatency();
01221       maxDepth = std::max(maxDepth, currentDepth);
01222     }
01223   }
01224   attributes.depth = maxDepth;
01225 
01226   DEBUG(std::cerr << "Depth: " << attributes.depth << " (" << *node << "*)\n");
01227   return maxDepth;
01228 }
01229 
01230 void ModuloSchedulingSBPass::computePartialOrder() {
01231 
01232   TIME_REGION(X, "calculatePartialOrder");
01233 
01234   DEBUG(std::cerr << "Computing Partial Order\n");
01235 
01236   //Steps to add a recurrence to the partial order 1) Find reccurrence
01237   //with the highest RecMII. Add it to the partial order.  2) For each
01238   //recurrence with decreasing RecMII, add it to the partial order
01239   //along with any nodes that connect this recurrence to recurrences
01240   //already in the partial order
01241   for(std::set<std::pair<int, std::vector<MSchedGraphSBNode*> > >::reverse_iterator
01242         I = recurrenceList.rbegin(), E=recurrenceList.rend(); I !=E; ++I) {
01243 
01244     std::set<MSchedGraphSBNode*> new_recurrence;
01245 
01246     //Loop through recurrence and remove any nodes already in the partial order
01247     for(std::vector<MSchedGraphSBNode*>::const_iterator N = I->second.begin(),
01248           NE = I->second.end(); N != NE; ++N) {
01249 
01250       bool found = false;
01251       for(std::vector<std::set<MSchedGraphSBNode*> >::iterator PO = partialOrder.begin(),
01252             PE = partialOrder.end(); PO != PE; ++PO) {
01253         if(PO->count(*N))
01254           found = true;
01255       }
01256 
01257       //Check if its a branch, and remove to handle special
01258       if(!found) {
01259         new_recurrence.insert(*N);
01260       }
01261 
01262     }
01263 
01264 
01265     if(new_recurrence.size() > 0) {
01266 
01267       std::vector<MSchedGraphSBNode*> path;
01268       std::set<MSchedGraphSBNode*> nodesToAdd;
01269 
01270       //Dump recc we are dealing with (minus nodes already in PO)
01271       DEBUG(std::cerr << "Recc: ");
01272       DEBUG(for(std::set<MSchedGraphSBNode*>::iterator R = new_recurrence.begin(), RE = new_recurrence.end(); R != RE; ++R) { std::cerr << **R ; });
01273 
01274       //Add nodes that connect this recurrence to recurrences in the partial path
01275       for(std::set<MSchedGraphSBNode*>::iterator N = new_recurrence.begin(),
01276           NE = new_recurrence.end(); N != NE; ++N)
01277         searchPath(*N, path, nodesToAdd, new_recurrence);
01278 
01279       //Add nodes to this recurrence if they are not already in the partial order
01280       for(std::set<MSchedGraphSBNode*>::iterator N = nodesToAdd.begin(), NE = nodesToAdd.end();
01281           N != NE; ++N) {
01282         bool found = false;
01283         for(std::vector<std::set<MSchedGraphSBNode*> >::iterator PO = partialOrder.begin(),
01284               PE = partialOrder.end(); PO != PE; ++PO) {
01285           if(PO->count(*N))
01286             found = true;
01287         }
01288         if(!found) {
01289           assert("FOUND CONNECTOR");
01290           new_recurrence.insert(*N);
01291         }
01292       }
01293 
01294       partialOrder.push_back(new_recurrence);
01295     }
01296   }
01297 
01298   //Add any nodes that are not already in the partial order
01299   //Add them in a set, one set per connected component
01300   std::set<MSchedGraphSBNode*> lastNodes;
01301   std::set<MSchedGraphSBNode*> noPredNodes;
01302   for(std::map<MSchedGraphSBNode*, MSNodeSBAttributes>::iterator I = nodeToAttributesMap.begin(),
01303         E = nodeToAttributesMap.end(); I != E; ++I) {
01304 
01305     bool found = false;
01306 
01307     //Check if its already in our partial order, if not add it to the final vector
01308     for(std::vector<std::set<MSchedGraphSBNode*> >::iterator PO = partialOrder.begin(),
01309           PE = partialOrder.end(); PO != PE; ++PO) {
01310       if(PO->count(I->first))
01311         found = true;
01312     }
01313     if(!found)
01314       lastNodes.insert(I->first);
01315   }
01316 
01317   //For each node w/out preds, see if there is a path to one of the
01318   //recurrences, and if so add them to that current recc
01319   /*for(std::set<MSchedGraphSBNode*>::iterator N = noPredNodes.begin(), NE = noPredNodes.end();
01320       N != NE; ++N) {
01321     DEBUG(std::cerr << "No Pred Path from: " << **N << "\n");
01322     for(std::vector<std::set<MSchedGraphSBNode*> >::iterator PO = partialOrder.begin(),
01323           PE = partialOrder.end(); PO != PE; ++PO) {
01324       std::vector<MSchedGraphSBNode*> path;
01325       pathToRecc(*N, path, *PO, lastNodes);
01326     }
01327     }*/
01328 
01329 
01330   //Break up remaining nodes that are not in the partial order
01331   ///into their connected compoenents
01332     while(lastNodes.size() > 0) {
01333       std::set<MSchedGraphSBNode*> ccSet;
01334       connectedComponentSet(*(lastNodes.begin()),ccSet, lastNodes);
01335       if(ccSet.size() > 0)
01336         partialOrder.push_back(ccSet);
01337     }
01338 
01339 }
01340 
01341 void ModuloSchedulingSBPass::connectedComponentSet(MSchedGraphSBNode *node, std::set<MSchedGraphSBNode*> &ccSet, std::set<MSchedGraphSBNode*> &lastNodes) {
01342 
01343   //Add to final set
01344   if( !ccSet.count(node) && lastNodes.count(node)) {
01345     lastNodes.erase(node);
01346     ccSet.insert(node);
01347   }
01348   else
01349     return;
01350 
01351   //Loop over successors and recurse if we have not seen this node before
01352   for(MSchedGraphSBNode::succ_iterator node_succ = node->succ_begin(), end=node->succ_end(); node_succ != end; ++node_succ) {
01353     connectedComponentSet(*node_succ, ccSet, lastNodes);
01354   }
01355 
01356 }
01357 
01358 void ModuloSchedulingSBPass::searchPath(MSchedGraphSBNode *node,
01359                                       std::vector<MSchedGraphSBNode*> &path,
01360                                       std::set<MSchedGraphSBNode*> &nodesToAdd,
01361                                      std::set<MSchedGraphSBNode*> &new_reccurrence) {
01362   //Push node onto the path
01363   path.push_back(node);
01364 
01365   //Loop over all successors and see if there is a path from this node to
01366   //a recurrence in the partial order, if so.. add all nodes to be added to recc
01367   for(MSchedGraphSBNode::succ_iterator S = node->succ_begin(), SE = node->succ_end(); S != SE;
01368       ++S) {
01369 
01370     //Check if we should ignore this edge first
01371     if(ignoreEdge(node,*S))
01372       continue;
01373 
01374     //check if successor is in this recurrence, we will get to it eventually
01375     if(new_reccurrence.count(*S))
01376       continue;
01377 
01378     //If this node exists in a recurrence already in the partial
01379     //order, then add all nodes in the path to the set of nodes to add
01380      //Check if its already in our partial order, if not add it to the
01381      //final vector
01382     bool found = false;
01383     for(std::vector<std::set<MSchedGraphSBNode*> >::iterator PO = partialOrder.begin(),
01384           PE = partialOrder.end(); PO != PE; ++PO) {
01385 
01386       if(PO->count(*S)) {
01387         found = true;
01388         break;
01389       }
01390     }
01391 
01392     if(!found) {
01393       nodesToAdd.insert(*S);
01394       searchPath(*S, path, nodesToAdd, new_reccurrence);
01395     }
01396   }
01397 
01398   //Pop Node off the path
01399   path.pop_back();
01400 }
01401 
01402 void dumpIntersection(std::set<MSchedGraphSBNode*> &IntersectCurrent) {
01403   std::cerr << "Intersection (";
01404   for(std::set<MSchedGraphSBNode*>::iterator I = IntersectCurrent.begin(), E = IntersectCurrent.end(); I != E; ++I)
01405     std::cerr << **I << ", ";
01406   std::cerr << ")\n";
01407 }
01408 
01409 void ModuloSchedulingSBPass::orderNodes() {
01410 
01411   TIME_REGION(X, "orderNodes");
01412 
01413   int BOTTOM_UP = 0;
01414   int TOP_DOWN = 1;
01415 
01416   //Set default order
01417   int order = BOTTOM_UP;
01418 
01419   //Loop over and find all pred nodes and schedule them first
01420   /*for(std::vector<std::set<MSchedGraphSBNode*> >::iterator CurrentSet = partialOrder.begin(), E= partialOrder.end(); CurrentSet != E; ++CurrentSet) {
01421     for(std::set<MSchedGraphSBNode*>::iterator N = CurrentSet->begin(), NE = CurrentSet->end(); N != NE; ++N)
01422       if((*N)->isPredicate()) {
01423         FinalNodeOrder.push_back(*N);
01424         CurrentSet->erase(*N);
01425       }
01426       }*/
01427 
01428 
01429 
01430   //Loop over all the sets and place them in the final node order
01431   for(std::vector<std::set<MSchedGraphSBNode*> >::iterator CurrentSet = partialOrder.begin(), E= partialOrder.end(); CurrentSet != E; ++CurrentSet) {
01432 
01433     DEBUG(std::cerr << "Processing set in S\n");
01434     DEBUG(dumpIntersection(*CurrentSet));
01435 
01436     //Result of intersection
01437     std::set<MSchedGraphSBNode*> IntersectCurrent;
01438 
01439     predIntersect(*CurrentSet, IntersectCurrent);
01440 
01441     //If the intersection of predecessor and current set is not empty
01442     //sort nodes bottom up
01443     if(IntersectCurrent.size() != 0) {
01444       DEBUG(std::cerr << "Final Node Order Predecessors and Current Set interesection is NOT empty\n");
01445       order = BOTTOM_UP;
01446     }
01447     //If empty, use successors
01448     else {
01449       DEBUG(std::cerr << "Final Node Order Predecessors and Current Set interesection is empty\n");
01450 
01451       succIntersect(*CurrentSet, IntersectCurrent);
01452 
01453       //sort top-down
01454       if(IntersectCurrent.size() != 0) {
01455          DEBUG(std::cerr << "Final Node Order Successors and Current Set interesection is NOT empty\n");
01456         order = TOP_DOWN;
01457       }
01458       else {
01459         DEBUG(std::cerr << "Final Node Order Successors and Current Set interesection is empty\n");
01460         //Find node with max ASAP in current Set
01461         MSchedGraphSBNode *node;
01462         int maxASAP = 0;
01463         DEBUG(std::cerr << "Using current set of size " << CurrentSet->size() << "to find max ASAP\n");
01464         for(std::set<MSchedGraphSBNode*>::iterator J = CurrentSet->begin(), JE = CurrentSet->end(); J != JE; ++J) {
01465           //Get node attributes
01466           MSNodeSBAttributes nodeAttr= nodeToAttributesMap.find(*J)->second;
01467           //assert(nodeAttr != nodeToAttributesMap.end() && "Node not in attributes map!");
01468 
01469           if(maxASAP <= nodeAttr.ASAP) {
01470             maxASAP = nodeAttr.ASAP;
01471             node = *J;
01472           }
01473         }
01474         assert(node != 0 && "In node ordering node should not be null");
01475         IntersectCurrent.insert(node);
01476         order = BOTTOM_UP;
01477       }
01478     }
01479 
01480     //Repeat until all nodes are put into the final order from current set
01481     while(IntersectCurrent.size() > 0) {
01482 
01483       if(order == TOP_DOWN) {
01484         DEBUG(std::cerr << "Order is TOP DOWN\n");
01485 
01486         while(IntersectCurrent.size() > 0) {
01487           DEBUG(std::cerr << "Intersection is not empty, so find heighest height\n");
01488 
01489           int MOB = 0;
01490           int height = 0;
01491           MSchedGraphSBNode *highestHeightNode = *(IntersectCurrent.begin());
01492 
01493           //Find node in intersection with highest heigh and lowest MOB
01494           for(std::set<MSchedGraphSBNode*>::iterator I = IntersectCurrent.begin(),
01495                 E = IntersectCurrent.end(); I != E; ++I) {
01496 
01497             //Get current nodes properties
01498             MSNodeSBAttributes nodeAttr= nodeToAttributesMap.find(*I)->second;
01499 
01500             if(height < nodeAttr.height) {
01501               highestHeightNode = *I;
01502               height = nodeAttr.height;
01503               MOB = nodeAttr.MOB;
01504             }
01505             else if(height ==  nodeAttr.height) {
01506               if(MOB > nodeAttr.height) {
01507                 highestHeightNode = *I;
01508                 height =  nodeAttr.height;
01509                 MOB = nodeAttr.MOB;
01510               }
01511             }
01512           }
01513 
01514           //Append our node with greatest height to the NodeOrder
01515           if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), highestHeightNode) == FinalNodeOrder.end()) {
01516             DEBUG(std::cerr << "Adding node to Final Order: " << *highestHeightNode << "\n");
01517             FinalNodeOrder.push_back(highestHeightNode);
01518           }
01519 
01520           //Remove V from IntersectOrder
01521           IntersectCurrent.erase(std::find(IntersectCurrent.begin(),
01522                                       IntersectCurrent.end(), highestHeightNode));
01523 
01524 
01525           //Intersect V's successors with CurrentSet
01526           for(MSchedGraphSBNode::succ_iterator P = highestHeightNode->succ_begin(),
01527                 E = highestHeightNode->succ_end(); P != E; ++P) {
01528             //if(lower_bound(CurrentSet->begin(),
01529             //     CurrentSet->end(), *P) != CurrentSet->end()) {
01530             if(std::find(CurrentSet->begin(), CurrentSet->end(), *P) != CurrentSet->end()) {
01531               if(ignoreEdge(highestHeightNode, *P))
01532                 continue;
01533               //If not already in Intersect, add
01534               if(!IntersectCurrent.count(*P))
01535                 IntersectCurrent.insert(*P);
01536             }
01537           }
01538         } //End while loop over Intersect Size
01539 
01540         //Change direction
01541         order = BOTTOM_UP;
01542 
01543         //Reset Intersect to reflect changes in OrderNodes
01544         IntersectCurrent.clear();
01545         predIntersect(*CurrentSet, IntersectCurrent);
01546 
01547       } //End If TOP_DOWN
01548 
01549         //Begin if BOTTOM_UP
01550       else {
01551         DEBUG(std::cerr << "Order is BOTTOM UP\n");
01552         while(IntersectCurrent.size() > 0) {
01553           DEBUG(std::cerr << "Intersection of size " << IntersectCurrent.size() << ", finding highest depth\n");
01554 
01555           //dump intersection
01556           DEBUG(dumpIntersection(IntersectCurrent));
01557           //Get node with highest depth, if a tie, use one with lowest
01558           //MOB
01559           int MOB = 0;
01560           int depth = 0;
01561           MSchedGraphSBNode *highestDepthNode = *(IntersectCurrent.begin());
01562 
01563           for(std::set<MSchedGraphSBNode*>::iterator I = IntersectCurrent.begin(),
01564                 E = IntersectCurrent.end(); I != E; ++I) {
01565             //Find node attribute in graph
01566             MSNodeSBAttributes nodeAttr= nodeToAttributesMap.find(*I)->second;
01567 
01568             if(depth < nodeAttr.depth) {
01569               highestDepthNode = *I;
01570               depth = nodeAttr.depth;
01571               MOB = nodeAttr.MOB;
01572             }
01573             else if(depth == nodeAttr.depth) {
01574               if(MOB > nodeAttr.MOB) {
01575                 highestDepthNode = *I;
01576                 depth = nodeAttr.depth;
01577                 MOB = nodeAttr.MOB;
01578               }
01579             }
01580           }
01581 
01582 
01583 
01584           //Append highest depth node to the NodeOrder
01585            if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), highestDepthNode) == FinalNodeOrder.end()) {
01586              DEBUG(std::cerr << "Adding node to Final Order: " << *highestDepthNode << "\n");
01587              FinalNodeOrder.push_back(highestDepthNode);
01588            }
01589           //Remove heightestDepthNode from IntersectOrder
01590            IntersectCurrent.erase(highestDepthNode);
01591 
01592 
01593           //Intersect heightDepthNode's pred with CurrentSet
01594           for(MSchedGraphSBNode::pred_iterator P = highestDepthNode->pred_begin(),
01595                 E = highestDepthNode->pred_end(); P != E; ++P) {
01596             if(CurrentSet->count(*P)) {
01597               if(ignoreEdge(*P, highestDepthNode))
01598                 continue;
01599 
01600             //If not already in Intersect, add
01601             if(!IntersectCurrent.count(*P))
01602               IntersectCurrent.insert(*P);
01603             }
01604           }
01605 
01606         } //End while loop over Intersect Size
01607 
01608           //Change order
01609         order = TOP_DOWN;
01610 
01611         //Reset IntersectCurrent to reflect changes in OrderNodes
01612         IntersectCurrent.clear();
01613         succIntersect(*CurrentSet, IntersectCurrent);
01614         } //End if BOTTOM_DOWN
01615 
01616       DEBUG(std::cerr << "Current Intersection Size: " << IntersectCurrent.size() << "\n");
01617     }
01618     //End Wrapping while loop
01619     DEBUG(std::cerr << "Ending Size of Current Set: " << CurrentSet->size() << "\n");
01620   }//End for over all sets of nodes
01621 
01622   //FIXME: As the algorithm stands it will NEVER add an instruction such as ba (with no
01623   //data dependencies) to the final order. We add this manually. It will always be
01624   //in the last set of S since its not part of a recurrence
01625     //Loop over all the sets and place them in the final node order
01626   std::vector<std::set<MSchedGraphSBNode*> > ::reverse_iterator LastSet = partialOrder.rbegin();
01627   for(std::set<MSchedGraphSBNode*>::iterator CurrentNode = LastSet->begin(), LastNode = LastSet->end();
01628       CurrentNode != LastNode; ++CurrentNode) {
01629     if((*CurrentNode)->getInst()->getOpcode() == V9::BA)
01630       FinalNodeOrder.push_back(*CurrentNode);
01631   }
01632   //Return final Order
01633   //return FinalNodeOrder;
01634 }
01635 
01636 
01637 void ModuloSchedulingSBPass::predIntersect(std::set<MSchedGraphSBNode*> &CurrentSet, std::set<MSchedGraphSBNode*> &IntersectResult) {
01638 
01639   for(unsigned j=0; j < FinalNodeOrder.size(); ++j) {
01640     for(MSchedGraphSBNode::pred_iterator P = FinalNodeOrder[j]->pred_begin(),
01641           E = FinalNodeOrder[j]->pred_end(); P != E; ++P) {
01642 
01643       //Check if we are supposed to ignore this edge or not
01644       if(ignoreEdge(*P,FinalNodeOrder[j]))
01645         continue;
01646 
01647       if(CurrentSet.count(*P))
01648         if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), *P) == FinalNodeOrder.end())
01649           IntersectResult.insert(*P);
01650     }
01651   }
01652 }
01653 
01654 void ModuloSchedulingSBPass::succIntersect(std::set<MSchedGraphSBNode*> &CurrentSet, std::set<MSchedGraphSBNode*> &IntersectResult) {
01655 
01656   for(unsigned j=0; j < FinalNodeOrder.size(); ++j) {
01657     for(MSchedGraphSBNode::succ_iterator P = FinalNodeOrder[j]->succ_begin(),
01658           E = FinalNodeOrder[j]->succ_end(); P != E; ++P) {
01659 
01660       //Check if we are supposed to ignore this edge or not
01661       if(ignoreEdge(FinalNodeOrder[j],*P))
01662         continue;
01663 
01664       if(CurrentSet.count(*P))
01665         if(std::find(FinalNodeOrder.begin(), FinalNodeOrder.end(), *P) == FinalNodeOrder.end())
01666           IntersectResult.insert(*P);
01667     }
01668   }
01669 }
01670 
01671 
01672 
01673 bool ModuloSchedulingSBPass::computeSchedule(std::vector<const MachineBasicBlock*> &SB, MSchedGraphSB *MSG) {
01674 
01675   TIME_REGION(X, "computeSchedule");
01676 
01677   bool success = false;
01678 
01679   //FIXME: Should be set to max II of the original loop
01680   //Cap II in order to prevent infinite loop
01681   int capII = MSG->totalDelay();
01682 
01683   while(!success) {
01684 
01685     //Keep track of branches, but do not insert into the schedule
01686     std::vector<MSchedGraphSBNode*> branches;
01687 
01688     //Loop over the final node order and process each node
01689     for(std::vector<MSchedGraphSBNode*>::iterator I = FinalNodeOrder.begin(),
01690           E = FinalNodeOrder.end(); I != E; ++I) {
01691 
01692       //CalculateEarly and Late start
01693       bool initialLSVal = false;
01694       bool initialESVal = false;
01695       int EarlyStart = 0;
01696       int LateStart = 0;
01697       bool hasSucc = false;
01698       bool hasPred = false;
01699       bool sched;
01700 
01701       if((*I)->isBranch())
01702         if((*I)->hasPredecessors())
01703           sched = true;
01704         else
01705           sched = false;
01706       else
01707         sched = true;
01708 
01709       if(sched) {
01710         //Loop over nodes in the schedule and determine if they are predecessors
01711         //or successors of the node we are trying to schedule
01712         for(MSScheduleSB::schedule_iterator nodesByCycle = schedule.begin(), nodesByCycleEnd = schedule.end();
01713             nodesByCycle != nodesByCycleEnd; ++nodesByCycle) {
01714 
01715           //For this cycle, get the vector of nodes schedule and loop over it
01716           for(std::vector<MSchedGraphSBNode*>::iterator schedNode = nodesByCycle->second.begin(), SNE = nodesByCycle->second.end(); schedNode != SNE; ++schedNode) {
01717 
01718             if((*I)->isPredecessor(*schedNode)) {
01719               int diff = (*I)->getInEdge(*schedNode).getIteDiff();
01720               int ES_Temp = nodesByCycle->first + (*schedNode)->getLatency() - diff * II;
01721               DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << nodesByCycle->first << "\n");
01722               DEBUG(std::cerr << "Temp EarlyStart: " << ES_Temp << " Prev EarlyStart: " << EarlyStart << "\n");
01723               if(initialESVal)
01724                 EarlyStart = std::max(EarlyStart, ES_Temp);
01725               else {
01726                 EarlyStart = ES_Temp;
01727                 initialESVal = true;
01728               }
01729               hasPred = true;
01730             }
01731             if((*I)->isSuccessor(*schedNode)) {
01732               int diff = (*schedNode)->getInEdge(*I).getIteDiff();
01733               int LS_Temp = nodesByCycle->first - (*I)->getLatency() + diff * II;
01734               DEBUG(std::cerr << "Diff: " << diff << " Cycle: " << nodesByCycle->first << "\n");
01735               DEBUG(std::cerr << "Temp LateStart: " << LS_Temp << " Prev LateStart: " << LateStart << "\n");
01736               if(initialLSVal)
01737                 LateStart = std::min(LateStart, LS_Temp);
01738               else {
01739                 LateStart = LS_Temp;
01740                 initialLSVal = true;
01741               }
01742               hasSucc = true;
01743             }
01744           }
01745         }
01746       }
01747       else {
01748         branches.push_back(*I);
01749         continue;
01750       }
01751 
01752       //Check if the node has no pred or successors and set Early Start to its ASAP
01753       if(!hasSucc && !hasPred)
01754         EarlyStart = nodeToAttributesMap.find(*I)->second.ASAP;
01755 
01756       DEBUG(std::cerr << "Has Successors: " << hasSucc << ", Has Pred: " << hasPred << "\n");
01757       DEBUG(std::cerr << "EarlyStart: " << EarlyStart << ", LateStart: " << LateStart << "\n");
01758 
01759       //Now, try to schedule this node depending upon its pred and successor in the schedule
01760       //already
01761       if(!hasSucc && hasPred)
01762         success = scheduleNode(*I, EarlyStart, (EarlyStart + II -1));
01763       else if(!hasPred && hasSucc)
01764         success = scheduleNode(*I, LateStart, (LateStart - II +1));
01765       else if(hasPred && hasSucc) {
01766         if(EarlyStart > LateStart) {
01767         success = false;
01768           //LateStart = EarlyStart;
01769           DEBUG(std::cerr << "Early Start can not be later then the late start cycle, schedule fails\n");
01770         }
01771         else
01772           success = scheduleNode(*I, EarlyStart, std::min(LateStart, (EarlyStart + II -1)));
01773       }
01774       else
01775         success = scheduleNode(*I, EarlyStart, EarlyStart + II - 1);
01776 
01777       if(!success) {
01778         ++II;
01779         schedule.clear();
01780         break;
01781       }
01782 
01783     }
01784 
01785     if(success) {
01786       DEBUG(std::cerr << "Constructing Schedule Kernel\n");
01787       success = schedule.constructKernel(II, branches, indVarInstrs[SB]);
01788       DEBUG(std::cerr << "Done Constructing Schedule Kernel\n");
01789       if(!success) {
01790         ++II;
01791         schedule.clear();
01792       }
01793       DEBUG(std::cerr << "Final II: " << II << "\n");
01794 
01795     }
01796 
01797     if(II >= capII) {
01798       DEBUG(std::cerr << "Maximum II reached, giving up\n");
01799       return false;
01800     }
01801 
01802     assert(II < capII && "The II should not exceed the original loop number of cycles");
01803   }
01804   return true;
01805 }
01806 
01807 
01808 bool ModuloSchedulingSBPass::scheduleNode(MSchedGraphSBNode *node,
01809                                       int start, int end) {
01810   bool success = false;
01811 
01812   DEBUG(std::cerr << *node << " (Start Cycle: " << start << ", End Cycle: " << end << ")\n");
01813 
01814   //Make sure start and end are not negative
01815   //if(start < 0) {
01816   //start = 0;
01817 
01818   //}
01819   //if(end < 0)
01820   //end = 0;
01821 
01822   bool forward = true;
01823   if(start > end)
01824     forward = false;
01825 
01826   bool increaseSC = true;
01827   int cycle = start ;
01828 
01829 
01830   while(increaseSC) {
01831 
01832     increaseSC = false;
01833 
01834     increaseSC = schedule.insert(node, cycle, II);
01835 
01836     if(!increaseSC)
01837       return true;
01838 
01839     //Increment cycle to try again
01840     if(forward) {
01841       ++cycle;
01842       DEBUG(std::cerr << "Increase cycle: " << cycle << "\n");
01843       if(cycle > end)
01844         return false;
01845     }
01846     else {
01847       --cycle;
01848       DEBUG(std::cerr << "Decrease cycle: " << cycle << "\n");
01849       if(cycle < end)
01850         return false;
01851     }
01852   }
01853 
01854   return success;
01855 }
01856 
01857 void ModuloSchedulingSBPass::reconstructLoop(std::vector<const MachineBasicBlock*> &SB) {
01858 
01859   TIME_REGION(X, "reconstructLoop");
01860 
01861 
01862   DEBUG(std::cerr << "Reconstructing Loop\n");
01863 
01864   //First find the value *'s that we need to "save"
01865   std::map<const Value*, std::pair<const MachineInstr*, int> > valuesToSave;
01866 
01867   //Keep track of instructions we have already seen and their stage because
01868   //we don't want to "save" values if they are used in the kernel immediately
01869   std::map<const MachineInstr*, int> lastInstrs;
01870 
01871 
01872   std::set<MachineBasicBlock*> seenBranchesBB;
01873   const TargetInstrInfo *MTI = target.getInstrInfo();
01874   std::map<MachineBasicBlock*, std::vector<std::pair<MachineInstr*, int> > > instrsMovedDown;
01875   std::map<MachineBasicBlock*, int> branchStage;
01876 
01877   //Loop over kernel and only look at instructions from a stage > 0
01878   //Look at its operands and save values *'s that are read
01879   for(MSScheduleSB::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) {
01880 
01881     if(I->second !=0) {
01882       //For this instruction, get the Value*'s that it reads and put them into the set.
01883       //Assert if there is an operand of another type that we need to save
01884       const MachineInstr *inst = I->first;
01885       lastInstrs[inst] = I->second;
01886 
01887       for(unsigned i=0; i < inst->getNumOperands(); ++i) {
01888         //get machine operand
01889         const MachineOperand &mOp = inst->getOperand(i);
01890 
01891         if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) {
01892           //find the value in the map
01893           if (const Value* srcI = mOp.getVRegValue()) {
01894 
01895             if(isa<Constant>(srcI) || isa<Argument>(srcI))
01896               continue;
01897 
01898             //Before we declare this Value* one that we should save
01899             //make sure its def is not of the same stage as this instruction
01900             //because it will be consumed before its used
01901             Instruction *defInst = (Instruction*) srcI;
01902 
01903             //Should we save this value?
01904             bool save = true;
01905 
01906             //Continue if not in the def map, loop invariant code does not need to be saved
01907             if(!defMap.count(srcI))
01908               continue;
01909 
01910             MachineInstr *defInstr = defMap[srcI];
01911 
01912 
01913             if(lastInstrs.count(defInstr)) {
01914               if(lastInstrs[defInstr] == I->second) {
01915                 save = false;
01916 
01917               }
01918             }
01919 
01920             if(save)
01921               valuesToSave[srcI] = std::make_pair(I->first, i);
01922           }
01923         }
01924 
01925         if(mOp.getType() != MachineOperand::MO_VirtualRegister && mOp.isUse()) {
01926           assert("Our assumption is wrong. We have another type of register that needs to be saved\n");
01927         }
01928       }
01929     }
01930 
01931 
01932     //Do a check to see if instruction was moved below its original branch
01933     if(MTI->isBranch(I->first->getOpcode())) {
01934       seenBranchesBB.insert(I->first->getParent());
01935       branchStage[I->first->getParent()] = I->second;
01936     }
01937     else {
01938       instrsMovedDown[I->first->getParent()].push_back(std::make_pair(I->first, I->second));
01939       //assert(seenBranchesBB.count(I->first->getParent()) && "Instruction moved below branch\n");
01940     }
01941 
01942   }
01943 
01944   //The new loop will consist of one or more prologues, the kernel, and one or more epilogues.
01945 
01946   //Map to keep track of old to new values
01947   std::map<Value*, std::map<int, Value*> > newValues;
01948 
01949   //Map to keep track of old to new values in kernel
01950   std::map<Value*, std::map<int, Value*> > kernelPHIs;
01951 
01952   //Another map to keep track of what machine basic blocks these new value*s are in since
01953   //they have no llvm instruction equivalent
01954   std::map<Value*, MachineBasicBlock*> newValLocation;
01955 
01956   std::vector<std::vector<MachineBasicBlock*> > prologues;
01957   std::vector<std::vector<BasicBlock*> > llvm_prologues;
01958 
01959   //Map to keep track of where the inner branches go
01960   std::map<const MachineBasicBlock*, Value*> sideExits;
01961 
01962 
01963   //Write prologue
01964   if(schedule.getMaxStage() != 0)
01965     writePrologues(prologues, SB, llvm_prologues, valuesToSave, newValues, newValLocation);
01966 
01967   std::vector<BasicBlock*> llvmKernelBBs;
01968   std::vector<MachineBasicBlock*> machineKernelBBs;
01969   Function *parent = (Function*) SB[0]->getBasicBlock()->getParent();
01970 
01971   for(unsigned i = 0; i < SB.size(); ++i) {
01972     llvmKernelBBs.push_back(new BasicBlock("Kernel", parent));
01973 
01974     machineKernelBBs.push_back(new MachineBasicBlock(llvmKernelBBs[i]));
01975     (((MachineBasicBlock*)SB[0])->getParent())->getBasicBlockList().push_back(machineKernelBBs[i]);
01976   }
01977 
01978   writeKernel(llvmKernelBBs, machineKernelBBs, valuesToSave, newValues, newValLocation, kernelPHIs);
01979 
01980 
01981   std::vector<std::vector<MachineBasicBlock*> > epilogues;
01982   std::vector<std::vector<BasicBlock*> > llvm_epilogues;
01983 
01984   //Write epilogues
01985   if(schedule.getMaxStage() != 0)
01986     writeEpilogues(epilogues, SB, llvm_epilogues, valuesToSave, newValues, newValLocation, kernelPHIs);
01987 
01988 
01989   //Fix our branches
01990   fixBranches(prologues, llvm_prologues, machineKernelBBs, llvmKernelBBs, epilogues, llvm_epilogues, SB, sideExits);
01991 
01992   //Print out epilogues and prologue
01993   DEBUG(for(std::vector<std::vector<MachineBasicBlock*> >::iterator PI = prologues.begin(), PE = prologues.end();
01994       PI != PE; ++PI) {
01995           std::cerr << "PROLOGUE\n";
01996           for(std::vector<MachineBasicBlock*>::iterator I = PI->begin(), E = PI->end(); I != E; ++I)
01997             (*I)->print(std::cerr);
01998         });
01999 
02000   DEBUG(std::cerr << "KERNEL\n");
02001   DEBUG(for(std::vector<MachineBasicBlock*>::iterator I = machineKernelBBs.begin(), E = machineKernelBBs.end(); I != E; ++I) { (*I)->print(std::cerr);});
02002 
02003   DEBUG(for(std::vector<std::vector<MachineBasicBlock*> >::iterator EI = epilogues.begin(), EE = epilogues.end();
02004       EI != EE; ++EI) {
02005     std::cerr << "EPILOGUE\n";
02006     for(std::vector<MachineBasicBlock*>::iterator I = EI->begin(), E = EI->end(); I != E; ++I)
02007       (*I)->print(std::cerr);
02008   });
02009 
02010 
02011   //Remove phis
02012   removePHIs(SB, prologues, epilogues, machineKernelBBs, newValLocation);
02013 
02014   //Print out epilogues and prologue
02015   DEBUG(for(std::vector<std::vector<MachineBasicBlock*> >::iterator PI = prologues.begin(), PE = prologues.end();
02016       PI != PE; ++PI) {
02017           std::cerr << "PROLOGUE\n";
02018           for(std::vector<MachineBasicBlock*>::iterator I = PI->begin(), E = PI->end(); I != E; ++I)
02019             (*I)->print(std::cerr);
02020         });
02021 
02022   DEBUG(std::cerr << "KERNEL\n");
02023   DEBUG(for(std::vector<MachineBasicBlock*>::iterator I = machineKernelBBs.begin(), E = machineKernelBBs.end(); I != E; ++I) { (*I)->print(std::cerr);});
02024 
02025   DEBUG(for(std::vector<std::vector<MachineBasicBlock*> >::iterator EI = epilogues.begin(), EE = epilogues.end();
02026       EI != EE; ++EI) {
02027     std::cerr << "EPILOGUE\n";
02028     for(std::vector<MachineBasicBlock*>::iterator I = EI->begin(), E = EI->end(); I != E; ++I)
02029       (*I)->print(std::cerr);
02030   });
02031 
02032   writeSideExits(prologues, llvm_prologues, epilogues, llvm_epilogues, sideExits, instrsMovedDown, SB, machineKernelBBs, branchStage);
02033 
02034 
02035   DEBUG(std::cerr << "New Machine Function" << "\n");
02036 }
02037 
02038 
02039 void ModuloSchedulingSBPass::fixBranches(std::vector<std::vector<MachineBasicBlock*> > &prologues, std::vector<std::vector<BasicBlock*> > &llvm_prologues, std::vector<MachineBasicBlock*> &machineKernelBB, std::vector<BasicBlock*> &llvmKernelBB, std::vector<std::vector<MachineBasicBlock*> > &epilogues, std::vector<std::vector<BasicBlock*> > &llvm_epilogues, std::vector<const MachineBasicBlock*> &SB, std::map<const MachineBasicBlock*, Value*> &sideExits) {
02040 
02041   const TargetInstrInfo *TMI = target.getInstrInfo();
02042 
02043   //Get exit BB
02044   BasicBlock *last = (BasicBlock*) SB[SB.size()-1]->getBasicBlock();
02045   BasicBlock *kernel_exit = 0;
02046   bool sawFirst = false;
02047 
02048   for(succ_iterator I = succ_begin(last),
02049         E = succ_end(last); I != E; ++I) {
02050     if (*I != SB[0]->getBasicBlock()) {
02051       kernel_exit = *I;
02052       break;
02053     }
02054     else
02055       sawFirst = true;
02056   }
02057   if(!kernel_exit && sawFirst) {
02058     kernel_exit = (BasicBlock*) SB[0]->getBasicBlock();
02059   }
02060 
02061   assert(kernel_exit && "Kernel Exit can not be null");
02062 
02063   if(schedule.getMaxStage() != 0) {
02064     //Fix prologue branches
02065     for(unsigned i = 0; i <  prologues.size(); ++i) {
02066 
02067       for(unsigned j = 0; j < prologues[i].size(); ++j) {
02068 
02069         MachineBasicBlock *currentMBB = prologues[i][j];
02070 
02071         //Find terminator since getFirstTerminator does not work!
02072         for(MachineBasicBlock::reverse_iterator mInst = currentMBB->rbegin(), mInstEnd = currentMBB->rend(); mInst != mInstEnd; ++mInst) {
02073           MachineOpCode OC = mInst->getOpcode();
02074           //If its a branch update its branchto
02075           if(TMI->isBranch(OC)) {
02076             for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) {
02077               MachineOperand &mOp = mInst->getOperand(opNum);
02078               if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) {
02079                 //Check if we are branching to the kernel, if not branch to epilogue
02080                 if(mOp.getVRegValue() == SB[0]->getBasicBlock()) {
02081                   if(i >= prologues.size()-1)
02082                     mOp.setValueReg(llvmKernelBB[0]);
02083                   else
02084                     mOp.setValueReg(llvm_prologues[i+1][0]);
02085                 }
02086                 else if( (mOp.getVRegValue() == kernel_exit) && (j == prologues[i].size()-1)) {
02087                   mOp.setValueReg(llvm_epilogues[i][0]);
02088                 }
02089                 else if(mOp.getVRegValue() == SB[j+1]->getBasicBlock()) {
02090                   mOp.setValueReg(llvm_prologues[i][j+1]);
02091                 }
02092 
02093               }
02094             }
02095 
02096             DEBUG(std::cerr << "New Prologue Branch: " << *mInst << "\n");
02097           }
02098         }
02099 
02100         //Update llvm basic block with our new branch instr
02101         DEBUG(std::cerr << SB[i]->getBasicBlock()->getTerminator() << "\n");
02102 
02103         const BranchInst *branchVal = dyn_cast<BranchInst>(SB[i]->getBasicBlock()->getTerminator());
02104 
02105         //Check for inner branch
02106         if(j < prologues[i].size()-1) {
02107           //Find our side exit LLVM basic block
02108           BasicBlock *sideExit = 0;
02109           for(unsigned s = 0; s < branchVal->getNumSuccessors(); ++s) {
02110             if(branchVal->getSuccessor(s) != SB[i+1]->getBasicBlock())
02111               sideExit = branchVal->getSuccessor(s);
02112           }
02113           assert(sideExit && "Must have side exit llvm basic block");
02114           TerminatorInst *newBranch = new BranchInst(sideExit,
02115                                         llvm_prologues[i][j+1],
02116                                         branchVal->getCondition(),
02117                                         llvm_prologues[i][j]);
02118         }
02119         else {
02120           //If last prologue
02121           if(i == prologues.size()-1) {
02122             TerminatorInst *newBranch = new BranchInst(llvmKernelBB[0],
02123                                                        llvm_epilogues[i][0],
02124                                                        branchVal->getCondition(),
02125                                                        llvm_prologues[i][j]);
02126           }
02127           else {
02128             TerminatorInst *newBranch = new BranchInst(llvm_prologues[i+1][0],
02129                                                        llvm_epilogues[i][0],
02130                                                        branchVal->getCondition(),
02131                                                        llvm_prologues[i][j]);
02132           }
02133         }
02134       }
02135     }
02136   }
02137 
02138   //Fix up kernel machine branches
02139   for(unsigned i = 0; i < machineKernelBB.size(); ++i) {
02140     MachineBasicBlock *currentMBB = machineKernelBB[i];
02141 
02142     for(MachineBasicBlock::reverse_iterator mInst = currentMBB->rbegin(), mInstEnd = currentMBB->rend(); mInst != mInstEnd; ++mInst) {
02143       MachineOpCode OC = mInst->getOpcode();
02144       if(TMI->isBranch(OC)) {
02145         for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) {
02146           MachineOperand &mOp = mInst->getOperand(opNum);
02147 
02148           if(mOp.getType() == MachineOperand::MO_PCRelativeDisp) {
02149             //Deal with inner kernel branches
02150             if(i < machineKernelBB.size()-1) {
02151               if(mOp.getVRegValue() == SB[i+1]->getBasicBlock())
02152                 mOp.setValueReg(llvmKernelBB[i+1]);
02153               //Side exit!
02154               else {
02155                 sideExits[SB[i]] = mOp.getVRegValue();
02156               }
02157             }
02158             else {
02159               if(mOp.getVRegValue() == SB[0]->getBasicBlock())
02160                 mOp.setValueReg(llvmKernelBB[0]);
02161               else {
02162                 if(llvm_epilogues.size() > 0)
02163                   mOp.setValueReg(llvm_epilogues[0][0]);
02164               }
02165             }
02166           }
02167         }
02168       }
02169     }
02170 
02171     //Update kernelLLVM branches
02172     const BranchInst *branchVal = dyn_cast<BranchInst>(SB[0]->getBasicBlock()->getTerminator());
02173 
02174     //deal with inner branch
02175     if(i < machineKernelBB.size()-1) {
02176 
02177       //Find our side exit LLVM basic block
02178       BasicBlock *sideExit = 0;
02179       for(unsigned s = 0; s < branchVal->getNumSuccessors(); ++s) {
02180         if(branchVal->getSuccessor(s) != SB[i+1]->getBasicBlock())
02181           sideExit = branchVal->getSuccessor(s);
02182       }
02183       assert(sideExit && "Must have side exit llvm basic block");
02184       TerminatorInst *newBranch = new BranchInst(sideExit,
02185                                                  llvmKernelBB[i+1],
02186                                                  branchVal->getCondition(),
02187                                                  llvmKernelBB[i]);
02188     }
02189     else {
02190       //Deal with outter branches
02191       if(epilogues.size() > 0) {
02192         TerminatorInst *newBranch = new BranchInst(llvmKernelBB[0],
02193                                                    llvm_epilogues[0][0],
02194                                                    branchVal->getCondition(),
02195                                                    llvmKernelBB[i]);
02196       }
02197       else {
02198         TerminatorInst *newBranch = new BranchInst(llvmKernelBB[0],
02199                                                    kernel_exit,
02200                                                    branchVal->getCondition(),
02201                                                    llvmKernelBB[i]);
02202       }
02203     }
02204   }
02205 
02206   if(schedule.getMaxStage() != 0) {
02207 
02208     //Lastly add unconditional branches for the epilogues
02209     for(unsigned i = 0; i <  epilogues.size(); ++i) {
02210 
02211       for(unsigned j=0; j < epilogues[i].size(); ++j) {
02212         //Now since we don't have fall throughs, add a unconditional
02213         //branch to the next prologue
02214 
02215         //Before adding these, we need to check if the epilogue already has
02216         //a branch in it
02217         bool hasBranch = false;
02218         /*if(j < epilogues[i].size()-1) {
02219           MachineBasicBlock *currentMBB = epilogues[i][j];
02220           for(MachineBasicBlock::reverse_iterator mInst = currentMBB->rbegin(), mInstEnd = currentMBB->rend(); mInst != mInstEnd; ++mInst) {
02221 
02222             MachineOpCode OC = mInst->getOpcode();
02223 
02224             //If its a branch update its branchto
02225             if(TMI->isBranch(OC)) {
02226               hasBranch = true;
02227               for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) {
02228                 MachineOperand &mOp = mInst->getOperand(opNum);
02229                 if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) {
02230 
02231                   if(mOp.getVRegValue() != sideExits[SB[j]]) {
02232                     mOp.setValueReg(llvm_epilogues[i][j+1]);
02233                   }
02234 
02235                 }
02236               }
02237 
02238 
02239               DEBUG(std::cerr << "New Epilogue Branch: " << *mInst << "\n");
02240             }
02241           }
02242           if(hasBranch) {
02243             const BranchInst *branchVal = dyn_cast<BranchInst>(SB[j]->getBasicBlock()->getTerminator());
02244             TerminatorInst *newBranch = new BranchInst((BasicBlock*)sideExits[SB[j]],
02245                                                        llvm_epilogues[i][j+1],
02246                                                        branchVal->getCondition(),
02247                                                        llvm_epilogues[i][j]);
02248           }
02249           }*/
02250 
02251         if(!hasBranch) {
02252 
02253           //Handle inner branches
02254           if(j < epilogues[i].size()-1) {
02255             BuildMI(epilogues[i][j], V9::BA, 1).addPCDisp(llvm_epilogues[i][j+1]);
02256             TerminatorInst *newBranch = new BranchInst(llvm_epilogues[i][j+1],
02257                                                        llvm_epilogues[i][j]);
02258           }
02259           else {
02260 
02261             //Check if this is the last epilogue
02262             if(i != epilogues.size()-1) {
02263               BuildMI(epilogues[i][j], V9::BA, 1).addPCDisp(llvm_epilogues[i+1][0]);
02264               //Add unconditional branch to end of epilogue
02265               TerminatorInst *newBranch = new BranchInst(llvm_epilogues[i+1][0],
02266                                                          llvm_epilogues[i][j]);
02267 
02268             }
02269             else {
02270               BuildMI(epilogues[i][j], V9::BA, 1).addPCDisp(kernel_exit);
02271               TerminatorInst *newBranch = new BranchInst(kernel_exit, llvm_epilogues[i][j]);
02272             }
02273           }
02274 
02275           //Add one more nop!
02276           BuildMI(epilogues[i][j], V9::NOP, 0);
02277 
02278         }
02279       }
02280     }
02281   }
02282 
02283   //Find all llvm basic blocks that branch to the loop entry and
02284   //change to our first prologue.
02285   const BasicBlock *llvmBB = SB[0]->getBasicBlock();
02286 
02287   std::vector<const BasicBlock*>Preds (pred_begin(llvmBB), pred_end(llvmBB));
02288 
02289   for(std::vector<const BasicBlock*>::iterator P = Preds.begin(),
02290         PE = Preds.end(); P != PE; ++P) {
02291     if(*P == SB[SB.size()-1]->getBasicBlock())
02292        continue;
02293      else {
02294        DEBUG(std::cerr << "Found our entry BB\n");
02295        DEBUG((*P)->print(std::cerr));
02296        //Get the Terminator instruction for this basic block and print it out
02297        //DEBUG(std::cerr << *((*P)->getTerminator()) << "\n");
02298 
02299        //Update the terminator
02300        TerminatorInst *term = ((BasicBlock*)*P)->getTerminator();
02301        for(unsigned i=0; i < term->getNumSuccessors(); ++i) {
02302          if(term->getSuccessor(i) == llvmBB) {
02303            DEBUG(std::cerr << "Replacing successor bb\n");
02304            if(llvm_prologues.size() > 0) {
02305              term->setSuccessor(i, llvm_prologues[0][0]);
02306 
02307              DEBUG(std::cerr << "New Term" << *((*P)->getTerminator()) << "\n");
02308 
02309              //Also update its corresponding machine instruction
02310              MachineCodeForInstruction & tempMvec =
02311                MachineCodeForInstruction::get(term);
02312              for (unsigned j = 0; j < tempMvec.size(); j++) {
02313                MachineInstr *temp = tempMvec[j];
02314                MachineOpCode opc = temp->getOpcode();
02315                if(TMI->isBranch(opc)) {
02316                  DEBUG(std::cerr << *temp << "\n");
02317                  //Update branch
02318                  for(unsigned opNum = 0; opNum < temp->getNumOperands(); ++opNum) {
02319                    MachineOperand &mOp = temp->getOperand(opNum);
02320                    if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) {
02321                      if(mOp.getVRegValue() == llvmBB)
02322                        mOp.setValueReg(llvm_prologues[0][0]);
02323                    }
02324                  }
02325                }
02326              }
02327            }
02328            else {
02329              term->setSuccessor(i, llvmKernelBB[0]);
02330 
02331              //Also update its corresponding machine instruction
02332              MachineCodeForInstruction & tempMvec =
02333                MachineCodeForInstruction::get(term);
02334              for(unsigned j = 0; j < tempMvec.size(); j++) {
02335                MachineInstr *temp = tempMvec[j];
02336                MachineOpCode opc = temp->getOpcode();
02337                if(TMI->isBranch(opc)) {
02338                  DEBUG(std::cerr << *temp << "\n");
02339                  //Update branch
02340                  for(unsigned opNum = 0; opNum < temp->getNumOperands(); ++opNum) {
02341                    MachineOperand &mOp = temp->getOperand(opNum);
02342                    if(mOp.getType() == MachineOperand::MO_PCRelativeDisp) {
02343                      if(mOp.getVRegValue() == llvmBB)
02344                        mOp.setValueReg(llvmKernelBB[0]);
02345                    }
02346                  }
02347                }
02348              }
02349            }
02350          }
02351        }
02352        break;
02353      }
02354   }
02355 
02356 }
02357 
02358 
02359 void ModuloSchedulingSBPass::writePrologues(std::vector<std::vector<MachineBasicBlock *> > &prologues, std::vector<const MachineBasicBlock*> &origSB, std::vector<std::vector<BasicBlock*> > &llvm_prologues, std::map<const Value*, std::pair<const MachineInstr*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues, std::map<Value*, MachineBasicBlock*> &newValLocation) {
02360 
02361   //Keep a map to easily know whats in the kernel
02362   std::map<int, std::set<const MachineInstr*> > inKernel;
02363   int maxStageCount = 0;
02364 
02365   //Keep a map of new values we consumed in case they need to be added back
02366   std::map<Value*, std::map<int, Value*> > consumedValues;
02367 
02368   DEBUG(schedule.print(std::cerr));
02369 
02370   for(MSScheduleSB::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) {
02371     maxStageCount = std::max(maxStageCount, I->second);
02372 
02373     //Put int the map so we know what instructions in each stage are in the kernel
02374     DEBUG(std::cerr << "Inserting instruction " << *(I->first) << " into map at stage " << I->second << "\n");
02375     inKernel[I->second].insert(I->first);
02376   }
02377 
02378   //Get target information to look at machine operands
02379   const TargetInstrInfo *mii = target.getInstrInfo();
02380 
02381  //Now write the prologues
02382   for(int i = 0; i < maxStageCount; ++i) {
02383     std::vector<MachineBasicBlock*> current_prologue;
02384     std::vector<BasicBlock*> current_llvm_prologue;
02385 
02386     for(std::vector<const MachineBasicBlock*>::iterator MB = origSB.begin(),
02387           MBE = origSB.end(); MB != MBE; ++MB) {
02388       const MachineBasicBlock *MBB = *MB;
02389       //Create new llvm and machine bb
02390       BasicBlock *llvmBB = new BasicBlock("PROLOGUE", (Function*) (MBB->getBasicBlock()->getParent()));
02391       MachineBasicBlock *machineBB = new MachineBasicBlock(llvmBB);
02392 
02393       DEBUG(std::cerr << "i=" << i << "\n");
02394 
02395       for(int j = i; j >= 0; --j) {
02396         //iterate over instructions in original bb
02397         for(MachineBasicBlock::const_iterator MI = MBB->begin(),
02398               ME = MBB->end(); ME != MI; ++MI) {
02399           if(inKernel[j].count(&*MI)) {
02400             MachineInstr *instClone = MI->clone();
02401             machineBB->push_back(instClone);
02402 
02403             //If its a branch, insert a nop
02404             if(mii->isBranch(instClone->getOpcode()))
02405               BuildMI(machineBB, V9::NOP, 0);
02406 
02407 
02408             DEBUG(std::cerr << "Cloning: " << *MI << "\n");
02409 
02410             //After cloning, we may need to save the value that this instruction defines
02411             for(unsigned opNum=0; opNum < MI->getNumOperands(); ++opNum) {
02412               Instruction *tmp;
02413 
02414               //get machine operand
02415               MachineOperand &mOp = instClone->getOperand(opNum);
02416               if(mOp.getType() == MachineOperand::MO_VirtualRegister
02417                  && mOp.isDef()) {
02418 
02419                 //Check if this is a value we should save
02420                 if(valuesToSave.count(mOp.getVRegValue())) {
02421                   //Save copy in tmpInstruction
02422                   tmp = new TmpInstruction(mOp.getVRegValue());
02423 
02424                   //Add TmpInstruction to safe LLVM Instruction MCFI
02425                   MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
02426                   tempMvec.addTemp((Value*) tmp);
02427 
02428                   DEBUG(std::cerr << "Value: " << *(mOp.getVRegValue())
02429                         << " New Value: " << *tmp << " Stage: " << i << "\n");
02430 
02431                 newValues[mOp.getVRegValue()][i]= tmp;
02432                 newValLocation[tmp] = machineBB;
02433 
02434                 DEBUG(std::cerr << "Machine Instr Operands: "
02435                       << *(mOp.getVRegValue()) << ", 0, " << *tmp << "\n");
02436 
02437                 //Create machine instruction and put int machineBB
02438                 MachineInstr *saveValue;
02439                 if(mOp.getVRegValue()->getType() == Type::FloatTy)
02440                   saveValue = BuildMI(machineBB, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
02441                 else if(mOp.getVRegValue()->getType() == Type::DoubleTy)
02442                   saveValue = BuildMI(machineBB, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
02443                 else
02444                   saveValue = BuildMI(machineBB, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp);
02445 
02446 
02447                 DEBUG(std::cerr << "Created new machine instr: " << *saveValue << "\n");
02448                 }
02449               }
02450 
02451               //We may also need to update the value that we use if
02452               //its from an earlier prologue
02453               if(j != 0) {
02454                 if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) {
02455                   if(newValues.count(mOp.getVRegValue())) {
02456                     if(newValues[mOp.getVRegValue()].count(i-1)) {
02457                       Value *oldV =  mOp.getVRegValue();
02458                       DEBUG(std::cerr << "Replaced this value: " << mOp.getVRegValue() << " With:" << (newValues[mOp.getVRegValue()][i-1]) << "\n");
02459                       //Update the operand with the right value
02460                       mOp.setValueReg(newValues[mOp.getVRegValue()][i-1]);
02461 
02462                       //Remove this value since we have consumed it
02463                       //NOTE: Should this only be done if j != maxStage?
02464                       consumedValues[oldV][i-1] = (newValues[oldV][i-1]);
02465                       DEBUG(std::cerr << "Deleted value: " << consumedValues[oldV][i-1] << "\n");
02466                       newValues[oldV].erase(i-1);
02467                     }
02468                   }
02469                   else
02470                     if(consumedValues.count(mOp.getVRegValue()))
02471                       assert(!consumedValues[mOp.getVRegValue()].count(i-1) && "Found a case where we need the value");
02472                 }
02473               }
02474             }
02475           }
02476         }
02477       }
02478         (((MachineBasicBlock*)MBB)->getParent())->getBasicBlockList().push_back(machineBB);
02479         current_prologue.push_back(machineBB);
02480         current_llvm_prologue.push_back(llvmBB);
02481     }
02482     prologues.push_back(current_prologue);
02483     llvm_prologues.push_back(current_llvm_prologue);
02484 
02485   }
02486 }
02487 
02488 
02489 void ModuloSchedulingSBPass::writeEpilogues(std::vector<std::vector<MachineBasicBlock*> > &epilogues, std::vector<const MachineBasicBlock*> &origSB, std::vector<std::vector<BasicBlock*> > &llvm_epilogues, std::map<const Value*, std::pair<const MachineInstr*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues,std::map<Value*, MachineBasicBlock*> &newValLocation, std::map<Value*, std::map<int, Value*> > &kernelPHIs ) {
02490 
02491   std::map<int, std::set<const MachineInstr*> > inKernel;
02492    const TargetInstrInfo *MTI = target.getInstrInfo();
02493 
02494   for(MSScheduleSB::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) {
02495 
02496     //Put int the map so we know what instructions in each stage are in the kernel
02497     inKernel[I->second].insert(I->first);
02498   }
02499 
02500   std::map<Value*, Value*> valPHIs;
02501 
02502   //some debug stuff, will remove later
02503   DEBUG(for(std::map<Value*, std::map<int, Value*> >::iterator V = newValues.begin(), E = newValues.end(); V !=E; ++V) {
02504     std::cerr << "Old Value: " << *(V->first) << "\n";
02505     for(std::map<int, Value*>::iterator I = V->second.begin(), IE = V->second.end(); I != IE; ++I)
02506       std::cerr << "Stage: " << I->first << " Value: " << *(I->second) << "\n";
02507   });
02508 
02509 
02510   //Now write the epilogues
02511   for(int i = schedule.getMaxStage()-1; i >= 0; --i) {
02512     std::vector<MachineBasicBlock*> current_epilogue;
02513     std::vector<BasicBlock*> current_llvm_epilogue;
02514 
02515     for(std::vector<const MachineBasicBlock*>::iterator MB = origSB.begin(), MBE = origSB.end(); MB != MBE; ++MB) {
02516       const MachineBasicBlock *MBB = *MB;
02517 
02518       BasicBlock *llvmBB = new BasicBlock("EPILOGUE", (Function*) (MBB->getBasicBlock()->getParent()));
02519       MachineBasicBlock *machineBB = new MachineBasicBlock(llvmBB);
02520 
02521       DEBUG(std::cerr << " Epilogue #: " << i << "\n");
02522 
02523       std::map<Value*, int> inEpilogue;
02524 
02525       for(MachineBasicBlock::const_iterator MI = MBB->begin(), ME = MBB->end(); ME != MI; ++MI) {
02526         for(int j=schedule.getMaxStage(); j > i; --j) {
02527           if(inKernel[j].count(&*MI)) {
02528             DEBUG(std::cerr << "Cloning instruction " << *MI << "\n");
02529             MachineInstr *clone = MI->clone();
02530 
02531             //Update operands that need to use the result from the phi
02532             for(unsigned opNum=0; opNum < clone->getNumOperands(); ++opNum) {
02533               //get machine operand
02534               const MachineOperand &mOp = clone->getOperand(opNum);
02535 
02536               if((mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse())) {
02537 
02538                 DEBUG(std::cerr << "Writing PHI for " << (mOp.getVRegValue()) << "\n");
02539 
02540                 //If this is the last instructions for the max iterations ago, don't update operands
02541                 if(inEpilogue.count(mOp.getVRegValue()))
02542                   if(inEpilogue[mOp.getVRegValue()] == i)
02543                     continue;
02544 
02545                 //Quickly write appropriate phis for this operand
02546                 if(newValues.count(mOp.getVRegValue())) {
02547                   if(newValues[mOp.getVRegValue()].count(i)) {
02548                     Instruction *tmp = new TmpInstruction(newValues[mOp.getVRegValue()][i]);
02549 
02550                     //Get machine code for this instruction
02551                     MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
02552                     tempMvec.addTemp((Value*) tmp);
02553 
02554                     //assert of no kernelPHI for this value
02555                     assert(kernelPHIs[mOp.getVRegValue()][i] !=0 && "Must have final kernel phi to construct epilogue phi");
02556 
02557                     MachineInstr *saveValue = BuildMI(machineBB, V9::PHI, 3).addReg(newValues[mOp.getVRegValue()][i]).addReg(kernelPHIs[mOp.getVRegValue()][i]).addRegDef(tmp);
02558                     DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n");
02559                     valPHIs[mOp.getVRegValue()] = tmp;
02560                   }
02561                 }
02562 
02563                 if(valPHIs.count(mOp.getVRegValue())) {
02564                   //Update the operand in the cloned instruction
02565                   clone->getOperand(opNum).setValueReg(valPHIs[mOp.getVRegValue()]);
02566                 }
02567               }
02568               else if((mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef())) {
02569                 inEpilogue[mOp.getVRegValue()] = i;
02570               }
02571 
02572             }
02573             machineBB->push_back(clone);
02574             //if(MTI->isBranch(clone->getOpcode()))
02575             //BuildMI(machineBB, V9::NOP, 0);
02576           }
02577         }
02578       }
02579       (((MachineBasicBlock*)MBB)->getParent())->getBasicBlockList().push_back(machineBB);
02580       current_epilogue.push_back(machineBB);
02581       current_llvm_epilogue.push_back(llvmBB);
02582     }
02583 
02584     DEBUG(std::cerr << "EPILOGUE #" << i << "\n");
02585     DEBUG(for(std::vector<MachineBasicBlock*>::iterator B = current_epilogue.begin(), BE = current_epilogue.end(); B != BE; ++B) {
02586             (*B)->print(std::cerr);});
02587 
02588     epilogues.push_back(current_epilogue);
02589     llvm_epilogues.push_back(current_llvm_epilogue);
02590   }
02591 }
02592 
02593 void ModuloSchedulingSBPass::writeKernel(std::vector<BasicBlock*> &llvmBB, std::vector<MachineBasicBlock*> &machineBB, std::map<const Value*, std::pair<const MachineInstr*, int> > &valuesToSave, std::map<Value*, std::map<int, Value*> > &newValues, std::map<Value*, MachineBasicBlock*> &newValLocation, std::map<Value*, std::map<int, Value*> > &kernelPHIs) {
02594 
02595   //Keep track of operands that are read and saved from a previous iteration. The new clone
02596   //instruction will use the result of the phi instead.
02597   std::map<Value*, Value*> finalPHIValue;
02598   std::map<Value*, Value*> kernelValue;
02599 
02600   //Branches are a special case
02601   std::vector<MachineInstr*> branches;
02602 
02603   //Get target information to look at machine operands
02604   const TargetInstrInfo *mii = target.getInstrInfo();
02605   unsigned index = 0;
02606   int numBr = 0;
02607   bool seenBranch = false;
02608 
02609   //Create TmpInstructions for the final phis
02610   for(MSScheduleSB::kernel_iterator I = schedule.kernel_begin(), E = schedule.kernel_end(); I != E; ++I) {
02611 
02612    DEBUG(std::cerr << "Stage: " << I->second << " Inst: " << *(I->first) << "\n";);
02613 
02614    //Clone instruction
02615    const MachineInstr *inst = I->first;
02616    MachineInstr *instClone = inst->clone();
02617 
02618    if(seenBranch && !mii->isBranch(instClone->getOpcode())) {
02619      index++;
02620      seenBranch = false;
02621      numBr = 0;
02622    }
02623    else if(seenBranch && (numBr == 2)) {
02624      index++;
02625      numBr = 0;
02626    }
02627 
02628    //Insert into machine basic block
02629    assert(index < machineBB.size() && "Must have a valid index into kernel MBBs");
02630    machineBB[index]->push_back(instClone);
02631 
02632    if(mii->isBranch(instClone->getOpcode())) {
02633      BuildMI(machineBB[index], V9::NOP, 0);
02634 
02635      seenBranch = true;
02636      numBr++;
02637    }
02638 
02639    DEBUG(std::cerr <<  "Cloned Inst: " << *instClone << "\n");
02640 
02641    //Loop over Machine Operands
02642    for(unsigned i=0; i < inst->getNumOperands(); ++i) {
02643      //get machine operand
02644      const MachineOperand &mOp = inst->getOperand(i);
02645 
02646      if(I->second != 0) {
02647        if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isUse()) {
02648 
02649          //Check to see where this operand is defined if this instruction is from max stage
02650          if(I->second == schedule.getMaxStage()) {
02651            DEBUG(std::cerr << "VREG: " << *(mOp.getVRegValue()) << "\n");
02652          }
02653 
02654          //If its in the value saved, we need to create a temp instruction and use that instead
02655          if(valuesToSave.count(mOp.getVRegValue())) {
02656 
02657            //Check if we already have a final PHI value for this
02658            if(!finalPHIValue.count(mOp.getVRegValue())) {
02659              //Only create phi if the operand def is from a stage before this one
02660              if(schedule.defPreviousStage(mOp.getVRegValue(), I->second)) {
02661              TmpInstruction *tmp = new TmpInstruction(mOp.getVRegValue());
02662 
02663              //Get machine code for this instruction
02664              MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
02665              tempMvec.addTemp((Value*) tmp);
02666 
02667              //Update the operand in the cloned instruction
02668              instClone->getOperand(i).setValueReg(tmp);
02669 
02670              //save this as our final phi
02671              finalPHIValue[mOp.getVRegValue()] = tmp;
02672              newValLocation[tmp] = machineBB[index];
02673              }
02674            }
02675            else {
02676              //Use the previous final phi value
02677              instClone->getOperand(i).setValueReg(finalPHIValue[mOp.getVRegValue()]);
02678            }
02679          }
02680        }
02681      }
02682      if(I->second != schedule.getMaxStage()) {
02683        if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) {
02684          if(valuesToSave.count(mOp.getVRegValue())) {
02685 
02686            TmpInstruction *tmp = new TmpInstruction(mOp.getVRegValue());
02687 
02688            //Get machine code for this instruction
02689            MachineCodeForInstruction & tempVec = MachineCodeForInstruction::get(defaultInst);
02690            tempVec.addTemp((Value*) tmp);
02691 
02692            //Create new machine instr and put in MBB
02693            MachineInstr *saveValue;
02694            if(mOp.getVRegValue()->getType() == Type::FloatTy)
02695              saveValue = BuildMI(machineBB[index], V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
02696            else if(mOp.getVRegValue()->getType() == Type::DoubleTy)
02697              saveValue = BuildMI(machineBB[index], V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
02698            else
02699              saveValue = BuildMI(machineBB[index], V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp);
02700 
02701 
02702            //Save for future cleanup
02703            kernelValue[mOp.getVRegValue()] = tmp;
02704            newValLocation[tmp] = machineBB[index];
02705            kernelPHIs[mOp.getVRegValue()][schedule.getMaxStage()-1] = tmp;
02706          }
02707        }
02708      }
02709    }
02710 
02711  }
02712 
02713   //Loop over each value we need to generate phis for
02714  for(std::map<Value*, std::map<int, Value*> >::iterator V = newValues.begin(),
02715        E = newValues.end(); V != E; ++V) {
02716 
02717 
02718    DEBUG(std::cerr << "Writing phi for" << *(V->first));
02719    DEBUG(std::cerr << "\nMap of Value* for this phi\n");
02720    DEBUG(for(std::map<int, Value*>::iterator I = V->second.begin(),
02721                IE = V->second.end(); I != IE; ++I) {
02722      std::cerr << "Stage: " << I->first;
02723      std::cerr << " Value: " << *(I->second) << "\n";
02724    });
02725 
02726    //If we only have one current iteration live, its safe to set
02727    //lastPhi = to kernel value
02728    if(V->second.size() == 1) {
02729      assert(kernelValue[V->first] != 0 && "Kernel value* must exist to create phi");
02730      MachineInstr *saveValue = BuildMI(*machineBB[0], machineBB[0]->begin(),V9::PHI, 3).addReg(V->second.begin()->second).addReg(kernelValue[V->first]).addRegDef(finalPHIValue[V->first]);
02731      DEBUG(std::cerr << "Resulting PHI (one live): " << *saveValue << "\n");
02732      kernelPHIs[V->first][V->second.begin()->first] = kernelValue[V->first];
02733      DEBUG(std::cerr << "Put kernel phi in at stage: " << schedule.getMaxStage()-1 << " (map stage = " << V->second.begin()->first << ")\n");
02734    }
02735    else {
02736 
02737      //Keep track of last phi created.
02738      Instruction *lastPhi = 0;
02739 
02740      unsigned count = 1;
02741      //Loop over the the map backwards to generate phis
02742      for(std::map<int, Value*>::reverse_iterator I = V->second.rbegin(), IE = V->second.rend();
02743          I != IE; ++I) {
02744 
02745        if(count < (V->second).size()) {
02746          if(lastPhi == 0) {
02747            lastPhi = new TmpInstruction(I->second);
02748 
02749            //Get machine code for this instruction
02750            MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
02751            tempMvec.addTemp((Value*) lastPhi);
02752 
02753            MachineInstr *saveValue = BuildMI(*machineBB[0], machineBB[0]->begin(), V9::PHI, 3).addReg(kernelValue[V->first]).addReg(I->second).addRegDef(lastPhi);
02754            DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n");
02755            newValLocation[lastPhi] = machineBB[0];
02756          }
02757          else {
02758            Instruction *tmp = new TmpInstruction(I->second);
02759 
02760            //Get machine code for this instruction
02761            MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
02762            tempMvec.addTemp((Value*) tmp);
02763 
02764 
02765            MachineInstr *saveValue = BuildMI(*machineBB[0], machineBB[0]->begin(), V9::PHI, 3).addReg(lastPhi).addReg(I->second).addRegDef(tmp);
02766            DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n");
02767            lastPhi = tmp;
02768            kernelPHIs[V->first][I->first] = lastPhi;
02769            newValLocation[lastPhi] = machineBB[0];
02770          }
02771        }
02772        //Final phi value
02773        else {
02774          //The resulting value must be the Value* we created earlier
02775          assert(lastPhi != 0 && "Last phi is NULL!\n");
02776          MachineInstr *saveValue = BuildMI(*machineBB[0], machineBB[0]->begin(), V9::PHI, 3).addReg(lastPhi).addReg(I->second).addRegDef(finalPHIValue[V->first]);
02777          DEBUG(std::cerr << "Resulting PHI: " << *saveValue << "\n");
02778          kernelPHIs[V->first][I->first] = finalPHIValue[V->first];
02779        }
02780 
02781        ++count;
02782      }
02783 
02784    }
02785  }
02786 }
02787 
02788 
02789 void ModuloSchedulingSBPass::removePHIs(std::vector<const MachineBasicBlock*> &SB, std::vector<std::vector<MachineBasicBlock*> > &prologues, std::vector<std::vector<MachineBasicBlock*> > &epilogues, std::vector<MachineBasicBlock*> &kernelBB, std::map<Value*, MachineBasicBlock*> &newValLocation) {
02790 
02791   //Worklist to delete things
02792   std::vector<std::pair<MachineBasicBlock*, MachineBasicBlock::iterator> > worklist;
02793 
02794   //Worklist of TmpInstructions that need to be added to a MCFI
02795   std::vector<Instruction*> addToMCFI;
02796 
02797   //Worklist to add OR instructions to end of kernel so not to invalidate the iterator
02798   //std::vector<std::pair<Instruction*, Value*> > newORs;
02799 
02800   const TargetInstrInfo *TMI = target.getInstrInfo();
02801 
02802   //Start with the kernel and for each phi insert a copy for the phi
02803   //def and for each arg
02804   //phis are only in the first BB in the kernel
02805   for(MachineBasicBlock::iterator I = kernelBB[0]->begin(), E = kernelBB[0]->end();
02806       I != E; ++I) {
02807 
02808     DEBUG(std::cerr << "Looking at Instr: " << *I << "\n");
02809 
02810     //Get op code and check if its a phi
02811     if(I->getOpcode() == V9::PHI) {
02812 
02813       DEBUG(std::cerr << "Replacing PHI: " << *I << "\n");
02814       Instruction *tmp = 0;
02815 
02816       for(unsigned i = 0; i < I->getNumOperands(); ++i) {
02817 
02818         //Get Operand
02819         const MachineOperand &mOp = I->getOperand(i);
02820         assert(mOp.getType() == MachineOperand::MO_VirtualRegister
02821                && "Should be a Value*\n");
02822 
02823         if(!tmp) {
02824           tmp = new TmpInstruction(mOp.getVRegValue());
02825           addToMCFI.push_back(tmp);
02826         }
02827 
02828         //Now for all our arguments we read, OR to the new
02829         //TmpInstruction that we created
02830         if(mOp.isUse()) {
02831           DEBUG(std::cerr << "Use: " << mOp << "\n");
02832           //Place a copy at the end of its BB but before the branches
02833           assert(newValLocation.count(mOp.getVRegValue()) && "We must know where this value is located\n");
02834           //Reverse iterate to find the branches, we can safely assume no instructions have been
02835           //put in the nop positions
02836           for(MachineBasicBlock::iterator inst = --(newValLocation[mOp.getVRegValue()])->end(), endBB = (newValLocation[mOp.getVRegValue()])->begin(); inst != endBB; --inst) {
02837             MachineOpCode opc = inst->getOpcode();
02838             if(TMI->isBranch(opc) || TMI->isNop(opc))
02839               continue;
02840             else {
02841               if(mOp.getVRegValue()->getType() == Type::FloatTy)
02842                 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
02843               else if(mOp.getVRegValue()->getType() == Type::DoubleTy)
02844                 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
02845               else
02846                 BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp);
02847 
02848               break;
02849             }
02850 
02851           }
02852 
02853         }
02854         else {
02855           //Remove the phi and replace it with an OR
02856           DEBUG(std::cerr << "Def: " << mOp << "\n");
02857           //newORs.push_back(std::make_pair(tmp, mOp.getVRegValue()));
02858           if(tmp->getType() == Type::FloatTy)
02859             BuildMI(*kernelBB[0], I, V9::FMOVS, 3).addReg(tmp).addRegDef(mOp.getVRegValue());
02860           else if(tmp->getType() == Type::DoubleTy)
02861             BuildMI(*kernelBB[0], I, V9::FMOVD, 3).addReg(tmp).addRegDef(mOp.getVRegValue());
02862           else
02863             BuildMI(*kernelBB[0], I, V9::ORr, 3).addReg(tmp).addImm(0).addRegDef(mOp.getVRegValue());
02864 
02865 
02866           worklist.push_back(std::make_pair(kernelBB[0], I));
02867         }
02868 
02869       }
02870 
02871     }
02872 
02873 
02874   }
02875 
02876   //Add TmpInstructions to some MCFI
02877   if(addToMCFI.size() > 0) {
02878     MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
02879     for(unsigned x = 0; x < addToMCFI.size(); ++x) {
02880       tempMvec.addTemp(addToMCFI[x]);
02881     }
02882     addToMCFI.clear();
02883   }
02884 
02885 
02886   //Remove phis from epilogue
02887   for(std::vector<std::vector<MachineBasicBlock*> >::iterator MB = epilogues.begin(),
02888         ME = epilogues.end(); MB != ME; ++MB) {
02889 
02890     for(std::vector<MachineBasicBlock*>::iterator currentMBB = MB->begin(), currentME = MB->end(); currentMBB != currentME; ++currentMBB) {
02891 
02892       for(MachineBasicBlock::iterator I = (*currentMBB)->begin(),
02893             E = (*currentMBB)->end(); I != E; ++I) {
02894 
02895         DEBUG(std::cerr << "Looking at Instr: " << *I << "\n");
02896         //Get op code and check if its a phi
02897         if(I->getOpcode() == V9::PHI) {
02898           Instruction *tmp = 0;
02899 
02900           for(unsigned i = 0; i < I->getNumOperands(); ++i) {
02901             //Get Operand
02902             const MachineOperand &mOp = I->getOperand(i);
02903             assert(mOp.getType() == MachineOperand::MO_VirtualRegister && "Should be a Value*\n");
02904 
02905             if(!tmp) {
02906               tmp = new TmpInstruction(mOp.getVRegValue());
02907               addToMCFI.push_back(tmp);
02908             }
02909 
02910             //Now for all our arguments we read, OR to the new TmpInstruction that we created
02911             if(mOp.isUse()) {
02912               DEBUG(std::cerr << "Use: " << mOp << "\n");
02913               //Place a copy at the end of its BB but before the branches
02914               assert(newValLocation.count(mOp.getVRegValue()) && "We must know where this value is located\n");
02915               //Reverse iterate to find the branches, we can safely assume no instructions have been
02916               //put in the nop positions
02917               for(MachineBasicBlock::iterator inst = --(newValLocation[mOp.getVRegValue()])->end(), endBB = (newValLocation[mOp.getVRegValue()])->begin(); inst != endBB; --inst) {
02918                 MachineOpCode opc = inst->getOpcode();
02919                 if(TMI->isBranch(opc) || TMI->isNop(opc))
02920                   continue;
02921                 else {
02922                   if(mOp.getVRegValue()->getType() == Type::FloatTy)
02923                     BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVS, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
02924                   else if(mOp.getVRegValue()->getType() == Type::DoubleTy)
02925                     BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::FMOVD, 3).addReg(mOp.getVRegValue()).addRegDef(tmp);
02926                   else
02927                     BuildMI(*(newValLocation[mOp.getVRegValue()]), ++inst, V9::ORr, 3).addReg(mOp.getVRegValue()).addImm(0).addRegDef(tmp);
02928 
02929 
02930                   break;
02931                 }
02932 
02933               }
02934 
02935             }
02936             else {
02937               //Remove the phi and replace it with an OR
02938               DEBUG(std::cerr << "Def: " << mOp << "\n");
02939               if(tmp->getType() == Type::FloatTy)
02940                 BuildMI(**currentMBB, I, V9::FMOVS, 3).addReg(tmp).addRegDef(mOp.getVRegValue());
02941               else if(tmp->getType() == Type::DoubleTy)
02942                 BuildMI(**currentMBB, I, V9::FMOVD, 3).addReg(tmp).addRegDef(mOp.getVRegValue());
02943               else
02944                 BuildMI(**currentMBB, I, V9::ORr, 3).addReg(tmp).addImm(0).addRegDef(mOp.getVRegValue());
02945 
02946               worklist.push_back(std::make_pair(*currentMBB,I));
02947             }
02948           }
02949         }
02950       }
02951     }
02952   }
02953 
02954 
02955   if(addToMCFI.size() > 0) {
02956     MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(defaultInst);
02957     for(unsigned x = 0; x < addToMCFI.size(); ++x) {
02958       tempMvec.addTemp(addToMCFI[x]);
02959     }
02960     addToMCFI.clear();
02961   }
02962 
02963     //Delete the phis
02964   for(std::vector<std::pair<MachineBasicBlock*, MachineBasicBlock::iterator> >::iterator I =  worklist.begin(), E = worklist.end(); I != E; ++I) {
02965     DEBUG(std::cerr << "Deleting PHI " << *I->second << "\n");
02966     I->first->erase(I->second);
02967 
02968   }
02969 
02970 
02971   assert((addToMCFI.size() == 0) && "We should have added all TmpInstructions to some MachineCodeForInstruction");
02972 }
02973 
02974 
02975 
02976 
02977 void ModuloSchedulingSBPass::writeSideExits(std::vector<std::vector<MachineBasicBlock *> > &prologues, std::vector<std::vector<BasicBlock*> > &llvm_prologues, std::vector<std::vector<MachineBasicBlock *> > &epilogues, std::vector<std::vector<BasicBlock*> > &llvm_epilogues, std::map<const MachineBasicBlock*, Value*> &sideExits, std::map<MachineBasicBlock*, std::vector<std::pair<MachineInstr*, int> > > &instrsMovedDown, std::vector<const MachineBasicBlock*> &SB, std::vector<MachineBasicBlock*> &kernelMBBs, std::map<MachineBasicBlock*, int> branchStage) {
02978 
02979   const TargetInstrInfo *TMI = target.getInstrInfo();
02980 
02981   //Repeat for each side exit
02982   for(unsigned sideExitNum = 0; sideExitNum < SB.size()-1; ++sideExitNum) {
02983 
02984     std::vector<std::vector<BasicBlock*> > side_llvm_epilogues;
02985     std::vector<std::vector<MachineBasicBlock*> > side_epilogues;
02986     MachineBasicBlock* sideMBB;
02987     BasicBlock* sideBB;
02988 
02989     //Create side exit blocks
02990     //Get the LLVM basic block
02991     BasicBlock *bb = (BasicBlock*) SB[sideExitNum]->getBasicBlock();
02992     MachineBasicBlock *mbb = (MachineBasicBlock*) SB[sideExitNum];
02993 
02994     int stage = branchStage[mbb];
02995 
02996     //Create new basic blocks for our side exit instructios that were moved below the branch
02997     sideBB = new BasicBlock("SideExit", (Function*) bb->getParent());
02998     sideMBB = new MachineBasicBlock(sideBB);
02999     (((MachineBasicBlock*)SB[0])->getParent())->getBasicBlockList().push_back(sideMBB);
03000 
03001 
03002     if(instrsMovedDown.count(mbb)) {
03003       for(std::vector<std::pair<MachineInstr*, int> >::iterator I = instrsMovedDown[mbb].begin(), E = instrsMovedDown[mbb].end(); I != E; ++I) {
03004         if(branchStage[mbb] == I->second)
03005           sideMBB->push_back((I->first)->clone());
03006       }
03007 
03008       //Add unconditional branches to original exits
03009       BuildMI(sideMBB, V9::BA, 1).addPCDisp(sideExits[mbb]);
03010       BuildMI(sideMBB, V9::NOP, 0);
03011 
03012       //Add unconditioal branch to llvm BB
03013       BasicBlock *extBB = dyn_cast<BasicBlock>(sideExits[mbb]);
03014       assert(extBB && "Side exit basicblock can not be null");
03015       TerminatorInst *newBranch = new BranchInst(extBB, sideBB);
03016     }
03017 
03018      //Clone epilogues and update their branches, one cloned epilogue set per side exit
03019     //only clone epilogues that are from a greater stage!
03020     for(unsigned i = 0; i < epilogues.size()-stage; ++i) {
03021       std::vector<MachineBasicBlock*> MB = epilogues[i];
03022 
03023       std::vector<MachineBasicBlock*> newEp;
03024       std::vector<BasicBlock*> newLLVMEp;
03025 
03026       for(std::vector<MachineBasicBlock*>::iterator currentMBB = MB.begin(),
03027             lastMBB = MB.end(); currentMBB != lastMBB; ++currentMBB) {
03028         BasicBlock *tmpBB = new BasicBlock("SideEpilogue", (Function*) (*currentMBB)->getBasicBlock()->getParent());
03029         MachineBasicBlock *tmp = new MachineBasicBlock(tmpBB);
03030 
03031         //Clone instructions and insert into new MBB
03032         for(MachineBasicBlock::iterator I = (*currentMBB)->begin(),
03033               E = (*currentMBB)->end(); I != E; ++I) {
03034 
03035           MachineInstr *clone = I->clone();
03036           if(clone->getOpcode() == V9::BA && (currentMBB+1 == lastMBB)) {
03037             //update branch to side exit
03038             for(unsigned i = 0; i < clone->getNumOperands(); ++i) {
03039               MachineOperand &mOp = clone->getOperand(i);
03040               if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) {
03041                 mOp.setValueReg(sideBB);
03042               }
03043             }
03044           }
03045 
03046           tmp->push_back(clone);
03047 
03048         }
03049 
03050         //Add llvm branch
03051         TerminatorInst *newBranch = new BranchInst(sideBB, tmpBB);
03052 
03053         newEp.push_back(tmp);
03054         (((MachineBasicBlock*)SB[0])->getParent())->getBasicBlockList().push_back(tmp);
03055 
03056         newLLVMEp.push_back(tmpBB);
03057 
03058       }
03059       side_llvm_epilogues.push_back(newLLVMEp);
03060       side_epilogues.push_back(newEp);
03061     }
03062 
03063   //Now stich up all the branches
03064 
03065   //Loop over prologues, and if its an inner branch and branches to our original side exit
03066   //then have it branch to the appropriate epilogue first (if it exists)
03067     for(unsigned P = 0; P < prologues.size(); ++P) {
03068 
03069       //Get BB side exit we are dealing with
03070       MachineBasicBlock *currentMBB = prologues[P][sideExitNum];
03071       if(P >= (unsigned) stage) {
03072         //Iterate backwards of machine instructions to find the branch we need to update
03073         for(MachineBasicBlock::reverse_iterator mInst = currentMBB->rbegin(), mInstEnd = currentMBB->rend(); mInst != mInstEnd; ++mInst) {
03074           MachineOpCode OC = mInst->getOpcode();
03075 
03076           //If its a branch update its branchto
03077           if(TMI->isBranch(OC)) {
03078             for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) {
03079               MachineOperand &mOp = mInst->getOperand(opNum);
03080               if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) {
03081                 //Check if we branch to side exit
03082                 if(mOp.getVRegValue() == sideExits[mbb]) {
03083                   mOp.setValueReg(side_llvm_epilogues[P][0]);
03084                 }
03085               }
03086             }
03087             DEBUG(std::cerr << "New Prologue Branch: " << *mInst << "\n");
03088           }
03089         }
03090 
03091         //Update llvm branch
03092         TerminatorInst *branchVal = ((BasicBlock*) currentMBB->getBasicBlock())->getTerminator();
03093         DEBUG(std::cerr << *branchVal << "\n");
03094 
03095         for(unsigned i=0; i < branchVal->getNumSuccessors(); ++i) {
03096           if(branchVal->getSuccessor(i) == sideExits[mbb]) {
03097             DEBUG(std::cerr << "Replacing successor bb\n");
03098             branchVal->setSuccessor(i, side_llvm_epilogues[P][0]);
03099           }
03100         }
03101       }
03102       else {
03103         //must add BA branch because another prologue or kernel has the actual side exit branch
03104          //Add unconditional branches to original exits
03105         assert( (sideExitNum+1) < prologues[P].size() && "must have valid prologue to branch to");
03106         BuildMI(prologues[P][sideExitNum], V9::BA, 1).addPCDisp((BasicBlock*)(prologues[P][sideExitNum+1])->getBasicBlock());
03107         BuildMI(prologues[P][sideExitNum], V9::NOP, 0);
03108 
03109         TerminatorInst *newBranch = new BranchInst((BasicBlock*) (prologues[P][sideExitNum+1])->getBasicBlock(), (BasicBlock*) (prologues[P][sideExitNum])->getBasicBlock());
03110 
03111       }
03112     }
03113 
03114 
03115     //Update side exits in kernel
03116     MachineBasicBlock *currentMBB = kernelMBBs[sideExitNum];
03117     //Iterate backwards of machine instructions to find the branch we need to update
03118     for(MachineBasicBlock::reverse_iterator mInst = currentMBB->rbegin(), mInstEnd = currentMBB->rend(); mInst != mInstEnd; ++mInst) {
03119       MachineOpCode OC = mInst->getOpcode();
03120 
03121       //If its a branch update its branchto
03122       if(TMI->isBranch(OC)) {
03123         for(unsigned opNum = 0; opNum < mInst->getNumOperands(); ++opNum) {
03124           MachineOperand &mOp = mInst->getOperand(opNum);
03125           if (mOp.getType() == MachineOperand::MO_PCRelativeDisp) {
03126             //Check if we branch to side exit
03127             if(mOp.getVRegValue() == sideExits[mbb]) {
03128               if(side_llvm_epilogues.size() > 0)
03129                 mOp.setValueReg(side_llvm_epilogues[0][0]);
03130               else
03131                 mOp.setValueReg(sideBB);
03132             }
03133           }
03134         }
03135         DEBUG(std::cerr << "New Prologue Branch: " << *mInst << "\n");
03136       }
03137     }
03138 
03139     //Update llvm branch
03140     //Update llvm branch
03141     TerminatorInst *branchVal = ((BasicBlock*)currentMBB->getBasicBlock())->getTerminator();
03142     DEBUG(std::cerr << *branchVal << "\n");
03143 
03144     for(unsigned i=0; i < branchVal->getNumSuccessors(); ++i) {
03145       if(branchVal->getSuccessor(i) == sideExits[mbb]) {
03146         DEBUG(std::cerr << "Replacing successor bb\n");
03147         if(side_llvm_epilogues.size() > 0)
03148           branchVal->setSuccessor(i, side_llvm_epilogues[0][0]);
03149         else
03150           branchVal->setSuccessor(i, sideBB);
03151       }
03152     }
03153   }
03154 }
03155