LLVM API Documentation

InstrScheduling.cpp

Go to the documentation of this file.
00001 //===- InstrScheduling.cpp - Generic Instruction Scheduling support -------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the llvm/CodeGen/InstrScheduling.h interface, along with
00011 // generic support routines for instruction scheduling.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "SchedPriorities.h"
00016 #include "llvm/BasicBlock.h"
00017 #include "llvm/CodeGen/MachineInstr.h"
00018 #include "llvm/CodeGen/MachineFunction.h"
00019 #include "llvm/Target/TargetMachine.h"
00020 #include "../MachineCodeForInstruction.h"
00021 #include "../LiveVar/FunctionLiveVarInfo.h"
00022 #include "../SparcV9InstrInfo.h"
00023 #include "llvm/Support/CommandLine.h"
00024 #include <algorithm>
00025 #include <iostream>
00026 
00027 namespace llvm {
00028 
00029 SchedDebugLevel_t SchedDebugLevel;
00030 
00031 static cl::opt<bool> EnableFillingDelaySlots("sched-fill-delay-slots", cl::Hidden,
00032               cl::desc("Fill branch delay slots during local scheduling"));
00033 
00034 static cl::opt<SchedDebugLevel_t, true>
00035 SDL_opt("dsched", cl::Hidden, cl::location(SchedDebugLevel),
00036         cl::desc("enable instruction scheduling debugging information"),
00037         cl::values(
00038  clEnumValN(Sched_NoDebugInfo,      "n", "disable debug output"),
00039  clEnumValN(Sched_PrintMachineCode, "y", "print machine code after scheduling"),
00040  clEnumValN(Sched_PrintSchedTrace,  "t", "print trace of scheduling actions"),
00041  clEnumValN(Sched_PrintSchedGraphs, "g", "print scheduling graphs"),
00042                    clEnumValEnd));
00043 
00044 
00045 //************************* Internal Data Types *****************************/
00046 
00047 class InstrSchedule;
00048 class SchedulingManager;
00049 
00050 
00051 //----------------------------------------------------------------------
00052 // class InstrGroup:
00053 //
00054 // Represents a group of instructions scheduled to be issued
00055 // in a single cycle.
00056 //----------------------------------------------------------------------
00057 
00058 class InstrGroup {
00059   InstrGroup(const InstrGroup&);       // DO NOT IMPLEMENT
00060   void operator=(const InstrGroup&);   // DO NOT IMPLEMENT
00061 
00062 public:
00063   inline const SchedGraphNode* operator[](unsigned int slotNum) const {
00064     assert(slotNum  < group.size());
00065     return group[slotNum];
00066   }
00067 
00068 private:
00069   friend class InstrSchedule;
00070 
00071   inline void   addInstr(const SchedGraphNode* node, unsigned int slotNum) {
00072     assert(slotNum < group.size());
00073     group[slotNum] = node;
00074   }
00075 
00076   /*ctor*/      InstrGroup(unsigned int nslots)
00077     : group(nslots, NULL) {}
00078 
00079   /*ctor*/      InstrGroup();           // disable: DO NOT IMPLEMENT
00080 
00081 private:
00082   std::vector<const SchedGraphNode*> group;
00083 };
00084 
00085 
00086 //----------------------------------------------------------------------
00087 // class ScheduleIterator:
00088 //
00089 // Iterates over the machine instructions in the for a single basic block.
00090 // The schedule is represented by an InstrSchedule object.
00091 //----------------------------------------------------------------------
00092 
00093 template<class _NodeType>
00094 class ScheduleIterator : public forward_iterator<_NodeType, ptrdiff_t> {
00095 private:
00096   unsigned cycleNum;
00097   unsigned slotNum;
00098   const InstrSchedule& S;
00099 public:
00100   typedef ScheduleIterator<_NodeType> _Self;
00101 
00102   /*ctor*/ inline ScheduleIterator(const InstrSchedule& _schedule,
00103                                    unsigned _cycleNum,
00104                                    unsigned _slotNum)
00105     : cycleNum(_cycleNum), slotNum(_slotNum), S(_schedule) {
00106     skipToNextInstr();
00107   }
00108 
00109   /*ctor*/ inline ScheduleIterator(const _Self& x)
00110     : cycleNum(x.cycleNum), slotNum(x.slotNum), S(x.S) {}
00111 
00112   inline bool operator==(const _Self& x) const {
00113     return (slotNum == x.slotNum && cycleNum== x.cycleNum && &S==&x.S);
00114   }
00115 
00116   inline bool operator!=(const _Self& x) const { return !operator==(x); }
00117 
00118   inline _NodeType* operator*() const;
00119   inline _NodeType* operator->() const { return operator*(); }
00120 
00121          _Self& operator++();                           // Preincrement
00122   inline _Self operator++(int) {                        // Postincrement
00123     _Self tmp(*this); ++*this; return tmp;
00124   }
00125 
00126   static _Self begin(const InstrSchedule& _schedule);
00127   static _Self end(  const InstrSchedule& _schedule);
00128 
00129 private:
00130   inline _Self& operator=(const _Self& x); // DISABLE -- DO NOT IMPLEMENT
00131   void  skipToNextInstr();
00132 };
00133 
00134 
00135 //----------------------------------------------------------------------
00136 // class InstrSchedule:
00137 //
00138 // Represents the schedule of machine instructions for a single basic block.
00139 //----------------------------------------------------------------------
00140 
00141 class InstrSchedule {
00142   const unsigned int nslots;
00143   unsigned int numInstr;
00144   std::vector<InstrGroup*> groups;              // indexed by cycle number
00145   std::vector<CycleCount_t> startTime;          // indexed by node id
00146 
00147   InstrSchedule(InstrSchedule&);   // DO NOT IMPLEMENT
00148   void operator=(InstrSchedule&);  // DO NOT IMPLEMENT
00149 
00150 public: // iterators
00151   typedef ScheduleIterator<SchedGraphNode> iterator;
00152   typedef ScheduleIterator<const SchedGraphNode> const_iterator;
00153 
00154         iterator begin()       { return iterator::begin(*this); }
00155   const_iterator begin() const { return const_iterator::begin(*this); }
00156         iterator end()         { return iterator::end(*this); }
00157   const_iterator end() const   { return const_iterator::end(*this); }
00158 
00159 public: // constructors and destructor
00160   /*ctor*/              InstrSchedule   (unsigned int _nslots,
00161                                          unsigned int _numNodes);
00162   /*dtor*/              ~InstrSchedule  ();
00163 
00164 public: // accessor functions to query chosen schedule
00165   const SchedGraphNode* getInstr        (unsigned int slotNum,
00166                                          CycleCount_t c) {
00167     const InstrGroup* igroup = this->getIGroup(c);
00168     return (igroup == NULL)? NULL : (*igroup)[slotNum];
00169   }
00170 
00171   inline InstrGroup*    getIGroup       (CycleCount_t c) {
00172     if ((unsigned)c >= groups.size())
00173       groups.resize(c+1);
00174     if (groups[c] == NULL)
00175       groups[c] = new InstrGroup(nslots);
00176     return groups[c];
00177   }
00178 
00179   inline const InstrGroup* getIGroup    (CycleCount_t c) const {
00180     assert((unsigned)c < groups.size());
00181     return groups[c];
00182   }
00183 
00184   inline CycleCount_t   getStartTime    (unsigned int nodeId) const {
00185     assert(nodeId < startTime.size());
00186     return startTime[nodeId];
00187   }
00188 
00189   unsigned int          getNumInstructions() const {
00190     return numInstr;
00191   }
00192 
00193   inline void           scheduleInstr   (const SchedGraphNode* node,
00194                                          unsigned int slotNum,
00195                                          CycleCount_t cycle) {
00196     InstrGroup* igroup = this->getIGroup(cycle);
00197     if (!((*igroup)[slotNum] == NULL)) {
00198       std::cerr << "Slot already filled?\n";
00199       abort();
00200     }
00201     igroup->addInstr(node, slotNum);
00202     assert(node->getNodeId() < startTime.size());
00203     startTime[node->getNodeId()] = cycle;
00204     ++numInstr;
00205   }
00206 
00207 private:
00208   friend class ScheduleIterator<SchedGraphNode>;
00209   friend class ScheduleIterator<const SchedGraphNode>;
00210   /*ctor*/      InstrSchedule   ();     // Disable: DO NOT IMPLEMENT.
00211 };
00212 
00213 template<class NodeType>
00214 inline NodeType *ScheduleIterator<NodeType>::operator*() const {
00215   assert(cycleNum < S.groups.size());
00216   return (*S.groups[cycleNum])[slotNum];
00217 }
00218 
00219 
00220 /*ctor*/
00221 InstrSchedule::InstrSchedule(unsigned int _nslots, unsigned int _numNodes)
00222   : nslots(_nslots),
00223     numInstr(0),
00224     groups(2 * _numNodes / _nslots),            // 2 x lower-bound for #cycles
00225     startTime(_numNodes, (CycleCount_t) -1)             // set all to -1
00226 {
00227 }
00228 
00229 
00230 /*dtor*/
00231 InstrSchedule::~InstrSchedule()
00232 {
00233   for (unsigned c=0, NC=groups.size(); c < NC; c++)
00234     if (groups[c] != NULL)
00235       delete groups[c];                 // delete InstrGroup objects
00236 }
00237 
00238 
00239 template<class _NodeType>
00240 inline
00241 void
00242 ScheduleIterator<_NodeType>::skipToNextInstr()
00243 {
00244   while(cycleNum < S.groups.size() && S.groups[cycleNum] == NULL)
00245     ++cycleNum;                 // skip cycles with no instructions
00246 
00247   while (cycleNum < S.groups.size() &&
00248          (*S.groups[cycleNum])[slotNum] == NULL)
00249   {
00250     ++slotNum;
00251     if (slotNum == S.nslots) {
00252       ++cycleNum;
00253       slotNum = 0;
00254       while(cycleNum < S.groups.size() && S.groups[cycleNum] == NULL)
00255         ++cycleNum;                     // skip cycles with no instructions
00256     }
00257   }
00258 }
00259 
00260 template<class _NodeType>
00261 inline
00262 ScheduleIterator<_NodeType>&
00263 ScheduleIterator<_NodeType>::operator++()       // Preincrement
00264 {
00265   ++slotNum;
00266   if (slotNum == S.nslots) {
00267     ++cycleNum;
00268     slotNum = 0;
00269   }
00270   skipToNextInstr();
00271   return *this;
00272 }
00273 
00274 template<class _NodeType>
00275 ScheduleIterator<_NodeType>
00276 ScheduleIterator<_NodeType>::begin(const InstrSchedule& _schedule)
00277 {
00278   return _Self(_schedule, 0, 0);
00279 }
00280 
00281 template<class _NodeType>
00282 ScheduleIterator<_NodeType>
00283 ScheduleIterator<_NodeType>::end(const InstrSchedule& _schedule)
00284 {
00285   return _Self(_schedule, _schedule.groups.size(), 0);
00286 }
00287 
00288 
00289 //----------------------------------------------------------------------
00290 // class DelaySlotInfo:
00291 //
00292 // Record information about delay slots for a single branch instruction.
00293 // Delay slots are simply indexed by slot number 1 ... numDelaySlots
00294 //----------------------------------------------------------------------
00295 
00296 class DelaySlotInfo {
00297   const SchedGraphNode* brNode;
00298   unsigned ndelays;
00299   std::vector<const SchedGraphNode*> delayNodeVec;
00300   CycleCount_t delayedNodeCycle;
00301   unsigned delayedNodeSlotNum;
00302 
00303   DelaySlotInfo(const DelaySlotInfo &);  // DO NOT IMPLEMENT
00304   void operator=(const DelaySlotInfo&);  // DO NOT IMPLEMENT
00305 public:
00306   /*ctor*/      DelaySlotInfo           (const SchedGraphNode* _brNode,
00307                                          unsigned _ndelays)
00308     : brNode(_brNode), ndelays(_ndelays),
00309       delayedNodeCycle(0), delayedNodeSlotNum(0) {}
00310 
00311   inline unsigned getNumDelays  () {
00312     return ndelays;
00313   }
00314 
00315   inline const std::vector<const SchedGraphNode*>& getDelayNodeVec() {
00316     return delayNodeVec;
00317   }
00318 
00319   inline void   addDelayNode            (const SchedGraphNode* node) {
00320     delayNodeVec.push_back(node);
00321     assert(delayNodeVec.size() <= ndelays && "Too many delay slot instrs!");
00322   }
00323 
00324   inline void   recordChosenSlot        (CycleCount_t cycle, unsigned slotNum) {
00325     delayedNodeCycle = cycle;
00326     delayedNodeSlotNum = slotNum;
00327   }
00328 
00329   unsigned      scheduleDelayedNode     (SchedulingManager& S);
00330 };
00331 
00332 
00333 //----------------------------------------------------------------------
00334 // class SchedulingManager:
00335 //
00336 // Represents the schedule of machine instructions for a single basic block.
00337 //----------------------------------------------------------------------
00338 
00339 class SchedulingManager {
00340   SchedulingManager(SchedulingManager &);    // DO NOT IMPLEMENT
00341   void operator=(const SchedulingManager &); // DO NOT IMPLEMENT
00342 public: // publicly accessible data members
00343   const unsigned nslots;
00344   const TargetSchedInfo& schedInfo;
00345   SchedPriorities& schedPrio;
00346   InstrSchedule isched;
00347 
00348 private:
00349   unsigned totalInstrCount;
00350   CycleCount_t curTime;
00351   CycleCount_t nextEarliestIssueTime;           // next cycle we can issue
00352   // indexed by slot#
00353   std::vector<hash_set<const SchedGraphNode*> > choicesForSlot;
00354   std::vector<const SchedGraphNode*> choiceVec; // indexed by node ptr
00355   std::vector<int> numInClass;                  // indexed by sched class
00356   std::vector<CycleCount_t> nextEarliestStartTime;      // indexed by opCode
00357   hash_map<const SchedGraphNode*, DelaySlotInfo*> delaySlotInfoForBranches;
00358                                                 // indexed by branch node ptr
00359 
00360 public:
00361   SchedulingManager(const TargetMachine& _target, const SchedGraph* graph,
00362                     SchedPriorities& schedPrio);
00363   ~SchedulingManager() {
00364     for (hash_map<const SchedGraphNode*,
00365            DelaySlotInfo*>::iterator I = delaySlotInfoForBranches.begin(),
00366            E = delaySlotInfoForBranches.end(); I != E; ++I)
00367       delete I->second;
00368   }
00369 
00370   //----------------------------------------------------------------------
00371   // Simplify access to the machine instruction info
00372   //----------------------------------------------------------------------
00373 
00374   inline const TargetInstrInfo& getInstrInfo    () const {
00375     return schedInfo.getInstrInfo();
00376   }
00377 
00378   //----------------------------------------------------------------------
00379   // Interface for checking and updating the current time
00380   //----------------------------------------------------------------------
00381 
00382   inline CycleCount_t   getTime                 () const {
00383     return curTime;
00384   }
00385 
00386   inline CycleCount_t   getEarliestIssueTime() const {
00387     return nextEarliestIssueTime;
00388   }
00389 
00390   inline CycleCount_t   getEarliestStartTimeForOp(MachineOpCode opCode) const {
00391     assert(opCode < (int) nextEarliestStartTime.size());
00392     return nextEarliestStartTime[opCode];
00393   }
00394 
00395   // Update current time to specified cycle
00396   inline void   updateTime              (CycleCount_t c) {
00397     curTime = c;
00398     schedPrio.updateTime(c);
00399   }
00400 
00401   //----------------------------------------------------------------------
00402   // Functions to manage the choices for the current cycle including:
00403   // -- a vector of choices by priority (choiceVec)
00404   // -- vectors of the choices for each instruction slot (choicesForSlot[])
00405   // -- number of choices in each sched class, used to check issue conflicts
00406   //    between choices for a single cycle
00407   //----------------------------------------------------------------------
00408 
00409   inline unsigned int getNumChoices     () const {
00410     return choiceVec.size();
00411   }
00412 
00413   inline unsigned getNumChoicesInClass  (const InstrSchedClass& sc) const {
00414     assert(sc < numInClass.size() && "Invalid op code or sched class!");
00415     return numInClass[sc];
00416   }
00417 
00418   inline const SchedGraphNode* getChoice(unsigned int i) const {
00419     // assert(i < choiceVec.size());    don't check here.
00420     return choiceVec[i];
00421   }
00422 
00423   inline hash_set<const SchedGraphNode*>& getChoicesForSlot(unsigned slotNum) {
00424     assert(slotNum < nslots);
00425     return choicesForSlot[slotNum];
00426   }
00427 
00428   inline void   addChoice               (const SchedGraphNode* node) {
00429     // Append the instruction to the vector of choices for current cycle.
00430     // Increment numInClass[c] for the sched class to which the instr belongs.
00431     choiceVec.push_back(node);
00432     const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpcode());
00433     assert(sc < numInClass.size());
00434     numInClass[sc]++;
00435   }
00436 
00437   inline void   addChoiceToSlot         (unsigned int slotNum,
00438                                          const SchedGraphNode* node) {
00439     // Add the instruction to the choice set for the specified slot
00440     assert(slotNum < nslots);
00441     choicesForSlot[slotNum].insert(node);
00442   }
00443 
00444   inline void   resetChoices            () {
00445     choiceVec.clear();
00446     for (unsigned int s=0; s < nslots; s++)
00447       choicesForSlot[s].clear();
00448     for (unsigned int c=0; c < numInClass.size(); c++)
00449       numInClass[c] = 0;
00450   }
00451 
00452   //----------------------------------------------------------------------
00453   // Code to query and manage the partial instruction schedule so far
00454   //----------------------------------------------------------------------
00455 
00456   inline unsigned int   getNumScheduled () const {
00457     return isched.getNumInstructions();
00458   }
00459 
00460   inline unsigned int   getNumUnscheduled() const {
00461     return totalInstrCount - isched.getNumInstructions();
00462   }
00463 
00464   inline bool           isScheduled     (const SchedGraphNode* node) const {
00465     return (isched.getStartTime(node->getNodeId()) >= 0);
00466   }
00467 
00468   inline void   scheduleInstr           (const SchedGraphNode* node,
00469                                          unsigned int slotNum,
00470                                          CycleCount_t cycle)
00471   {
00472     assert(! isScheduled(node) && "Instruction already scheduled?");
00473 
00474     // add the instruction to the schedule
00475     isched.scheduleInstr(node, slotNum, cycle);
00476 
00477     // update the earliest start times of all nodes that conflict with `node'
00478     // and the next-earliest time anything can issue if `node' causes bubbles
00479     updateEarliestStartTimes(node, cycle);
00480 
00481     // remove the instruction from the choice sets for all slots
00482     for (unsigned s=0; s < nslots; s++)
00483       choicesForSlot[s].erase(node);
00484 
00485     // and decrement the instr count for the sched class to which it belongs
00486     const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpcode());
00487     assert(sc < numInClass.size());
00488     numInClass[sc]--;
00489   }
00490 
00491   //----------------------------------------------------------------------
00492   // Create and retrieve delay slot info for delayed instructions
00493   //----------------------------------------------------------------------
00494 
00495   inline DelaySlotInfo* getDelaySlotInfoForInstr(const SchedGraphNode* bn,
00496                                                  bool createIfMissing=false)
00497   {
00498     hash_map<const SchedGraphNode*, DelaySlotInfo*>::const_iterator
00499       I = delaySlotInfoForBranches.find(bn);
00500     if (I != delaySlotInfoForBranches.end())
00501       return I->second;
00502 
00503     if (!createIfMissing) return 0;
00504 
00505     DelaySlotInfo *dinfo =
00506       new DelaySlotInfo(bn, getInstrInfo().getNumDelaySlots(bn->getOpcode()));
00507     return delaySlotInfoForBranches[bn] = dinfo;
00508   }
00509 
00510 private:
00511   SchedulingManager();     // DISABLED: DO NOT IMPLEMENT
00512   void updateEarliestStartTimes(const SchedGraphNode* node, CycleCount_t schedTime);
00513 };
00514 
00515 
00516 /*ctor*/
00517 SchedulingManager::SchedulingManager(const TargetMachine& target,
00518                                      const SchedGraph* graph,
00519                                      SchedPriorities& _schedPrio)
00520   : nslots(target.getSchedInfo()->getMaxNumIssueTotal()),
00521     schedInfo(*target.getSchedInfo()),
00522     schedPrio(_schedPrio),
00523     isched(nslots, graph->getNumNodes()),
00524     totalInstrCount(graph->getNumNodes() - 2),
00525     nextEarliestIssueTime(0),
00526     choicesForSlot(nslots),
00527     numInClass(target.getSchedInfo()->getNumSchedClasses(), 0), // set all to 0
00528     nextEarliestStartTime(target.getInstrInfo()->getNumOpcodes(),
00529                           (CycleCount_t) 0)                             // set all to 0
00530 {
00531   updateTime(0);
00532 
00533   // Note that an upper bound on #choices for each slot is = nslots since
00534   // we use this vector to hold a feasible set of instructions, and more
00535   // would be infeasible. Reserve that much memory since it is probably small.
00536   for (unsigned int i=0; i < nslots; i++)
00537     choicesForSlot[i].resize(nslots);
00538 }
00539 
00540 
00541 void
00542 SchedulingManager::updateEarliestStartTimes(const SchedGraphNode* node,
00543                                             CycleCount_t schedTime)
00544 {
00545   if (schedInfo.numBubblesAfter(node->getOpcode()) > 0)
00546     { // Update next earliest time before which *nothing* can issue.
00547       nextEarliestIssueTime = std::max(nextEarliestIssueTime,
00548                   curTime + 1 + schedInfo.numBubblesAfter(node->getOpcode()));
00549     }
00550 
00551   const std::vector<MachineOpCode>&
00552     conflictVec = schedInfo.getConflictList(node->getOpcode());
00553 
00554   for (unsigned i=0; i < conflictVec.size(); i++)
00555     {
00556       MachineOpCode toOp = conflictVec[i];
00557       CycleCount_t est=schedTime + schedInfo.getMinIssueGap(node->getOpcode(),toOp);
00558       assert(toOp < (int) nextEarliestStartTime.size());
00559       if (nextEarliestStartTime[toOp] < est)
00560         nextEarliestStartTime[toOp] = est;
00561     }
00562 }
00563 
00564 //************************* Internal Functions *****************************/
00565 
00566 
00567 static void
00568 AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue)
00569 {
00570   // find the slot to start from, in the current cycle
00571   unsigned int startSlot = 0;
00572   CycleCount_t curTime = S.getTime();
00573 
00574   assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot);
00575 
00576   // If only one instruction can be issued, do so.
00577   if (maxIssue == 1)
00578     for (unsigned s=startSlot; s < S.nslots; s++)
00579       if (S.getChoicesForSlot(s).size() > 0) {
00580         // found the one instruction
00581         S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime);
00582         return;
00583       }
00584 
00585   // Otherwise, choose from the choices for each slot
00586   //
00587   InstrGroup* igroup = S.isched.getIGroup(S.getTime());
00588   assert(igroup != NULL && "Group creation failed?");
00589 
00590   // Find a slot that has only a single choice, and take it.
00591   // If all slots have 0 or multiple choices, pick the first slot with
00592   // choices and use its last instruction (just to avoid shifting the vector).
00593   unsigned numIssued;
00594   for (numIssued = 0; numIssued < maxIssue; numIssued++) {
00595     int chosenSlot = -1;
00596     for (unsigned s=startSlot; s < S.nslots; s++)
00597       if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1) {
00598         chosenSlot = (int) s;
00599         break;
00600       }
00601 
00602     if (chosenSlot == -1)
00603       for (unsigned s=startSlot; s < S.nslots; s++)
00604         if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0) {
00605           chosenSlot = (int) s;
00606           break;
00607         }
00608 
00609     if (chosenSlot != -1) {
00610       // Insert the chosen instr in the chosen slot and
00611       // erase it from all slots.
00612       const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin();
00613       S.scheduleInstr(node, chosenSlot, curTime);
00614     }
00615   }
00616 
00617   assert(numIssued > 0 && "Should not happen when maxIssue > 0!");
00618 }
00619 
00620 
00621 //
00622 // For now, just assume we are scheduling within a single basic block.
00623 // Get the machine instruction vector for the basic block and clear it,
00624 // then append instructions in scheduled order.
00625 // Also, re-insert the dummy PHI instructions that were at the beginning
00626 // of the basic block, since they are not part of the schedule.
00627 //
00628 static void
00629 RecordSchedule(MachineBasicBlock &MBB, const SchedulingManager& S)
00630 {
00631   const TargetInstrInfo& mii = S.schedInfo.getInstrInfo();
00632 
00633   // Lets make sure we didn't lose any instructions, except possibly
00634   // some NOPs from delay slots.  Also, PHIs are not included in the schedule.
00635   unsigned numInstr = 0;
00636   for (MachineBasicBlock::iterator I=MBB.begin(); I != MBB.end(); ++I)
00637     if (!(I->getOpcode() == V9::NOP || I->getOpcode() == V9::PHI))
00638       ++numInstr;
00639   assert(S.isched.getNumInstructions() >= numInstr &&
00640          "Lost some non-NOP instructions during scheduling!");
00641 
00642   if (S.isched.getNumInstructions() == 0)
00643     return;                             // empty basic block!
00644 
00645   // First find the dummy instructions at the start of the basic block
00646   MachineBasicBlock::iterator I = MBB.begin();
00647   for ( ; I != MBB.end(); ++I)
00648     if (I->getOpcode() != V9::PHI)
00649       break;
00650 
00651   // Remove all except the dummy PHI instructions from MBB, and
00652   // pre-allocate create space for the ones we will put back in.
00653   while (I != MBB.end())
00654     MBB.remove(I++);
00655 
00656   InstrSchedule::const_iterator NIend = S.isched.end();
00657   for (InstrSchedule::const_iterator NI = S.isched.begin(); NI != NIend; ++NI)
00658     MBB.push_back(const_cast<MachineInstr*>((*NI)->getMachineInstr()));
00659 }
00660 
00661 
00662 
00663 static void
00664 MarkSuccessorsReady(SchedulingManager& S, const SchedGraphNode* node)
00665 {
00666   // Check if any successors are now ready that were not already marked
00667   // ready before, and that have not yet been scheduled.
00668   //
00669   for (sg_succ_const_iterator SI = succ_begin(node); SI !=succ_end(node); ++SI)
00670     if (! (*SI)->isDummyNode()
00671         && ! S.isScheduled(*SI)
00672         && ! S.schedPrio.nodeIsReady(*SI))
00673     {
00674       // successor not scheduled and not marked ready; check *its* preds.
00675 
00676       bool succIsReady = true;
00677       for (sg_pred_const_iterator P=pred_begin(*SI); P != pred_end(*SI); ++P)
00678         if (! (*P)->isDummyNode() && ! S.isScheduled(*P)) {
00679           succIsReady = false;
00680           break;
00681         }
00682 
00683       if (succIsReady)  // add the successor to the ready list
00684         S.schedPrio.insertReady(*SI);
00685     }
00686 }
00687 
00688 
00689 // Choose up to `nslots' FEASIBLE instructions and assign each
00690 // instruction to all possible slots that do not violate feasibility.
00691 // FEASIBLE means it should be guaranteed that the set
00692 // of chosen instructions can be issued in a single group.
00693 //
00694 // Return value:
00695 //      maxIssue : total number of feasible instructions
00696 //      S.choicesForSlot[i=0..nslots] : set of instructions feasible in slot i
00697 //
00698 static unsigned
00699 FindSlotChoices(SchedulingManager& S,
00700                 DelaySlotInfo*& getDelaySlotInfo)
00701 {
00702   // initialize result vectors to empty
00703   S.resetChoices();
00704 
00705   // find the slot to start from, in the current cycle
00706   unsigned int startSlot = 0;
00707   InstrGroup* igroup = S.isched.getIGroup(S.getTime());
00708   for (int s = S.nslots - 1; s >= 0; s--)
00709     if ((*igroup)[s] != NULL) {
00710       startSlot = s+1;
00711       break;
00712     }
00713 
00714   // Make sure we pick at most one instruction that would break the group.
00715   // Also, if we do pick one, remember which it was.
00716   unsigned int indexForBreakingNode = S.nslots;
00717   unsigned int indexForDelayedInstr = S.nslots;
00718   DelaySlotInfo* delaySlotInfo = NULL;
00719 
00720   getDelaySlotInfo = NULL;
00721 
00722   // Choose instructions in order of priority.
00723   // Add choices to the choice vector in the SchedulingManager class as
00724   // we choose them so that subsequent choices will be correctly tested
00725   // for feasibility, w.r.t. higher priority choices for the same cycle.
00726   //
00727   while (S.getNumChoices() < S.nslots - startSlot) {
00728     const SchedGraphNode* nextNode=S.schedPrio.getNextHighest(S,S.getTime());
00729     if (nextNode == NULL)
00730       break;                    // no more instructions for this cycle
00731 
00732     if (S.getInstrInfo().getNumDelaySlots(nextNode->getOpcode()) > 0) {
00733       delaySlotInfo = S.getDelaySlotInfoForInstr(nextNode);
00734       if (delaySlotInfo != NULL) {
00735         if (indexForBreakingNode < S.nslots)
00736           // cannot issue a delayed instr in the same cycle as one
00737           // that breaks the issue group or as another delayed instr
00738           nextNode = NULL;
00739         else
00740           indexForDelayedInstr = S.getNumChoices();
00741       }
00742     } else if (S.schedInfo.breaksIssueGroup(nextNode->getOpcode())) {
00743       if (indexForBreakingNode < S.nslots)
00744         // have a breaking instruction already so throw this one away
00745         nextNode = NULL;
00746       else
00747         indexForBreakingNode = S.getNumChoices();
00748     }
00749 
00750     if (nextNode != NULL) {
00751       S.addChoice(nextNode);
00752 
00753       if (S.schedInfo.isSingleIssue(nextNode->getOpcode())) {
00754         assert(S.getNumChoices() == 1 &&
00755                "Prioritizer returned invalid instr for this cycle!");
00756         break;
00757       }
00758     }
00759 
00760     if (indexForDelayedInstr < S.nslots)
00761       break;                    // leave the rest for delay slots
00762   }
00763 
00764   assert(S.getNumChoices() <= S.nslots);
00765   assert(! (indexForDelayedInstr < S.nslots &&
00766             indexForBreakingNode < S.nslots) && "Cannot have both in a cycle");
00767 
00768   // Assign each chosen instruction to all possible slots for that instr.
00769   // But if only one instruction was chosen, put it only in the first
00770   // feasible slot; no more analysis will be needed.
00771   //
00772   if (indexForDelayedInstr >= S.nslots &&
00773       indexForBreakingNode >= S.nslots)
00774   { // No instructions that break the issue group or that have delay slots.
00775     // This is the common case, so handle it separately for efficiency.
00776 
00777     if (S.getNumChoices() == 1) {
00778       MachineOpCode opCode = S.getChoice(0)->getOpcode();
00779       unsigned int s;
00780       for (s=startSlot; s < S.nslots; s++)
00781         if (S.schedInfo.instrCanUseSlot(opCode, s))
00782           break;
00783       assert(s < S.nslots && "No feasible slot for this opCode?");
00784       S.addChoiceToSlot(s, S.getChoice(0));
00785     } else {
00786       for (unsigned i=0; i < S.getNumChoices(); i++) {
00787         MachineOpCode opCode = S.getChoice(i)->getOpcode();
00788         for (unsigned int s=startSlot; s < S.nslots; s++)
00789           if (S.schedInfo.instrCanUseSlot(opCode, s))
00790             S.addChoiceToSlot(s, S.getChoice(i));
00791       }
00792     }
00793   } else if (indexForDelayedInstr < S.nslots) {
00794     // There is an instruction that needs delay slots.
00795     // Try to assign that instruction to a higher slot than any other
00796     // instructions in the group, so that its delay slots can go
00797     // right after it.
00798     //
00799 
00800     assert(indexForDelayedInstr == S.getNumChoices() - 1 &&
00801            "Instruction with delay slots should be last choice!");
00802     assert(delaySlotInfo != NULL && "No delay slot info for instr?");
00803 
00804     const SchedGraphNode* delayedNode = S.getChoice(indexForDelayedInstr);
00805     MachineOpCode delayOpCode = delayedNode->getOpcode();
00806     unsigned ndelays= S.getInstrInfo().getNumDelaySlots(delayOpCode);
00807 
00808     unsigned delayedNodeSlot = S.nslots;
00809     int highestSlotUsed;
00810 
00811     // Find the last possible slot for the delayed instruction that leaves
00812     // at least `d' slots vacant after it (d = #delay slots)
00813     for (int s = S.nslots-ndelays-1; s >= (int) startSlot; s--)
00814       if (S.schedInfo.instrCanUseSlot(delayOpCode, s)) {
00815         delayedNodeSlot = s;
00816         break;
00817       }
00818 
00819     highestSlotUsed = -1;
00820     for (unsigned i=0; i < S.getNumChoices() - 1; i++) {
00821       // Try to assign every other instruction to a lower numbered
00822       // slot than delayedNodeSlot.
00823       MachineOpCode opCode =S.getChoice(i)->getOpcode();
00824       bool noSlotFound = true;
00825       unsigned int s;
00826       for (s=startSlot; s < delayedNodeSlot; s++)
00827         if (S.schedInfo.instrCanUseSlot(opCode, s)) {
00828           S.addChoiceToSlot(s, S.getChoice(i));
00829           noSlotFound = false;
00830         }
00831 
00832       // No slot before `delayedNodeSlot' was found for this opCode
00833       // Use a later slot, and allow some delay slots to fall in
00834       // the next cycle.
00835       if (noSlotFound)
00836         for ( ; s < S.nslots; s++)
00837           if (S.schedInfo.instrCanUseSlot(opCode, s)) {
00838             S.addChoiceToSlot(s, S.getChoice(i));
00839             break;
00840           }
00841 
00842       assert(s < S.nslots && "No feasible slot for instruction?");
00843 
00844       highestSlotUsed = std::max(highestSlotUsed, (int) s);
00845     }
00846 
00847     assert(highestSlotUsed <= (int) S.nslots-1 && "Invalid slot used?");
00848 
00849     // We will put the delayed node in the first slot after the
00850     // highest slot used.  But we just mark that for now, and
00851     // schedule it separately because we want to schedule the delay
00852     // slots for the node at the same time.
00853     CycleCount_t dcycle = S.getTime();
00854     unsigned int dslot = highestSlotUsed + 1;
00855     if (dslot == S.nslots) {
00856       dslot = 0;
00857       ++dcycle;
00858     }
00859     delaySlotInfo->recordChosenSlot(dcycle, dslot);
00860     getDelaySlotInfo = delaySlotInfo;
00861   } else {
00862     // There is an instruction that breaks the issue group.
00863     // For such an instruction, assign to the last possible slot in
00864     // the current group, and then don't assign any other instructions
00865     // to later slots.
00866     assert(indexForBreakingNode < S.nslots);
00867     const SchedGraphNode* breakingNode=S.getChoice(indexForBreakingNode);
00868     unsigned breakingSlot = INT_MAX;
00869     unsigned int nslotsToUse = S.nslots;
00870 
00871     // Find the last possible slot for this instruction.
00872     for (int s = S.nslots-1; s >= (int) startSlot; s--)
00873       if (S.schedInfo.instrCanUseSlot(breakingNode->getOpcode(), s)) {
00874         breakingSlot = s;
00875         break;
00876       }
00877     assert(breakingSlot < S.nslots &&
00878            "No feasible slot for `breakingNode'?");
00879 
00880     // Higher priority instructions than the one that breaks the group:
00881     // These can be assigned to all slots, but will be assigned only
00882     // to earlier slots if possible.
00883     for (unsigned i=0;
00884          i < S.getNumChoices() && i < indexForBreakingNode; i++)
00885     {
00886       MachineOpCode opCode =S.getChoice(i)->getOpcode();
00887 
00888       // If a higher priority instruction cannot be assigned to
00889       // any earlier slots, don't schedule the breaking instruction.
00890       //
00891       bool foundLowerSlot = false;
00892       nslotsToUse = S.nslots;       // May be modified in the loop
00893       for (unsigned int s=startSlot; s < nslotsToUse; s++)
00894         if (S.schedInfo.instrCanUseSlot(opCode, s)) {
00895           if (breakingSlot < S.nslots && s < breakingSlot) {
00896             foundLowerSlot = true;
00897             nslotsToUse = breakingSlot; // RESETS LOOP UPPER BOUND!
00898           }
00899 
00900           S.addChoiceToSlot(s, S.getChoice(i));
00901         }
00902 
00903       if (!foundLowerSlot)
00904         breakingSlot = INT_MAX;         // disable breaking instr
00905     }
00906 
00907     // Assign the breaking instruction (if any) to a single slot
00908     // Otherwise, just ignore the instruction.  It will simply be
00909     // scheduled in a later cycle.
00910     if (breakingSlot < S.nslots) {
00911       S.addChoiceToSlot(breakingSlot, breakingNode);
00912       nslotsToUse = breakingSlot;
00913     } else
00914       nslotsToUse = S.nslots;
00915 
00916     // For lower priority instructions than the one that breaks the
00917     // group, only assign them to slots lower than the breaking slot.
00918     // Otherwise, just ignore the instruction.
00919     for (unsigned i=indexForBreakingNode+1; i < S.getNumChoices(); i++) {
00920       MachineOpCode opCode = S.getChoice(i)->getOpcode();
00921       for (unsigned int s=startSlot; s < nslotsToUse; s++)
00922         if (S.schedInfo.instrCanUseSlot(opCode, s))
00923           S.addChoiceToSlot(s, S.getChoice(i));
00924     }
00925   } // endif (no delay slots and no breaking slots)
00926 
00927   return S.getNumChoices();
00928 }
00929 
00930 
00931 static unsigned
00932 ChooseOneGroup(SchedulingManager& S)
00933 {
00934   assert(S.schedPrio.getNumReady() > 0
00935          && "Don't get here without ready instructions.");
00936 
00937   CycleCount_t firstCycle = S.getTime();
00938   DelaySlotInfo* getDelaySlotInfo = NULL;
00939 
00940   // Choose up to `nslots' feasible instructions and their possible slots.
00941   unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo);
00942 
00943   while (numIssued == 0) {
00944     S.updateTime(S.getTime()+1);
00945     numIssued = FindSlotChoices(S, getDelaySlotInfo);
00946   }
00947 
00948   AssignInstructionsToSlots(S, numIssued);
00949 
00950   if (getDelaySlotInfo != NULL)
00951     numIssued += getDelaySlotInfo->scheduleDelayedNode(S);
00952 
00953   // Print trace of scheduled instructions before newly ready ones
00954   if (SchedDebugLevel >= Sched_PrintSchedTrace) {
00955     for (CycleCount_t c = firstCycle; c <= S.getTime(); c++) {
00956       std::cerr << "    Cycle " << (long)c <<" : Scheduled instructions:\n";
00957       const InstrGroup* igroup = S.isched.getIGroup(c);
00958       for (unsigned int s=0; s < S.nslots; s++) {
00959         std::cerr << "        ";
00960         if ((*igroup)[s] != NULL)
00961           std::cerr << * ((*igroup)[s])->getMachineInstr() << "\n";
00962         else
00963           std::cerr << "<none>\n";
00964       }
00965     }
00966   }
00967 
00968   return numIssued;
00969 }
00970 
00971 
00972 static void
00973 ForwardListSchedule(SchedulingManager& S)
00974 {
00975   unsigned N;
00976   const SchedGraphNode* node;
00977 
00978   S.schedPrio.initialize();
00979 
00980   while ((N = S.schedPrio.getNumReady()) > 0) {
00981     CycleCount_t nextCycle = S.getTime();
00982 
00983     // Choose one group of instructions for a cycle, plus any delay slot
00984     // instructions (which may overflow into successive cycles).
00985     // This will advance S.getTime() to the last cycle in which
00986     // instructions are actually issued.
00987     //
00988     unsigned numIssued = ChooseOneGroup(S);
00989     assert(numIssued > 0 && "Deadlock in list scheduling algorithm?");
00990 
00991     // Notify the priority manager of scheduled instructions and mark
00992     // any successors that may now be ready
00993     //
00994     for (CycleCount_t c = nextCycle; c <= S.getTime(); c++) {
00995       const InstrGroup* igroup = S.isched.getIGroup(c);
00996       for (unsigned int s=0; s < S.nslots; s++)
00997         if ((node = (*igroup)[s]) != NULL) {
00998           S.schedPrio.issuedReadyNodeAt(S.getTime(), node);
00999           MarkSuccessorsReady(S, node);
01000         }
01001     }
01002 
01003     // Move to the next the next earliest cycle for which
01004     // an instruction can be issued, or the next earliest in which
01005     // one will be ready, or to the next cycle, whichever is latest.
01006     //
01007     S.updateTime(std::max(S.getTime() + 1,
01008                           std::max(S.getEarliestIssueTime(),
01009                                    S.schedPrio.getEarliestReadyTime())));
01010   }
01011 }
01012 
01013 
01014 //---------------------------------------------------------------------
01015 // Code for filling delay slots for delayed terminator instructions
01016 // (e.g., BRANCH and RETURN).  Delay slots for non-terminator
01017 // instructions (e.g., CALL) are not handled here because they almost
01018 // always can be filled with instructions from the call sequence code
01019 // before a call.  That's preferable because we incur many tradeoffs here
01020 // when we cannot find single-cycle instructions that can be reordered.
01021 //----------------------------------------------------------------------
01022 
01023 static bool
01024 NodeCanFillDelaySlot(const SchedulingManager& S,
01025                      const SchedGraphNode* node,
01026                      const SchedGraphNode* brNode,
01027                      bool nodeIsPredecessor)
01028 {
01029   assert(! node->isDummyNode());
01030 
01031   // don't put a branch in the delay slot of another branch
01032   if (S.getInstrInfo().isBranch(node->getOpcode()))
01033     return false;
01034 
01035   // don't put a single-issue instruction in the delay slot of a branch
01036   if (S.schedInfo.isSingleIssue(node->getOpcode()))
01037     return false;
01038 
01039   // don't put a load-use dependence in the delay slot of a branch
01040   const TargetInstrInfo& mii = S.getInstrInfo();
01041 
01042   for (SchedGraphNode::const_iterator EI = node->beginInEdges();
01043        EI != node->endInEdges(); ++EI)
01044     if (! ((SchedGraphNode*)(*EI)->getSrc())->isDummyNode()
01045         && mii.isLoad(((SchedGraphNode*)(*EI)->getSrc())->getOpcode())
01046         && (*EI)->getDepType() == SchedGraphEdge::CtrlDep)
01047       return false;
01048 
01049   // Finally, if the instruction precedes the branch, we make sure the
01050   // instruction can be reordered relative to the branch.  We simply check
01051   // if the instr. has only 1 outgoing edge, viz., a CD edge to the branch.
01052   //
01053   if (nodeIsPredecessor) {
01054     bool onlyCDEdgeToBranch = true;
01055     for (SchedGraphNode::const_iterator OEI = node->beginOutEdges();
01056          OEI != node->endOutEdges(); ++OEI)
01057       if (! ((SchedGraphNode*)(*OEI)->getSink())->isDummyNode()
01058           && ((*OEI)->getSink() != brNode
01059               || (*OEI)->getDepType() != SchedGraphEdge::CtrlDep))
01060       {
01061         onlyCDEdgeToBranch = false;
01062         break;
01063       }
01064 
01065     if (!onlyCDEdgeToBranch)
01066       return false;
01067   }
01068 
01069   return true;
01070 }
01071 
01072 
01073 static void
01074 MarkNodeForDelaySlot(SchedulingManager& S,
01075                      SchedGraph* graph,
01076                      SchedGraphNode* node,
01077                      const SchedGraphNode* brNode,
01078                      bool nodeIsPredecessor)
01079 {
01080   if (nodeIsPredecessor) {
01081     // If node is in the same basic block (i.e., precedes brNode),
01082     // remove it and all its incident edges from the graph.  Make sure we
01083     // add dummy edges for pred/succ nodes that become entry/exit nodes.
01084     graph->eraseIncidentEdges(node, /*addDummyEdges*/ true);
01085   } else {
01086     // If the node was from a target block, add the node to the graph
01087     // and add a CD edge from brNode to node.
01088     assert(0 && "NOT IMPLEMENTED YET");
01089   }
01090 
01091   DelaySlotInfo* dinfo = S.getDelaySlotInfoForInstr(brNode, /*create*/ true);
01092   dinfo->addDelayNode(node);
01093 }
01094 
01095 
01096 void
01097 FindUsefulInstructionsForDelaySlots(SchedulingManager& S,
01098                                     SchedGraphNode* brNode,
01099                                     std::vector<SchedGraphNode*>& sdelayNodeVec)
01100 {
01101   const TargetInstrInfo& mii = S.getInstrInfo();
01102   unsigned ndelays =
01103     mii.getNumDelaySlots(brNode->getOpcode());
01104 
01105   if (ndelays == 0)
01106     return;
01107 
01108   sdelayNodeVec.reserve(ndelays);
01109 
01110   // Use a separate vector to hold the feasible multi-cycle nodes.
01111   // These will be used if not enough single-cycle nodes are found.
01112   //
01113   std::vector<SchedGraphNode*> mdelayNodeVec;
01114 
01115   for (sg_pred_iterator P = pred_begin(brNode);
01116        P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P)
01117     if (! (*P)->isDummyNode() &&
01118         ! mii.isNop((*P)->getOpcode()) &&
01119         NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true))
01120     {
01121       if (mii.maxLatency((*P)->getOpcode()) > 1)
01122         mdelayNodeVec.push_back(*P);
01123       else
01124         sdelayNodeVec.push_back(*P);
01125     }
01126 
01127   // If not enough single-cycle instructions were found, select the
01128   // lowest-latency multi-cycle instructions and use them.
01129   // Note that this is the most efficient code when only 1 (or even 2)
01130   // values need to be selected.
01131   //
01132   while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0) {
01133     unsigned lmin =
01134       mii.maxLatency(mdelayNodeVec[0]->getOpcode());
01135     unsigned minIndex   = 0;
01136     for (unsigned i=1; i < mdelayNodeVec.size(); i++)
01137     {
01138       unsigned li =
01139         mii.maxLatency(mdelayNodeVec[i]->getOpcode());
01140       if (lmin >= li)
01141       {
01142         lmin = li;
01143         minIndex = i;
01144       }
01145     }
01146     sdelayNodeVec.push_back(mdelayNodeVec[minIndex]);
01147     if (sdelayNodeVec.size() < ndelays) // avoid the last erase!
01148       mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex);
01149   }
01150 }
01151 
01152 
01153 // Remove the NOPs currently in delay slots from the graph.
01154 // Mark instructions specified in sdelayNodeVec to replace them.
01155 // If not enough useful instructions were found, mark the NOPs to be used
01156 // for filling delay slots, otherwise, otherwise just discard them.
01157 //
01158 static void ReplaceNopsWithUsefulInstr(SchedulingManager& S,
01159                                        SchedGraphNode* node,
01160                                        // FIXME: passing vector BY VALUE!!!
01161                                      std::vector<SchedGraphNode*> sdelayNodeVec,
01162                                        SchedGraph* graph)
01163 {
01164   std::vector<SchedGraphNode*> nopNodeVec;   // this will hold unused NOPs
01165   const TargetInstrInfo& mii = S.getInstrInfo();
01166   const MachineInstr* brInstr = node->getMachineInstr();
01167   unsigned ndelays= mii.getNumDelaySlots(brInstr->getOpcode());
01168   assert(ndelays > 0 && "Unnecessary call to replace NOPs");
01169 
01170   // Remove the NOPs currently in delay slots from the graph.
01171   // If not enough useful instructions were found, use the NOPs to
01172   // fill delay slots, otherwise, just discard them.
01173   //
01174   unsigned int firstDelaySlotIdx = node->getOrigIndexInBB() + 1;
01175   MachineBasicBlock& MBB = node->getMachineBasicBlock();
01176   MachineBasicBlock::iterator MBBI = MBB.begin();
01177   std::advance(MBBI, firstDelaySlotIdx - 1);
01178   if (!(&*MBBI++ == brInstr)) {
01179     std::cerr << "Incorrect instr. index in basic block for brInstr";
01180     abort();
01181   }
01182 
01183   // First find all useful instructions already in the delay slots
01184   // and USE THEM.  We'll throw away the unused alternatives below
01185   //
01186   MachineBasicBlock::iterator Tmp = MBBI;
01187   for (unsigned i = 0; i != ndelays; ++i, ++MBBI)
01188     if (!mii.isNop(MBBI->getOpcode()))
01189       sdelayNodeVec.insert(sdelayNodeVec.begin(),
01190                            graph->getGraphNodeForInstr(MBBI));
01191   MBBI = Tmp;
01192 
01193   // Then find the NOPs and keep only as many as are needed.
01194   // Put the rest in nopNodeVec to be deleted.
01195   for (unsigned i=firstDelaySlotIdx; i < firstDelaySlotIdx+ndelays; ++i, ++MBBI)
01196     if (mii.isNop(MBBI->getOpcode()))
01197       if (sdelayNodeVec.size() < ndelays)
01198         sdelayNodeVec.push_back(graph->getGraphNodeForInstr(MBBI));
01199       else {
01200         nopNodeVec.push_back(graph->getGraphNodeForInstr(MBBI));
01201 
01202         //remove the MI from the Machine Code For Instruction
01203         const TerminatorInst *TI = MBB.getBasicBlock()->getTerminator();
01204         MachineCodeForInstruction& llvmMvec =
01205           MachineCodeForInstruction::get((const Instruction *)TI);
01206 
01207         for(MachineCodeForInstruction::iterator mciI=llvmMvec.begin(),
01208               mciE=llvmMvec.end(); mciI!=mciE; ++mciI){
01209           if (*mciI == MBBI)
01210             llvmMvec.erase(mciI);
01211         }
01212       }
01213 
01214   assert(sdelayNodeVec.size() >= ndelays);
01215 
01216   // If some delay slots were already filled, throw away that many new choices
01217   if (sdelayNodeVec.size() > ndelays)
01218     sdelayNodeVec.resize(ndelays);
01219 
01220   // Mark the nodes chosen for delay slots.  This removes them from the graph.
01221   for (unsigned i=0; i < sdelayNodeVec.size(); i++)
01222     MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], node, true);
01223 
01224   // And remove the unused NOPs from the graph.
01225   for (unsigned i=0; i < nopNodeVec.size(); i++)
01226     graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true);
01227 }
01228 
01229 
01230 // For all delayed instructions, choose instructions to put in the delay
01231 // slots and pull those out of the graph.  Mark them for the delay slots
01232 // in the DelaySlotInfo object for that graph node.  If no useful work
01233 // is found for a delay slot, use the NOP that is currently in that slot.
01234 //
01235 // We try to fill the delay slots with useful work for all instructions
01236 // EXCEPT CALLS AND RETURNS.
01237 // For CALLs and RETURNs, it is nearly always possible to use one of the
01238 // call sequence instrs and putting anything else in the delay slot could be
01239 // suboptimal.  Also, it complicates generating the calling sequence code in
01240 // regalloc.
01241 //
01242 static void
01243 ChooseInstructionsForDelaySlots(SchedulingManager& S, MachineBasicBlock &MBB,
01244                                 SchedGraph *graph)
01245 {
01246   const TargetInstrInfo& mii = S.getInstrInfo();
01247 
01248   Instruction *termInstr = (Instruction*)MBB.getBasicBlock()->getTerminator();
01249   MachineCodeForInstruction &termMvec=MachineCodeForInstruction::get(termInstr);
01250   std::vector<SchedGraphNode*> delayNodeVec;
01251   const MachineInstr* brInstr = NULL;
01252 
01253   if (EnableFillingDelaySlots &&
01254       termInstr->getOpcode() != Instruction::Ret)
01255   {
01256     // To find instructions that need delay slots without searching the full
01257     // machine code, we assume that the only delayed instructions are CALLs
01258     // or instructions generated for the terminator inst.
01259     // Find the first branch instr in the sequence of machine instrs for term
01260     //
01261     unsigned first = 0;
01262     while (first < termMvec.size() &&
01263            ! mii.isBranch(termMvec[first]->getOpcode()))
01264     {
01265       ++first;
01266     }
01267     assert(first < termMvec.size() &&
01268            "No branch instructions for BR?  Ok, but weird!  Delete assertion.");
01269 
01270     brInstr = (first < termMvec.size())? termMvec[first] : NULL;
01271 
01272     // Compute a vector of the nodes chosen for delay slots and then
01273     // mark delay slots to replace NOPs with these useful instructions.
01274     //
01275     if (brInstr != NULL) {
01276       SchedGraphNode* brNode = graph->getGraphNodeForInstr(brInstr);
01277       FindUsefulInstructionsForDelaySlots(S, brNode, delayNodeVec);
01278       ReplaceNopsWithUsefulInstr(S, brNode, delayNodeVec, graph);
01279     }
01280   }
01281 
01282   // Also mark delay slots for other delayed instructions to hold NOPs.
01283   // Simply passing in an empty delayNodeVec will have this effect.
01284   // If brInstr is not handled above (EnableFillingDelaySlots == false),
01285   // brInstr will be NULL so this will handle the branch instrs. as well.
01286   //
01287   delayNodeVec.clear();
01288   for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I)
01289     if (I != brInstr && mii.getNumDelaySlots(I->getOpcode()) > 0) {
01290       SchedGraphNode* node = graph->getGraphNodeForInstr(I);
01291       ReplaceNopsWithUsefulInstr(S, node, delayNodeVec, graph);
01292     }
01293 }
01294 
01295 
01296 //
01297 // Schedule the delayed branch and its delay slots
01298 //
01299 unsigned
01300 DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S)
01301 {
01302   assert(delayedNodeSlotNum < S.nslots && "Illegal slot for branch");
01303   assert(S.isched.getInstr(delayedNodeSlotNum, delayedNodeCycle) == NULL
01304          && "Slot for branch should be empty");
01305 
01306   unsigned int nextSlot = delayedNodeSlotNum;
01307   CycleCount_t nextTime = delayedNodeCycle;
01308 
01309   S.scheduleInstr(brNode, nextSlot, nextTime);
01310 
01311   for (unsigned d=0; d < ndelays; d++) {
01312     ++nextSlot;
01313     if (nextSlot == S.nslots) {
01314       nextSlot = 0;
01315       nextTime++;
01316     }
01317 
01318     // Find the first feasible instruction for this delay slot
01319     // Note that we only check for issue restrictions here.
01320     // We do *not* check for flow dependences but rely on pipeline
01321     // interlocks to resolve them.  Machines without interlocks
01322     // will require this code to be modified.
01323     for (unsigned i=0; i < delayNodeVec.size(); i++) {
01324       const SchedGraphNode* dnode = delayNodeVec[i];
01325       if ( ! S.isScheduled(dnode)
01326            && S.schedInfo.instrCanUseSlot(dnode->getOpcode(), nextSlot)
01327            && instrIsFeasible(S, dnode->getOpcode())) {
01328         S.scheduleInstr(dnode, nextSlot, nextTime);
01329         break;
01330       }
01331     }
01332   }
01333 
01334   // Update current time if delay slots overflowed into later cycles.
01335   // Do this here because we know exactly which cycle is the last cycle
01336   // that contains delay slots.  The next loop doesn't compute that.
01337   if (nextTime > S.getTime())
01338     S.updateTime(nextTime);
01339 
01340   // Now put any remaining instructions in the unfilled delay slots.
01341   // This could lead to suboptimal performance but needed for correctness.
01342   nextSlot = delayedNodeSlotNum;
01343   nextTime = delayedNodeCycle;
01344   for (unsigned i=0; i < delayNodeVec.size(); i++)
01345     if (! S.isScheduled(delayNodeVec[i])) {
01346       do { // find the next empty slot
01347         ++nextSlot;
01348         if (nextSlot == S.nslots) {
01349           nextSlot = 0;
01350           nextTime++;
01351         }
01352       } while (S.isched.getInstr(nextSlot, nextTime) != NULL);
01353 
01354       S.scheduleInstr(delayNodeVec[i], nextSlot, nextTime);
01355       break;
01356     }
01357 
01358   return 1 + ndelays;
01359 }
01360 
01361 
01362 // Check if the instruction would conflict with instructions already
01363 // chosen for the current cycle
01364 //
01365 static inline bool
01366 ConflictsWithChoices(const SchedulingManager& S,
01367                      MachineOpCode opCode)
01368 {
01369   // Check if the instruction must issue by itself, and some feasible
01370   // choices have already been made for this cycle
01371   if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode))
01372     return true;
01373 
01374   // For each class that opCode belongs to, check if there are too many
01375   // instructions of that class.
01376   //
01377   const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode);
01378   return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc));
01379 }
01380 
01381 
01382 //************************* External Functions *****************************/
01383 
01384 
01385 //---------------------------------------------------------------------------
01386 // Function: ViolatesMinimumGap
01387 //
01388 // Purpose:
01389 //   Check minimum gap requirements relative to instructions scheduled in
01390 //   previous cycles.
01391 //   Note that we do not need to consider `nextEarliestIssueTime' here because
01392 //   that is also captured in the earliest start times for each opcode.
01393 //---------------------------------------------------------------------------
01394 
01395 static inline bool
01396 ViolatesMinimumGap(const SchedulingManager& S,
01397                    MachineOpCode opCode,
01398                    const CycleCount_t inCycle)
01399 {
01400   return (inCycle < S.getEarliestStartTimeForOp(opCode));
01401 }
01402 
01403 
01404 //---------------------------------------------------------------------------
01405 // Function: instrIsFeasible
01406 //
01407 // Purpose:
01408 //   Check if any issue restrictions would prevent the instruction from
01409 //   being issued in the current cycle
01410 //---------------------------------------------------------------------------
01411 
01412 bool
01413 instrIsFeasible(const SchedulingManager& S,
01414                 MachineOpCode opCode)
01415 {
01416   // skip the instruction if it cannot be issued due to issue restrictions
01417   // caused by previously issued instructions
01418   if (ViolatesMinimumGap(S, opCode, S.getTime()))
01419     return false;
01420 
01421   // skip the instruction if it cannot be issued due to issue restrictions
01422   // caused by previously chosen instructions for the current cycle
01423   if (ConflictsWithChoices(S, opCode))
01424     return false;
01425 
01426   return true;
01427 }
01428 
01429 //---------------------------------------------------------------------------
01430 // Function: ScheduleInstructionsWithSSA
01431 //
01432 // Purpose:
01433 //   Entry point for instruction scheduling on SSA form.
01434 //   Schedules the machine instructions generated by instruction selection.
01435 //   Assumes that register allocation has not been done, i.e., operands
01436 //   are still in SSA form.
01437 //---------------------------------------------------------------------------
01438 
01439 namespace {
01440   class InstructionSchedulingWithSSA : public FunctionPass {
01441     const TargetMachine &target;
01442   public:
01443     inline InstructionSchedulingWithSSA(const TargetMachine &T) : target(T) {}
01444 
01445     const char *getPassName() const { return "Instruction Scheduling"; }
01446 
01447     // getAnalysisUsage - We use LiveVarInfo...
01448     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
01449       AU.addRequired<FunctionLiveVarInfo>();
01450       AU.setPreservesCFG();
01451     }
01452 
01453     bool runOnFunction(Function &F);
01454   };
01455 } // end anonymous namespace
01456 
01457 
01458 bool InstructionSchedulingWithSSA::runOnFunction(Function &F)
01459 {
01460   SchedGraphSet graphSet(&F, target);
01461 
01462   if (SchedDebugLevel >= Sched_PrintSchedGraphs) {
01463       std::cerr << "\n*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING\n";
01464       graphSet.dump();
01465     }
01466 
01467   for (SchedGraphSet::const_iterator GI=graphSet.begin(), GE=graphSet.end();
01468        GI != GE; ++GI)
01469   {
01470     SchedGraph* graph = (*GI);
01471     MachineBasicBlock &MBB = graph->getBasicBlock();
01472 
01473     if (SchedDebugLevel >= Sched_PrintSchedTrace)
01474       std::cerr << "\n*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n";
01475 
01476     // expensive!
01477     SchedPriorities schedPrio(&F, graph, getAnalysis<FunctionLiveVarInfo>());
01478     SchedulingManager S(target, graph, schedPrio);
01479 
01480     ChooseInstructionsForDelaySlots(S, MBB, graph); // modifies graph
01481     ForwardListSchedule(S);               // computes schedule in S
01482     RecordSchedule(MBB, S);                // records schedule in BB
01483   }
01484 
01485   if (SchedDebugLevel >= Sched_PrintMachineCode) {
01486     std::cerr << "\n*** Machine instructions after INSTRUCTION SCHEDULING\n";
01487     MachineFunction::get(&F).dump();
01488   }
01489 
01490   return false;
01491 }
01492 
01493 
01494 FunctionPass *createInstructionSchedulingWithSSAPass(const TargetMachine &tgt) {
01495   return new InstructionSchedulingWithSSA(tgt);
01496 }
01497 
01498 } // End llvm namespace
01499