LLVM API Documentation
00001 //===- InstrScheduling.cpp - Generic Instruction Scheduling support -------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements the llvm/CodeGen/InstrScheduling.h interface, along with 00011 // generic support routines for instruction scheduling. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "SchedPriorities.h" 00016 #include "llvm/BasicBlock.h" 00017 #include "llvm/CodeGen/MachineInstr.h" 00018 #include "llvm/CodeGen/MachineFunction.h" 00019 #include "llvm/Target/TargetMachine.h" 00020 #include "../MachineCodeForInstruction.h" 00021 #include "../LiveVar/FunctionLiveVarInfo.h" 00022 #include "../SparcV9InstrInfo.h" 00023 #include "llvm/Support/CommandLine.h" 00024 #include <algorithm> 00025 #include <iostream> 00026 00027 namespace llvm { 00028 00029 SchedDebugLevel_t SchedDebugLevel; 00030 00031 static cl::opt<bool> EnableFillingDelaySlots("sched-fill-delay-slots", 00032 cl::desc("Fill branch delay slots during local scheduling")); 00033 00034 static cl::opt<SchedDebugLevel_t, true> 00035 SDL_opt("dsched", cl::Hidden, cl::location(SchedDebugLevel), 00036 cl::desc("enable instruction scheduling debugging information"), 00037 cl::values( 00038 clEnumValN(Sched_NoDebugInfo, "n", "disable debug output"), 00039 clEnumValN(Sched_PrintMachineCode, "y", "print machine code after scheduling"), 00040 clEnumValN(Sched_PrintSchedTrace, "t", "print trace of scheduling actions"), 00041 clEnumValN(Sched_PrintSchedGraphs, "g", "print scheduling graphs"), 00042 clEnumValEnd)); 00043 00044 00045 //************************* Internal Data Types *****************************/ 00046 00047 class InstrSchedule; 00048 class SchedulingManager; 00049 00050 00051 //---------------------------------------------------------------------- 00052 // class InstrGroup: 00053 // 00054 // Represents a group of instructions scheduled to be issued 00055 // in a single cycle. 00056 //---------------------------------------------------------------------- 00057 00058 class InstrGroup { 00059 InstrGroup(const InstrGroup&); // DO NOT IMPLEMENT 00060 void operator=(const InstrGroup&); // DO NOT IMPLEMENT 00061 00062 public: 00063 inline const SchedGraphNode* operator[](unsigned int slotNum) const { 00064 assert(slotNum < group.size()); 00065 return group[slotNum]; 00066 } 00067 00068 private: 00069 friend class InstrSchedule; 00070 00071 inline void addInstr(const SchedGraphNode* node, unsigned int slotNum) { 00072 assert(slotNum < group.size()); 00073 group[slotNum] = node; 00074 } 00075 00076 /*ctor*/ InstrGroup(unsigned int nslots) 00077 : group(nslots, NULL) {} 00078 00079 /*ctor*/ InstrGroup(); // disable: DO NOT IMPLEMENT 00080 00081 private: 00082 std::vector<const SchedGraphNode*> group; 00083 }; 00084 00085 00086 //---------------------------------------------------------------------- 00087 // class ScheduleIterator: 00088 // 00089 // Iterates over the machine instructions in the for a single basic block. 00090 // The schedule is represented by an InstrSchedule object. 00091 //---------------------------------------------------------------------- 00092 00093 template<class _NodeType> 00094 class ScheduleIterator : public forward_iterator<_NodeType, ptrdiff_t> { 00095 private: 00096 unsigned cycleNum; 00097 unsigned slotNum; 00098 const InstrSchedule& S; 00099 public: 00100 typedef ScheduleIterator<_NodeType> _Self; 00101 00102 /*ctor*/ inline ScheduleIterator(const InstrSchedule& _schedule, 00103 unsigned _cycleNum, 00104 unsigned _slotNum) 00105 : cycleNum(_cycleNum), slotNum(_slotNum), S(_schedule) { 00106 skipToNextInstr(); 00107 } 00108 00109 /*ctor*/ inline ScheduleIterator(const _Self& x) 00110 : cycleNum(x.cycleNum), slotNum(x.slotNum), S(x.S) {} 00111 00112 inline bool operator==(const _Self& x) const { 00113 return (slotNum == x.slotNum && cycleNum== x.cycleNum && &S==&x.S); 00114 } 00115 00116 inline bool operator!=(const _Self& x) const { return !operator==(x); } 00117 00118 inline _NodeType* operator*() const; 00119 inline _NodeType* operator->() const { return operator*(); } 00120 00121 _Self& operator++(); // Preincrement 00122 inline _Self operator++(int) { // Postincrement 00123 _Self tmp(*this); ++*this; return tmp; 00124 } 00125 00126 static _Self begin(const InstrSchedule& _schedule); 00127 static _Self end( const InstrSchedule& _schedule); 00128 00129 private: 00130 inline _Self& operator=(const _Self& x); // DISABLE -- DO NOT IMPLEMENT 00131 void skipToNextInstr(); 00132 }; 00133 00134 00135 //---------------------------------------------------------------------- 00136 // class InstrSchedule: 00137 // 00138 // Represents the schedule of machine instructions for a single basic block. 00139 //---------------------------------------------------------------------- 00140 00141 class InstrSchedule { 00142 const unsigned int nslots; 00143 unsigned int numInstr; 00144 std::vector<InstrGroup*> groups; // indexed by cycle number 00145 std::vector<cycles_t> startTime; // indexed by node id 00146 00147 InstrSchedule(InstrSchedule&); // DO NOT IMPLEMENT 00148 void operator=(InstrSchedule&); // DO NOT IMPLEMENT 00149 00150 public: // iterators 00151 typedef ScheduleIterator<SchedGraphNode> iterator; 00152 typedef ScheduleIterator<const SchedGraphNode> const_iterator; 00153 00154 iterator begin() { return iterator::begin(*this); } 00155 const_iterator begin() const { return const_iterator::begin(*this); } 00156 iterator end() { return iterator::end(*this); } 00157 const_iterator end() const { return const_iterator::end(*this); } 00158 00159 public: // constructors and destructor 00160 /*ctor*/ InstrSchedule (unsigned int _nslots, 00161 unsigned int _numNodes); 00162 /*dtor*/ ~InstrSchedule (); 00163 00164 public: // accessor functions to query chosen schedule 00165 const SchedGraphNode* getInstr (unsigned int slotNum, 00166 cycles_t c) const { 00167 const InstrGroup* igroup = this->getIGroup(c); 00168 return (igroup == NULL)? NULL : (*igroup)[slotNum]; 00169 } 00170 00171 inline InstrGroup* getIGroup (cycles_t c) { 00172 if ((unsigned)c >= groups.size()) 00173 groups.resize(c+1); 00174 if (groups[c] == NULL) 00175 groups[c] = new InstrGroup(nslots); 00176 return groups[c]; 00177 } 00178 00179 inline const InstrGroup* getIGroup (cycles_t c) const { 00180 assert((unsigned)c < groups.size()); 00181 return groups[c]; 00182 } 00183 00184 inline cycles_t getStartTime (unsigned int nodeId) const { 00185 assert(nodeId < startTime.size()); 00186 return startTime[nodeId]; 00187 } 00188 00189 unsigned int getNumInstructions() const { 00190 return numInstr; 00191 } 00192 00193 inline void scheduleInstr (const SchedGraphNode* node, 00194 unsigned int slotNum, 00195 cycles_t cycle) { 00196 InstrGroup* igroup = this->getIGroup(cycle); 00197 if (!((*igroup)[slotNum] == NULL)) { 00198 std::cerr << "Slot already filled?\n"; 00199 abort(); 00200 } 00201 igroup->addInstr(node, slotNum); 00202 assert(node->getNodeId() < startTime.size()); 00203 startTime[node->getNodeId()] = cycle; 00204 ++numInstr; 00205 } 00206 00207 private: 00208 friend class ScheduleIterator<SchedGraphNode>; 00209 friend class ScheduleIterator<const SchedGraphNode>; 00210 /*ctor*/ InstrSchedule (); // Disable: DO NOT IMPLEMENT. 00211 }; 00212 00213 template<class NodeType> 00214 inline NodeType *ScheduleIterator<NodeType>::operator*() const { 00215 assert(cycleNum < S.groups.size()); 00216 return (*S.groups[cycleNum])[slotNum]; 00217 } 00218 00219 00220 /*ctor*/ 00221 InstrSchedule::InstrSchedule(unsigned int _nslots, unsigned int _numNodes) 00222 : nslots(_nslots), 00223 numInstr(0), 00224 groups(2 * _numNodes / _nslots), // 2 x lower-bound for #cycles 00225 startTime(_numNodes, (cycles_t) -1) // set all to -1 00226 { 00227 } 00228 00229 00230 /*dtor*/ 00231 InstrSchedule::~InstrSchedule() 00232 { 00233 for (unsigned c=0, NC=groups.size(); c < NC; c++) 00234 if (groups[c] != NULL) 00235 delete groups[c]; // delete InstrGroup objects 00236 } 00237 00238 00239 template<class _NodeType> 00240 inline 00241 void 00242 ScheduleIterator<_NodeType>::skipToNextInstr() 00243 { 00244 while(cycleNum < S.groups.size() && S.groups[cycleNum] == NULL) 00245 ++cycleNum; // skip cycles with no instructions 00246 00247 while (cycleNum < S.groups.size() && 00248 (*S.groups[cycleNum])[slotNum] == NULL) 00249 { 00250 ++slotNum; 00251 if (slotNum == S.nslots) { 00252 ++cycleNum; 00253 slotNum = 0; 00254 while(cycleNum < S.groups.size() && S.groups[cycleNum] == NULL) 00255 ++cycleNum; // skip cycles with no instructions 00256 } 00257 } 00258 } 00259 00260 template<class _NodeType> 00261 inline 00262 ScheduleIterator<_NodeType>& 00263 ScheduleIterator<_NodeType>::operator++() // Preincrement 00264 { 00265 ++slotNum; 00266 if (slotNum == S.nslots) { 00267 ++cycleNum; 00268 slotNum = 0; 00269 } 00270 skipToNextInstr(); 00271 return *this; 00272 } 00273 00274 template<class _NodeType> 00275 ScheduleIterator<_NodeType> 00276 ScheduleIterator<_NodeType>::begin(const InstrSchedule& _schedule) 00277 { 00278 return _Self(_schedule, 0, 0); 00279 } 00280 00281 template<class _NodeType> 00282 ScheduleIterator<_NodeType> 00283 ScheduleIterator<_NodeType>::end(const InstrSchedule& _schedule) 00284 { 00285 return _Self(_schedule, _schedule.groups.size(), 0); 00286 } 00287 00288 00289 //---------------------------------------------------------------------- 00290 // class DelaySlotInfo: 00291 // 00292 // Record information about delay slots for a single branch instruction. 00293 // Delay slots are simply indexed by slot number 1 ... numDelaySlots 00294 //---------------------------------------------------------------------- 00295 00296 class DelaySlotInfo { 00297 const SchedGraphNode* brNode; 00298 unsigned ndelays; 00299 std::vector<const SchedGraphNode*> delayNodeVec; 00300 cycles_t delayedNodeCycle; 00301 unsigned delayedNodeSlotNum; 00302 00303 DelaySlotInfo(const DelaySlotInfo &); // DO NOT IMPLEMENT 00304 void operator=(const DelaySlotInfo&); // DO NOT IMPLEMENT 00305 public: 00306 /*ctor*/ DelaySlotInfo (const SchedGraphNode* _brNode, 00307 unsigned _ndelays) 00308 : brNode(_brNode), ndelays(_ndelays), 00309 delayedNodeCycle(0), delayedNodeSlotNum(0) {} 00310 00311 inline unsigned getNumDelays () { 00312 return ndelays; 00313 } 00314 00315 inline const std::vector<const SchedGraphNode*>& getDelayNodeVec() { 00316 return delayNodeVec; 00317 } 00318 00319 inline void addDelayNode (const SchedGraphNode* node) { 00320 delayNodeVec.push_back(node); 00321 assert(delayNodeVec.size() <= ndelays && "Too many delay slot instrs!"); 00322 } 00323 00324 inline void recordChosenSlot (cycles_t cycle, unsigned slotNum) { 00325 delayedNodeCycle = cycle; 00326 delayedNodeSlotNum = slotNum; 00327 } 00328 00329 unsigned scheduleDelayedNode (SchedulingManager& S); 00330 }; 00331 00332 00333 //---------------------------------------------------------------------- 00334 // class SchedulingManager: 00335 // 00336 // Represents the schedule of machine instructions for a single basic block. 00337 //---------------------------------------------------------------------- 00338 00339 class SchedulingManager { 00340 SchedulingManager(SchedulingManager &); // DO NOT IMPLEMENT 00341 void operator=(const SchedulingManager &); // DO NOT IMPLEMENT 00342 public: // publicly accessible data members 00343 const unsigned nslots; 00344 const TargetSchedInfo& schedInfo; 00345 SchedPriorities& schedPrio; 00346 InstrSchedule isched; 00347 00348 private: 00349 unsigned totalInstrCount; 00350 cycles_t curTime; 00351 cycles_t nextEarliestIssueTime; // next cycle we can issue 00352 // indexed by slot# 00353 std::vector<hash_set<const SchedGraphNode*> > choicesForSlot; 00354 std::vector<const SchedGraphNode*> choiceVec; // indexed by node ptr 00355 std::vector<int> numInClass; // indexed by sched class 00356 std::vector<cycles_t> nextEarliestStartTime; // indexed by opCode 00357 hash_map<const SchedGraphNode*, DelaySlotInfo*> delaySlotInfoForBranches; 00358 // indexed by branch node ptr 00359 00360 public: 00361 SchedulingManager(const TargetMachine& _target, const SchedGraph* graph, 00362 SchedPriorities& schedPrio); 00363 ~SchedulingManager() { 00364 for (hash_map<const SchedGraphNode*, 00365 DelaySlotInfo*>::iterator I = delaySlotInfoForBranches.begin(), 00366 E = delaySlotInfoForBranches.end(); I != E; ++I) 00367 delete I->second; 00368 } 00369 00370 //---------------------------------------------------------------------- 00371 // Simplify access to the machine instruction info 00372 //---------------------------------------------------------------------- 00373 00374 inline const TargetInstrInfo& getInstrInfo () const { 00375 return schedInfo.getInstrInfo(); 00376 } 00377 00378 //---------------------------------------------------------------------- 00379 // Interface for checking and updating the current time 00380 //---------------------------------------------------------------------- 00381 00382 inline cycles_t getTime () const { 00383 return curTime; 00384 } 00385 00386 inline cycles_t getEarliestIssueTime() const { 00387 return nextEarliestIssueTime; 00388 } 00389 00390 inline cycles_t getEarliestStartTimeForOp(MachineOpCode opCode) const { 00391 assert(opCode < (int) nextEarliestStartTime.size()); 00392 return nextEarliestStartTime[opCode]; 00393 } 00394 00395 // Update current time to specified cycle 00396 inline void updateTime (cycles_t c) { 00397 curTime = c; 00398 schedPrio.updateTime(c); 00399 } 00400 00401 //---------------------------------------------------------------------- 00402 // Functions to manage the choices for the current cycle including: 00403 // -- a vector of choices by priority (choiceVec) 00404 // -- vectors of the choices for each instruction slot (choicesForSlot[]) 00405 // -- number of choices in each sched class, used to check issue conflicts 00406 // between choices for a single cycle 00407 //---------------------------------------------------------------------- 00408 00409 inline unsigned int getNumChoices () const { 00410 return choiceVec.size(); 00411 } 00412 00413 inline unsigned getNumChoicesInClass (const InstrSchedClass& sc) const { 00414 assert(sc < numInClass.size() && "Invalid op code or sched class!"); 00415 return numInClass[sc]; 00416 } 00417 00418 inline const SchedGraphNode* getChoice(unsigned int i) const { 00419 // assert(i < choiceVec.size()); don't check here. 00420 return choiceVec[i]; 00421 } 00422 00423 inline hash_set<const SchedGraphNode*>& getChoicesForSlot(unsigned slotNum) { 00424 assert(slotNum < nslots); 00425 return choicesForSlot[slotNum]; 00426 } 00427 00428 inline void addChoice (const SchedGraphNode* node) { 00429 // Append the instruction to the vector of choices for current cycle. 00430 // Increment numInClass[c] for the sched class to which the instr belongs. 00431 choiceVec.push_back(node); 00432 const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpcode()); 00433 assert(sc < numInClass.size()); 00434 numInClass[sc]++; 00435 } 00436 00437 inline void addChoiceToSlot (unsigned int slotNum, 00438 const SchedGraphNode* node) { 00439 // Add the instruction to the choice set for the specified slot 00440 assert(slotNum < nslots); 00441 choicesForSlot[slotNum].insert(node); 00442 } 00443 00444 inline void resetChoices () { 00445 choiceVec.clear(); 00446 for (unsigned int s=0; s < nslots; s++) 00447 choicesForSlot[s].clear(); 00448 for (unsigned int c=0; c < numInClass.size(); c++) 00449 numInClass[c] = 0; 00450 } 00451 00452 //---------------------------------------------------------------------- 00453 // Code to query and manage the partial instruction schedule so far 00454 //---------------------------------------------------------------------- 00455 00456 inline unsigned int getNumScheduled () const { 00457 return isched.getNumInstructions(); 00458 } 00459 00460 inline unsigned int getNumUnscheduled() const { 00461 return totalInstrCount - isched.getNumInstructions(); 00462 } 00463 00464 inline bool isScheduled (const SchedGraphNode* node) const { 00465 return (isched.getStartTime(node->getNodeId()) >= 0); 00466 } 00467 00468 inline void scheduleInstr (const SchedGraphNode* node, 00469 unsigned int slotNum, 00470 cycles_t cycle) 00471 { 00472 assert(! isScheduled(node) && "Instruction already scheduled?"); 00473 00474 // add the instruction to the schedule 00475 isched.scheduleInstr(node, slotNum, cycle); 00476 00477 // update the earliest start times of all nodes that conflict with `node' 00478 // and the next-earliest time anything can issue if `node' causes bubbles 00479 updateEarliestStartTimes(node, cycle); 00480 00481 // remove the instruction from the choice sets for all slots 00482 for (unsigned s=0; s < nslots; s++) 00483 choicesForSlot[s].erase(node); 00484 00485 // and decrement the instr count for the sched class to which it belongs 00486 const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpcode()); 00487 assert(sc < numInClass.size()); 00488 numInClass[sc]--; 00489 } 00490 00491 //---------------------------------------------------------------------- 00492 // Create and retrieve delay slot info for delayed instructions 00493 //---------------------------------------------------------------------- 00494 00495 inline DelaySlotInfo* getDelaySlotInfoForInstr(const SchedGraphNode* bn, 00496 bool createIfMissing=false) 00497 { 00498 hash_map<const SchedGraphNode*, DelaySlotInfo*>::const_iterator 00499 I = delaySlotInfoForBranches.find(bn); 00500 if (I != delaySlotInfoForBranches.end()) 00501 return I->second; 00502 00503 if (!createIfMissing) return 0; 00504 00505 DelaySlotInfo *dinfo = 00506 new DelaySlotInfo(bn, getInstrInfo().getNumDelaySlots(bn->getOpcode())); 00507 return delaySlotInfoForBranches[bn] = dinfo; 00508 } 00509 00510 private: 00511 SchedulingManager(); // DISABLED: DO NOT IMPLEMENT 00512 void updateEarliestStartTimes(const SchedGraphNode* node, cycles_t schedTime); 00513 }; 00514 00515 00516 /*ctor*/ 00517 SchedulingManager::SchedulingManager(const TargetMachine& target, 00518 const SchedGraph* graph, 00519 SchedPriorities& _schedPrio) 00520 : nslots(target.getSchedInfo()->getMaxNumIssueTotal()), 00521 schedInfo(*target.getSchedInfo()), 00522 schedPrio(_schedPrio), 00523 isched(nslots, graph->getNumNodes()), 00524 totalInstrCount(graph->getNumNodes() - 2), 00525 nextEarliestIssueTime(0), 00526 choicesForSlot(nslots), 00527 numInClass(target.getSchedInfo()->getNumSchedClasses(), 0), // set all to 0 00528 nextEarliestStartTime(target.getInstrInfo()->getNumOpcodes(), 00529 (cycles_t) 0) // set all to 0 00530 { 00531 updateTime(0); 00532 00533 // Note that an upper bound on #choices for each slot is = nslots since 00534 // we use this vector to hold a feasible set of instructions, and more 00535 // would be infeasible. Reserve that much memory since it is probably small. 00536 for (unsigned int i=0; i < nslots; i++) 00537 choicesForSlot[i].resize(nslots); 00538 } 00539 00540 00541 void 00542 SchedulingManager::updateEarliestStartTimes(const SchedGraphNode* node, 00543 cycles_t schedTime) 00544 { 00545 if (schedInfo.numBubblesAfter(node->getOpcode()) > 0) 00546 { // Update next earliest time before which *nothing* can issue. 00547 nextEarliestIssueTime = std::max(nextEarliestIssueTime, 00548 curTime + 1 + schedInfo.numBubblesAfter(node->getOpcode())); 00549 } 00550 00551 const std::vector<MachineOpCode>& 00552 conflictVec = schedInfo.getConflictList(node->getOpcode()); 00553 00554 for (unsigned i=0; i < conflictVec.size(); i++) 00555 { 00556 MachineOpCode toOp = conflictVec[i]; 00557 cycles_t est=schedTime + schedInfo.getMinIssueGap(node->getOpcode(),toOp); 00558 assert(toOp < (int) nextEarliestStartTime.size()); 00559 if (nextEarliestStartTime[toOp] < est) 00560 nextEarliestStartTime[toOp] = est; 00561 } 00562 } 00563 00564 //************************* Internal Functions *****************************/ 00565 00566 00567 static void 00568 AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue) 00569 { 00570 // find the slot to start from, in the current cycle 00571 unsigned int startSlot = 0; 00572 cycles_t curTime = S.getTime(); 00573 00574 assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot); 00575 00576 // If only one instruction can be issued, do so. 00577 if (maxIssue == 1) 00578 for (unsigned s=startSlot; s < S.nslots; s++) 00579 if (S.getChoicesForSlot(s).size() > 0) { 00580 // found the one instruction 00581 S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime); 00582 return; 00583 } 00584 00585 // Otherwise, choose from the choices for each slot 00586 // 00587 InstrGroup* igroup = S.isched.getIGroup(S.getTime()); 00588 assert(igroup != NULL && "Group creation failed?"); 00589 00590 // Find a slot that has only a single choice, and take it. 00591 // If all slots have 0 or multiple choices, pick the first slot with 00592 // choices and use its last instruction (just to avoid shifting the vector). 00593 unsigned numIssued; 00594 for (numIssued = 0; numIssued < maxIssue; numIssued++) { 00595 int chosenSlot = -1; 00596 for (unsigned s=startSlot; s < S.nslots; s++) 00597 if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1) { 00598 chosenSlot = (int) s; 00599 break; 00600 } 00601 00602 if (chosenSlot == -1) 00603 for (unsigned s=startSlot; s < S.nslots; s++) 00604 if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0) { 00605 chosenSlot = (int) s; 00606 break; 00607 } 00608 00609 if (chosenSlot != -1) { 00610 // Insert the chosen instr in the chosen slot and 00611 // erase it from all slots. 00612 const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin(); 00613 S.scheduleInstr(node, chosenSlot, curTime); 00614 } 00615 } 00616 00617 assert(numIssued > 0 && "Should not happen when maxIssue > 0!"); 00618 } 00619 00620 00621 // 00622 // For now, just assume we are scheduling within a single basic block. 00623 // Get the machine instruction vector for the basic block and clear it, 00624 // then append instructions in scheduled order. 00625 // Also, re-insert the dummy PHI instructions that were at the beginning 00626 // of the basic block, since they are not part of the schedule. 00627 // 00628 static void 00629 RecordSchedule(MachineBasicBlock &MBB, const SchedulingManager& S) 00630 { 00631 const TargetInstrInfo& mii = S.schedInfo.getInstrInfo(); 00632 00633 // Lets make sure we didn't lose any instructions, except possibly 00634 // some NOPs from delay slots. Also, PHIs are not included in the schedule. 00635 unsigned numInstr = 0; 00636 for (MachineBasicBlock::iterator I=MBB.begin(); I != MBB.end(); ++I) 00637 if (!(I->getOpcode() == V9::NOP || I->getOpcode() == V9::PHI)) 00638 ++numInstr; 00639 assert(S.isched.getNumInstructions() >= numInstr && 00640 "Lost some non-NOP instructions during scheduling!"); 00641 00642 if (S.isched.getNumInstructions() == 0) 00643 return; // empty basic block! 00644 00645 // First find the dummy instructions at the start of the basic block 00646 MachineBasicBlock::iterator I = MBB.begin(); 00647 for ( ; I != MBB.end(); ++I) 00648 if (I->getOpcode() != V9::PHI) 00649 break; 00650 00651 // Remove all except the dummy PHI instructions from MBB, and 00652 // pre-allocate create space for the ones we will put back in. 00653 while (I != MBB.end()) 00654 MBB.remove(I++); 00655 00656 InstrSchedule::const_iterator NIend = S.isched.end(); 00657 for (InstrSchedule::const_iterator NI = S.isched.begin(); NI != NIend; ++NI) 00658 MBB.push_back(const_cast<MachineInstr*>((*NI)->getMachineInstr())); 00659 } 00660 00661 00662 00663 static void 00664 MarkSuccessorsReady(SchedulingManager& S, const SchedGraphNode* node) 00665 { 00666 // Check if any successors are now ready that were not already marked 00667 // ready before, and that have not yet been scheduled. 00668 // 00669 for (sg_succ_const_iterator SI = succ_begin(node); SI !=succ_end(node); ++SI) 00670 if (! (*SI)->isDummyNode() 00671 && ! S.isScheduled(*SI) 00672 && ! S.schedPrio.nodeIsReady(*SI)) 00673 { 00674 // successor not scheduled and not marked ready; check *its* preds. 00675 00676 bool succIsReady = true; 00677 for (sg_pred_const_iterator P=pred_begin(*SI); P != pred_end(*SI); ++P) 00678 if (! (*P)->isDummyNode() && ! S.isScheduled(*P)) { 00679 succIsReady = false; 00680 break; 00681 } 00682 00683 if (succIsReady) // add the successor to the ready list 00684 S.schedPrio.insertReady(*SI); 00685 } 00686 } 00687 00688 00689 // Choose up to `nslots' FEASIBLE instructions and assign each 00690 // instruction to all possible slots that do not violate feasibility. 00691 // FEASIBLE means it should be guaranteed that the set 00692 // of chosen instructions can be issued in a single group. 00693 // 00694 // Return value: 00695 // maxIssue : total number of feasible instructions 00696 // S.choicesForSlot[i=0..nslots] : set of instructions feasible in slot i 00697 // 00698 static unsigned 00699 FindSlotChoices(SchedulingManager& S, 00700 DelaySlotInfo*& getDelaySlotInfo) 00701 { 00702 // initialize result vectors to empty 00703 S.resetChoices(); 00704 00705 // find the slot to start from, in the current cycle 00706 unsigned int startSlot = 0; 00707 InstrGroup* igroup = S.isched.getIGroup(S.getTime()); 00708 for (int s = S.nslots - 1; s >= 0; s--) 00709 if ((*igroup)[s] != NULL) { 00710 startSlot = s+1; 00711 break; 00712 } 00713 00714 // Make sure we pick at most one instruction that would break the group. 00715 // Also, if we do pick one, remember which it was. 00716 unsigned int indexForBreakingNode = S.nslots; 00717 unsigned int indexForDelayedInstr = S.nslots; 00718 DelaySlotInfo* delaySlotInfo = NULL; 00719 00720 getDelaySlotInfo = NULL; 00721 00722 // Choose instructions in order of priority. 00723 // Add choices to the choice vector in the SchedulingManager class as 00724 // we choose them so that subsequent choices will be correctly tested 00725 // for feasibility, w.r.t. higher priority choices for the same cycle. 00726 // 00727 while (S.getNumChoices() < S.nslots - startSlot) { 00728 const SchedGraphNode* nextNode=S.schedPrio.getNextHighest(S,S.getTime()); 00729 if (nextNode == NULL) 00730 break; // no more instructions for this cycle 00731 00732 if (S.getInstrInfo().getNumDelaySlots(nextNode->getOpcode()) > 0) { 00733 delaySlotInfo = S.getDelaySlotInfoForInstr(nextNode); 00734 if (delaySlotInfo != NULL) { 00735 if (indexForBreakingNode < S.nslots) 00736 // cannot issue a delayed instr in the same cycle as one 00737 // that breaks the issue group or as another delayed instr 00738 nextNode = NULL; 00739 else 00740 indexForDelayedInstr = S.getNumChoices(); 00741 } 00742 } else if (S.schedInfo.breaksIssueGroup(nextNode->getOpcode())) { 00743 if (indexForBreakingNode < S.nslots) 00744 // have a breaking instruction already so throw this one away 00745 nextNode = NULL; 00746 else 00747 indexForBreakingNode = S.getNumChoices(); 00748 } 00749 00750 if (nextNode != NULL) { 00751 S.addChoice(nextNode); 00752 00753 if (S.schedInfo.isSingleIssue(nextNode->getOpcode())) { 00754 assert(S.getNumChoices() == 1 && 00755 "Prioritizer returned invalid instr for this cycle!"); 00756 break; 00757 } 00758 } 00759 00760 if (indexForDelayedInstr < S.nslots) 00761 break; // leave the rest for delay slots 00762 } 00763 00764 assert(S.getNumChoices() <= S.nslots); 00765 assert(! (indexForDelayedInstr < S.nslots && 00766 indexForBreakingNode < S.nslots) && "Cannot have both in a cycle"); 00767 00768 // Assign each chosen instruction to all possible slots for that instr. 00769 // But if only one instruction was chosen, put it only in the first 00770 // feasible slot; no more analysis will be needed. 00771 // 00772 if (indexForDelayedInstr >= S.nslots && 00773 indexForBreakingNode >= S.nslots) 00774 { // No instructions that break the issue group or that have delay slots. 00775 // This is the common case, so handle it separately for efficiency. 00776 00777 if (S.getNumChoices() == 1) { 00778 MachineOpCode opCode = S.getChoice(0)->getOpcode(); 00779 unsigned int s; 00780 for (s=startSlot; s < S.nslots; s++) 00781 if (S.schedInfo.instrCanUseSlot(opCode, s)) 00782 break; 00783 assert(s < S.nslots && "No feasible slot for this opCode?"); 00784 S.addChoiceToSlot(s, S.getChoice(0)); 00785 } else { 00786 for (unsigned i=0; i < S.getNumChoices(); i++) { 00787 MachineOpCode opCode = S.getChoice(i)->getOpcode(); 00788 for (unsigned int s=startSlot; s < S.nslots; s++) 00789 if (S.schedInfo.instrCanUseSlot(opCode, s)) 00790 S.addChoiceToSlot(s, S.getChoice(i)); 00791 } 00792 } 00793 } else if (indexForDelayedInstr < S.nslots) { 00794 // There is an instruction that needs delay slots. 00795 // Try to assign that instruction to a higher slot than any other 00796 // instructions in the group, so that its delay slots can go 00797 // right after it. 00798 // 00799 00800 assert(indexForDelayedInstr == S.getNumChoices() - 1 && 00801 "Instruction with delay slots should be last choice!"); 00802 assert(delaySlotInfo != NULL && "No delay slot info for instr?"); 00803 00804 const SchedGraphNode* delayedNode = S.getChoice(indexForDelayedInstr); 00805 MachineOpCode delayOpCode = delayedNode->getOpcode(); 00806 unsigned ndelays= S.getInstrInfo().getNumDelaySlots(delayOpCode); 00807 00808 unsigned delayedNodeSlot = S.nslots; 00809 int highestSlotUsed; 00810 00811 // Find the last possible slot for the delayed instruction that leaves 00812 // at least `d' slots vacant after it (d = #delay slots) 00813 for (int s = S.nslots-ndelays-1; s >= (int) startSlot; s--) 00814 if (S.schedInfo.instrCanUseSlot(delayOpCode, s)) { 00815 delayedNodeSlot = s; 00816 break; 00817 } 00818 00819 highestSlotUsed = -1; 00820 for (unsigned i=0; i < S.getNumChoices() - 1; i++) { 00821 // Try to assign every other instruction to a lower numbered 00822 // slot than delayedNodeSlot. 00823 MachineOpCode opCode =S.getChoice(i)->getOpcode(); 00824 bool noSlotFound = true; 00825 unsigned int s; 00826 for (s=startSlot; s < delayedNodeSlot; s++) 00827 if (S.schedInfo.instrCanUseSlot(opCode, s)) { 00828 S.addChoiceToSlot(s, S.getChoice(i)); 00829 noSlotFound = false; 00830 } 00831 00832 // No slot before `delayedNodeSlot' was found for this opCode 00833 // Use a later slot, and allow some delay slots to fall in 00834 // the next cycle. 00835 if (noSlotFound) 00836 for ( ; s < S.nslots; s++) 00837 if (S.schedInfo.instrCanUseSlot(opCode, s)) { 00838 S.addChoiceToSlot(s, S.getChoice(i)); 00839 break; 00840 } 00841 00842 assert(s < S.nslots && "No feasible slot for instruction?"); 00843 00844 highestSlotUsed = std::max(highestSlotUsed, (int) s); 00845 } 00846 00847 assert(highestSlotUsed <= (int) S.nslots-1 && "Invalid slot used?"); 00848 00849 // We will put the delayed node in the first slot after the 00850 // highest slot used. But we just mark that for now, and 00851 // schedule it separately because we want to schedule the delay 00852 // slots for the node at the same time. 00853 cycles_t dcycle = S.getTime(); 00854 unsigned int dslot = highestSlotUsed + 1; 00855 if (dslot == S.nslots) { 00856 dslot = 0; 00857 ++dcycle; 00858 } 00859 delaySlotInfo->recordChosenSlot(dcycle, dslot); 00860 getDelaySlotInfo = delaySlotInfo; 00861 } else { 00862 // There is an instruction that breaks the issue group. 00863 // For such an instruction, assign to the last possible slot in 00864 // the current group, and then don't assign any other instructions 00865 // to later slots. 00866 assert(indexForBreakingNode < S.nslots); 00867 const SchedGraphNode* breakingNode=S.getChoice(indexForBreakingNode); 00868 unsigned breakingSlot = INT_MAX; 00869 unsigned int nslotsToUse = S.nslots; 00870 00871 // Find the last possible slot for this instruction. 00872 for (int s = S.nslots-1; s >= (int) startSlot; s--) 00873 if (S.schedInfo.instrCanUseSlot(breakingNode->getOpcode(), s)) { 00874 breakingSlot = s; 00875 break; 00876 } 00877 assert(breakingSlot < S.nslots && 00878 "No feasible slot for `breakingNode'?"); 00879 00880 // Higher priority instructions than the one that breaks the group: 00881 // These can be assigned to all slots, but will be assigned only 00882 // to earlier slots if possible. 00883 for (unsigned i=0; 00884 i < S.getNumChoices() && i < indexForBreakingNode; i++) 00885 { 00886 MachineOpCode opCode =S.getChoice(i)->getOpcode(); 00887 00888 // If a higher priority instruction cannot be assigned to 00889 // any earlier slots, don't schedule the breaking instruction. 00890 // 00891 bool foundLowerSlot = false; 00892 nslotsToUse = S.nslots; // May be modified in the loop 00893 for (unsigned int s=startSlot; s < nslotsToUse; s++) 00894 if (S.schedInfo.instrCanUseSlot(opCode, s)) { 00895 if (breakingSlot < S.nslots && s < breakingSlot) { 00896 foundLowerSlot = true; 00897 nslotsToUse = breakingSlot; // RESETS LOOP UPPER BOUND! 00898 } 00899 00900 S.addChoiceToSlot(s, S.getChoice(i)); 00901 } 00902 00903 if (!foundLowerSlot) 00904 breakingSlot = INT_MAX; // disable breaking instr 00905 } 00906 00907 // Assign the breaking instruction (if any) to a single slot 00908 // Otherwise, just ignore the instruction. It will simply be 00909 // scheduled in a later cycle. 00910 if (breakingSlot < S.nslots) { 00911 S.addChoiceToSlot(breakingSlot, breakingNode); 00912 nslotsToUse = breakingSlot; 00913 } else 00914 nslotsToUse = S.nslots; 00915 00916 // For lower priority instructions than the one that breaks the 00917 // group, only assign them to slots lower than the breaking slot. 00918 // Otherwise, just ignore the instruction. 00919 for (unsigned i=indexForBreakingNode+1; i < S.getNumChoices(); i++) { 00920 MachineOpCode opCode = S.getChoice(i)->getOpcode(); 00921 for (unsigned int s=startSlot; s < nslotsToUse; s++) 00922 if (S.schedInfo.instrCanUseSlot(opCode, s)) 00923 S.addChoiceToSlot(s, S.getChoice(i)); 00924 } 00925 } // endif (no delay slots and no breaking slots) 00926 00927 return S.getNumChoices(); 00928 } 00929 00930 00931 static unsigned 00932 ChooseOneGroup(SchedulingManager& S) 00933 { 00934 assert(S.schedPrio.getNumReady() > 0 00935 && "Don't get here without ready instructions."); 00936 00937 cycles_t firstCycle = S.getTime(); 00938 DelaySlotInfo* getDelaySlotInfo = NULL; 00939 00940 // Choose up to `nslots' feasible instructions and their possible slots. 00941 unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo); 00942 00943 while (numIssued == 0) { 00944 S.updateTime(S.getTime()+1); 00945 numIssued = FindSlotChoices(S, getDelaySlotInfo); 00946 } 00947 00948 AssignInstructionsToSlots(S, numIssued); 00949 00950 if (getDelaySlotInfo != NULL) 00951 numIssued += getDelaySlotInfo->scheduleDelayedNode(S); 00952 00953 // Print trace of scheduled instructions before newly ready ones 00954 if (SchedDebugLevel >= Sched_PrintSchedTrace) { 00955 for (cycles_t c = firstCycle; c <= S.getTime(); c++) { 00956 std::cerr << " Cycle " << (long)c <<" : Scheduled instructions:\n"; 00957 const InstrGroup* igroup = S.isched.getIGroup(c); 00958 for (unsigned int s=0; s < S.nslots; s++) { 00959 std::cerr << " "; 00960 if ((*igroup)[s] != NULL) 00961 std::cerr << * ((*igroup)[s])->getMachineInstr() << "\n"; 00962 else 00963 std::cerr << "<none>\n"; 00964 } 00965 } 00966 } 00967 00968 return numIssued; 00969 } 00970 00971 00972 static void 00973 ForwardListSchedule(SchedulingManager& S) 00974 { 00975 unsigned N; 00976 const SchedGraphNode* node; 00977 00978 S.schedPrio.initialize(); 00979 00980 while ((N = S.schedPrio.getNumReady()) > 0) { 00981 cycles_t nextCycle = S.getTime(); 00982 00983 // Choose one group of instructions for a cycle, plus any delay slot 00984 // instructions (which may overflow into successive cycles). 00985 // This will advance S.getTime() to the last cycle in which 00986 // instructions are actually issued. 00987 // 00988 unsigned numIssued = ChooseOneGroup(S); 00989 assert(numIssued > 0 && "Deadlock in list scheduling algorithm?"); 00990 00991 // Notify the priority manager of scheduled instructions and mark 00992 // any successors that may now be ready 00993 // 00994 for (cycles_t c = nextCycle; c <= S.getTime(); c++) { 00995 const InstrGroup* igroup = S.isched.getIGroup(c); 00996 for (unsigned int s=0; s < S.nslots; s++) 00997 if ((node = (*igroup)[s]) != NULL) { 00998 S.schedPrio.issuedReadyNodeAt(S.getTime(), node); 00999 MarkSuccessorsReady(S, node); 01000 } 01001 } 01002 01003 // Move to the next the next earliest cycle for which 01004 // an instruction can be issued, or the next earliest in which 01005 // one will be ready, or to the next cycle, whichever is latest. 01006 // 01007 S.updateTime(std::max(S.getTime() + 1, 01008 std::max(S.getEarliestIssueTime(), 01009 S.schedPrio.getEarliestReadyTime()))); 01010 } 01011 } 01012 01013 01014 //--------------------------------------------------------------------- 01015 // Code for filling delay slots for delayed terminator instructions 01016 // (e.g., BRANCH and RETURN). Delay slots for non-terminator 01017 // instructions (e.g., CALL) are not handled here because they almost 01018 // always can be filled with instructions from the call sequence code 01019 // before a call. That's preferable because we incur many tradeoffs here 01020 // when we cannot find single-cycle instructions that can be reordered. 01021 //---------------------------------------------------------------------- 01022 01023 static bool 01024 NodeCanFillDelaySlot(const SchedulingManager& S, 01025 const SchedGraphNode* node, 01026 const SchedGraphNode* brNode, 01027 bool nodeIsPredecessor) 01028 { 01029 assert(! node->isDummyNode()); 01030 01031 // don't put a branch in the delay slot of another branch 01032 if (S.getInstrInfo().isBranch(node->getOpcode())) 01033 return false; 01034 01035 // don't put a single-issue instruction in the delay slot of a branch 01036 if (S.schedInfo.isSingleIssue(node->getOpcode())) 01037 return false; 01038 01039 // don't put a load-use dependence in the delay slot of a branch 01040 const TargetInstrInfo& mii = S.getInstrInfo(); 01041 01042 for (SchedGraphNode::const_iterator EI = node->beginInEdges(); 01043 EI != node->endInEdges(); ++EI) 01044 if (! ((SchedGraphNode*)(*EI)->getSrc())->isDummyNode() 01045 && mii.isLoad(((SchedGraphNode*)(*EI)->getSrc())->getOpcode()) 01046 && (*EI)->getDepType() == SchedGraphEdge::CtrlDep) 01047 return false; 01048 01049 // Finally, if the instruction precedes the branch, we make sure the 01050 // instruction can be reordered relative to the branch. We simply check 01051 // if the instr. has only 1 outgoing edge, viz., a CD edge to the branch. 01052 // 01053 if (nodeIsPredecessor) { 01054 bool onlyCDEdgeToBranch = true; 01055 for (SchedGraphNode::const_iterator OEI = node->beginOutEdges(); 01056 OEI != node->endOutEdges(); ++OEI) 01057 if (! ((SchedGraphNode*)(*OEI)->getSink())->isDummyNode() 01058 && ((*OEI)->getSink() != brNode 01059 || (*OEI)->getDepType() != SchedGraphEdge::CtrlDep)) 01060 { 01061 onlyCDEdgeToBranch = false; 01062 break; 01063 } 01064 01065 if (!onlyCDEdgeToBranch) 01066 return false; 01067 } 01068 01069 return true; 01070 } 01071 01072 01073 static void 01074 MarkNodeForDelaySlot(SchedulingManager& S, 01075 SchedGraph* graph, 01076 SchedGraphNode* node, 01077 const SchedGraphNode* brNode, 01078 bool nodeIsPredecessor) 01079 { 01080 if (nodeIsPredecessor) { 01081 // If node is in the same basic block (i.e., precedes brNode), 01082 // remove it and all its incident edges from the graph. Make sure we 01083 // add dummy edges for pred/succ nodes that become entry/exit nodes. 01084 graph->eraseIncidentEdges(node, /*addDummyEdges*/ true); 01085 } else { 01086 // If the node was from a target block, add the node to the graph 01087 // and add a CD edge from brNode to node. 01088 assert(0 && "NOT IMPLEMENTED YET"); 01089 } 01090 01091 DelaySlotInfo* dinfo = S.getDelaySlotInfoForInstr(brNode, /*create*/ true); 01092 dinfo->addDelayNode(node); 01093 } 01094 01095 01096 void 01097 FindUsefulInstructionsForDelaySlots(SchedulingManager& S, 01098 SchedGraphNode* brNode, 01099 std::vector<SchedGraphNode*>& sdelayNodeVec) 01100 { 01101 const TargetInstrInfo& mii = S.getInstrInfo(); 01102 unsigned ndelays = 01103 mii.getNumDelaySlots(brNode->getOpcode()); 01104 01105 if (ndelays == 0) 01106 return; 01107 01108 sdelayNodeVec.reserve(ndelays); 01109 01110 // Use a separate vector to hold the feasible multi-cycle nodes. 01111 // These will be used if not enough single-cycle nodes are found. 01112 // 01113 std::vector<SchedGraphNode*> mdelayNodeVec; 01114 01115 for (sg_pred_iterator P = pred_begin(brNode); 01116 P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P) 01117 if (! (*P)->isDummyNode() && 01118 ! mii.isNop((*P)->getOpcode()) && 01119 NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true)) 01120 { 01121 if (mii.maxLatency((*P)->getOpcode()) > 1) 01122 mdelayNodeVec.push_back(*P); 01123 else 01124 sdelayNodeVec.push_back(*P); 01125 } 01126 01127 // If not enough single-cycle instructions were found, select the 01128 // lowest-latency multi-cycle instructions and use them. 01129 // Note that this is the most efficient code when only 1 (or even 2) 01130 // values need to be selected. 01131 // 01132 while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0) { 01133 unsigned lmin = 01134 mii.maxLatency(mdelayNodeVec[0]->getOpcode()); 01135 unsigned minIndex = 0; 01136 for (unsigned i=1; i < mdelayNodeVec.size(); i++) 01137 { 01138 unsigned li = 01139 mii.maxLatency(mdelayNodeVec[i]->getOpcode()); 01140 if (lmin >= li) 01141 { 01142 lmin = li; 01143 minIndex = i; 01144 } 01145 } 01146 sdelayNodeVec.push_back(mdelayNodeVec[minIndex]); 01147 if (sdelayNodeVec.size() < ndelays) // avoid the last erase! 01148 mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex); 01149 } 01150 } 01151 01152 01153 // Remove the NOPs currently in delay slots from the graph. 01154 // Mark instructions specified in sdelayNodeVec to replace them. 01155 // If not enough useful instructions were found, mark the NOPs to be used 01156 // for filling delay slots, otherwise, otherwise just discard them. 01157 // 01158 static void ReplaceNopsWithUsefulInstr(SchedulingManager& S, 01159 SchedGraphNode* node, 01160 // FIXME: passing vector BY VALUE!!! 01161 std::vector<SchedGraphNode*> sdelayNodeVec, 01162 SchedGraph* graph) 01163 { 01164 std::vector<SchedGraphNode*> nopNodeVec; // this will hold unused NOPs 01165 const TargetInstrInfo& mii = S.getInstrInfo(); 01166 const MachineInstr* brInstr = node->getMachineInstr(); 01167 unsigned ndelays= mii.getNumDelaySlots(brInstr->getOpcode()); 01168 assert(ndelays > 0 && "Unnecessary call to replace NOPs"); 01169 01170 // Remove the NOPs currently in delay slots from the graph. 01171 // If not enough useful instructions were found, use the NOPs to 01172 // fill delay slots, otherwise, just discard them. 01173 // 01174 unsigned int firstDelaySlotIdx = node->getOrigIndexInBB() + 1; 01175 MachineBasicBlock& MBB = node->getMachineBasicBlock(); 01176 MachineBasicBlock::iterator MBBI = MBB.begin(); 01177 std::advance(MBBI, firstDelaySlotIdx - 1); 01178 if (!(&*MBBI++ == brInstr)) { 01179 std::cerr << "Incorrect instr. index in basic block for brInstr"; 01180 abort(); 01181 } 01182 01183 // First find all useful instructions already in the delay slots 01184 // and USE THEM. We'll throw away the unused alternatives below 01185 // 01186 MachineBasicBlock::iterator Tmp = MBBI; 01187 for (unsigned i = 0; i != ndelays; ++i, ++MBBI) 01188 if (!mii.isNop(MBBI->getOpcode())) 01189 sdelayNodeVec.insert(sdelayNodeVec.begin(), 01190 graph->getGraphNodeForInstr(MBBI)); 01191 MBBI = Tmp; 01192 01193 // Then find the NOPs and keep only as many as are needed. 01194 // Put the rest in nopNodeVec to be deleted. 01195 for (unsigned i=firstDelaySlotIdx; i < firstDelaySlotIdx+ndelays; ++i, ++MBBI) 01196 if (mii.isNop(MBBI->getOpcode())) 01197 if (sdelayNodeVec.size() < ndelays) 01198 sdelayNodeVec.push_back(graph->getGraphNodeForInstr(MBBI)); 01199 else { 01200 nopNodeVec.push_back(graph->getGraphNodeForInstr(MBBI)); 01201 01202 //remove the MI from the Machine Code For Instruction 01203 const TerminatorInst *TI = MBB.getBasicBlock()->getTerminator(); 01204 MachineCodeForInstruction& llvmMvec = 01205 MachineCodeForInstruction::get((const Instruction *)TI); 01206 01207 for(MachineCodeForInstruction::iterator mciI=llvmMvec.begin(), 01208 mciE=llvmMvec.end(); mciI!=mciE; ++mciI){ 01209 if (*mciI == MBBI) 01210 llvmMvec.erase(mciI); 01211 } 01212 } 01213 01214 assert(sdelayNodeVec.size() >= ndelays); 01215 01216 // If some delay slots were already filled, throw away that many new choices 01217 if (sdelayNodeVec.size() > ndelays) 01218 sdelayNodeVec.resize(ndelays); 01219 01220 // Mark the nodes chosen for delay slots. This removes them from the graph. 01221 for (unsigned i=0; i < sdelayNodeVec.size(); i++) 01222 MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], node, true); 01223 01224 // And remove the unused NOPs from the graph. 01225 for (unsigned i=0; i < nopNodeVec.size(); i++) 01226 graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true); 01227 } 01228 01229 01230 // For all delayed instructions, choose instructions to put in the delay 01231 // slots and pull those out of the graph. Mark them for the delay slots 01232 // in the DelaySlotInfo object for that graph node. If no useful work 01233 // is found for a delay slot, use the NOP that is currently in that slot. 01234 // 01235 // We try to fill the delay slots with useful work for all instructions 01236 // EXCEPT CALLS AND RETURNS. 01237 // For CALLs and RETURNs, it is nearly always possible to use one of the 01238 // call sequence instrs and putting anything else in the delay slot could be 01239 // suboptimal. Also, it complicates generating the calling sequence code in 01240 // regalloc. 01241 // 01242 static void 01243 ChooseInstructionsForDelaySlots(SchedulingManager& S, MachineBasicBlock &MBB, 01244 SchedGraph *graph) 01245 { 01246 const TargetInstrInfo& mii = S.getInstrInfo(); 01247 01248 Instruction *termInstr = (Instruction*)MBB.getBasicBlock()->getTerminator(); 01249 MachineCodeForInstruction &termMvec=MachineCodeForInstruction::get(termInstr); 01250 std::vector<SchedGraphNode*> delayNodeVec; 01251 const MachineInstr* brInstr = NULL; 01252 01253 if (EnableFillingDelaySlots && 01254 termInstr->getOpcode() != Instruction::Ret) 01255 { 01256 // To find instructions that need delay slots without searching the full 01257 // machine code, we assume that the only delayed instructions are CALLs 01258 // or instructions generated for the terminator inst. 01259 // Find the first branch instr in the sequence of machine instrs for term 01260 // 01261 unsigned first = 0; 01262 while (first < termMvec.size() && 01263 ! mii.isBranch(termMvec[first]->getOpcode())) 01264 { 01265 ++first; 01266 } 01267 assert(first < termMvec.size() && 01268 "No branch instructions for BR? Ok, but weird! Delete assertion."); 01269 01270 brInstr = (first < termMvec.size())? termMvec[first] : NULL; 01271 01272 // Compute a vector of the nodes chosen for delay slots and then 01273 // mark delay slots to replace NOPs with these useful instructions. 01274 // 01275 if (brInstr != NULL) { 01276 SchedGraphNode* brNode = graph->getGraphNodeForInstr(brInstr); 01277 FindUsefulInstructionsForDelaySlots(S, brNode, delayNodeVec); 01278 ReplaceNopsWithUsefulInstr(S, brNode, delayNodeVec, graph); 01279 } 01280 } 01281 01282 // Also mark delay slots for other delayed instructions to hold NOPs. 01283 // Simply passing in an empty delayNodeVec will have this effect. 01284 // If brInstr is not handled above (EnableFillingDelaySlots == false), 01285 // brInstr will be NULL so this will handle the branch instrs. as well. 01286 // 01287 delayNodeVec.clear(); 01288 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) 01289 if (I != brInstr && mii.getNumDelaySlots(I->getOpcode()) > 0) { 01290 SchedGraphNode* node = graph->getGraphNodeForInstr(I); 01291 ReplaceNopsWithUsefulInstr(S, node, delayNodeVec, graph); 01292 } 01293 } 01294 01295 01296 // 01297 // Schedule the delayed branch and its delay slots 01298 // 01299 unsigned 01300 DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S) 01301 { 01302 assert(delayedNodeSlotNum < S.nslots && "Illegal slot for branch"); 01303 assert(S.isched.getInstr(delayedNodeSlotNum, delayedNodeCycle) == NULL 01304 && "Slot for branch should be empty"); 01305 01306 unsigned int nextSlot = delayedNodeSlotNum; 01307 cycles_t nextTime = delayedNodeCycle; 01308 01309 S.scheduleInstr(brNode, nextSlot, nextTime); 01310 01311 for (unsigned d=0; d < ndelays; d++) { 01312 ++nextSlot; 01313 if (nextSlot == S.nslots) { 01314 nextSlot = 0; 01315 nextTime++; 01316 } 01317 01318 // Find the first feasible instruction for this delay slot 01319 // Note that we only check for issue restrictions here. 01320 // We do *not* check for flow dependences but rely on pipeline 01321 // interlocks to resolve them. Machines without interlocks 01322 // will require this code to be modified. 01323 for (unsigned i=0; i < delayNodeVec.size(); i++) { 01324 const SchedGraphNode* dnode = delayNodeVec[i]; 01325 if ( ! S.isScheduled(dnode) 01326 && S.schedInfo.instrCanUseSlot(dnode->getOpcode(), nextSlot) 01327 && instrIsFeasible(S, dnode->getOpcode())) { 01328 S.scheduleInstr(dnode, nextSlot, nextTime); 01329 break; 01330 } 01331 } 01332 } 01333 01334 // Update current time if delay slots overflowed into later cycles. 01335 // Do this here because we know exactly which cycle is the last cycle 01336 // that contains delay slots. The next loop doesn't compute that. 01337 if (nextTime > S.getTime()) 01338 S.updateTime(nextTime); 01339 01340 // Now put any remaining instructions in the unfilled delay slots. 01341 // This could lead to suboptimal performance but needed for correctness. 01342 nextSlot = delayedNodeSlotNum; 01343 nextTime = delayedNodeCycle; 01344 for (unsigned i=0; i < delayNodeVec.size(); i++) 01345 if (! S.isScheduled(delayNodeVec[i])) { 01346 do { // find the next empty slot 01347 ++nextSlot; 01348 if (nextSlot == S.nslots) { 01349 nextSlot = 0; 01350 nextTime++; 01351 } 01352 } while (S.isched.getInstr(nextSlot, nextTime) != NULL); 01353 01354 S.scheduleInstr(delayNodeVec[i], nextSlot, nextTime); 01355 break; 01356 } 01357 01358 return 1 + ndelays; 01359 } 01360 01361 01362 // Check if the instruction would conflict with instructions already 01363 // chosen for the current cycle 01364 // 01365 static inline bool 01366 ConflictsWithChoices(const SchedulingManager& S, 01367 MachineOpCode opCode) 01368 { 01369 // Check if the instruction must issue by itself, and some feasible 01370 // choices have already been made for this cycle 01371 if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode)) 01372 return true; 01373 01374 // For each class that opCode belongs to, check if there are too many 01375 // instructions of that class. 01376 // 01377 const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode); 01378 return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc)); 01379 } 01380 01381 01382 //************************* External Functions *****************************/ 01383 01384 01385 //--------------------------------------------------------------------------- 01386 // Function: ViolatesMinimumGap 01387 // 01388 // Purpose: 01389 // Check minimum gap requirements relative to instructions scheduled in 01390 // previous cycles. 01391 // Note that we do not need to consider `nextEarliestIssueTime' here because 01392 // that is also captured in the earliest start times for each opcode. 01393 //--------------------------------------------------------------------------- 01394 01395 static inline bool 01396 ViolatesMinimumGap(const SchedulingManager& S, 01397 MachineOpCode opCode, 01398 const cycles_t inCycle) 01399 { 01400 return (inCycle < S.getEarliestStartTimeForOp(opCode)); 01401 } 01402 01403 01404 //--------------------------------------------------------------------------- 01405 // Function: instrIsFeasible 01406 // 01407 // Purpose: 01408 // Check if any issue restrictions would prevent the instruction from 01409 // being issued in the current cycle 01410 //--------------------------------------------------------------------------- 01411 01412 bool 01413 instrIsFeasible(const SchedulingManager& S, 01414 MachineOpCode opCode) 01415 { 01416 // skip the instruction if it cannot be issued due to issue restrictions 01417 // caused by previously issued instructions 01418 if (ViolatesMinimumGap(S, opCode, S.getTime())) 01419 return false; 01420 01421 // skip the instruction if it cannot be issued due to issue restrictions 01422 // caused by previously chosen instructions for the current cycle 01423 if (ConflictsWithChoices(S, opCode)) 01424 return false; 01425 01426 return true; 01427 } 01428 01429 //--------------------------------------------------------------------------- 01430 // Function: ScheduleInstructionsWithSSA 01431 // 01432 // Purpose: 01433 // Entry point for instruction scheduling on SSA form. 01434 // Schedules the machine instructions generated by instruction selection. 01435 // Assumes that register allocation has not been done, i.e., operands 01436 // are still in SSA form. 01437 //--------------------------------------------------------------------------- 01438 01439 namespace { 01440 class InstructionSchedulingWithSSA : public FunctionPass { 01441 const TargetMachine ⌖ 01442 public: 01443 inline InstructionSchedulingWithSSA(const TargetMachine &T) : target(T) {} 01444 01445 const char *getPassName() const { return "Instruction Scheduling"; } 01446 01447 // getAnalysisUsage - We use LiveVarInfo... 01448 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 01449 AU.addRequired<FunctionLiveVarInfo>(); 01450 AU.setPreservesCFG(); 01451 } 01452 01453 bool runOnFunction(Function &F); 01454 }; 01455 } // end anonymous namespace 01456 01457 01458 bool InstructionSchedulingWithSSA::runOnFunction(Function &F) 01459 { 01460 SchedGraphSet graphSet(&F, target); 01461 01462 if (SchedDebugLevel >= Sched_PrintSchedGraphs) { 01463 std::cerr << "\n*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING\n"; 01464 graphSet.dump(); 01465 } 01466 01467 for (SchedGraphSet::const_iterator GI=graphSet.begin(), GE=graphSet.end(); 01468 GI != GE; ++GI) 01469 { 01470 SchedGraph* graph = (*GI); 01471 MachineBasicBlock &MBB = graph->getBasicBlock(); 01472 01473 if (SchedDebugLevel >= Sched_PrintSchedTrace) 01474 std::cerr << "\n*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n"; 01475 01476 // expensive! 01477 SchedPriorities schedPrio(&F, graph, getAnalysis<FunctionLiveVarInfo>()); 01478 SchedulingManager S(target, graph, schedPrio); 01479 01480 ChooseInstructionsForDelaySlots(S, MBB, graph); // modifies graph 01481 ForwardListSchedule(S); // computes schedule in S 01482 RecordSchedule(MBB, S); // records schedule in BB 01483 } 01484 01485 if (SchedDebugLevel >= Sched_PrintMachineCode) { 01486 std::cerr << "\n*** Machine instructions after INSTRUCTION SCHEDULING\n"; 01487 MachineFunction::get(&F).dump(); 01488 } 01489 01490 return false; 01491 } 01492 01493 01494 FunctionPass *createInstructionSchedulingWithSSAPass(const TargetMachine &tgt) { 01495 return new InstructionSchedulingWithSSA(tgt); 01496 } 01497 01498 } // End llvm namespace 01499