LLVM API Documentation
00001 //===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by Evan Cheng and is distributed under the 00006 // University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This implements bottom-up and top-down list schedulers, using standard 00011 // algorithms. The basic approach uses a priority queue of available nodes to 00012 // schedule. One at a time, nodes are taken from the priority queue (thus in 00013 // priority order), checked for legality to schedule, and emitted if legal. 00014 // 00015 // Nodes may not be legal to schedule either due to structural hazards (e.g. 00016 // pipeline or resource constraints) or because an input to the instruction has 00017 // not completed execution. 00018 // 00019 //===----------------------------------------------------------------------===// 00020 00021 #define DEBUG_TYPE "sched" 00022 #include "llvm/CodeGen/ScheduleDAG.h" 00023 #include "llvm/Target/TargetMachine.h" 00024 #include "llvm/Target/TargetInstrInfo.h" 00025 #include "llvm/Support/Debug.h" 00026 #include "llvm/ADT/Statistic.h" 00027 #include <climits> 00028 #include <iostream> 00029 #include <queue> 00030 #include <set> 00031 #include <vector> 00032 #include "llvm/Support/CommandLine.h" 00033 using namespace llvm; 00034 00035 namespace { 00036 Statistic<> NumNoops ("scheduler", "Number of noops inserted"); 00037 Statistic<> NumStalls("scheduler", "Number of pipeline stalls"); 00038 00039 /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or 00040 /// a group of nodes flagged together. 00041 struct SUnit { 00042 SDNode *Node; // Representative node. 00043 std::vector<SDNode*> FlaggedNodes; // All nodes flagged to Node. 00044 00045 // Preds/Succs - The SUnits before/after us in the graph. The boolean value 00046 // is true if the edge is a token chain edge, false if it is a value edge. 00047 std::set<std::pair<SUnit*,bool> > Preds; // All sunit predecessors. 00048 std::set<std::pair<SUnit*,bool> > Succs; // All sunit successors. 00049 00050 short NumPredsLeft; // # of preds not scheduled. 00051 short NumSuccsLeft; // # of succs not scheduled. 00052 short NumChainPredsLeft; // # of chain preds not scheduled. 00053 short NumChainSuccsLeft; // # of chain succs not scheduled. 00054 bool isTwoAddress : 1; // Is a two-address instruction. 00055 bool isDefNUseOperand : 1; // Is a def&use operand. 00056 bool isPending : 1; // True once pending. 00057 bool isAvailable : 1; // True once available. 00058 bool isScheduled : 1; // True once scheduled. 00059 unsigned short Latency; // Node latency. 00060 unsigned CycleBound; // Upper/lower cycle to be scheduled at. 00061 unsigned Cycle; // Once scheduled, the cycle of the op. 00062 unsigned NodeNum; // Entry # of node in the node vector. 00063 00064 SUnit(SDNode *node, unsigned nodenum) 00065 : Node(node), NumPredsLeft(0), NumSuccsLeft(0), 00066 NumChainPredsLeft(0), NumChainSuccsLeft(0), 00067 isTwoAddress(false), isDefNUseOperand(false), 00068 isPending(false), isAvailable(false), isScheduled(false), 00069 Latency(0), CycleBound(0), Cycle(0), NodeNum(nodenum) {} 00070 00071 void dump(const SelectionDAG *G) const; 00072 void dumpAll(const SelectionDAG *G) const; 00073 }; 00074 } 00075 00076 void SUnit::dump(const SelectionDAG *G) const { 00077 std::cerr << "SU: "; 00078 Node->dump(G); 00079 std::cerr << "\n"; 00080 if (FlaggedNodes.size() != 0) { 00081 for (unsigned i = 0, e = FlaggedNodes.size(); i != e; i++) { 00082 std::cerr << " "; 00083 FlaggedNodes[i]->dump(G); 00084 std::cerr << "\n"; 00085 } 00086 } 00087 } 00088 00089 void SUnit::dumpAll(const SelectionDAG *G) const { 00090 dump(G); 00091 00092 std::cerr << " # preds left : " << NumPredsLeft << "\n"; 00093 std::cerr << " # succs left : " << NumSuccsLeft << "\n"; 00094 std::cerr << " # chain preds left : " << NumChainPredsLeft << "\n"; 00095 std::cerr << " # chain succs left : " << NumChainSuccsLeft << "\n"; 00096 std::cerr << " Latency : " << Latency << "\n"; 00097 00098 if (Preds.size() != 0) { 00099 std::cerr << " Predecessors:\n"; 00100 for (std::set<std::pair<SUnit*,bool> >::const_iterator I = Preds.begin(), 00101 E = Preds.end(); I != E; ++I) { 00102 if (I->second) 00103 std::cerr << " ch "; 00104 else 00105 std::cerr << " val "; 00106 I->first->dump(G); 00107 } 00108 } 00109 if (Succs.size() != 0) { 00110 std::cerr << " Successors:\n"; 00111 for (std::set<std::pair<SUnit*, bool> >::const_iterator I = Succs.begin(), 00112 E = Succs.end(); I != E; ++I) { 00113 if (I->second) 00114 std::cerr << " ch "; 00115 else 00116 std::cerr << " val "; 00117 I->first->dump(G); 00118 } 00119 } 00120 std::cerr << "\n"; 00121 } 00122 00123 //===----------------------------------------------------------------------===// 00124 /// SchedulingPriorityQueue - This interface is used to plug different 00125 /// priorities computation algorithms into the list scheduler. It implements the 00126 /// interface of a standard priority queue, where nodes are inserted in 00127 /// arbitrary order and returned in priority order. The computation of the 00128 /// priority and the representation of the queue are totally up to the 00129 /// implementation to decide. 00130 /// 00131 namespace { 00132 class SchedulingPriorityQueue { 00133 public: 00134 virtual ~SchedulingPriorityQueue() {} 00135 00136 virtual void initNodes(const std::vector<SUnit> &SUnits) = 0; 00137 virtual void releaseState() = 0; 00138 00139 virtual bool empty() const = 0; 00140 virtual void push(SUnit *U) = 0; 00141 00142 virtual void push_all(const std::vector<SUnit *> &Nodes) = 0; 00143 virtual SUnit *pop() = 0; 00144 00145 /// ScheduledNode - As each node is scheduled, this method is invoked. This 00146 /// allows the priority function to adjust the priority of node that have 00147 /// already been emitted. 00148 virtual void ScheduledNode(SUnit *Node) {} 00149 }; 00150 } 00151 00152 00153 00154 namespace { 00155 //===----------------------------------------------------------------------===// 00156 /// ScheduleDAGList - The actual list scheduler implementation. This supports 00157 /// both top-down and bottom-up scheduling. 00158 /// 00159 class ScheduleDAGList : public ScheduleDAG { 00160 private: 00161 // SDNode to SUnit mapping (many to one). 00162 std::map<SDNode*, SUnit*> SUnitMap; 00163 // The schedule. Null SUnit*'s represent noop instructions. 00164 std::vector<SUnit*> Sequence; 00165 00166 // The scheduling units. 00167 std::vector<SUnit> SUnits; 00168 00169 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if 00170 /// it is top-down. 00171 bool isBottomUp; 00172 00173 /// AvailableQueue - The priority queue to use for the available SUnits. 00174 /// 00175 SchedulingPriorityQueue *AvailableQueue; 00176 00177 /// PendingQueue - This contains all of the instructions whose operands have 00178 /// been issued, but their results are not ready yet (due to the latency of 00179 /// the operation). Once the operands becomes available, the instruction is 00180 /// added to the AvailableQueue. This keeps track of each SUnit and the 00181 /// number of cycles left to execute before the operation is available. 00182 std::vector<std::pair<unsigned, SUnit*> > PendingQueue; 00183 00184 /// HazardRec - The hazard recognizer to use. 00185 HazardRecognizer *HazardRec; 00186 00187 public: 00188 ScheduleDAGList(SelectionDAG &dag, MachineBasicBlock *bb, 00189 const TargetMachine &tm, bool isbottomup, 00190 SchedulingPriorityQueue *availqueue, 00191 HazardRecognizer *HR) 00192 : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup), 00193 AvailableQueue(availqueue), HazardRec(HR) { 00194 } 00195 00196 ~ScheduleDAGList() { 00197 delete HazardRec; 00198 delete AvailableQueue; 00199 } 00200 00201 void Schedule(); 00202 00203 void dumpSchedule() const; 00204 00205 private: 00206 SUnit *NewSUnit(SDNode *N); 00207 void ReleasePred(SUnit *PredSU, bool isChain, unsigned CurCycle); 00208 void ReleaseSucc(SUnit *SuccSU, bool isChain); 00209 void ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle); 00210 void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); 00211 void ListScheduleTopDown(); 00212 void ListScheduleBottomUp(); 00213 void BuildSchedUnits(); 00214 void EmitSchedule(); 00215 }; 00216 } // end anonymous namespace 00217 00218 HazardRecognizer::~HazardRecognizer() {} 00219 00220 00221 /// NewSUnit - Creates a new SUnit and return a ptr to it. 00222 SUnit *ScheduleDAGList::NewSUnit(SDNode *N) { 00223 SUnits.push_back(SUnit(N, SUnits.size())); 00224 return &SUnits.back(); 00225 } 00226 00227 /// BuildSchedUnits - Build SUnits from the selection dag that we are input. 00228 /// This SUnit graph is similar to the SelectionDAG, but represents flagged 00229 /// together nodes with a single SUnit. 00230 void ScheduleDAGList::BuildSchedUnits() { 00231 // Reserve entries in the vector for each of the SUnits we are creating. This 00232 // ensure that reallocation of the vector won't happen, so SUnit*'s won't get 00233 // invalidated. 00234 SUnits.reserve(std::distance(DAG.allnodes_begin(), DAG.allnodes_end())); 00235 00236 const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); 00237 00238 for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin(), 00239 E = DAG.allnodes_end(); NI != E; ++NI) { 00240 if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. 00241 continue; 00242 00243 // If this node has already been processed, stop now. 00244 if (SUnitMap[NI]) continue; 00245 00246 SUnit *NodeSUnit = NewSUnit(NI); 00247 00248 // See if anything is flagged to this node, if so, add them to flagged 00249 // nodes. Nodes can have at most one flag input and one flag output. Flags 00250 // are required the be the last operand and result of a node. 00251 00252 // Scan up, adding flagged preds to FlaggedNodes. 00253 SDNode *N = NI; 00254 while (N->getNumOperands() && 00255 N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) { 00256 N = N->getOperand(N->getNumOperands()-1).Val; 00257 NodeSUnit->FlaggedNodes.push_back(N); 00258 SUnitMap[N] = NodeSUnit; 00259 } 00260 00261 // Scan down, adding this node and any flagged succs to FlaggedNodes if they 00262 // have a user of the flag operand. 00263 N = NI; 00264 while (N->getValueType(N->getNumValues()-1) == MVT::Flag) { 00265 SDOperand FlagVal(N, N->getNumValues()-1); 00266 00267 // There are either zero or one users of the Flag result. 00268 bool HasFlagUse = false; 00269 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); 00270 UI != E; ++UI) 00271 if (FlagVal.isOperand(*UI)) { 00272 HasFlagUse = true; 00273 NodeSUnit->FlaggedNodes.push_back(N); 00274 SUnitMap[N] = NodeSUnit; 00275 N = *UI; 00276 break; 00277 } 00278 if (!HasFlagUse) break; 00279 } 00280 00281 // Now all flagged nodes are in FlaggedNodes and N is the bottom-most node. 00282 // Update the SUnit 00283 NodeSUnit->Node = N; 00284 SUnitMap[N] = NodeSUnit; 00285 00286 // Compute the latency for the node. We use the sum of the latencies for 00287 // all nodes flagged together into this SUnit. 00288 if (InstrItins.isEmpty()) { 00289 // No latency information. 00290 NodeSUnit->Latency = 1; 00291 } else { 00292 NodeSUnit->Latency = 0; 00293 if (N->isTargetOpcode()) { 00294 unsigned SchedClass = TII->getSchedClass(N->getTargetOpcode()); 00295 InstrStage *S = InstrItins.begin(SchedClass); 00296 InstrStage *E = InstrItins.end(SchedClass); 00297 for (; S != E; ++S) 00298 NodeSUnit->Latency += S->Cycles; 00299 } 00300 for (unsigned i = 0, e = NodeSUnit->FlaggedNodes.size(); i != e; ++i) { 00301 SDNode *FNode = NodeSUnit->FlaggedNodes[i]; 00302 if (FNode->isTargetOpcode()) { 00303 unsigned SchedClass = TII->getSchedClass(FNode->getTargetOpcode()); 00304 InstrStage *S = InstrItins.begin(SchedClass); 00305 InstrStage *E = InstrItins.end(SchedClass); 00306 for (; S != E; ++S) 00307 NodeSUnit->Latency += S->Cycles; 00308 } 00309 } 00310 } 00311 } 00312 00313 // Pass 2: add the preds, succs, etc. 00314 for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { 00315 SUnit *SU = &SUnits[su]; 00316 SDNode *MainNode = SU->Node; 00317 00318 if (MainNode->isTargetOpcode() && 00319 TII->isTwoAddrInstr(MainNode->getTargetOpcode())) 00320 SU->isTwoAddress = true; 00321 00322 // Find all predecessors and successors of the group. 00323 // Temporarily add N to make code simpler. 00324 SU->FlaggedNodes.push_back(MainNode); 00325 00326 for (unsigned n = 0, e = SU->FlaggedNodes.size(); n != e; ++n) { 00327 SDNode *N = SU->FlaggedNodes[n]; 00328 00329 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 00330 SDNode *OpN = N->getOperand(i).Val; 00331 if (isPassiveNode(OpN)) continue; // Not scheduled. 00332 SUnit *OpSU = SUnitMap[OpN]; 00333 assert(OpSU && "Node has no SUnit!"); 00334 if (OpSU == SU) continue; // In the same group. 00335 00336 MVT::ValueType OpVT = N->getOperand(i).getValueType(); 00337 assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); 00338 bool isChain = OpVT == MVT::Other; 00339 00340 if (SU->Preds.insert(std::make_pair(OpSU, isChain)).second) { 00341 if (!isChain) { 00342 SU->NumPredsLeft++; 00343 } else { 00344 SU->NumChainPredsLeft++; 00345 } 00346 } 00347 if (OpSU->Succs.insert(std::make_pair(SU, isChain)).second) { 00348 if (!isChain) { 00349 OpSU->NumSuccsLeft++; 00350 } else { 00351 OpSU->NumChainSuccsLeft++; 00352 } 00353 } 00354 } 00355 } 00356 00357 // Remove MainNode from FlaggedNodes again. 00358 SU->FlaggedNodes.pop_back(); 00359 } 00360 00361 return; 00362 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 00363 SUnits[su].dumpAll(&DAG)); 00364 } 00365 00366 /// EmitSchedule - Emit the machine code in scheduled order. 00367 void ScheduleDAGList::EmitSchedule() { 00368 std::map<SDNode*, unsigned> VRBaseMap; 00369 for (unsigned i = 0, e = Sequence.size(); i != e; i++) { 00370 if (SUnit *SU = Sequence[i]) { 00371 for (unsigned j = 0, ee = SU->FlaggedNodes.size(); j != ee; j++) 00372 EmitNode(SU->FlaggedNodes[j], VRBaseMap); 00373 EmitNode(SU->Node, VRBaseMap); 00374 } else { 00375 // Null SUnit* is a noop. 00376 EmitNoop(); 00377 } 00378 } 00379 } 00380 00381 /// dump - dump the schedule. 00382 void ScheduleDAGList::dumpSchedule() const { 00383 for (unsigned i = 0, e = Sequence.size(); i != e; i++) { 00384 if (SUnit *SU = Sequence[i]) 00385 SU->dump(&DAG); 00386 else 00387 std::cerr << "**** NOOP ****\n"; 00388 } 00389 } 00390 00391 /// Schedule - Schedule the DAG using list scheduling. 00392 void ScheduleDAGList::Schedule() { 00393 DEBUG(std::cerr << "********** List Scheduling **********\n"); 00394 00395 // Build scheduling units. 00396 BuildSchedUnits(); 00397 00398 AvailableQueue->initNodes(SUnits); 00399 00400 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 00401 if (isBottomUp) 00402 ListScheduleBottomUp(); 00403 else 00404 ListScheduleTopDown(); 00405 00406 AvailableQueue->releaseState(); 00407 00408 DEBUG(std::cerr << "*** Final schedule ***\n"); 00409 DEBUG(dumpSchedule()); 00410 DEBUG(std::cerr << "\n"); 00411 00412 // Emit in scheduled order 00413 EmitSchedule(); 00414 } 00415 00416 //===----------------------------------------------------------------------===// 00417 // Bottom-Up Scheduling 00418 //===----------------------------------------------------------------------===// 00419 00420 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 00421 /// the Available queue is the count reaches zero. Also update its cycle bound. 00422 void ScheduleDAGList::ReleasePred(SUnit *PredSU, bool isChain, 00423 unsigned CurCycle) { 00424 // FIXME: the distance between two nodes is not always == the predecessor's 00425 // latency. For example, the reader can very well read the register written 00426 // by the predecessor later than the issue cycle. It also depends on the 00427 // interrupt model (drain vs. freeze). 00428 PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency); 00429 00430 if (!isChain) 00431 PredSU->NumSuccsLeft--; 00432 else 00433 PredSU->NumChainSuccsLeft--; 00434 00435 #ifndef NDEBUG 00436 if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) { 00437 std::cerr << "*** List scheduling failed! ***\n"; 00438 PredSU->dump(&DAG); 00439 std::cerr << " has been released too many times!\n"; 00440 assert(0); 00441 } 00442 #endif 00443 00444 if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) { 00445 // EntryToken has to go last! Special case it here. 00446 if (PredSU->Node->getOpcode() != ISD::EntryToken) { 00447 PredSU->isAvailable = true; 00448 AvailableQueue->push(PredSU); 00449 } 00450 } 00451 } 00452 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 00453 /// count of its predecessors. If a predecessor pending count is zero, add it to 00454 /// the Available queue. 00455 void ScheduleDAGList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 00456 DEBUG(std::cerr << "*** Scheduling [" << CurCycle << "]: "); 00457 DEBUG(SU->dump(&DAG)); 00458 SU->Cycle = CurCycle; 00459 00460 Sequence.push_back(SU); 00461 00462 // Bottom up: release predecessors 00463 for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(), 00464 E = SU->Preds.end(); I != E; ++I) { 00465 ReleasePred(I->first, I->second, CurCycle); 00466 // FIXME: This is something used by the priority function that it should 00467 // calculate directly. 00468 if (!I->second) 00469 SU->NumPredsLeft--; 00470 } 00471 } 00472 00473 /// isReady - True if node's lower cycle bound is less or equal to the current 00474 /// scheduling cycle. Always true if all nodes have uniform latency 1. 00475 static inline bool isReady(SUnit *SU, unsigned CurrCycle) { 00476 return SU->CycleBound <= CurrCycle; 00477 } 00478 00479 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 00480 /// schedulers. 00481 void ScheduleDAGList::ListScheduleBottomUp() { 00482 unsigned CurrCycle = 0; 00483 // Add root to Available queue. 00484 AvailableQueue->push(SUnitMap[DAG.getRoot().Val]); 00485 00486 // While Available queue is not empty, grab the node with the highest 00487 // priority. If it is not ready put it back. Schedule the node. 00488 std::vector<SUnit*> NotReady; 00489 while (!AvailableQueue->empty()) { 00490 SUnit *CurrNode = AvailableQueue->pop(); 00491 00492 while (!isReady(CurrNode, CurrCycle)) { 00493 NotReady.push_back(CurrNode); 00494 CurrNode = AvailableQueue->pop(); 00495 } 00496 00497 // Add the nodes that aren't ready back onto the available list. 00498 AvailableQueue->push_all(NotReady); 00499 NotReady.clear(); 00500 00501 ScheduleNodeBottomUp(CurrNode, CurrCycle); 00502 CurrCycle++; 00503 CurrNode->isScheduled = true; 00504 AvailableQueue->ScheduledNode(CurrNode); 00505 } 00506 00507 // Add entry node last 00508 if (DAG.getEntryNode().Val != DAG.getRoot().Val) { 00509 SUnit *Entry = SUnitMap[DAG.getEntryNode().Val]; 00510 Sequence.push_back(Entry); 00511 } 00512 00513 // Reverse the order if it is bottom up. 00514 std::reverse(Sequence.begin(), Sequence.end()); 00515 00516 00517 #ifndef NDEBUG 00518 // Verify that all SUnits were scheduled. 00519 bool AnyNotSched = false; 00520 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 00521 if (SUnits[i].NumSuccsLeft != 0 || SUnits[i].NumChainSuccsLeft != 0) { 00522 if (!AnyNotSched) 00523 std::cerr << "*** List scheduling failed! ***\n"; 00524 SUnits[i].dump(&DAG); 00525 std::cerr << "has not been scheduled!\n"; 00526 AnyNotSched = true; 00527 } 00528 } 00529 assert(!AnyNotSched); 00530 #endif 00531 } 00532 00533 //===----------------------------------------------------------------------===// 00534 // Top-Down Scheduling 00535 //===----------------------------------------------------------------------===// 00536 00537 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 00538 /// the PendingQueue if the count reaches zero. 00539 void ScheduleDAGList::ReleaseSucc(SUnit *SuccSU, bool isChain) { 00540 if (!isChain) 00541 SuccSU->NumPredsLeft--; 00542 else 00543 SuccSU->NumChainPredsLeft--; 00544 00545 assert(SuccSU->NumPredsLeft >= 0 && SuccSU->NumChainPredsLeft >= 0 && 00546 "List scheduling internal error"); 00547 00548 if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) { 00549 // Compute how many cycles it will be before this actually becomes 00550 // available. This is the max of the start time of all predecessors plus 00551 // their latencies. 00552 unsigned AvailableCycle = 0; 00553 for (std::set<std::pair<SUnit*, bool> >::iterator I = SuccSU->Preds.begin(), 00554 E = SuccSU->Preds.end(); I != E; ++I) { 00555 // If this is a token edge, we don't need to wait for the latency of the 00556 // preceeding instruction (e.g. a long-latency load) unless there is also 00557 // some other data dependence. 00558 unsigned PredDoneCycle = I->first->Cycle; 00559 if (!I->second) 00560 PredDoneCycle += I->first->Latency; 00561 else if (I->first->Latency) 00562 PredDoneCycle += 1; 00563 00564 AvailableCycle = std::max(AvailableCycle, PredDoneCycle); 00565 } 00566 00567 PendingQueue.push_back(std::make_pair(AvailableCycle, SuccSU)); 00568 SuccSU->isPending = true; 00569 } 00570 } 00571 00572 /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 00573 /// count of its successors. If a successor pending count is zero, add it to 00574 /// the Available queue. 00575 void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 00576 DEBUG(std::cerr << "*** Scheduling [" << CurCycle << "]: "); 00577 DEBUG(SU->dump(&DAG)); 00578 00579 Sequence.push_back(SU); 00580 SU->Cycle = CurCycle; 00581 00582 // Bottom up: release successors. 00583 for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Succs.begin(), 00584 E = SU->Succs.end(); I != E; ++I) 00585 ReleaseSucc(I->first, I->second); 00586 } 00587 00588 /// ListScheduleTopDown - The main loop of list scheduling for top-down 00589 /// schedulers. 00590 void ScheduleDAGList::ListScheduleTopDown() { 00591 unsigned CurCycle = 0; 00592 SUnit *Entry = SUnitMap[DAG.getEntryNode().Val]; 00593 00594 // All leaves to Available queue. 00595 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 00596 // It is available if it has no predecessors. 00597 if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) { 00598 AvailableQueue->push(&SUnits[i]); 00599 SUnits[i].isAvailable = SUnits[i].isPending = true; 00600 } 00601 } 00602 00603 // Emit the entry node first. 00604 ScheduleNodeTopDown(Entry, CurCycle); 00605 HazardRec->EmitInstruction(Entry->Node); 00606 00607 // While Available queue is not empty, grab the node with the highest 00608 // priority. If it is not ready put it back. Schedule the node. 00609 std::vector<SUnit*> NotReady; 00610 while (!AvailableQueue->empty() || !PendingQueue.empty()) { 00611 // Check to see if any of the pending instructions are ready to issue. If 00612 // so, add them to the available queue. 00613 for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { 00614 if (PendingQueue[i].first == CurCycle) { 00615 AvailableQueue->push(PendingQueue[i].second); 00616 PendingQueue[i].second->isAvailable = true; 00617 PendingQueue[i] = PendingQueue.back(); 00618 PendingQueue.pop_back(); 00619 --i; --e; 00620 } else { 00621 assert(PendingQueue[i].first > CurCycle && "Negative latency?"); 00622 } 00623 } 00624 00625 // If there are no instructions available, don't try to issue anything, and 00626 // don't advance the hazard recognizer. 00627 if (AvailableQueue->empty()) { 00628 ++CurCycle; 00629 continue; 00630 } 00631 00632 SUnit *FoundSUnit = 0; 00633 SDNode *FoundNode = 0; 00634 00635 bool HasNoopHazards = false; 00636 while (!AvailableQueue->empty()) { 00637 SUnit *CurSUnit = AvailableQueue->pop(); 00638 00639 // Get the node represented by this SUnit. 00640 FoundNode = CurSUnit->Node; 00641 00642 // If this is a pseudo op, like copyfromreg, look to see if there is a 00643 // real target node flagged to it. If so, use the target node. 00644 for (unsigned i = 0, e = CurSUnit->FlaggedNodes.size(); 00645 FoundNode->getOpcode() < ISD::BUILTIN_OP_END && i != e; ++i) 00646 FoundNode = CurSUnit->FlaggedNodes[i]; 00647 00648 HazardRecognizer::HazardType HT = HazardRec->getHazardType(FoundNode); 00649 if (HT == HazardRecognizer::NoHazard) { 00650 FoundSUnit = CurSUnit; 00651 break; 00652 } 00653 00654 // Remember if this is a noop hazard. 00655 HasNoopHazards |= HT == HazardRecognizer::NoopHazard; 00656 00657 NotReady.push_back(CurSUnit); 00658 } 00659 00660 // Add the nodes that aren't ready back onto the available list. 00661 if (!NotReady.empty()) { 00662 AvailableQueue->push_all(NotReady); 00663 NotReady.clear(); 00664 } 00665 00666 // If we found a node to schedule, do it now. 00667 if (FoundSUnit) { 00668 ScheduleNodeTopDown(FoundSUnit, CurCycle); 00669 HazardRec->EmitInstruction(FoundNode); 00670 FoundSUnit->isScheduled = true; 00671 AvailableQueue->ScheduledNode(FoundSUnit); 00672 00673 // If this is a pseudo-op node, we don't want to increment the current 00674 // cycle. 00675 if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! 00676 ++CurCycle; 00677 } else if (!HasNoopHazards) { 00678 // Otherwise, we have a pipeline stall, but no other problem, just advance 00679 // the current cycle and try again. 00680 DEBUG(std::cerr << "*** Advancing cycle, no work to do\n"); 00681 HazardRec->AdvanceCycle(); 00682 ++NumStalls; 00683 ++CurCycle; 00684 } else { 00685 // Otherwise, we have no instructions to issue and we have instructions 00686 // that will fault if we don't do this right. This is the case for 00687 // processors without pipeline interlocks and other cases. 00688 DEBUG(std::cerr << "*** Emitting noop\n"); 00689 HazardRec->EmitNoop(); 00690 Sequence.push_back(0); // NULL SUnit* -> noop 00691 ++NumNoops; 00692 ++CurCycle; 00693 } 00694 } 00695 00696 #ifndef NDEBUG 00697 // Verify that all SUnits were scheduled. 00698 bool AnyNotSched = false; 00699 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 00700 if (SUnits[i].NumPredsLeft != 0 || SUnits[i].NumChainPredsLeft != 0) { 00701 if (!AnyNotSched) 00702 std::cerr << "*** List scheduling failed! ***\n"; 00703 SUnits[i].dump(&DAG); 00704 std::cerr << "has not been scheduled!\n"; 00705 AnyNotSched = true; 00706 } 00707 } 00708 assert(!AnyNotSched); 00709 #endif 00710 } 00711 00712 //===----------------------------------------------------------------------===// 00713 // RegReductionPriorityQueue Implementation 00714 //===----------------------------------------------------------------------===// 00715 // 00716 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 00717 // to reduce register pressure. 00718 // 00719 namespace { 00720 class RegReductionPriorityQueue; 00721 00722 /// Sorting functions for the Available queue. 00723 struct ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 00724 RegReductionPriorityQueue *SPQ; 00725 ls_rr_sort(RegReductionPriorityQueue *spq) : SPQ(spq) {} 00726 ls_rr_sort(const ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 00727 00728 bool operator()(const SUnit* left, const SUnit* right) const; 00729 }; 00730 } // end anonymous namespace 00731 00732 namespace { 00733 class RegReductionPriorityQueue : public SchedulingPriorityQueue { 00734 // SUnits - The SUnits for the current graph. 00735 const std::vector<SUnit> *SUnits; 00736 00737 // SethiUllmanNumbers - The SethiUllman number for each node. 00738 std::vector<int> SethiUllmanNumbers; 00739 00740 std::priority_queue<SUnit*, std::vector<SUnit*>, ls_rr_sort> Queue; 00741 public: 00742 RegReductionPriorityQueue() : Queue(ls_rr_sort(this)) { 00743 } 00744 00745 void initNodes(const std::vector<SUnit> &sunits) { 00746 SUnits = &sunits; 00747 // Calculate node priorities. 00748 CalculatePriorities(); 00749 } 00750 void releaseState() { 00751 SUnits = 0; 00752 SethiUllmanNumbers.clear(); 00753 } 00754 00755 unsigned getSethiUllmanNumber(unsigned NodeNum) const { 00756 assert(NodeNum < SethiUllmanNumbers.size()); 00757 return SethiUllmanNumbers[NodeNum]; 00758 } 00759 00760 bool empty() const { return Queue.empty(); } 00761 00762 void push(SUnit *U) { 00763 Queue.push(U); 00764 } 00765 void push_all(const std::vector<SUnit *> &Nodes) { 00766 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 00767 Queue.push(Nodes[i]); 00768 } 00769 00770 SUnit *pop() { 00771 SUnit *V = Queue.top(); 00772 Queue.pop(); 00773 return V; 00774 } 00775 private: 00776 void CalculatePriorities(); 00777 int CalcNodePriority(const SUnit *SU); 00778 }; 00779 } 00780 00781 bool ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 00782 unsigned LeftNum = left->NodeNum; 00783 unsigned RightNum = right->NodeNum; 00784 00785 int LBonus = (int)left ->isDefNUseOperand; 00786 int RBonus = (int)right->isDefNUseOperand; 00787 00788 // Special tie breaker: if two nodes share a operand, the one that 00789 // use it as a def&use operand is preferred. 00790 if (left->isTwoAddress && !right->isTwoAddress) { 00791 SDNode *DUNode = left->Node->getOperand(0).Val; 00792 if (DUNode->isOperand(right->Node)) 00793 LBonus++; 00794 } 00795 if (!left->isTwoAddress && right->isTwoAddress) { 00796 SDNode *DUNode = right->Node->getOperand(0).Val; 00797 if (DUNode->isOperand(left->Node)) 00798 RBonus++; 00799 } 00800 00801 // Priority1 is just the number of live range genned. 00802 int LPriority1 = left ->NumPredsLeft - LBonus; 00803 int RPriority1 = right->NumPredsLeft - RBonus; 00804 int LPriority2 = SPQ->getSethiUllmanNumber(LeftNum) + LBonus; 00805 int RPriority2 = SPQ->getSethiUllmanNumber(RightNum) + RBonus; 00806 00807 if (LPriority1 > RPriority1) 00808 return true; 00809 else if (LPriority1 == RPriority1) 00810 if (LPriority2 < RPriority2) 00811 return true; 00812 else if (LPriority2 == RPriority2) 00813 if (left->CycleBound > right->CycleBound) 00814 return true; 00815 00816 return false; 00817 } 00818 00819 00820 /// CalcNodePriority - Priority is the Sethi Ullman number. 00821 /// Smaller number is the higher priority. 00822 int RegReductionPriorityQueue::CalcNodePriority(const SUnit *SU) { 00823 int &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; 00824 if (SethiUllmanNumber != INT_MIN) 00825 return SethiUllmanNumber; 00826 00827 if (SU->Preds.size() == 0) { 00828 SethiUllmanNumber = 1; 00829 } else { 00830 int Extra = 0; 00831 for (std::set<std::pair<SUnit*, bool> >::const_iterator 00832 I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { 00833 if (I->second) continue; // ignore chain preds. 00834 SUnit *PredSU = I->first; 00835 int PredSethiUllman = CalcNodePriority(PredSU); 00836 if (PredSethiUllman > SethiUllmanNumber) { 00837 SethiUllmanNumber = PredSethiUllman; 00838 Extra = 0; 00839 } else if (PredSethiUllman == SethiUllmanNumber) 00840 Extra++; 00841 } 00842 00843 if (SU->Node->getOpcode() != ISD::TokenFactor) 00844 SethiUllmanNumber += Extra; 00845 else 00846 SethiUllmanNumber = (Extra == 1) ? 0 : Extra-1; 00847 } 00848 00849 return SethiUllmanNumber; 00850 } 00851 00852 /// CalculatePriorities - Calculate priorities of all scheduling units. 00853 void RegReductionPriorityQueue::CalculatePriorities() { 00854 SethiUllmanNumbers.assign(SUnits->size(), INT_MIN); 00855 00856 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 00857 CalcNodePriority(&(*SUnits)[i]); 00858 } 00859 00860 //===----------------------------------------------------------------------===// 00861 // LatencyPriorityQueue Implementation 00862 //===----------------------------------------------------------------------===// 00863 // 00864 // This is a SchedulingPriorityQueue that schedules using latency information to 00865 // reduce the length of the critical path through the basic block. 00866 // 00867 namespace { 00868 class LatencyPriorityQueue; 00869 00870 /// Sorting functions for the Available queue. 00871 struct latency_sort : public std::binary_function<SUnit*, SUnit*, bool> { 00872 LatencyPriorityQueue *PQ; 00873 latency_sort(LatencyPriorityQueue *pq) : PQ(pq) {} 00874 latency_sort(const latency_sort &RHS) : PQ(RHS.PQ) {} 00875 00876 bool operator()(const SUnit* left, const SUnit* right) const; 00877 }; 00878 } // end anonymous namespace 00879 00880 namespace { 00881 class LatencyPriorityQueue : public SchedulingPriorityQueue { 00882 // SUnits - The SUnits for the current graph. 00883 const std::vector<SUnit> *SUnits; 00884 00885 // Latencies - The latency (max of latency from this node to the bb exit) 00886 // for each node. 00887 std::vector<int> Latencies; 00888 00889 /// NumNodesSolelyBlocking - This vector contains, for every node in the 00890 /// Queue, the number of nodes that the node is the sole unscheduled 00891 /// predecessor for. This is used as a tie-breaker heuristic for better 00892 /// mobility. 00893 std::vector<unsigned> NumNodesSolelyBlocking; 00894 00895 std::priority_queue<SUnit*, std::vector<SUnit*>, latency_sort> Queue; 00896 public: 00897 LatencyPriorityQueue() : Queue(latency_sort(this)) { 00898 } 00899 00900 void initNodes(const std::vector<SUnit> &sunits) { 00901 SUnits = &sunits; 00902 // Calculate node priorities. 00903 CalculatePriorities(); 00904 } 00905 void releaseState() { 00906 SUnits = 0; 00907 Latencies.clear(); 00908 } 00909 00910 unsigned getLatency(unsigned NodeNum) const { 00911 assert(NodeNum < Latencies.size()); 00912 return Latencies[NodeNum]; 00913 } 00914 00915 unsigned getNumSolelyBlockNodes(unsigned NodeNum) const { 00916 assert(NodeNum < NumNodesSolelyBlocking.size()); 00917 return NumNodesSolelyBlocking[NodeNum]; 00918 } 00919 00920 bool empty() const { return Queue.empty(); } 00921 00922 virtual void push(SUnit *U) { 00923 push_impl(U); 00924 } 00925 void push_impl(SUnit *U); 00926 00927 void push_all(const std::vector<SUnit *> &Nodes) { 00928 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 00929 push_impl(Nodes[i]); 00930 } 00931 00932 SUnit *pop() { 00933 SUnit *V = Queue.top(); 00934 Queue.pop(); 00935 return V; 00936 } 00937 00938 // ScheduledNode - As nodes are scheduled, we look to see if there are any 00939 // successor nodes that have a single unscheduled predecessor. If so, that 00940 // single predecessor has a higher priority, since scheduling it will make 00941 // the node available. 00942 void ScheduledNode(SUnit *Node); 00943 00944 private: 00945 void CalculatePriorities(); 00946 int CalcLatency(const SUnit &SU); 00947 void AdjustPriorityOfUnscheduledPreds(SUnit *SU); 00948 00949 /// RemoveFromPriorityQueue - This is a really inefficient way to remove a 00950 /// node from a priority queue. We should roll our own heap to make this 00951 /// better or something. 00952 void RemoveFromPriorityQueue(SUnit *SU) { 00953 std::vector<SUnit*> Temp; 00954 00955 assert(!Queue.empty() && "Not in queue!"); 00956 while (Queue.top() != SU) { 00957 Temp.push_back(Queue.top()); 00958 Queue.pop(); 00959 assert(!Queue.empty() && "Not in queue!"); 00960 } 00961 00962 // Remove the node from the PQ. 00963 Queue.pop(); 00964 00965 // Add all the other nodes back. 00966 for (unsigned i = 0, e = Temp.size(); i != e; ++i) 00967 Queue.push(Temp[i]); 00968 } 00969 }; 00970 } 00971 00972 bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { 00973 unsigned LHSNum = LHS->NodeNum; 00974 unsigned RHSNum = RHS->NodeNum; 00975 00976 // The most important heuristic is scheduling the critical path. 00977 unsigned LHSLatency = PQ->getLatency(LHSNum); 00978 unsigned RHSLatency = PQ->getLatency(RHSNum); 00979 if (LHSLatency < RHSLatency) return true; 00980 if (LHSLatency > RHSLatency) return false; 00981 00982 // After that, if two nodes have identical latencies, look to see if one will 00983 // unblock more other nodes than the other. 00984 unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); 00985 unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); 00986 if (LHSBlocked < RHSBlocked) return true; 00987 if (LHSBlocked > RHSBlocked) return false; 00988 00989 // Finally, just to provide a stable ordering, use the node number as a 00990 // deciding factor. 00991 return LHSNum < RHSNum; 00992 } 00993 00994 00995 /// CalcNodePriority - Calculate the maximal path from the node to the exit. 00996 /// 00997 int LatencyPriorityQueue::CalcLatency(const SUnit &SU) { 00998 int &Latency = Latencies[SU.NodeNum]; 00999 if (Latency != -1) 01000 return Latency; 01001 01002 int MaxSuccLatency = 0; 01003 for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU.Succs.begin(), 01004 E = SU.Succs.end(); I != E; ++I) 01005 MaxSuccLatency = std::max(MaxSuccLatency, CalcLatency(*I->first)); 01006 01007 return Latency = MaxSuccLatency + SU.Latency; 01008 } 01009 01010 /// CalculatePriorities - Calculate priorities of all scheduling units. 01011 void LatencyPriorityQueue::CalculatePriorities() { 01012 Latencies.assign(SUnits->size(), -1); 01013 NumNodesSolelyBlocking.assign(SUnits->size(), 0); 01014 01015 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 01016 CalcLatency((*SUnits)[i]); 01017 } 01018 01019 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor 01020 /// of SU, return it, otherwise return null. 01021 static SUnit *getSingleUnscheduledPred(SUnit *SU) { 01022 SUnit *OnlyAvailablePred = 0; 01023 for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Preds.begin(), 01024 E = SU->Preds.end(); I != E; ++I) 01025 if (!I->first->isScheduled) { 01026 // We found an available, but not scheduled, predecessor. If it's the 01027 // only one we have found, keep track of it... otherwise give up. 01028 if (OnlyAvailablePred && OnlyAvailablePred != I->first) 01029 return 0; 01030 OnlyAvailablePred = I->first; 01031 } 01032 01033 return OnlyAvailablePred; 01034 } 01035 01036 void LatencyPriorityQueue::push_impl(SUnit *SU) { 01037 // Look at all of the successors of this node. Count the number of nodes that 01038 // this node is the sole unscheduled node for. 01039 unsigned NumNodesBlocking = 0; 01040 for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Succs.begin(), 01041 E = SU->Succs.end(); I != E; ++I) 01042 if (getSingleUnscheduledPred(I->first) == SU) 01043 ++NumNodesBlocking; 01044 NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; 01045 01046 Queue.push(SU); 01047 } 01048 01049 01050 // ScheduledNode - As nodes are scheduled, we look to see if there are any 01051 // successor nodes that have a single unscheduled predecessor. If so, that 01052 // single predecessor has a higher priority, since scheduling it will make 01053 // the node available. 01054 void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { 01055 for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Succs.begin(), 01056 E = SU->Succs.end(); I != E; ++I) 01057 AdjustPriorityOfUnscheduledPreds(I->first); 01058 } 01059 01060 /// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just 01061 /// scheduled. If SU is not itself available, then there is at least one 01062 /// predecessor node that has not been scheduled yet. If SU has exactly ONE 01063 /// unscheduled predecessor, we want to increase its priority: it getting 01064 /// scheduled will make this node available, so it is better than some other 01065 /// node of the same priority that will not make a node available. 01066 void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { 01067 if (SU->isPending) return; // All preds scheduled. 01068 01069 SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); 01070 if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return; 01071 01072 // Okay, we found a single predecessor that is available, but not scheduled. 01073 // Since it is available, it must be in the priority queue. First remove it. 01074 RemoveFromPriorityQueue(OnlyAvailablePred); 01075 01076 // Reinsert the node into the priority queue, which recomputes its 01077 // NumNodesSolelyBlocking value. 01078 push(OnlyAvailablePred); 01079 } 01080 01081 01082 //===----------------------------------------------------------------------===// 01083 // Public Constructor Functions 01084 //===----------------------------------------------------------------------===// 01085 01086 llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAG &DAG, 01087 MachineBasicBlock *BB) { 01088 return new ScheduleDAGList(DAG, BB, DAG.getTarget(), true, 01089 new RegReductionPriorityQueue(), 01090 new HazardRecognizer()); 01091 } 01092 01093 /// createTDListDAGScheduler - This creates a top-down list scheduler with the 01094 /// specified hazard recognizer. 01095 ScheduleDAG* llvm::createTDListDAGScheduler(SelectionDAG &DAG, 01096 MachineBasicBlock *BB, 01097 HazardRecognizer *HR) { 01098 return new ScheduleDAGList(DAG, BB, DAG.getTarget(), false, 01099 new LatencyPriorityQueue(), 01100 HR); 01101 }