LLVM API Documentation
//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Evan Cheng and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a top-down list scheduler, using standard algorithms.
// The basic approach uses a priority queue of available nodes to schedule.
// One at a time, nodes are taken from the priority queue (thus in priority
// order), checked for legality to schedule, and emitted if legal.
//
// Nodes may not be legal to schedule either due to structural hazards (e.g.
// pipeline or resource constraints) or because an input to the instruction has
// not completed execution.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sched"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Target/MRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Visibility.h"
#include "llvm/ADT/Statistic.h"
#include <climits>
#include <iostream>
#include <queue>
using namespace llvm;

namespace {
  static Statistic<> NumNoops ("scheduler", "Number of noops inserted");
  static Statistic<> NumStalls("scheduler", "Number of pipeline stalls");
}

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGList - The actual list scheduler implementation.  This supports
/// top-down scheduling.
///
class VISIBILITY_HIDDEN ScheduleDAGList : public ScheduleDAG {
private:
  /// AvailableQueue - The priority queue to use for the available SUnits.
  ///
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation).  Once the operands become available, the instruction is
  /// added to the AvailableQueue.  This keeps track of each SUnit and the
  /// number of cycles left to execute before the operation is available.
  std::vector<std::pair<unsigned, SUnit*> > PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  HazardRecognizer *HazardRec;

public:
  ScheduleDAGList(SelectionDAG &dag, MachineBasicBlock *bb,
                  const TargetMachine &tm,
                  SchedulingPriorityQueue *availqueue,
                  HazardRecognizer *HR)
    : ScheduleDAG(dag, bb, tm),
      AvailableQueue(availqueue), HazardRec(HR) {
  }

  ~ScheduleDAGList() {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule();

private:
  void ReleaseSucc(SUnit *SuccSU, bool isChain);
  void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
  void ListScheduleTopDown();
};
}  // end anonymous namespace

HazardRecognizer::~HazardRecognizer() {}


/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGList::Schedule() {
  DEBUG(std::cerr << "********** List Scheduling **********\n");

  // Build scheduling units.
  BuildSchedUnits();

  AvailableQueue->initNodes(SUnits);

  ListScheduleTopDown();

  AvailableQueue->releaseState();

  DEBUG(std::cerr << "*** Final schedule ***\n");
  DEBUG(dumpSchedule());
  DEBUG(std::cerr << "\n");

  // Emit in scheduled order.
  EmitSchedule();
}

//===----------------------------------------------------------------------===//
// Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor.  Add it to
/// the PendingQueue if the count reaches zero.
void ScheduleDAGList::ReleaseSucc(SUnit *SuccSU, bool isChain) {
  if (!isChain)
    SuccSU->NumPredsLeft--;
  else
    SuccSU->NumChainPredsLeft--;

  assert(SuccSU->NumPredsLeft >= 0 && SuccSU->NumChainPredsLeft >= 0 &&
         "List scheduling internal error");

  if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) {
    // Compute how many cycles it will be before this actually becomes
    // available.  This is the max of the start time of all predecessors plus
    // their latencies.
    unsigned AvailableCycle = 0;
    for (std::set<std::pair<SUnit*, bool> >::iterator I = SuccSU->Preds.begin(),
         E = SuccSU->Preds.end(); I != E; ++I) {
      // If this is a token edge, we don't need to wait for the latency of the
      // preceding instruction (e.g. a long-latency load) unless there is also
      // some other data dependence.
      unsigned PredDoneCycle = I->first->Cycle;
      if (!I->second)
        PredDoneCycle += I->first->Latency;
      else if (I->first->Latency)
        PredDoneCycle += 1;

      AvailableCycle = std::max(AvailableCycle, PredDoneCycle);
    }

    PendingQueue.push_back(std::make_pair(AvailableCycle, SuccSU));
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule.  Decrement the pending
/// count of its successors.  If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DEBUG(std::cerr << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(&DAG));

  Sequence.push_back(SU);
  SU->Cycle = CurCycle;

  // Top down: release successors.
  for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Succs.begin(),
       E = SU->Succs.end(); I != E; ++I)
    ReleaseSucc(I->first, I->second);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGList::ListScheduleTopDown() {
  unsigned CurCycle = 0;
  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = SUnits[i].isPending = true;
    }
  }

  // Emit the entry node first.
  ScheduleNodeTopDown(Entry, CurCycle);
  HazardRec->EmitInstruction(Entry->Node);

  // While the Available queue is not empty, grab the node with the highest
  // priority.  If it is not ready, put it back; otherwise schedule the node.
  std::vector<SUnit*> NotReady;
  while (!AvailableQueue->empty() || !PendingQueue.empty()) {
    // Check to see if any of the pending instructions are ready to issue.  If
    // so, add them to the available queue.
    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
      if (PendingQueue[i].first == CurCycle) {
        AvailableQueue->push(PendingQueue[i].second);
        PendingQueue[i].second->isAvailable = true;
        PendingQueue[i] = PendingQueue.back();
        PendingQueue.pop_back();
        --i; --e;
      } else {
        assert(PendingQueue[i].first > CurCycle && "Negative latency?");
      }
    }

    // If there are no instructions available, don't try to issue anything, and
    // don't advance the hazard recognizer.
    if (AvailableQueue->empty()) {
      ++CurCycle;
      continue;
    }

    SUnit *FoundSUnit = 0;
    SDNode *FoundNode = 0;

    bool HasNoopHazards = false;
    while (!AvailableQueue->empty()) {
      SUnit *CurSUnit = AvailableQueue->pop();

      // Get the node represented by this SUnit.
      FoundNode = CurSUnit->Node;

      // If this is a pseudo op, like copyfromreg, look to see if there is a
      // real target node flagged to it.  If so, use the target node.
      for (unsigned i = 0, e = CurSUnit->FlaggedNodes.size();
           FoundNode->getOpcode() < ISD::BUILTIN_OP_END && i != e; ++i)
        FoundNode = CurSUnit->FlaggedNodes[i];

      HazardRecognizer::HazardType HT = HazardRec->getHazardType(FoundNode);
      if (HT == HazardRecognizer::NoHazard) {
        FoundSUnit = CurSUnit;
        break;
      }

      // Remember if this is a noop hazard.
      HasNoopHazards |= HT == HazardRecognizer::NoopHazard;

      NotReady.push_back(CurSUnit);
    }

    // Add the nodes that aren't ready back onto the available list.
    if (!NotReady.empty()) {
      AvailableQueue->push_all(NotReady);
      NotReady.clear();
    }

    // If we found a node to schedule, do it now.
    if (FoundSUnit) {
      ScheduleNodeTopDown(FoundSUnit, CurCycle);
      HazardRec->EmitInstruction(FoundNode);
      FoundSUnit->isScheduled = true;
      AvailableQueue->ScheduledNode(FoundSUnit);

      // If this is a pseudo-op node, we don't want to increment the current
      // cycle.
      if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
        ++CurCycle;
    } else if (!HasNoopHazards) {
      // Otherwise, we have a pipeline stall, but no other problem; just advance
      // the current cycle and try again.
      DEBUG(std::cerr << "*** Advancing cycle, no work to do\n");
      HazardRec->AdvanceCycle();
      ++NumStalls;
      ++CurCycle;
    } else {
      // Otherwise, we have no instructions to issue and we have instructions
      // that will fault if we don't do this right.  This is the case for
      // processors without pipeline interlocks, among other cases.
      DEBUG(std::cerr << "*** Emitting noop\n");
      HazardRec->EmitNoop();
      Sequence.push_back(0);   // NULL SUnit* -> noop
      ++NumNoops;
      ++CurCycle;
    }
  }

#ifndef NDEBUG
  // Verify that all SUnits were scheduled.
  bool AnyNotSched = false;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    if (SUnits[i].NumPredsLeft != 0 || SUnits[i].NumChainPredsLeft != 0) {
      if (!AnyNotSched)
        std::cerr << "*** List scheduling failed! ***\n";
      SUnits[i].dump(&DAG);
      std::cerr << "has not been scheduled!\n";
      AnyNotSched = true;
    }
  }
  assert(!AnyNotSched);
#endif
}

//===----------------------------------------------------------------------===//
// LatencyPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using latency information to
// reduce the length of the critical path through the basic block.
//
namespace {
class LatencyPriorityQueue;

/// Sorting functions for the Available queue.
struct latency_sort : public std::binary_function<SUnit*, SUnit*, bool> {
  LatencyPriorityQueue *PQ;
  latency_sort(LatencyPriorityQueue *pq) : PQ(pq) {}
  latency_sort(const latency_sort &RHS) : PQ(RHS.PQ) {}

  bool operator()(const SUnit* left, const SUnit* right) const;
};
}  // end anonymous namespace

namespace {
class LatencyPriorityQueue : public SchedulingPriorityQueue {
  // SUnits - The SUnits for the current graph.
  const std::vector<SUnit> *SUnits;

  // Latencies - The latency (max of latency from this node to the bb exit)
  // for each node.
  std::vector<int> Latencies;

  /// NumNodesSolelyBlocking - This vector contains, for every node in the
  /// Queue, the number of nodes that the node is the sole unscheduled
  /// predecessor for.  This is used as a tie-breaker heuristic for better
  /// mobility.
  std::vector<unsigned> NumNodesSolelyBlocking;

  std::priority_queue<SUnit*, std::vector<SUnit*>, latency_sort> Queue;
public:
  LatencyPriorityQueue() : Queue(latency_sort(this)) {
  }

  void initNodes(const std::vector<SUnit> &sunits) {
    SUnits = &sunits;
    // Calculate node priorities.
    CalculatePriorities();
  }
  void releaseState() {
    SUnits = 0;
    Latencies.clear();
  }

  unsigned getLatency(unsigned NodeNum) const {
    assert(NodeNum < Latencies.size());
    return Latencies[NodeNum];
  }

  unsigned getNumSolelyBlockNodes(unsigned NodeNum) const {
    assert(NodeNum < NumNodesSolelyBlocking.size());
    return NumNodesSolelyBlocking[NodeNum];
  }

  bool empty() const { return Queue.empty(); }

  virtual void push(SUnit *U) {
    push_impl(U);
  }
  void push_impl(SUnit *U);

  void push_all(const std::vector<SUnit *> &Nodes) {
    for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
      push_impl(Nodes[i]);
  }

  SUnit *pop() {
    if (empty()) return NULL;
    SUnit *V = Queue.top();
    Queue.pop();
    return V;
  }

  // ScheduledNode - As nodes are scheduled, we look to see if there are any
  // successor nodes that have a single unscheduled predecessor.  If so, that
  // single predecessor has a higher priority, since scheduling it will make
  // the node available.
  void ScheduledNode(SUnit *Node);

private:
  void CalculatePriorities();
  int CalcLatency(const SUnit &SU);
  void AdjustPriorityOfUnscheduledPreds(SUnit *SU);

  /// RemoveFromPriorityQueue - This is a really inefficient way to remove a
  /// node from a priority queue.  We should roll our own heap to make this
  /// better or something.
  void RemoveFromPriorityQueue(SUnit *SU) {
    std::vector<SUnit*> Temp;

    assert(!Queue.empty() && "Not in queue!");
    while (Queue.top() != SU) {
      Temp.push_back(Queue.top());
      Queue.pop();
      assert(!Queue.empty() && "Not in queue!");
    }

    // Remove the node from the PQ.
    Queue.pop();

    // Add all the other nodes back.
    for (unsigned i = 0, e = Temp.size(); i != e; ++i)
      Queue.push(Temp[i]);
  }
};
}

bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
  unsigned LHSNum = LHS->NodeNum;
  unsigned RHSNum = RHS->NodeNum;

  // The most important heuristic is scheduling the critical path.
  unsigned LHSLatency = PQ->getLatency(LHSNum);
  unsigned RHSLatency = PQ->getLatency(RHSNum);
  if (LHSLatency < RHSLatency) return true;
  if (LHSLatency > RHSLatency) return false;

  // After that, if two nodes have identical latencies, look to see if one will
  // unblock more other nodes than the other.
  unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
  unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
  if (LHSBlocked < RHSBlocked) return true;
  if (LHSBlocked > RHSBlocked) return false;

  // Finally, just to provide a stable ordering, use the node number as a
  // deciding factor.
  return LHSNum < RHSNum;
}


/// CalcLatency - Calculate the maximal path from the node to the exit.
///
int LatencyPriorityQueue::CalcLatency(const SUnit &SU) {
  int &Latency = Latencies[SU.NodeNum];
  if (Latency != -1)
    return Latency;

  int MaxSuccLatency = 0;
  for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU.Succs.begin(),
       E = SU.Succs.end(); I != E; ++I)
    MaxSuccLatency = std::max(MaxSuccLatency, CalcLatency(*I->first));

  return Latency = MaxSuccLatency + SU.Latency;
}

/// CalculatePriorities - Calculate priorities of all scheduling units.
void LatencyPriorityQueue::CalculatePriorities() {
  Latencies.assign(SUnits->size(), -1);
  NumNodesSolelyBlocking.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcLatency((*SUnits)[i]);
}

/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
/// of SU, return it, otherwise return null.
static SUnit *getSingleUnscheduledPred(SUnit *SU) {
  SUnit *OnlyAvailablePred = 0;
  for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Preds.begin(),
       E = SU->Preds.end(); I != E; ++I)
    if (!I->first->isScheduled) {
      // We found an available, but not scheduled, predecessor.  If it's the
      // only one we have found, keep track of it... otherwise give up.
      if (OnlyAvailablePred && OnlyAvailablePred != I->first)
        return 0;
      OnlyAvailablePred = I->first;
    }

  return OnlyAvailablePred;
}

void LatencyPriorityQueue::push_impl(SUnit *SU) {
  // Look at all of the successors of this node.  Count the number of nodes
  // that this node is the sole unscheduled predecessor for.
  unsigned NumNodesBlocking = 0;
  for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Succs.begin(),
       E = SU->Succs.end(); I != E; ++I)
    if (getSingleUnscheduledPred(I->first) == SU)
      ++NumNodesBlocking;
  NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;

  Queue.push(SU);
}


// ScheduledNode - As nodes are scheduled, we look to see if there are any
// successor nodes that have a single unscheduled predecessor.  If so, that
// single predecessor has a higher priority, since scheduling it will make
// the node available.
void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
  for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Succs.begin(),
       E = SU->Succs.end(); I != E; ++I)
    AdjustPriorityOfUnscheduledPreds(I->first);
}

/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
/// scheduled.  If SU is not itself available, then there is at least one
/// predecessor node that has not been scheduled yet.  If SU has exactly ONE
/// unscheduled predecessor, we want to increase its priority: it getting
/// scheduled will make this node available, so it is better than some other
/// node of the same priority that will not make a node available.
void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
  if (SU->isPending) return;  // All preds scheduled.

  SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
  if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;

  // Okay, we found a single predecessor that is available, but not scheduled.
  // Since it is available, it must be in the priority queue.  First remove it.
  RemoveFromPriorityQueue(OnlyAvailablePred);

  // Reinsert the node into the priority queue, which recomputes its
  // NumNodesSolelyBlocking value.
  push(OnlyAvailablePred);
}


//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//

/// createTDListDAGScheduler - This creates a top-down list scheduler with the
/// specified hazard recognizer.
ScheduleDAG* llvm::createTDListDAGScheduler(SelectionDAG &DAG,
                                            MachineBasicBlock *BB,
                                            HazardRecognizer *HR) {
  return new ScheduleDAGList(DAG, BB, DAG.getTarget(),
                             new LatencyPriorityQueue(),
                             HR);
}
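
A note on the AvailableCycle computation in ReleaseSucc above: a data edge makes the successor wait out the predecessor's full latency, while a chain (token) edge adds at most one cycle, even behind a long-latency operation. The standalone sketch below is an illustration only, not code from this file; ToyUnit-style types (ToyPred, ComputeAvailableCycle) are hypothetical stand-ins that mirror the loop's arithmetic so the difference is easy to see.

    #include <algorithm>
    #include <iostream>
    #include <vector>

    // Hypothetical, simplified predecessor record: the cycle the predecessor
    // was scheduled in, its latency, and whether the edge to the successor is
    // a chain (token) edge.  These are illustration-only types, not LLVM's.
    struct ToyPred {
      unsigned Cycle;
      unsigned Latency;
      bool isChain;
    };

    // Mirrors the AvailableCycle loop in ReleaseSucc: a data edge waits out
    // the predecessor's full latency, a chain edge waits at most one extra
    // cycle.
    static unsigned ComputeAvailableCycle(const std::vector<ToyPred> &Preds) {
      unsigned AvailableCycle = 0;
      for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
        unsigned PredDoneCycle = Preds[i].Cycle;
        if (!Preds[i].isChain)
          PredDoneCycle += Preds[i].Latency;
        else if (Preds[i].Latency)
          PredDoneCycle += 1;
        AvailableCycle = std::max(AvailableCycle, PredDoneCycle);
      }
      return AvailableCycle;
    }

    int main() {
      std::vector<ToyPred> Preds;
      ToyPred LongLoad = { 0, 5, true };   // long-latency load, but only a token edge
      ToyPred DataPred = { 2, 1, false };  // real data dependence
      Preds.push_back(LongLoad);
      Preds.push_back(DataPred);

      // Prints 3: the token edge contributes 0 + 1, the data edge 2 + 1,
      // so the successor does not wait for the load's full 5-cycle latency.
      std::cout << ComputeAvailableCycle(Preds) << "\n";
      return 0;
    }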
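
A second detail that is easy to misread is the direction of latency_sort: std::priority_queue treats its comparator as "less than" and always pops the largest element, so returning true when the left node has the smaller latency-to-exit means the node with the deepest remaining critical path is issued first, which is exactly the heuristic the comment describes. The sketch below mimics only the shape of latency_sort with hypothetical ToyUnit and toy_latency_sort types (not LLVM's) to make the pop order visible.

    #include <iostream>
    #include <queue>
    #include <vector>

    // Hypothetical stand-in for an SUnit: a node number and the latency from
    // this node to the block exit (what LatencyPriorityQueue::getLatency
    // would return).
    struct ToyUnit {
      unsigned NodeNum;
      unsigned Latency;
    };

    // Same shape as latency_sort: "less than" by latency, node number as the
    // final tie-breaker.  Under this ordering std::priority_queue pops the
    // *largest* element, i.e. the unit with the greatest latency-to-exit.
    struct toy_latency_sort {
      bool operator()(const ToyUnit *L, const ToyUnit *R) const {
        if (L->Latency != R->Latency)
          return L->Latency < R->Latency;
        return L->NodeNum < R->NodeNum;
      }
    };

    int main() {
      ToyUnit A = { 0, 3 }, B = { 1, 7 }, C = { 2, 5 };
      std::priority_queue<ToyUnit*, std::vector<ToyUnit*>, toy_latency_sort> Q;
      Q.push(&A);
      Q.push(&B);
      Q.push(&C);

      // Pops in decreasing latency order: node 1 (7), node 2 (5), node 0 (3);
      // the deepest remaining critical path is always considered first.
      while (!Q.empty()) {
        std::cout << "node " << Q.top()->NodeNum
                  << ", latency " << Q.top()->Latency << "\n";
        Q.pop();
      }
      return 0;
    }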