LLVM API Documentation

SparcV9SchedInfo.cpp

Go to the documentation of this file.
00001 //===-- SparcV9SchedInfo.cpp ----------------------------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // Describe the scheduling characteristics of the UltraSparc IIi.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "SparcV9Internals.h"
00015 
00016 using namespace llvm;
00017 
00018 /*---------------------------------------------------------------------------
00019 Scheduling guidelines for SPARC IIi:
00020 
00021 I-Cache alignment rules (pg 326)
00022 -- Align a branch target instruction so that its entire group is within
00023    the same cache line (may be 1-4 instructions).
00024 ** Don't let a branch that is predicted taken be the last instruction
00025    on an I-cache line: delay slot will need an entire line to be fetched
00026 -- Make a FP instruction or a branch be the 4th instruction in a group.
00027    For branches, there are tradeoffs in reordering to make this happen
00028    (see pg. 327).
00029 ** Don't put a branch in a group that crosses a 32-byte boundary!
00030    An artificial branch is inserted after every 32 bytes, and having
00031    another branch will force the group to be broken into 2 groups.
00032 
00033 iTLB rules:
00034 -- Don't let a loop span two memory pages, if possible
00035 
00036 Branch prediction performance:
00037 -- Don't make the branch in a delay slot the target of a branch
00038 -- Try not to have 2 predicted branches within a group of 4 instructions
00039    (because each such group has a single branch target field).
00040 -- Try to align branches in slots 0, 2, 4 or 6 of a cache line (to avoid
00041    the wrong prediction bits being used in some cases).
00042 
00043 D-Cache timing constraints:
00044 -- Signed int loads of less than 64 bits have 3 cycle latency, not 2
00045 -- All other loads that hit in D-Cache have 2 cycle latency
00046 -- All loads are returned IN ORDER, so a D-Cache miss will delay a later hit
00047 -- Mis-aligned loads or stores cause a trap.  In particular, replace
00048    mis-aligned FP double precision l/s with 2 single-precision l/s.
00049 -- Simulations of integer codes show increase in avg. group size of
00050    33% when code (including esp. non-faulting loads) is moved across
00051    one branch, and 50% across 2 branches.
00052 
00053 E-Cache timing constraints:
00054 -- Scheduling for E-cache (D-Cache misses) is effective (due to load buffering)
00055 
00056 Store buffer timing constraints:
00057 -- Stores can be executed in same cycle as instruction producing the value
00058 -- Stores are buffered and have lower priority for E-cache until
00059    highwater mark is reached in the store buffer (5 stores)
00060 
00061 Pipeline constraints:
00062 -- Shifts can only use IEU0.
00063 -- CC setting instructions can only use IEU1.
00064 -- Several other instructions must only use IEU1:
00065    EDGE(?), ARRAY(?), CALL, JMPL, BPr, PST, and FCMP.
00066 -- Two instructions cannot store to the same register file in a single cycle
00067    (single write port per file).
00068 
00069 Issue and grouping constraints:
00070 -- FP and branch instructions must use slot 4.
00071 -- Shift instructions cannot be grouped with other IEU0-specific instructions.
00072 -- CC setting instructions cannot be grouped with other IEU1-specific instrs.
00073 -- Several instructions must be issued in a single-instruction group:
00074         MOVcc or MOVr, MULs/x and DIVs/x, SAVE/RESTORE, many others
00075 -- A CALL or JMPL breaks a group, ie, is not combined with subsequent instrs.
00076 --
00077 --
00078 
00079 Branch delay slot scheduling rules:
00080 -- A CTI couple (two back-to-back CTI instructions in the dynamic stream)
00081    has a 9-instruction penalty: the entire pipeline is flushed when the
00082    second instruction reaches stage 9 (W-Writeback).
00083 -- Avoid putting multicycle instructions, and instructions that may cause
00084    load misses, in the delay slot of an annulling branch.
00085 -- Avoid putting WR, SAVE..., RESTORE and RETURN instructions in the
00086    delay slot of an annulling branch.
00087 
00088  *--------------------------------------------------------------------------- */
00089 
00090 //---------------------------------------------------------------------------
00091 // List of CPUResources for UltraSPARC IIi.
00092 //---------------------------------------------------------------------------
00093 // Each is built from a label and an integer (presumably its capacity -- TODO confirm in CPUResource).
00094 static const CPUResource  AllIssueSlots(   "All Instr Slots", 4);   // charged at cycle 0 by every class table that has entries
00095 static const CPUResource  IntIssueSlots(   "Int Instr Slots", 3);   // charged by the IEUN/IEU0/IEU1 classes
00096 static const CPUResource  First3IssueSlots("Instr Slots 0-3", 3);   // charged by the LD and ST classes (whose feasible slots are 0-2)
00097 static const CPUResource  LSIssueSlots(    "Load-Store Instr Slot", 1);     // also charged to JMPL via the usage deltas
00098 static const CPUResource  CTIIssueSlots(   "Ctrl Transfer Instr Slot", 1);  // charged by the CTI class
00099 static const CPUResource  FPAIssueSlots(   "FP Instr Slot 1", 1);   // charged by the FPA class
00100 static const CPUResource  FPMIssueSlots(   "FP Instr Slot 2", 1);   // charged by the FPM class
00101
00102 // IEUN instructions can use either ALU and should use IAluN.
00103 // IEU0 instructions must use ALU 1 and should use both IAluN and IAlu0.
00104 // IEU1 instructions must use ALU 2 and should use both IAluN and IAlu1.
00105 static const CPUResource  IAluN("Int ALU 1or2", 2);
00106 static const CPUResource  IAlu0("Int ALU 1",    1);   // also charged by the CTI and SINGLE classes
00107 static const CPUResource  IAlu1("Int ALU 2",    1);
00108
00109 static const CPUResource  LSAluC1("Load/Store Unit Addr Cycle", 1);   // listed at cycle 1 in the LD and ST tables
00110 static const CPUResource  LSAluC2("Load/Store Unit Issue Cycle", 1);  // listed at cycle 2 in the LD and ST tables
00111 static const CPUResource  LdReturn("Load Return Unit", 1);            // cycle 2 for LD; moved to cycle 3 for LDSB/LDSH/LDSW by the usage deltas
00112
00113 static const CPUResource  FPMAluC1("FP Mul/Div Alu Cycle 1", 1);      // FPM class, cycle 2
00114 static const CPUResource  FPMAluC2("FP Mul/Div Alu Cycle 2", 1);      // FPM class, cycle 3
00115 static const CPUResource  FPMAluC3("FP Mul/Div Alu Cycle 3", 1);      // FPM class, cycle 4
00116
00117 static const CPUResource  FPAAluC1("FP Other Alu Cycle 1", 1);        // FPA class, cycle 2
00118 static const CPUResource  FPAAluC2("FP Other Alu Cycle 2", 1);        // FPA class, cycle 3
00119 static const CPUResource  FPAAluC3("FP Other Alu Cycle 3", 1);        // FPA class, cycle 4
00120
00121 static const CPUResource  IRegReadPorts("Int Reg ReadPorts", INT_MAX); // CHECK (not referenced by any table in this file)
00122 static const CPUResource  IRegWritePorts("Int Reg WritePorts", 2);     // CHECK (listed at cycle 6 by the IEU* and LD tables)
00123 static const CPUResource  FPRegReadPorts("FP Reg Read Ports", INT_MAX);// CHECK (charged at cycle 1 by FPM and FPA)
00124 static const CPUResource  FPRegWritePorts("FP Reg Write Ports", 1);    // CHECK (charged at cycle 6 by FPM and FPA)
00125
00126 static const CPUResource  CTIDelayCycle( "CTI  delay cycle", 1);       // fake resource: models GROUP-stage stalls behind a CTI
00127 static const CPUResource  FCMPDelayCycle("FCMP delay cycle", 1);       // fake resource: models issue stalls after an FCMP
00128 
00129 
00130 
00131 //---------------------------------------------------------------------------
00132 // const InstrClassRUsage SparcV9RUsageDesc[]
00133 //
00134 // Purpose:
00135 //   Resource usage information for instruction in each scheduling class.
00136 //   The InstrRUsage Objects for individual classes are specified first.
00137 //   Note that fetch and decode are decoupled from the execution pipelines
00138 //   via an instr buffer, so they are not included in the cycles below.
00139 //---------------------------------------------------------------------------
00140 
00141 static const InstrClassRUsage NoneClassRUsage = {  // SPARC_NONE: feasible in slots 0-3, charges no resources
00142   SPARC_NONE,
00143   /*totCycles*/ 7,
00144
00145   /* maxIssueNum */ 4,
00146   /* isSingleIssue */ false,
00147   /* breaksGroup */ false,
00148   /* numBubbles */ 0,
00149
00150   /*numSlots*/ 4,
00151   /* feasibleSlots[] */ { 0, 1, 2, 3 },
00152
00153   /*numEntries*/ 0,  // no usage entries: V[] below holds only the per-cycle placeholders
00154   /* V[] */ {
00155     /*Cycle G */
00156     /*Cycle E */
00157     /*Cycle C */
00158     /*Cycle N1*/
00159     /*Cycle N2*/
00160     /*Cycle N3*/
00161     /*Cycle W */
00162   }
00163 };
00164 
00165 static const InstrClassRUsage IEUNClassRUsage = {  // SPARC_IEUN: integer op that may use either ALU (charges IAluN only)
00166   SPARC_IEUN,
00167   /*totCycles*/ 7,
00168
00169   /* maxIssueNum */ 3,
00170   /* isSingleIssue */ false,
00171   /* breaksGroup */ false,
00172   /* numBubbles */ 0,
00173
00174   /*numSlots*/ 3,
00175   /* feasibleSlots[] */ { 0, 1, 2 },
00176
00177   /*numEntries*/ 4,  // matches the four initializers in V[]
00178   /* V[] */ {        // each entry: { resource id, start cycle, number of cycles }
00179     /*Cycle G */ { AllIssueSlots.rid, 0, 1 },
00180                  { IntIssueSlots.rid, 0, 1 },
00181     /*Cycle E */ { IAluN.rid, 1, 1 },
00182     /*Cycle C */
00183     /*Cycle N1*/
00184     /*Cycle N2*/
00185     /*Cycle N3*/
00186     /*Cycle W */ { IRegWritePorts.rid, 6, 1  }
00187   }
00188 };
00189 
00190 static const InstrClassRUsage IEU0ClassRUsage = {  // SPARC_IEU0: integer op tied to ALU 1 (charges IAluN and IAlu0)
00191   SPARC_IEU0,
00192   /*totCycles*/ 7,
00193
00194   /* maxIssueNum */ 1,
00195   /* isSingleIssue */ false,
00196   /* breaksGroup */ false,
00197   /* numBubbles */ 0,
00198
00199   /*numSlots*/ 3,
00200   /* feasibleSlots[] */ { 0, 1, 2 },
00201
00202   /*numEntries*/ 5,  // matches the five initializers in V[]
00203   /* V[] */ {        // each entry: { resource id, start cycle, number of cycles }
00204     /*Cycle G */ { AllIssueSlots.rid, 0, 1 },
00205                  { IntIssueSlots.rid, 0, 1 },
00206     /*Cycle E */ { IAluN.rid, 1, 1 },
00207                  { IAlu0.rid, 1, 1 },
00208     /*Cycle C */
00209     /*Cycle N1*/
00210     /*Cycle N2*/
00211     /*Cycle N3*/
00212     /*Cycle W */ { IRegWritePorts.rid, 6, 1 }
00213   }
00214 };
00215 
00216 static const InstrClassRUsage IEU1ClassRUsage = {  // SPARC_IEU1: integer op tied to ALU 2 (charges IAluN and IAlu1)
00217   SPARC_IEU1,
00218   /*totCycles*/ 7,
00219
00220   /* maxIssueNum */ 1,
00221   /* isSingleIssue */ false,
00222   /* breaksGroup */ false,
00223   /* numBubbles */ 0,
00224
00225   /*numSlots*/ 3,
00226   /* feasibleSlots[] */ { 0, 1, 2 },
00227
00228   /*numEntries*/ 5,  // matches the five initializers in V[]
00229   /* V[] */ {        // each entry: { resource id, start cycle, number of cycles }
00230     /*Cycle G */ { AllIssueSlots.rid, 0, 1 },
00231                { IntIssueSlots.rid, 0, 1 },
00232     /*Cycle E */ { IAluN.rid, 1, 1 },
00233                { IAlu1.rid, 1, 1 },
00234     /*Cycle C */
00235     /*Cycle N1*/
00236     /*Cycle N2*/
00237     /*Cycle N3*/
00238     /*Cycle W */ { IRegWritePorts.rid, 6, 1 }
00239   }
00240 };
00241 
00242 static const InstrClassRUsage FPMClassRUsage = {  // SPARC_FPM: FP mul/div; walks FPMAluC1..C3 over cycles 2-4
00243   SPARC_FPM,
00244   /*totCycles*/ 7,
00245
00246   /* maxIssueNum */ 1,
00247   /* isSingleIssue */ false,
00248   /* breaksGroup */ false,
00249   /* numBubbles */ 0,
00250
00251   /*numSlots*/ 4,
00252   /* feasibleSlots[] */ { 0, 1, 2, 3 },
00253
00254   /*numEntries*/ 7,  // matches the seven initializers in V[]
00255   /* V[] */ {        // each entry: { resource id, start cycle, number of cycles }
00256     /*Cycle G */ { AllIssueSlots.rid,   0, 1 },
00257                  { FPMIssueSlots.rid,   0, 1 },
00258     /*Cycle E */ { FPRegReadPorts.rid,  1, 1 },
00259     /*Cycle C */ { FPMAluC1.rid,        2, 1 },
00260     /*Cycle N1*/ { FPMAluC2.rid,        3, 1 },
00261     /*Cycle N2*/ { FPMAluC3.rid,        4, 1 },
00262     /*Cycle N3*/
00263     /*Cycle W */ { FPRegWritePorts.rid, 6, 1 }
00264   }
00265 };
00266 
00267 static const InstrClassRUsage FPAClassRUsage = {  // SPARC_FPA: other FP ops; walks FPAAluC1..C3 over cycles 2-4
00268   SPARC_FPA,
00269   /*totCycles*/ 7,
00270
00271   /* maxIssueNum */ 1,
00272   /* isSingleIssue */ false,
00273   /* breaksGroup */ false,
00274   /* numBubbles */ 0,
00275
00276   /*numSlots*/ 4,
00277   /* feasibleSlots[] */ { 0, 1, 2, 3 },
00278
00279   /*numEntries*/ 7,  // matches the seven initializers in V[]
00280   /* V[] */ {        // each entry: { resource id, start cycle, number of cycles }
00281     /*Cycle G */ { AllIssueSlots.rid,   0, 1 },
00282                  { FPAIssueSlots.rid,   0, 1 },
00283     /*Cycle E */ { FPRegReadPorts.rid,  1, 1 },
00284     /*Cycle C */ { FPAAluC1.rid,        2, 1 },
00285     /*Cycle N1*/ { FPAAluC2.rid,        3, 1 },
00286     /*Cycle N2*/ { FPAAluC3.rid,        4, 1 },
00287     /*Cycle N3*/
00288     /*Cycle W */ { FPRegWritePorts.rid, 6, 1 }
00289   }
00290 };
00291 
00292 static const InstrClassRUsage LDClassRUsage = {  // SPARC_LD: load; feasible in slots 0-2, charges the single LS issue slot
00293   SPARC_LD,
00294   /*totCycles*/ 7,
00295
00296   /* maxIssueNum */ 1,
00297   /* isSingleIssue */ false,
00298   /* breaksGroup */ false,
00299   /* numBubbles */ 0,
00300
00301   /*numSlots*/ 3,
00302   /* feasibleSlots[] */ { 0, 1, 2, },
00303
00304   /*numEntries*/ 6,  // NOTE(review): V[] lists 7 initializers; if only the first numEntries are read, the IRegWritePorts row is never applied -- confirm intent
00305   /* V[] */ {        // each entry: { resource id, start cycle, number of cycles }
00306     /*Cycle G */ { AllIssueSlots.rid,    0, 1 },
00307                  { First3IssueSlots.rid, 0, 1 },
00308                  { LSIssueSlots.rid,     0, 1 },
00309     /*Cycle E */ { LSAluC1.rid,          1, 1 },
00310     /*Cycle C */ { LSAluC2.rid,          2, 1 },
00311                  { LdReturn.rid,         2, 1 },
00312     /*Cycle N1*/
00313     /*Cycle N2*/
00314     /*Cycle N3*/
00315     /*Cycle W */ { IRegWritePorts.rid,   6, 1 }
00316   }
00317 };
00318 
00319 static const InstrClassRUsage STClassRUsage = {  // SPARC_ST: store; feasible in slots 0-2, charges the single LS issue slot
00320   SPARC_ST,
00321   /*totCycles*/ 7,
00322
00323   /* maxIssueNum */ 1,
00324   /* isSingleIssue */ false,
00325   /* breaksGroup */ false,
00326   /* numBubbles */ 0,
00327
00328   /*numSlots*/ 3,
00329   /* feasibleSlots[] */ { 0, 1, 2 },
00330
00331   /*numEntries*/ 5,  // counts every initializer in V[] (three issue rows, LSAluC1, LSAluC2); was 4, which left the LSAluC2 row uncounted
00332   /* V[] */ {        // each entry: { resource id, start cycle, number of cycles }
00333     /*Cycle G */ { AllIssueSlots.rid,    0, 1 },
00334                  { First3IssueSlots.rid, 0, 1 },
00335                  { LSIssueSlots.rid,     0, 1 },
00336     /*Cycle E */ { LSAluC1.rid,          1, 1 },
00337     /*Cycle C */ { LSAluC2.rid,          2, 1 }
00338     /*Cycle N1*/
00339     /*Cycle N2*/
00340     /*Cycle N3*/
00341     /*Cycle W */
00342   }
00343 };
00344 
00345 static const InstrClassRUsage CTIClassRUsage = {  // SPARC_CTI: control transfer; holds the fake CTIDelayCycle resource for cycles 1-2
00346   SPARC_CTI,
00347   /*totCycles*/ 7,
00348
00349   /* maxIssueNum */ 1,
00350   /* isSingleIssue */ false,
00351   /* breaksGroup */ false,
00352   /* numBubbles */ 0,
00353
00354   /*numSlots*/ 4,
00355   /* feasibleSlots[] */ { 0, 1, 2, 3 },
00356
00357   /*numEntries*/ 4,  // matches the four initializers in V[]
00358   /* V[] */ {        // each entry: { resource id, start cycle, number of cycles }
00359     /*Cycle G */ { AllIssueSlots.rid,    0, 1 },
00360                  { CTIIssueSlots.rid,    0, 1 },
00361     /*Cycle E */ { IAlu0.rid,            1, 1 },
00362     /*Cycles E-C */ { CTIDelayCycle.rid, 1, 2 }
00363     /*Cycle C */
00364     /*Cycle N1*/
00365     /*Cycle N2*/
00366     /*Cycle N3*/
00367     /*Cycle W */
00368   }
00369 };
00370 
00371 static const InstrClassRUsage SingleClassRUsage = {  // SPARC_SINGLE: single-issue instruction, feasible only in slot 0
00372   SPARC_SINGLE,
00373   /*totCycles*/ 7,
00374
00375   /* maxIssueNum */ 1,
00376   /* isSingleIssue */ true,
00377   /* breaksGroup */ false,
00378   /* numBubbles */ 0,
00379
00380   /*numSlots*/ 1,
00381   /* feasibleSlots[] */ { 0 },
00382
00383   /*numEntries*/ 5,  // matches the five initializers in V[]
00384   /* V[] */ {        // each entry: { resource id, start cycle, number of cycles }
00385     /*Cycle G */ { AllIssueSlots.rid,    0, 1 },  // AllIssueSlots is charged four times at cycle 0,
00386                  { AllIssueSlots.rid,    0, 1 },  // which equals the count (4) it was constructed with
00387                  { AllIssueSlots.rid,    0, 1 },
00388                  { AllIssueSlots.rid,    0, 1 },
00389     /*Cycle E */ { IAlu0.rid,            1, 1 }
00390     /*Cycle C */
00391     /*Cycle N1*/
00392     /*Cycle N2*/
00393     /*Cycle N3*/
00394     /*Cycle W */
00395   }
00396 };
00397 
00398 
00399 static const InstrClassRUsage SparcV9RUsageDesc[] = {  // per-class table handed to TargetSchedInfo by the constructor
00400   NoneClassRUsage,
00401   IEUNClassRUsage,
00402   IEU0ClassRUsage,
00403   IEU1ClassRUsage,
00404   FPMClassRUsage,
00405   FPAClassRUsage,
00406   CTIClassRUsage,    // NOTE(review): CTI precedes LD/ST here, unlike the definition order above;
00407   LDClassRUsage,     // presumably the array is indexed by the SPARC_* class value -- confirm the
00408   STClassRUsage,     // order matches that enumeration (10 rows vs SPARC_NUM_SCHED_CLASSES).
00409   SingleClassRUsage
00410 };
00411 
00412 
00413 
00414 //---------------------------------------------------------------------------
00415 // const InstrIssueDelta  SparcV9InstrIssueDeltas[]
00416 //
00417 // Purpose:
00418 //   Changes to issue restrictions information in InstrClassRUsage for
00419 //   instructions that differ from other instructions in their class.
00420 //---------------------------------------------------------------------------
00421 
00422 static const InstrIssueDelta  SparcV9InstrIssueDeltas[] = {
00423   // Per-opcode overrides of the class-level issue flags.  Column order:
00424   // opCode,  isSingleIssue,  breaksGroup,  numBubbles
00425   // Rows written as //{ ... } are disabled and contribute nothing.
00426                                 // Special cases for single-issue only
00427                                 // Other single issue cases are below.
00428 //{ V9::LDDA,           true,   true,   0 },
00429 //{ V9::STDA,           true,   true,   0 },
00430 //{ V9::LDDF,           true,   true,   0 },
00431 //{ V9::LDDFA,          true,   true,   0 },
00432   { V9::ADDCr,          true,   true,   0 },
00433   { V9::ADDCi,          true,   true,   0 },
00434   { V9::ADDCccr,        true,   true,   0 },
00435   { V9::ADDCcci,        true,   true,   0 },
00436   { V9::SUBCr,          true,   true,   0 },
00437   { V9::SUBCi,          true,   true,   0 },
00438   { V9::SUBCccr,        true,   true,   0 },
00439   { V9::SUBCcci,        true,   true,   0 },
00440 //{ V9::LDSTUB,         true,   true,   0 },
00441 //{ V9::SWAP,           true,   true,   0 },
00442 //{ V9::SWAPA,          true,   true,   0 },
00443 //{ V9::CAS,            true,   true,   0 },
00444 //{ V9::CASA,           true,   true,   0 },
00445 //{ V9::CASX,           true,   true,   0 },
00446 //{ V9::CASXA,          true,   true,   0 },
00447 //{ V9::LDFSR,          true,   true,   0 },
00448 //{ V9::LDFSRA,         true,   true,   0 },
00449 //{ V9::LDXFSR,         true,   true,   0 },
00450 //{ V9::LDXFSRA,        true,   true,   0 },
00451 //{ V9::STFSR,          true,   true,   0 },
00452 //{ V9::STFSRA,         true,   true,   0 },
00453 //{ V9::STXFSR,         true,   true,   0 },
00454 //{ V9::STXFSRA,        true,   true,   0 },
00455 //{ V9::SAVED,          true,   true,   0 },
00456 //{ V9::RESTORED,       true,   true,   0 },
00457 //{ V9::FLUSH,          true,   true,   9 },
00458 //{ V9::FLUSHW,         true,   true,   9 },
00459 //{ V9::ALIGNADDR,      true,   true,   0 },
00460 //{ V9::DONE,           true,   true,   0 },
00461 //{ V9::RETRY,          true,   true,   0 },
00462 //{ V9::TCC,            true,   true,   0 },
00463 //{ V9::SHUTDOWN,       true,   true,   0 },
00464
00465                                 // Special cases for breaking group *before*
00466                                 // CURRENTLY NOT SUPPORTED!  (so these rows leave both flags false)
00467   { V9::CALL,           false,  false,  0 },
00468   { V9::JMPLCALLr,      false,  false,  0 },
00469   { V9::JMPLCALLi,      false,  false,  0 },
00470   { V9::JMPLRETr,       false,  false,  0 },
00471   { V9::JMPLRETi,       false,  false,  0 },
00472
00473                                 // Special cases for breaking the group *after*
00474   { V9::MULXr,          true,   true,   (4+34)/2 },  // midpoint of the 4..34 bubble range
00475   { V9::MULXi,          true,   true,   (4+34)/2 },
00476   { V9::FDIVS,          false,  true,   0 },
00477   { V9::FDIVD,          false,  true,   0 },
00478   { V9::FDIVQ,          false,  true,   0 },
00479   { V9::FSQRTS,         false,  true,   0 },
00480   { V9::FSQRTD,         false,  true,   0 },
00481   { V9::FSQRTQ,         false,  true,   0 },
00482 //{ V9::FCMP{LE,GT,NE,EQ}, false, true, 0 },
00483
00484                                 // Instructions that introduce bubbles
00485 //{ V9::MULScc,         true,   true,   2 },
00486 //{ V9::SMULcc,         true,   true,   (4+18)/2 },
00487 //{ V9::UMULcc,         true,   true,   (4+19)/2 },
00488   { V9::SDIVXr,         true,   true,   68 },
00489   { V9::SDIVXi,         true,   true,   68 },
00490   { V9::UDIVXr,         true,   true,   68 },
00491   { V9::UDIVXi,         true,   true,   68 },
00492 //{ V9::SDIVcc,         true,   true,   36 },
00493 //{ V9::UDIVcc,         true,   true,   37 },
00494   { V9::WRCCRr,         true,   true,   4 },
00495   { V9::WRCCRi,         true,   true,   4 },
00496 //{ V9::WRPR,           true,   true,   4 },
00497 //{ V9::RDCCR,          true,   true,   0 }, // no bubbles after, but see below
00498 //{ V9::RDPR,           true,   true,   0 },
00499 };
00500 
00501 
00502 
00503 
00504 //---------------------------------------------------------------------------
00505 // const InstrRUsageDelta SparcV9InstrUsageDeltas[]
00506 //
00507 // Purpose:
00508 //   Changes to resource usage information in InstrClassRUsage for
00509 //   instructions that differ from other instructions in their class.
00510 //---------------------------------------------------------------------------
00511 
00512 static const InstrRUsageDelta SparcV9InstrUsageDeltas[] = {
00513   // Per-opcode adjustments applied on top of the class tables.  Column order:
00514   // MachineOpCode, Resource, Start cycle, Num cycles
00515
00516   //
00517   // JMPL counts as a load/store instruction for issue!
00518   //
00519   { V9::JMPLCALLr, LSIssueSlots.rid,  0,  1 },
00520   { V9::JMPLCALLi, LSIssueSlots.rid,  0,  1 },
00521   { V9::JMPLRETr,  LSIssueSlots.rid,  0,  1 },
00522   { V9::JMPLRETi,  LSIssueSlots.rid,  0,  1 },
00523
00524   //
00525   // Many instructions cannot issue for the next 2 cycles after an FCMP
00526   // We model that with a fake resource FCMPDelayCycle.
00527   //
00528   { V9::FCMPS,    FCMPDelayCycle.rid, 1, 3 },  // FCMP itself holds the resource for 3 cycles starting at cycle 1
00529   { V9::FCMPD,    FCMPDelayCycle.rid, 1, 3 },
00530   { V9::FCMPQ,    FCMPDelayCycle.rid, 1, 3 },
00531   // The opcodes below each claim FCMPDelayCycle for one cycle at cycle 1.
00532   { V9::MULXr,     FCMPDelayCycle.rid, 1, 1 },
00533   { V9::MULXi,     FCMPDelayCycle.rid, 1, 1 },
00534   { V9::SDIVXr,    FCMPDelayCycle.rid, 1, 1 },
00535   { V9::SDIVXi,    FCMPDelayCycle.rid, 1, 1 },
00536   { V9::UDIVXr,    FCMPDelayCycle.rid, 1, 1 },
00537   { V9::UDIVXi,    FCMPDelayCycle.rid, 1, 1 },
00538 //{ V9::SMULcc,   FCMPDelayCycle.rid, 1, 1 },
00539 //{ V9::UMULcc,   FCMPDelayCycle.rid, 1, 1 },
00540 //{ V9::SDIVcc,   FCMPDelayCycle.rid, 1, 1 },
00541 //{ V9::UDIVcc,   FCMPDelayCycle.rid, 1, 1 },
00542   { V9::STDFr,    FCMPDelayCycle.rid, 1, 1 },
00543   { V9::STDFi,    FCMPDelayCycle.rid, 1, 1 },
00544   { V9::FMOVRSZ,  FCMPDelayCycle.rid, 1, 1 },
00545   { V9::FMOVRSLEZ,FCMPDelayCycle.rid, 1, 1 },
00546   { V9::FMOVRSLZ, FCMPDelayCycle.rid, 1, 1 },
00547   { V9::FMOVRSNZ, FCMPDelayCycle.rid, 1, 1 },
00548   { V9::FMOVRSGZ, FCMPDelayCycle.rid, 1, 1 },
00549   { V9::FMOVRSGEZ,FCMPDelayCycle.rid, 1, 1 },
00550
00551   //
00552   // Some instructions are stalled in the GROUP stage if a CTI is in
00553   // the E or C stage.  We model that with a fake resource CTIDelayCycle.
00554   //
00555   { V9::LDDFr,    CTIDelayCycle.rid,  1, 1 },
00556   { V9::LDDFi,    CTIDelayCycle.rid,  1, 1 },
00557 //{ V9::LDDA,     CTIDelayCycle.rid,  1, 1 },
00558 //{ V9::LDDSTUB,  CTIDelayCycle.rid,  1, 1 },
00559 //{ V9::LDDSTUBA, CTIDelayCycle.rid,  1, 1 },
00560 //{ V9::SWAP,     CTIDelayCycle.rid,  1, 1 },
00561 //{ V9::SWAPA,    CTIDelayCycle.rid,  1, 1 },
00562 //{ V9::CAS,      CTIDelayCycle.rid,  1, 1 },
00563 //{ V9::CASA,     CTIDelayCycle.rid,  1, 1 },
00564 //{ V9::CASX,     CTIDelayCycle.rid,  1, 1 },
00565 //{ V9::CASXA,    CTIDelayCycle.rid,  1, 1 },
00566
00567   //
00568   // Signed int loads of less than dword size return data in cycle N1 (not C)
00569   // and put all loads in consecutive cycles into delayed load return mode.
00570   // Each opcode gets a pair of rows: a -1 count at cycle 2 (presumably cancelling the
00571   // class-level LdReturn use there -- confirm in TargetSchedInfo) and a +1 at cycle 3.
00571   { V9::LDSBr,    LdReturn.rid,  2, -1 },
00572   { V9::LDSBr,    LdReturn.rid,  3,  1 },
00573   { V9::LDSBi,    LdReturn.rid,  2, -1 },
00574   { V9::LDSBi,    LdReturn.rid,  3,  1 },
00575
00576   { V9::LDSHr,    LdReturn.rid,  2, -1 },
00577   { V9::LDSHr,    LdReturn.rid,  3,  1 },
00578   { V9::LDSHi,    LdReturn.rid,  2, -1 },
00579   { V9::LDSHi,    LdReturn.rid,  3,  1 },
00580
00581   { V9::LDSWr,    LdReturn.rid,  2, -1 },
00582   { V9::LDSWr,    LdReturn.rid,  3,  1 },
00583   { V9::LDSWi,    LdReturn.rid,  2, -1 },
00584   { V9::LDSWi,    LdReturn.rid,  3,  1 },
00585
00586   //
00587   // RDPR from certain registers and RD from any register are not dispatchable
00588   // until four clocks after they reach the head of the instr. buffer.
00589   // Together with their single-issue requirement, this means all four issue
00590   // slots are effectively blocked for those cycles, plus the issue cycle.
00591   // This does not increase the latency of the instruction itself.
00592   // (The row is repeated four times, once per issue slot.)
00593   { V9::RDCCR,   AllIssueSlots.rid,     0,  5 },
00594   { V9::RDCCR,   AllIssueSlots.rid,     0,  5 },
00595   { V9::RDCCR,   AllIssueSlots.rid,     0,  5 },
00596   { V9::RDCCR,   AllIssueSlots.rid,     0,  5 },
00597   // The #undef directly before the #ifdef compiles out every row up to the #endif.
00598 #undef EXPLICIT_BUBBLES_NEEDED
00599 #ifdef EXPLICIT_BUBBLES_NEEDED
00600   // NOTE(review): the live rows in this region name V9::MULX/SDIVX/UDIVX without the r/i suffix used elsewhere in this file.
00601   // MULScc inserts one bubble.
00602   // This means it breaks the current group (captured in UltraSparcV9SchedInfo)
00603   // *and* occupies all issue slots for the next cycle
00604   //
00605 //{ V9::MULScc,  AllIssueSlots.rid, 2, 2-1 },
00606 //{ V9::MULScc,  AllIssueSlots.rid, 2, 2-1 },
00607 //{ V9::MULScc,  AllIssueSlots.rid, 2, 2-1 },
00608 //{ V9::MULScc,  AllIssueSlots.rid, 2, 2-1 },
00609
00610   //
00611   // SMULcc inserts between 4 and 18 bubbles, depending on #leading 0s in rs1.
00612   // We just model this with a simple average.
00613   //
00614 //{ V9::SMULcc,  AllIssueSlots.rid, 2, ((4+18)/2)-1 },
00615 //{ V9::SMULcc,  AllIssueSlots.rid, 2, ((4+18)/2)-1 },
00616 //{ V9::SMULcc,  AllIssueSlots.rid, 2, ((4+18)/2)-1 },
00617 //{ V9::SMULcc,  AllIssueSlots.rid, 2, ((4+18)/2)-1 },
00618
00619   // UMULcc inserts between 4 and 19 bubbles, depending on #leading 0s in rs1.
00620 //{ V9::UMULcc,  AllIssueSlots.rid, 2, ((4+19)/2)-1 },
00621 //{ V9::UMULcc,  AllIssueSlots.rid, 2, ((4+19)/2)-1 },
00622 //{ V9::UMULcc,  AllIssueSlots.rid, 2, ((4+19)/2)-1 },
00623 //{ V9::UMULcc,  AllIssueSlots.rid, 2, ((4+19)/2)-1 },
00624
00625   //
00626   // MULX inserts between 4 and 34 bubbles, depending on #leading 0s in rs1.
00627   //
00628   { V9::MULX,    AllIssueSlots.rid, 2, ((4+34)/2)-1 },
00629   { V9::MULX,    AllIssueSlots.rid, 2, ((4+34)/2)-1 },
00630   { V9::MULX,    AllIssueSlots.rid, 2, ((4+34)/2)-1 },
00631   { V9::MULX,    AllIssueSlots.rid, 2, ((4+34)/2)-1 },
00632
00633   //
00634   // SDIVcc inserts 36 bubbles.
00635   //
00636 //{ V9::SDIVcc,  AllIssueSlots.rid, 2, 36-1 },
00637 //{ V9::SDIVcc,  AllIssueSlots.rid, 2, 36-1 },
00638 //{ V9::SDIVcc,  AllIssueSlots.rid, 2, 36-1 },
00639 //{ V9::SDIVcc,  AllIssueSlots.rid, 2, 36-1 },
00640
00641   // UDIVcc inserts 37 bubbles.
00642 //{ V9::UDIVcc,  AllIssueSlots.rid, 2, 37-1 },
00643 //{ V9::UDIVcc,  AllIssueSlots.rid, 2, 37-1 },
00644 //{ V9::UDIVcc,  AllIssueSlots.rid, 2, 37-1 },
00645 //{ V9::UDIVcc,  AllIssueSlots.rid, 2, 37-1 },
00646
00647   //
00648   // SDIVX inserts 68 bubbles.
00649   //
00650   { V9::SDIVX,   AllIssueSlots.rid, 2, 68-1 },
00651   { V9::SDIVX,   AllIssueSlots.rid, 2, 68-1 },
00652   { V9::SDIVX,   AllIssueSlots.rid, 2, 68-1 },
00653   { V9::SDIVX,   AllIssueSlots.rid, 2, 68-1 },
00654
00655   //
00656   // UDIVX inserts 68 bubbles.
00657   //
00658   { V9::UDIVX,   AllIssueSlots.rid, 2, 68-1 },
00659   { V9::UDIVX,   AllIssueSlots.rid, 2, 68-1 },
00660   { V9::UDIVX,   AllIssueSlots.rid, 2, 68-1 },
00661   { V9::UDIVX,   AllIssueSlots.rid, 2, 68-1 },
00662
00663   //
00664   // WR inserts 4 bubbles.
00665   // NOTE(review): the disabled rows below use 68-1, which disagrees with the 4 stated here.
00666 //{ V9::WR,     AllIssueSlots.rid, 2, 68-1 },
00667 //{ V9::WR,     AllIssueSlots.rid, 2, 68-1 },
00668 //{ V9::WR,     AllIssueSlots.rid, 2, 68-1 },
00669 //{ V9::WR,     AllIssueSlots.rid, 2, 68-1 },
00670
00671   //
00672   // WRPR inserts 4 bubbles.
00673   // NOTE(review): the disabled rows below use 68-1, which disagrees with the 4 stated here.
00674 //{ V9::WRPR,   AllIssueSlots.rid, 2, 68-1 },
00675 //{ V9::WRPR,   AllIssueSlots.rid, 2, 68-1 },
00676 //{ V9::WRPR,   AllIssueSlots.rid, 2, 68-1 },
00677 //{ V9::WRPR,   AllIssueSlots.rid, 2, 68-1 },
00678
00679   //
00680   // DONE inserts 9 bubbles.
00681   //
00682 //{ V9::DONE,   AllIssueSlots.rid, 2, 9-1 },
00683 //{ V9::DONE,   AllIssueSlots.rid, 2, 9-1 },
00684 //{ V9::DONE,   AllIssueSlots.rid, 2, 9-1 },
00685 //{ V9::DONE,   AllIssueSlots.rid, 2, 9-1 },
00686
00687   //
00688   // RETRY inserts 9 bubbles.
00689   //
00690 //{ V9::RETRY,   AllIssueSlots.rid, 2, 9-1 },
00691 //{ V9::RETRY,   AllIssueSlots.rid, 2, 9-1 },
00692 //{ V9::RETRY,   AllIssueSlots.rid, 2, 9-1 },
00693 //{ V9::RETRY,   AllIssueSlots.rid, 2, 9-1 },
00694
00695 #endif  /*EXPLICIT_BUBBLES_NEEDED */
00696 };
00697 
00698 // Additional delays to be captured in code:
00699 // 1. RDPR from several state registers (page 349)
00700 // 2. RD   from *any* register (page 349)
00701 // 3. Writes to TICK, PSTATE, TL registers and FLUSH{W} instr (page 349)
00702 // 4. Integer store can be in same group as instr producing value to store.
00703 // 5. BICC and BPICC can be in the same group as instr producing CC (pg 350)
00704 // 6. FMOVr cannot be in the same or next group as an IEU instr (pg 351).
00705 // 7. The second instr. of a CTI group inserts 9 bubbles (pg 351)
00706 // 8. WR{PR}, SAVE, SAVED, RESTORE, RESTORED, RETURN, RETRY, and DONE that
00707 //    follow an annulling branch cannot be issued in the same group or in
00708 //    the 3 groups following the branch.
00709 // 9. A predicted annulled load does not stall dependent instructions.
00710 //    Other annulled delay slot instructions *do* stall dependents, so
00711 //    nothing special needs to be done for them during scheduling.
00712 //10. Do not put a load use that may be annulled in the same group as the
00713 //    branch.  The group will stall until the load returns.
00714 //11. Single-prec. FP loads lock 2 registers, for dependency checking.
00715 //
00716 //
00717 // Additional delays we cannot or will not capture:
00718 // 1. If DCTI is last word of cache line, it is delayed until next line can be
00719 //    fetched.  Also, other DCTI alignment-related delays (pg 352)
00720 // 2. Load-after-store is delayed by 7 extra cycles if load hits in D-Cache.
00721 //    Also, several other store-load and load-store conflicts (pg 358)
00722 // 3. MEMBAR, LD{X}FSR, LDD{A} and a bunch of other load stalls (pg 358)
00723 // 4. There can be at most 8 outstanding buffered store instructions
00724 //     (including some others like MEMBAR, LDSTUB, CAS{AX}, and FLUSH)
00725 
00726 
00727 
00728 //---------------------------------------------------------------------------
00729 // class SparcV9SchedInfo
00730 //
00731 // Purpose:
00732 //   Scheduling information for the UltraSPARC.
00733 //   Primarily just initializes machine-dependent parameters in
00734 //   class TargetSchedInfo.
00735 //---------------------------------------------------------------------------
00736 
00737 /* ctor: hands the class table, both delta tables and the delta row counts to TargetSchedInfo. */
00738 SparcV9SchedInfo::SparcV9SchedInfo(const TargetMachine& tgt)
00739   : TargetSchedInfo(tgt,
00740                      (unsigned int) SPARC_NUM_SCHED_CLASSES,
00741                      SparcV9RUsageDesc,
00742                      SparcV9InstrUsageDeltas,
00743                      SparcV9InstrIssueDeltas,
00744                      sizeof(SparcV9InstrUsageDeltas)/sizeof(InstrRUsageDelta),   // row count of the usage-delta table
00745                      sizeof(SparcV9InstrIssueDeltas)/sizeof(InstrIssueDelta))    // row count of the issue-delta table
00746 {
00747   maxNumIssueTotal = 4;                 // same number as the AllIssueSlots count
00748   longestIssueConflict = 0;             // computed from issueGaps[]
00749
00750   // must be called after above parameters are initialized.
00751   initializeResources();
00752 }
00753 
00754 void
00755 SparcV9SchedInfo::initializeResources()   // invoked from the constructor; currently only delegates to the base class
00756 {
00757   // Compute TargetSchedInfo::instrRUsages and TargetSchedInfo::issueGaps
00758   TargetSchedInfo::initializeResources();
00759
00760   // Machine-dependent fixups go here.  None for now.
00761 }