LLVM API Documentation

Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

X86PeepholeOpt.cpp

Go to the documentation of this file.
00001 //===-- X86PeepholeOpt.cpp - X86 Peephole Optimizer -----------------------===//
00002 // 
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 // 
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains a peephole optimizer for the X86.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "X86.h"
00015 #include "llvm/CodeGen/MachineFunctionPass.h"
00016 #include "llvm/CodeGen/MachineInstrBuilder.h"
00017 #include "llvm/Target/MRegisterInfo.h"
00018 #include "llvm/Target/TargetInstrInfo.h"
00019 #include "llvm/Target/TargetMachine.h"
00020 #include "llvm/ADT/Statistic.h"
00021 #include "llvm/ADT/STLExtras.h"
00022 
00023 using namespace llvm;
00024 
00025 namespace {
00026   Statistic<> NumPHOpts("x86-peephole",
00027                         "Number of peephole optimization performed");
00028   Statistic<> NumPHMoves("x86-peephole", "Number of peephole moves folded");
00029   struct PH : public MachineFunctionPass {
00030     virtual bool runOnMachineFunction(MachineFunction &MF);
00031 
00032     bool PeepholeOptimize(MachineBasicBlock &MBB,
00033         MachineBasicBlock::iterator &I);
00034 
00035     virtual const char *getPassName() const { return "X86 Peephole Optimizer"; }
00036   };
00037 }
00038 
00039 FunctionPass *llvm::createX86PeepholeOptimizerPass() { return new PH(); }
00040 
00041 bool PH::runOnMachineFunction(MachineFunction &MF) {
00042   bool Changed = false;
00043 
00044   for (MachineFunction::iterator BI = MF.begin(), E = MF.end(); BI != E; ++BI)
00045     for (MachineBasicBlock::iterator I = BI->begin(); I != BI->end(); )
00046       if (PeepholeOptimize(*BI, I)) {
00047   Changed = true;
00048         ++NumPHOpts;
00049       } else
00050   ++I;
00051 
00052   return Changed;
00053 }
00054 
00055 
00056 bool PH::PeepholeOptimize(MachineBasicBlock &MBB,
00057         MachineBasicBlock::iterator &I) {
00058   assert(I != MBB.end());
00059   MachineBasicBlock::iterator NextI = next(I);
00060 
00061   MachineInstr *MI = I;
00062   MachineInstr *Next = (NextI != MBB.end()) ? &*NextI : (MachineInstr*)0;
00063   unsigned Size = 0;
00064   switch (MI->getOpcode()) {
00065   case X86::MOV8rr:
00066   case X86::MOV16rr:
00067   case X86::MOV32rr:   // Destroy X = X copies...
00068     if (MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
00069       I = MBB.erase(I);
00070       return true;
00071     }
00072     return false;
00073 
00074     // A large number of X86 instructions have forms which take an 8-bit
00075     // immediate despite the fact that the operands are 16 or 32 bits.  Because
00076     // this can save three bytes of code size (and icache space), we want to
00077     // shrink them if possible.
00078   case X86::IMUL16rri: case X86::IMUL32rri:
00079     assert(MI->getNumOperands() == 3 && "These should all have 3 operands!");
00080     if (MI->getOperand(2).isImmediate()) {
00081       int Val = MI->getOperand(2).getImmedValue();
00082       // If the value is the same when signed extended from 8 bits...
00083       if (Val == (signed int)(signed char)Val) {
00084         unsigned Opcode;
00085         switch (MI->getOpcode()) {
00086         default: assert(0 && "Unknown opcode value!");
00087         case X86::IMUL16rri: Opcode = X86::IMUL16rri8; break;
00088         case X86::IMUL32rri: Opcode = X86::IMUL32rri8; break;
00089         }
00090         unsigned R0 = MI->getOperand(0).getReg();
00091         unsigned R1 = MI->getOperand(1).getReg();
00092         I = MBB.insert(MBB.erase(I),
00093                        BuildMI(Opcode, 2, R0).addReg(R1).addZImm((char)Val));
00094         return true;
00095       }
00096     }
00097     return false;
00098 
00099 #if 0
00100   case X86::IMUL16rmi: case X86::IMUL32rmi:
00101     assert(MI->getNumOperands() == 6 && "These should all have 6 operands!");
00102     if (MI->getOperand(5).isImmediate()) {
00103       int Val = MI->getOperand(5).getImmedValue();
00104       // If the value is the same when signed extended from 8 bits...
00105       if (Val == (signed int)(signed char)Val) {
00106         unsigned Opcode;
00107         switch (MI->getOpcode()) {
00108         default: assert(0 && "Unknown opcode value!");
00109         case X86::IMUL16rmi: Opcode = X86::IMUL16rmi8; break;
00110         case X86::IMUL32rmi: Opcode = X86::IMUL32rmi8; break;
00111         }
00112         unsigned R0 = MI->getOperand(0).getReg();
00113         unsigned R1 = MI->getOperand(1).getReg();
00114         unsigned Scale = MI->getOperand(2).getImmedValue();
00115         unsigned R2 = MI->getOperand(3).getReg();
00116         unsigned Offset = MI->getOperand(4).getImmedValue();
00117         I = MBB.insert(MBB.erase(I),
00118                        BuildMI(Opcode, 5, R0).addReg(R1).addZImm(Scale).
00119                              addReg(R2).addSImm(Offset).addZImm((char)Val));
00120         return true;
00121       }
00122     }
00123     return false;
00124 #endif
00125 
00126   case X86::ADD16ri:  case X86::ADD32ri:  case X86::ADC32ri:
00127   case X86::SUB16ri:  case X86::SUB32ri:
00128   case X86::SBB16ri:  case X86::SBB32ri:
00129   case X86::AND16ri:  case X86::AND32ri:
00130   case X86::OR16ri:   case X86::OR32ri:
00131   case X86::XOR16ri:  case X86::XOR32ri:
00132     assert(MI->getNumOperands() == 2 && "These should all have 2 operands!");
00133     if (MI->getOperand(1).isImmediate()) {
00134       int Val = MI->getOperand(1).getImmedValue();
00135       // If the value is the same when signed extended from 8 bits...
00136       if (Val == (signed int)(signed char)Val) {
00137         unsigned Opcode;
00138         switch (MI->getOpcode()) {
00139         default: assert(0 && "Unknown opcode value!");
00140         case X86::ADD16ri:  Opcode = X86::ADD16ri8; break;
00141         case X86::ADD32ri:  Opcode = X86::ADD32ri8; break;
00142         case X86::ADC32ri:  Opcode = X86::ADC32ri8; break;
00143         case X86::SUB16ri:  Opcode = X86::SUB16ri8; break;
00144         case X86::SUB32ri:  Opcode = X86::SUB32ri8; break;
00145         case X86::SBB16ri:  Opcode = X86::SBB16ri8; break;
00146         case X86::SBB32ri:  Opcode = X86::SBB32ri8; break;
00147         case X86::AND16ri:  Opcode = X86::AND16ri8; break;
00148         case X86::AND32ri:  Opcode = X86::AND32ri8; break;
00149         case X86::OR16ri:   Opcode = X86::OR16ri8; break;
00150         case X86::OR32ri:   Opcode = X86::OR32ri8; break;
00151         case X86::XOR16ri:  Opcode = X86::XOR16ri8; break;
00152         case X86::XOR32ri:  Opcode = X86::XOR32ri8; break;
00153         }
00154         unsigned R0 = MI->getOperand(0).getReg();
00155         I = MBB.insert(MBB.erase(I),
00156                     BuildMI(Opcode, 1, R0, MachineOperand::UseAndDef)
00157                       .addZImm((char)Val));
00158         return true;
00159       }
00160     }
00161     return false;
00162 
00163   case X86::ADD16mi:  case X86::ADD32mi:  case X86::ADC32mi:
00164   case X86::SUB16mi:  case X86::SUB32mi:
00165   case X86::SBB16mi:  case X86::SBB32mi:
00166   case X86::AND16mi:  case X86::AND32mi:
00167   case X86::OR16mi:  case X86::OR32mi:
00168   case X86::XOR16mi:  case X86::XOR32mi:
00169     assert(MI->getNumOperands() == 5 && "These should all have 5 operands!");
00170     if (MI->getOperand(4).isImmediate()) {
00171       int Val = MI->getOperand(4).getImmedValue();
00172       // If the value is the same when signed extended from 8 bits...
00173       if (Val == (signed int)(signed char)Val) {
00174         unsigned Opcode;
00175         switch (MI->getOpcode()) {
00176         default: assert(0 && "Unknown opcode value!");
00177         case X86::ADD16mi:  Opcode = X86::ADD16mi8; break;
00178         case X86::ADD32mi:  Opcode = X86::ADD32mi8; break;
00179         case X86::ADC32mi:  Opcode = X86::ADC32mi8; break;
00180         case X86::SUB16mi:  Opcode = X86::SUB16mi8; break;
00181         case X86::SUB32mi:  Opcode = X86::SUB32mi8; break;
00182         case X86::SBB16mi:  Opcode = X86::SBB16mi8; break;
00183         case X86::SBB32mi:  Opcode = X86::SBB32mi8; break;
00184         case X86::AND16mi:  Opcode = X86::AND16mi8; break;
00185         case X86::AND32mi:  Opcode = X86::AND32mi8; break;
00186         case X86::OR16mi:   Opcode = X86::OR16mi8; break;
00187         case X86::OR32mi:   Opcode = X86::OR32mi8; break;
00188         case X86::XOR16mi:  Opcode = X86::XOR16mi8; break;
00189         case X86::XOR32mi:  Opcode = X86::XOR32mi8; break;
00190         }
00191         unsigned R0 = MI->getOperand(0).getReg();
00192         unsigned Scale = MI->getOperand(1).getImmedValue();
00193         unsigned R1 = MI->getOperand(2).getReg();
00194         unsigned Offset = MI->getOperand(3).getImmedValue();
00195         I = MBB.insert(MBB.erase(I),
00196                        BuildMI(Opcode, 5).addReg(R0).addZImm(Scale).
00197                              addReg(R1).addSImm(Offset).addZImm((char)Val));
00198         return true;
00199       }
00200     }
00201     return false;
00202 
00203 #if 0
00204   case X86::MOV32ri: Size++;
00205   case X86::MOV16ri: Size++;
00206   case X86::MOV8ri:
00207     // FIXME: We can only do this transformation if we know that flags are not
00208     // used here, because XOR clobbers the flags!
00209     if (MI->getOperand(1).isImmediate()) {         // avoid mov EAX, <value>
00210       int Val = MI->getOperand(1).getImmedValue();
00211       if (Val == 0) {                              // mov EAX, 0 -> xor EAX, EAX
00212   static const unsigned Opcode[] ={X86::XOR8rr,X86::XOR16rr,X86::XOR32rr};
00213   unsigned Reg = MI->getOperand(0).getReg();
00214   I = MBB.insert(MBB.erase(I),
00215                        BuildMI(Opcode[Size], 2, Reg).addReg(Reg).addReg(Reg));
00216   return true;
00217       } else if (Val == -1) {                     // mov EAX, -1 -> or EAX, -1
00218   // TODO: 'or Reg, -1' has a smaller encoding than 'mov Reg, -1'
00219       }
00220     }
00221     return false;
00222 #endif
00223   case X86::BSWAP32r:        // Change bswap EAX, bswap EAX into nothing
00224     if (Next->getOpcode() == X86::BSWAP32r &&
00225   MI->getOperand(0).getReg() == Next->getOperand(0).getReg()) {
00226       I = MBB.erase(MBB.erase(I));
00227       return true;
00228     }
00229     return false;
00230   default:
00231     return false;
00232   }
00233 }
00234 
00235 namespace {
00236   class UseDefChains : public MachineFunctionPass {
00237     std::vector<MachineInstr*> DefiningInst;
00238   public:
00239     // getDefinition - Return the machine instruction that defines the specified
00240     // SSA virtual register.
00241     MachineInstr *getDefinition(unsigned Reg) {
00242       assert(MRegisterInfo::isVirtualRegister(Reg) &&
00243              "use-def chains only exist for SSA registers!");
00244       assert(Reg - MRegisterInfo::FirstVirtualRegister < DefiningInst.size() &&
00245              "Unknown register number!");
00246       assert(DefiningInst[Reg-MRegisterInfo::FirstVirtualRegister] &&
00247              "Unknown register number!");
00248       return DefiningInst[Reg-MRegisterInfo::FirstVirtualRegister];
00249     }
00250 
00251     // setDefinition - Update the use-def chains to indicate that MI defines
00252     // register Reg.
00253     void setDefinition(unsigned Reg, MachineInstr *MI) {
00254       if (Reg-MRegisterInfo::FirstVirtualRegister >= DefiningInst.size())
00255         DefiningInst.resize(Reg-MRegisterInfo::FirstVirtualRegister+1);
00256       DefiningInst[Reg-MRegisterInfo::FirstVirtualRegister] = MI;
00257     }
00258 
00259     // removeDefinition - Update the use-def chains to forget about Reg
00260     // entirely.
00261     void removeDefinition(unsigned Reg) {
00262       assert(getDefinition(Reg));      // Check validity
00263       DefiningInst[Reg-MRegisterInfo::FirstVirtualRegister] = 0;
00264     }
00265 
00266     virtual bool runOnMachineFunction(MachineFunction &MF) {
00267       for (MachineFunction::iterator BI = MF.begin(), E = MF.end(); BI!=E; ++BI)
00268         for (MachineBasicBlock::iterator I = BI->begin(); I != BI->end(); ++I) {
00269           for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
00270             MachineOperand &MO = I->getOperand(i);
00271             if (MO.isRegister() && MO.isDef() && !MO.isUse() &&
00272                 MRegisterInfo::isVirtualRegister(MO.getReg()))
00273               setDefinition(MO.getReg(), I);
00274           }
00275         }
00276       return false;
00277     }
00278 
00279     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
00280       AU.setPreservesAll();
00281       MachineFunctionPass::getAnalysisUsage(AU);
00282     }
00283 
00284     virtual void releaseMemory() {
00285       std::vector<MachineInstr*>().swap(DefiningInst);
00286     }
00287   };
00288 
00289   RegisterAnalysis<UseDefChains> X("use-def-chains",
00290                                 "use-def chain construction for machine code");
00291 }
00292 
00293 
00294 namespace {
00295   Statistic<> NumSSAPHOpts("x86-ssa-peephole",
00296                            "Number of SSA peephole optimization performed");
00297 
00298   /// SSAPH - This pass is an X86-specific, SSA-based, peephole optimizer.  This
00299   /// pass is really a bad idea: a better instruction selector should completely
00300   /// supersume it.  However, that will take some time to develop, and the
00301   /// simple things this can do are important now.
00302   class SSAPH : public MachineFunctionPass {
00303     UseDefChains *UDC;
00304   public:
00305     virtual bool runOnMachineFunction(MachineFunction &MF);
00306 
00307     bool PeepholeOptimize(MachineBasicBlock &MBB,
00308         MachineBasicBlock::iterator &I);
00309 
00310     virtual const char *getPassName() const {
00311       return "X86 SSA-based Peephole Optimizer";
00312     }
00313 
00314     /// Propagate - Set MI[DestOpNo] = Src[SrcOpNo], optionally change the
00315     /// opcode of the instruction, then return true.
00316     bool Propagate(MachineInstr *MI, unsigned DestOpNo,
00317                    MachineInstr *Src, unsigned SrcOpNo, unsigned NewOpcode = 0){
00318       MI->getOperand(DestOpNo) = Src->getOperand(SrcOpNo);
00319       if (NewOpcode) MI->setOpcode(NewOpcode);
00320       return true;
00321     }
00322 
00323     /// OptimizeAddress - If we can fold the addressing arithmetic for this
00324     /// memory instruction into the instruction itself, do so and return true.
00325     bool OptimizeAddress(MachineInstr *MI, unsigned OpNo);
00326 
00327     /// getDefininingInst - If the specified operand is a read of an SSA
00328     /// register, return the machine instruction defining it, otherwise, return
00329     /// null.
00330     MachineInstr *getDefiningInst(MachineOperand &MO) {
00331       if (MO.isDef() || !MO.isRegister() ||
00332           !MRegisterInfo::isVirtualRegister(MO.getReg())) return 0;
00333       return UDC->getDefinition(MO.getReg());
00334     }
00335 
00336     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
00337       AU.addRequired<UseDefChains>();
00338       AU.addPreserved<UseDefChains>();
00339       MachineFunctionPass::getAnalysisUsage(AU);
00340     }
00341   };
00342 }
00343 
00344 FunctionPass *llvm::createX86SSAPeepholeOptimizerPass() { return new SSAPH(); }
00345 
00346 bool SSAPH::runOnMachineFunction(MachineFunction &MF) {
00347   bool Changed = false;
00348   bool LocalChanged;
00349 
00350   UDC = &getAnalysis<UseDefChains>();
00351 
00352   do {
00353     LocalChanged = false;
00354 
00355     for (MachineFunction::iterator BI = MF.begin(), E = MF.end(); BI != E; ++BI)
00356       for (MachineBasicBlock::iterator I = BI->begin(); I != BI->end(); )
00357         if (PeepholeOptimize(*BI, I)) {
00358           LocalChanged = true;
00359           ++NumSSAPHOpts;
00360         } else
00361           ++I;
00362     Changed |= LocalChanged;
00363   } while (LocalChanged);
00364 
00365   return Changed;
00366 }
00367 
/// isValidScaleAmount - Return true if Scale is one of the scale factors
/// (1, 2, 4, or 8) that the x86 addressing modes can encode directly.
static bool isValidScaleAmount(unsigned Scale) {
  switch (Scale) {
  case 1: case 2: case 4: case 8:
    return true;
  default:
    return false;
  }
}
00371 
/// OptimizeAddress - If we can fold the addressing arithmetic for this
/// memory instruction into the instruction itself, do so and return true.
/// OpNo is the operand index at which the four-operand x86 address
/// <base, scale, index, displacement> begins within MI.  Returns true if
/// any part of the address was rewritten.
bool SSAPH::OptimizeAddress(MachineInstr *MI, unsigned OpNo) {
  MachineOperand &BaseRegOp      = MI->getOperand(OpNo+0);
  MachineOperand &ScaleOp        = MI->getOperand(OpNo+1);
  MachineOperand &IndexRegOp     = MI->getOperand(OpNo+2);
  MachineOperand &DisplacementOp = MI->getOperand(OpNo+3);

  // Register number 0 is treated as "no register" throughout this routine.
  unsigned BaseReg  = BaseRegOp.hasAllocatedReg() ? BaseRegOp.getReg() : 0;
  unsigned Scale    = ScaleOp.getImmedValue();
  unsigned IndexReg = IndexRegOp.hasAllocatedReg() ? IndexRegOp.getReg() : 0;

  bool Changed = false;

  // If the base register is unset, and the index register is set with a scale
  // of 1, move it to be the base register.
  if (BaseRegOp.hasAllocatedReg() && BaseReg == 0 &&
      Scale == 1 && IndexReg != 0) {
    BaseRegOp.setReg(IndexReg);
    IndexRegOp.setReg(0);
    return true;
  }

  // Attempt to fold instructions used by the base register into the instruction
  if (MachineInstr *DefInst = getDefiningInst(BaseRegOp)) {
    switch (DefInst->getOpcode()) {
    case X86::MOV32ri:
      // If there is no displacement set for this instruction set one now.
      // FIXME: If we can fold two immediates together, we should do so!
      if (DisplacementOp.isImmediate() && !DisplacementOp.getImmedValue()) {
        if (DefInst->getOperand(1).isImmediate()) {
          // Clear the base register and move the constant into the
          // displacement slot instead.
          BaseRegOp.setReg(0);
          return Propagate(MI, OpNo+3, DefInst, 1);
        }
      }
      break;

    case X86::ADD32rr:
      // If the source is a register-register add, and we do not yet have an
      // index register, fold the add into the memory address.
      if (IndexReg == 0) {
        BaseRegOp = DefInst->getOperand(1);
        IndexRegOp = DefInst->getOperand(2);
        ScaleOp.setImmedValue(1);
        return true;
      }
      break;

    case X86::SHL32ri:
      // If this shift could be folded into the index portion of the address if
      // it were the index register, move it to the index register operand now,
      // so it will be folded in below.
      // Shift amounts below 4 correspond to scales of 2/4/8, which the x86
      // addressing modes can encode.  Note this path only swaps operands and
      // sets Changed; the actual fold happens in the index-register block
      // below (hence no early return).
      if ((Scale == 1 || (IndexReg == 0 && IndexRegOp.hasAllocatedReg())) &&
          DefInst->getOperand(2).getImmedValue() < 4) {
        std::swap(BaseRegOp, IndexRegOp);
        ScaleOp.setImmedValue(1); Scale = 1;
        std::swap(IndexReg, BaseReg);
        Changed = true;
        break;
      }
    }
  }

  // Attempt to fold instructions used by the index into the instruction
  if (MachineInstr *DefInst = getDefiningInst(IndexRegOp)) {
    switch (DefInst->getOpcode()) {
    case X86::SHL32ri: {
      // Figure out what the resulting scale would be if we folded this shift.
      unsigned ResScale = Scale * (1 << DefInst->getOperand(2).getImmedValue());
      if (isValidScaleAmount(ResScale)) {
        // Use the shift's input as the index, absorbing the shift into the
        // address's scale factor.
        IndexRegOp = DefInst->getOperand(1);
        ScaleOp.setImmedValue(ResScale);
        return true;
      }
      break;
    }
    }
  }

  // Changed is true only when the SHL32ri base-register rotation above fired
  // without a subsequent index fold.
  return Changed;
}
00453 
00454 bool SSAPH::PeepholeOptimize(MachineBasicBlock &MBB,
00455                              MachineBasicBlock::iterator &I) {
00456     MachineBasicBlock::iterator NextI = next(I);
00457 
00458   MachineInstr *MI = I;
00459   MachineInstr *Next = (NextI != MBB.end()) ? &*NextI : (MachineInstr*)0;
00460 
00461   bool Changed = false;
00462 
00463   const TargetInstrInfo &TII = *MBB.getParent()->getTarget().getInstrInfo();
00464 
00465   // Scan the operands of this instruction.  If any operands are
00466   // register-register copies, replace the operand with the source.
00467   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
00468     // Is this an SSA register use?
00469     if (MachineInstr *DefInst = getDefiningInst(MI->getOperand(i))) {
00470       // If the operand is a vreg-vreg copy, it is always safe to replace the
00471       // source value with the input operand.
00472       unsigned Source, Dest;
00473       if (TII.isMoveInstr(*DefInst, Source, Dest)) {
00474         // Don't propagate physical registers into any instructions.
00475         if (DefInst->getOperand(1).isRegister() &&
00476             MRegisterInfo::isVirtualRegister(Source)) {
00477           MI->getOperand(i).setReg(Source);
00478           Changed = true;
00479           ++NumPHMoves;
00480         }
00481       }
00482     }
00483   
00484   
00485   // Perform instruction specific optimizations.
00486   switch (MI->getOpcode()) {
00487 
00488     // Register to memory stores.  Format: <base,scale,indexreg,immdisp>, srcreg
00489   case X86::MOV32mr: case X86::MOV16mr: case X86::MOV8mr:
00490   case X86::MOV32mi: case X86::MOV16mi: case X86::MOV8mi:
00491     // Check to see if we can fold the source instruction into this one...
00492     if (MachineInstr *SrcInst = getDefiningInst(MI->getOperand(4))) {
00493       switch (SrcInst->getOpcode()) {
00494         // Fold the immediate value into the store, if possible.
00495       case X86::MOV8ri:  return Propagate(MI, 4, SrcInst, 1, X86::MOV8mi);
00496       case X86::MOV16ri: return Propagate(MI, 4, SrcInst, 1, X86::MOV16mi);
00497       case X86::MOV32ri: return Propagate(MI, 4, SrcInst, 1, X86::MOV32mi);
00498       default: break;
00499       }
00500     }
00501 
00502     // If we can optimize the addressing expression, do so now.
00503     if (OptimizeAddress(MI, 0))
00504       return true;
00505     break;
00506 
00507   case X86::MOV32rm:
00508   case X86::MOV16rm:
00509   case X86::MOV8rm:
00510     // If we can optimize the addressing expression, do so now.
00511     if (OptimizeAddress(MI, 1))
00512       return true;
00513     break;
00514 
00515   default: break;
00516   }
00517 
00518   return Changed;
00519 }