LLVM API Documentation
//===-- X86PeepholeOpt.cpp - X86 Peephole Optimizer -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a peephole optimizer for the X86.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/MRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"

using namespace llvm;

namespace {
  Statistic<> NumPHOpts("x86-peephole",
                        "Number of peephole optimizations performed");
  Statistic<> NumPHMoves("x86-peephole", "Number of peephole moves folded");
  struct PH : public MachineFunctionPass {
    virtual bool runOnMachineFunction(MachineFunction &MF);

    bool PeepholeOptimize(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &I);

    virtual const char *getPassName() const { return "X86 Peephole Optimizer"; }
  };
}

FunctionPass *llvm::createX86PeepholeOptimizerPass() { return new PH(); }

bool PH::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;

  for (MachineFunction::iterator BI = MF.begin(), E = MF.end(); BI != E; ++BI)
    for (MachineBasicBlock::iterator I = BI->begin(); I != BI->end(); )
      if (PeepholeOptimize(*BI, I)) {
        Changed = true;
        ++NumPHOpts;
      } else
        ++I;

  return Changed;
}


bool PH::PeepholeOptimize(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &I) {
  assert(I != MBB.end());
  MachineBasicBlock::iterator NextI = next(I);

  MachineInstr *MI = I;
  MachineInstr *Next = (NextI != MBB.end()) ? &*NextI : (MachineInstr*)0;
  unsigned Size = 0;
  switch (MI->getOpcode()) {
  case X86::MOV8rr:
  case X86::MOV16rr:
  case X86::MOV32rr:   // Destroy X = X copies...
    if (MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
      I = MBB.erase(I);
      return true;
    }
    return false;

    // A large number of X86 instructions have forms which take an 8-bit
    // immediate despite the fact that the operands are 16 or 32 bits.  Because
    // this can save three bytes of code size (and icache space), we want to
    // shrink them if possible.
  case X86::IMUL16rri: case X86::IMUL32rri:
    assert(MI->getNumOperands() == 3 && "These should all have 3 operands!");
    if (MI->getOperand(2).isImmediate()) {
      int Val = MI->getOperand(2).getImmedValue();
      // If the value is the same when sign extended from 8 bits...
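      // (A worked example of the test below: Val == 100 survives the round
      // trip through signed char, but Val == 300 truncates to 44, so only
      // values in [-128, 127] qualify for the imm8 form.)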
      if (Val == (signed int)(signed char)Val) {
        unsigned Opcode;
        switch (MI->getOpcode()) {
        default: assert(0 && "Unknown opcode value!");
        case X86::IMUL16rri: Opcode = X86::IMUL16rri8; break;
        case X86::IMUL32rri: Opcode = X86::IMUL32rri8; break;
        }
        unsigned R0 = MI->getOperand(0).getReg();
        unsigned R1 = MI->getOperand(1).getReg();
        I = MBB.insert(MBB.erase(I),
                       BuildMI(Opcode, 2, R0).addReg(R1).addZImm((char)Val));
        return true;
      }
    }
    return false;

#if 0
  case X86::IMUL16rmi: case X86::IMUL32rmi:
    assert(MI->getNumOperands() == 6 && "These should all have 6 operands!");
    if (MI->getOperand(5).isImmediate()) {
      int Val = MI->getOperand(5).getImmedValue();
      // If the value is the same when sign extended from 8 bits...
      if (Val == (signed int)(signed char)Val) {
        unsigned Opcode;
        switch (MI->getOpcode()) {
        default: assert(0 && "Unknown opcode value!");
        case X86::IMUL16rmi: Opcode = X86::IMUL16rmi8; break;
        case X86::IMUL32rmi: Opcode = X86::IMUL32rmi8; break;
        }
        unsigned R0 = MI->getOperand(0).getReg();
        unsigned R1 = MI->getOperand(1).getReg();
        unsigned Scale = MI->getOperand(2).getImmedValue();
        unsigned R2 = MI->getOperand(3).getReg();
        unsigned Offset = MI->getOperand(4).getImmedValue();
        I = MBB.insert(MBB.erase(I),
                       BuildMI(Opcode, 5, R0).addReg(R1).addZImm(Scale).
                             addReg(R2).addSImm(Offset).addZImm((char)Val));
        return true;
      }
    }
    return false;
#endif

  case X86::ADD16ri:  case X86::ADD32ri:  case X86::ADC32ri:
  case X86::SUB16ri:  case X86::SUB32ri:
  case X86::SBB16ri:  case X86::SBB32ri:
  case X86::AND16ri:  case X86::AND32ri:
  case X86::OR16ri:   case X86::OR32ri:
  case X86::XOR16ri:  case X86::XOR32ri:
    assert(MI->getNumOperands() == 2 && "These should all have 2 operands!");
    if (MI->getOperand(1).isImmediate()) {
      int Val = MI->getOperand(1).getImmedValue();
      // If the value is the same when sign extended from 8 bits...
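      // (This is where the three-byte saving mentioned earlier comes from:
      // "addl $4, %eax" encodes as 83 C0 04 with an 8-bit immediate, versus
      // 81 C0 04 00 00 00 with a full 32-bit immediate.)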
      if (Val == (signed int)(signed char)Val) {
        unsigned Opcode;
        switch (MI->getOpcode()) {
        default: assert(0 && "Unknown opcode value!");
        case X86::ADD16ri:  Opcode = X86::ADD16ri8; break;
        case X86::ADD32ri:  Opcode = X86::ADD32ri8; break;
        case X86::ADC32ri:  Opcode = X86::ADC32ri8; break;
        case X86::SUB16ri:  Opcode = X86::SUB16ri8; break;
        case X86::SUB32ri:  Opcode = X86::SUB32ri8; break;
        case X86::SBB16ri:  Opcode = X86::SBB16ri8; break;
        case X86::SBB32ri:  Opcode = X86::SBB32ri8; break;
        case X86::AND16ri:  Opcode = X86::AND16ri8; break;
        case X86::AND32ri:  Opcode = X86::AND32ri8; break;
        case X86::OR16ri:   Opcode = X86::OR16ri8; break;
        case X86::OR32ri:   Opcode = X86::OR32ri8; break;
        case X86::XOR16ri:  Opcode = X86::XOR16ri8; break;
        case X86::XOR32ri:  Opcode = X86::XOR32ri8; break;
        }
        unsigned R0 = MI->getOperand(0).getReg();
        I = MBB.insert(MBB.erase(I),
                       BuildMI(Opcode, 1, R0, MachineOperand::UseAndDef)
                       .addZImm((char)Val));
        return true;
      }
    }
    return false;

  case X86::ADD16mi:  case X86::ADD32mi:  case X86::ADC32mi:
  case X86::SUB16mi:  case X86::SUB32mi:
  case X86::SBB16mi:  case X86::SBB32mi:
  case X86::AND16mi:  case X86::AND32mi:
  case X86::OR16mi:   case X86::OR32mi:
  case X86::XOR16mi:  case X86::XOR32mi:
    assert(MI->getNumOperands() == 5 && "These should all have 5 operands!");
    if (MI->getOperand(4).isImmediate()) {
      int Val = MI->getOperand(4).getImmedValue();
      // If the value is the same when sign extended from 8 bits...
      if (Val == (signed int)(signed char)Val) {
        unsigned Opcode;
        switch (MI->getOpcode()) {
        default: assert(0 && "Unknown opcode value!");
        case X86::ADD16mi:  Opcode = X86::ADD16mi8; break;
        case X86::ADD32mi:  Opcode = X86::ADD32mi8; break;
        case X86::ADC32mi:  Opcode = X86::ADC32mi8; break;
        case X86::SUB16mi:  Opcode = X86::SUB16mi8; break;
        case X86::SUB32mi:  Opcode = X86::SUB32mi8; break;
        case X86::SBB16mi:  Opcode = X86::SBB16mi8; break;
        case X86::SBB32mi:  Opcode = X86::SBB32mi8; break;
        case X86::AND16mi:  Opcode = X86::AND16mi8; break;
        case X86::AND32mi:  Opcode = X86::AND32mi8; break;
        case X86::OR16mi:   Opcode = X86::OR16mi8; break;
        case X86::OR32mi:   Opcode = X86::OR32mi8; break;
        case X86::XOR16mi:  Opcode = X86::XOR16mi8; break;
        case X86::XOR32mi:  Opcode = X86::XOR32mi8; break;
        }
        unsigned R0 = MI->getOperand(0).getReg();
        unsigned Scale = MI->getOperand(1).getImmedValue();
        unsigned R1 = MI->getOperand(2).getReg();
        unsigned Offset = MI->getOperand(3).getImmedValue();
        I = MBB.insert(MBB.erase(I),
                       BuildMI(Opcode, 5).addReg(R0).addZImm(Scale).
                             addReg(R1).addSImm(Offset).addZImm((char)Val));
        return true;
      }
    }
    return false;

#if 0
  case X86::MOV32ri: Size++;
  case X86::MOV16ri: Size++;
  case X86::MOV8ri:
    // FIXME: We can only do this transformation if we know that flags are not
    // used here, because XOR clobbers the flags!
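    // (If the flags were known to be dead, "movl $0, %eax", encoded as
    // B8 00 00 00 00 (5 bytes), could become "xorl %eax, %eax", encoded as
    // 31 C0 (2 bytes) -- which is why this disabled block exists.)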
    if (MI->getOperand(1).isImmediate()) {            // avoid mov EAX, <value>
      int Val = MI->getOperand(1).getImmedValue();
      if (Val == 0) {                // mov EAX, 0 -> xor EAX, EAX
        static const unsigned Opcode[] ={X86::XOR8rr,X86::XOR16rr,X86::XOR32rr};
        unsigned Reg = MI->getOperand(0).getReg();
        I = MBB.insert(MBB.erase(I),
                       BuildMI(Opcode[Size], 2, Reg).addReg(Reg).addReg(Reg));
        return true;
      } else if (Val == -1) {        // mov EAX, -1 -> or EAX, -1
        // TODO: 'or Reg, -1' has a smaller encoding than 'mov Reg, -1'
      }
    }
    return false;
#endif
  case X86::BSWAP32r:        // Change bswap EAX, bswap EAX into nothing
    if (Next && Next->getOpcode() == X86::BSWAP32r &&
        MI->getOperand(0).getReg() == Next->getOperand(0).getReg()) {
      I = MBB.erase(MBB.erase(I));
      return true;
    }
    return false;
  default:
    return false;
  }
}

namespace {
  class UseDefChains : public MachineFunctionPass {
    std::vector<MachineInstr*> DefiningInst;
  public:
    // getDefinition - Return the machine instruction that defines the
    // specified SSA virtual register.
    MachineInstr *getDefinition(unsigned Reg) {
      assert(MRegisterInfo::isVirtualRegister(Reg) &&
             "use-def chains only exist for SSA registers!");
      assert(Reg - MRegisterInfo::FirstVirtualRegister < DefiningInst.size() &&
             "Unknown register number!");
      assert(DefiningInst[Reg-MRegisterInfo::FirstVirtualRegister] &&
             "Unknown register number!");
      return DefiningInst[Reg-MRegisterInfo::FirstVirtualRegister];
    }

    // setDefinition - Update the use-def chains to indicate that MI defines
    // register Reg.
    void setDefinition(unsigned Reg, MachineInstr *MI) {
      if (Reg-MRegisterInfo::FirstVirtualRegister >= DefiningInst.size())
        DefiningInst.resize(Reg-MRegisterInfo::FirstVirtualRegister+1);
      DefiningInst[Reg-MRegisterInfo::FirstVirtualRegister] = MI;
    }

    // removeDefinition - Update the use-def chains to forget about Reg
    // entirely.
    void removeDefinition(unsigned Reg) {
      assert(getDefinition(Reg));        // Check validity.
      DefiningInst[Reg-MRegisterInfo::FirstVirtualRegister] = 0;
    }

    virtual bool runOnMachineFunction(MachineFunction &MF) {
      for (MachineFunction::iterator BI = MF.begin(), E = MF.end(); BI!=E; ++BI)
        for (MachineBasicBlock::iterator I = BI->begin(); I != BI->end(); ++I) {
          for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
            MachineOperand &MO = I->getOperand(i);
            if (MO.isRegister() && MO.isDef() && !MO.isUse() &&
                MRegisterInfo::isVirtualRegister(MO.getReg()))
              setDefinition(MO.getReg(), I);
          }
        }
      return false;
    }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesAll();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    virtual void releaseMemory() {
      std::vector<MachineInstr*>().swap(DefiningInst);
    }
  };

  RegisterAnalysis<UseDefChains> X("use-def-chains",
                                 "use-def chain construction for machine code");
}
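// A sketch of how a client pass consumes the analysis above (the SSAPH pass
// below does exactly this in its runOnMachineFunction):
//
//   UseDefChains &UDC = getAnalysis<UseDefChains>();
//   MachineInstr *Def = UDC.getDefinition(VirtReg);  // VirtReg must be an
//                                                    // SSA virtual register.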

namespace {
  Statistic<> NumSSAPHOpts("x86-ssa-peephole",
                           "Number of SSA peephole optimizations performed");

  /// SSAPH - This pass is an X86-specific, SSA-based, peephole optimizer.
  /// This pass is really a bad idea: a better instruction selector should
  /// completely subsume it.  However, that will take some time to develop, and
  /// the simple things this can do are important now.
  class SSAPH : public MachineFunctionPass {
    UseDefChains *UDC;
  public:
    virtual bool runOnMachineFunction(MachineFunction &MF);

    bool PeepholeOptimize(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &I);

    virtual const char *getPassName() const {
      return "X86 SSA-based Peephole Optimizer";
    }

    /// Propagate - Set MI[DestOpNo] = Src[SrcOpNo], optionally change the
    /// opcode of the instruction, then return true.
    bool Propagate(MachineInstr *MI, unsigned DestOpNo,
                   MachineInstr *Src, unsigned SrcOpNo, unsigned NewOpcode = 0){
      MI->getOperand(DestOpNo) = Src->getOperand(SrcOpNo);
      if (NewOpcode) MI->setOpcode(NewOpcode);
      return true;
    }

    /// OptimizeAddress - If we can fold the addressing arithmetic for this
    /// memory instruction into the instruction itself, do so and return true.
    bool OptimizeAddress(MachineInstr *MI, unsigned OpNo);

    /// getDefiningInst - If the specified operand is a read of an SSA
    /// register, return the machine instruction defining it, otherwise, return
    /// null.
    MachineInstr *getDefiningInst(MachineOperand &MO) {
      if (MO.isDef() || !MO.isRegister() ||
          !MRegisterInfo::isVirtualRegister(MO.getReg())) return 0;
      return UDC->getDefinition(MO.getReg());
    }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<UseDefChains>();
      AU.addPreserved<UseDefChains>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }
  };
}

FunctionPass *llvm::createX86SSAPeepholeOptimizerPass() { return new SSAPH(); }

bool SSAPH::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  bool LocalChanged;

  UDC = &getAnalysis<UseDefChains>();

  do {
    LocalChanged = false;

    for (MachineFunction::iterator BI = MF.begin(), E = MF.end(); BI != E; ++BI)
      for (MachineBasicBlock::iterator I = BI->begin(); I != BI->end(); )
        if (PeepholeOptimize(*BI, I)) {
          LocalChanged = true;
          ++NumSSAPHOpts;
        } else
          ++I;
    Changed |= LocalChanged;
  } while (LocalChanged);

  return Changed;
}

static bool isValidScaleAmount(unsigned Scale) {
  return Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8;
}

/// OptimizeAddress - If we can fold the addressing arithmetic for this memory
/// instruction into the instruction itself, do so and return true.
bool SSAPH::OptimizeAddress(MachineInstr *MI, unsigned OpNo) {
  MachineOperand &BaseRegOp      = MI->getOperand(OpNo+0);
  MachineOperand &ScaleOp        = MI->getOperand(OpNo+1);
  MachineOperand &IndexRegOp     = MI->getOperand(OpNo+2);
  MachineOperand &DisplacementOp = MI->getOperand(OpNo+3);

  unsigned BaseReg  = BaseRegOp.hasAllocatedReg() ? BaseRegOp.getReg() : 0;
  unsigned Scale    = ScaleOp.getImmedValue();
  unsigned IndexReg = IndexRegOp.hasAllocatedReg() ? IndexRegOp.getReg() : 0;

  bool Changed = false;

  // If the base register is unset, and the index register is set with a scale
  // of 1, move it to be the base register.
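  // (For example, an address of the form base=0 + 1*%reg + disp is rewritten
  // as base=%reg + disp, leaving the index slot free for a scaled fold below.)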
  if (BaseRegOp.hasAllocatedReg() && BaseReg == 0 &&
      Scale == 1 && IndexReg != 0) {
    BaseRegOp.setReg(IndexReg);
    IndexRegOp.setReg(0);
    return true;
  }

  // Attempt to fold instructions used by the base register into the
  // instruction.
  if (MachineInstr *DefInst = getDefiningInst(BaseRegOp)) {
    switch (DefInst->getOpcode()) {
    case X86::MOV32ri:
      // If there is no displacement set for this instruction, set one now.
      // FIXME: If we can fold two immediates together, we should do so!
      if (DisplacementOp.isImmediate() && !DisplacementOp.getImmedValue()) {
        if (DefInst->getOperand(1).isImmediate()) {
          BaseRegOp.setReg(0);
          return Propagate(MI, OpNo+3, DefInst, 1);
        }
      }
      break;

    case X86::ADD32rr:
      // If the source is a register-register add, and we do not yet have an
      // index register, fold the add into the memory address.
      if (IndexReg == 0) {
        BaseRegOp = DefInst->getOperand(1);
        IndexRegOp = DefInst->getOperand(2);
        ScaleOp.setImmedValue(1);
        return true;
      }
      break;

    case X86::SHL32ri:
      // If this shift could be folded into the index portion of the address if
      // it were the index register, move it to the index register operand now,
      // so it will be folded in below.
      if ((Scale == 1 || (IndexReg == 0 && IndexRegOp.hasAllocatedReg())) &&
          DefInst->getOperand(2).getImmedValue() < 4) {
        std::swap(BaseRegOp, IndexRegOp);
        ScaleOp.setImmedValue(1); Scale = 1;
        std::swap(IndexReg, BaseReg);
        Changed = true;
        break;
      }
    }
  }

  // Attempt to fold instructions used by the index into the instruction.
  if (MachineInstr *DefInst = getDefiningInst(IndexRegOp)) {
    switch (DefInst->getOpcode()) {
    case X86::SHL32ri: {
      // Figure out what the resulting scale would be if we folded this shift.
      unsigned ResScale = Scale * (1 << DefInst->getOperand(2).getImmedValue());
      if (isValidScaleAmount(ResScale)) {
        IndexRegOp = DefInst->getOperand(1);
        ScaleOp.setImmedValue(ResScale);
        return true;
      }
      break;
    }
    }
  }

  return Changed;
}

bool SSAPH::PeepholeOptimize(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &I) {
  MachineBasicBlock::iterator NextI = next(I);

  MachineInstr *MI = I;
  MachineInstr *Next = (NextI != MBB.end()) ? &*NextI : (MachineInstr*)0;

  bool Changed = false;

  const TargetInstrInfo &TII = *MBB.getParent()->getTarget().getInstrInfo();

  // Scan the operands of this instruction.  If any operands are
  // register-register copies, replace the operand with the source.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
    // Is this an SSA register use?
    if (MachineInstr *DefInst = getDefiningInst(MI->getOperand(i))) {
      // If the operand is a vreg-vreg copy, it is always safe to replace the
      // source value with the input operand.
      unsigned Source, Dest;
      if (TII.isMoveInstr(*DefInst, Source, Dest)) {
        // Don't propagate physical registers into any instructions.
        if (DefInst->getOperand(1).isRegister() &&
            MRegisterInfo::isVirtualRegister(Source)) {
          MI->getOperand(i).setReg(Source);
          Changed = true;
          ++NumPHMoves;
        }
      }
    }


  // Perform instruction-specific optimizations.
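  // (For example, a store whose value operand is defined by "%v = MOV32ri 42"
  // is rewritten in place below to store the immediate directly:
  // MOV32mr -> MOV32mi 42.)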
  switch (MI->getOpcode()) {

    // Register to memory stores.  Format: <base,scale,indexreg,immdisp>, srcreg
  case X86::MOV32mr: case X86::MOV16mr: case X86::MOV8mr:
  case X86::MOV32mi: case X86::MOV16mi: case X86::MOV8mi:
    // Check to see if we can fold the source instruction into this one...
    if (MachineInstr *SrcInst = getDefiningInst(MI->getOperand(4))) {
      switch (SrcInst->getOpcode()) {
        // Fold the immediate value into the store, if possible.
      case X86::MOV8ri:  return Propagate(MI, 4, SrcInst, 1, X86::MOV8mi);
      case X86::MOV16ri: return Propagate(MI, 4, SrcInst, 1, X86::MOV16mi);
      case X86::MOV32ri: return Propagate(MI, 4, SrcInst, 1, X86::MOV32mi);
      default: break;
      }
    }

    // If we can optimize the addressing expression, do so now.
    if (OptimizeAddress(MI, 0))
      return true;
    break;

  case X86::MOV32rm:
  case X86::MOV16rm:
  case X86::MOV8rm:
    // If we can optimize the addressing expression, do so now.
    if (OptimizeAddress(MI, 1))
      return true;
    break;

  default: break;
  }

  return Changed;
}
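
// How these passes might be scheduled by the X86 target (a sketch, assuming a
// PassManager named PM; note the SSA-based pass must run while the machine
// code is still in SSA form, i.e. before register allocation):
//
//   PM.add(createX86SSAPeepholeOptimizerPass());
//   PM.add(createX86PeepholeOptimizerPass());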