LLVM API Documentation

Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

X86CodeEmitter.cpp

Go to the documentation of this file.
00001 //===-- X86/X86CodeEmitter.cpp - Convert X86 code to machine code ---------===//
00002 // 
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 // 
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file contains the pass that transforms the X86 machine instructions into
00011 // relocatable machine code.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "X86TargetMachine.h"
00016 #include "X86Relocations.h"
00017 #include "X86.h"
00018 #include "llvm/PassManager.h"
00019 #include "llvm/CodeGen/MachineCodeEmitter.h"
00020 #include "llvm/CodeGen/MachineFunctionPass.h"
00021 #include "llvm/CodeGen/MachineInstr.h"
00022 #include "llvm/CodeGen/Passes.h"
00023 #include "llvm/Function.h"
00024 #include "llvm/ADT/Statistic.h"
00025 using namespace llvm;
00026 
00027 namespace {
00028   Statistic<>
00029   NumEmitted("x86-emitter", "Number of machine instructions emitted");
00030 }
00031 
00032 namespace {
00033   class Emitter : public MachineFunctionPass {
00034     const X86InstrInfo  *II;
00035     MachineCodeEmitter  &MCE;
00036     std::map<const MachineBasicBlock*, unsigned> BasicBlockAddrs;
00037     std::vector<std::pair<const MachineBasicBlock *, unsigned> > BBRefs;
00038   public:
00039     explicit Emitter(MachineCodeEmitter &mce) : II(0), MCE(mce) {}
00040     Emitter(MachineCodeEmitter &mce, const X86InstrInfo& ii)
00041         : II(&ii), MCE(mce) {}
00042 
00043     bool runOnMachineFunction(MachineFunction &MF);
00044 
00045     virtual const char *getPassName() const {
00046       return "X86 Machine Code Emitter";
00047     }
00048 
00049     void emitInstruction(const MachineInstr &MI);
00050 
00051   private:
00052     void emitBasicBlock(const MachineBasicBlock &MBB);
00053 
00054     void emitPCRelativeBlockAddress(const MachineBasicBlock *BB);
00055     void emitPCRelativeValue(unsigned Address);
00056     void emitGlobalAddressForCall(GlobalValue *GV);
00057     void emitGlobalAddressForPtr(GlobalValue *GV, int Disp = 0);
00058     void emitExternalSymbolAddress(const char *ES, bool isPCRelative);
00059 
00060     void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
00061     void emitSIBByte(unsigned SS, unsigned Index, unsigned Base);
00062     void emitConstant(unsigned Val, unsigned Size);
00063 
00064     void emitMemModRMByte(const MachineInstr &MI,
00065                           unsigned Op, unsigned RegOpcodeField);
00066 
00067   };
00068 }
00069 
00070 /// addPassesToEmitMachineCode - Add passes to the specified pass manager to get
00071 /// machine code emitted.  This uses a MachineCodeEmitter object to handle
00072 /// actually outputting the machine code and resolving things like the address
00073 /// of functions.  This method should returns true if machine code emission is
00074 /// not supported.
00075 ///
00076 bool X86TargetMachine::addPassesToEmitMachineCode(FunctionPassManager &PM,
00077                                                   MachineCodeEmitter &MCE) {
00078   PM.add(new Emitter(MCE));
00079   // Delete machine code for this function
00080   PM.add(createMachineCodeDeleter());
00081   return false;
00082 }
00083 
00084 bool Emitter::runOnMachineFunction(MachineFunction &MF) {
00085   II = ((X86TargetMachine&)MF.getTarget()).getInstrInfo();
00086 
00087   MCE.startFunction(MF);
00088   MCE.emitConstantPool(MF.getConstantPool());
00089   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
00090     emitBasicBlock(*I);
00091   MCE.finishFunction(MF);
00092 
00093   // Resolve all forward branches now...
00094   for (unsigned i = 0, e = BBRefs.size(); i != e; ++i) {
00095     unsigned Location = BasicBlockAddrs[BBRefs[i].first];
00096     unsigned Ref = BBRefs[i].second;
00097     MCE.emitWordAt(Location-Ref-4, (unsigned*)(intptr_t)Ref);
00098   }
00099   BBRefs.clear();
00100   BasicBlockAddrs.clear();
00101   return false;
00102 }
00103 
00104 void Emitter::emitBasicBlock(const MachineBasicBlock &MBB) {
00105   if (uint64_t Addr = MCE.getCurrentPCValue())
00106     BasicBlockAddrs[&MBB] = Addr;
00107 
00108   for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
00109        I != E; ++I)
00110     emitInstruction(*I);
00111 }
00112 
00113 /// emitPCRelativeValue - Emit a 32-bit PC relative address.
00114 ///
00115 void Emitter::emitPCRelativeValue(unsigned Address) {
00116   MCE.emitWord(Address-MCE.getCurrentPCValue()-4);
00117 }
00118 
00119 /// emitPCRelativeBlockAddress - This method emits the PC relative address of
00120 /// the specified basic block, or if the basic block hasn't been emitted yet
00121 /// (because this is a forward branch), it keeps track of the information
00122 /// necessary to resolve this address later (and emits a dummy value).
00123 ///
00124 void Emitter::emitPCRelativeBlockAddress(const MachineBasicBlock *MBB) {
00125   // If this is a backwards branch, we already know the address of the target,
00126   // so just emit the value.
00127   std::map<const MachineBasicBlock*, unsigned>::iterator I =
00128     BasicBlockAddrs.find(MBB);
00129   if (I != BasicBlockAddrs.end()) {
00130     emitPCRelativeValue(I->second);
00131   } else {
00132     // Otherwise, remember where this reference was and where it is to so we can
00133     // deal with it later.
00134     BBRefs.push_back(std::make_pair(MBB, MCE.getCurrentPCValue()));
00135     MCE.emitWord(0);
00136   }
00137 }
00138 
00139 /// emitGlobalAddressForCall - Emit the specified address to the code stream
00140 /// assuming this is part of a function call, which is PC relative.
00141 ///
00142 void Emitter::emitGlobalAddressForCall(GlobalValue *GV) {
00143   MCE.addRelocation(MachineRelocation(MCE.getCurrentPCOffset(),
00144                                       X86::reloc_pcrel_word, GV, 0, true));
00145   MCE.emitWord(0);
00146 }
00147 
00148 /// emitGlobalAddress - Emit the specified address to the code stream assuming
00149 /// this is part of a "take the address of a global" instruction, which is not
00150 /// PC relative.
00151 ///
00152 void Emitter::emitGlobalAddressForPtr(GlobalValue *GV, int Disp /* = 0 */) {
00153   MCE.addRelocation(MachineRelocation(MCE.getCurrentPCOffset(),
00154                                       X86::reloc_absolute_word, GV));
00155   MCE.emitWord(Disp);   // The relocated value will be added to the displacement
00156 }
00157 
00158 /// emitExternalSymbolAddress - Arrange for the address of an external symbol to
00159 /// be emitted to the current location in the function, and allow it to be PC
00160 /// relative.
00161 void Emitter::emitExternalSymbolAddress(const char *ES, bool isPCRelative) {
00162   MCE.addRelocation(MachineRelocation(MCE.getCurrentPCOffset(),
00163           isPCRelative ? X86::reloc_pcrel_word : X86::reloc_absolute_word, ES));
00164   MCE.emitWord(0);
00165 }
00166 
/// N86 namespace - Native X86 register numbers, as used by the X86 backend when
/// encoding instructions.
///
namespace N86 {
  enum {
    EAX = 0,
    ECX = 1,
    EDX = 2,
    EBX = 3,
    ESP = 4,
    EBP = 5,
    ESI = 6,
    EDI = 7
  };
}
00174 
00175 
00176 // getX86RegNum - This function maps LLVM register identifiers to their X86
00177 // specific numbering, which is used in various places encoding instructions.
00178 //
00179 static unsigned getX86RegNum(unsigned RegNo) {
00180   switch(RegNo) {
00181   case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
00182   case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
00183   case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
00184   case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
00185   case X86::ESP: case X86::SP: case X86::AH: return N86::ESP;
00186   case X86::EBP: case X86::BP: case X86::CH: return N86::EBP;
00187   case X86::ESI: case X86::SI: case X86::DH: return N86::ESI;
00188   case X86::EDI: case X86::DI: case X86::BH: return N86::EDI;
00189 
00190   case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
00191   case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
00192     return RegNo-X86::ST0;
00193   default:
00194     assert(MRegisterInfo::isVirtualRegister(RegNo) &&
00195            "Unknown physical register!");
00196     assert(0 && "Register allocator hasn't allocated reg correctly yet!");
00197     return 0;
00198   }
00199 }
00200 
/// ModRMByte - Pack the three fields of a ModR/M byte: Mod goes in bits 7-6,
/// RegOpcode in bits 5-3 and RM in bits 2-0.  The fields must fit in 2, 3 and
/// 3 bits respectively.
///
inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
                                      unsigned RM) {
  assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
  const unsigned Packed = (Mod << 6) | (RegOpcode << 3) | RM;
  return static_cast<unsigned char>(Packed);
}
00206 
00207 void Emitter::emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeFld){
00208   MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)));
00209 }
00210 
00211 void Emitter::emitSIBByte(unsigned SS, unsigned Index, unsigned Base) {
00212   // SIB byte is in the same format as the ModRMByte...
00213   MCE.emitByte(ModRMByte(SS, Index, Base));
00214 }
00215 
00216 void Emitter::emitConstant(unsigned Val, unsigned Size) {
00217   // Output the constant in little endian byte order...
00218   for (unsigned i = 0; i != Size; ++i) {
00219     MCE.emitByte(Val & 255);
00220     Val >>= 8;
00221   }
00222 }
00223 
/// isDisp8 - Return true when Value fits in a signed 8-bit displacement.
///
static bool isDisp8(int Value) {
  return Value >= -128 && Value <= 127;
}
00227 
00228 void Emitter::emitMemModRMByte(const MachineInstr &MI,
00229                                unsigned Op, unsigned RegOpcodeField) {
00230   const MachineOperand &Op3 = MI.getOperand(Op+3);
00231   GlobalValue *GV = 0;
00232   int DispVal = 0;
00233 
00234   if (Op3.isGlobalAddress()) {
00235     GV = Op3.getGlobal();
00236     DispVal = Op3.getOffset();
00237   } else {
00238     DispVal = Op3.getImmedValue();
00239   }
00240 
00241   const MachineOperand &Base     = MI.getOperand(Op);
00242   const MachineOperand &Scale    = MI.getOperand(Op+1);
00243   const MachineOperand &IndexReg = MI.getOperand(Op+2);
00244 
00245   unsigned BaseReg = 0;
00246 
00247   if (Base.isConstantPoolIndex()) {
00248     // Emit a direct address reference [disp32] where the displacement of the
00249     // constant pool entry is controlled by the MCE.
00250     assert(!GV && "Constant Pool reference cannot be relative to global!");
00251     DispVal += MCE.getConstantPoolEntryAddress(Base.getConstantPoolIndex());
00252   } else {
00253     BaseReg = Base.getReg();
00254   }
00255 
00256   // Is a SIB byte needed?
00257   if (IndexReg.getReg() == 0 && BaseReg != X86::ESP) {
00258     if (BaseReg == 0) {  // Just a displacement?
00259       // Emit special case [disp32] encoding
00260       MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
00261       if (GV)
00262         emitGlobalAddressForPtr(GV, DispVal);
00263       else
00264         emitConstant(DispVal, 4);
00265     } else {
00266       unsigned BaseRegNo = getX86RegNum(BaseReg);
00267       if (GV) {
00268         // Emit the most general non-SIB encoding: [REG+disp32]
00269         MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
00270         emitGlobalAddressForPtr(GV, DispVal);
00271       } else if (DispVal == 0 && BaseRegNo != N86::EBP) {
00272         // Emit simple indirect register encoding... [EAX] f.e.
00273         MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo));
00274       } else if (isDisp8(DispVal)) {
00275         // Emit the disp8 encoding... [REG+disp8]
00276         MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo));
00277         emitConstant(DispVal, 1);
00278       } else {
00279         // Emit the most general non-SIB encoding: [REG+disp32]
00280         MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
00281         emitConstant(DispVal, 4);
00282       }
00283     }
00284 
00285   } else {  // We need a SIB byte, so start by outputting the ModR/M byte first
00286     assert(IndexReg.getReg() != X86::ESP && "Cannot use ESP as index reg!");
00287 
00288     bool ForceDisp32 = false;
00289     bool ForceDisp8  = false;
00290     if (BaseReg == 0) {
00291       // If there is no base register, we emit the special case SIB byte with
00292       // MOD=0, BASE=5, to JUST get the index, scale, and displacement.
00293       MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
00294       ForceDisp32 = true;
00295     } else if (GV) {
00296       // Emit the normal disp32 encoding...
00297       MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
00298       ForceDisp32 = true;
00299     } else if (DispVal == 0 && BaseReg != X86::EBP) {
00300       // Emit no displacement ModR/M byte
00301       MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
00302     } else if (isDisp8(DispVal)) {
00303       // Emit the disp8 encoding...
00304       MCE.emitByte(ModRMByte(1, RegOpcodeField, 4));
00305       ForceDisp8 = true;           // Make sure to force 8 bit disp if Base=EBP
00306     } else {
00307       // Emit the normal disp32 encoding...
00308       MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
00309     }
00310 
00311     // Calculate what the SS field value should be...
00312     static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 };
00313     unsigned SS = SSTable[Scale.getImmedValue()];
00314 
00315     if (BaseReg == 0) {
00316       // Handle the SIB byte for the case where there is no base.  The
00317       // displacement has already been output.
00318       assert(IndexReg.getReg() && "Index register must be specified!");
00319       emitSIBByte(SS, getX86RegNum(IndexReg.getReg()), 5);
00320     } else {
00321       unsigned BaseRegNo = getX86RegNum(BaseReg);
00322       unsigned IndexRegNo;
00323       if (IndexReg.getReg())
00324         IndexRegNo = getX86RegNum(IndexReg.getReg());
00325       else
00326         IndexRegNo = 4;   // For example [ESP+1*<noreg>+4]
00327       emitSIBByte(SS, IndexRegNo, BaseRegNo);
00328     }
00329 
00330     // Do we need to output a displacement?
00331     if (DispVal != 0 || ForceDisp32 || ForceDisp8) {
00332       if (!ForceDisp32 && isDisp8(DispVal))
00333         emitConstant(DispVal, 1);
00334       else if (GV)
00335         emitGlobalAddressForPtr(GV, DispVal);
00336       else
00337         emitConstant(DispVal, 4);
00338     }
00339   }
00340 }
00341 
00342 static unsigned sizeOfImm(const TargetInstrDescriptor &Desc) {
00343   switch (Desc.TSFlags & X86II::ImmMask) {
00344   case X86II::Imm8:   return 1;
00345   case X86II::Imm16:  return 2;
00346   case X86II::Imm32:  return 4;
00347   default: assert(0 && "Immediate size not set!");
00348     return 0;
00349   }
00350 }
00351 
00352 void Emitter::emitInstruction(const MachineInstr &MI) {
00353   NumEmitted++;  // Keep track of the # of mi's emitted
00354 
00355   unsigned Opcode = MI.getOpcode();
00356   const TargetInstrDescriptor &Desc = II->get(Opcode);
00357 
00358   // Emit the repeat opcode prefix as needed.
00359   if ((Desc.TSFlags & X86II::Op0Mask) == X86II::REP) MCE.emitByte(0xF3);
00360 
00361   // Emit instruction prefixes if necessary
00362   if (Desc.TSFlags & X86II::OpSize) MCE.emitByte(0x66);// Operand size...
00363 
00364   switch (Desc.TSFlags & X86II::Op0Mask) {
00365   case X86II::TB:
00366     MCE.emitByte(0x0F);   // Two-byte opcode prefix
00367     break;
00368   case X86II::REP: break; // already handled.
00369   case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
00370   case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
00371     MCE.emitByte(0xD8+
00372                  (((Desc.TSFlags & X86II::Op0Mask)-X86II::D8)
00373                                    >> X86II::Op0Shift));
00374     break; // Two-byte opcode prefix
00375   default: assert(0 && "Invalid prefix!");
00376   case 0: break;  // No prefix!
00377   }
00378 
00379   unsigned char BaseOpcode = II->getBaseOpcodeFor(Opcode);
00380   switch (Desc.TSFlags & X86II::FormMask) {
00381   default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!");
00382   case X86II::Pseudo:
00383     if (Opcode != X86::IMPLICIT_USE &&
00384         Opcode != X86::IMPLICIT_DEF &&
00385         Opcode != X86::FP_REG_KILL)
00386       std::cerr << "X86 Machine Code Emitter: No 'form', not emitting: " << MI;
00387     break;
00388 
00389   case X86II::RawFrm:
00390     MCE.emitByte(BaseOpcode);
00391     if (MI.getNumOperands() == 1) {
00392       const MachineOperand &MO = MI.getOperand(0);
00393       if (MO.isMachineBasicBlock()) {
00394         emitPCRelativeBlockAddress(MO.getMachineBasicBlock());
00395       } else if (MO.isGlobalAddress()) {
00396         assert(MO.isPCRelative() && "Call target is not PC Relative?");
00397         emitGlobalAddressForCall(MO.getGlobal());
00398       } else if (MO.isExternalSymbol()) {
00399         emitExternalSymbolAddress(MO.getSymbolName(), true);
00400       } else if (MO.isImmediate()) {
00401         emitConstant(MO.getImmedValue(), sizeOfImm(Desc));        
00402       } else {
00403         assert(0 && "Unknown RawFrm operand!");
00404       }
00405     }
00406     break;
00407 
00408   case X86II::AddRegFrm:
00409     MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(0).getReg()));
00410     if (MI.getNumOperands() == 2) {
00411       const MachineOperand &MO1 = MI.getOperand(1);
00412       if (Value *V = MO1.getVRegValueOrNull()) {
00413         assert(sizeOfImm(Desc) == 4 &&
00414                "Don't know how to emit non-pointer values!");
00415         emitGlobalAddressForPtr(cast<GlobalValue>(V));
00416       } else if (MO1.isGlobalAddress()) {
00417         assert(sizeOfImm(Desc) == 4 &&
00418                "Don't know how to emit non-pointer values!");
00419         assert(!MO1.isPCRelative() && "Function pointer ref is PC relative?");
00420         emitGlobalAddressForPtr(MO1.getGlobal(), MO1.getOffset());
00421       } else if (MO1.isExternalSymbol()) {
00422         assert(sizeOfImm(Desc) == 4 &&
00423                "Don't know how to emit non-pointer values!");
00424         emitExternalSymbolAddress(MO1.getSymbolName(), false);
00425       } else {
00426         emitConstant(MO1.getImmedValue(), sizeOfImm(Desc));
00427       }
00428     }
00429     break;
00430 
00431   case X86II::MRMDestReg: {
00432     MCE.emitByte(BaseOpcode);
00433     emitRegModRMByte(MI.getOperand(0).getReg(),
00434                      getX86RegNum(MI.getOperand(1).getReg()));
00435     if (MI.getNumOperands() == 3)
00436       emitConstant(MI.getOperand(2).getImmedValue(), sizeOfImm(Desc));
00437     break;
00438   }
00439   case X86II::MRMDestMem:
00440     MCE.emitByte(BaseOpcode);
00441     emitMemModRMByte(MI, 0, getX86RegNum(MI.getOperand(4).getReg()));
00442     if (MI.getNumOperands() == 6)
00443       emitConstant(MI.getOperand(5).getImmedValue(), sizeOfImm(Desc));
00444     break;
00445 
00446   case X86II::MRMSrcReg:
00447     MCE.emitByte(BaseOpcode);
00448 
00449     emitRegModRMByte(MI.getOperand(1).getReg(),
00450                      getX86RegNum(MI.getOperand(0).getReg()));
00451     if (MI.getNumOperands() == 3)
00452       emitConstant(MI.getOperand(2).getImmedValue(), sizeOfImm(Desc));
00453     break;
00454 
00455   case X86II::MRMSrcMem:
00456     MCE.emitByte(BaseOpcode);
00457     emitMemModRMByte(MI, 1, getX86RegNum(MI.getOperand(0).getReg()));
00458     if (MI.getNumOperands() == 2+4)
00459       emitConstant(MI.getOperand(5).getImmedValue(), sizeOfImm(Desc));
00460     break;
00461 
00462   case X86II::MRM0r: case X86II::MRM1r:
00463   case X86II::MRM2r: case X86II::MRM3r:
00464   case X86II::MRM4r: case X86II::MRM5r:
00465   case X86II::MRM6r: case X86II::MRM7r:
00466     MCE.emitByte(BaseOpcode);
00467     emitRegModRMByte(MI.getOperand(0).getReg(),
00468                      (Desc.TSFlags & X86II::FormMask)-X86II::MRM0r);
00469 
00470     if (MI.getOperand(MI.getNumOperands()-1).isImmediate()) {
00471       emitConstant(MI.getOperand(MI.getNumOperands()-1).getImmedValue(),
00472                    sizeOfImm(Desc));
00473     }
00474     break;
00475 
00476   case X86II::MRM0m: case X86II::MRM1m:
00477   case X86II::MRM2m: case X86II::MRM3m:
00478   case X86II::MRM4m: case X86II::MRM5m:
00479   case X86II::MRM6m: case X86II::MRM7m: 
00480     MCE.emitByte(BaseOpcode);
00481     emitMemModRMByte(MI, 0, (Desc.TSFlags & X86II::FormMask)-X86II::MRM0m);
00482 
00483     if (MI.getNumOperands() == 5) {
00484       if (MI.getOperand(4).isImmediate())
00485         emitConstant(MI.getOperand(4).getImmedValue(), sizeOfImm(Desc));
00486       else if (MI.getOperand(4).isGlobalAddress())
00487         emitGlobalAddressForPtr(MI.getOperand(4).getGlobal(),
00488                                 MI.getOperand(4).getOffset());
00489       else
00490         assert(0 && "Unknown operand!");
00491     }
00492     break;
00493   }
00494 }