LLVM API Documentation

PPCJITInfo.cpp

Go to the documentation of this file.
00001 //===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the JIT interfaces for the 32-bit PowerPC target.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #define DEBUG_TYPE "jit"
00015 #include "PPCJITInfo.h"
00016 #include "PPCRelocations.h"
00017 #include "llvm/CodeGen/MachineCodeEmitter.h"
00018 #include "llvm/Config/alloca.h"
00019 #include "llvm/Support/Debug.h"
00020 #include <set>
00021 #include <iostream>
00022 using namespace llvm;
00023 
00024 static TargetJITInfo::JITCompilerFn JITCompilerFunction;
00025 
// Hand-rolled encoders for the few 32-bit PowerPC instruction words this file
// synthesizes.  Each macro evaluates to the raw instruction word.  Register
// operands are plain register numbers and are not range checked; immediates
// are truncated to the width of their field.

// addis RD, RS, IMM16 (primary opcode 15 in the top six bits).
#define BUILD_ADDIS(RD,RS,IMM16) \
  (0x3C000000 | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 0xFFFF))

// ori RD, RS, UIMM16 (primary opcode 24).  Note the source register occupies
// the field at bit 21 and the destination the field at bit 16.
#define BUILD_ORI(RD,RS,UIMM16) \
  (0x60000000 | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 0xFFFF))

// mtspr SPR, RS (primary opcode 31, extended opcode 467).  SPR is shifted
// straight into place, so only small SPR numbers are encoded correctly.
#define BUILD_MTSPR(RS,SPR)      \
  (0x7C000000 | ((RS) << 21) | ((SPR) << 16) | 0x3A6)

// bcctr / bcctrl BO, BI (primary opcode 19, extended opcode 528).  The low
// bit of LINK selects the linking form.
#define BUILD_BCCTRx(BO,BI,LINK) \
  (0x4C000000 | ((BO) << 21) | ((BI) << 16) | 0x420 | ((LINK) & 1))

// Pseudo-ops expressed in terms of the encoders above.
#define BUILD_LIS(RD,IMM16)    BUILD_ADDIS(RD,0,IMM16)     // lis   RD, IMM16
#define BUILD_MTCTR(RS)        BUILD_MTSPR(RS,9)           // mtctr RS
#define BUILD_BCTR(LINK)       BUILD_BCCTRx(20,0,LINK)     // bctr / bctrl
00039 
00040 
00041 static void EmitBranchToAt(void *At, void *To, bool isCall) {
00042   intptr_t Addr = (intptr_t)To;
00043 
00044   // FIXME: should special case the short branch case.
00045   unsigned *AtI = (unsigned*)At;
00046 
00047   AtI[0] = BUILD_LIS(12, Addr >> 16);   // lis r12, hi16(address)
00048   AtI[1] = BUILD_ORI(12, 12, Addr);     // ori r12, r12, low16(address)
00049   AtI[2] = BUILD_MTCTR(12);             // mtctr r12
00050   AtI[3] = BUILD_BCTR(isCall);          // bctr/bctrl
00051 }
00052 
extern "C" void PPC32CompilationCallback();

#if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)
// PPC32CompilationCallback - Entry point reached from a lazy-compilation stub.
// It cannot be an ordinary C function containing inline assembly, because the
// prolog/epilog the compiler would insert does not fit what is needed here.
// The wrapper is therefore written entirely by hand so that register saving
// and restoring is fully under our control.
asm(
    ".text\n"
    ".align 2\n"
    ".globl _PPC32CompilationCallback\n"
"_PPC32CompilationCallback:\n"
    // Create a 208-byte frame.  It holds the 8 integer argument registers
    // r[3-10] at offsets 176-204 and the 13 FP registers f[1-13] at offsets
    // 72-168.
    // FIXME: need to save v[0-19] for altivec?
    "stwu r1, -208(r1)\n"
    "mflr r0\n"
    // Store our return address 8 bytes above the incoming stack pointer.
    "stw r0,  216(r1)\n"
    // Save all int arg registers.
    "stw r10, 204(r1)\n"
    "stw r9,  200(r1)\n"
    "stw r8,  196(r1)\n"
    "stw r7,  192(r1)\n"
    "stw r6,  188(r1)\n"
    "stw r5,  184(r1)\n"
    "stw r4,  180(r1)\n"
    "stw r3,  176(r1)\n"
    // Save all call-clobbered FP regs.
    "stfd f13, 168(r1)\n"
    "stfd f12, 160(r1)\n"
    "stfd f11, 152(r1)\n"
    "stfd f10, 144(r1)\n"
    "stfd f9,  136(r1)\n"
    "stfd f8,  128(r1)\n"
    "stfd f7,  120(r1)\n"
    "stfd f6,  112(r1)\n"
    "stfd f5,  104(r1)\n"
    "stfd f4,   96(r1)\n"
    "stfd f3,   88(r1)\n"
    "stfd f2,   80(r1)\n"
    "stfd f1,   72(r1)\n"
    // Arguments to PPC32CompilationCallbackC:
    // r3 - our lr (address of the call instruction in stub plus 4)
    // r4 - stub's lr (address of instruction that called the stub plus 4)
    "mr   r3, r0\n"
    // The word at the incoming stack pointer is the back chain written by the
    // stub's own stwu; the stub stored its lr 8 bytes above that address.
    "lwz  r2, 208(r1)\n"
    "lwz  r4, 8(r2)\n"
    "bl _PPC32CompilationCallbackC\n"
    // The C helper returns the address to continue at; branch there via CTR.
    "mtctr r3\n"
    // Restore all int arg registers.
    "lwz r10, 204(r1)\n"
    "lwz r9,  200(r1)\n"
    "lwz r8,  196(r1)\n"
    "lwz r7,  192(r1)\n"
    "lwz r6,  188(r1)\n"
    "lwz r5,  184(r1)\n"
    "lwz r4,  180(r1)\n"
    "lwz r3,  176(r1)\n"
    // Restore all FP arg registers.
    "lfd f13, 168(r1)\n"
    "lfd f12, 160(r1)\n"
    "lfd f11, 152(r1)\n"
    "lfd f10, 144(r1)\n"
    "lfd f9,  136(r1)\n"
    "lfd f8,  128(r1)\n"
    "lfd f7,  120(r1)\n"
    "lfd f6,  112(r1)\n"
    "lfd f5,  104(r1)\n"
    "lfd f4,   96(r1)\n"
    "lfd f3,   88(r1)\n"
    "lfd f2,   80(r1)\n"
    "lfd f1,   72(r1)\n"
    // Unwind past both this frame and the stub's frame, reload the return
    // address the stub saved, and jump to the target held in CTR.
    "lwz  r1, 208(r1)\n"
    "lwz  r2, 8(r1)\n"
    "mtlr r2\n"
    "bctr\n"
    );
#else
// Fallback for hosts that are not PowerPC: the callback can never legitimately
// run there, so reaching it is a fatal error.
void PPC32CompilationCallback() {
  assert(0 && "This is not a power pc, you can't execute this!");
  abort();
}
#endif
00117 
00118 extern "C" unsigned *PPC32CompilationCallbackC(unsigned *StubCallAddrPlus4,
00119                                                unsigned *OrigCallAddrPlus4) {
00120   // Adjust the pointer to the address of the call instruction in the stub
00121   // emitted by emitFunctionStub, rather than the instruction after it.
00122   unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
00123   unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1;
00124 
00125   void *Target = JITCompilerFunction(StubCallAddr);
00126 
00127   // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite
00128   // it to branch directly to the destination.  If so, rewrite it so it does not
00129   // need to go through the stub anymore.
00130   unsigned OrigCallInst = *OrigCallAddr;
00131   if ((OrigCallInst >> 26) == 18) {     // Direct call.
00132     intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2;
00133     
00134     if (Offset >= -(1 << 23) && Offset < (1 << 23)) {   // In range?
00135       // Clear the original target out.
00136       OrigCallInst &= (63 << 26) | 3;
00137       // Fill in the new target.
00138       OrigCallInst |= (Offset & ((1 << 24)-1)) << 2;
00139       // Replace the call.
00140       *OrigCallAddr = OrigCallInst;
00141     }
00142   }
00143 
00144   // Assert that we are coming from a stub that was created with our
00145   // emitFunctionStub.
00146   assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!");
00147   StubCallAddr -= 6;
00148 
00149   // Rewrite the stub with an unconditional branch to the target, for any users
00150   // who took the address of the stub.
00151   EmitBranchToAt(StubCallAddr, Target, false);
00152 
00153   // Put the address of the target function to call and the address to return to
00154   // after calling the target function in a place that is easy to get on the
00155   // stack after we restore all regs.
00156   return (unsigned *)Target;
00157 }
00158 
00159 
00160 
00161 TargetJITInfo::LazyResolverFn
00162 PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
00163   JITCompilerFunction = Fn;
00164   return PPC32CompilationCallback;
00165 }
00166 
00167 void *PPCJITInfo::emitFunctionStub(void *Fn, MachineCodeEmitter &MCE) {
00168   // If this is just a call to an external function, emit a branch instead of a
00169   // call.  The code is the same except for one bit of the last instruction.
00170   if (Fn != (void*)(intptr_t)PPC32CompilationCallback) {
00171     MCE.startFunctionStub(4*4);
00172     void *Addr = (void*)(intptr_t)MCE.getCurrentPCValue();
00173     MCE.emitWordBE(0);
00174     MCE.emitWordBE(0);
00175     MCE.emitWordBE(0);
00176     MCE.emitWordBE(0);
00177     EmitBranchToAt(Addr, Fn, false);
00178     return MCE.finishFunctionStub(0);
00179   }
00180 
00181   MCE.startFunctionStub(4*7);
00182   MCE.emitWordBE(0x9421ffe0);     // stwu    r1,-32(r1)
00183   MCE.emitWordBE(0x7d6802a6);     // mflr r11
00184   MCE.emitWordBE(0x91610028);     // stw r11, 40(r1)
00185   void *Addr = (void*)(intptr_t)MCE.getCurrentPCValue();
00186   MCE.emitWordBE(0);
00187   MCE.emitWordBE(0);
00188   MCE.emitWordBE(0);
00189   MCE.emitWordBE(0);
00190   EmitBranchToAt(Addr, Fn, true/*is call*/);
00191   return MCE.finishFunctionStub(0);
00192 }
00193 
00194 
00195 void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
00196                           unsigned NumRelocs, unsigned char* GOTBase) {
00197   for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
00198     unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
00199     intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
00200     switch ((PPC::RelocationType)MR->getRelocationType()) {
00201     default: assert(0 && "Unknown relocation type!");
00202     case PPC::reloc_pcrel_bx:
00203       // PC-relative relocation for b and bl instructions.
00204       ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
00205       assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) &&
00206              "Relocation out of range!");
00207       *RelocPos |= (ResultPtr & ((1 << 24)-1))  << 2;
00208       break;
00209     case PPC::reloc_absolute_ptr_high: // Pointer relocations.
00210     case PPC::reloc_absolute_ptr_low:
00211     case PPC::reloc_absolute_high:     // high bits of ref -> low 16 of instr
00212     case PPC::reloc_absolute_low: {    // low bits of ref  -> low 16 of instr
00213       ResultPtr += MR->getConstantVal();
00214 
00215       // If this is a high-part access, get the high-part.
00216       if (MR->getRelocationType() == PPC::reloc_absolute_high ||
00217           MR->getRelocationType() == PPC::reloc_absolute_ptr_high) {
00218         // If the low part will have a carry (really a borrow) from the low
00219         // 16-bits into the high 16, add a bit to borrow from.
00220         if (((int)ResultPtr << 16) < 0)
00221           ResultPtr += 1 << 16;
00222         ResultPtr >>= 16;
00223       }
00224 
00225       // Do the addition then mask, so the addition does not overflow the 16-bit
00226       // immediate section of the instruction.
00227       unsigned LowBits  = (*RelocPos + ResultPtr) & 65535;
00228       unsigned HighBits = *RelocPos & ~65535;
00229       *RelocPos = LowBits | HighBits;  // Slam into low 16-bits
00230       break;
00231     }
00232     case PPC::reloc_absolute_low_ix: {  // low bits of ref  -> low 14 of instr
00233       ResultPtr += MR->getConstantVal();
00234       // Do the addition then mask, so the addition does not overflow the 16-bit
00235       // immediate section of the instruction.
00236       unsigned LowBits  = (*RelocPos + ResultPtr) & 0xFFFC;
00237       unsigned HighBits = *RelocPos & 0xFFFF0003;
00238       *RelocPos = LowBits | HighBits;  // Slam into low 14-bits.
00239       break;
00240     }
00241     }
00242   }
00243 }
00244 
00245 void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
00246   EmitBranchToAt(Old, New, false);
00247 }
00248 
00249 void PPCJITInfo::resolveBBRefs(MachineCodeEmitter &MCE) {
00250   // Resolve branches to BasicBlocks for the entire function
00251   for (unsigned i = 0, e = BBRefs.size(); i != e; ++i) {
00252     intptr_t Location = MCE.getMachineBasicBlockAddress(BBRefs[i].first);
00253     unsigned *Ref = (unsigned *)BBRefs[i].second;
00254     DEBUG(std::cerr << "Fixup @ " << (void*)Ref << " to " << (void*)Location
00255                     << "\n");
00256     unsigned Instr = *Ref;
00257     intptr_t BranchTargetDisp = (Location - (intptr_t)Ref) >> 2;
00258 
00259     switch (Instr >> 26) {
00260     default: assert(0 && "Unknown branch user!");
00261     case 18:  // This is B or BL
00262       *Ref |= (BranchTargetDisp & ((1 << 24)-1)) << 2;
00263       break;
00264     case 16:  // This is BLT,BLE,BEQ,BGE,BGT,BNE, or other bcx instruction
00265       *Ref |= (BranchTargetDisp & ((1 << 14)-1)) << 2;
00266       break;
00267     }
00268   }
00269   BBRefs.clear();
00270 }
00271 
00272 #ifdef __APPLE__ 
00273 extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
00274 #endif
00275 
00276 void PPCJITInfo::synchronizeICache(const void *Addr, size_t Len) {
00277 #ifdef __APPLE__
00278   sys_icache_invalidate(Addr, Len);
00279 #endif
00280 }