LLVM API Documentation

PPCJITInfo.cpp

Go to the documentation of this file.
00001 //===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the JIT interfaces for the 32-bit PowerPC target.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #define DEBUG_TYPE "jit"
#include "PPCJITInfo.h"
#include "PPCRelocations.h"
#include "llvm/CodeGen/MachineCodeEmitter.h"
#include "llvm/Config/alloca.h"
#include <cstdlib>
#include <set>
00020 using namespace llvm;
00021 
// Callback used to compile a function on demand.  It is installed by
// PPCJITInfo::getLazyResolverFunction and invoked from
// PPC32CompilationCallbackC with the address of the branch inside the stub.
00022 static TargetJITInfo::JITCompilerFn JITCompilerFunction;
00023 
// Encoders for the few 32-bit PowerPC instruction words this file emits by
// hand.  Each macro ORs the primary opcode (bits 26 and up) with the operand
// fields; 16-bit immediates are masked so wider values cannot spill into the
// register fields.

// addis RD, RS, IMM16 (primary opcode 15).
#define BUILD_ADDIS(RD,RS,IMM16) \
  ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 0xFFFF))

// ori RD, RS, UIMM16 (primary opcode 24).  Note that the source register sits
// in the upper register field and the destination in the lower one.
#define BUILD_ORI(RD,RS,UIMM16) \
  ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 0xFFFF))

// mtspr SPR, RS (primary opcode 31, extended opcode 467).
#define BUILD_MTSPR(RS,SPR) \
  ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1))

// bcctr / bcctrl BO, BI (primary opcode 19, extended opcode 528); the low bit
// of LINK selects the linking form.
#define BUILD_BCCTRx(BO,BI,LINK) \
  ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1))

// Pseudo-ops built on top of the encoders above.
#define BUILD_LIS(RD,IMM16)    BUILD_ADDIS(RD,0,IMM16)    // lis RD, IMM16
#define BUILD_MTCTR(RS)        BUILD_MTSPR(RS,9)          // mtctr RS
#define BUILD_BCTR(LINK)       BUILD_BCCTRx(20,0,LINK)    // bctr / bctrl
00037 
00038 
00039 static void EmitBranchToAt(void *At, void *To, bool isCall) {
00040   intptr_t Addr = (intptr_t)To;
00041 
00042   // FIXME: should special case the short branch case.
00043   unsigned *AtI = (unsigned*)At;
00044 
00045   AtI[0] = BUILD_LIS(12, Addr >> 16);   // lis r12, hi16(address)
00046   AtI[1] = BUILD_ORI(12, 12, Addr);     // ori r12, r12, low16(address)
00047   AtI[2] = BUILD_MTCTR(12);             // mtctr r12
00048   AtI[3] = BUILD_BCTR(isCall);          // bctr/bctrl
00049 }
00050 
// PPC32CompilationCallback - Entry point handed out by
// PPCJITInfo::getLazyResolverFunction and called by the stubs that
// PPCJITInfo::emitFunctionStub emits for it.  On PowerPC hosts it is defined
// in raw assembly below; on any other host it is a placeholder that aborts.
00051 extern "C" void PPC32CompilationCallback();
00052 
00053 #if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)
00054 // CompilationCallback stub - We can't use a C function with inline assembly in
00055 // it, because the prolog/epilog inserted by GCC won't work for us.  Instead,
00056 // write our own wrapper, which does things our way, so we have complete control
00057 // over register saving and restoring.
00058 asm(
00059     ".text\n"
00060     ".align 2\n"
00061     ".globl _PPC32CompilationCallback\n"
00062 "_PPC32CompilationCallback:\n"
00063     // Make space for 29 ints r[3-31] and 14 doubles f[0-13]
    // NOTE(review): the stores below save f1-f13 only (13 doubles starting at
    // offset 44); f0 is never stored even though the comment above mentions it.
00064     "stwu r1, -272(r1)\n"
00065     "mflr r11\n"
    // 280 = 272 + 8, i.e. offset 8 from the stack pointer value on entry.
00066     "stw r11, 280(r1)\n"    // Set up a proper stack frame
    // r3-r31 are 29 words = 116 bytes, occupying offsets 156 through 271.
00067     "stmw r3, 156(r1)\n"    // Save all of the integer registers
00068     // Save all call-clobbered FP regs.
00069     "stfd f1, 44(r1)\n"  "stfd f2, 52(r1)\n"  "stfd f3, 60(r1)\n"
00070     "stfd f4, 68(r1)\n" "stfd f5, 76(r1)\n" "stfd f6, 84(r1)\n"
00071     "stfd f7, 92(r1)\n" "stfd f8, 100(r1)\n" "stfd f9, 108(r1)\n"
00072     "stfd f10, 116(r1)\n" "stfd f11, 124(r1)\n" "stfd f12, 132(r1)\n"
00073     "stfd f13, 140(r1)\n"
00074 
00075     // Now that everything is saved, go to the C compilation callback function,
00076     // passing the address of the intregs and fpregs.
00077     "addi r3, r1, 156\n"  // &IntRegs[0]
00078     "addi r4, r1, 44\n"   // &FPRegs[0]
00079     "bl _PPC32CompilationCallbackC\n"
    // Nothing follows the call: PPC32CompilationCallbackC ends with its own
    // register restore and a bctr instead of returning here.
00080     );
00081 #else
// Placeholder for non-PowerPC hosts so the symbol still exists; it must never
// actually be executed.
00082 void PPC32CompilationCallback() {
00083   assert(0 && "This is not a power pc, you can't execute this!");
00084   abort();
00085 }
00086 #endif
00087 
// PPC32CompilationCallbackC - C half of the lazy compilation callback.  It is
// called from the PPC32CompilationCallback assembly wrapper with the addresses
// of the areas where that wrapper saved the integer registers (IntRegs) and
// the floating point registers (FPRegs).  It asks JITCompilerFunction for the
// real target, patches the original call site (when the displacement fits)
// and the stub so they go to that target, and then, on PowerPC hosts, reloads
// the saved registers and jumps to the target with a hand-written epilog.
00088 extern "C" void PPC32CompilationCallbackC(unsigned *IntRegs, double *FPRegs) {
  // NOTE(review): the "+1" on each level presumably skips the frame created
  // by the assembly wrapper, so level 0+1 lands in the stub and level 1+1 in
  // the code that called the stub -- confirm against the frame layout.
00089   unsigned *CameFromStub = (unsigned*)__builtin_return_address(0+1);
00090   unsigned *CameFromOrig = (unsigned*)__builtin_return_address(1+1);
00091   unsigned *CCStackPtr   = (unsigned*)__builtin_frame_address(0);
00092 //unsigned *StubStackPtr = (unsigned*)__builtin_frame_address(1);
00093   unsigned *OrigStackPtr = (unsigned*)__builtin_frame_address(2+1);
00094 
00095   // Adjust pointer to the branch, not the return address.
00096   --CameFromStub;
00097 
00098   void *Target = JITCompilerFunction(CameFromStub);
00099 
00100   // Check to see if CameFromOrig[-1] is a 'bl' instruction, and if we can
00101   // rewrite it to branch directly to the destination.  If so, rewrite it so it
00102   // does not need to go through the stub anymore.
00103   unsigned CameFromOrigInst = CameFromOrig[-1];
00104   if ((CameFromOrigInst >> 26) == 18) {     // Direct call.
    // The instruction being patched lives 4 bytes before CameFromOrig, hence
    // the +4; the displacement is then expressed in words.
00105     intptr_t Offset = ((intptr_t)Target-(intptr_t)CameFromOrig+4) >> 2;
00106     if (Offset >= -(1 << 23) && Offset < (1 << 23)) {   // In range?
00107       // Clear the original target out.
      // (Keeps only the top six bits and the two lowest bits of the word.)
00108       CameFromOrigInst &= (63 << 26) | 3;
00109       // Fill in the new target.
00110       CameFromOrigInst |= (Offset & ((1 << 24)-1)) << 2;
00111       // Replace the call.
00112       CameFromOrig[-1] = CameFromOrigInst;
00113     }
00114   }
00115 
00116   // Locate the start of the stub.  If this is a short call, adjust backwards
00117   // the short amount, otherwise the full amount.
  // The full amount (6) matches the seven-word stub written by
  // PPCJITInfo::emitFunctionStub, whose last word is the call.
  // NOTE(review): no code in this file emits the two-word "short" form.
00118   bool isShortStub = (*CameFromStub >> 26) == 18;
00119   CameFromStub -= isShortStub ? 2 : 6;
00120 
00121   // Rewrite the stub with an unconditional branch to the target, for any users
00122   // who took the address of the stub.
00123   EmitBranchToAt(CameFromStub, Target, false);
00124 
00125   // Change the SP so that we pop two stack frames off when we return.
  // (This overwrites word 0 of this function's frame, which the epilog below
  // loads back into r1.)
00126   *CCStackPtr = (intptr_t)OrigStackPtr;
00127 
00128   // Put the address of the stub and the LR value that originally came into the
00129   // stub in a place that is easy to get on the stack after we restore all regs.
  // Concretely: word 2 receives the compiled target's address and word 1 the
  // return address into the original caller; the epilog reads them back from
  // 8(r1) and 4(r1).
00130   CCStackPtr[2] = (intptr_t)Target;
00131   CCStackPtr[1] = (intptr_t)CameFromOrig;
00132 
00133   // Note, this is not a standard epilog!
  // On non-PowerPC hosts the block below is compiled out and the function
  // simply returns.
00134 #if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)
00135   register unsigned *IRR asm ("r2") = IntRegs;
00136   register double   *FRR asm ("r3") = FPRegs;
  // Operand %0 is FRR (the FP save area) and %1 is IRR (the integer save
  // area); they are listed in that order at the end of the asm statement.
00137   __asm__ __volatile__ (
00138   "lfd f1, 0(%0)\n"  "lfd f2, 8(%0)\n"  "lfd f3, 16(%0)\n"
00139   "lfd f4, 24(%0)\n" "lfd f5, 32(%0)\n" "lfd f6, 40(%0)\n"
00140   "lfd f7, 48(%0)\n" "lfd f8, 56(%0)\n" "lfd f9, 64(%0)\n"
00141   "lfd f10, 72(%0)\n" "lfd f11, 80(%0)\n" "lfd f12, 88(%0)\n"
00142   "lfd f13, 96(%0)\n"
00143   "lmw r3, 0(%1)\n"  // Load all integer regs
00144   "lwz r0,4(r1)\n"   // Get CameFromOrig (LR into stub)
00145   "mtlr r0\n"        // Put it in the LR register
00146   "lwz r0,8(r1)\n"   // Get target function pointer
00147   "mtctr r0\n"       // Put it into the CTR register
00148   "lwz r1,0(r1)\n"   // Pop two frames off
00149   "bctr\n" ::        // Jump to the target function held in CTR
00150   "b" (FRR), "b" (IRR));
00151 #endif
00152 }
00153 
00154 
00155 
/// getLazyResolverFunction - Remember \p Fn in the file-static
/// JITCompilerFunction, where PPC32CompilationCallbackC later calls it, and
/// return PPC32CompilationCallback as the resolver entry point.
00156 TargetJITInfo::LazyResolverFn
00157 PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
00158   JITCompilerFunction = Fn;
00159   return PPC32CompilationCallback;
00160 }
00161 
00162 void *PPCJITInfo::emitFunctionStub(void *Fn, MachineCodeEmitter &MCE) {
00163   // If this is just a call to an external function, emit a branch instead of a
00164   // call.  The code is the same except for one bit of the last instruction.
00165   if (Fn != PPC32CompilationCallback) {
00166     MCE.startFunctionStub(4*4);
00167     void *Addr = (void*)(intptr_t)MCE.getCurrentPCValue();
00168     MCE.emitWord(0);
00169     MCE.emitWord(0);
00170     MCE.emitWord(0);
00171     MCE.emitWord(0);
00172     EmitBranchToAt(Addr, Fn, false);
00173     return MCE.finishFunctionStub(0);
00174   }
00175 
00176   MCE.startFunctionStub(4*7);
00177   MCE.emitWord(0x9421ffe0);     // stwu    r1,-32(r1)
00178   MCE.emitWord(0x7d6802a6);     // mflr r11
00179   MCE.emitWord(0x91610028);     // stw r11, 40(r1)
00180   void *Addr = (void*)(intptr_t)MCE.getCurrentPCValue();
00181   MCE.emitWord(0);
00182   MCE.emitWord(0);
00183   MCE.emitWord(0);
00184   MCE.emitWord(0);
00185   EmitBranchToAt(Addr, Fn, true/*is call*/);
00186   return MCE.finishFunctionStub(0);
00187 }
00188 
00189 
00190 void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
00191                           unsigned NumRelocs, unsigned char* GOTBase) {
00192   for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
00193     unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
00194     intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
00195     switch ((PPC::RelocationType)MR->getRelocationType()) {
00196     default: assert(0 && "Unknown relocation type!");
00197     case PPC::reloc_pcrel_bx:
00198       // PC-relative relocation for b and bl instructions.
00199       ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
00200       assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) &&
00201              "Relocation out of range!");
00202       *RelocPos |= (ResultPtr & ((1 << 24)-1))  << 2;
00203       break;
00204 
00205     case PPC::reloc_absolute_ptr_high: // Pointer relocations.
00206     case PPC::reloc_absolute_ptr_low: {
00207       // Pointer relocations are used for the PPC external stubs and lazy
00208       // resolver pointers that the Darwin ABI likes to use.  Basically, the
00209       // address of the global is actually stored in memory, and the address of
00210       // the pointer is relocated into instructions instead of the pointer
00211       // itself.  Because we have to keep the mapping anyway, we just return
00212       // pointers to the values in the map as our new location.
00213       static std::set<void*> Pointers;
00214       ResultPtr = (intptr_t)&*Pointers.insert((void*)ResultPtr).first;
00215     }
00216       // FALL THROUGH
00217     case PPC::reloc_absolute_high:     // high bits of ref -> low 16 of instr
00218     case PPC::reloc_absolute_low:      // low bits of ref  -> low 16 of instr
00219       ResultPtr += MR->getConstantVal();
00220 
00221       // If this is a high-part access, get the high-part.
00222       if (MR->getRelocationType() == PPC::reloc_absolute_high ||
00223           MR->getRelocationType() == PPC::reloc_absolute_ptr_high) {
00224         // If the low part will have a carry (really a borrow) from the low
00225         // 16-bits into the high 16, add a bit to borrow from.
00226         if (((int)ResultPtr << 16) < 0)
00227           ResultPtr += 1 << 16;
00228         ResultPtr >>= 16;
00229       }
00230 
00231       // Do the addition then mask, so the addition does not overflow the 16-bit
00232       // immediate section of the instruction.
00233       unsigned LowBits  = (*RelocPos + ResultPtr) & 65535;
00234       unsigned HighBits = *RelocPos & ~65535;
00235       *RelocPos = LowBits | HighBits;  // Slam into low 16-bits
00236       break;
00237     }
00238   }
00239 }
00240 
/// replaceMachineCodeForFunction - Overwrite the first four instruction words
/// at \p Old with a non-linking branch sequence to \p New (see EmitBranchToAt).
00241 void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
00242   EmitBranchToAt(Old, New, false);
00243 }