LLVM API Documentation
00001 //===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements the JIT interfaces for the 32-bit PowerPC target. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #define DEBUG_TYPE "jit" 00015 #include "PPCJITInfo.h" 00016 #include "PPCRelocations.h" 00017 #include "llvm/CodeGen/MachineCodeEmitter.h" 00018 #include "llvm/Config/alloca.h" 00019 #include "llvm/Support/Debug.h" 00020 #include <set> 00021 #include <iostream> 00022 using namespace llvm; 00023 00024 static TargetJITInfo::JITCompilerFn JITCompilerFunction; 00025 00026 #define BUILD_ADDIS(RD,RS,IMM16) \ 00027 ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535)) 00028 #define BUILD_ORI(RD,RS,UIMM16) \ 00029 ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535)) 00030 #define BUILD_MTSPR(RS,SPR) \ 00031 ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1)) 00032 #define BUILD_BCCTRx(BO,BI,LINK) \ 00033 ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1)) 00034 00035 // Pseudo-ops 00036 #define BUILD_LIS(RD,IMM16) BUILD_ADDIS(RD,0,IMM16) 00037 #define BUILD_MTCTR(RS) BUILD_MTSPR(RS,9) 00038 #define BUILD_BCTR(LINK) BUILD_BCCTRx(20,0,LINK) 00039 00040 00041 static void EmitBranchToAt(void *At, void *To, bool isCall) { 00042 intptr_t Addr = (intptr_t)To; 00043 00044 // FIXME: should special case the short branch case. 00045 unsigned *AtI = (unsigned*)At; 00046 00047 AtI[0] = BUILD_LIS(12, Addr >> 16); // lis r12, hi16(address) 00048 AtI[1] = BUILD_ORI(12, 12, Addr); // ori r12, r12, low16(address) 00049 AtI[2] = BUILD_MTCTR(12); // mtctr r12 00050 AtI[3] = BUILD_BCTR(isCall); // bctr/bctrl 00051 } 00052 00053 extern "C" void PPC32CompilationCallback(); 00054 00055 #if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER) 00056 // CompilationCallback stub - We can't use a C function with inline assembly in 00057 // it, because we the prolog/epilog inserted by GCC won't work for us. Instead, 00058 // write our own wrapper, which does things our way, so we have complete control 00059 // over register saving and restoring. 00060 asm( 00061 ".text\n" 00062 ".align 2\n" 00063 ".globl _PPC32CompilationCallback\n" 00064 "_PPC32CompilationCallback:\n" 00065 // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the 00066 // FIXME: need to save v[0-19] for altivec? 00067 // Set up a proper stack frame 00068 "stwu r1, -208(r1)\n" 00069 "mflr r0\n" 00070 "stw r0, 216(r1)\n" 00071 // Save all int arg registers 00072 "stw r10, 204(r1)\n" "stw r9, 200(r1)\n" 00073 "stw r8, 196(r1)\n" "stw r7, 192(r1)\n" 00074 "stw r6, 188(r1)\n" "stw r5, 184(r1)\n" 00075 "stw r4, 180(r1)\n" "stw r3, 176(r1)\n" 00076 // Save all call-clobbered FP regs. 00077 "stfd f13, 168(r1)\n" "stfd f12, 160(r1)\n" 00078 "stfd f11, 152(r1)\n" "stfd f10, 144(r1)\n" 00079 "stfd f9, 136(r1)\n" "stfd f8, 128(r1)\n" 00080 "stfd f7, 120(r1)\n" "stfd f6, 112(r1)\n" 00081 "stfd f5, 104(r1)\n" "stfd f4, 96(r1)\n" 00082 "stfd f3, 88(r1)\n" "stfd f2, 80(r1)\n" 00083 "stfd f1, 72(r1)\n" 00084 // Arguments to Compilation Callback: 00085 // r3 - our lr (address of the call instruction in stub plus 4) 00086 // r4 - stub's lr (address of instruction that called the stub plus 4) 00087 "mr r3, r0\n" 00088 "lwz r2, 208(r1)\n" // stub's frame 00089 "lwz r4, 8(r2)\n" // stub's lr 00090 "bl _PPC32CompilationCallbackC\n" 00091 "mtctr r3\n" 00092 // Restore all int arg registers 00093 "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n" 00094 "lwz r8, 196(r1)\n" "lwz r7, 192(r1)\n" 00095 "lwz r6, 188(r1)\n" "lwz r5, 184(r1)\n" 00096 "lwz r4, 180(r1)\n" "lwz r3, 176(r1)\n" 00097 // Restore all FP arg registers 00098 "lfd f13, 168(r1)\n" "lfd f12, 160(r1)\n" 00099 "lfd f11, 152(r1)\n" "lfd f10, 144(r1)\n" 00100 "lfd f9, 136(r1)\n" "lfd f8, 128(r1)\n" 00101 "lfd f7, 120(r1)\n" "lfd f6, 112(r1)\n" 00102 "lfd f5, 104(r1)\n" "lfd f4, 96(r1)\n" 00103 "lfd f3, 88(r1)\n" "lfd f2, 80(r1)\n" 00104 "lfd f1, 72(r1)\n" 00105 // Pop 3 frames off the stack and branch to target 00106 "lwz r1, 208(r1)\n" 00107 "lwz r2, 8(r1)\n" 00108 "mtlr r2\n" 00109 "bctr\n" 00110 ); 00111 #else 00112 void PPC32CompilationCallback() { 00113 assert(0 && "This is not a power pc, you can't execute this!"); 00114 abort(); 00115 } 00116 #endif 00117 00118 extern "C" unsigned *PPC32CompilationCallbackC(unsigned *StubCallAddrPlus4, 00119 unsigned *OrigCallAddrPlus4) { 00120 // Adjust the pointer to the address of the call instruction in the stub 00121 // emitted by emitFunctionStub, rather than the instruction after it. 00122 unsigned *StubCallAddr = StubCallAddrPlus4 - 1; 00123 unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1; 00124 00125 void *Target = JITCompilerFunction(StubCallAddr); 00126 00127 // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite 00128 // it to branch directly to the destination. If so, rewrite it so it does not 00129 // need to go through the stub anymore. 00130 unsigned OrigCallInst = *OrigCallAddr; 00131 if ((OrigCallInst >> 26) == 18) { // Direct call. 00132 intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2; 00133 00134 if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range? 00135 // Clear the original target out. 00136 OrigCallInst &= (63 << 26) | 3; 00137 // Fill in the new target. 00138 OrigCallInst |= (Offset & ((1 << 24)-1)) << 2; 00139 // Replace the call. 00140 *OrigCallAddr = OrigCallInst; 00141 } 00142 } 00143 00144 // Assert that we are coming from a stub that was created with our 00145 // emitFunctionStub. 00146 assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!"); 00147 StubCallAddr -= 6; 00148 00149 // Rewrite the stub with an unconditional branch to the target, for any users 00150 // who took the address of the stub. 00151 EmitBranchToAt(StubCallAddr, Target, false); 00152 00153 // Put the address of the target function to call and the address to return to 00154 // after calling the target function in a place that is easy to get on the 00155 // stack after we restore all regs. 00156 return (unsigned *)Target; 00157 } 00158 00159 00160 00161 TargetJITInfo::LazyResolverFn 00162 PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) { 00163 JITCompilerFunction = Fn; 00164 return PPC32CompilationCallback; 00165 } 00166 00167 void *PPCJITInfo::emitFunctionStub(void *Fn, MachineCodeEmitter &MCE) { 00168 // If this is just a call to an external function, emit a branch instead of a 00169 // call. The code is the same except for one bit of the last instruction. 00170 if (Fn != (void*)(intptr_t)PPC32CompilationCallback) { 00171 MCE.startFunctionStub(4*4); 00172 void *Addr = (void*)(intptr_t)MCE.getCurrentPCValue(); 00173 MCE.emitWordBE(0); 00174 MCE.emitWordBE(0); 00175 MCE.emitWordBE(0); 00176 MCE.emitWordBE(0); 00177 EmitBranchToAt(Addr, Fn, false); 00178 return MCE.finishFunctionStub(0); 00179 } 00180 00181 MCE.startFunctionStub(4*7); 00182 MCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1) 00183 MCE.emitWordBE(0x7d6802a6); // mflr r11 00184 MCE.emitWordBE(0x91610028); // stw r11, 40(r1) 00185 void *Addr = (void*)(intptr_t)MCE.getCurrentPCValue(); 00186 MCE.emitWordBE(0); 00187 MCE.emitWordBE(0); 00188 MCE.emitWordBE(0); 00189 MCE.emitWordBE(0); 00190 EmitBranchToAt(Addr, Fn, true/*is call*/); 00191 return MCE.finishFunctionStub(0); 00192 } 00193 00194 00195 void PPCJITInfo::relocate(void *Function, MachineRelocation *MR, 00196 unsigned NumRelocs, unsigned char* GOTBase) { 00197 for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { 00198 unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4; 00199 intptr_t ResultPtr = (intptr_t)MR->getResultPointer(); 00200 switch ((PPC::RelocationType)MR->getRelocationType()) { 00201 default: assert(0 && "Unknown relocation type!"); 00202 case PPC::reloc_pcrel_bx: 00203 // PC-relative relocation for b and bl instructions. 00204 ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2; 00205 assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) && 00206 "Relocation out of range!"); 00207 *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2; 00208 break; 00209 case PPC::reloc_absolute_ptr_high: // Pointer relocations. 00210 case PPC::reloc_absolute_ptr_low: 00211 case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr 00212 case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr 00213 ResultPtr += MR->getConstantVal(); 00214 00215 // If this is a high-part access, get the high-part. 00216 if (MR->getRelocationType() == PPC::reloc_absolute_high || 00217 MR->getRelocationType() == PPC::reloc_absolute_ptr_high) { 00218 // If the low part will have a carry (really a borrow) from the low 00219 // 16-bits into the high 16, add a bit to borrow from. 00220 if (((int)ResultPtr << 16) < 0) 00221 ResultPtr += 1 << 16; 00222 ResultPtr >>= 16; 00223 } 00224 00225 // Do the addition then mask, so the addition does not overflow the 16-bit 00226 // immediate section of the instruction. 00227 unsigned LowBits = (*RelocPos + ResultPtr) & 65535; 00228 unsigned HighBits = *RelocPos & ~65535; 00229 *RelocPos = LowBits | HighBits; // Slam into low 16-bits 00230 break; 00231 } 00232 case PPC::reloc_absolute_low_ix: { // low bits of ref -> low 14 of instr 00233 ResultPtr += MR->getConstantVal(); 00234 // Do the addition then mask, so the addition does not overflow the 16-bit 00235 // immediate section of the instruction. 00236 unsigned LowBits = (*RelocPos + ResultPtr) & 0xFFFC; 00237 unsigned HighBits = *RelocPos & 0xFFFF0003; 00238 *RelocPos = LowBits | HighBits; // Slam into low 14-bits. 00239 break; 00240 } 00241 } 00242 } 00243 } 00244 00245 void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { 00246 EmitBranchToAt(Old, New, false); 00247 } 00248 00249 void PPCJITInfo::resolveBBRefs(MachineCodeEmitter &MCE) { 00250 // Resolve branches to BasicBlocks for the entire function 00251 for (unsigned i = 0, e = BBRefs.size(); i != e; ++i) { 00252 intptr_t Location = MCE.getMachineBasicBlockAddress(BBRefs[i].first); 00253 unsigned *Ref = (unsigned *)BBRefs[i].second; 00254 DEBUG(std::cerr << "Fixup @ " << (void*)Ref << " to " << (void*)Location 00255 << "\n"); 00256 unsigned Instr = *Ref; 00257 intptr_t BranchTargetDisp = (Location - (intptr_t)Ref) >> 2; 00258 00259 switch (Instr >> 26) { 00260 default: assert(0 && "Unknown branch user!"); 00261 case 18: // This is B or BL 00262 *Ref |= (BranchTargetDisp & ((1 << 24)-1)) << 2; 00263 break; 00264 case 16: // This is BLT,BLE,BEQ,BGE,BGT,BNE, or other bcx instruction 00265 *Ref |= (BranchTargetDisp & ((1 << 14)-1)) << 2; 00266 break; 00267 } 00268 } 00269 BBRefs.clear(); 00270 } 00271 00272 #ifdef __APPLE__ 00273 extern "C" void sys_icache_invalidate(const void *Addr, size_t len); 00274 #endif 00275 00276 void PPCJITInfo::synchronizeICache(const void *Addr, size_t Len) { 00277 #ifdef __APPLE__ 00278 sys_icache_invalidate(Addr, Len); 00279 #endif 00280 }