LLVM API Documentation
00001 //===-- SparcJITInfo.cpp - Implement the JIT interfaces for SparcV9 -------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements the JIT interfaces for the SparcV9 target. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #define DEBUG_TYPE "jit" 00015 #include "SparcV9JITInfo.h" 00016 #include "SparcV9Relocations.h" 00017 #include "llvm/CodeGen/MachineCodeEmitter.h" 00018 #include "llvm/Config/alloca.h" 00019 #include "llvm/Support/Debug.h" 00020 #include <iostream> 00021 using namespace llvm; 00022 00023 /// JITCompilerFunction - This contains the address of the JIT function used to 00024 /// compile a function lazily. 00025 static TargetJITInfo::JITCompilerFn JITCompilerFunction; 00026 00027 /// BUILD_SETHI/BUILD_ORI/BUILD_BA/BUILD_CALL - These macros build sparc machine 00028 /// instructions using lots of magic defined by the Sparc ISA. 00029 #define BUILD_SETHI(RD, C) (((RD) << 25) | (4 << 22) | (C & ((1 << 22)-1))) 00030 #define BUILD_ORI(RS, C, RD) ((2 << 30) | (RD << 25) | (2 << 19) | (RS << 14) |\ 00031 (1 << 13) | (C & ((1 << 12)-1))) 00032 #define BUILD_BA(DISP) ((8 << 25) | (2 << 22) | (DISP & ((1 << 22)-1))) 00033 #define BUILD_CALL(OFFSET) ((1 << 30) | (OFFSET & (1 << 30)-1)) 00034 00035 static void InsertJumpAtAddr(int64_t JumpTarget, unsigned *Addr) { 00036 // If the target function is close enough to fit into the 19bit disp of 00037 // BA, we should use this version, as it's much cheaper to generate. 00038 int64_t BranchTarget = (JumpTarget-(intptr_t)Addr) >> 2; 00039 if (BranchTarget < (1 << 19) && BranchTarget > -(1 << 19)) { 00040 // ba <target> 00041 Addr[0] = BUILD_BA(BranchTarget); 00042 00043 // nop 00044 Addr[1] = 0x01000000; 00045 } else { 00046 enum { G0 = 0, G1 = 1, G5 = 5 }; 00047 // Get address to branch into %g1, using %g5 as a temporary 00048 // 00049 // sethi %uhi(Target), %g5 ;; get upper 22 bits of Target into %g5 00050 Addr[0] = BUILD_SETHI(G5, JumpTarget >> 42); 00051 // or %g5, %ulo(Target), %g5 ;; get 10 lower bits of upper word into %1 00052 Addr[1] = BUILD_ORI(G5, JumpTarget >> 32, G5); 00053 // sllx %g5, 32, %g5 ;; shift those 10 bits to the upper word 00054 Addr[2] = 0x8B297020; 00055 // sethi %hi(Target), %g1 ;; extract bits 10-31 into the dest reg 00056 Addr[3] = BUILD_SETHI(G1, JumpTarget >> 10); 00057 // or %g5, %g1, %g1 ;; get upper word (in %g5) into %g1 00058 Addr[4] = 0x82114001; 00059 // or %g1, %lo(Target), %g1 ;; get lowest 10 bits of Target into %g1 00060 Addr[5] = BUILD_ORI(G1, JumpTarget, G1); 00061 00062 // jmpl %g1, %g0, %g0 ;; indirect branch on %g1 00063 Addr[6] = 0x81C00001; 00064 // nop ;; delay slot 00065 Addr[7] = 0x01000000; 00066 } 00067 } 00068 00069 void SparcV9JITInfo::replaceMachineCodeForFunction (void *Old, void *New) { 00070 InsertJumpAtAddr((intptr_t)New, (unsigned*)Old); 00071 } 00072 00073 00074 static void SaveRegisters(uint64_t DoubleFP[], uint64_t CC[], 00075 uint64_t Globals[]) { 00076 #if defined(__sparcv9) 00077 00078 __asm__ __volatile__ (// Save condition-code registers 00079 "stx %%fsr, %0;\n\t" 00080 "rd %%fprs, %1;\n\t" 00081 "rd %%ccr, %2;\n\t" 00082 : "=m"(CC[0]), "=r"(CC[1]), "=r"(CC[2])); 00083 00084 __asm__ __volatile__ (// Save globals g1 and g5 00085 "stx %%g1, %0;\n\t" 00086 "stx %%g5, %0;\n\t" 00087 : "=m"(Globals[0]), "=m"(Globals[1])); 00088 00089 // GCC says: `asm' only allows up to thirty parameters! 00090 __asm__ __volatile__ (// Save Single/Double FP registers, part 1 00091 "std %%f0, %0;\n\t" "std %%f2, %1;\n\t" 00092 "std %%f4, %2;\n\t" "std %%f6, %3;\n\t" 00093 "std %%f8, %4;\n\t" "std %%f10, %5;\n\t" 00094 "std %%f12, %6;\n\t" "std %%f14, %7;\n\t" 00095 "std %%f16, %8;\n\t" "std %%f18, %9;\n\t" 00096 "std %%f20, %10;\n\t" "std %%f22, %11;\n\t" 00097 "std %%f24, %12;\n\t" "std %%f26, %13;\n\t" 00098 "std %%f28, %14;\n\t" "std %%f30, %15;\n\t" 00099 : "=m"(DoubleFP[ 0]), "=m"(DoubleFP[ 1]), 00100 "=m"(DoubleFP[ 2]), "=m"(DoubleFP[ 3]), 00101 "=m"(DoubleFP[ 4]), "=m"(DoubleFP[ 5]), 00102 "=m"(DoubleFP[ 6]), "=m"(DoubleFP[ 7]), 00103 "=m"(DoubleFP[ 8]), "=m"(DoubleFP[ 9]), 00104 "=m"(DoubleFP[10]), "=m"(DoubleFP[11]), 00105 "=m"(DoubleFP[12]), "=m"(DoubleFP[13]), 00106 "=m"(DoubleFP[14]), "=m"(DoubleFP[15])); 00107 00108 __asm__ __volatile__ (// Save Double FP registers, part 2 00109 "std %%f32, %0;\n\t" "std %%f34, %1;\n\t" 00110 "std %%f36, %2;\n\t" "std %%f38, %3;\n\t" 00111 "std %%f40, %4;\n\t" "std %%f42, %5;\n\t" 00112 "std %%f44, %6;\n\t" "std %%f46, %7;\n\t" 00113 "std %%f48, %8;\n\t" "std %%f50, %9;\n\t" 00114 "std %%f52, %10;\n\t" "std %%f54, %11;\n\t" 00115 "std %%f56, %12;\n\t" "std %%f58, %13;\n\t" 00116 "std %%f60, %14;\n\t" "std %%f62, %15;\n\t" 00117 : "=m"(DoubleFP[16]), "=m"(DoubleFP[17]), 00118 "=m"(DoubleFP[18]), "=m"(DoubleFP[19]), 00119 "=m"(DoubleFP[20]), "=m"(DoubleFP[21]), 00120 "=m"(DoubleFP[22]), "=m"(DoubleFP[23]), 00121 "=m"(DoubleFP[24]), "=m"(DoubleFP[25]), 00122 "=m"(DoubleFP[26]), "=m"(DoubleFP[27]), 00123 "=m"(DoubleFP[28]), "=m"(DoubleFP[29]), 00124 "=m"(DoubleFP[30]), "=m"(DoubleFP[31])); 00125 #else 00126 std::cerr << "ERROR: RUNNING CODE THAT ONLY WORKS ON A SPARCV9 HOST!\n"; 00127 abort(); 00128 #endif 00129 } 00130 00131 static void RestoreRegisters(uint64_t DoubleFP[], uint64_t CC[], 00132 uint64_t Globals[]) { 00133 #if defined(__sparcv9) 00134 00135 __asm__ __volatile__ (// Restore condition-code registers 00136 "ldx %0, %%fsr;\n\t" 00137 "wr %1, 0, %%fprs;\n\t" 00138 "wr %2, 0, %%ccr;\n\t" 00139 :: "m"(CC[0]), "r"(CC[1]), "r"(CC[2])); 00140 00141 __asm__ __volatile__ (// Restore globals g1 and g5 00142 "ldx %0, %%g1;\n\t" 00143 "ldx %0, %%g5;\n\t" 00144 :: "m"(Globals[0]), "m"(Globals[1])); 00145 00146 // GCC says: `asm' only allows up to thirty parameters! 00147 __asm__ __volatile__ (// Restore Single/Double FP registers, part 1 00148 "ldd %0, %%f0;\n\t" "ldd %1, %%f2;\n\t" 00149 "ldd %2, %%f4;\n\t" "ldd %3, %%f6;\n\t" 00150 "ldd %4, %%f8;\n\t" "ldd %5, %%f10;\n\t" 00151 "ldd %6, %%f12;\n\t" "ldd %7, %%f14;\n\t" 00152 "ldd %8, %%f16;\n\t" "ldd %9, %%f18;\n\t" 00153 "ldd %10, %%f20;\n\t" "ldd %11, %%f22;\n\t" 00154 "ldd %12, %%f24;\n\t" "ldd %13, %%f26;\n\t" 00155 "ldd %14, %%f28;\n\t" "ldd %15, %%f30;\n\t" 00156 :: "m"(DoubleFP[0]), "m"(DoubleFP[1]), 00157 "m"(DoubleFP[2]), "m"(DoubleFP[3]), 00158 "m"(DoubleFP[4]), "m"(DoubleFP[5]), 00159 "m"(DoubleFP[6]), "m"(DoubleFP[7]), 00160 "m"(DoubleFP[8]), "m"(DoubleFP[9]), 00161 "m"(DoubleFP[10]), "m"(DoubleFP[11]), 00162 "m"(DoubleFP[12]), "m"(DoubleFP[13]), 00163 "m"(DoubleFP[14]), "m"(DoubleFP[15])); 00164 00165 __asm__ __volatile__ (// Restore Double FP registers, part 2 00166 "ldd %0, %%f32;\n\t" "ldd %1, %%f34;\n\t" 00167 "ldd %2, %%f36;\n\t" "ldd %3, %%f38;\n\t" 00168 "ldd %4, %%f40;\n\t" "ldd %5, %%f42;\n\t" 00169 "ldd %6, %%f44;\n\t" "ldd %7, %%f46;\n\t" 00170 "ldd %8, %%f48;\n\t" "ldd %9, %%f50;\n\t" 00171 "ldd %10, %%f52;\n\t" "ldd %11, %%f54;\n\t" 00172 "ldd %12, %%f56;\n\t" "ldd %13, %%f58;\n\t" 00173 "ldd %14, %%f60;\n\t" "ldd %15, %%f62;\n\t" 00174 :: "m"(DoubleFP[16]), "m"(DoubleFP[17]), 00175 "m"(DoubleFP[18]), "m"(DoubleFP[19]), 00176 "m"(DoubleFP[20]), "m"(DoubleFP[21]), 00177 "m"(DoubleFP[22]), "m"(DoubleFP[23]), 00178 "m"(DoubleFP[24]), "m"(DoubleFP[25]), 00179 "m"(DoubleFP[26]), "m"(DoubleFP[27]), 00180 "m"(DoubleFP[28]), "m"(DoubleFP[29]), 00181 "m"(DoubleFP[30]), "m"(DoubleFP[31])); 00182 #else 00183 std::cerr << "ERROR: RUNNING CODE THAT ONLY WORKS ON A SPARCV9 HOST!\n"; 00184 abort(); 00185 #endif 00186 } 00187 00188 00189 static void CompilationCallback() { 00190 // Local space to save the registers 00191 uint64_t DoubleFP[32]; 00192 uint64_t CC[3]; 00193 uint64_t Globals[2]; 00194 00195 SaveRegisters(DoubleFP, CC, Globals); 00196 00197 unsigned *CameFrom = (unsigned*)__builtin_return_address(0); 00198 unsigned *CameFrom1 = (unsigned*)__builtin_return_address(1); 00199 00200 int64_t Target = (intptr_t)JITCompilerFunction(CameFrom); 00201 00202 DEBUG(std::cerr << "In callback! Addr=" << (void*)CameFrom << "\n"); 00203 00204 // If we can rewrite the ORIGINAL caller, we eliminate the whole need for a 00205 // trampoline function stub!! 00206 unsigned OrigCallInst = *CameFrom1; 00207 int64_t OrigTarget = (Target-(intptr_t)CameFrom1) >> 2; 00208 if ((OrigCallInst >> 30) == 1 && 00209 (OrigTarget <= (1 << 30) && OrigTarget >= -(1 << 30))) { 00210 // The original call instruction was CALL <immed>, which means we can 00211 // overwrite it directly, since the offset will fit into 30 bits 00212 *CameFrom1 = BUILD_CALL(OrigTarget); 00213 //++OverwrittenCalls; 00214 } else { 00215 //++UnmodifiedCalls; 00216 } 00217 00218 // Rewrite the call target so that we don't fault every time we execute it. 00219 // 00220 unsigned OrigStubCallInst = *CameFrom; 00221 00222 // Subtract enough to overwrite up to the 'save' instruction 00223 // This depends on whether we made a short call (1 instruction) or the 00224 // farCall (7 instructions) 00225 int Offset = ((OrigStubCallInst >> 30) == 1) ? 1 : 7; 00226 unsigned *CodeBegin = CameFrom - Offset; 00227 00228 // FIXME: __builtin_frame_address doesn't work if frame pointer elimination 00229 // has been performed. Having a variable sized alloca disables frame pointer 00230 // elimination currently, even if it's dead. This is a gross hack. 00231 alloca(42+Offset); 00232 00233 // Make sure that what we're about to overwrite is indeed "save". 00234 if (*CodeBegin != 0x9DE3BF40) { 00235 std::cerr << "About to overwrite smthg not a save instr!"; 00236 abort(); 00237 } 00238 00239 // Overwrite it 00240 InsertJumpAtAddr(Target, CodeBegin); 00241 00242 // Flush the I-Cache: FLUSH clears out a doubleword at a given address 00243 // Self-modifying code MUST clear out the I-Cache to be portable 00244 #if defined(__sparcv9) 00245 for (int i = -Offset*4, e = 32-((int64_t)Offset*4); i < e; i += 8) 00246 __asm__ __volatile__ ("flush %%i7 + %0" : : "r" (i)); 00247 #endif 00248 00249 // Change the return address to re-execute the restore, then the jump. 00250 DEBUG(std::cerr << "Callback returning to: 0x" 00251 << std::hex << (CameFrom-Offset*4-12) << "\n"); 00252 #if defined(__sparcv9) 00253 __asm__ __volatile__ ("sub %%i7, %0, %%i7" : : "r" (Offset*4+12)); 00254 #endif 00255 00256 RestoreRegisters(DoubleFP, CC, Globals); 00257 } 00258 00259 00260 /// emitStubForFunction - This method is used by the JIT when it needs to emit 00261 /// the address of a function for a function whose code has not yet been 00262 /// generated. In order to do this, it generates a stub which jumps to the lazy 00263 /// function compiler, which will eventually get fixed to call the function 00264 /// directly. 00265 /// 00266 void *SparcV9JITInfo::emitFunctionStub(void *Fn, MachineCodeEmitter &MCE) { 00267 if (Fn != CompilationCallback) { 00268 // If this is just a call to an external function, 00269 MCE.startFunctionStub(4*8); 00270 unsigned *Stub = (unsigned*)(intptr_t)MCE.getCurrentPCValue(); 00271 for (unsigned i = 0; i != 8; ++i) 00272 MCE.emitWord(0); 00273 InsertJumpAtAddr((intptr_t)Fn, Stub); 00274 return MCE.finishFunctionStub(0); // 1 instr past the restore 00275 } 00276 00277 MCE.startFunctionStub(44); 00278 MCE.emitWord(0x81e82000); // restore %g0, 0, %g0 00279 MCE.emitWord(0x9DE3BF40); // save %sp, -192, %sp 00280 00281 int64_t CurrPC = MCE.getCurrentPCValue(); 00282 int64_t Addr = (intptr_t)Fn; 00283 int64_t CallTarget = (Addr-CurrPC) >> 2; 00284 if (CallTarget < (1 << 29) && CallTarget > -(1 << 29)) { 00285 // call CallTarget 00286 MCE.emitWord((0x01 << 30) | CallTarget); 00287 } else { 00288 enum {G5 = 5, G1 = 1 }; 00289 // Otherwise, we need to emit a sequence of instructions to call a distant 00290 // function. We use %g5 as a temporary, and compute the value into %g1 00291 00292 // sethi %uhi(Target), %g5 ;; get upper 22 bits of Target into %g5 00293 MCE.emitWord(BUILD_SETHI(G5, Addr >> 42)); 00294 // or %g5, %ulo(Target), %g5 ;; get 10 lower bits of upper word into %1 00295 MCE.emitWord(BUILD_ORI(G5, Addr >> 32, G5)); 00296 // sllx %g5, 32, %g5 ;; shift those 10 bits to the upper word 00297 MCE.emitWord(0x8B297020); 00298 // sethi %hi(Target), %g1 ;; extract bits 10-31 into the dest reg 00299 MCE.emitWord(BUILD_SETHI(G1, Addr >> 10)); 00300 // or %g5, %g1, %g1 ;; get upper word (in %g5) into %g1 00301 MCE.emitWord(0x82114001); 00302 // or %g1, %lo(Target), %g1 ;; get lowest 10 bits of Target into %g1 00303 MCE.emitWord(BUILD_ORI(G1, Addr, G1)); 00304 00305 // call %g1 ;; indirect call on %g1 00306 MCE.emitWord(0x9FC04000); 00307 } 00308 00309 // nop ;; call delay slot 00310 MCE.emitWord(0x1000000); 00311 00312 // FIXME: Should have a restore and return! 00313 00314 MCE.emitWord(0xDEADBEEF); // marker so that we know it's really a stub 00315 return (char*)MCE.finishFunctionStub(0)+4; // 1 instr past the restore 00316 } 00317 00318 00319 00320 TargetJITInfo::LazyResolverFn 00321 SparcV9JITInfo::getLazyResolverFunction(JITCompilerFn F) { 00322 JITCompilerFunction = F; 00323 return CompilationCallback; 00324 } 00325 00326 void SparcV9JITInfo::relocate(void *Function, MachineRelocation *MR, 00327 unsigned NumRelocs, unsigned char* GOTBase) { 00328 for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { 00329 unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4; 00330 intptr_t ResultPtr = (intptr_t)MR->getResultPointer(); 00331 switch ((V9::RelocationType)MR->getRelocationType()) { 00332 default: assert(0 && "Unknown relocation type!"); 00333 case V9::reloc_pcrel_call: 00334 ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2; // PC relative. 00335 assert((ResultPtr < (1 << 29) && ResultPtr > -(1 << 29)) && 00336 "reloc_pcrel_call is out of range!"); 00337 // The high two bits of the call are always set to 01. 00338 *RelocPos = (1 << 30) | (ResultPtr & ((1 << 30)-1)) ; 00339 break; 00340 case V9::reloc_sethi_hh: 00341 case V9::reloc_sethi_lm: 00342 ResultPtr >>= (MR->getRelocationType() == V9::reloc_sethi_hh ? 32 : 0); 00343 ResultPtr >>= 10; 00344 ResultPtr &= (1 << 22)-1; 00345 *RelocPos |= (unsigned)ResultPtr; 00346 break; 00347 case V9::reloc_or_hm: 00348 case V9::reloc_or_lo: 00349 ResultPtr >>= (MR->getRelocationType() == V9::reloc_or_hm ? 32 : 0); 00350 ResultPtr &= (1 << 12)-1; 00351 *RelocPos |= (unsigned)ResultPtr; 00352 break; 00353 } 00354 } 00355 }