LLVM API Documentation

Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

SparcV9JITInfo.cpp

Go to the documentation of this file.
00001 //===-- SparcV9JITInfo.cpp - Implement the JIT interfaces for SparcV9 -----===//
00002 // 
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 // 
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the JIT interfaces for the SparcV9 target.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #define DEBUG_TYPE "jit"
00015 #include "SparcV9JITInfo.h"
00016 #include "SparcV9Relocations.h"
00017 #include "llvm/CodeGen/MachineCodeEmitter.h"
00018 #include "llvm/Config/alloca.h"
00019 #include "llvm/Support/Debug.h"
00020 using namespace llvm;
00021 
00022 /// JITCompilerFunction - This contains the address of the JIT function used to
00023 /// compile a function lazily.
00024 static TargetJITInfo::JITCompilerFn JITCompilerFunction;
00025 
/// BUILD_SETHI/BUILD_ORI/BUILD_BA/BUILD_CALL - These macros assemble 32-bit
/// sparc instruction words from their fields:
///
///   BUILD_SETHI(RD, C)    - RD at bit 25, 22-bit immediate C in bits 0-21.
///   BUILD_ORI(RS, C, RD)  - RD at bit 25, RS at bit 14, immediate flag (bit
///                           13) set, low 12 bits of C in bits 0-11.
///   BUILD_BA(DISP)        - branch-always with a 22-bit word displacement.
///   BUILD_CALL(OFFSET)    - top two bits 01, 30-bit word displacement.
///
/// Every argument is parenthesized so that an expression argument (such as
/// "a | b") is masked as a whole, the fixed fields use unsigned constants so
/// that no shift overflows a signed int, and each macro yields an 'unsigned'
/// instruction word even when handed a 64-bit operand.
#define BUILD_SETHI(RD, C) \
  ((unsigned)(((unsigned)(RD) << 25) | (4u << 22) | ((C) & ((1 << 22)-1))))
#define BUILD_ORI(RS, C, RD) \
  ((unsigned)((2u << 30) | ((unsigned)(RD) << 25) | (2u << 19) | \
              ((unsigned)(RS) << 14) | (1u << 13) | ((C) & ((1 << 12)-1))))
#define BUILD_BA(DISP) \
  ((unsigned)((8u << 25) | (2u << 22) | ((DISP) & ((1 << 22)-1))))
#define BUILD_CALL(OFFSET) \
  ((unsigned)((1u << 30) | ((OFFSET) & ((1 << 30)-1))))
00033 
00034 static void InsertJumpAtAddr(int64_t JumpTarget, unsigned *Addr) {
00035   // If the target function is close enough to fit into the 19bit disp of
00036   // BA, we should use this version, as it's much cheaper to generate.
00037   int64_t BranchTarget = (JumpTarget-(intptr_t)Addr) >> 2;
00038   if (BranchTarget < (1 << 19) && BranchTarget > -(1 << 19)) {
00039     // ba <target>
00040     Addr[0] = BUILD_BA(BranchTarget);
00041 
00042     // nop
00043     Addr[1] = 0x01000000;
00044   } else {
00045     enum { G0 = 0, G1 = 1, G5 = 5 };
00046     // Get address to branch into %g1, using %g5 as a temporary
00047     //
00048     // sethi %uhi(Target), %g5   ;; get upper 22 bits of Target into %g5
00049     Addr[0] = BUILD_SETHI(G5, JumpTarget >> 42);
00050     // or %g5, %ulo(Target), %g5 ;; get 10 lower bits of upper word into %1
00051     Addr[1] = BUILD_ORI(G5, JumpTarget >> 32, G5);
00052     // sllx %g5, 32, %g5         ;; shift those 10 bits to the upper word
00053     Addr[2] = 0x8B297020;
00054     // sethi %hi(Target), %g1    ;; extract bits 10-31 into the dest reg
00055     Addr[3] = BUILD_SETHI(G1, JumpTarget >> 10);
00056     // or %g5, %g1, %g1          ;; get upper word (in %g5) into %g1
00057     Addr[4] = 0x82114001;
00058     // or %g1, %lo(Target), %g1  ;; get lowest 10 bits of Target into %g1
00059     Addr[5] = BUILD_ORI(G1, JumpTarget, G1);
00060     
00061     // jmpl %g1, %g0, %g0          ;; indirect branch on %g1
00062     Addr[6] = 0x81C00001;
00063     // nop                         ;; delay slot
00064     Addr[7] = 0x01000000;
00065   }
00066 }
00067 
00068 void SparcV9JITInfo::replaceMachineCodeForFunction (void *Old, void *New) {
00069   InsertJumpAtAddr((intptr_t)New, (unsigned*)Old);
00070 }
00071 
00072 
/// SaveRegisters - Store the current values of the registers that the lazy
/// compilation callback must preserve into caller-provided buffers:
///
///   DoubleFP - slots 0..31 receive %f0, %f2, ..., %f62
///   CC       - slots 0..2 receive %fsr, %fprs and %ccr
///   Globals  - slot 0 receives %g1, slot 1 receives %g5
///
/// RestoreRegisters reloads the same layout.  On a host where __sparcv9 is not
/// defined this prints an error and aborts.
static void SaveRegisters(uint64_t DoubleFP[], uint64_t CC[],
                          uint64_t Globals[]) {
#if defined(__sparcv9)

  __asm__ __volatile__ (// Save condition-code registers
                        "stx %%fsr, %0;\n\t"
                        "rd %%fprs, %1;\n\t"
                        "rd %%ccr,  %2;\n\t"
                        : "=m"(CC[0]), "=r"(CC[1]), "=r"(CC[2]));

  // Each global goes to its own operand: %g1 -> Globals[0] (%0) and
  // %g5 -> Globals[1] (%1).  Storing both through %0 would lose %g1.
  __asm__ __volatile__ (// Save globals g1 and g5
                        "stx %%g1, %0;\n\t"
                        "stx %%g5, %1;\n\t"
                        : "=m"(Globals[0]), "=m"(Globals[1]));

  // GCC says: `asm' only allows up to thirty parameters!
  __asm__ __volatile__ (// Save Single/Double FP registers, part 1
                        "std  %%f0,  %0;\n\t"  "std  %%f2,  %1;\n\t"
                        "std  %%f4,  %2;\n\t"  "std  %%f6,  %3;\n\t"
                        "std  %%f8,  %4;\n\t"  "std  %%f10, %5;\n\t"
                        "std  %%f12, %6;\n\t"  "std  %%f14, %7;\n\t"
                        "std  %%f16, %8;\n\t"  "std  %%f18, %9;\n\t"
                        "std  %%f20, %10;\n\t" "std  %%f22, %11;\n\t"
                        "std  %%f24, %12;\n\t" "std  %%f26, %13;\n\t"
                        "std  %%f28, %14;\n\t" "std  %%f30, %15;\n\t"
                        : "=m"(DoubleFP[ 0]), "=m"(DoubleFP[ 1]),
                          "=m"(DoubleFP[ 2]), "=m"(DoubleFP[ 3]),
                          "=m"(DoubleFP[ 4]), "=m"(DoubleFP[ 5]),
                          "=m"(DoubleFP[ 6]), "=m"(DoubleFP[ 7]),
                          "=m"(DoubleFP[ 8]), "=m"(DoubleFP[ 9]),
                          "=m"(DoubleFP[10]), "=m"(DoubleFP[11]),
                          "=m"(DoubleFP[12]), "=m"(DoubleFP[13]),
                          "=m"(DoubleFP[14]), "=m"(DoubleFP[15]));

  __asm__ __volatile__ (// Save Double FP registers, part 2
                        "std %%f32, %0;\n\t"  "std %%f34, %1;\n\t"
                        "std %%f36, %2;\n\t"  "std %%f38, %3;\n\t"
                        "std %%f40, %4;\n\t"  "std %%f42, %5;\n\t"
                        "std %%f44, %6;\n\t"  "std %%f46, %7;\n\t"
                        "std %%f48, %8;\n\t"  "std %%f50, %9;\n\t"
                        "std %%f52, %10;\n\t" "std %%f54, %11;\n\t"
                        "std %%f56, %12;\n\t" "std %%f58, %13;\n\t"
                        "std %%f60, %14;\n\t" "std %%f62, %15;\n\t"
                        : "=m"(DoubleFP[16]), "=m"(DoubleFP[17]),
                          "=m"(DoubleFP[18]), "=m"(DoubleFP[19]),
                          "=m"(DoubleFP[20]), "=m"(DoubleFP[21]),
                          "=m"(DoubleFP[22]), "=m"(DoubleFP[23]),
                          "=m"(DoubleFP[24]), "=m"(DoubleFP[25]),
                          "=m"(DoubleFP[26]), "=m"(DoubleFP[27]),
                          "=m"(DoubleFP[28]), "=m"(DoubleFP[29]),
                          "=m"(DoubleFP[30]), "=m"(DoubleFP[31]));
#else
  std::cerr << "ERROR: RUNNING CODE THAT ONLY WORKS ON A SPARCV9 HOST!\n";
  abort();
#endif
}
00129 
/// RestoreRegisters - Reload the registers previously stored by SaveRegisters
/// from the caller-provided buffers:
///
///   DoubleFP - slots 0..31 are loaded into %f0, %f2, ..., %f62
///   CC       - slots 0..2 are loaded into %fsr, %fprs and %ccr
///   Globals  - slot 0 is loaded into %g1, slot 1 into %g5
///
/// On a host where __sparcv9 is not defined this prints an error and aborts.
static void RestoreRegisters(uint64_t DoubleFP[], uint64_t CC[],
                             uint64_t Globals[]) {
#if defined(__sparcv9)

  __asm__ __volatile__ (// Restore condition-code registers
                        "ldx %0,    %%fsr;\n\t"
                        "wr  %1, 0, %%fprs;\n\t"
                        "wr  %2, 0, %%ccr;\n\t"
                        :: "m"(CC[0]), "r"(CC[1]), "r"(CC[2]));

  // Each global comes from its own operand: Globals[0] (%0) -> %g1 and
  // Globals[1] (%1) -> %g5.  Loading both from %0 would give %g5 the wrong
  // value.
  __asm__ __volatile__ (// Restore globals g1 and g5
                        "ldx %0, %%g1;\n\t"
                        "ldx %1, %%g5;\n\t"
                        :: "m"(Globals[0]), "m"(Globals[1]));

  // GCC says: `asm' only allows up to thirty parameters!
  __asm__ __volatile__ (// Restore Single/Double FP registers, part 1
                        "ldd %0,  %%f0;\n\t"   "ldd %1, %%f2;\n\t"
                        "ldd %2,  %%f4;\n\t"   "ldd %3, %%f6;\n\t"
                        "ldd %4,  %%f8;\n\t"   "ldd %5, %%f10;\n\t"
                        "ldd %6,  %%f12;\n\t"  "ldd %7, %%f14;\n\t"
                        "ldd %8,  %%f16;\n\t"  "ldd %9, %%f18;\n\t"
                        "ldd %10, %%f20;\n\t" "ldd %11, %%f22;\n\t"
                        "ldd %12, %%f24;\n\t" "ldd %13, %%f26;\n\t"
                        "ldd %14, %%f28;\n\t" "ldd %15, %%f30;\n\t"
                        :: "m"(DoubleFP[0]), "m"(DoubleFP[1]),
                           "m"(DoubleFP[2]), "m"(DoubleFP[3]),
                           "m"(DoubleFP[4]), "m"(DoubleFP[5]),
                           "m"(DoubleFP[6]), "m"(DoubleFP[7]),
                           "m"(DoubleFP[8]), "m"(DoubleFP[9]),
                           "m"(DoubleFP[10]), "m"(DoubleFP[11]),
                           "m"(DoubleFP[12]), "m"(DoubleFP[13]),
                           "m"(DoubleFP[14]), "m"(DoubleFP[15]));

  __asm__ __volatile__ (// Restore Double FP registers, part 2
                        "ldd %0, %%f32;\n\t"  "ldd %1, %%f34;\n\t"
                        "ldd %2, %%f36;\n\t"  "ldd %3, %%f38;\n\t"
                        "ldd %4, %%f40;\n\t"  "ldd %5, %%f42;\n\t"
                        "ldd %6, %%f44;\n\t"  "ldd %7, %%f46;\n\t"
                        "ldd %8, %%f48;\n\t"  "ldd %9, %%f50;\n\t"
                        "ldd %10, %%f52;\n\t" "ldd %11, %%f54;\n\t"
                        "ldd %12, %%f56;\n\t" "ldd %13, %%f58;\n\t"
                        "ldd %14, %%f60;\n\t" "ldd %15, %%f62;\n\t"
                        :: "m"(DoubleFP[16]), "m"(DoubleFP[17]),
                           "m"(DoubleFP[18]), "m"(DoubleFP[19]),
                           "m"(DoubleFP[20]), "m"(DoubleFP[21]),
                           "m"(DoubleFP[22]), "m"(DoubleFP[23]),
                           "m"(DoubleFP[24]), "m"(DoubleFP[25]),
                           "m"(DoubleFP[26]), "m"(DoubleFP[27]),
                           "m"(DoubleFP[28]), "m"(DoubleFP[29]),
                           "m"(DoubleFP[30]), "m"(DoubleFP[31]));
#else
  std::cerr << "ERROR: RUNNING CODE THAT ONLY WORKS ON A SPARCV9 HOST!\n";
  abort();
#endif
}
00186 
00187 
00188 static void CompilationCallback() {
00189   // Local space to save the registers
00190   uint64_t DoubleFP[32];
00191   uint64_t CC[3];
00192   uint64_t Globals[2];
00193 
00194   SaveRegisters(DoubleFP, CC, Globals);
00195 
00196   unsigned *CameFrom = (unsigned*)__builtin_return_address(0);
00197   unsigned *CameFrom1 = (unsigned*)__builtin_return_address(1);
00198 
00199   int64_t Target = (intptr_t)JITCompilerFunction(CameFrom);
00200 
00201   DEBUG(std::cerr << "In callback! Addr=" << (void*)CameFrom << "\n");
00202 
00203   // If we can rewrite the ORIGINAL caller, we eliminate the whole need for a
00204   // trampoline function stub!!
00205   unsigned OrigCallInst = *CameFrom1;
00206   int64_t OrigTarget = (Target-(intptr_t)CameFrom1) >> 2;
00207   if ((OrigCallInst >> 30) == 1 &&
00208       (OrigTarget <= (1 << 30) && OrigTarget >= -(1 << 30))) {
00209     // The original call instruction was CALL <immed>, which means we can
00210     // overwrite it directly, since the offset will fit into 30 bits
00211     *CameFrom1 = BUILD_CALL(OrigTarget);
00212     //++OverwrittenCalls;
00213   } else {
00214     //++UnmodifiedCalls;
00215   }
00216 
00217   // Rewrite the call target so that we don't fault every time we execute it.
00218   //
00219   unsigned OrigStubCallInst = *CameFrom;
00220 
00221   // Subtract enough to overwrite up to the 'save' instruction
00222   // This depends on whether we made a short call (1 instruction) or the
00223   // farCall (7 instructions)
00224   int Offset = ((OrigStubCallInst >> 30) == 1) ? 1 : 7;
00225   unsigned *CodeBegin = CameFrom - Offset;
00226 
00227   // FIXME: __builtin_frame_address doesn't work if frame pointer elimination
00228   // has been performed.  Having a variable sized alloca disables frame pointer
00229   // elimination currently, even if it's dead.  This is a gross hack.
00230   alloca(42+Offset);
00231   
00232   // Make sure that what we're about to overwrite is indeed "save".
00233   if (*CodeBegin != 0x9DE3BF40) {
00234     std::cerr << "About to overwrite smthg not a save instr!";
00235     abort();
00236   }
00237 
00238   // Overwrite it
00239   InsertJumpAtAddr(Target, CodeBegin);
00240 
00241   // Flush the I-Cache: FLUSH clears out a doubleword at a given address
00242   // Self-modifying code MUST clear out the I-Cache to be portable
00243 #if defined(__sparcv9)
00244   for (int i = -Offset*4, e = 32-((int64_t)Offset*4); i < e; i += 8)
00245     __asm__ __volatile__ ("flush %%i7 + %0" : : "r" (i));
00246 #endif
00247 
00248   // Change the return address to re-execute the restore, then the jump.
00249   DEBUG(std::cerr << "Callback returning to: 0x"
00250                   << std::hex << (CameFrom-Offset*4-12) << "\n");
00251 #if defined(__sparcv9)
00252   __asm__ __volatile__ ("sub %%i7, %0, %%i7" : : "r" (Offset*4+12));
00253 #endif
00254 
00255   RestoreRegisters(DoubleFP, CC, Globals);
00256 }
00257 
00258 
00259 /// emitStubForFunction - This method is used by the JIT when it needs to emit
00260 /// the address of a function for a function whose code has not yet been
00261 /// generated.  In order to do this, it generates a stub which jumps to the lazy
00262 /// function compiler, which will eventually get fixed to call the function
00263 /// directly.
00264 ///
00265 void *SparcV9JITInfo::emitFunctionStub(void *Fn, MachineCodeEmitter &MCE) {
00266   if (Fn != CompilationCallback) {
00267     // If this is just a call to an external function, 
00268     MCE.startFunctionStub(4*8);
00269     unsigned *Stub = (unsigned*)(intptr_t)MCE.getCurrentPCValue();
00270     for (unsigned i = 0; i != 8; ++i)
00271       MCE.emitWord(0);
00272     InsertJumpAtAddr((intptr_t)Fn, Stub);
00273     return MCE.finishFunctionStub(0); // 1 instr past the restore
00274   }
00275 
00276   MCE.startFunctionStub(44);
00277   MCE.emitWord(0x81e82000); // restore %g0, 0, %g0
00278   MCE.emitWord(0x9DE3BF40); // save %sp, -192, %sp
00279 
00280   int64_t CurrPC = MCE.getCurrentPCValue();
00281   int64_t Addr = (intptr_t)Fn;
00282   int64_t CallTarget = (Addr-CurrPC) >> 2;
00283   if (CallTarget < (1 << 29) && CallTarget > -(1 << 29)) {
00284     // call CallTarget
00285     MCE.emitWord((0x01 << 30) | CallTarget);
00286   } else {
00287     enum {G5 = 5, G1 = 1 };
00288     // Otherwise, we need to emit a sequence of instructions to call a distant
00289     // function.  We use %g5 as a temporary, and compute the value into %g1
00290 
00291     // sethi %uhi(Target), %g5   ;; get upper 22 bits of Target into %g5
00292     MCE.emitWord(BUILD_SETHI(G5, Addr >> 42));
00293     // or %g5, %ulo(Target), %g5 ;; get 10 lower bits of upper word into %1
00294     MCE.emitWord(BUILD_ORI(G5, Addr >> 32, G5));
00295     // sllx %g5, 32, %g5         ;; shift those 10 bits to the upper word
00296     MCE.emitWord(0x8B297020);
00297     // sethi %hi(Target), %g1    ;; extract bits 10-31 into the dest reg
00298     MCE.emitWord(BUILD_SETHI(G1, Addr >> 10));
00299     // or %g5, %g1, %g1          ;; get upper word (in %g5) into %g1
00300     MCE.emitWord(0x82114001);
00301     // or %g1, %lo(Target), %g1  ;; get lowest 10 bits of Target into %g1
00302     MCE.emitWord(BUILD_ORI(G1, Addr, G1));
00303 
00304     // call %g1                  ;; indirect call on %g1
00305     MCE.emitWord(0x9FC04000);
00306   }
00307 
00308   // nop                         ;; call delay slot
00309   MCE.emitWord(0x1000000);
00310 
00311   // FIXME: Should have a restore and return!
00312 
00313   MCE.emitWord(0xDEADBEEF);    // marker so that we know it's really a stub
00314   return (char*)MCE.finishFunctionStub(0)+4; // 1 instr past the restore
00315 }
00316 
00317 
00318 
00319 TargetJITInfo::LazyResolverFn
00320 SparcV9JITInfo::getLazyResolverFunction(JITCompilerFn F) {
00321   JITCompilerFunction = F;
00322   return CompilationCallback;
00323 }
00324 
00325 void SparcV9JITInfo::relocate(void *Function, MachineRelocation *MR,
00326                               unsigned NumRelocs) {
00327   for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
00328     unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
00329     intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
00330     switch ((V9::RelocationType)MR->getRelocationType()) {
00331     default: assert(0 && "Unknown relocation type!");
00332     case V9::reloc_pcrel_call:
00333       ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;   // PC relative.
00334       assert((ResultPtr < (1 << 29) && ResultPtr > -(1 << 29)) &&
00335              "reloc_pcrel_call is out of range!");
00336       // The high two bits of the call are always set to 01.
00337       *RelocPos = (1 << 30) | (ResultPtr & ((1 << 30)-1)) ;
00338       break;
00339     case V9::reloc_sethi_hh:
00340     case V9::reloc_sethi_lm:
00341       ResultPtr >>= (MR->getRelocationType() == V9::reloc_sethi_hh ? 32 : 0);
00342       ResultPtr >>= 10;
00343       ResultPtr &= (1 << 22)-1;
00344       *RelocPos |= (unsigned)ResultPtr;
00345       break;
00346     case V9::reloc_or_hm:
00347     case V9::reloc_or_lo:
00348       ResultPtr >>= (MR->getRelocationType() == V9::reloc_or_hm ? 32 : 0);
00349       ResultPtr &= (1 << 12)-1;
00350       *RelocPos |= (unsigned)ResultPtr;
00351       break;
00352     }
00353   }
00354 }