Back to index

plt-scheme  4.2.1
ffi64.c
Go to the documentation of this file.
00001 /* -----------------------------------------------------------------------
00002    ffi.c - Copyright (c) 2002, 2007  Bo Thorsen <bo@suse.de>
00003    
00004    x86-64 Foreign Function Interface 
00005 
00006    Permission is hereby granted, free of charge, to any person obtaining
00007    a copy of this software and associated documentation files (the
00008    ``Software''), to deal in the Software without restriction, including
00009    without limitation the rights to use, copy, modify, merge, publish,
00010    distribute, sublicense, and/or sell copies of the Software, and to
00011    permit persons to whom the Software is furnished to do so, subject to
00012    the following conditions:
00013 
00014    The above copyright notice and this permission notice shall be included
00015    in all copies or substantial portions of the Software.
00016 
00017    THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
00018    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00019    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
00020    IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
00021    OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
00022    ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
00023    OTHER DEALINGS IN THE SOFTWARE.
00024    ----------------------------------------------------------------------- */
00025 
00026 #include <ffi.h>
00027 #include <ffi_common.h>
00028 
00029 #include <stdlib.h>
00030 #include <stdarg.h>
00031 
00032 #ifdef __x86_64__
00033 
00034 #define MAX_GPR_REGS 6
00035 #define MAX_SSE_REGS 8
00036 
00037 struct register_args
00038 {
00039   /* Registers for argument passing.  */
00040   UINT64 gpr[MAX_GPR_REGS];
00041   __int128_t sse[MAX_SSE_REGS];
00042 };
00043 
00044 extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
00045                           void *raddr, void (*fnaddr)(), unsigned ssecount);
00046 
00047 /* All references to register classes here are identical to the code in
00048    gcc/config/i386/i386.c.  Do *not* change one without the other.  */
00049 
00050 /* Register class used for passing a given 64-bit part of the argument.
00051    These represent classes as documented by the PS ABI, with the exception
00052    of the SSESF and SSEDF classes, which are basically the SSE class; gcc
00053    just uses an SF or DFmode move instead of DImode to avoid reformatting
00054    penalties.
00055 
00056    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
00057    whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
{
  /* No class assigned; this is the value classify_argument uses to
     initialise every eightbyte of a struct before merging its fields.  */
  X86_64_NO_CLASS,

  /* Integer classes (counted against the general-purpose registers).  */
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,

  /* SSE classes.  These four must stay contiguous and in this order:
     SSE_CLASS_P is a range test from SSE_CLASS up to SSEUP_CLASS.  */
  X86_64_SSE_CLASS,
  X86_64_SSESF_CLASS,
  X86_64_SSEDF_CLASS,
  X86_64_SSEUP_CLASS,

  /* x87 classes.  */
  X86_64_X87_CLASS,
  X86_64_X87UP_CLASS,
  X86_64_COMPLEX_X87_CLASS,

  /* The value has to be passed in memory.  */
  X86_64_MEMORY_CLASS
};
00071 
/* Capacity of the classes[] arrays that callers hand to the classification
   routines.  */
#define MAX_CLASSES 4

/* Nonzero iff X is one of the SSE register classes, i.e. lies in the range
   X86_64_SSE_CLASS .. X86_64_SSEUP_CLASS.  Both uses of X are parenthesized
   so that any expression (for example a conditional expression) can be
   passed safely.  X is still evaluated twice, so it must not have side
   effects.  */
#define SSE_CLASS_P(X) \
  ((X) >= X86_64_SSE_CLASS && (X) <= X86_64_SSEUP_CLASS)
00075 
00076 /* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
00077    of this code is to classify each 8bytes of incoming argument by the register
00078    class and assign registers accordingly.  */
00079 
00080 /* Return the union class of CLASS1 and CLASS2.
00081    See the x86-64 PS ABI for details.  */
00082 
00083 static enum x86_64_reg_class
00084 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
00085 {
00086   /* Rule #1: If both classes are equal, this is the resulting class.  */
00087   if (class1 == class2)
00088     return class1;
00089 
00090   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
00091      the other class.  */
00092   if (class1 == X86_64_NO_CLASS)
00093     return class2;
00094   if (class2 == X86_64_NO_CLASS)
00095     return class1;
00096 
00097   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
00098   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
00099     return X86_64_MEMORY_CLASS;
00100 
00101   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
00102   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
00103       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
00104     return X86_64_INTEGERSI_CLASS;
00105   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
00106       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
00107     return X86_64_INTEGER_CLASS;
00108 
00109   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
00110      MEMORY is used.  */
00111   if (class1 == X86_64_X87_CLASS
00112       || class1 == X86_64_X87UP_CLASS
00113       || class1 == X86_64_COMPLEX_X87_CLASS
00114       || class2 == X86_64_X87_CLASS
00115       || class2 == X86_64_X87UP_CLASS
00116       || class2 == X86_64_COMPLEX_X87_CLASS)
00117     return X86_64_MEMORY_CLASS;
00118 
00119   /* Rule #6: Otherwise class SSE is used.  */
00120   return X86_64_SSE_CLASS;
00121 }
00122 
00123 /* Classify the argument of type TYPE and mode MODE.
00124    CLASSES will be filled by the register class used to pass each word
00125    of the operand.  The number of words is returned.  In case the parameter
00126    should be passed in memory, 0 is returned. As a special case for zero
00127    sized containers, classes[0] will be NO_CLASS and 1 is returned.
00128 
00129    See the x86-64 PS ABI for details.
00130 */
00131 static int
00132 classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
00133                  size_t byte_offset)
00134 {
00135   switch (type->type)
00136     {
00137     case FFI_TYPE_UINT8:
00138     case FFI_TYPE_SINT8:
00139     case FFI_TYPE_UINT16:
00140     case FFI_TYPE_SINT16:
00141     case FFI_TYPE_UINT32:
00142     case FFI_TYPE_SINT32:
00143     case FFI_TYPE_UINT64:
00144     case FFI_TYPE_SINT64:
00145     case FFI_TYPE_POINTER:
00146       if (byte_offset + type->size <= 4)
00147        classes[0] = X86_64_INTEGERSI_CLASS;
00148       else
00149        classes[0] = X86_64_INTEGER_CLASS;
00150       return 1;
00151     case FFI_TYPE_FLOAT:
00152       if (byte_offset == 0)
00153        classes[0] = X86_64_SSESF_CLASS;
00154       else
00155        classes[0] = X86_64_SSE_CLASS;
00156       return 1;
00157     case FFI_TYPE_DOUBLE:
00158       classes[0] = X86_64_SSEDF_CLASS;
00159       return 1;
00160     case FFI_TYPE_LONGDOUBLE:
00161       classes[0] = X86_64_X87_CLASS;
00162       classes[1] = X86_64_X87UP_CLASS;
00163       return 2;
00164     case FFI_TYPE_STRUCT:
00165       {
00166        const int UNITS_PER_WORD = 8;
00167        int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
00168        ffi_type **ptr; 
00169        int i;
00170        enum x86_64_reg_class subclasses[MAX_CLASSES];
00171 
00172        /* If the struct is larger than 16 bytes, pass it on the stack.  */
00173        if (type->size > 16)
00174          return 0;
00175 
00176        for (i = 0; i < words; i++)
00177          classes[i] = X86_64_NO_CLASS;
00178 
00179        /* Merge the fields of structure.  */
00180        for (ptr = type->elements; *ptr != NULL; ptr++)
00181          {
00182            int num;
00183 
00184            byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
00185 
00186            num = classify_argument (*ptr, subclasses, byte_offset % 8);
00187            if (num == 0)
00188              return 0;
00189            for (i = 0; i < num; i++)
00190              {
00191               int pos = byte_offset / 8;
00192               classes[i + pos] =
00193                 merge_classes (subclasses[i], classes[i + pos]);
00194              }
00195 
00196            byte_offset += (*ptr)->size;
00197          }
00198 
00199        /* Final merger cleanup.  */
00200        for (i = 0; i < words; i++)
00201          {
00202            /* If one class is MEMORY, everything should be passed in
00203               memory.  */
00204            if (classes[i] == X86_64_MEMORY_CLASS)
00205              return 0;
00206 
00207            /* The X86_64_SSEUP_CLASS should be always preceded by
00208               X86_64_SSE_CLASS.  */
00209            if (classes[i] == X86_64_SSEUP_CLASS
00210               && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
00211              classes[i] = X86_64_SSE_CLASS;
00212 
00213            /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
00214            if (classes[i] == X86_64_X87UP_CLASS
00215               && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
00216              classes[i] = X86_64_SSE_CLASS;
00217          }
00218        return words;
00219       }
00220 
00221     default:
00222       FFI_ASSERT(0);
00223     }
00224   return 0; /* Never reached.  */
00225 }
00226 
00227 /* Examine the argument and return set number of register required in each
00228    class.  Return zero iff parameter should be passed in memory, otherwise
00229    the number of registers.  */
00230 
00231 static int
00232 examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
00233                 _Bool in_return, int *pngpr, int *pnsse)
00234 {
00235   int i, n, ngpr, nsse;
00236 
00237   n = classify_argument (type, classes, 0);
00238   if (n == 0)
00239     return 0;
00240 
00241   ngpr = nsse = 0;
00242   for (i = 0; i < n; ++i)
00243     switch (classes[i])
00244       {
00245       case X86_64_INTEGER_CLASS:
00246       case X86_64_INTEGERSI_CLASS:
00247        ngpr++;
00248        break;
00249       case X86_64_SSE_CLASS:
00250       case X86_64_SSESF_CLASS:
00251       case X86_64_SSEDF_CLASS:
00252        nsse++;
00253        break;
00254       case X86_64_NO_CLASS:
00255       case X86_64_SSEUP_CLASS:
00256        break;
00257       case X86_64_X87_CLASS:
00258       case X86_64_X87UP_CLASS:
00259       case X86_64_COMPLEX_X87_CLASS:
00260        return in_return != 0;
00261       default:
00262        abort ();
00263       }
00264 
00265   *pngpr = ngpr;
00266   *pnsse = nsse;
00267 
00268   return n;
00269 }
00270 
00271 /* Perform machine dependent cif processing.  */
00272 
00273 ffi_status
00274 ffi_prep_cif_machdep (ffi_cif *cif)
00275 {
00276   int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
00277   enum x86_64_reg_class classes[MAX_CLASSES];
00278   size_t bytes;
00279 
00280   gprcount = ssecount = 0;
00281 
00282   flags = cif->rtype->type;
00283   if (flags != FFI_TYPE_VOID)
00284     {
00285       n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
00286       if (n == 0)
00287        {
00288          /* The return value is passed in memory.  A pointer to that
00289             memory is the first argument.  Allocate a register for it.  */
00290          gprcount++;
00291          /* We don't have to do anything in asm for the return.  */
00292          flags = FFI_TYPE_VOID;
00293        }
00294       else if (flags == FFI_TYPE_STRUCT)
00295        {
00296          /* Mark which registers the result appears in.  */
00297          _Bool sse0 = SSE_CLASS_P (classes[0]);
00298          _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
00299          if (sse0 && !sse1)
00300            flags |= 1 << 8;
00301          else if (!sse0 && sse1)
00302            flags |= 1 << 9;
00303          else if (sse0 && sse1)
00304            flags |= 1 << 10;
00305          /* Mark the true size of the structure.  */
00306          flags |= cif->rtype->size << 12;
00307        }
00308     }
00309 
00310   /* Go over all arguments and determine the way they should be passed.
00311      If it's in a register and there is space for it, let that be so. If
00312      not, add it's size to the stack byte count.  */
00313   for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
00314     {
00315       if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
00316          || gprcount + ngpr > MAX_GPR_REGS
00317          || ssecount + nsse > MAX_SSE_REGS)
00318        {
00319          long align = cif->arg_types[i]->alignment;
00320 
00321          if (align < 8)
00322            align = 8;
00323 
00324          bytes = ALIGN(bytes, align);
00325          bytes += cif->arg_types[i]->size;
00326        }
00327       else
00328        {
00329          gprcount += ngpr;
00330          ssecount += nsse;
00331        }
00332     }
00333   if (ssecount)
00334     flags |= 1 << 11;
00335   cif->flags = flags;
00336   cif->bytes = bytes;
00337 
00338   return FFI_OK;
00339 }
00340 
00341 void
00342 ffi_call (ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue)
00343 {
00344   enum x86_64_reg_class classes[MAX_CLASSES];
00345   char *stack, *argp;
00346   ffi_type **arg_types;
00347   int gprcount, ssecount, ngpr, nsse, i, avn;
00348   _Bool ret_in_memory;
00349   struct register_args *reg_args;
00350 
00351   /* Can't call 32-bit mode from 64-bit mode.  */
00352   FFI_ASSERT (cif->abi == FFI_UNIX64);
00353 
00354   /* If the return value is a struct and we don't have a return value
00355      address then we need to make one.  Note the setting of flags to
00356      VOID above in ffi_prep_cif_machdep.  */
00357   ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
00358                  && (cif->flags & 0xff) == FFI_TYPE_VOID);
00359   if (rvalue == NULL && ret_in_memory)
00360     rvalue = alloca (cif->rtype->size);
00361 
00362   /* Allocate the space for the arguments, plus 4 words of temp space.  */
00363   stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
00364   reg_args = (struct register_args *) stack;
00365   argp = stack + sizeof (struct register_args);
00366 
00367   gprcount = ssecount = 0;
00368 
00369   /* If the return value is passed in memory, add the pointer as the
00370      first integer argument.  */
00371   if (ret_in_memory)
00372     reg_args->gpr[gprcount++] = (long) rvalue;
00373 
00374   avn = cif->nargs;
00375   arg_types = cif->arg_types;
00376 
00377   for (i = 0; i < avn; ++i)
00378     {
00379       size_t size = arg_types[i]->size;
00380       int n;
00381 
00382       n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
00383       if (n == 0
00384          || gprcount + ngpr > MAX_GPR_REGS
00385          || ssecount + nsse > MAX_SSE_REGS)
00386        {
00387          long align = arg_types[i]->alignment;
00388 
00389          /* Stack arguments are *always* at least 8 byte aligned.  */
00390          if (align < 8)
00391            align = 8;
00392 
00393          /* Pass this argument in memory.  */
00394          argp = (void *) ALIGN (argp, align);
00395          memcpy (argp, avalue[i], size);
00396          argp += size;
00397        }
00398       else
00399        {
00400          /* The argument is passed entirely in registers.  */
00401          char *a = (char *) avalue[i];
00402          int j;
00403 
00404          for (j = 0; j < n; j++, a += 8, size -= 8)
00405            {
00406              switch (classes[j])
00407               {
00408               case X86_64_INTEGER_CLASS:
00409               case X86_64_INTEGERSI_CLASS:
00410                 reg_args->gpr[gprcount] = 0;
00411                 memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
00412                 gprcount++;
00413                 break;
00414               case X86_64_SSE_CLASS:
00415               case X86_64_SSEDF_CLASS:
00416                 reg_args->sse[ssecount++] = *(UINT64 *) a;
00417                 break;
00418               case X86_64_SSESF_CLASS:
00419                 reg_args->sse[ssecount++] = *(UINT32 *) a;
00420                 break;
00421               default:
00422                 abort();
00423               }
00424            }
00425        }
00426     }
00427 
00428   ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
00429                  cif->flags, rvalue, fn, ssecount);
00430 }
00431 
00432 
00433 extern void ffi_closure_unix64(void);
00434 
00435 ffi_status
00436 ffi_prep_closure_loc (ffi_closure* closure,
00437                     ffi_cif* cif,
00438                     void (*fun)(ffi_cif*, void*, void**, void*),
00439                     void *user_data,
00440                     void *codeloc)
00441 {
00442   volatile unsigned short *tramp;
00443 
00444   tramp = (volatile unsigned short *) &closure->tramp[0];
00445 
00446   tramp[0] = 0xbb49;        /* mov <code>, %r11  */
00447   *(void * volatile *) &tramp[1] = ffi_closure_unix64;
00448   tramp[5] = 0xba49;        /* mov <data>, %r10  */
00449   *(void * volatile *) &tramp[6] = codeloc;
00450 
00451   /* Set the carry bit iff the function uses any sse registers.
00452      This is clc or stc, together with the first byte of the jmp.  */
00453   tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
00454 
00455   tramp[11] = 0xe3ff;                     /* jmp *%r11    */
00456 
00457   closure->cif = cif;
00458   closure->fun = fun;
00459   closure->user_data = user_data;
00460 
00461   return FFI_OK;
00462 }
00463 
00464 int
00465 ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
00466                       struct register_args *reg_args, char *argp)
00467 {
00468   ffi_cif *cif;
00469   void **avalue;
00470   ffi_type **arg_types;
00471   long i, avn;
00472   int gprcount, ssecount, ngpr, nsse;
00473   int ret;
00474 
00475   cif = closure->cif;
00476   avalue = alloca(cif->nargs * sizeof(void *));
00477   gprcount = ssecount = 0;
00478 
00479   ret = cif->rtype->type;
00480   if (ret != FFI_TYPE_VOID)
00481     {
00482       enum x86_64_reg_class classes[MAX_CLASSES];
00483       int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
00484       if (n == 0)
00485        {
00486          /* The return value goes in memory.  Arrange for the closure
00487             return value to go directly back to the original caller.  */
00488          rvalue = (void *) reg_args->gpr[gprcount++];
00489          /* We don't have to do anything in asm for the return.  */
00490          ret = FFI_TYPE_VOID;
00491        }
00492       else if (ret == FFI_TYPE_STRUCT && n == 2)
00493        {
00494          /* Mark which register the second word of the structure goes in.  */
00495          _Bool sse0 = SSE_CLASS_P (classes[0]);
00496          _Bool sse1 = SSE_CLASS_P (classes[1]);
00497          if (!sse0 && sse1)
00498            ret |= 1 << 8;
00499          else if (sse0 && !sse1)
00500            ret |= 1 << 9;
00501        }
00502     }
00503 
00504   avn = cif->nargs;
00505   arg_types = cif->arg_types;
00506   
00507   for (i = 0; i < avn; ++i)
00508     {
00509       enum x86_64_reg_class classes[MAX_CLASSES];
00510       int n;
00511 
00512       n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
00513       if (n == 0
00514          || gprcount + ngpr > MAX_GPR_REGS
00515          || ssecount + nsse > MAX_SSE_REGS)
00516        {
00517          long align = arg_types[i]->alignment;
00518 
00519          /* Stack arguments are *always* at least 8 byte aligned.  */
00520          if (align < 8)
00521            align = 8;
00522 
00523          /* Pass this argument in memory.  */
00524          argp = (void *) ALIGN (argp, align);
00525          avalue[i] = argp;
00526          argp += arg_types[i]->size;
00527        }
00528       /* If the argument is in a single register, or two consecutive
00529         registers, then we can use that address directly.  */
00530       else if (n == 1
00531               || (n == 2
00532                  && SSE_CLASS_P (classes[0]) == SSE_CLASS_P (classes[1])))
00533        {
00534          /* The argument is in a single register.  */
00535          if (SSE_CLASS_P (classes[0]))
00536            {
00537              avalue[i] = &reg_args->sse[ssecount];
00538              ssecount += n;
00539            }
00540          else
00541            {
00542              avalue[i] = &reg_args->gpr[gprcount];
00543              gprcount += n;
00544            }
00545        }
00546       /* Otherwise, allocate space to make them consecutive.  */
00547       else
00548        {
00549          char *a = alloca (16);
00550          int j;
00551 
00552          avalue[i] = a;
00553          for (j = 0; j < n; j++, a += 8)
00554            {
00555              if (SSE_CLASS_P (classes[j]))
00556               memcpy (a, &reg_args->sse[ssecount++], 8);
00557              else
00558               memcpy (a, &reg_args->gpr[gprcount++], 8);
00559            }
00560        }
00561     }
00562 
00563   /* Invoke the closure.  */
00564   closure->fun (cif, rvalue, avalue, closure->user_data);
00565 
00566   /* Tell assembly how to perform return type promotions.  */
00567   return ret;
00568 }
00569 
00570 #endif /* __x86_64__ */