emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

feature/native-comp 9812400: * Reduce (halve) the number of loads emitted


From: Andrea Corallo
Subject: feature/native-comp 9812400: * Reduce (halve) the number of loads emitted for calling into C code
Date: Thu, 3 Dec 2020 18:41:11 -0500 (EST)

branch: feature/native-comp
commit 981240078cddbd26b35a65e5311350196542b42b
Author: Andrea Corallo <akrl@sdf.org>
Commit: Andrea Corallo <akrl@sdf.org>

    * Reduce (halve) the number of loads emitted for calling into C code
    
    As GCC clobbers the pointer to the function relocation table after
    each function call, this commit modifies the code generation to
    create a local copy of it for each function.  This reduces the average
    number of loads for each function call into C from two to one.
    
        * src/comp.c (comp_t): Add 'func_relocs_ptr_type' and
        'func_relocs_local' fields.
        (emit_call): Use the local func_relocs pointer when possible.
        (emit_ctxt_code): Fill 'comp.func_relocs_ptr_type'.
        (compile_function): Declare 'func_relocs_local'.
        (compile_function): Assign 'func_relocs_local' from the global
        value in each function prologue.
---
 src/comp.c | 46 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 36 insertions(+), 10 deletions(-)

diff --git a/src/comp.c b/src/comp.c
index 12ff985..590e330 100644
--- a/src/comp.c
+++ b/src/comp.c
@@ -580,8 +580,11 @@ typedef struct {
   gcc_jit_rvalue *data_relocs_impure;
   /* Same as before but content does not survive load phase. */
   gcc_jit_rvalue *data_relocs_ephemeral;
-  /* Synthesized struct holding func relocs.  */
+  /* Global structure holding function relocations.  */
   gcc_jit_lvalue *func_relocs;
+  gcc_jit_type *func_relocs_ptr_type;
+  /* Pointer to this structure local to each function.  */
+  gcc_jit_lvalue *func_relocs_local;
   gcc_jit_function *memcpy;
   Lisp_Object d_default_idx;
   Lisp_Object d_impure_idx;
@@ -1013,9 +1016,17 @@ emit_call (Lisp_Object func, gcc_jit_type *ret_type, 
ptrdiff_t nargs,
     }
   else
     {
+      /* Inline functions so far don't have a local variable for the
+        function reloc table, so we fall back to the global one.  Even
+        if this is not aesthetic, calling into C from open-code is
+        always a fallback and therefore not performance critical.
+        To fix this we could consider doing the inlining ourselves
+        without relying on GCC.  */
       gcc_jit_lvalue *f_ptr =
        gcc_jit_rvalue_dereference_field (
-         gcc_jit_lvalue_as_rvalue (comp.func_relocs),
+         gcc_jit_lvalue_as_rvalue (comp.func_relocs_local
+                                   ? comp.func_relocs_local
+                                   : comp.func_relocs),
          NULL,
          (gcc_jit_field *) xmint_pointer (gcc_func));
 
@@ -2862,15 +2873,16 @@ emit_ctxt_code (void)
                                     NULL,
                                     "freloc_link_table",
                                     n_frelocs, fields);
+  comp.func_relocs_ptr_type =
+    gcc_jit_type_get_pointer (
+      gcc_jit_struct_as_type (f_reloc_struct));
+
   comp.func_relocs =
-    gcc_jit_context_new_global (
-      comp.ctxt,
-      NULL,
-      GCC_JIT_GLOBAL_EXPORTED,
-      gcc_jit_type_get_pointer (
-       gcc_jit_type_get_const (
-         gcc_jit_struct_as_type (f_reloc_struct))),
-      FUNC_LINK_TABLE_SYM);
+    gcc_jit_context_new_global (comp.ctxt,
+                               NULL,
+                               GCC_JIT_GLOBAL_EXPORTED,
+                               comp.func_relocs_ptr_type,
+                               FUNC_LINK_TABLE_SYM);
 
   xfree (fields);
 }
@@ -3931,6 +3943,12 @@ compile_function (Lisp_Object func)
   comp.func_has_non_local = !NILP (CALL1I (comp-func-has-non-local, func));
   comp.func_speed = XFIXNUM (CALL1I (comp-func-speed, func));
 
+  comp.func_relocs_local =
+    gcc_jit_function_new_local (comp.func,
+                               NULL,
+                               comp.func_relocs_ptr_type,
+                               "freloc");
+
   comp.frame = SAFE_ALLOCA (frame_size * sizeof (*comp.frame));
   if (comp.func_has_non_local || !comp.func_speed)
     {
@@ -3985,6 +4003,12 @@ compile_function (Lisp_Object func)
        declare_block (HASH_KEY (ht, i));
     }
 
+  gcc_jit_block_add_assignment (retrive_block (Qentry),
+                               NULL,
+                               comp.func_relocs_local,
+                               gcc_jit_lvalue_as_rvalue (comp.func_relocs));
+
+
   for (ptrdiff_t i = 0; i < ht->count; i++)
     {
       Lisp_Object block_name = HASH_KEY (ht, i);
@@ -4397,6 +4421,8 @@ DEFUN ("comp--compile-ctxt-to-file", 
Fcomp__compile_ctxt_to_file,
   CHECK_STRING (filename);
   Lisp_Object base_name = Fsubstring (filename, Qnil, make_fixnum (-4));
 
+  comp.func_relocs_local = NULL;
+
   comp.speed = XFIXNUM (CALL1I (comp-ctxt-speed, Vcomp_ctxt));
   comp.debug = XFIXNUM (CALL1I (comp-ctxt-debug, Vcomp_ctxt));
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]