[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
feature/native-comp 9812400: * Reduce (half) the number of loads emitted
From: |
Andrea Corallo |
Subject: |
feature/native-comp 9812400: * Reduce (half) the number of loads emitted for calling into C code |
Date: |
Thu, 3 Dec 2020 18:41:11 -0500 (EST) |
branch: feature/native-comp
commit 981240078cddbd26b35a65e5311350196542b42b
Author: Andrea Corallo <akrl@sdf.org>
Commit: Andrea Corallo <akrl@sdf.org>
* Reduce (half) the number of loads emitted for calling into C code
Since GCC clobbers the pointer to the function relocation table after
each function call, this commit modifies the code generation to create
a local copy of it for each function. This reduces the average number
of loads for each function call into C from two to one.
* src/comp.c (comp_t): Add 'func_relocs_ptr_type' and
'func_relocs_local' fields.
(emit_call): Use the local func_relocs pointer when possible.
(emit_ctxt_code): Fill 'comp.func_relocs_ptr_type'.
(compile_function): Declare 'func_relocs_local'.
(compile_function): Assign 'func_relocs_local' from the global
value in each function prologue.
---
src/comp.c | 46 ++++++++++++++++++++++++++++++++++++----------
1 file changed, 36 insertions(+), 10 deletions(-)
diff --git a/src/comp.c b/src/comp.c
index 12ff985..590e330 100644
--- a/src/comp.c
+++ b/src/comp.c
@@ -580,8 +580,11 @@ typedef struct {
gcc_jit_rvalue *data_relocs_impure;
/* Same as before but content does not survive load phase. */
gcc_jit_rvalue *data_relocs_ephemeral;
- /* Synthesized struct holding func relocs. */
+ /* Global structure holding function relocations. */
gcc_jit_lvalue *func_relocs;
+ gcc_jit_type *func_relocs_ptr_type;
+ /* Pointer to this structure local to each function. */
+ gcc_jit_lvalue *func_relocs_local;
gcc_jit_function *memcpy;
Lisp_Object d_default_idx;
Lisp_Object d_impure_idx;
@@ -1013,9 +1016,17 @@ emit_call (Lisp_Object func, gcc_jit_type *ret_type,
ptrdiff_t nargs,
}
else
{
+ /* Inline functions so far don't have a local variable for the
+ function reloc table, so we fall back to the global one. Even
+ if this is not aesthetic, calling into C from open-code is
+ always a fallback and therefore should not be performance
+ critical. To fix this we could do the inlining ourselves
+ without relying on GCC. */
gcc_jit_lvalue *f_ptr =
gcc_jit_rvalue_dereference_field (
- gcc_jit_lvalue_as_rvalue (comp.func_relocs),
+ gcc_jit_lvalue_as_rvalue (comp.func_relocs_local
+ ? comp.func_relocs_local
+ : comp.func_relocs),
NULL,
(gcc_jit_field *) xmint_pointer (gcc_func));
@@ -2862,15 +2873,16 @@ emit_ctxt_code (void)
NULL,
"freloc_link_table",
n_frelocs, fields);
+ comp.func_relocs_ptr_type =
+ gcc_jit_type_get_pointer (
+ gcc_jit_struct_as_type (f_reloc_struct));
+
comp.func_relocs =
- gcc_jit_context_new_global (
- comp.ctxt,
- NULL,
- GCC_JIT_GLOBAL_EXPORTED,
- gcc_jit_type_get_pointer (
- gcc_jit_type_get_const (
- gcc_jit_struct_as_type (f_reloc_struct))),
- FUNC_LINK_TABLE_SYM);
+ gcc_jit_context_new_global (comp.ctxt,
+ NULL,
+ GCC_JIT_GLOBAL_EXPORTED,
+ comp.func_relocs_ptr_type,
+ FUNC_LINK_TABLE_SYM);
xfree (fields);
}
@@ -3931,6 +3943,12 @@ compile_function (Lisp_Object func)
comp.func_has_non_local = !NILP (CALL1I (comp-func-has-non-local, func));
comp.func_speed = XFIXNUM (CALL1I (comp-func-speed, func));
+ comp.func_relocs_local =
+ gcc_jit_function_new_local (comp.func,
+ NULL,
+ comp.func_relocs_ptr_type,
+ "freloc");
+
comp.frame = SAFE_ALLOCA (frame_size * sizeof (*comp.frame));
if (comp.func_has_non_local || !comp.func_speed)
{
@@ -3985,6 +4003,12 @@ compile_function (Lisp_Object func)
declare_block (HASH_KEY (ht, i));
}
+ gcc_jit_block_add_assignment (retrive_block (Qentry),
+ NULL,
+ comp.func_relocs_local,
+ gcc_jit_lvalue_as_rvalue (comp.func_relocs));
+
+
for (ptrdiff_t i = 0; i < ht->count; i++)
{
Lisp_Object block_name = HASH_KEY (ht, i);
@@ -4397,6 +4421,8 @@ DEFUN ("comp--compile-ctxt-to-file",
Fcomp__compile_ctxt_to_file,
CHECK_STRING (filename);
Lisp_Object base_name = Fsubstring (filename, Qnil, make_fixnum (-4));
+ comp.func_relocs_local = NULL;
+
comp.speed = XFIXNUM (CALL1I (comp-ctxt-speed, Vcomp_ctxt));
comp.debug = XFIXNUM (CALL1I (comp-ctxt-debug, Vcomp_ctxt));
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- feature/native-comp 9812400: * Reduce (half) the number of loads emitted for calling into C code,
Andrea Corallo <=