libjit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Libjit] Register allocation optimizations.


From: Rajan Walia
Subject: [Libjit] Register allocation optimizations.
Date: Wed, 8 Mar 2017 16:24:48 -0500

Hi,
 I am using libjit to compile code at runtime and I wanted to know if
there is a way to get better register allocation. I was looking at the
assembly output of an ordinary dot-product function which takes pointers
to two arrays and a size, loops through the two arrays at the same time
and sums the products at each position.
----------- similar c code-------
/* Dot product of two uint arrays: returns the sum of arr1[i] * arr2[i]
 * for i in [0, size); returns 0 when size is 0.
 * Assumes uint is an alias for unsigned int, so the arithmetic wraps
 * on overflow instead of being undefined. */
uint dotproduct (uint* arr1, uint* arr2, uint size) {
  uint sum = 0;
  uint i = 0;
  while (i<size) {
    /* Multiply (not add) the paired elements: this matches
     * "i23 = i19 * i22" in the three-address code of this message. */
    sum = sum + arr1 [i] * arr2 [i];
    i = i+1;
  }
  return sum;
}
------------------------------------------------
The three-address code:
function dot-product(l1 : ptr, l2 : ptr, i3 : uint) : uint
    incoming_reg(l1, rdi)
    incoming_reg(l2, rsi)
    incoming_reg(i3, rdx)
.L:
    i8 = 0
    i7 = 0
.L0:
    i11 = ilt_un(i8, i3)
    if ige_un(i8, i3) then goto .L1
.L:
    i14 = i8 * 4
    i15 = trunc_int(i14)
    i12 = i15
    l16 = expand_int(i12)
    l17 = l1 + l16
    i19 = load_relative_int(l17, 0)
    l20 = expand_int(i12)
    l21 = l2 + l20
    i22 = load_relative_int(l21, 0)
    i23 = i19 * i22
    i24 = i7 + i23
    i7 = i24
    i26 = i8 + 1
    i8 = i26
    goto .L0
    ends_in_dead
.L:
.L1:
    return_int(i7)
    ends_in_dead
.L:
.L:
end
-------------------------------------------
and compiled assembly code:
ptrstruct<16>ptrfunction dot-product(ptr, ptr, uint) : uint

/tmp/libjit-dump.o:     file format elf64-x86-64


Disassembly of section .text:

00007f6e1b9acf6e <.text>:
    7f6e1b9acf6e:    55                       push   %rbp
    7f6e1b9acf6f:    48 8b ec                 mov    %rsp,%rbp
    7f6e1b9acf72:    48 83 ec 30              sub    $0x30,%rsp
    7f6e1b9acf76:    4c 89 2c 24              mov    %r13,(%rsp)
    7f6e1b9acf7a:    4c 89 74 24 08           mov    %r14,0x8(%rsp)
    7f6e1b9acf7f:    4c 89 7c 24 10           mov    %r15,0x10(%rsp)
    7f6e1b9acf84:    48 89 7d f8              mov    %rdi,-0x8(%rbp)
    7f6e1b9acf88:    48 89 75 f0              mov    %rsi,-0x10(%rbp)
    7f6e1b9acf8c:    4c 8b ea                 mov    %rdx,%r13
    7f6e1b9acf8f:    45 33 ff                 xor    %r15d,%r15d
    7f6e1b9acf92:    45 33 f6                 xor    %r14d,%r14d
    7f6e1b9acf95:    45 3b fd                 cmp    %r13d,%r15d
    7f6e1b9acf98:    0f 83 29 00 00 00        jae    0x7f6e1b9acfc7
    7f6e1b9acf9e:    41 8b c7                 mov    %r15d,%eax
    7f6e1b9acfa1:    c1 e0 02                 shl    $0x2,%eax
    7f6e1b9acfa4:    48 63 c8                 movslq %eax,%rcx
    7f6e1b9acfa7:    48 8b 55 f8              mov    -0x8(%rbp),%rdx
    7f6e1b9acfab:    48 03 ca                 add    %rdx,%rcx
    7f6e1b9acfae:    8b 09                    mov    (%rcx),%ecx
    7f6e1b9acfb0:    48 63 c0                 movslq %eax,%rax
    7f6e1b9acfb3:    48 8b 55 f0              mov    -0x10(%rbp),%rdx
    7f6e1b9acfb7:    48 03 c2                 add    %rdx,%rax
    7f6e1b9acfba:    8b 00                    mov    (%rax),%eax
    7f6e1b9acfbc:    0f af c8                 imul   %eax,%ecx
    7f6e1b9acfbf:    44 03 f1                 add    %ecx,%r14d
    7f6e1b9acfc2:    41 ff c7                 inc    %r15d
    7f6e1b9acfc5:    eb ce                    jmp    0x7f6e1b9acf95
    7f6e1b9acfc7:    41 8b c6                 mov    %r14d,%eax
    7f6e1b9acfca:    4c 8b 2c 24              mov    (%rsp),%r13
    7f6e1b9acfce:    4c 8b 74 24 08           mov    0x8(%rsp),%r14
    7f6e1b9acfd3:    4c 8b 7c 24 10           mov    0x10(%rsp),%r15
    7f6e1b9acfd8:    48 8b e5                 mov    %rbp,%rsp
    7f6e1b9acfdb:    5d                       pop    %rbp
    7f6e1b9acfdc:    c3                       retq

end
----------------------------------------
Here the incoming pointers are stored on the stack, and in every loop
iteration we first reload each pointer from the stack and then add the
index.
I was wondering if there is a way to optimize this and keep the array
pointers in registers.
I am using libjit from the repo directly with commit:
http://git.savannah.gnu.org/cgit/libjit.git/commit/?id=0f176f54f5f2d0e15e9d87940e5c4f51e6b5e850
Any help will be appreciated.
Thanks,
Rajan



reply via email to

[Prev in Thread] Current Thread [Next in Thread]