[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Libjit] Register allocation optimizations.
From: |
Rajan Walia |
Subject: |
[Libjit] Register allocation optimizations. |
Date: |
Wed, 8 Mar 2017 16:24:48 -0500 |
Hi,
I am using libjit to compile code at runtime, and I wanted to know if
there is a way to get better register allocation. I was looking at the
assembly output of a simple dot-product function which takes pointers to
two arrays and a size, loops through the two arrays at the same time, and
sums the products at each position.
----------- similar c code-------
/* Dot product of two arrays: returns the sum of arr1[i] * arr2[i] for
 * every i in [0, size), or 0 when size is 0.
 * Assumes uint is an unsigned integer type (so overflow wraps) and that
 * both arrays hold at least `size` elements. */
uint dotproduct (uint* arr1, uint* arr2, uint size) {
    uint sum = 0;
    uint i = 0;
    while (i < size) {
        /* Multiply, not add: this is the i23 = i19 * i22 step in the
         * three-address code for the same function. */
        sum = sum + arr1 [i] * arr2 [i];
        i = i + 1;
    }
    return sum;
}
------------------------------------------------
The three address code
function dot-product(l1 : ptr, l2 : ptr, i3 : uint) : uint
incoming_reg(l1, rdi)
incoming_reg(l2, rsi)
incoming_reg(i3, rdx)
.L:
i8 = 0
i7 = 0
.L0:
i11 = ilt_un(i8, i3)
if ige_un(i8, i3) then goto .L1
.L:
i14 = i8 * 4
i15 = trunc_int(i14)
i12 = i15
l16 = expand_int(i12)
l17 = l1 + l16
i19 = load_relative_int(l17, 0)
l20 = expand_int(i12)
l21 = l2 + l20
i22 = load_relative_int(l21, 0)
i23 = i19 * i22
i24 = i7 + i23
i7 = i24
i26 = i8 + 1
i8 = i26
goto .L0
ends_in_dead
.L:
.L1:
return_int(i7)
ends_in_dead
.L:
.L:
end
-------------------------------------------
and compiled assembly code:
ptrstruct<16>ptrfunction dot-product(ptr, ptr, uint) : uint
/tmp/libjit-dump.o: file format elf64-x86-64
Disassembly of section .text:
00007f6e1b9acf6e <.text>:
7f6e1b9acf6e: 55 push %rbp
7f6e1b9acf6f: 48 8b ec mov %rsp,%rbp
7f6e1b9acf72: 48 83 ec 30 sub $0x30,%rsp
7f6e1b9acf76: 4c 89 2c 24 mov %r13,(%rsp)
7f6e1b9acf7a: 4c 89 74 24 08 mov %r14,0x8(%rsp)
7f6e1b9acf7f: 4c 89 7c 24 10 mov %r15,0x10(%rsp)
7f6e1b9acf84: 48 89 7d f8 mov %rdi,-0x8(%rbp)
7f6e1b9acf88: 48 89 75 f0 mov %rsi,-0x10(%rbp)
7f6e1b9acf8c: 4c 8b ea mov %rdx,%r13
7f6e1b9acf8f: 45 33 ff xor %r15d,%r15d
7f6e1b9acf92: 45 33 f6 xor %r14d,%r14d
7f6e1b9acf95: 45 3b fd cmp %r13d,%r15d
7f6e1b9acf98: 0f 83 29 00 00 00 jae 0x7f6e1b9acfc7
7f6e1b9acf9e: 41 8b c7 mov %r15d,%eax
7f6e1b9acfa1: c1 e0 02 shl $0x2,%eax
7f6e1b9acfa4: 48 63 c8 movslq %eax,%rcx
7f6e1b9acfa7: 48 8b 55 f8 mov -0x8(%rbp),%rdx
7f6e1b9acfab: 48 03 ca add %rdx,%rcx
7f6e1b9acfae: 8b 09 mov (%rcx),%ecx
7f6e1b9acfb0: 48 63 c0 movslq %eax,%rax
7f6e1b9acfb3: 48 8b 55 f0 mov -0x10(%rbp),%rdx
7f6e1b9acfb7: 48 03 c2 add %rdx,%rax
7f6e1b9acfba: 8b 00 mov (%rax),%eax
7f6e1b9acfbc: 0f af c8 imul %eax,%ecx
7f6e1b9acfbf: 44 03 f1 add %ecx,%r14d
7f6e1b9acfc2: 41 ff c7 inc %r15d
7f6e1b9acfc5: eb ce jmp 0x7f6e1b9acf95
7f6e1b9acfc7: 41 8b c6 mov %r14d,%eax
7f6e1b9acfca: 4c 8b 2c 24 mov (%rsp),%r13
7f6e1b9acfce: 4c 8b 74 24 08 mov 0x8(%rsp),%r14
7f6e1b9acfd3: 4c 8b 7c 24 10 mov 0x10(%rsp),%r15
7f6e1b9acfd8: 48 8b e5 mov %rbp,%rsp
7f6e1b9acfdb: 5d pop %rbp
7f6e1b9acfdc: c3 retq
end
----------------------------------------
Here the incoming pointers are stored on the stack, and in every loop
iteration we first load the pointer value from the stack and then add the
index.
I was wondering if there is a way to optimize this and keep the array
pointers in registers.
I am using libjit from the repo directly with commit:
http://git.savannah.gnu.org/cgit/libjit.git/commit/?id=0f176f54f5f2d0e15e9d87940e5c4f51e6b5e850
Any help will be appreciated.
Thanks,
Rajan
- [Libjit] Register allocation optimizations.,
Rajan Walia <=