lightning
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed


From: Paulo César Pereira de Andrade
Subject: Re: [PATCH] ppc: Fix 'calli' when floating-point arguments are passed
Date: Thu, 8 Sep 2022 14:06:39 -0300

Em qui., 8 de set. de 2022 às 06:37, Paul Cercueil
<paul@crapouillou.net> escreveu:

  Hi Paul,

[snip]

> I spoke too soon, I'm still getting problems :(
>
> See the attachment.

  Reading jit_print output I could not understand why r26 is not listed in:

L2: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r10 f14 f15 f16
f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28 f29 f30 f31

> The problem now is that r26 should be live at L2 but is not detected as
> such.
> This causes Lightning to use r26 as a temporary for the andi line 34
> (lines 153-155 in the generated code).

  I tried mimicking the code with:

"""
/*
 * Reproducer for the check/lightning test tool, mimicking the
 * instruction sequence from the report.
 * %r(5) and %r(12) require a patched check/lightning.c; they are
 * printed as r10 and r3 in the jit_print dump.
 */
.disasm
.data    32
.code
/*
 * Stand-in jump target: check/lightning does not accept a jmpi to an
 * address that is not a node, so every jump that leaves this block is
 * pointed at this label instead.
 */
anywhere:
    movi %r0 0
    prolog
    ldxi_i %v0 %v(13) 0x44
    addi %v0 %v0 0x1
    /* %v1 is printed as r26 in the dump; it is the register under discussion. */
    lti %v1 %v0 0x8
    stxi_i 0x44 %v(13) %v0
    ldxi_i %v0 %v(13) 0x40
    addi %v0 %v0 0x18
    stxi_i 0x40 %v(13) %v0
    stxi_i 0x8 %v(13) %v1
    subi %r(5) %r(5) 0x8
    beqi L2 %v1 0x0
    movi %v0 0x8008a1ac
    live %r(5)
    jmpi anywhere
/*
 * Label where the report says r26 must be live: %v1 is used by the
 * stxi_i instructions below without being written again first.
 */
L2:
    movi %v0 0x70
    stxi_i 0x8 %v(13) %v0
    movi %v0 0x800c0000
    andi %r2 %v0 0x7fffff
    addi %r1 %r2 0x10000000
    stxi_i 0x3e7c %r1 %v1
    addr %r2 %r2 %v(13)
    stxi_i 0x4110 %r2 %v1
    andi %r2 %v0 0x7fffff
    addi %r1 %r2 0x10000000
    stxi_i 0x3e78 %r1 %v1
    addr %r2 %r2 %v(13)
    stxi_i 0x410c %r2 %v1
    andi %r2 %v0 0x7fffff
    addi %r1 %r2 0x10000000
    stxi_i 0x3e80 %r1 %v1
    addr %r2 %r2 %v(13)
    stxi_i 0x4114 %r2 %v1
    stxi_i 0x4 %v(13) %v0
    subi %r(5) %r(5) 0xe
/* Loop head: the bgti further down branches back here. */
L3:
    movi %v0 0x800c0000
    ldxi_i %v1 %v(13) 0x8
    addr %v0 %v0 %v1
    stxi_i 0x4 %v(13) %v0
    ldxi_i %r2 %v(13) 0x244
    movi %r0 0x286d0000
    prepare
    ori %r(12) %r0 0xd
    live %r0
    live %r1
    live %r2
    live %v(13)
    live %r(5)
    callr %r2
    live %r0
    live %r1
    live %r2
    live %v(13)
    live %r(5)
    addi %v1 %v1 0xfffffff0
    stxi_i 0x8 %v(13) %v1
    subi %r(5) %r(5) 0xc
    blti L4 %v1 0x0
    bgti L3 %r(5) 0x0
    live %r(5)
    jmpi anywhere
/* Target of the blti above; %v0 and %v1 are reloaded with constants first. */
L4:
    movi %v0 0x800c0000
    movi %v1 0x0
    andi %v(12) %v0 0x7fffff
    addi %v(11) %v(12) 0x10000000
    stxi_i 0x3f04 %v(11) %v1
    addr %v(12) %v(12) %v(13)
    stxi_i 0x4198 %v(12) %v1
    stxi_i 0x4 %v(13) %v0
    movi %v0 0x8008a208
    stxi_i 0x7c %v(13) %v0
    movi %v0 0x8008ae68
    subi %r(5) %r(5) 0x8
    live %r(5)
    jmpi anywhere
    ret
    epilog
"""

but check/lightning does not allow a jmpi to an address that is not
a node.
  I also needed to hack check/lightning.c to be able to use r10 and
r3, used as %r(5) and %r(12).
  It shows the expected output in L2, as:

L2: r14 r26 r10

but it is because it can follow jumps. See below:

"""
L0:
    #note x.tst:4
    movi r28 0x0
L1: r14 r10 /* prolog */
    ldxi_i r27 r14 0x44
    addi r27 r27 0x1
    lti r26 r27 0x8
    stxi_i 0x44 r14 r27
    ldxi_i r27 r14 0x40
    addi r27 r27 0x18
    stxi_i 0x40 r14 r27
    stxi_i 0x8 r14 r26
    subi r10 r10 0x8
    beqi L2 r26 0x0
L6: r14 r26 r10
    movi r27 0x8008a1ac
    live r10
    jmpi L0
L2: r14 r26 r10
    #note x.tst:20
    movi r27 0x70
    stxi_i 0x8 r14 r27
    movi r27 0x800c0000
    andi r30 r27 0x7fffff
    addi r29 r30 0x10000000
    stxi_i 0x3e7c r29 r26
    addr r30 r30 r14
    stxi_i 0x4110 r30 r26
    andi r30 r27 0x7fffff
    addi r29 r30 0x10000000
    stxi_i 0x3e78 r29 r26
    addr r30 r30 r14
    stxi_i 0x410c r30 r26
    andi r30 r27 0x7fffff
    addi r29 r30 0x10000000
    stxi_i 0x3e80 r29 r26
    addr r30 r30 r14
    stxi_i 0x4114 r30 r26
    stxi_i 0x4 r14 r27
    subi r10 r10 0xe
L3: r14 r29 r10
    #note x.tst:41
    movi r27 0x800c0000
    ldxi_i r26 r14 0x8
    addr r27 r27 r26
    stxi_i 0x4 r14 r27
    ldxi_i r30 r14 0x244
    movi r28 0x286d0000
    prepare
    ori r3 r28 0xd
    live r28
    live r29
    live r30
    live r14
    live r10
    callr r30
L7: r14 r26 r28 r29 r30 r10
    live r28
    live r29
    live r30
    live r14
    live r10
    addi r26 r26 0xfffffff0
    stxi_i 0x8 r14 r26
    subi r10 r10 0xc
    blti L4 r26 0x0
L8: r14 r29 r10
    bgti L3 r10 0x0
L9: r14 r10
    live r10
    jmpi L0
L4: r14 r10
    #note x.tst:68
    movi r27 0x800c0000
    movi r26 0x0
    andi r15 r27 0x7fffff
    addi r16 r15 0x10000000
    stxi_i 0x3f04 r16 r26
    addr r15 r15 r14
    stxi_i 0x4198 r15 r26
    stxi_i 0x4 r14 r27
    movi r27 0x8008a208
    stxi_i 0x7c r14 r27
    movi r27 0x8008ae68
    subi r10 r10 0x8
    live r10
    jmpi L0
L10:
    ret
L5: /* epilog */
"""

  So, I hacked a bit more with a C code:

"""
#include <lightning.h>

/*
 * Standalone reproducer: builds the reported instruction sequence through
 * the lightning C API, emits it, and dumps the result with jit_print().
 * The generated code is never executed (the call is commented out); only
 * the live-register sets printed next to each label are of interest.
 *
 * Unlike the check/lightning script, jumps that leave the block are patched
 * to an absolute address, so they are printed as "jmpi 0xdeadbeef".
 */
int
main(int argc, char *argv[])
{
    jit_state_t        *_jit;
    void        (*code)(void);
    /* L2 and L4 hold forward branches patched later; L3 is a real label. */
    jit_node_t        *L2, *L3, *L4;

    init_jit(argv[0]);
    _jit = jit_new_state();

    jit_prolog();
    jit_ldxi_i(JIT_V0, JIT_V(13), 0x44);
    jit_addi(JIT_V0, JIT_V0, 0x1);
    /* JIT_V1 is printed as r26 in the dump: the register under discussion. */
    jit_lti(JIT_V1, JIT_V0, 0x8);
    jit_stxi_i(0x44, JIT_V(13), JIT_V0);
    jit_ldxi_i(JIT_V0, JIT_V(13), 0x40);
    jit_addi(JIT_V0, JIT_V0, 0x18);
    jit_stxi_i(0x40, JIT_V(13), JIT_V0);
    jit_stxi_i(0x8, JIT_V(13), JIT_V1);
    /* JIT_R(5) is printed as r10 in the dump. */
    jit_subi(JIT_R(5), JIT_R(5), 0x8);
    /* Forward branch; its target is fixed by jit_patch(L2) below. */
    L2 = jit_beqi(JIT_V1, 0x0);
    jit_movi(JIT_V0, 0x8008a1ac);
    jit_live(JIT_R(5));
    /* Jump out of the generated code to a dummy absolute address. */
    jit_patch_abs(jit_jmpi(), 0xdeadbeef);

    /*
     * Branch target where r26 must be reported live: JIT_V1 is used by the
     * jit_stxi_i() calls below without being written again first.
     */
    jit_patch(L2);

    jit_movi(JIT_V0, 0x70);
    jit_stxi_i(0x8, JIT_V(13), JIT_V0);
    jit_movi(JIT_V0, 0x800c0000);
    jit_andi(JIT_R2, JIT_V0, 0x7fffff);
    jit_addi(JIT_R1, JIT_R2, 0x10000000);
    jit_stxi_i(0x3e7c, JIT_R1, JIT_V1);
    jit_addr(JIT_R2, JIT_R2, JIT_V(13));
    jit_stxi_i(0x4110, JIT_R2, JIT_V1);
    jit_andi(JIT_R2, JIT_V0, 0x7fffff);
    jit_addi(JIT_R1, JIT_R2, 0x10000000);
    jit_stxi_i(0x3e78, JIT_R1, JIT_V1);
    jit_addr(JIT_R2, JIT_R2, JIT_V(13));
    jit_stxi_i(0x410c, JIT_R2, JIT_V1);
    jit_andi(JIT_R2, JIT_V0, 0x7fffff);
    jit_addi(JIT_R1, JIT_R2, 0x10000000);
    jit_stxi_i(0x3e80, JIT_R1, JIT_V1);
    jit_addr(JIT_R2, JIT_R2, JIT_V(13));
    jit_stxi_i(0x4114, JIT_R2, JIT_V1);
    jit_stxi_i(0x4, JIT_V(13), JIT_V0);
    jit_subi(JIT_R(5), JIT_R(5), 0xe);

    /* Loop head: the jit_bgti() further down is patched to branch here. */
    L3 = jit_label();

    jit_movi(JIT_V0, 0x800c0000);
    jit_ldxi_i(JIT_V1, JIT_V(13), 0x8);
    jit_addr(JIT_V0, JIT_V0, JIT_V1);
    jit_stxi_i(0x4, JIT_V(13), JIT_V0);
    jit_ldxi_i(JIT_R2, JIT_V(13), 0x244);
    jit_movi(JIT_R0, 0x286d0000);
    jit_prepare();
    /* JIT_R(12) is printed as r3 in the dump. */
    jit_ori(JIT_R(12), JIT_R0, 0xd);
    /* Explicitly mark these registers as live around the indirect call. */
    jit_live(JIT_R0);
    jit_live(JIT_R1);
    jit_live(JIT_R2);
    jit_live(JIT_V(13));
    jit_live(JIT_R(5));

    jit_callr(JIT_R2);

    jit_live(JIT_R0);
    jit_live(JIT_R1);
    jit_live(JIT_R2);
    jit_live(JIT_V(13));
    jit_live(JIT_R(5));
    jit_addi(JIT_V1, JIT_V1, 0xfffffff0);
    jit_stxi_i(0x8, JIT_V(13), JIT_V1);
    jit_subi(JIT_R(5), JIT_R(5), 0xc);
    /* Forward branch; its target is fixed by jit_patch(L4) below. */
    L4 = jit_blti(JIT_V1, 0x0);

    /* Backward branch to the loop head. */
    jit_patch_at(jit_bgti(JIT_R(5), 0x0), L3);

    jit_live(JIT_R(5));
    jit_patch_abs(jit_jmpi(), 0xdeadbeef);

    jit_patch(L4);

    jit_movi(JIT_V0, 0x800c0000);
    jit_movi(JIT_V1, 0x0);
    jit_andi(JIT_V(12), JIT_V0, 0x7fffff);
    jit_addi(JIT_V(11), JIT_V(12), 0x10000000);
    jit_stxi_i(0x3f04, JIT_V(11), JIT_V1);
    jit_addr(JIT_V(12), JIT_V(12), JIT_V(13));
    jit_stxi_i(0x4198, JIT_V(12), JIT_V1);
    jit_stxi_i(0x4, JIT_V(13), JIT_V0);
    jit_movi(JIT_V0, 0x8008a208);
    jit_stxi_i(0x7c, JIT_V(13), JIT_V0);
    jit_movi(JIT_V0, 0x8008ae68);
    jit_subi(JIT_R(5), JIT_R(5), 0x8);
    jit_live(JIT_R(5));
    jit_patch_abs(jit_jmpi(), 0xdeadbeef);

    jit_ret();
    jit_epilog();

    code = jit_emit();

    /* Dump the node list, including the live registers shown at each label. */
#if 1
    jit_print();
#endif

    jit_clear_state();

    /* Deliberately not run: the jumps target a dummy address. */
    //(*code)();
    jit_destroy_state();

    finish_jit();

    return (0);
}
"""

and it still prints the expected state of r26 as live:

(In the output the labels get renumbered: L2, L3 and L4 of the C code
are printed as L1, L2 and L3.)

"""
L0: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r28 r29 r30 r10
f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28 f29 f30
f31 /* prolog */
    ldxi_i r27 r14 0x44
    addi r27 r27 0x1
    lti r26 r27 0x8
    stxi_i 0x44 r14 r27
    ldxi_i r27 r14 0x40
    addi r27 r27 0x18
    stxi_i 0x40 r14 r27
    stxi_i 0x8 r14 r26
    subi r10 r10 0x8
    beqi L1 r26 0x0
L5: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29
r30 r10 f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28
f29 f30 f31
    movi r27 0x8008a1ac
    live r10
    jmpi 0xdeadbeef
L1: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r10 f14 f15
f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28 f29 f30 f31
    movi r27 0x70
    stxi_i 0x8 r14 r27
    movi r27 0x800c0000
    andi r30 r27 0x7fffff
    addi r29 r30 0x10000000
    stxi_i 0x3e7c r29 r26
    addr r30 r30 r14
    stxi_i 0x4110 r30 r26
    andi r30 r27 0x7fffff
    addi r29 r30 0x10000000
    stxi_i 0x3e78 r29 r26
    addr r30 r30 r14
    stxi_i 0x410c r30 r26
    andi r30 r27 0x7fffff
    addi r29 r30 0x10000000
    stxi_i 0x3e80 r29 r26
    addr r30 r30 r14
    stxi_i 0x4114 r30 r26
    stxi_i 0x4 r14 r27
    subi r10 r10 0xe
L2: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r29 r10 f14 f15
f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28 f29 f30 f31
    movi r27 0x800c0000
    ldxi_i r26 r14 0x8
    addr r27 r27 r26
    stxi_i 0x4 r14 r27
    ldxi_i r30 r14 0x244
    movi r28 0x286d0000
    prepare
    ori r3 r28 0xd
    live r28
    live r29
    live r30
    live r14
    live r10
    callr r30
L6: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29
r30 r10 f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28
f29 f30 f31
    live r28
    live r29
    live r30
    live r14
    live r10
    addi r26 r26 0xfffffff0
    stxi_i 0x8 r14 r26
    subi r10 r10 0xc
    blti L3 r26 0x0
L7: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29
r30 r10 f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28
f29 f30 f31
    bgti L2 r10 0x0
L8: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29
r30 r10 f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28
f29 f30 f31
    live r10
    jmpi 0xdeadbeef
L3: r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29
r30 r10 f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28
f29 f30 f31
    movi r27 0x800c0000
    movi r26 0x0
    andi r15 r27 0x7fffff
    addi r16 r15 0x10000000
    stxi_i 0x3f04 r16 r26
    addr r15 r15 r14
    stxi_i 0x4198 r15 r26
    stxi_i 0x4 r14 r27
    movi r27 0x8008a208
    stxi_i 0x7c r14 r27
    movi r27 0x8008ae68
    subi r10 r10 0x8
    live r10
    jmpi 0xdeadbeef
L9:
    ret
L4: /* epilog */
"""

> Cheers,
> -Paul

Thanks,
Paulo



reply via email to

[Prev in Thread] Current Thread [Next in Thread]