tinycc-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Tinycc-devel] [PATCH] arm: Handle __builtin_frame_address(1) correctly


From: Kirill Smelkov
Subject: [Tinycc-devel] [PATCH] arm: Handle __builtin_frame_address(1) correctly
Date: Thu, 29 Nov 2012 21:43:33 +0400
User-agent: Mutt/1.5.21 (2010-09-15)

On Mon, Nov 26, 2012 at 12:05:19AM +0400, Kirill Smelkov wrote:
> On Sat, Nov 24, 2012 at 02:43:34PM +0100, Thomas Preud'homme wrote:
> > Le samedi 24 novembre 2012 10:02:54, Kirill Smelkov a écrit :
> > > 
> > > Thanks for the info. The progress on my side is as follows: I've learned
> > > arm assembly and setup arm and armhf cross-toolchains (turned out to be
> > > very easy, thanks to emdebian[1]). Also I can run arm binaries via
> > > qemu-arm and basic hello world works.
> > 
> > I'im ashamed byt I never tried using emdebian yet. Is there some ready 
> > packages in emdebian for installing a cross-toolchains easily?
> 
> Exactly. The link I gave you has instructions. Also, go to search tab at
> emdebian.org and search e.g. for arm, mips, etc - there are packages for
> cross gcc, g++, gdb, gfortran, glibc, etc. Only uclibc is not there.
> 
> I've installed toolchains for armel and armhf from unstable.
> 
> 
> > > The next step is to analyze the __builtin_frame_address problem. I'll
> > > keep you posted.
> > 
> > Great thanks.
> 
> It looks like I know what is happening. e.g. for
> 
>     void f(int x, int y)
>     {
>     }
> 
> tcc first saves r0 & r1, and only then fp:
> 
>     $ ./arm-eabi-tcc -c y.c
>     $ arm-linux-gnueabi-objdump -d y.o
> 
>     00000000 <f>:
>        0:   e1a0c00d        mov     ip, sp
>        4:   e92d0003        push    {r0, r1}
>        8:   e92d5800        push    {fp, ip, lr}
>        c:   e28db00c        add     fp, sp, #12
>       10:   e1a00000        nop                     ; (mov r0, r0)
>       14:   e91ba800        ldmdb   fp, {fp, sp, pc}
> 
> gcc does not, but it will save r5,r6,etc. before fp(=r13 iirc) ...
> 
>     $ arm-linux-gnueabihf-gcc-4.7 -marm -c y.c 
>     $ arm-linux-gnueabihf-objdump  -d y.o
>     
>     00000000 <f>:
>        0:   e52db004        push    {fp}            ; (str fp, [sp, #-4]!)
>        4:   e28db000        add     fp, sp, #0
>        8:   e24dd00c        sub     sp, sp, #12
>        c:   e50b0008        str     r0, [fp, #-8]
>       10:   e50b100c        str     r1, [fp, #-12]
>       14:   e28bd000        add     sp, fp, #0
>       18:   e8bd0800        pop     {fp}
>       1c:   e12fff1e        bx      lr
> 
>     
>     $ cat y.c
>     char *f(int x, int y)
>     {
>         int i, j, k;
>         int r=0;
>     
>         /* lots of stuff to force gcc use r4,r5,... */
>         for (i=0; i<10; ++i)
>             for (j=0; j<15; ++j)
>                 for (k=0; k< 20; ++k)
>                     r += x+y + i+j+k;
>     
>         /* + r is just not to kill the above as dead code */
>         return (char *)__builtin_frame_address(1) + r;
>     }
> 
>     $ arm-linux-gnueabihf-gcc-4.7 -marm -O2 -c y.c
>     $ arm-linux-gnueabihf-objdump  -d y.o
>     
>     00000000 <f>:
>        0:   e0801001        add     r1, r0, r1
>        4:   e2813001        add     r3, r1, #1
>        8:   e92d0870        push    {r4, r5, r6, fp}        ; NOTE r4... go 
> before fp
>        c:   e0832183        add     r2, r3, r3, lsl #3
>       10:   e3a05000        mov     r5, #0
>       14:   e28db00c        add     fp, sp, #12
>       18:   e0833082        add     r3, r3, r2, lsl #1
>       1c:   e1a04005        mov     r4, r5
>       20:   e28360ab        add     r6, r3, #171    ; 0xab
>       24:   e085c001        add     ip, r5, r1
>       28:   e1a02006        mov     r2, r6
>       2c:   e3a03000        mov     r3, #0
>       30:   e2833001        add     r3, r3, #1
>       34:   e084400c        add     r4, r4, ip
>       38:   e353000f        cmp     r3, #15
>       3c:   e0844002        add     r4, r4, r2
>       40:   e28cc001        add     ip, ip, #1
>       44:   e2822013        add     r2, r2, #19
>       48:   1afffff8        bne     30 <f+0x30>
>       4c:   e2855001        add     r5, r5, #1
>       50:   e2866013        add     r6, r6, #19
>       54:   e355000a        cmp     r5, #10
>       58:   1afffff1        bne     24 <f+0x24>
>       5c:   e59b0000        ldr     r0, [fp]                ; BUG here!
>       60:   e0800004        add     r0, r0, r4
>       64:   e24bd00c        sub     sp, fp, #12
>       68:   e8bd0870        pop     {r4, r5, r6, fp}
> 
> ... as maybe required by arm calling convention (not looked at the spec
> yet, but it seems reasanoble, given how push is really a block str and
> that str stores register in ascending order).
> 
> 
> So for any function we can't know where it's caller fp is stored on the
> frame, *but* for currently-compiled function we can know it - we pushed
> that many regs on the stack first and we know the value, so on arm
> __builtin_frame_address(level=1) should work but for level > is
> undefined. Note - gcc is seemingle wrong here too
> 
> 
> That's are my current findings. Will try to prepare patch for tcc and
> maybe gcc. I'm also not 100% sure because I had only a very small bit of
> time for this.

How about this? (Still not done gcc patch, but time is very pressuring)

---- 8< ----
From: Kirill Smelkov <address@hidden>
Date: Thu, 29 Nov 2012 21:22:23 +0400
Subject: [PATCH] arm: Handle __builtin_frame_address(1) correctly

Thomas reported that after b2a02961 (Add support for
__builtin_frame_address(level)) tcctest stopped passing on arm.
The problem turned out to be that on arm stack frame is not stable, i.e.
before pushing fp, some other registers could be pushed, look e.g.

    void f(int x, int y)
    {
    }

tcc first saves r0 & r1, and only then fp:

    $ ./arm-eabi-tcc -c y.c
    $ arm-linux-gnueabi-objdump -d y.o

    00000000 <f>:
       0:   e1a0c00d        mov     ip, sp
       4:   e92d0003        push    {r0, r1}
       8:   e92d5800        push    {fp, ip, lr}
       c:   e28db00c        add     fp, sp, #12
      10:   e1a00000        nop                     ; (mov r0, r0)
      14:   e91ba800        ldmdb   fp, {fp, sp, pc}

and that {r0, r1} correspond to 2 parameters - if there were more - more
registers will be pushed.

gcc does not, but it will save r5,r6,etc. before fp(=r13 iirc) ...

    $ arm-linux-gnueabihf-gcc-4.7 -marm -c y.c
    $ arm-linux-gnueabihf-objdump  -d y.o

    00000000 <f>:
       0:   e52db004        push    {fp}            ; (str fp, [sp, #-4]!)
       4:   e28db000        add     fp, sp, #0
       8:   e24dd00c        sub     sp, sp, #12
       c:   e50b0008        str     r0, [fp, #-8]
      10:   e50b100c        str     r1, [fp, #-12]
      14:   e28bd000        add     sp, fp, #0
      18:   e8bd0800        pop     {fp}
      1c:   e12fff1e        bx      lr

    $ cat y.c
    char *f(int x, int y)
    {
        int i, j, k;
        int r=0;

        /* lots of stuff to force gcc use r4,r5,... */
        for (i=0; i<10; ++i)
            for (j=0; j<15; ++j)
                for (k=0; k< 20; ++k)
                    r += x+y + i+j+k;

        /* + r is just not to kill the above as dead code */
        return (char *)__builtin_frame_address(1) + r;
    }

    $ arm-linux-gnueabihf-gcc-4.7 -marm -O2 -c y.c
    $ arm-linux-gnueabihf-objdump  -d y.o

    00000000 <f>:
       0:   e0801001        add     r1, r0, r1
       4:   e2813001        add     r3, r1, #1
       8:   e92d0870        push    {r4, r5, r6, fp}        ; NOTE r4... go 
before fp
       c:   e0832183        add     r2, r3, r3, lsl #3
      10:   e3a05000        mov     r5, #0
      14:   e28db00c        add     fp, sp, #12
      18:   e0833082        add     r3, r3, r2, lsl #1
      1c:   e1a04005        mov     r4, r5
      20:   e28360ab        add     r6, r3, #171    ; 0xab
      24:   e085c001        add     ip, r5, r1
      28:   e1a02006        mov     r2, r6
      2c:   e3a03000        mov     r3, #0
      30:   e2833001        add     r3, r3, #1
      34:   e084400c        add     r4, r4, ip
      38:   e353000f        cmp     r3, #15
      3c:   e0844002        add     r4, r4, r2
      40:   e28cc001        add     ip, ip, #1
      44:   e2822013        add     r2, r2, #19
      48:   1afffff8        bne     30 <f+0x30>
      4c:   e2855001        add     r5, r5, #1
      50:   e2866013        add     r6, r6, #19
      54:   e355000a        cmp     r5, #10
      58:   1afffff1        bne     24 <f+0x24>
      5c:   e59b0000        ldr     r0, [fp]                ; BUG here!
      60:   e0800004        add     r0, r0, r4
      64:   e24bd00c        sub     sp, fp, #12
      68:   e8bd0870        pop     {r4, r5, r6, fp}

... as probably required by arm calling convention (not looked at the spec
yet, but it seems reasanoble, given how push is really a block str and
that str stores register in ascending order).

So for any function we can't know where it's caller fp is stored on the
frame, *but* for currently-compiled function we can know it - we pushed
that many regs on the stack first and we know the value, so on arm
__builtin_frame_address(level=1) should work but for level > is
undefined. Note - gcc is seemingle wrong here too

~~~~

So when compiling function, let's remember fp on-stack offset, and use
it in when expanding __builtin_frame_address().

Tested on i386 via qemu-arm, that after compiling tcctest.c with
arm-eabi-tcc, builtin_frame_address_test outputs what is expected:

    $ ./arm-eabi-tcc -B. -c tests/tcctest.c
    $ ./arm-eabi-tcc -B. -c tests/tcctest.c -o tcctest_arm.o
    $ arm-linux-gnueabi-gcc-4.4 -o tcctest_arm tcctest_arm.o
    $ qemu-arm -L /usr/arm-linux-gnueabi ./tcctest_arm |tail
    weak_fpc=123
    weak_asm_f1=0
    weak_asm_f2=0
    weak_asm_f3=0
    weak_asm_v1=0
    weak_asm_v2=0
    weak_asm_v3=0
    43
    str: __builtin_frame_address
    bfa1: __builtin_frame_address

Reported-by: Thomas Preud'homme <address@hidden>
---
 arm-gen.c       |  8 ++++++++
 c67-gen.c       |  5 +++++
 i386-gen.c      |  5 +++++
 tccgen.c        | 16 ++++++++++++++--
 tests/tcctest.c | 13 +++++++++++++
 x86_64-gen.c    |  6 ++++++
 6 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/arm-gen.c b/arm-gen.c
index ea12fd6..508d567 100644
--- a/arm-gen.c
+++ b/arm-gen.c
@@ -133,6 +133,12 @@ ST_DATA CType float_type, double_type, func_float_type, 
func_double_type;
 
 #define CHAR_IS_UNSIGNED
 
+/* on arm, before push {fp}, r4-r7 or other registers could be saved on frame,
+ * so stack frame is not stable
+ */
+#define STABLE_FRAME_PROLOG FALSE
+extern int func_fp1_offset;
+
 /******************************************************/
 /* ELF defines */
 
@@ -177,6 +183,7 @@ ST_DATA CType float_type, double_type, func_float_type, 
func_double_type;
 
 static int func_sub_sp_offset, last_itod_magic;
 static int leaffunc;
+int func_fp1_offset;
 
 static int two2mask(int a,int b) {
   return (reg_classes[a]|reg_classes[b])&~(RC_INT|RC_FLOAT);
@@ -1182,6 +1189,7 @@ void gfunc_prolog(CType *func_type)
   }
   o(0xE92D5800); /* save fp, ip, lr */
   o(0xE28DB00C); /* add fp, sp, #12 */
+  func_fp1_offset = n + nf;
   func_sub_sp_offset = ind;
   o(0xE1A00000); /* nop, leave space for stack adjustment in epilogue */
   {
diff --git a/c67-gen.c b/c67-gen.c
index 52d9f3b..c7a9631 100644
--- a/c67-gen.c
+++ b/c67-gen.c
@@ -114,6 +114,11 @@ do {\
 /* maximum alignment (for aligned attribute support) */
 #define MAX_ALIGN     8
 
+/* XXX is stack frame prolog stable on c67? */
+#define STABLE_FRAME_PROLOG TRUE
+#define func_fp1_offset     0
+
+
 /******************************************************/
 /* ELF defines */
 
diff --git a/i386-gen.c b/i386-gen.c
index c3f03c7..f25fe0e 100644
--- a/i386-gen.c
+++ b/i386-gen.c
@@ -67,6 +67,11 @@ enum {
 #define MAX_ALIGN     8
 
 
+/* stack frame prolog always begins with push %ebp */
+#define STABLE_FRAME_PROLOG TRUE
+#define func_fp1_offset     0
+
+
 #define psym oad
 
 /******************************************************/
diff --git a/tccgen.c b/tccgen.c
index f79da36..7174f57 100644
--- a/tccgen.c
+++ b/tccgen.c
@@ -3665,7 +3665,7 @@ ST_FUNC void unary(void)
         break;
     case TOK_builtin_frame_address:
         {
-            int level;
+            int level, i;
             CType type;
             next();
             skip('(');
@@ -3678,8 +3678,20 @@ ST_FUNC void unary(void)
             type.t = VT_VOID;
             mk_pointer(&type);
             vset(&type, VT_LOCAL, 0);       /* local frame */
-            while (level--) {
+            for (i=0; i<level; ++i) {
                 mk_pointer(&vtop->type);
+                /* on arches, such as arm, where in func prologue other
+                 * registers could be pushed before push {fp}, we can know
+                 * parent's fp (we've generated current function and know
+                 * offsets), but grand-parent is out of our reach.
+                 */
+                if (!STABLE_FRAME_PROLOG) {
+                    if (i)
+                        tcc_error("__builtin_frame_address(level > 1) 
dissalowed on %s",
+                           "this arch" /* XXX better say arm, i386, etc... */);
+                    vpushi(func_fp1_offset);
+                    gen_op('+');
+                }
                 indir();                    /* -> parent frame */
             }
         }
diff --git a/tests/tcctest.c b/tests/tcctest.c
index 4f9f2a3..ca5650e 100644
--- a/tests/tcctest.c
+++ b/tests/tcctest.c
@@ -2684,6 +2684,15 @@ void callsave_test(void)
 }
 
 
+/* i386, x86_64 support getting frame addres of grand-callers, arm does not */
+/* XXX c67? il? */
+#if defined __i386__ || defined __x86_64__
+# define ARCH_STABLE_FRAME_PROLOG    TRUE
+#else
+# define ARCH_STABLE_FRAME_PROLOG    FALSE
+#endif
+
+#if ARCH_STABLE_FRAME_PROLOG
 void bfa3(ptrdiff_t str_offset)
 {
     printf("bfa3: %s\n", (char *)__builtin_frame_address(3) + str_offset);
@@ -2693,10 +2702,14 @@ void bfa2(ptrdiff_t str_offset)
     printf("bfa2: %s\n", (char *)__builtin_frame_address(2) + str_offset);
     bfa3(str_offset);
 }
+#endif
+
 void bfa1(ptrdiff_t str_offset)
 {
     printf("bfa1: %s\n", (char *)__builtin_frame_address(1) + str_offset);
+#if ARCH_STABLE_FRAME_PROLOG
     bfa2(str_offset);
+#endif
 }
 
 void builtin_frame_address_test(void)
diff --git a/x86_64-gen.c b/x86_64-gen.c
index 1fa8dd5..42afe3d 100644
--- a/x86_64-gen.c
+++ b/x86_64-gen.c
@@ -86,6 +86,12 @@ enum {
 ST_FUNC void gen_opl(int op);
 ST_FUNC void gen_le64(int64_t c);
 
+
+/* stack frame prolog always begins with push %rbp */
+#define STABLE_FRAME_PROLOG TRUE
+#define func_fp1_offset     0
+
+
 /******************************************************/
 /* ELF defines */
 
-- 
1.8.0.1.465.g43d51c2



reply via email to

[Prev in Thread] Current Thread [Next in Thread]