I have noticed that the Tiny C Compiler does not align the stack to 16 bytes on i386 Linux. Unfortunately, over the years the ABI defined by gcc has gradually drifted from 4 bytes to 16 bytes. To reproduce, see
https://sourceforge.net/p/fbc/bugs/659/ (that page is the equivalent issue in freebasic). TCC suffers from the same issue.
I ran into the issue in tcc myself when I attempted to call back from tcc-generated code into the Mozilla SpiderMonkey JavaScript VM via js-ctypes callbacks. I was able to hack around it in my project with a bit of inline assembly
https://github.com/cosinusoidally/mishmashvm/commit/8746e482256af04589bdfcda14d955713f2e3651 but in the general case this is not a complete fix, as I may eventually end up calling some affected system library from tcc-generated code. I've been lucky so far, but there's a good chance I'll eventually hit this issue again.
I did have a stab at implementing something like gcc's -mstackrealign in tcc, but the code I wrote was subtly buggy (it breaks several tests and produces some buggy binaries). Included below in case it is any use as the basis of a real fix:
diff --git a/i386-gen.c b/i386-gen.c
index 8c245ad..8a33135 100644
--- a/i386-gen.c
+++ b/i386-gen.c
@@ -402,14 +402,41 @@ ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int
#endif
}
+int gfunc_call_realign(){ /* Emit the pre-call stack realignment sequence; returns the value of ind at the first nop placeholder. */
+ // 50 push %eax -- save eax just below the current stack pointer
+ o(0x50);
+ // 89 e0 mov %esp,%eax
+ o(0x89); o(0xe0);
+ // 83 c0 04 add $0x4,%eax -- eax now holds esp as it was before the push above
+ o(0x83); o(0xc0); o(0x04);
+ // 81 e4 f0 ff ff ff and $0xfffffff0,%esp -- round esp down to a 16-byte boundary
+ o(0x81); o(0xe4); o(0xf0); o(0xff); o(0xff); o(0xff);
+ // 90 nop (emitted three times) -- placeholders: gfunc_call later overwrites up to three of them with 0x50 (push %eax) as padding
+ int foo1=ind;
+ o(0x90);
+ o(0x90);
+ o(0x90);
+ // 50 push %eax -- store the pre-realign esp on the aligned stack; gfunc_call later emits pop %esp (0x5c) to restore it
+ o(0x50);
+ // 8b 40 fc mov -0x4(%eax),%eax -- reload the eax value saved by the first push
+ o(0x8b); o(0x40); o(0xfc);
+
+ return foo1;
+}
+
/* Generate function call. The function address is pushed first, then
all the parameters in call order. This functions pops all the
parameters and the function address. */
ST_FUNC void gfunc_call(int nb_args)
{
- int size, align, r, args_size, i, func_call;
+ int size, align, r, args_size, i, func_call,foo1;
+ int pre=0;
Sym *func_sym;
-
+ if(nb_args==0){
+ foo1=gfunc_call_realign();
+ pre=1;
+ }
+
#ifdef CONFIG_TCC_BCHECK
if (tcc_state->do_bounds_check)
gbound_args(nb_args);
@@ -432,6 +459,12 @@ ST_FUNC void gfunc_call(int nb_args)
} else
#endif
{
+ // insert stack realign
+ if(pre==0){
+ foo1=gfunc_call_realign();
+ pre=1;
+ }
+
oad(0xec81, size); /* sub $xxx, %esp */
/* generate structure store */
r = get_reg(RC_INT);
@@ -443,6 +476,17 @@ ST_FUNC void gfunc_call(int nb_args)
args_size += size;
} else if (is_float(vtop->type.t)) {
gv(RC_FLOAT); /* only one float register */
+
+ // insert stack realign
+ if(pre==0){
+ foo1=gfunc_call_realign();
+ pre=1;
+ }
+ if(pre==0){
+ foo1=gfunc_call_realign();
+ pre=1;
+ }
+
if ((vtop->type.t & VT_BTYPE) == VT_FLOAT)
size = 4;
else if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
@@ -461,6 +505,13 @@ ST_FUNC void gfunc_call(int nb_args)
/* simple type (currently always same size) */
/* XXX: implicit cast ? */
r = gv(RC_INT);
+
+ // insert stack realign
+ if(pre==0){
+ foo1=gfunc_call_realign();
+ pre=1;
+ }
+
if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
size = 8;
o(0x50 + vtop->r2); /* push r */
@@ -505,6 +556,13 @@ ST_FUNC void gfunc_call(int nb_args)
if (args_size && func_call != FUNC_STDCALL && func_call != FUNC_FASTCALLW)
gadd_sp(args_size);
vtop--;
+
+ // 5c pop %esp
+ o(0x5c);
+ int blah=(4-(((args_size+4) & 15)>> 2))&3;
+ for(int bar=0;bar<blah;bar++){
+ cur_text_section->data[foo1+bar] = 0x50;
+ }
}
#ifdef TCC_TARGET_PE
Thanks
Liam Wilson