tinycc-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Tinycc-devel] [PATCH 4/4] stdatomic: x86_64 implementation


From: Dmitry Selyutin
Subject: [Tinycc-devel] [PATCH 4/4] stdatomic: x86_64 implementation
Date: Sun, 14 Feb 2021 22:52:27 +0300

---
 x86_64-gen.c | 277 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 265 insertions(+), 12 deletions(-)

diff --git a/x86_64-gen.c b/x86_64-gen.c
index 15d6b53..2b24f20 100644
--- a/x86_64-gen.c
+++ b/x86_64-gen.c
@@ -2280,59 +2280,312 @@ ST_FUNC void gen_vla_alloc(CType *type, int align) {
     }
 }

-ST_FUNC void gen_atomic_init(int mode)
+struct atomic_code {
+    const void *data; /* bytes the generators below hand to gbc() */
+    size_t size;      /* number of bytes of data to emit */
+};
+#define ATOMIC_CODE(code) \
+    {code, (sizeof(code) - 1)} /* - 1: leave out the string literal's NUL */
+
+static inline void gen_atomic_prologue(size_t argc)
 {
-    tcc_error("atomic_init not implemented");
+    /* Load the top argc value-stack entries into arg_regs[0..argc-1]; */
+    /* the deepest of those entries ends up in arg_regs[0]. */
+    ssize_t idx; /* signed on purpose: it is negated to index below vtop */
+
+    for (idx = 0; idx < (ssize_t)argc; ++idx) {
+        const int treg = arg_regs[argc - idx - 1];
+
+        save_reg(treg);
+        load(treg, &vtop[-idx]);
+        vtop[-idx].r = treg; /* record that the entry now lives in treg */
+    }
+}
+
+static inline void gen_atomic_epilogue(size_t argc)
+{
+    ssize_t idx;
+    /* Drop the argc operand entries that gen_atomic_prologue() loaded. */
+    for (idx = ((ssize_t)argc - 1); idx >= 0; --idx)
+        vpop();
+    /* NOTE(review): presumably publishes the %rax result as the new top entry - confirm store() semantics */
+    store(TREG_RAX, ++vtop);
 }

 ST_FUNC void gen_atomic_store(int mode)
 {
-    tcc_error("atomic_store not implemented");
+    static const struct atomic_code code[] = {
+        [GEN_ATOMIC_MODE_8] = ATOMIC_CODE(
+            "\x40\x0f\xb6\xf6" /* movzbl %sil,%esi */
+            "\x40\x86\x37"     /* xchgb %sil,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_16] = ATOMIC_CODE(
+            "\x0f\xb7\xf6"     /* movzwl %si,%esi */
+            "\x66\x87\x37"     /* xchgw %si,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_32] = ATOMIC_CODE(
+            "\x87\x37"         /* xchgl %esi,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_64] = ATOMIC_CODE(
+            "\x48\x87\x37"     /* xchgq %rsi,(%rdi) */
+        ),
+    };
+    /* xchg with memory is implicitly locked, so the store is also a full fence (seq_cst); a plain mov is not. */
+    vpop(); /* order: popped and not consulted, hence the strongest ordering is always emitted */
+    /* The emitted code expects the target address in %rdi and the value in %rsi (clobbered with the old contents). */
+    gen_atomic_prologue(2);
+    gbc(code[mode].data, code[mode].size);
+    gen_atomic_epilogue(2);
 }

 ST_FUNC void gen_atomic_load(int mode)
 {
-    tcc_error("atomic_load not implemented");
+    static const struct atomic_code code[] = {
+        [GEN_ATOMIC_MODE_8] = ATOMIC_CODE(
+            "\x0f\xb6\x07" /* movzbl (%rdi),%eax */
+        ),
+        [GEN_ATOMIC_MODE_16] = ATOMIC_CODE(
+            "\x0f\xb7\x07" /* movzwl (%rdi),%eax */
+        ),
+        [GEN_ATOMIC_MODE_32] = ATOMIC_CODE(
+            "\x8b\x07"     /* movl (%rdi),%eax */
+        ),
+        [GEN_ATOMIC_MODE_64] = ATOMIC_CODE(
+            "\x48\x8b\x07" /* movq (%rdi),%rax */
+        ),
+    };
+    /* One load from (%rdi) into %rax per operand width; the 8/16-bit forms zero-extend. */
+    vpop(); /* order: popped and not consulted */
+    /* The single remaining operand is the address the emitted code reads through %rdi. */
+    gen_atomic_prologue(1);
+    gbc(code[mode].data, code[mode].size);
+    gen_atomic_epilogue(1);
 }

 ST_FUNC void gen_atomic_exchange(int mode)
 {
-    tcc_error("atomic_exchange not implemented");
+    static const struct atomic_code code[] = {
+        [GEN_ATOMIC_MODE_8] = ATOMIC_CODE(
+            "\x40\x0f\xb6\xc6" /* movzbl %sil,%eax */
+            "\x86\x07"         /* xchgb %al,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_16] = ATOMIC_CODE(
+            "\x0f\xb7\xc6"     /* movzwl %si,%eax */
+            "\x66\x87\x07"     /* xchgw %ax,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_32] = ATOMIC_CODE(
+            "\x89\xf0"         /* movl %esi,%eax */
+            "\x87\x07"         /* xchgl %eax,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_64] = ATOMIC_CODE(
+            "\x48\x89\xf0"     /* movq %rsi,%rax */
+            "\x48\x87\x07"     /* xchgq %rax,(%rdi) */
+        ),
+    };
+    /* Copy the new value from %rsi into %rax, then swap it with (%rdi); %rax ends up with the old contents. */
+    vpop(); /* order: popped and not consulted */
+    /* The emitted code expects the object address in %rdi and the new value in %rsi. */
+    gen_atomic_prologue(2);
+    gbc(code[mode].data, code[mode].size);
+    gen_atomic_epilogue(2);
 }

 ST_FUNC void gen_atomic_compare_exchange_strong(int mode)
 {
-    tcc_error("atomic_compare_exchange_strong not implemented");
+    static const struct atomic_code code[] = {
+        [GEN_ATOMIC_MODE_8] = ATOMIC_CODE(
+            "\x0f\xb6\x06"         /* movzbl (%rsi),%eax */
+            "\x0f\xb6\xd2"         /* movzbl %dl,%edx */
+            "\xf0\x0f\xb0\x17"     /* lock cmpxchgb %dl,(%rdi) */
+            "\x41\x0f\x94\xc0"     /* sete %r8b */
+            "\x74\x02"             /* je +2 */
+            "\x88\x06"             /* movb %al,(%rsi) */
+            "\x41\x0f\xb6\xc0"     /* movzbl %r8b,%eax */
+        ),
+        [GEN_ATOMIC_MODE_16] = ATOMIC_CODE(
+            "\x0f\xb7\x06"         /* movzwl (%rsi),%eax */
+            "\x0f\xb7\xd2"         /* movzwl %dx,%edx */
+            "\x66\xf0\x0f\xb1\x17" /* lock cmpxchgw %dx,(%rdi) */
+            "\x41\x0f\x94\xc0"     /* sete %r8b */
+            "\x74\x03"             /* je +3 */
+            "\x66\x89\x06"         /* movw %ax,(%rsi) */
+            "\x41\x0f\xb6\xc0"     /* movzbl %r8b,%eax */
+        ),
+        [GEN_ATOMIC_MODE_32] = ATOMIC_CODE(
+            "\x8b\x06"             /* movl (%rsi),%eax */
+            "\xf0\x0f\xb1\x17"     /* lock cmpxchgl %edx,(%rdi) */
+            "\x41\x0f\x94\xc0"     /* sete %r8b */
+            "\x74\x02"             /* je +2 */
+            "\x89\x06"             /* movl %eax,(%rsi) */
+            "\x41\x0f\xb6\xc0"     /* movzbl %r8b,%eax */
+        ),
+        [GEN_ATOMIC_MODE_64] = ATOMIC_CODE(
+            "\x48\x8b\x06"         /* movq (%rsi),%rax */
+            "\xf0\x48\x0f\xb1\x17" /* lock cmpxchgq %rdx,(%rdi) */
+            "\x41\x0f\x94\xc0"     /* sete %r8b */
+            "\x74\x03"             /* je +3 */
+            "\x48\x89\x06"         /* movq %rax,(%rsi) */
+            "\x41\x0f\xb6\xc0"     /* movzbl %r8b,%eax */
+        ),
+    };
+    /* sete writes only %r8b, so the flag is zero-extended into %eax rather than copying all of %r8d. */
+    vpop(); /* failure order: popped and not consulted */
+    vpop(); /* success order: popped and not consulted */
+    /* %rdi = object, %rsi = pointer to the expected value (rewritten on failure), %rdx = desired value. */
+    gen_atomic_prologue(3);
+    gbc(code[mode].data, code[mode].size);
+    gen_atomic_epilogue(3);
 }

 ST_FUNC void gen_atomic_compare_exchange_weak(int mode)
 {
-    tcc_error("atomic_compare_exchange_weak not implemented");
+    gen_atomic_compare_exchange_strong(mode); /* the weak form reuses the strong code sequence unchanged */
 }

 ST_FUNC void gen_atomic_fetch_add(int mode)
 {
-    tcc_error("atomic_fetch_add not implemented");
+    static const struct atomic_code code[] = {
+        [GEN_ATOMIC_MODE_8] = ATOMIC_CODE(
+            "\x40\x0f\xb6\xc6"     /* movzbl %sil,%eax */
+            "\xf0\x0f\xc0\x07"     /* lock xaddb %al,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_16] = ATOMIC_CODE(
+            "\x0f\xb7\xc6"         /* movzwl %si,%eax */
+            "\x66\xf0\x0f\xc1\x07" /* lock xaddw %ax,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_32] = ATOMIC_CODE(
+            "\x89\xf0"             /* movl %esi,%eax */
+            "\xf0\x0f\xc1\x07"     /* lock xaddl %eax,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_64] = ATOMIC_CODE(
+            "\x48\x89\xf0"         /* movq %rsi,%rax */
+            "\xf0\x48\x0f\xc1\x07" /* lock xaddq %rax,(%rdi) */
+        ),
+    };
+    /* Copy the addend from %rsi into %rax; lock xadd adds it to (%rdi) and leaves the old contents in %rax. */
+    vpop(); /* order: popped and not consulted */
+    /* The emitted code expects the object address in %rdi and the addend in %rsi. */
+    gen_atomic_prologue(2);
+    gbc(code[mode].data, code[mode].size);
+    gen_atomic_epilogue(2);
 }

 ST_FUNC void gen_atomic_fetch_sub(int mode)
 {
-    tcc_error("atomic_fetch_sub not implemented");
+    static const struct atomic_code code[] = {
+        [GEN_ATOMIC_MODE_8] = ATOMIC_CODE(
+            "\x40\x0f\xb6\xc6"     /* movzbl %sil,%eax */
+            "\xf6\xd8"             /* negb %al */
+            "\xf0\x0f\xc0\x07"     /* lock xaddb %al,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_16] = ATOMIC_CODE(
+            "\x0f\xb7\xc6"         /* movzwl %si,%eax */
+            "\x66\xf7\xd8"         /* negw %ax */
+            "\x66\xf0\x0f\xc1\x07" /* lock xaddw %ax,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_32] = ATOMIC_CODE(
+            "\x89\xf0"             /* movl %esi,%eax */
+            "\xf7\xd8"             /* negl %eax */
+            "\xf0\x0f\xc1\x07"     /* lock xaddl %eax,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_64] = ATOMIC_CODE(
+            "\x48\x89\xf0"         /* movq %rsi,%rax */
+            "\x48\xf7\xd8"         /* negq %rax */
+            "\xf0\x48\x0f\xc1\x07" /* lock xaddq %rax,(%rdi) */
+        ),
+    };
+    /* 8/16-bit: zero-extend, then negate only the low part, so the old value returned in %eax has clean upper bits. */
+    vpop(); /* order: popped and not consulted */
+    /* The emitted code expects the object address in %rdi and the subtrahend in %rsi. */
+    gen_atomic_prologue(2);
+    gbc(code[mode].data, code[mode].size);
+    gen_atomic_epilogue(2);
+}
+
+/* Caveat: the op (data/size) must fold %rsi into %rdx and leave %rax, %rdi and %r8 untouched. */
+static inline void gen_atomic_fetch_op(int mode, const void *data, size_t size)
+{
+    static const struct atomic_code prologue[] = {
+        [GEN_ATOMIC_MODE_8] = ATOMIC_CODE("\x0f\xb6\x07"),  /* movzbl (%rdi),%eax */
+        [GEN_ATOMIC_MODE_16] = ATOMIC_CODE("\x0f\xb7\x07"), /* movzwl (%rdi),%eax */
+        [GEN_ATOMIC_MODE_32] = ATOMIC_CODE("\x8b\x07"),     /* movl (%rdi),%eax */
+        [GEN_ATOMIC_MODE_64] = ATOMIC_CODE("\x48\x8b\x07"), /* movq (%rdi),%rax */
+    };
+    static const struct atomic_code label[] = {
+        [GEN_ATOMIC_MODE_8] = ATOMIC_CODE(
+            "\x40\x0f\xb6\xf6" /* movzbl %sil,%esi */
+            "\x89\xc2"         /* movl %eax,%edx */
+            "\x41\x89\xc0"     /* movl %eax,%r8d */
+        ),
+        [GEN_ATOMIC_MODE_16] = ATOMIC_CODE(
+            "\x0f\xb7\xf6"     /* movzwl %si,%esi */
+            "\x89\xc2"         /* movl %eax,%edx */
+            "\x41\x89\xc0"     /* movl %eax,%r8d */
+        ),
+        [GEN_ATOMIC_MODE_32] = ATOMIC_CODE(
+            "\x89\xc2"         /* movl %eax,%edx */
+            "\x41\x89\xc0"     /* movl %eax,%r8d */
+        ),
+        [GEN_ATOMIC_MODE_64] = ATOMIC_CODE(
+            "\x48\x89\xc2"     /* movq %rax,%rdx */
+            "\x49\x89\xc0"     /* movq %rax,%r8 */
+        ),
+    };
+    static const struct atomic_code cmpxchg[] = {
+        [GEN_ATOMIC_MODE_8] = ATOMIC_CODE("\xf0\x0f\xb0\x17"),      /* lock cmpxchgb %dl,(%rdi) */
+        [GEN_ATOMIC_MODE_16] = ATOMIC_CODE("\x66\xf0\x0f\xb1\x17"), /* lock cmpxchgw %dx,(%rdi) */
+        [GEN_ATOMIC_MODE_32] = ATOMIC_CODE("\xf0\x0f\xb1\x17"),     /* lock cmpxchgl %edx,(%rdi) */
+        [GEN_ATOMIC_MODE_64] = ATOMIC_CODE("\xf0\x48\x0f\xb1\x17"), /* lock cmpxchgq %rdx,(%rdi) */
+    };
+    static const struct atomic_code epilogue[] = {
+        [GEN_ATOMIC_MODE_8] = ATOMIC_CODE("\x44\x89\xc0"),  /* movl %r8d,%eax */
+        [GEN_ATOMIC_MODE_16] = ATOMIC_CODE("\x44\x89\xc0"), /* movl %r8d,%eax */
+        [GEN_ATOMIC_MODE_32] = ATOMIC_CODE("\x44\x89\xc0"), /* movl %r8d,%eax */
+        [GEN_ATOMIC_MODE_64] = ATOMIC_CODE("\x4c\x89\xc0"), /* movq %r8,%rax */
+    };
+    const size_t loop_size = (label[mode].size + size + cmpxchg[mode].size + 2); /* + 2: the jne itself */
+    /* Shape: load old value; retry: new = op(old, %rsi); lock cmpxchg; jne retry; return old. */
+    vpop(); /* order: popped and not consulted */
+    /* The emitted code expects the object address in %rdi and the operand in %rsi. */
+    gen_atomic_prologue(2);
+    gbc(prologue[mode].data, prologue[mode].size);
+    /* Loop body: snapshot %rax into %rdx (work copy) and %r8 (result), apply the op, try to commit. */
+    gbc(label[mode].data, label[mode].size);
+    gbc(data, size);
+    gbc(cmpxchg[mode].data, cmpxchg[mode].size);
+    /* jne rel8 back to the start of the loop body; rel8 is counted from the end of the jne. */
+    g(0x75);
+    g((uint8_t)-(ssize_t)loop_size);
+    /* Emit exactly the epilogue's own bytes: using another table's size would emit a stray NUL or overread. */
+    gbc(epilogue[mode].data, epilogue[mode].size);
+    gen_atomic_epilogue(2);
 }

 ST_FUNC void gen_atomic_fetch_or(int mode)
 {
-    tcc_error("atomic_fetch_or not implemented");
+    const size_t skip_rex = (mode != GEN_ATOMIC_MODE_64); /* 1 for every width below 64 bits */
+    static const struct atomic_code code = ATOMIC_CODE("\x48\x09\xf2"); /* orq %rsi,%rdx */
+
+    /* All modes but 64-bit skip the leading REX.W byte (orl %esi,%edx); cast: no arithmetic on void * */
+    gen_atomic_fetch_op(mode, ((const char *)code.data + skip_rex), (code.size - skip_rex));
 }

 ST_FUNC void gen_atomic_fetch_xor(int mode)
 {
-    tcc_error("atomic_fetch_xor not implemented");
+    const size_t skip_rex = (mode != GEN_ATOMIC_MODE_64); /* 1 for every width below 64 bits */
+    static const struct atomic_code code = ATOMIC_CODE("\x48\x31\xf2"); /* xorq %rsi,%rdx */
+
+    /* All modes but 64-bit skip the leading REX.W byte (xorl %esi,%edx); cast: no arithmetic on void * */
+    gen_atomic_fetch_op(mode, ((const char *)code.data + skip_rex), (code.size - skip_rex));
 }

 ST_FUNC void gen_atomic_fetch_and(int mode)
 {
-    tcc_error("atomic_fetch_and not implemented");
+    const size_t skip_rex = (mode != GEN_ATOMIC_MODE_64); /* 1 for every width below 64 bits */
+    static const struct atomic_code code = ATOMIC_CODE("\x48\x21\xf2"); /* andq %rsi,%rdx */
+
+    /* All modes but 64-bit skip the leading REX.W byte (andl %esi,%edx); cast: no arithmetic on void * */
+    gen_atomic_fetch_op(mode, ((const char *)code.data + skip_rex), (code.size - skip_rex));
 }

 /* end of x86-64 code generator */
--
2.30.0



reply via email to

[Prev in Thread] Current Thread [Next in Thread]