[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Tinycc-devel] [PATCH 4/4] stdatomic: x86_64 implementation
From: |
Dmitry Selyutin |
Subject: |
[Tinycc-devel] [PATCH 4/4] stdatomic: x86_64 implementation |
Date: |
Sun, 14 Feb 2021 22:52:27 +0300 |
---
x86_64-gen.c | 277 ++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 265 insertions(+), 12 deletions(-)
diff --git a/x86_64-gen.c b/x86_64-gen.c
index 15d6b53..2b24f20 100644
--- a/x86_64-gen.c
+++ b/x86_64-gen.c
@@ -2280,59 +2280,312 @@ ST_FUNC void gen_vla_alloc(CType *type, int align) {
}
}
-ST_FUNC void gen_atomic_init(int mode)
+struct atomic_code { /* one pre-encoded machine-code snippet */
+ const void *data; /* raw instruction bytes (built from a string literal by ATOMIC_CODE) */
+ size_t size; /* number of bytes in data, excluding the literal's trailing NUL */
+};
+#define ATOMIC_CODE(code) /* initializer {bytes, length} for a string-literal snippet */ \
+ {code, (sizeof(code) - 1)} /* sizeof - 1 drops the literal's terminating NUL */
+
+/* Load the top argc value-stack entries into arg_regs[] so the hand-encoded
+ * snippets see fixed operands: vtop[-idx] is loaded into arg_regs[argc - idx - 1]. */
+static inline void gen_atomic_prologue(size_t argc)
{
- tcc_error("atomic_init not implemented");
+    ssize_t idx; /* kept signed: it is negated to index downwards from vtop */
+
+    for (idx = 0; idx != (ssize_t)argc; ++idx) {
+        const int treg = arg_regs[(ssize_t)argc - idx - 1];
+
+        save_reg(treg);
+        load(treg, &vtop[-idx]);
+        vtop[-idx].r = treg; /* record that this entry now lives in treg */
+    }
+}
+
+static inline void gen_atomic_epilogue(size_t argc)
+{
+    size_t remaining = argc; /* operands still to be dropped from the value stack */
+
+    while (remaining-- != 0)
+        vpop();
+
+    store(TREG_RAX, ++vtop); /* re-exposes the last popped slot as store()'s target; NOTE(review): confirm this is the intended way to publish %rax */
}
ST_FUNC void gen_atomic_store(int mode)
{
- tcc_error("atomic_store not implemented");
+    static const struct atomic_code code[] = { /* indexed by GEN_ATOMIC_MODE_*; address in %rdi, value in %rsi */
+        [GEN_ATOMIC_MODE_8] = ATOMIC_CODE(
+            "\x40\x0f\xb6\xf6" /* movzbl %sil,%esi */
+            "\x40\x86\x37" /* xchgb %sil,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_16] = ATOMIC_CODE(
+            "\x0f\xb7\xf6" /* movzwl %si,%esi */
+            "\x66\x87\x37" /* xchgw %si,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_32] = ATOMIC_CODE(
+            "\x87\x37" /* xchgl %esi,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_64] = ATOMIC_CODE(
+            "\x48\x87\x37" /* xchgq %rsi,(%rdi) */
+        ),
+    };
+
+    vpop(); /* order is dropped: xchg with memory is implicitly locked, so every store is seq_cst (a plain mov would not be) */
+
+    gen_atomic_prologue(2);
+    gbc(code[mode].data, code[mode].size);
+    gen_atomic_epilogue(2);
}
ST_FUNC void gen_atomic_load(int mode)
{
- tcc_error("atomic_load not implemented");
+ static const struct atomic_code code[] = { /* one load snippet per operand width, indexed by mode; reads through %rdi into %eax/%rax */
+ [GEN_ATOMIC_MODE_8] = ATOMIC_CODE(
+ "\x0f\xb6\x07" /* movzbl (%rdi),%eax */
+ ),
+ [GEN_ATOMIC_MODE_16] = ATOMIC_CODE(
+ "\x0f\xb7\x07" /* movzwl (%rdi),%eax */
+ ),
+ [GEN_ATOMIC_MODE_32] = ATOMIC_CODE(
+ "\x8b\x07" /* movl (%rdi),%eax */
+ ),
+ [GEN_ATOMIC_MODE_64] = ATOMIC_CODE(
+ "\x48\x8b\x07" /* movq (%rdi),%rax */
+ ),
+ };
+
+ vpop(); /* order: the memory-order operand is discarded; the same load is emitted for every order */
+
+ gen_atomic_prologue(1); /* one operand, the object address; presumably arg_regs[0] is %rdi, which the snippet dereferences */
+ gbc(code[mode].data, code[mode].size); /* hand the snippet for this width to gbc() */
+ gen_atomic_epilogue(1); /* drop the operand and publish %rax */
}
ST_FUNC void gen_atomic_exchange(int mode)
{
- tcc_error("atomic_exchange not implemented");
+ static const struct atomic_code code[] = { /* per-width snippet: copy the new value from %rsi to %rax, then swap it with (%rdi) */
+ [GEN_ATOMIC_MODE_8] = ATOMIC_CODE(
+ "\x40\x0f\xb6\xc6" /* movzbl %sil,%eax */
+ "\x86\x07" /* xchgb %al,(%rdi) */
+ ),
+ [GEN_ATOMIC_MODE_16] = ATOMIC_CODE(
+ "\x0f\xb7\xc6" /* movzwl %si,%eax */
+ "\x66\x87\x07" /* xchgw %ax,(%rdi) */
+ ),
+ [GEN_ATOMIC_MODE_32] = ATOMIC_CODE(
+ "\x89\xf0" /* movl %esi,%eax */
+ "\x87\x07" /* xchgl %eax,(%rdi) */
+ ),
+ [GEN_ATOMIC_MODE_64] = ATOMIC_CODE(
+ "\x48\x89\xf0" /* movq %rsi,%rax */
+ "\x48\x87\x07" /* xchgq %rax,(%rdi) */
+ ),
+ };
+
+ vpop(); /* order: the memory-order operand is discarded; the same xchg sequence is emitted for every order */
+
+ gen_atomic_prologue(2); /* two operands: object address and new value */
+ gbc(code[mode].data, code[mode].size); /* after the xchg the accumulator holds the previous contents of the object */
+ gen_atomic_epilogue(2); /* drop both operands and publish %rax */
}
ST_FUNC void gen_atomic_compare_exchange_strong(int mode)
{
- tcc_error("atomic_compare_exchange_strong not implemented");
+    static const struct atomic_code code[] = { /* %rdi = object, %rsi = &expected, %rdx = desired; success flag (0/1) ends up in %eax */
+        [GEN_ATOMIC_MODE_8] = ATOMIC_CODE(
+            "\x0f\xb6\x06" /* movzbl (%rsi),%eax */
+            "\x0f\xb6\xd2" /* movzbl %dl,%edx */
+            "\xf0\x0f\xb0\x17" /* lock cmpxchgb %dl,(%rdi) */
+            "\x41\x0f\x94\xc0" /* sete %r8b */
+            "\x74\x02" /* je +2 */
+            "\x88\x06" /* movb %al,(%rsi) */
+            "\x41\x0f\xb6\xc0" /* movzbl %r8b,%eax */
+        ),
+        [GEN_ATOMIC_MODE_16] = ATOMIC_CODE(
+            "\x0f\xb7\x06" /* movzwl (%rsi),%eax */
+            "\x0f\xb7\xd2" /* movzwl %dx,%edx */
+            "\x66\xf0\x0f\xb1\x17" /* lock cmpxchgw %dx,(%rdi) */
+            "\x41\x0f\x94\xc0" /* sete %r8b */
+            "\x74\x03" /* je +3 */
+            "\x66\x89\x06" /* movw %ax,(%rsi) */
+            "\x41\x0f\xb6\xc0" /* movzbl %r8b,%eax */
+        ),
+        [GEN_ATOMIC_MODE_32] = ATOMIC_CODE(
+            "\x8b\x06" /* movl (%rsi),%eax */
+            "\xf0\x0f\xb1\x17" /* lock cmpxchgl %edx,(%rdi) */
+            "\x41\x0f\x94\xc0" /* sete %r8b */
+            "\x74\x02" /* je +2 */
+            "\x89\x06" /* movl %eax,(%rsi) */
+            "\x41\x0f\xb6\xc0" /* movzbl %r8b,%eax */
+        ),
+        [GEN_ATOMIC_MODE_64] = ATOMIC_CODE(
+            "\x48\x8b\x06" /* movq (%rsi),%rax */
+            "\xf0\x48\x0f\xb1\x17" /* lock cmpxchgq %rdx,(%rdi) */
+            "\x41\x0f\x94\xc0" /* sete %r8b */
+            "\x74\x03" /* je +3 */
+            "\x48\x89\x06" /* movq %rax,(%rsi) */
+            "\x41\x0f\xb6\xc0" /* movzbl %r8b,%eax */
+        ),
+    };
+
+    vpop(); /* failure order */
+    vpop(); /* success order */
+
+    gen_atomic_prologue(3); /* three operands: object, &expected, desired */
+    gbc(code[mode].data, code[mode].size); /* sete writes only %r8b, so the flag is zero-extended when moved to %eax */
+    gen_atomic_epilogue(3);
}
ST_FUNC void gen_atomic_compare_exchange_weak(int mode)
{
- tcc_error("atomic_compare_exchange_weak not implemented");
+ gen_atomic_compare_exchange_strong(mode); /* the weak form emits exactly the strong sequence; no separate weak code exists */
}
ST_FUNC void gen_atomic_fetch_add(int mode)
{
- tcc_error("atomic_fetch_add not implemented");
+ static const struct atomic_code code[] = { /* per-width snippet: copy the addend from %rsi to %rax, then lock xadd it into (%rdi) */
+ [GEN_ATOMIC_MODE_8] = ATOMIC_CODE(
+ "\x40\x0f\xb6\xc6" /* movzbl %sil,%eax */
+ "\xf0\x0f\xc0\x07" /* lock xaddb %al,(%rdi) */
+ ),
+ [GEN_ATOMIC_MODE_16] = ATOMIC_CODE(
+ "\x0f\xb7\xc6" /* movzwl %si,%eax */
+ "\x66\xf0\x0f\xc1\x07" /* lock xaddw %ax,(%rdi) */
+ ),
+ [GEN_ATOMIC_MODE_32] = ATOMIC_CODE(
+ "\x89\xf0" /* movl %esi,%eax */
+ "\xf0\x0f\xc1\x07" /* lock xaddl %eax,(%rdi) */
+ ),
+ [GEN_ATOMIC_MODE_64] = ATOMIC_CODE(
+ "\x48\x89\xf0" /* movq %rsi,%rax */
+ "\xf0\x48\x0f\xc1\x07" /* lock xaddq %rax,(%rdi) */
+ ),
+ };
+
+ vpop(); /* order: the memory-order operand is discarded; the same lock xadd is emitted for every order */
+
+ gen_atomic_prologue(2); /* two operands: object address and addend */
+ gbc(code[mode].data, code[mode].size); /* xadd leaves the previous contents of the object in the accumulator */
+ gen_atomic_epilogue(2); /* drop both operands and publish %rax */
}
ST_FUNC void gen_atomic_fetch_sub(int mode)
{
- tcc_error("atomic_fetch_sub not implemented");
+    static const struct atomic_code code[] = { /* per-width snippet: %rax = -operand, then lock xadd into (%rdi); old value returns in %rax */
+        [GEN_ATOMIC_MODE_8] = ATOMIC_CODE(
+            "\x40\x0f\xb6\xc6" /* movzbl %sil,%eax */
+            "\xf6\xd8" /* negb %al */
+            "\xf0\x0f\xc0\x07" /* lock xaddb %al,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_16] = ATOMIC_CODE(
+            "\x0f\xb7\xc6" /* movzwl %si,%eax */
+            "\x66\xf7\xd8" /* negw %ax */
+            "\x66\xf0\x0f\xc1\x07" /* lock xaddw %ax,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_32] = ATOMIC_CODE(
+            "\x89\xf0" /* movl %esi,%eax */
+            "\xf7\xd8" /* negl %eax */
+            "\xf0\x0f\xc1\x07" /* lock xaddl %eax,(%rdi) */
+        ),
+        [GEN_ATOMIC_MODE_64] = ATOMIC_CODE(
+            "\x48\x89\xf0" /* movq %rsi,%rax */
+            "\x48\xf7\xd8" /* negq %rax */
+            "\xf0\x48\x0f\xc1\x07" /* lock xaddq %rax,(%rdi) */
+        ),
+    };
+
+    vpop(); /* order */
+
+    gen_atomic_prologue(2); /* two operands: object address and subtrahend */
+    gbc(code[mode].data, code[mode].size); /* narrow modes negate only %al/%ax so the upper bits of %eax stay zero, as in fetch_add */
+    gen_atomic_epilogue(2);
+}
+
+/* Emit a load / op / lock-cmpxchg retry loop; data/size is the op snippet, which must fold the operand in %rsi into %rdx. */
+static inline void gen_atomic_fetch_op(int mode, const void *data, size_t size)
+{
+    static const struct atomic_code prologue[] = { /* initial load of the current value, emitted once before the loop */
+        [GEN_ATOMIC_MODE_8] = ATOMIC_CODE("\x0f\xb6\x07"), /* movzbl (%rdi),%eax */
+        [GEN_ATOMIC_MODE_16] = ATOMIC_CODE("\x0f\xb7\x07"), /* movzwl (%rdi),%eax */
+        [GEN_ATOMIC_MODE_32] = ATOMIC_CODE("\x8b\x07"), /* movl (%rdi),%eax */
+        [GEN_ATOMIC_MODE_64] = ATOMIC_CODE("\x48\x8b\x07"), /* movq (%rdi),%rax */
+    };
+    static const struct atomic_code label[] = { /* loop head: copy the observed value to %rdx (work copy) and %r8 (result) */
+        [GEN_ATOMIC_MODE_8] = ATOMIC_CODE(
+            "\x40\x0f\xb6\xf6" /* movzbl %sil,%esi */
+            "\x89\xc2" /* movl %eax,%edx */
+            "\x41\x89\xc0" /* movl %eax,%r8d */
+        ),
+        [GEN_ATOMIC_MODE_16] = ATOMIC_CODE(
+            "\x0f\xb7\xf6" /* movzwl %si,%esi */
+            "\x89\xc2" /* movl %eax,%edx */
+            "\x41\x89\xc0" /* movl %eax,%r8d */
+        ),
+        [GEN_ATOMIC_MODE_32] = ATOMIC_CODE(
+            "\x89\xc2" /* movl %eax,%edx */
+            "\x41\x89\xc0" /* movl %eax,%r8d */
+        ),
+        [GEN_ATOMIC_MODE_64] = ATOMIC_CODE(
+            "\x48\x89\xc2" /* movq %rax,%rdx */
+            "\x49\x89\xc0" /* movq %rax,%r8 */
+        ),
+    };
+    static const struct atomic_code cmpxchg[] = { /* try to publish %rdx; on failure the accumulator is reloaded from memory */
+        [GEN_ATOMIC_MODE_8] = ATOMIC_CODE("\xf0\x0f\xb0\x17"), /* lock cmpxchgb %dl,(%rdi) */
+        [GEN_ATOMIC_MODE_16] = ATOMIC_CODE("\x66\xf0\x0f\xb1\x17"), /* lock cmpxchgw %dx,(%rdi) */
+        [GEN_ATOMIC_MODE_32] = ATOMIC_CODE("\xf0\x0f\xb1\x17"), /* lock cmpxchgl %edx,(%rdi) */
+        [GEN_ATOMIC_MODE_64] = ATOMIC_CODE("\xf0\x48\x0f\xb1\x17"), /* lock cmpxchgq %rdx,(%rdi) */
+    };
+    static const struct atomic_code epilogue[] = { /* move the saved old value from %r8 back into the accumulator */
+        [GEN_ATOMIC_MODE_8] = ATOMIC_CODE("\x44\x89\xc0"), /* movl %r8d,%eax */
+        [GEN_ATOMIC_MODE_16] = ATOMIC_CODE("\x44\x89\xc0"), /* movl %r8d,%eax */
+        [GEN_ATOMIC_MODE_32] = ATOMIC_CODE("\x44\x89\xc0"), /* movl %r8d,%eax */
+        [GEN_ATOMIC_MODE_64] = ATOMIC_CODE("\x4c\x89\xc0"), /* movq %r8,%rax */
+    };
+    const size_t loop_size = (label[mode].size + size + cmpxchg[mode].size + 2); /* bytes from the loop head through the 2-byte jne */
+
+    vpop(); /* order */
+
+    gen_atomic_prologue(2);
+    gbc(prologue[mode].data, prologue[mode].size);
+
+    gbc(label[mode].data, label[mode].size);
+    gbc(data, size);
+    gbc(cmpxchg[mode].data, cmpxchg[mode].size);
+
+    g(0x75); /* jne rel8: taken when cmpxchg failed */
+    g((uint8_t)-(ssize_t)loop_size); /* displacement is relative to the end of the jne, i.e. back to the loop head */
+
+    gbc(epilogue[mode].data, epilogue[mode].size);
+    gen_atomic_epilogue(2);
}
ST_FUNC void gen_atomic_fetch_or(int mode)
{
- tcc_error("atomic_fetch_or not implemented");
+    const size_t skip_rex = (mode != GEN_ATOMIC_MODE_64); /* 1 for 8/16/32-bit modes, 0 for 64-bit */
+    static const struct atomic_code code = ATOMIC_CODE("\x48\x09\xf2"); /* orq %rsi,%rdx */
+
+    /* All modes but 64-bit skip the leading REX.W byte, leaving orl %esi,%edx */
+    gen_atomic_fetch_op(mode, ((const char *)code.data + skip_rex), (code.size - skip_rex));
}
ST_FUNC void gen_atomic_fetch_xor(int mode)
{
- tcc_error("atomic_fetch_xor not implemented");
+    const size_t skip_rex = (mode != GEN_ATOMIC_MODE_64); /* 1 for 8/16/32-bit modes, 0 for 64-bit */
+    static const struct atomic_code code = ATOMIC_CODE("\x48\x31\xf2"); /* xorq %rsi,%rdx */
+
+    /* All modes but 64-bit skip the leading REX.W byte, leaving xorl %esi,%edx */
+    gen_atomic_fetch_op(mode, ((const char *)code.data + skip_rex), (code.size - skip_rex));
}
ST_FUNC void gen_atomic_fetch_and(int mode)
{
- tcc_error("atomic_fetch_and not implemented");
+    const size_t skip_rex = (mode != GEN_ATOMIC_MODE_64); /* 1 for 8/16/32-bit modes, 0 for 64-bit */
+    static const struct atomic_code code = ATOMIC_CODE("\x48\x21\xf2"); /* andq %rsi,%rdx */
+
+    /* All modes but 64-bit skip the leading REX.W byte, leaving andl %esi,%edx */
+    gen_atomic_fetch_op(mode, ((const char *)code.data + skip_rex), (code.size - skip_rex));
}
/* end of x86-64 code generator */
--
2.30.0
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Tinycc-devel] [PATCH 4/4] stdatomic: x86_64 implementation,
Dmitry Selyutin <=