[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Tinycc-devel] [PATCH 1/4] arm-asm: Add vmla, vmls, vnmls, vnmla, vmul, vnmul, vadd, vsub, vdiv, vneg, vabs, vsqrt, vcmp, vcmpe
From: |
Danny Milosavljevic |
Subject: |
[Tinycc-devel] [PATCH 1/4] arm-asm: Add vmla, vmls, vnmls, vnmla, vmul, vnmul, vadd, vsub, vdiv, vneg, vabs, vsqrt, vcmp, vcmpe |
Date: |
Sat, 23 Jan 2021 15:41:47 +0100 |
---
arm-asm.c | 290 ++++++++++++++++++++++++++++++++++++-
arm-tok.h | 38 +++++
tests/arm-asm-testsuite.sh | 17 ++-
3 files changed, 343 insertions(+), 2 deletions(-)
diff --git a/arm-asm.c b/arm-asm.c
index 36e784c..fb2e6c5 100644
--- a/arm-asm.c
+++ b/arm-asm.c
@@ -1463,7 +1463,6 @@ static int asm_parse_vfp_regvar(int t, int
double_precision)
return -1;
}
-
static void asm_floating_point_single_data_transfer_opcode(TCCState *s1, int
token)
{
Operand ops[3];
@@ -1646,6 +1645,264 @@ static void
asm_floating_point_block_data_transfer_opcode(TCCState *s1, int toke
else
asm_emit_coprocessor_data_transfer(condition_code_of_token(token),
coprocessor, first_regset_register, &ops[0], &offset, 0, preincrement,
op0_exclam, extra_register_bit, load);
}
+
+// Not standalone: tail of asm_floating_point_data_processing_opcode for the "<reg>, #<imm>" form (vcmp/vcmpe with immediate 0). CRd is the first register already parsed by the caller; the immediate is parsed here.
+static void asm_floating_point_immediate_data_processing_opcode_tail(TCCState
*s1, int token, uint8_t coprocessor, uint8_t CRd) {
+ uint8_t opcode1 = 0;
+ uint8_t opcode2 = 0;
+ uint8_t operands[3] = {0, 0, 0};
+ Operand operand;
+
+ operands[0] = CRd; // first register, as passed in by the caller
+
+ parse_operand(s1, &operand); // parses the operand that follows the first register
+ if (operand.type != OP_IM8 && operand.type != OP_IM8N) {
+ expect("Immediate value");
+ return;
+ }
+
+ opcode1 = 11; // "Other" instruction
+ switch (ARM_INSTRUCTION_GROUP(token)) {
+ case TOK_ASM_vcmpeq_f32:
+ case TOK_ASM_vcmpeq_f64:
+ opcode2 = 2;
+ operands[1] = 5; // the register form of vcmp uses 4 here; NOTE(review): presumably selects compare-with-zero -- confirm against the ARM ARM
+ if (operand.e.v) { // only the immediate value 0 is accepted
+ expect("Immediate value 0");
+ return;
+ }
+ break;
+ case TOK_ASM_vcmpeeq_f32:
+ case TOK_ASM_vcmpeeq_f64:
+ opcode2 = 6;
+ operands[1] = 5; // same fixed selector as vcmp above; only opcode2 differs
+ if (operand.e.v) { // only the immediate value 0 is accepted
+ expect("Immediate value 0");
+ return;
+ }
+ break;
+ default:
+ expect("known floating point with immediate instruction");
+ return;
+ }
+
+ if (coprocessor == CP_SINGLE_PRECISION_FLOAT) { // single precision: the register number's low bit goes into opcode1 (value 4), the remaining bits stay as the register field
+ if (operands[0] & 1)
+ opcode1 |= 4;
+ operands[0] >>= 1;
+ }
+
+ asm_emit_coprocessor_opcode(condition_code_of_token(token), coprocessor,
opcode1, operands[0], operands[1], operands[2], opcode2, 0);
+}
+/* Parses the operands of a VFP data-processing instruction (vmla, vmls, vnmls, vnmla, vmul, vnmul, vadd, vsub, vdiv, vneg, vabs, vsqrt, vcmp, vcmpe; .f32 or .f64) and emits the coprocessor opcode. */
+static void asm_floating_point_data_processing_opcode(TCCState *s1, int token)
{
+ uint8_t coprocessor = CP_SINGLE_PRECISION_FLOAT; // default; switched to double precision below for the _f64 token groups
+ uint8_t opcode1 = 0;
+ uint8_t opcode2 = 0; // (0 || 2) | register selection
+ uint8_t operands[3];
+ uint8_t nb_operands = 0;
+ int operand_1_register = 1; // cleared when operands[1] holds a fixed selector value instead of a register number
+ int reg;
+
+/* TODO:
+ Instruction opcode opcode2 Reason
+ =============================================================
+ - 1?00 ?1? Undefined
+ VFNMS 1?01 ?0? Must be unconditional
+ VFNMA 1?01 ?1? Must be unconditional
+ VFMA 1?10 ?0? Must be unconditional
+ VFMS 1?10 ?1? Must be unconditional
+
+ VCVT*
+
+ VMOV Fd, Fm
+ VMOV Sn, Rd
+ VMOV Rd, Sn
+ VMOV Sn, Sm, Rd, Rn
+ VMOV Rd, Rn, Sn, Sm
+ VMOV Dm, Rd, Rn
+ VMOV Rd, Rn, Dm
+ VMOV Dn[0], Rd
+ VMOV Rd, Dn[0]
+ VMOV Dn[1], Rd
+ VMOV Rd, Dn[1]
+
+ VMSR <sysreg>, Rd
+ VMRS Rd, <sysreg>
+ VMRS APSR_nzcv, FPSCR
+*/
+
+ switch (ARM_INSTRUCTION_GROUP(token)) { // pick double precision for the _f64 groups; every other token keeps the single-precision default
+ case TOK_ASM_vmlaeq_f64:
+ case TOK_ASM_vmlseq_f64:
+ case TOK_ASM_vnmlseq_f64:
+ case TOK_ASM_vnmlaeq_f64:
+ case TOK_ASM_vmuleq_f64:
+ case TOK_ASM_vnmuleq_f64:
+ case TOK_ASM_vaddeq_f64:
+ case TOK_ASM_vsubeq_f64:
+ case TOK_ASM_vdiveq_f64:
+ case TOK_ASM_vnegeq_f64:
+ case TOK_ASM_vabseq_f64:
+ case TOK_ASM_vsqrteq_f64:
+ case TOK_ASM_vcmpeq_f64:
+ case TOK_ASM_vcmpeeq_f64:
+ coprocessor = CP_DOUBLE_PRECISION_FLOAT;
+ }
+
+ for (nb_operands = 0; nb_operands < 3; ) { // collect up to three comma-separated VFP registers
+ if (nb_operands == 1 && (tok == '#' || tok == '$')) { // "<reg>, #imm" form: hand over to the immediate tail with the first register
+ asm_floating_point_immediate_data_processing_opcode_tail(s1,
token, coprocessor, operands[0]);
+ return;
+ }
+ if (coprocessor == CP_SINGLE_PRECISION_FLOAT) {
+ if ((reg = asm_parse_vfp_regvar(tok, 0)) != -1) {
+ operands[nb_operands] = reg;
+ next();
+ } else {
+ expect("'s<number>'");
+ return;
+ }
+ } else if (coprocessor == CP_DOUBLE_PRECISION_FLOAT) {
+ if ((reg = asm_parse_vfp_regvar(tok, 1)) != -1) {
+ operands[nb_operands] = reg;
+ next();
+ } else {
+ expect("'d<number>'");
+ return;
+ }
+ } else if ((reg = asm_parse_vfp_regvar(tok, 0)) != -1) { // NOTE(review): this branch and the two after it look unreachable -- coprocessor only ever holds one of the two values tested above
+ coprocessor = CP_SINGLE_PRECISION_FLOAT;
+ operands[nb_operands] = reg;
+ next();
+ } else if ((reg = asm_parse_vfp_regvar(tok, 1)) != -1) {
+ coprocessor = CP_DOUBLE_PRECISION_FLOAT;
+ operands[nb_operands] = reg;
+ next();
+ } else
+ tcc_internal_error("unknown coprocessor");
+ ++nb_operands;
+ if (tok == ',')
+ next();
+ else
+ break;
+ }
+
+ if (nb_operands == 2) { // implicit: the two-operand form "a, b" is expanded to "a, a, b"
+ operands[2] = operands[1];
+ operands[1] = operands[0];
+ nb_operands = 3;
+ }
+ if (nb_operands < 3) {
+ tcc_error("Not enough operands for '%s' (%u)", get_tok_str(token,
NULL), nb_operands);
+ return;
+ }
+
+ switch (ARM_INSTRUCTION_GROUP(token)) { // map the instruction group to its opcode1/opcode2 base values
+ case TOK_ASM_vmlaeq_f32:
+ case TOK_ASM_vmlaeq_f64:
+ opcode1 = 0;
+ opcode2 = 0;
+ break;
+ case TOK_ASM_vmlseq_f32:
+ case TOK_ASM_vmlseq_f64:
+ opcode1 = 0;
+ opcode2 = 2;
+ break;
+ case TOK_ASM_vnmlseq_f32:
+ case TOK_ASM_vnmlseq_f64:
+ opcode1 = 1;
+ opcode2 = 0;
+ break;
+ case TOK_ASM_vnmlaeq_f32:
+ case TOK_ASM_vnmlaeq_f64:
+ opcode1 = 1;
+ opcode2 = 2;
+ break;
+ case TOK_ASM_vmuleq_f32:
+ case TOK_ASM_vmuleq_f64:
+ opcode1 = 2;
+ opcode2 = 0;
+ break;
+ case TOK_ASM_vnmuleq_f32:
+ case TOK_ASM_vnmuleq_f64:
+ opcode1 = 2;
+ opcode2 = 2;
+ break;
+ case TOK_ASM_vaddeq_f32:
+ case TOK_ASM_vaddeq_f64:
+ opcode1 = 3;
+ opcode2 = 0;
+ break;
+ case TOK_ASM_vsubeq_f32:
+ case TOK_ASM_vsubeq_f64:
+ opcode1 = 3;
+ opcode2 = 2;
+ break;
+ case TOK_ASM_vdiveq_f32:
+ case TOK_ASM_vdiveq_f64:
+ opcode1 = 8;
+ opcode2 = 0;
+ break;
+ case TOK_ASM_vnegeq_f32:
+ case TOK_ASM_vnegeq_f64:
+ opcode1 = 11; // "Other" instruction
+ opcode2 = 2;
+ operands[1] = 1; // fixed selector replaces whatever was parsed; NOTE(review): an explicit middle register in a three-operand spelling is silently discarded here and in the cases below -- confirm intended
+ operand_1_register = 0; // operands[1] is not a register number, so it must not be split below
+ break;
+ case TOK_ASM_vabseq_f32:
+ case TOK_ASM_vabseq_f64:
+ opcode1 = 11; // "Other" instruction
+ opcode2 = 6;
+ operands[1] = 0;
+ operand_1_register = 0;
+ break;
+ case TOK_ASM_vsqrteq_f32:
+ case TOK_ASM_vsqrteq_f64:
+ opcode1 = 11; // "Other" instruction
+ opcode2 = 6;
+ operands[1] = 1;
+ operand_1_register = 0;
+ break;
+ case TOK_ASM_vcmpeq_f32:
+ case TOK_ASM_vcmpeq_f64:
+ opcode1 = 11; // "Other" instruction
+ opcode2 = 2;
+ operands[1] = 4;
+ operand_1_register = 0;
+ break;
+ case TOK_ASM_vcmpeeq_f32:
+ case TOK_ASM_vcmpeeq_f64:
+ opcode1 = 11; // "Other" instruction
+ opcode2 = 6;
+ operands[1] = 4;
+ operand_1_register = 0;
+ break;
+ // TODO: vcvt; vcvtr
+ default:
+ expect("known floating point instruction");
+ return;
+ }
+
+ if (coprocessor == CP_SINGLE_PRECISION_FLOAT) { // single precision: each register number's low bit moves into an opcode bit, the remaining bits stay as the register field
+ if (operands[2] & 1)
+ opcode2 |= 1;
+ operands[2] >>= 1;
+
+ if (operand_1_register) { // skipped when operands[1] is a fixed selector rather than a register
+ if (operands[1] & 1)
+ opcode2 |= 4;
+ operands[1] >>= 1;
+ }
+
+ if (operands[0] & 1)
+ opcode1 |= 4;
+ operands[0] >>= 1;
+ }
+
+ asm_emit_coprocessor_opcode(condition_code_of_token(token), coprocessor,
opcode1, operands[0], operands[1], operands[2], opcode2, 0);
+}
#endif
static void asm_misc_single_data_transfer_opcode(TCCState *s1, int token)
@@ -2010,6 +2267,37 @@ ST_FUNC void asm_opcode(TCCState *s1, int token)
asm_floating_point_single_data_transfer_opcode(s1, token);
return;
+ case TOK_ASM_vmlaeq_f32:
+ case TOK_ASM_vmlseq_f32:
+ case TOK_ASM_vnmlseq_f32:
+ case TOK_ASM_vnmlaeq_f32:
+ case TOK_ASM_vmuleq_f32:
+ case TOK_ASM_vnmuleq_f32:
+ case TOK_ASM_vaddeq_f32:
+ case TOK_ASM_vsubeq_f32:
+ case TOK_ASM_vdiveq_f32:
+ case TOK_ASM_vnegeq_f32:
+ case TOK_ASM_vabseq_f32:
+ case TOK_ASM_vsqrteq_f32:
+ case TOK_ASM_vcmpeq_f32:
+ case TOK_ASM_vcmpeeq_f32:
+ case TOK_ASM_vmlaeq_f64:
+ case TOK_ASM_vmlseq_f64:
+ case TOK_ASM_vnmlseq_f64:
+ case TOK_ASM_vnmlaeq_f64:
+ case TOK_ASM_vmuleq_f64:
+ case TOK_ASM_vnmuleq_f64:
+ case TOK_ASM_vaddeq_f64:
+ case TOK_ASM_vsubeq_f64:
+ case TOK_ASM_vdiveq_f64:
+ case TOK_ASM_vnegeq_f64:
+ case TOK_ASM_vabseq_f64:
+ case TOK_ASM_vsqrteq_f64:
+ case TOK_ASM_vcmpeq_f64:
+ case TOK_ASM_vcmpeeq_f64:
+ asm_floating_point_data_processing_opcode(s1, token);
+ return;
+
case TOK_ASM_vpusheq:
case TOK_ASM_vpopeq:
case TOK_ASM_vldmeq:
diff --git a/arm-tok.h b/arm-tok.h
index cd45a01..270f420 100644
--- a/arm-tok.h
+++ b/arm-tok.h
@@ -153,6 +153,29 @@
DEF(TOK_ASM_ ## x, #x) \
DEF(TOK_ASM_ ## x ## rsvd, #x "rsvd")
+/* Defines tokens TOK_ASM_<x><cond>_<y>, spelled "<x><cond>.<y>", for instruction x with type suffix y. Note: condition code is 4 bits, hence 16 entries: 14 named conditions (eq..le), the form without a condition, and "rsvd". */
+#define DEF_ASM_CONDED_WITH_SUFFIX(x, y) \
+ DEF(TOK_ASM_ ## x ## eq ## _ ## y, #x "eq." #y) \
+ DEF(TOK_ASM_ ## x ## ne ## _ ## y, #x "ne." #y) \
+ DEF(TOK_ASM_ ## x ## cs ## _ ## y, #x "cs." #y) \
+ DEF(TOK_ASM_ ## x ## cc ## _ ## y, #x "cc." #y) \
+ DEF(TOK_ASM_ ## x ## mi ## _ ## y, #x "mi." #y) \
+ DEF(TOK_ASM_ ## x ## pl ## _ ## y, #x "pl." #y) \
+ DEF(TOK_ASM_ ## x ## vs ## _ ## y, #x "vs." #y) \
+ DEF(TOK_ASM_ ## x ## vc ## _ ## y, #x "vc." #y) \
+ DEF(TOK_ASM_ ## x ## hi ## _ ## y, #x "hi." #y) \
+ DEF(TOK_ASM_ ## x ## ls ## _ ## y, #x "ls." #y) \
+ DEF(TOK_ASM_ ## x ## ge ## _ ## y, #x "ge." #y) \
+ DEF(TOK_ASM_ ## x ## lt ## _ ## y, #x "lt." #y) \
+ DEF(TOK_ASM_ ## x ## gt ## _ ## y, #x "gt." #y) \
+ DEF(TOK_ASM_ ## x ## le ## _ ## y, #x "le." #y) \
+ DEF(TOK_ASM_ ## x ## _ ## y, #x "." #y) \
+ DEF(TOK_ASM_ ## x ## rsvd ## _ ## y, #x "rsvd." #y)
+/* Defines the full conditional token set of instruction x twice: once with the f32 suffix and once with the f64 suffix. */
+#define DEF_ASM_CONDED_VFP_F32_F64(x) \
+ DEF_ASM_CONDED_WITH_SUFFIX(x, f32) \
+ DEF_ASM_CONDED_WITH_SUFFIX(x, f64)
+
/* Note: add new tokens after nop (MUST always use DEF_ASM_CONDED) */
DEF_ASM_CONDED(nop)
@@ -285,6 +308,21 @@
DEF_ASM_CONDED(vldr)
DEF_ASM_CONDED(vstr)
+ DEF_ASM_CONDED_VFP_F32_F64(vmla)
+ DEF_ASM_CONDED_VFP_F32_F64(vmls)
+ DEF_ASM_CONDED_VFP_F32_F64(vnmls)
+ DEF_ASM_CONDED_VFP_F32_F64(vnmla)
+ DEF_ASM_CONDED_VFP_F32_F64(vmul)
+ DEF_ASM_CONDED_VFP_F32_F64(vnmul)
+ DEF_ASM_CONDED_VFP_F32_F64(vadd)
+ DEF_ASM_CONDED_VFP_F32_F64(vsub)
+ DEF_ASM_CONDED_VFP_F32_F64(vdiv)
+ DEF_ASM_CONDED_VFP_F32_F64(vneg)
+ DEF_ASM_CONDED_VFP_F32_F64(vabs)
+ DEF_ASM_CONDED_VFP_F32_F64(vsqrt)
+ DEF_ASM_CONDED_VFP_F32_F64(vcmp)
+ DEF_ASM_CONDED_VFP_F32_F64(vcmpe)
+
DEF_ASM_CONDED(vpush)
DEF_ASM_CONDED(vpop)
DEF_ASM_CONDED(vldm)
diff --git a/tests/arm-asm-testsuite.sh b/tests/arm-asm-testsuite.sh
index 4e27863..d1c96ff 100755
--- a/tests/arm-asm-testsuite.sh
+++ b/tests/arm-asm-testsuite.sh
@@ -5,7 +5,16 @@ set -e
# Note: "{r3}" is definitely different--but would complicate the assembler.
state="`mktemp -d`"
-cat ../arm-tok.h |grep DEF_ASM |grep -v 'not useful' |grep -v '#define' |grep
-v '/[*]' |sed -e 's;^[ ]*DEF_ASM[^(]*(\(.*\)).*$;\1;' | egrep -v
'^((r|c|p|s|d)[0-9]+|fp|ip|sp|lr|pc|asl)$' | while read s
+cat ../arm-tok.h | \
+ grep DEF_ASM | \
+ grep -v 'not useful' | \
+ grep -v '#define' | \
+ grep -v '/[*]' | \
+ grep -v 'DEF_ASM_CONDED_WITH_SUFFIX(x' | \
+ sed -e 's;^[ ]*DEF_ASM_CONDED_VFP_F32_F64[^(]*(\(.*\)).*$;
DEF_ASM_CONDED(\1.f32)\
+ DEF_ASM_CONDED(\1.f64);g' | \
+ sed -e 's;^[ ]*DEF_ASM[^(]*(\(.*\)).*$;\1;g' | \
+ egrep -v '^((r|c|p|s|d)[0-9]+|fp|ip|sp|lr|pc|asl)$' | while read s
do
as_opts=""
if [ "${s#v}" != "${s}" ]
@@ -132,6 +141,12 @@ do
"{d4}" \
"{s4-s31}" \
"{s4}" \
+ "s2, s3, s4" \
+ "s2, s3" \
+ "d2, d3, d4" \
+ "d2, d3" \
+ "s2, #0" \
+ "d2, #0" \
""
do
#echo ".syntax unified" > a.s
--
2.29.2
- [Tinycc-devel] [PATCH 0/4] Implement ARM VFP arithmetic instructions in ARM inline assembler, Danny Milosavljevic, 2021/01/23
- [Tinycc-devel] [PATCH 1/4] arm-asm: Add vmla, vmls, vnmls, vnmla, vmul, vnmul, vadd, vsub, vdiv, vneg, vabs, vsqrt, vcmp, vcmpe,
Danny Milosavljevic <=
- [Tinycc-devel] [PATCH 2/4] arm-asm: Add vmov, Danny Milosavljevic, 2021/01/23
- [Tinycc-devel] [PATCH 3/4] arm-asm: Mostly factor out VFP register reference parsing to parse_operand, Danny Milosavljevic, 2021/01/23
- [Tinycc-devel] [PATCH 4/4] arm-asm: Implement "vmov.f32 Sn, Rd", "vmov.f32 Rd, Sn", "vmov.f64 Dm, Rd, Rn", "vmov.f64 Rd, Rn, Dm", Danny Milosavljevic, 2021/01/23