[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Tinycc-devel] [PATCH 4/4] arm-asm: Implement "vmov.f32 Sn, Rd", "vmov.f
From: Danny Milosavljevic
Subject: [Tinycc-devel] [PATCH 4/4] arm-asm: Implement "vmov.f32 Sn, Rd", "vmov.f32 Rd, Sn", "vmov.f64 Dm, Rd, Rn", "vmov.f64 Rd, Rn, Dm"
Date: Sat, 23 Jan 2021 15:41:50 +0100
---
arm-asm.c | 122 +++++++++++++++++++++++++++++++------
tests/arm-asm-testsuite.sh | 7 +++
2 files changed, 111 insertions(+), 18 deletions(-)
diff --git a/arm-asm.c b/arm-asm.c
index d064b8f..f2f1cb8 100644
--- a/arm-asm.c
+++ b/arm-asm.c
@@ -1291,6 +1291,7 @@ static void asm_single_data_transfer_opcode(TCCState *s1,
int token)
}
}
+// Note: Only call this using a VFP register if you know exactly what you are
doing (i.e. cp_number is 10 or 11 and you are doing a vmov)
static void asm_emit_coprocessor_data_transfer(uint32_t high_nibble, uint8_t
cp_number, uint8_t CRd, const Operand* Rn, const Operand* offset, int
offset_minus, int preincrement, int writeback, int long_transfer, int load) {
uint32_t opcode = 0x0;
opcode |= 1 << 26; // Load/Store
@@ -1304,12 +1305,14 @@ static void asm_emit_coprocessor_data_transfer(uint32_t
high_nibble, uint8_t cp_
opcode |= cp_number << 8;
+ //assert(CRd < 16);
opcode |= ENCODE_RD(CRd);
if (Rn->type != OP_REG32) {
expect("register");
return;
}
+ //assert(Rn->reg < 16);
opcode |= ENCODE_RN(Rn->reg);
if (preincrement)
opcode |= 1 << 24; // add offset before transfer
@@ -1342,6 +1345,9 @@ static void asm_emit_coprocessor_data_transfer(uint32_t
high_nibble, uint8_t cp_
opcode |= offset->reg;
tcc_error("Using register offset to register address is not possible
here");
return;
+ } else if (offset->type == OP_VREG64) {
+ opcode |= 16;
+ opcode |= offset->reg;
} else
expect("immediate or register");
@@ -1417,6 +1423,9 @@ static void asm_coprocessor_data_transfer_opcode(TCCState
*s1, int token)
tcc_error("Using 'pc' for register offset in '%s' is not
implemented by ARM", get_tok_str(token, NULL));
return;
}
+ } else if (ops[2].type == OP_VREG64) {
+ tcc_error("'%s' does not support VFP register operand",
get_tok_str(token, NULL));
+ return;
}
} else {
// end of input expression in brackets--assume 0 offset
@@ -1814,12 +1823,77 @@ static void
asm_floating_point_immediate_data_processing_opcode_tail(TCCState *s
asm_emit_coprocessor_opcode(condition_code_of_token(token), coprocessor,
opcode1, operands[0], operands[1], operands[2], opcode2, 0);
}
+static void asm_floating_point_reg_arm_reg_transfer_opcode_tail(TCCState *s1,
int token, int coprocessor, int nb_arm_regs, int nb_ops, Operand ops[3]) {
+ uint8_t opcode1 = 0;
+ uint8_t opcode2 = 0;
+ switch (coprocessor) {
+ case CP_SINGLE_PRECISION_FLOAT:
+ // "vmov.f32 r2, s3" or "vmov.f32 s3, r2"
+ if (nb_ops != 2 || nb_arm_regs != 1) {
+ tcc_error("vmov.f32 only implemented for one VFP register operand
and one ARM register operands");
+ return;
+ }
+ if (ops[0].type != OP_REG32) { // determine mode: load or store
+ // need to swap operands 0 and 1
+ memcpy(&ops[2], &ops[1], sizeof(ops[2]));
+ memcpy(&ops[1], &ops[0], sizeof(ops[1]));
+ memcpy(&ops[0], &ops[2], sizeof(ops[0]));
+ } else
+ opcode1 |= 1;
+
+ if (ops[1].type == OP_VREG32) {
+ if (ops[1].reg & 1)
+ opcode2 |= 4;
+ ops[1].reg >>= 1;
+ }
+
+ if (ops[0].type == OP_VREG32) {
+ if (ops[0].reg & 1)
+ opcode1 |= 4;
+ ops[0].reg >>= 1;
+ }
+
+ asm_emit_coprocessor_opcode(condition_code_of_token(token),
coprocessor, opcode1, ops[0].reg, (ops[1].type == OP_IM8) ? ops[1].e.v :
ops[1].reg, 0x10, opcode2, 0);
+ break;
+ case CP_DOUBLE_PRECISION_FLOAT:
+ if (nb_ops != 3 || nb_arm_regs != 2) {
+ tcc_error("vmov.f32 only implemented for one VFP register operand
and two ARM register operands");
+ return;
+ }
+ // Determine whether it's a store into a VFP register (vmov "d1, r2,
r3") rather than "vmov r2, r3, d1"
+ if (ops[0].type == OP_VREG64) {
+ if (ops[2].type == OP_REG32) {
+ Operand temp;
+ // need to rotate operand list to the left
+ memcpy(&temp, &ops[0], sizeof(temp));
+ memcpy(&ops[0], &ops[1], sizeof(ops[0]));
+ memcpy(&ops[1], &ops[2], sizeof(ops[1]));
+ memcpy(&ops[2], &temp, sizeof(ops[2]));
+ } else {
+ tcc_error("vmov.f64 only implemented for one VFP register
operand and two ARM register operands");
+ return;
+ }
+ } else if (ops[0].type != OP_REG32 || ops[1].type != OP_REG32 ||
ops[2].type != OP_VREG64) {
+ tcc_error("vmov.f64 only implemented for one VFP register operand
and two ARM register operands");
+ return;
+ } else {
+ opcode1 |= 1;
+ }
+ asm_emit_coprocessor_data_transfer(condition_code_of_token(token),
coprocessor, ops[0].reg, &ops[1], &ops[2], 0, 0, 0, 1, opcode1);
+ break;
+ default:
+ tcc_internal_error("unknown coprocessor");
+ }
+}
+
static void asm_floating_point_data_processing_opcode(TCCState *s1, int token)
{
uint8_t coprocessor = CP_SINGLE_PRECISION_FLOAT;
uint8_t opcode1 = 0;
uint8_t opcode2 = 0; // (0 || 2) | register selection
Operand ops[3];
uint8_t nb_ops = 0;
+ int vmov = 0;
+ int nb_arm_regs = 0;
/* TODO:
Instruction opcode opcode2 Reason
@@ -1833,12 +1907,8 @@ static void
asm_floating_point_data_processing_opcode(TCCState *s1, int token) {
VCVT*
VMOV Fd, Fm
- VMOV Sn, Rd
- VMOV Rd, Sn
VMOV Sn, Sm, Rd, Rn
VMOV Rd, Rn, Sn, Sm
- VMOV Dm, Rd, Rn
- VMOV Rd, Rn, Dm
VMOV Dn[0], Rd
VMOV Rd, Dn[0]
VMOV Dn[1], Rd
@@ -1868,13 +1938,23 @@ static void
asm_floating_point_data_processing_opcode(TCCState *s1, int token) {
coprocessor = CP_DOUBLE_PRECISION_FLOAT;
}
+ switch (ARM_INSTRUCTION_GROUP(token)) {
+ case TOK_ASM_vmoveq_f32:
+ case TOK_ASM_vmoveq_f64:
+ vmov = 1;
+ break;
+ }
+
for (nb_ops = 0; nb_ops < 3; ) {
+ // Note: Necessary because parse_operand can't parse decimal numerals.
if (nb_ops == 1 && (tok == '#' || tok == '$')) {
asm_floating_point_immediate_data_processing_opcode_tail(s1,
token, coprocessor, ops[0].reg);
return;
}
parse_operand(s1, &ops[nb_ops]);
- if (ops[nb_ops].type == OP_VREG32) {
+ if (vmov && ops[nb_ops].type == OP_REG32) {
+ ++nb_arm_regs;
+ } else if (ops[nb_ops].type == OP_VREG32) {
if (coprocessor != CP_SINGLE_PRECISION_FLOAT) {
expect("'s<number>'");
return;
@@ -1895,14 +1975,16 @@ static void
asm_floating_point_data_processing_opcode(TCCState *s1, int token) {
break;
}
- if (nb_ops == 2) { // implicit
- memcpy(&ops[2], &ops[1], sizeof(ops[1])); // move ops[2]
- memcpy(&ops[1], &ops[0], sizeof(ops[0])); // ops[1] was implicit
- nb_ops = 3;
- }
- if (nb_ops < 3) {
- tcc_error("Not enough operands for '%s' (%u)", get_tok_str(token,
NULL), nb_ops);
- return;
+ if (nb_arm_regs == 0) {
+ if (nb_ops == 2) { // implicit
+ memcpy(&ops[2], &ops[1], sizeof(ops[1])); // move ops[2]
+ memcpy(&ops[1], &ops[0], sizeof(ops[0])); // ops[1] was implicit
+ nb_ops = 3;
+ }
+ if (nb_ops < 3) {
+ tcc_error("Not enough operands for '%s' (%u)", get_tok_str(token,
NULL), nb_ops);
+ return;
+ }
}
switch (ARM_INSTRUCTION_GROUP(token)) {
@@ -1988,11 +2070,15 @@ static void
asm_floating_point_data_processing_opcode(TCCState *s1, int token) {
break;
case TOK_ASM_vmoveq_f32:
case TOK_ASM_vmoveq_f64:
- // FIXME: Check for ARM registers--and allow only very little.
- opcode1 = 11; // "Other" instruction
- opcode2 = 2;
- ops[1].type = OP_IM8;
- ops[1].e.v = 0;
+ if (nb_arm_regs > 0) { // vmov.f32 r2, s3 or similar
+ asm_floating_point_reg_arm_reg_transfer_opcode_tail(s1, token,
coprocessor, nb_arm_regs, nb_ops, ops);
+ return;
+ } else {
+ opcode1 = 11; // "Other" instruction
+ opcode2 = 2;
+ ops[1].type = OP_IM8;
+ ops[1].e.v = 0;
+ }
break;
// TODO: vcvt; vcvtr
default:
diff --git a/tests/arm-asm-testsuite.sh b/tests/arm-asm-testsuite.sh
index 6a3845e..19acf90 100755
--- a/tests/arm-asm-testsuite.sh
+++ b/tests/arm-asm-testsuite.sh
@@ -151,6 +151,10 @@ do
"d3, #0.0" \
"s4, #-0.1796875" \
"d4, #0.1796875" \
+ "r2, r3, d1" \
+ "d1, r2, r3" \
+ "s1, r2" \
+ "r2, s1" \
""
do
#echo ".syntax unified" > a.s
@@ -214,6 +218,9 @@ else
"bl r3"|"b r3"|"mov r2, #0xEFFF"|"mov r4, #0x0201")
known_failure=" (known failure)"
;;
+ "vmov.f32 r2, r3, d1"|"vmov.f32 d1, r2, r3") # GNU as
bug
+ known_failure=" (known failure)"
+ ;;
*)
known_failure=""
status=1
--
2.29.2
- [Tinycc-devel] [PATCH 0/4] Implement ARM VFP arithmetic instructions in ARM inline assembler, Danny Milosavljevic, 2021/01/23
- [Tinycc-devel] [PATCH 1/4] arm-asm: Add vmla, vmls, vnmls, vnmla, vmul, vnmul, vadd, vsub, vdiv, vneg, vabs, vsqrt, vcmp, vcmpe, Danny Milosavljevic, 2021/01/23
- [Tinycc-devel] [PATCH 2/4] arm-asm: Add vmov, Danny Milosavljevic, 2021/01/23
- [Tinycc-devel] [PATCH 3/4] arm-asm: Mostly factor out VFP register reference parsing to parse_operand, Danny Milosavljevic, 2021/01/23
- [Tinycc-devel] [PATCH 4/4] arm-asm: Implement "vmov.f32 Sn, Rd", "vmov.f32 Rd, Sn", "vmov.f64 Dm, Rd, Rn", "vmov.f64 Rd, Rn, Dm",
Danny Milosavljevic <=