[PULL 35/42] target/arm: Relax ordered/atomic alignment checks for LSE2
From: Peter Maydell
Subject: [PULL 35/42] target/arm: Relax ordered/atomic alignment checks for LSE2
Date: Tue, 6 Jun 2023 10:48:07 +0100
From: Richard Henderson <richard.henderson@linaro.org>
FEAT_LSE2 only requires that atomic operations not cross a
16-byte boundary. Ordered operations may be completely
unaligned if SCTLR.nAA is set.
Because this alignment check is so special, do it by hand.
Make sure not to keep TCG temps live across the branch.
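
As an illustration, the boundary test that the generated code performs
is equivalent to this C predicate (a minimal sketch for exposition only;
the function name and the plain-C arithmetic are not part of the patch):

    #include <stdbool.h>
    #include <stdint.h>

    /* True if [addr, addr + size) crosses a 16-byte boundary. */
    static bool crosses_16_byte_boundary(uint64_t addr, unsigned size)
    {
        /* Offset within the 16-byte granule, plus the access size. */
        return (addr % 16) + size > 16;
    }

The TCG code below computes the same quantity in a 32-bit temporary and
branches over a call to the new unaligned_access helper when the access
stays within a single 16-byte granule.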
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230530191438.411344-17-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/tcg/helper-a64.h | 3 +
target/arm/tcg/helper-a64.c | 7 ++
target/arm/tcg/translate-a64.c | 120 ++++++++++++++++++++++++++-------
3 files changed, 104 insertions(+), 26 deletions(-)
diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
index ff56807247f..3d5957c11f4 100644
--- a/target/arm/tcg/helper-a64.h
+++ b/target/arm/tcg/helper-a64.h
@@ -110,3 +110,6 @@ DEF_HELPER_FLAGS_2(st2g_stub, TCG_CALL_NO_WG, void, env, i64)
DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64)
DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64)
DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64)
+
+DEF_HELPER_FLAGS_4(unaligned_access, TCG_CALL_NO_WG,
+ noreturn, env, i64, i32, i32)
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
index c3edf163be4..1c9370f07bd 100644
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -952,3 +952,10 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
memset(mem, 0, blocklen);
}
+
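+/*
+ * Out-of-line slow path for the LSE2 alignment check: raise the same
+ * alignment fault the access itself would have raised.  GETPC() gives
+ * the helper's return address so the fault unwinds to the guest insn.
+ */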
+void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr,
+ uint32_t access_type, uint32_t mmu_idx)
+{
+ arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type,
+ mmu_idx, GETPC());
+}
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 91d28f86620..adedebd1c22 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -307,6 +307,89 @@ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
return clean_data_tbi(s, addr);
}
+/*
+ * Generate the special alignment check that applies to AccType_ATOMIC
+ * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
+ * naturally aligned, but it must not cross a 16-byte boundary.
+ * See AArch64.CheckAlignment().
+ */
+static void check_lse2_align(DisasContext *s, int rn, int imm,
+ bool is_write, MemOp mop)
+{
+ TCGv_i32 tmp;
+ TCGv_i64 addr;
+ TCGLabel *over_label;
+ MMUAccessType type;
+ int mmu_idx;
+
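+    /*
+     * Compute ((rn + imm) % 16) + access_size in a 32-bit temporary:
+     * the access crosses a 16-byte boundary iff this exceeds 16.
+     * Only the low bits of the address matter for the test.
+     */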
+ tmp = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
+ tcg_gen_addi_i32(tmp, tmp, imm & 15);
+ tcg_gen_andi_i32(tmp, tmp, 15);
+ tcg_gen_addi_i32(tmp, tmp, memop_size(mop));
+
+ over_label = gen_new_label();
+ tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);
+
+ addr = tcg_temp_new_i64();
+ tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);
+
+    type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
+ mmu_idx = get_mem_index(s);
+ gen_helper_unaligned_access(cpu_env, addr, tcg_constant_i32(type),
+ tcg_constant_i32(mmu_idx));
+
+    gen_set_label(over_label);
+}
+
+/* Handle the alignment check for AccType_ATOMIC instructions. */
+static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
+{
+ MemOp size = mop & MO_SIZE;
+
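+    /* Byte accesses can never be unaligned. */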
+ if (size == MO_8) {
+ return mop;
+ }
+
+ /*
+     * If size == MO_128, this is an LDXP, and the operation is single-copy
+ * atomic for each doubleword, not the entire quadword; it still must
+ * be quadword aligned.
+ */
+ if (size == MO_128) {
+ return finalize_memop_atom(s, MO_128 | MO_ALIGN,
+ MO_ATOM_IFALIGN_PAIR);
+ }
+ if (dc_isar_feature(aa64_lse2, s)) {
+ check_lse2_align(s, rn, 0, true, mop);
+ } else {
+ mop |= MO_ALIGN;
+ }
+ return finalize_memop(s, mop);
+}
+
+/* Handle the alignment check for AccType_ORDERED instructions. */
+static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
+ bool is_write, MemOp mop)
+{
+ MemOp size = mop & MO_SIZE;
+
+ if (size == MO_8) {
+ return mop;
+ }
+ if (size == MO_128) {
+ return finalize_memop_atom(s, MO_128 | MO_ALIGN,
+ MO_ATOM_IFALIGN_PAIR);
+ }
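+    /*
+     * Without LSE2, force natural alignment.  With LSE2, require only
+     * that the access not cross a 16-byte boundary, and with SCTLR.nAA
+     * also set, allow the access to be completely unaligned.
+     */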
+ if (!dc_isar_feature(aa64_lse2, s)) {
+ mop |= MO_ALIGN;
+ } else if (!s->naa) {
+ check_lse2_align(s, rn, imm, is_write, mop);
+ }
+ return finalize_memop(s, mop);
+}
+
typedef struct DisasCompare64 {
TCGCond cond;
TCGv_i64 value;
@@ -2372,21 +2455,7 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
{
int idx = get_mem_index(s);
TCGv_i64 dirty_addr, clean_addr;
- MemOp memop;
-
- /*
- * For pairs:
- * if size == 2, the operation is single-copy atomic for the doubleword.
- * if size == 3, the operation is single-copy atomic for *each* doubleword,
- * not the entire quadword, however it must be quadword aligned.
- */
- memop = size + is_pair;
- if (memop == MO_128) {
- memop = finalize_memop_atom(s, MO_128 | MO_ALIGN,
- MO_ATOM_IFALIGN_PAIR);
- } else {
- memop = finalize_memop(s, memop | MO_ALIGN);
- }
+ MemOp memop = check_atomic_align(s, rn, size + is_pair);
s->is_ldex = true;
dirty_addr = cpu_reg_sp(s, rn);
@@ -2524,7 +2593,7 @@ static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
if (rn == 31) {
gen_check_sp_alignment(s);
}
- memop = finalize_memop(s, size | MO_ALIGN);
+ memop = check_atomic_align(s, rn, size);
clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
memidx, memop);
@@ -2546,7 +2615,7 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
}
/* This is a single atomic access, despite the "pair". */
- memop = finalize_memop(s, (size + 1) | MO_ALIGN);
+ memop = check_atomic_align(s, rn, size + 1);
clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
if (size == 2) {
@@ -2666,8 +2735,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
gen_check_sp_alignment(s);
}
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
- /* TODO: ARMv8.4-LSE SCTLR.nAA */
- memop = finalize_memop(s, size | MO_ALIGN);
+ memop = check_ordered_align(s, rn, 0, true, size);
clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
true, rn != 31, memop);
do_gpr_st(s, cpu_reg(s, rt), clean_addr, memop, true, rt,
@@ -2685,8 +2753,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
if (rn == 31) {
gen_check_sp_alignment(s);
}
- /* TODO: ARMv8.4-LSE SCTLR.nAA */
- memop = finalize_memop(s, size | MO_ALIGN);
+ memop = check_ordered_align(s, rn, 0, false, size);
clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
false, rn != 31, memop);
do_gpr_ld(s, cpu_reg(s, rt), clean_addr, memop, false, true,
@@ -3367,7 +3434,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
bool a = extract32(insn, 23, 1);
TCGv_i64 tcg_rs, tcg_rt, clean_addr;
AtomicThreeOpFn *fn = NULL;
- MemOp mop = finalize_memop(s, size | MO_ALIGN);
+ MemOp mop = size;
if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
unallocated_encoding(s);
@@ -3418,6 +3485,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
if (rn == 31) {
gen_check_sp_alignment(s);
}
+
+ mop = check_atomic_align(s, rn, mop);
clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, mop);
if (o3_opc == 014) {
@@ -3542,16 +3611,13 @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn)
bool is_store = false;
bool extend = false;
bool iss_sf;
- MemOp mop;
+ MemOp mop = size;
if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
unallocated_encoding(s);
return;
}
- /* TODO: ARMv8.4-LSE SCTLR.nAA */
- mop = finalize_memop(s, size | MO_ALIGN);
-
switch (opc) {
case 0: /* STLURB */
is_store = true;
@@ -3583,6 +3649,8 @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn)
gen_check_sp_alignment(s);
}
+ mop = check_ordered_align(s, rn, offset, is_store, mop);
+
dirty_addr = read_cpu_reg_sp(s, rn, 1);
tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
clean_addr = clean_data_tbi(s, dirty_addr);
--
2.34.1