[PATCH v3 16/16] tcg/loongarch64: Implement 128-bit load & store

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v3 16/16] tcg/loongarch64: Implement 128-bit load & store

From:	Jiajie Chen
Subject:	[PATCH v3 16/16] tcg/loongarch64: Implement 128-bit load & store
Date:	Sat, 2 Sep 2023 13:02:16 +0800

If LSX is available, use LSX instructions to implement 128-bit load &
store.

Signed-off-by: Jiajie Chen <c@jia.je>
---
 tcg/loongarch64/tcg-target-con-set.h |  2 ++
 tcg/loongarch64/tcg-target.c.inc     | 42 ++++++++++++++++++++++++++++
 tcg/loongarch64/tcg-target.h         |  2 +-
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h
index 914572d21b..77d62e38e7 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -18,6 +18,7 @@ C_O0_I1(r)
 C_O0_I2(rZ, r)
 C_O0_I2(rZ, rZ)
 C_O0_I2(w, r)
+C_O0_I3(r, r, r)
 C_O1_I1(r, r)
 C_O1_I1(w, r)
 C_O1_I1(w, w)
@@ -37,3 +38,4 @@ C_O1_I2(w, w, wM)
 C_O1_I2(w, w, wA)
 C_O1_I3(w, w, w, w)
 C_O1_I4(r, rZ, rJ, rZ, rZ)
+C_O2_I1(r, r, r)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 2b001598e2..9d999ef58c 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1081,6 +1081,31 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
     }
 }
 
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg data_lo, TCGReg data_hi,
+                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
+{
+    TCGLabelQemuLdst *ldst;
+    HostAddress h;
+    /* Forward is_ld: this helper emits stores as well as loads. */
+    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
+    if (is_ld) {
+        tcg_out_opc_vldx(s, TCG_VEC_TMP0, h.base, h.index);
+        tcg_out_opc_vpickve2gr_d(s, data_lo, TCG_VEC_TMP0, 0);
+        tcg_out_opc_vpickve2gr_d(s, data_hi, TCG_VEC_TMP0, 1);
+    } else {
+        tcg_out_opc_vinsgr2vr_d(s, TCG_VEC_TMP0, data_lo, 0);
+        tcg_out_opc_vinsgr2vr_d(s, TCG_VEC_TMP0, data_hi, 1);
+        tcg_out_opc_vstx(s, TCG_VEC_TMP0, h.base, h.index);
+    }
+    /* Fill in the label prepare_host_addr returned, when there is one. */
+    if (ldst) {
+        ldst->type = TCG_TYPE_I128;
+        ldst->datalo_reg = data_lo;
+        ldst->datahi_reg = data_hi;
+        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+    }
+}
+
 /*
  * Entry-points
  */
@@ -1145,6 +1170,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     TCGArg a0 = args[0];
     TCGArg a1 = args[1];
     TCGArg a2 = args[2];
+    TCGArg a3 = args[3];
     int c2 = const_args[2];
 
     switch (opc) {
@@ -1507,6 +1533,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_qemu_ld_a64_i64:
         tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
         break;
+    case INDEX_op_qemu_ld_a32_i128:
+    case INDEX_op_qemu_ld_a64_i128:
+        tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, true);
+        break;
     case INDEX_op_qemu_st_a32_i32:
     case INDEX_op_qemu_st_a64_i32:
         tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
@@ -1515,6 +1545,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_qemu_st_a64_i64:
         tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
         break;
+    case INDEX_op_qemu_st_a32_i128:
+    case INDEX_op_qemu_st_a64_i128:
+        tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, false);
+        break;
 
     case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
     case INDEX_op_mov_i64:
@@ -1995,6 +2029,14 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_qemu_st_a64_i64:
         return C_O0_I2(rZ, r);
 
+    case INDEX_op_qemu_ld_a32_i128:
+    case INDEX_op_qemu_ld_a64_i128:
+        return C_O2_I1(r, r, r);
+
+    case INDEX_op_qemu_st_a32_i128:
+    case INDEX_op_qemu_st_a64_i128:
+        return C_O0_I3(r, r, r);
+
     case INDEX_op_brcond_i32:
     case INDEX_op_brcond_i64:
         return C_O0_I2(rZ, rZ);
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index 67b0a95532..03017672f6 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -171,7 +171,7 @@ extern bool use_lsx_instructions;
 #define TCG_TARGET_HAS_muluh_i64        1
 #define TCG_TARGET_HAS_mulsh_i64        1
 
-#define TCG_TARGET_HAS_qemu_ldst_i128   0
+#define TCG_TARGET_HAS_qemu_ldst_i128   use_lsx_instructions
 
 #define TCG_TARGET_HAS_v64              0
 #define TCG_TARGET_HAS_v128             use_lsx_instructions
-- 
2.42.0

[Prev in Thread]

Current Thread

[Next in Thread]

[PATCH v3 07/16] tcg/loongarch64: Lower neg_vec to vneg, (continued)
- [PATCH v3 07/16] tcg/loongarch64: Lower neg_vec to vneg, Jiajie Chen, 2023/09/02
- [PATCH v3 06/16] tcg/loongarch64: Lower vector bitwise operations, Jiajie Chen, 2023/09/02
- [PATCH v3 08/16] tcg/loongarch64: Lower mul_vec to vmul, Jiajie Chen, 2023/09/02
- [PATCH v3 10/16] tcg/loongarch64: Lower vector saturated ops, Jiajie Chen, 2023/09/02
- [PATCH v3 09/16] tcg/loongarch64: Lower vector min max ops, Jiajie Chen, 2023/09/02
- [PATCH v3 11/16] tcg/loongarch64: Lower vector shift vector ops, Jiajie Chen, 2023/09/02
- [PATCH v3 12/16] tcg/loongarch64: Lower bitsel_vec to vbitsel, Jiajie Chen, 2023/09/02
- [PATCH v3 13/16] tcg/loongarch64: Lower vector shift integer ops, Jiajie Chen, 2023/09/02
- [PATCH v3 14/16] tcg/loongarch64: Lower rotv_vec ops to LSX, Jiajie Chen, 2023/09/02
- [PATCH v3 15/16] tcg/loongarch64: Lower rotli_vec to vrotri, Jiajie Chen, 2023/09/02
- [PATCH v3 16/16] tcg/loongarch64: Implement 128-bit load & store, Jiajie Chen <=
  - Re: [PATCH v3 16/16] tcg/loongarch64: Implement 128-bit load & store, Richard Henderson, 2023/09/02
    - Re: [PATCH v3 16/16] tcg/loongarch64: Implement 128-bit load & store, Jiajie Chen, 2023/09/02
    - Re: [PATCH v3 16/16] tcg/loongarch64: Implement 128-bit load & store, gaosong, 2023/09/03
    - Re: [PATCH v3 16/16] tcg/loongarch64: Implement 128-bit load & store, bibo mao, 2023/09/04

Prev by Date: [PATCH v3 15/16] tcg/loongarch64: Lower rotli_vec to vrotri
Next by Date: [PATCH v2] qdict: Preserve order for iterating qdict elements
Previous by thread: [PATCH v3 15/16] tcg/loongarch64: Lower rotli_vec to vrotri
Next by thread: Re: [PATCH v3 16/16] tcg/loongarch64: Implement 128-bit load & store
Index(es):
- Date
- Thread