qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v1 42/46] target/loongarch: Implement xvreplve xvinsve0 xvpickve xvb{sll/srl}v


From: Song Gao
Subject: [PATCH v1 42/46] target/loongarch: Implement xvreplve xvinsve0 xvpickve xvb{sll/srl}v
Date: Tue, 20 Jun 2023 17:38:10 +0800

This patch includes:
- XVREPLVE.{B/H/W/D};
- XVREPL128VEI.{B/H/W/D};
- XVREPLVE0.{B/H/W/D/Q};
- XVINSVE0.{W/D};
- XVPICKVE.{W/D};
- XVBSLL.V, XVBSRL.V.

Signed-off-by: Song Gao <gaosong@loongson.cn>
---
 target/loongarch/disas.c                     |  29 +++
 target/loongarch/helper.h                    |   5 +
 target/loongarch/insn_trans/trans_lasx.c.inc | 205 +++++++++++++++++++
 target/loongarch/insns.decode                |  29 +++
 target/loongarch/lasx_helper.c               |  29 +++
 5 files changed, 297 insertions(+)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index fd7d459921..3b89a5df87 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -1748,6 +1748,11 @@ static void output_rx_i(DisasContext *ctx, arg_rx_i *a, const char *mnemonic)
     output(ctx, mnemonic, "r%d, x%d, 0x%x", a->rd, a->xj, a->imm);
 }
 
+static void output_xxr(DisasContext *ctx, arg_xxr *a, const char *mnemonic)
+{   /* Disassembly operand printer: emits "xN, xN, rN" from xd, xj, rk. */
+    output(ctx, mnemonic, "x%d, x%d, r%d", a->xd, a->xj, a->rk);
+}
+
 INSN_LASX(xvadd_b,           xxx)
 INSN_LASX(xvadd_h,           xxx)
 INSN_LASX(xvadd_w,           xxx)
@@ -2518,3 +2523,27 @@ INSN_LASX(xvreplgr2vr_b,     xr)
 INSN_LASX(xvreplgr2vr_h,     xr)
 INSN_LASX(xvreplgr2vr_w,     xr)
 INSN_LASX(xvreplgr2vr_d,     xr)
+
+INSN_LASX(xvreplve_b,        xxr)
+INSN_LASX(xvreplve_h,        xxr)
+INSN_LASX(xvreplve_w,        xxr)
+INSN_LASX(xvreplve_d,        xxr)
+INSN_LASX(xvrepl128vei_b,    xx_i)
+INSN_LASX(xvrepl128vei_h,    xx_i)
+INSN_LASX(xvrepl128vei_w,    xx_i)
+INSN_LASX(xvrepl128vei_d,    xx_i)
+
+INSN_LASX(xvreplve0_b,       xx)
+INSN_LASX(xvreplve0_h,       xx)
+INSN_LASX(xvreplve0_w,       xx)
+INSN_LASX(xvreplve0_d,       xx)
+INSN_LASX(xvreplve0_q,       xx)
+
+INSN_LASX(xvinsve0_w,        xx_i)
+INSN_LASX(xvinsve0_d,        xx_i)
+
+INSN_LASX(xvpickve_w,        xx_i)
+INSN_LASX(xvpickve_d,        xx_i)
+
+INSN_LASX(xvbsll_v,          xx_i)
+INSN_LASX(xvbsrl_v,          xx_i)
diff --git a/target/loongarch/helper.h b/target/loongarch/helper.h
index f6d64bfde5..6c4525a413 100644
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@@ -1232,3 +1232,8 @@ DEF_HELPER_3(xvsetallnez_b, void, env, i32, i32)
 DEF_HELPER_3(xvsetallnez_h, void, env, i32, i32)
 DEF_HELPER_3(xvsetallnez_w, void, env, i32, i32)
 DEF_HELPER_3(xvsetallnez_d, void, env, i32, i32)
+
+DEF_HELPER_4(xvinsve0_w, void, env, i32, i32, i32)
+DEF_HELPER_4(xvinsve0_d, void, env, i32, i32, i32)
+DEF_HELPER_4(xvpickve_w, void, env, i32, i32, i32)
+DEF_HELPER_4(xvpickve_d, void, env, i32, i32, i32)
diff --git a/target/loongarch/insn_trans/trans_lasx.c.inc b/target/loongarch/insn_trans/trans_lasx.c.inc
index 0fc26023d1..e63b1c67c9 100644
--- a/target/loongarch/insn_trans/trans_lasx.c.inc
+++ b/target/loongarch/insn_trans/trans_lasx.c.inc
@@ -2851,3 +2851,208 @@ TRANS(xvreplgr2vr_b, gvec_dupx, MO_8)
 TRANS(xvreplgr2vr_h, gvec_dupx, MO_16)
 TRANS(xvreplgr2vr_w, gvec_dupx, MO_32)
 TRANS(xvreplgr2vr_d, gvec_dupx, MO_64)
+
+static bool gen_xvreplve(DisasContext *ctx, arg_xxr *a, int vece, int bit,
+                         void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
+{   /* Replicate the xj element indexed by GPR rk, once per 16-byte block. */
+    TCGv_i64 t0 = tcg_temp_new_i64();  /* element byte offset */
+    TCGv_ptr t1 = tcg_temp_new_ptr();  /* cpu_env + t0 */
+    TCGv_i64 t2 = tcg_temp_new_i64();  /* value loaded by func */
+
+    CHECK_ASXE;
+    /* Mask the index to LSX_LEN / bit elements, then scale it to bytes. */
+    tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN / bit) - 1);
+    tcg_gen_shli_i64(t0, t0, vece);
+    if (HOST_BIG_ENDIAN) {  /* NOTE(review): verify this XOR constant */
+        tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN / bit) - 1));
+    }
+    /* func loads relative to t1, so the offset is added at run time. */
+    tcg_gen_trunc_i64_ptr(t1, t0);
+    tcg_gen_add_ptr(t1, t1, cpu_env);
+    func(t2, t1, vec_full_offset(a->xj));
+    tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->xd), 16, 16, t2);
+    func(t2, t1,  offsetof(CPULoongArchState, fpr[a->xj].xreg.XQ(1)));
+    tcg_gen_gvec_dup_i64(vece,
+                         offsetof(CPULoongArchState, fpr[a->xd].xreg.XQ(1)),
+                         16, 16, t2);  /* same again for the XQ(1) block */
+    return true;
+}
+
+TRANS(xvreplve_b, gen_xvreplve, MO_8,  8, tcg_gen_ld8u_i64)
+TRANS(xvreplve_h, gen_xvreplve, MO_16, 16, tcg_gen_ld16u_i64)
+TRANS(xvreplve_w, gen_xvreplve, MO_32, 32, tcg_gen_ld32u_i64)
+TRANS(xvreplve_d, gen_xvreplve, MO_64, 64, tcg_gen_ld_i64)
+
+static bool trans_xvrepl128vei_b(DisasContext *ctx, arg_xx_i * a)
+{   /* Splat XB(imm) over 16 bytes at XB(0), and XB(imm + 16) at XB(16). */
+    CHECK_ASXE;
+
+    tcg_gen_gvec_dup_mem(MO_8,
+                         offsetof(CPULoongArchState, fpr[a->xd].xreg.XB(0)),
+                         offsetof(CPULoongArchState,
+                                  fpr[a->xj].xreg.XB((a->imm))),
+                         16, 16);  /* oprsz, maxsz in bytes */
+    tcg_gen_gvec_dup_mem(MO_8,
+                         offsetof(CPULoongArchState, fpr[a->xd].xreg.XB(16)),
+                         offsetof(CPULoongArchState,
+                                  fpr[a->xj].xreg.XB((a->imm + 16))),
+                         16, 16);
+    return true;
+}
+
+static bool trans_xvrepl128vei_h(DisasContext *ctx, arg_xx_i *a)
+{   /* Splat XH(imm) over 16 bytes at XH(0), and XH(imm + 8) at XH(8). */
+    CHECK_ASXE;
+
+    tcg_gen_gvec_dup_mem(MO_16,
+                         offsetof(CPULoongArchState, fpr[a->xd].xreg.XH(0)),
+                         offsetof(CPULoongArchState,
+                                  fpr[a->xj].xreg.XH((a->imm))),
+                         16, 16);  /* oprsz, maxsz in bytes */
+    tcg_gen_gvec_dup_mem(MO_16,
+                         offsetof(CPULoongArchState, fpr[a->xd].xreg.XH(8)),
+                         offsetof(CPULoongArchState,
+                                  fpr[a->xj].xreg.XH((a->imm + 8))),
+                         16, 16);
+    return true;
+}
+
+static bool trans_xvrepl128vei_w(DisasContext *ctx, arg_xx_i *a)
+{   /* Splat XW(imm) over 16 bytes at XW(0), and XW(imm + 4) at XW(4). */
+    CHECK_ASXE;
+
+    tcg_gen_gvec_dup_mem(MO_32,
+                         offsetof(CPULoongArchState, fpr[a->xd].xreg.XW(0)),
+                         offsetof(CPULoongArchState,
+                                  fpr[a->xj].xreg.XW((a->imm))),
+                         16, 16);  /* oprsz, maxsz in bytes */
+    tcg_gen_gvec_dup_mem(MO_32,
+                         offsetof(CPULoongArchState, fpr[a->xd].xreg.XW(4)),
+                         offsetof(CPULoongArchState,
+                                  fpr[a->xj].xreg.XW((a->imm + 4))),
+                         16, 16);
+    return true;
+}
+
+static bool trans_xvrepl128vei_d(DisasContext *ctx, arg_xx_i *a)
+{   /* Splat XD(imm) over 16 bytes at XD(0), and XD(imm + 2) at XD(2). */
+    CHECK_ASXE;
+
+    tcg_gen_gvec_dup_mem(MO_64,
+                         offsetof(CPULoongArchState, fpr[a->xd].xreg.XD(0)),
+                         offsetof(CPULoongArchState,
+                                  fpr[a->xj].xreg.XD((a->imm))),
+                         16, 16);  /* oprsz, maxsz in bytes */
+    tcg_gen_gvec_dup_mem(MO_64,
+                         offsetof(CPULoongArchState, fpr[a->xd].xreg.XD(2)),
+                         offsetof(CPULoongArchState,
+                                  fpr[a->xj].xreg.XD((a->imm + 2))),
+                         16, 16);
+    return true;
+}
+
+#define XVREPLVE0(NAME, MOP)                                                  \
+static bool trans_## NAME(DisasContext *ctx, arg_xx * a)                      \
+{                                                                             \
+    CHECK_ASXE;                                                               \
+    /* Duplicate the element at xj's base offset across 32 bytes of xd. */    \
+    tcg_gen_gvec_dup_mem(MOP, vec_full_offset(a->xd), vec_full_offset(a->xj), \
+                         32, 32);                                             \
+    return true;                                                              \
+}
+
+XVREPLVE0(xvreplve0_b, MO_8)
+XVREPLVE0(xvreplve0_h, MO_16)
+XVREPLVE0(xvreplve0_w, MO_32)
+XVREPLVE0(xvreplve0_d, MO_64)
+XVREPLVE0(xvreplve0_q, MO_128)
+
+TRANS(xvinsve0_w, gen_xx_i, gen_helper_xvinsve0_w)
+TRANS(xvinsve0_d, gen_xx_i, gen_helper_xvinsve0_d)
+
+TRANS(xvpickve_w, gen_xx_i, gen_helper_xvpickve_w)
+TRANS(xvpickve_d, gen_xx_i, gen_helper_xvpickve_d)
+
+static bool trans_xvbsll_v(DisasContext *ctx, arg_xx_i *a)
+{   /* Shift each (low, high) 64-bit pair of xj left by imm & 0xf bytes. */
+    int ofs;
+    TCGv_i64 desthigh[2], destlow[2], high[2], low[2];
+
+    CHECK_ASXE;
+
+    desthigh[0] = tcg_temp_new_i64();
+    desthigh[1] = tcg_temp_new_i64();
+    destlow[0] = tcg_temp_new_i64();
+    destlow[1] = tcg_temp_new_i64();
+    high[0] = tcg_temp_new_i64();
+    high[1] = tcg_temp_new_i64();
+    low[0] = tcg_temp_new_i64();
+    low[1] = tcg_temp_new_i64();
+    /* [0] works on xreg64 indices 0/1, [1] on 2/3 (presumably 64-bit slots). */
+    get_xreg64(low[0], a->xj, 0);
+    get_xreg64(low[1], a->xj, 2);
+    /* Shift distance in bits; the low four bits of imm count bytes. */
+    ofs = ((a->imm) & 0xf) * 8;
+    if (ofs < 64) {  /* high = (high << ofs) | (low >> (64 - ofs)) */
+        get_xreg64(high[0], a->xj, 1);
+        get_xreg64(high[1], a->xj, 3);
+        tcg_gen_extract2_i64(desthigh[0], low[0], high[0], 64 - ofs);
+        tcg_gen_extract2_i64(desthigh[1], low[1], high[1], 64 - ofs);
+        tcg_gen_shli_i64(destlow[0], low[0], ofs);
+        tcg_gen_shli_i64(destlow[1], low[1], ofs);
+    } else {  /* 8+ bytes: low word moves up to high, low becomes 0 */
+        tcg_gen_shli_i64(desthigh[0], low[0], ofs - 64);
+        tcg_gen_shli_i64(desthigh[1], low[1], ofs - 64);
+        destlow[0] = tcg_constant_i64(0);
+        destlow[1] = tcg_constant_i64(0);
+    }
+    /* Indices 1 and 3 receive the high words, 0 and 2 the low words. */
+    set_xreg64(desthigh[0], a->xd, 1);
+    set_xreg64(destlow[0], a->xd, 0);
+    set_xreg64(desthigh[1], a->xd, 3);
+    set_xreg64(destlow[1], a->xd, 2);
+
+    return true;
+}
+
+static bool trans_xvbsrl_v(DisasContext *ctx, arg_xx_i *a)
+{   /* Shift each (low, high) 64-bit pair of xj right by imm & 0xf bytes. */
+    TCGv_i64 desthigh[2], destlow[2], high[2], low[2];
+    int ofs;
+
+    CHECK_ASXE;
+
+    desthigh[0] = tcg_temp_new_i64();
+    desthigh[1] = tcg_temp_new_i64();
+    destlow[0] = tcg_temp_new_i64();
+    destlow[1] = tcg_temp_new_i64();
+    high[0] = tcg_temp_new_i64();
+    high[1] = tcg_temp_new_i64();
+    low[0] = tcg_temp_new_i64();
+    low[1] = tcg_temp_new_i64();
+    /* [0] works on xreg64 indices 0/1, [1] on 2/3 (presumably 64-bit slots). */
+    get_xreg64(high[0], a->xj, 1);
+    get_xreg64(high[1], a->xj, 3);
+    /* Shift distance in bits; the low four bits of imm count bytes. */
+    ofs = ((a->imm) & 0xf) * 8;
+    if (ofs < 64) {  /* low = (low >> ofs) | (high << (64 - ofs)) */
+        get_xreg64(low[0], a->xj, 0);
+        get_xreg64(low[1], a->xj, 2);
+        tcg_gen_extract2_i64(destlow[0], low[0], high[0], ofs);
+        tcg_gen_extract2_i64(destlow[1], low[1], high[1], ofs);
+        tcg_gen_shri_i64(desthigh[0], high[0], ofs);
+        tcg_gen_shri_i64(desthigh[1], high[1], ofs);
+    } else {  /* 8+ bytes: high word moves down to low, high becomes 0 */
+        tcg_gen_shri_i64(destlow[0], high[0], ofs - 64);
+        tcg_gen_shri_i64(destlow[1], high[1], ofs - 64);
+        desthigh[0] = tcg_constant_i64(0);
+        desthigh[1] = tcg_constant_i64(0);
+    }
+    /* Indices 1 and 3 receive the high words, 0 and 2 the low words. */
+    set_xreg64(desthigh[0], a->xd, 1);
+    set_xreg64(destlow[0], a->xd, 0);
+    set_xreg64(desthigh[1], a->xd, 3);
+    set_xreg64(destlow[1], a->xd, 2);
+
+    return true;
+}
diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index 8c87b3f840..697087e6ef 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -1311,6 +1311,7 @@ vstelm_b         0011 000110 .... ........ ..... .....    @vr_i8i4
 &cx           cd xj
 &xr_i         xd rj imm
 &rx_i         rd xj imm
+&xxr          xd xj rk
 
 #
 # LASX Formats
@@ -1321,6 +1322,8 @@ vstelm_b         0011 000110 .... ........ ..... .....    @vr_i8i4
 @xxx                .... ........ ..... xk:5 xj:5 xd:5    &xxx
 @xr                .... ........ ..... ..... rj:5 xd:5    &xr
 @xx_i5            .... ........ ..... imm:s5 xj:5 xd:5    &xx_i
+@xx_ui1       .... ........ ..... .... imm:1 xj:5 xd:5    &xx_i
+@xx_ui2        .... ........ ..... ... imm:2 xj:5 xd:5    &xx_i
 @xx_ui3         .... ........ ..... .. imm:3 xj:5 xd:5    &xx_i
 @xx_ui4          .... ........ ..... . imm:4 xj:5 xd:5    &xx_i
 @xx_ui5            .... ........ ..... imm:5 xj:5 xd:5    &xx_i
@@ -1334,6 +1337,7 @@ vstelm_b         0011 000110 .... ........ ..... .....    @vr_i8i4
 @xr_ui2        .... ........ ..... ... imm:2 rj:5 xd:5    &xr_i
 @rx_ui3         .... ........ ..... .. imm:3 xj:5 rd:5    &rx_i
 @rx_ui2        .... ........ ..... ... imm:2 xj:5 rd:5    &rx_i
+@xxr                .... ........ ..... rk:5 xj:5 xd:5    &xxr
 
 xvadd_b          0111 01000000 10100 ..... ..... .....    @xxx
 xvadd_h          0111 01000000 10101 ..... ..... .....    @xxx
@@ -2022,3 +2026,28 @@ xvreplgr2vr_b    0111 01101001 11110 00000 ..... .....    @xr
 xvreplgr2vr_h    0111 01101001 11110 00001 ..... .....    @xr
 xvreplgr2vr_w    0111 01101001 11110 00010 ..... .....    @xr
 xvreplgr2vr_d    0111 01101001 11110 00011 ..... .....    @xr
+
+xvreplve_b       0111 01010010 00100 ..... ..... .....    @xxr
+xvreplve_h       0111 01010010 00101 ..... ..... .....    @xxr
+xvreplve_w       0111 01010010 00110 ..... ..... .....    @xxr
+xvreplve_d       0111 01010010 00111 ..... ..... .....    @xxr
+
+xvrepl128vei_b   0111 01101111 01111 0 .... ..... .....   @xx_ui4
+xvrepl128vei_h   0111 01101111 01111 10 ... ..... .....   @xx_ui3
+xvrepl128vei_w   0111 01101111 01111 110 .. ..... .....   @xx_ui2
+xvrepl128vei_d   0111 01101111 01111 1110 . ..... .....   @xx_ui1
+
+xvreplve0_b      0111 01110000 01110 00000 ..... .....    @xx
+xvreplve0_h      0111 01110000 01111 00000 ..... .....    @xx
+xvreplve0_w      0111 01110000 01111 10000 ..... .....    @xx
+xvreplve0_d      0111 01110000 01111 11000 ..... .....    @xx
+xvreplve0_q      0111 01110000 01111 11100 ..... .....    @xx
+
+xvinsve0_w       0111 01101111 11111 10 ... ..... .....   @xx_ui3
+xvinsve0_d       0111 01101111 11111 110 .. ..... .....   @xx_ui2
+
+xvpickve_w       0111 01110000 00111 10 ... ..... .....   @xx_ui3
+xvpickve_d       0111 01110000 00111 110 .. ..... .....   @xx_ui2
+
+xvbsll_v         0111 01101000 11100 ..... ..... .....    @xx_ui5
+xvbsrl_v         0111 01101000 11101 ..... ..... .....    @xx_ui5
diff --git a/target/loongarch/lasx_helper.c b/target/loongarch/lasx_helper.c
index 56dfe10a0d..4422c1292e 100644
--- a/target/loongarch/lasx_helper.c
+++ b/target/loongarch/lasx_helper.c
@@ -2819,3 +2819,32 @@ XSETALLNEZ(xvsetallnez_b, MO_8)
 XSETALLNEZ(xvsetallnez_h, MO_16)
 XSETALLNEZ(xvsetallnez_w, MO_32)
 XSETALLNEZ(xvsetallnez_d, MO_64)
+
+#define XVINSVE0(NAME, E, MASK) /* defines helper NAME */ \
+void HELPER(NAME)(CPULoongArchState *env,                 \
+                  uint32_t xd, uint32_t xj, uint32_t imm) \
+{                                                         \
+    XReg *Xd = &(env->fpr[xd].xreg);                      \
+    XReg *Xj = &(env->fpr[xj].xreg);                      \
+    Xd->E(imm & MASK) = Xj->E(0); /* rest of Xd kept */   \
+}
+
+XVINSVE0(xvinsve0_w, XW, 0x7)
+XVINSVE0(xvinsve0_d, XD, 0x3)
+
+#define XVPICKVE(NAME, E, BIT, MASK)                      \
+void HELPER(NAME)(CPULoongArchState *env,                 \
+                  uint32_t xd, uint32_t xj, uint32_t imm) \
+{                                                         \
+    int i;                                                \
+    XReg *Xd = &(env->fpr[xd].xreg);                      \
+    XReg *Xj = &(env->fpr[xj].xreg);                      \
+    /* Xd[0] = picked lane; zero every other lane. */     \
+    Xd->E(0) = Xj->E(imm & MASK);                         \
+    for (i = 1; i < LASX_LEN / BIT; i++) {                \
+        Xd->E(i) = 0;                                     \
+    }                                                     \
+}
+
+XVPICKVE(xvpickve_w, XW, 32, 0x7)
+XVPICKVE(xvpickve_d, XD, 64, 0x3)
-- 
2.39.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]