[PATCH v2 14/27] target/riscv: 128-bit support for instructions using ge

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 14/27] target/riscv: 128-bit support for instructions using ge

From:	Frédéric Pétrot
Subject:	[PATCH v2 14/27] target/riscv: 128-bit support for instructions using gen_arith/gen_logic
Date:	Wed, 6 Oct 2021 23:28:20 +0200

The RISCV ISA is such that the arithmetic and logic instructions have the
same opcode, irrelevant of the base register size.
So, for all arithmetic and logic insns existing for the 3 register sizes,
excluding shifts, this patch adds the support for the 128-bit computations.

Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
Co-authored-by: Fabien Portas <fabien.portas@grenoble-inp.org>
---
 target/riscv/insn_trans/trans_rvi.c.inc | 223 +++++++++++++++++++++---
 1 file changed, 198 insertions(+), 25 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvi.c.inc 
b/target/riscv/insn_trans/trans_rvi.c.inc
index ab85693364..814aa5f1b7 100644
--- a/target/riscv/insn_trans/trans_rvi.c.inc
+++ b/target/riscv/insn_trans/trans_rvi.c.inc
@@ -26,14 +26,17 @@ static bool trans_illegal(DisasContext *ctx, arg_empty *a)
 
 static bool trans_c64_illegal(DisasContext *ctx, arg_empty *a)
 {
-     REQUIRE_64BIT(ctx);
-     return trans_illegal(ctx, a);
+    REQUIRE_64_OR_128BIT(ctx);
+    return trans_illegal(ctx, a);
 }
 
 static bool trans_lui(DisasContext *ctx, arg_lui *a)
 {
     if (a->rd != 0) {
         tcg_gen_movi_tl(cpu_gpr[a->rd], a->imm);
+        if (is_128bit(ctx)) {
+            tcg_gen_movi_tl(cpu_gprh_check(ctx, a->rd), -(a->imm < 0));
+        }
     }
     return true;
 }
@@ -41,7 +44,19 @@ static bool trans_lui(DisasContext *ctx, arg_lui *a)
 static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
 {
     if (a->rd != 0) {
+        if (is_128bit(ctx)) {
+            /* TODO : when pc is 128 bits, use all its bits */
+            TCGv pc = tcg_constant_tl(ctx->base.pc_next),
+                 imm = tcg_constant_tl(a->imm),
+                 immh = tcg_constant_tl(-(a->imm < 0)),
+                 zero = tcg_constant_tl(0);
+            tcg_gen_add2_tl(cpu_gpr[a->rd], cpu_gprh_check(ctx, a->rd),
+                            pc, zero,
+                            imm, immh);
+            return true;
+        }
         tcg_gen_movi_tl(cpu_gpr[a->rd], a->imm + ctx->base.pc_next);
+        return true;
     }
     return true;
 }
@@ -84,13 +99,121 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a)
     return true;
 }
 
+/*
+ * Comparison predicates using bitwise logic taken from Hacker's Delight, 2.12
+ * We are just interested in the sign bit, so rl is not used but for 
subtracting
+ */
+static bool gen_setcond_i128(TCGv rl, TCGv rh,
+                             TCGv al, TCGv ah,
+                             TCGv bl, TCGv bh,
+                             TCGCond cond)
+{
+    switch (cond) {
+    case TCG_COND_EQ:
+        tcg_gen_setcond_tl(TCG_COND_EQ, rl, al, bl);
+        tcg_gen_setcond_tl(TCG_COND_EQ, rh, ah, bh);
+        tcg_gen_and_tl(rl, rl, rh);
+        break;
+
+    case TCG_COND_NE:
+        tcg_gen_setcond_tl(TCG_COND_NE, rl, al, bl);
+        tcg_gen_setcond_tl(TCG_COND_NE, rh, ah, bh);
+        tcg_gen_or_tl(rl, rl, rh);
+        break;
+
+    case TCG_COND_LT:
+    {
+        TCGv tmp1 = tcg_temp_new(),
+             tmp2 = tcg_temp_new();
+
+        tcg_gen_sub2_tl(rl, rh, al, ah, bl, bh);
+        tcg_gen_xor_tl(tmp1, rh, ah);
+        tcg_gen_xor_tl(tmp2, ah, bh);
+        tcg_gen_and_tl(tmp1, tmp1, tmp2);
+        tcg_gen_xor_tl(tmp1, rh, tmp1);
+        tcg_gen_shri_tl(rl, tmp1, 63);
+
+        tcg_temp_free(tmp1);
+        tcg_temp_free(tmp2);
+        break;
+    }
+
+    case TCG_COND_GE:
+        /* Invert result of TCG_COND_LT */
+        gen_setcond_i128(rl, rh, al, ah, bl, bh, TCG_COND_LT);
+        tcg_gen_xori_tl(rl, rl, 1);
+        break;
+
+    case TCG_COND_LTU:
+    {
+        TCGv tmp1 = tcg_temp_new(),
+             tmp2 = tcg_temp_new();
+
+        tcg_gen_sub2_tl(rl, rh, al, ah, bl, bh);
+        tcg_gen_eqv_tl(tmp1, ah, bh);
+        tcg_gen_and_tl(tmp1, tmp1, rh);
+        tcg_gen_andc_tl(tmp2, bh, ah);
+        tcg_gen_or_tl(tmp1, tmp1, tmp2);
+        tcg_gen_shri_tl(rl, tmp1, 63);
+
+        tcg_temp_free(tmp1);
+        tcg_temp_free(tmp2);
+        break;
+    }
+
+    case TCG_COND_GEU:
+        /* Invert result of TCG_COND_LTU */
+        gen_setcond_i128(rl, rh, al, ah, bl, bh, TCG_COND_LTU);
+        tcg_gen_xori_tl(rl, rl, 1);
+        break;
+
+    default:
+        return false;
+    }
+    tcg_gen_movi_tl(rh, 0);
+    return true;
+}
+
 static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond)
 {
     TCGLabel *l = gen_new_label();
     TCGv src1 = get_gpr(ctx, a->rs1, EXT_SIGN);
     TCGv src2 = get_gpr(ctx, a->rs2, EXT_SIGN);
 
-    tcg_gen_brcond_tl(cond, src1, src2, l);
+    if (is_128bit(ctx)) {
+        TCGv src1h = get_gprh(ctx, a->rs1),
+             src2h = get_gprh(ctx, a->rs2),
+             tmpl = tcg_temp_new(),
+             tmph = tcg_temp_new();
+
+        /*
+         * Do not use gen_setcond_i128 for EQ and NE as these conditions are
+         * often met and can be more efficiently implemented.
+         */
+        if (cond == TCG_COND_EQ || cond == TCG_COND_NE) {
+            /*
+             * bnez and beqz being used quite often too, lets optimize them,
+             * although QEMU's tcg optimizer handles these cases nicely
+             */
+            if (a->rs2 == 0) {
+                tcg_gen_or_tl(tmpl, src1, src1h);
+                tcg_gen_brcondi_tl(cond, tmpl, 0, l);
+            } else {
+                tcg_gen_xor_tl(tmpl, src1, src2);
+                tcg_gen_xor_tl(tmph, src1h, src2h);
+                tcg_gen_or_tl(tmpl, tmpl, tmph);
+                tcg_gen_brcondi_tl(cond, tmpl, 0, l);
+            }
+        } else {
+            gen_setcond_i128(tmpl, tmph, src1, src1h, src2, src2h, cond);
+            tcg_gen_brcondi_tl(TCG_COND_NE, tmpl, 0, l);
+        }
+
+        tcg_temp_free(tmph);
+        tcg_temp_free(tmpl);
+    } else {
+        tcg_gen_brcond_tl(cond, src1, src2, l);
+    }
     gen_goto_tb(ctx, 1, ctx->pc_succ_insn);
 
     gen_set_label(l); /* branch taken */
@@ -235,10 +358,18 @@ static bool trans_sd(DisasContext *ctx, arg_sd *a)
     return gen_store(ctx, a, MO_TEQ);
 }
 
+static void gen_addi2_i128(TCGv retl, TCGv reth,
+                           TCGv srcl, TCGv srch, target_long imm)
+{
+    TCGv imml  = tcg_constant_tl(imm),
+         immh  = tcg_constant_tl(-(imm < 0));
+    tcg_gen_add2_tl(retl, reth, srcl, srch, imml, immh);
+}
+
 static bool trans_addi(DisasContext *ctx, arg_addi *a)
 {
     return gen_arith_imm_fn(ctx, a, EXT_NONE,
-                            tcg_gen_addi_tl, tcg_gen_addi_tl, NULL);
+                            tcg_gen_addi_tl, tcg_gen_addi_tl, gen_addi2_i128);
 }
 
 static void gen_slt_tl(TCGv ret, TCGv s1, TCGv s2)
@@ -246,21 +377,33 @@ static void gen_slt_tl(TCGv ret, TCGv s1, TCGv s2)
     tcg_gen_setcond_tl(TCG_COND_LT, ret, s1, s2);
 }
 
+static void gen_slt_i128(TCGv retl, TCGv reth,
+                         TCGv s1l, TCGv s1h, TCGv s2l, TCGv s2h)
+{
+    gen_setcond_i128(retl, reth, s1l, s1h, s2l, s2h, TCG_COND_LT);
+}
+
 static void gen_sltu_tl(TCGv ret, TCGv s1, TCGv s2)
 {
     tcg_gen_setcond_tl(TCG_COND_LTU, ret, s1, s2);
 }
 
+static void gen_sltu_i128(TCGv retl, TCGv reth,
+                         TCGv s1l, TCGv s1h, TCGv s2l, TCGv s2h)
+{
+    gen_setcond_i128(retl, reth, s1l, s1h, s2l, s2h, TCG_COND_LTU);
+}
+
 static bool trans_slti(DisasContext *ctx, arg_slti *a)
 {
     return gen_arith_imm_tl(ctx, a, EXT_SIGN,
-                            gen_slt_lt, gen_slt_lt, NULL);
+                            gen_slt_tl, gen_slt_tl, gen_slt_i128);
 }
 
 static bool trans_sltiu(DisasContext *ctx, arg_sltiu *a)
 {
     return gen_arith_imm_tl(ctx, a, EXT_SIGN,
-                            gen_sltu_lt, gen_sltu_lt, NULL);
+                            gen_sltu_tl, gen_sltu_tl, gen_sltu_i128);
 }
 
 static bool trans_xori(DisasContext *ctx, arg_xori *a)
@@ -299,13 +442,13 @@ static bool trans_srai(DisasContext *ctx, arg_srai *a)
 static bool trans_add(DisasContext *ctx, arg_add *a)
 {
     return gen_arith(ctx, a, EXT_NONE,
-                     tcg_gen_add_tl, tcg_gen_add_tl, NULL);
+                     tcg_gen_add_tl, tcg_gen_add_tl, tcg_gen_add2_tl);
 }
 
 static bool trans_sub(DisasContext *ctx, arg_sub *a)
 {
     return gen_arith(ctx, a, EXT_NONE,
-                     tcg_gen_sub_tl, tcg_gen_sub_tl, NULL);
+                     tcg_gen_sub_tl, tcg_gen_sub_tl, tcg_gen_sub2_tl);
 }
 
 static bool trans_sll(DisasContext *ctx, arg_sll *a)
@@ -316,22 +459,26 @@ static bool trans_sll(DisasContext *ctx, arg_sll *a)
 
 static bool trans_slt(DisasContext *ctx, arg_slt *a)
 {
-    return gen_arith(ctx, a, EXT_SIGN, gen_slt_lt, gen_slt_lt, NULL);
+    return gen_arith(ctx, a, EXT_SIGN,
+                     gen_slt_tl, gen_slt_tl, gen_slt_i128);
 }
 
 static bool trans_sltu(DisasContext *ctx, arg_sltu *a)
 {
-    return gen_arith(ctx, a, EXT_SIGN, gen_sltu_lt, gen_sltu_lt, NULL);
+    return gen_arith(ctx, a, EXT_SIGN,
+                     gen_sltu_tl, gen_sltu_tl, gen_sltu_i128);
 }
 
 static bool trans_srl(DisasContext *ctx, arg_srl *a)
 {
-    return gen_shift(ctx, a, EXT_ZERO, tcg_gen_shr_tl, tcg_gen_shr_tl, NULL);
+    return gen_shift(ctx, a, EXT_ZERO,
+                     tcg_gen_shr_tl, tcg_gen_shr_tl, NULL);
 }
 
 static bool trans_sra(DisasContext *ctx, arg_sra *a)
 {
-    return gen_shift(ctx, a, EXT_SIGN, tcg_gen_sar_tl, tcg_gen_sar_tl, NULL);
+    return gen_shift(ctx, a, EXT_SIGN,
+                     tcg_gen_sar_tl, tcg_gen_sar_tl, NULL);
 }
 
 static bool trans_xor(DisasContext *ctx, arg_xor *a)
@@ -349,18 +496,26 @@ static bool trans_and(DisasContext *ctx, arg_and *a)
     return gen_logic(ctx, a, EXT_NONE, tcg_gen_and_tl);
 }
 
+static void gen_addiw_i128(TCGv retl, TCGv reth,
+                           TCGv srcl, TCGv srch, target_long imm)
+{
+        tcg_gen_addi_tl(retl, srcl, imm);
+}
+
 static bool trans_addiw(DisasContext *ctx, arg_addiw *a)
 {
-    REQUIRE_64BIT(ctx);
+    REQUIRE_64_OR_128BIT(ctx);
     ctx->w = true;
-    return gen_arith_imm_fn(ctx, a, EXT_NONE, NULL, tcg_gen_addi_tl, NULL);
+    return gen_arith_imm_fn(ctx, a, EXT_NONE,
+                            NULL, tcg_gen_addi_tl, gen_addiw_i128);
 }
 
 static bool trans_slliw(DisasContext *ctx, arg_slliw *a)
 {
-    REQUIRE_64BIT(ctx);
+    REQUIRE_64_OR_128BIT(ctx);
     ctx->w = true;
-    return gen_shift_imm_fn(ctx, a, EXT_NONE, NULL, tcg_gen_shli_tl, NULL);
+    return gen_shift_imm_fn(ctx, a, EXT_NONE,
+                            NULL, tcg_gen_shli_tl, NULL);
 }
 
 static void gen_srliw_tl(TCGv dst, TCGv src, target_long shamt)
@@ -370,9 +525,10 @@ static void gen_srliw_tl(TCGv dst, TCGv src, target_long 
shamt)
 
 static bool trans_srliw(DisasContext *ctx, arg_srliw *a)
 {
-    REQUIRE_64BIT(ctx);
+    REQUIRE_64_OR_128BIT(ctx);
     ctx->w = true;
-    return gen_shift_imm_fn(ctx, a, EXT_NONE, NULL, gen_srliw_tl, NULL);
+    return gen_shift_imm_fn(ctx, a, EXT_NONE,
+                            NULL, gen_srliw_tl, NULL);
 }
 
 static void gen_sraiw_tl(TCGv dst, TCGv src, target_long shamt)
@@ -382,23 +538,40 @@ static void gen_sraiw_tl(TCGv dst, TCGv src, target_long 
shamt)
 
 static bool trans_sraiw(DisasContext *ctx, arg_sraiw *a)
 {
-    REQUIRE_64BIT(ctx);
+    REQUIRE_64_OR_128BIT(ctx);
     ctx->w = true;
-    return gen_shift_imm_fn(ctx, a, EXT_NONE, NULL, gen_sraiw_tl, NULL);
+    return gen_shift_imm_fn(ctx, a, EXT_NONE,
+                            NULL, gen_sraiw_tl, NULL);
+}
+
+static void gen_addw_i128(TCGv retl, TCGv reth,
+                          TCGv src1l, TCGv src1h,
+                          TCGv src2l, TCGv src2h)
+{
+    tcg_gen_add_tl(retl, src1l, src2l);
 }
 
 static bool trans_addw(DisasContext *ctx, arg_addw *a)
 {
-    REQUIRE_64BIT(ctx);
+    REQUIRE_64_OR_128BIT(ctx);
     ctx->w = true;
-    return gen_arith(ctx, a, EXT_NONE, NULL, tcg_gen_add_tl, NULL);
+    return gen_arith(ctx, a, EXT_NONE,
+                     NULL, tcg_gen_add_tl, gen_addw_i128);
+}
+
+static void gen_subw_i128(TCGv retl, TCGv reth,
+                          TCGv src1l, TCGv src1h,
+                          TCGv src2l, TCGv src2h)
+{
+    tcg_gen_sub_tl(retl, src1l, src2l);
 }
 
 static bool trans_subw(DisasContext *ctx, arg_subw *a)
 {
-    REQUIRE_64BIT(ctx);
+    REQUIRE_64_OR_128BIT(ctx);
     ctx->w = true;
-    return gen_arith(ctx, a, EXT_NONE, NULL, tcg_gen_sub_tl, NULL);
+    return gen_arith(ctx, a, EXT_NONE,
+                     NULL, tcg_gen_sub_tl, gen_subw_i128);
 }
 
 static bool trans_sllw(DisasContext *ctx, arg_sllw *a)
@@ -406,7 +579,7 @@ static bool trans_sllw(DisasContext *ctx, arg_sllw *a)
     REQUIRE_64BIT(ctx);
     ctx->w = true;
     return gen_shift(ctx, a, EXT_NONE,
-                     tcg_gen_shl_tl, tcg_gen_shl_tl, NULL);
+                     NULL, tcg_gen_shl_tl, NULL);
 }
 
 static bool trans_srlw(DisasContext *ctx, arg_srlw *a)
-- 
2.33.0

[Prev in Thread]

Current Thread

[Next in Thread]

[PATCH v2 09/27] target/riscv: setup everything so that riscv128-softmmu compiles, (continued)
- [PATCH v2 09/27] target/riscv: setup everything so that riscv128-softmmu compiles, Frédéric Pétrot, 2021/10/06
- [PATCH v2 06/27] target/riscv: separation of bitwise logic and aritmetic helpers, Frédéric Pétrot, 2021/10/06
- [PATCH v2 07/27] target/riscv: refactoring calls to gen_arith, Frédéric Pétrot, 2021/10/06
- [PATCH v2 11/27] target/riscv: handling 128-bit part in logic/arith/shift gen helpers, Frédéric Pétrot, 2021/10/06
- [PATCH v2 12/27] target/riscv: moving some insns close to similar insns, Frédéric Pétrot, 2021/10/06
- [PATCH v2 13/27] target/riscv: rename a few gen function helpers, Frédéric Pétrot, 2021/10/06
- [PATCH v2 18/27] target/riscv: 128-bit double word integer shift instructions, Frédéric Pétrot, 2021/10/06
- [PATCH v2 15/27] target/riscv: 128-bit support for instructions using gen_shift, Frédéric Pétrot, 2021/10/06
- [PATCH v2 16/27] target/riscv: support for 128-bit loads and store, Frédéric Pétrot, 2021/10/06
- [PATCH v2 19/27] target/riscv: support for 128-bit base multiplications insns, Frédéric Pétrot, 2021/10/06
- [PATCH v2 14/27] target/riscv: 128-bit support for instructions using gen_arith/gen_logic, Frédéric Pétrot <=
- [PATCH v2 17/27] target/riscv: 128-bit double word integer arithmetic instructions, Frédéric Pétrot, 2021/10/06
- [PATCH v2 24/27] target/riscv: modification of the trans_csrxx for 128-bit support, Frédéric Pétrot, 2021/10/06
- [PATCH v2 20/27] target/riscv: addition of the 'd' insns for 128-bit mult/div/rem, Frédéric Pétrot, 2021/10/06
- [PATCH v2 21/27] target/riscv: div and rem insns on 128-bit, Frédéric Pétrot, 2021/10/06
- [PATCH v2 22/27] target/riscv: adding high part of some csrs, Frédéric Pétrot, 2021/10/06
- [PATCH v2 25/27] target/riscv: actual functions to realize crs 128-bit insns, Frédéric Pétrot, 2021/10/06
- [PATCH v2 23/27] target/riscv: helper functions to wrap calls to 128-bit csr insns, Frédéric Pétrot, 2021/10/06
- [PATCH v2 26/27] target/riscv: adding 128-bit access functions for some csrs, Frédéric Pétrot, 2021/10/06
- [PATCH v2 27/27] target/riscv: support for 128-bit satp, Frédéric Pétrot, 2021/10/06
- Re: [PATCH v2 00/27] Adding partial support for 128-bit riscv target, Philippe Mathieu-Daudé, 2021/10/07

Prev by Date: [PATCH v2 19/27] target/riscv: support for 128-bit base multiplications insns
Next by Date: [PATCH v2 17/27] target/riscv: 128-bit double word integer arithmetic instructions
Previous by thread: [PATCH v2 19/27] target/riscv: support for 128-bit base multiplications insns
Next by thread: [PATCH v2 17/27] target/riscv: 128-bit double word integer arithmetic instructions
Index(es):
- Date
- Thread