[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 19/27] target/riscv: support for 128-bit base multiplications
From: |
Frédéric Pétrot |
Subject: |
[PATCH v2 19/27] target/riscv: support for 128-bit base multiplications insns |
Date: |
Wed, 6 Oct 2021 23:28:25 +0200 |
This patch covers the multiplication part of the M extension: although a
bit complex, the code is generated inline, whereas division and remainder
resort to helpers (to come in a later patch).
Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
Co-authored-by: Fabien Portas <fabien.portas@grenoble-inp.org>
---
target/riscv/insn_trans/trans_rvm.c.inc | 183 ++++++++++++++++++++++--
1 file changed, 173 insertions(+), 10 deletions(-)
diff --git a/target/riscv/insn_trans/trans_rvm.c.inc
b/target/riscv/insn_trans/trans_rvm.c.inc
index 0c5f1ba548..d61c79450c 100644
--- a/target/riscv/insn_trans/trans_rvm.c.inc
+++ b/target/riscv/insn_trans/trans_rvm.c.inc
@@ -18,12 +18,106 @@
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
+static void gen_mulu2_i128(TCGv rll, TCGv rlh, TCGv rhl, TCGv rhh,
+ TCGv al, TCGv ah, TCGv bl, TCGv bh)
+{
+ TCGv tmpl = tcg_temp_new(),
+ tmph = tcg_temp_new(),
+ cnst_zero = tcg_constant_tl(0);
+ /* Full product of ah:al and bh:bl; limbs low to high: rll rlh rhl rhh */
+ tcg_gen_mulu2_tl(rll, rlh, al, bl);
+ /* Fold in both cross products (al * bh, then ah * bl) one limb up */
+ tcg_gen_mulu2_tl(tmpl, tmph, al, bh);
+ tcg_gen_add2_tl(rlh, rhl, rlh, cnst_zero, tmpl, tmph);
+ tcg_gen_mulu2_tl(tmpl, tmph, ah, bl);
+ tcg_gen_add2_tl(rlh, tmph, rlh, rhl, tmpl, tmph);
+ /* Overflow detection into rhh: a sum limb below the old rhl wrapped */
+ tcg_gen_setcond_tl(TCG_COND_LTU, rhh, tmph, rhl);
+ /* The sum limb sat in tmph so rhl was still intact for the test above */
+ tcg_gen_mov_tl(rhl, tmph);
+ /* Last partial product ah * bh is added into the two top limbs */
+ tcg_gen_mulu2_tl(tmpl, tmph, ah, bh);
+ tcg_gen_add2_tl(rhl, rhh, rhl, rhh, tmpl, tmph);
+ /* NOTE(review): outputs are written before all inputs are read; aliasing? */
+ tcg_temp_free(tmpl);
+ tcg_temp_free(tmph);
+}
+
+static void gen_mul_i128(TCGv rll, TCGv rlh,
+ TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
+{
+ TCGv rhl = tcg_temp_new(),
+ rhh = tcg_temp_new();
+ /* Only the low two limbs (rll, rlh) of the product are kept */
+ gen_mulu2_i128(rll, rlh, rhl, rhh, rs1l, rs1h, rs2l, rs2h);
+ /* The high limbs were computed into scratch temps and are dropped here */
+ tcg_temp_free(rhl);
+ tcg_temp_free(rhh);
+}
static bool trans_mul(DisasContext *ctx, arg_mul *a)
{
REQUIRE_EXT(ctx, RVM);
return gen_arith(ctx, a, EXT_NONE,
- tcg_gen_mul_tl, tcg_gen_mul_tl, NULL);
+ tcg_gen_mul_tl, tcg_gen_mul_tl, gen_mul_i128);
+}
+
+static void gen_mulh_i128(TCGv rhl, TCGv rhh,
+ TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
+{
+ TCGv rll = tcg_temp_new(),
+ rlh = tcg_temp_new(),
+ rlln = tcg_temp_new(),
+ rlhn = tcg_temp_new(),
+ rhln = tcg_temp_new(),
+ rhhn = tcg_temp_new(),
+ sgnres = tcg_temp_new(),
+ tmp = tcg_temp_new(),
+ cnst_one = tcg_constant_tl(1),
+ cnst_zero = tcg_constant_tl(0);
+ /* High half of signed * signed: multiply magnitudes, then fix the sign */
+ /* Extract sign of result (=> sgn(a) xor sgn(b)) */
+ tcg_gen_setcondi_tl(TCG_COND_LT, sgnres, rs1h, 0);
+ tcg_gen_setcondi_tl(TCG_COND_LT, tmp, rs2h, 0);
+ tcg_gen_xor_tl(sgnres, sgnres, tmp);
+ /* sgnres is 1 when exactly one operand is negative, else 0 */
+ /* Take absolute value of operands: (x + m) ^ m, m = sign mask in rhl */
+ tcg_gen_sari_tl(rhl, rs1h, 63);
+ tcg_gen_add2_tl(rlln, rlhn, rs1l, rs1h, rhl, rhl);
+ tcg_gen_xor_tl(rlln, rlln, rhl);
+ tcg_gen_xor_tl(rlhn, rlhn, rhl);
+ /* Same for rs2; note rhln/rhhn carry |rs2|, not a result limb, here */
+ tcg_gen_sari_tl(rhl, rs2h, 63);
+ tcg_gen_add2_tl(rhln, rhhn, rs2l, rs2h, rhl, rhl);
+ tcg_gen_xor_tl(rhln, rhln, rhl);
+ tcg_gen_xor_tl(rhhn, rhhn, rhl);
+ /* rlhn:rlln = |rs1| and rhhn:rhln = |rs2| feed the multiply below */
+ /* Unsigned multiplication */
+ gen_mulu2_i128(rll, rlh, rhl, rhh, rlln, rlhn, rhln, rhhn);
+ /* From here on rlln..rhhn are recycled to hold the negated product */
+ /* Negation of result (two's complement : ~res + 1) */
+ tcg_gen_not_tl(rlln, rll);
+ tcg_gen_not_tl(rlhn, rlh);
+ tcg_gen_not_tl(rhln, rhl);
+ tcg_gen_not_tl(rhhn, rhh);
+ /* Add 1 to the lowest limb and ripple the carry upward through tmp */
+ tcg_gen_add2_tl(rlln, tmp, rlln, cnst_zero, cnst_one, cnst_zero);
+ tcg_gen_add2_tl(rlhn, tmp, rlhn, cnst_zero, tmp, cnst_zero);
+ tcg_gen_add2_tl(rhln, tmp, rhln, cnst_zero, tmp, cnst_zero);
+ tcg_gen_add2_tl(rhhn, tmp, rhhn, cnst_zero, tmp, cnst_zero);
+ /* rlln/rlhn only served the carry chain; just the top limbs are used */
+ /* Move conditionally result or -result depending on result sign */
+ tcg_gen_movcond_tl(TCG_COND_NE, rhl, sgnres, cnst_zero, rhln, rhl);
+ tcg_gen_movcond_tl(TCG_COND_NE, rhh, sgnres, cnst_zero, rhhn, rhh);
+ /* NOTE(review): rhl is written before rs2l/rs2h are read; aliasing? */
+ tcg_temp_free(rll);
+ tcg_temp_free(rlh);
+ tcg_temp_free(rlln);
+ tcg_temp_free(rlhn);
+ tcg_temp_free(rhln);
+ tcg_temp_free(rhhn);
+ tcg_temp_free(sgnres);
+ tcg_temp_free(tmp);
}
static void gen_mulh(TCGv ret, TCGv s1, TCGv s2)
@@ -38,7 +132,58 @@ static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
{
REQUIRE_EXT(ctx, RVM);
return gen_arith(ctx, a, EXT_NONE,
- gen_mulh, gen_mulh, NULL);
+ gen_mulh, gen_mulh, gen_mulh_i128);
+}
+
+static void gen_mulhsu_i128(TCGv rhl, TCGv rhh,
+ TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
+{
+ TCGv rll = tcg_temp_new(),
+ rlh = tcg_temp_new(),
+ rlln = tcg_temp_new(),
+ rlhn = tcg_temp_new(),
+ rhln = tcg_temp_new(),
+ rhhn = tcg_temp_new(),
+ sgnres = tcg_temp_new(),
+ tmp = tcg_temp_new(),
+ cnst_one = tcg_constant_tl(1),
+ cnst_zero = tcg_constant_tl(0);
+ /* High half of signed rs1 * unsigned rs2: only rs1 decides the sign */
+ /* Extract sign of result (=> sgn(a)) */
+ tcg_gen_setcondi_tl(TCG_COND_LT, sgnres, rs1h, 0);
+ /* rs2 is never sign-tested or negated, i.e. it is taken as unsigned */
+ /* Take absolute value of rs1: (x + m) ^ m, m = sign mask held in rhl */
+ tcg_gen_sari_tl(rhl, rs1h, 63);
+ tcg_gen_add2_tl(rlln, rlhn, rs1l, rs1h, rhl, rhl);
+ tcg_gen_xor_tl(rlln, rlln, rhl);
+ tcg_gen_xor_tl(rlhn, rlhn, rhl);
+ /* rlhn:rlln = |rs1| goes into the multiply together with the raw rs2 */
+ /* Unsigned multiplication */
+ gen_mulu2_i128(rll, rlh, rhl, rhh, rlln, rlhn, rs2l, rs2h);
+ /* rlln/rlhn are recycled below; rhln/rhhn are first written there */
+ /* Negation of result (two's complement : ~res + 1) */
+ tcg_gen_not_tl(rlln, rll);
+ tcg_gen_not_tl(rlhn, rlh);
+ tcg_gen_not_tl(rhln, rhl);
+ tcg_gen_not_tl(rhhn, rhh);
+ /* Add 1 to the lowest limb and ripple the carry upward through tmp */
+ tcg_gen_add2_tl(rlln, tmp, rlln, cnst_zero, cnst_one, cnst_zero);
+ tcg_gen_add2_tl(rlhn, tmp, rlhn, cnst_zero, tmp, cnst_zero);
+ tcg_gen_add2_tl(rhln, tmp, rhln, cnst_zero, tmp, cnst_zero);
+ tcg_gen_add2_tl(rhhn, tmp, rhhn, cnst_zero, tmp, cnst_zero);
+ /* rlln/rlhn only served the carry chain; just the top limbs are used */
+ /* Move conditionally result or -result depending on result sign */
+ tcg_gen_movcond_tl(TCG_COND_NE, rhl, sgnres, cnst_zero, rhln, rhl);
+ tcg_gen_movcond_tl(TCG_COND_NE, rhh, sgnres, cnst_zero, rhhn, rhh);
+ /* NOTE(review): rhl is written before rs2l/rs2h are read; aliasing? */
+ tcg_temp_free(rll);
+ tcg_temp_free(rlh);
+ tcg_temp_free(rlln);
+ tcg_temp_free(rlhn);
+ tcg_temp_free(rhln);
+ tcg_temp_free(rhhn);
+ tcg_temp_free(sgnres);
+ tcg_temp_free(tmp);
}
static void gen_mulhsu(TCGv ret, TCGv arg1, TCGv arg2)
@@ -60,7 +205,19 @@ static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
{
REQUIRE_EXT(ctx, RVM);
return gen_arith(ctx, a, EXT_NONE,
- gen_mulhsu, gen_mulhsu, NULL);
+ gen_mulhsu, gen_mulhsu, gen_mulhsu_i128);
+}
+
+static void gen_mulhu_i128(TCGv rhl, TCGv rhh,
+ TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
+{
+ TCGv rll = tcg_temp_new(),
+ rlh = tcg_temp_new();
+ /* High half of the unsigned product; low limbs go to throwaway temps */
+ gen_mulu2_i128(rll, rlh, rhl, rhh, rs1l, rs1h, rs2l, rs2h);
+ /* rhl/rhh were filled in directly by the helper, nothing to copy */
+ tcg_temp_free(rll);
+ tcg_temp_free(rlh);
}
static void gen_mulhu(TCGv ret, TCGv s1, TCGv s2)
@@ -75,7 +232,7 @@ static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
{
REQUIRE_EXT(ctx, RVM);
return gen_arith(ctx, a, EXT_NONE,
- gen_mulhu, gen_mulhu, NULL);
+ gen_mulhu, gen_mulhu, gen_mulhu_i128);
}
static void gen_div(TCGv ret, TCGv source1, TCGv source2)
@@ -218,18 +375,24 @@ static bool trans_remu(DisasContext *ctx, arg_remu *a)
gen_remu, gen_remu, NULL);
}
+static void gen_mulw_i128(TCGv rdl, TCGv rdh,
+ TCGv rs1l, TCGv rs1h, TCGv rs2l, TCGv rs2h)
+{
+ tcg_gen_mul_tl(rdl, rs1l, rs2l); /* NOTE(review): rdh is never written */
+}
+
static bool trans_mulw(DisasContext *ctx, arg_mulw *a)
{
- REQUIRE_64BIT(ctx);
+ REQUIRE_64_OR_128BIT(ctx);
REQUIRE_EXT(ctx, RVM);
ctx->w = true;
return gen_arith(ctx, a, EXT_NONE,
- tcg_gen_mul_tl, tcg_gen_mul_tl, NULL);
+ tcg_gen_mul_tl, tcg_gen_mul_tl, gen_mulw_i128);
}
static bool trans_divw(DisasContext *ctx, arg_divw *a)
{
- REQUIRE_64BIT(ctx);
+ REQUIRE_64_OR_128BIT(ctx);
REQUIRE_EXT(ctx, RVM);
ctx->w = true;
return gen_arith(ctx, a, EXT_SIGN,
@@ -238,7 +401,7 @@ static bool trans_divw(DisasContext *ctx, arg_divw *a)
static bool trans_divuw(DisasContext *ctx, arg_divuw *a)
{
- REQUIRE_64BIT(ctx);
+ REQUIRE_64_OR_128BIT(ctx);
REQUIRE_EXT(ctx, RVM);
ctx->w = true;
return gen_arith(ctx, a, EXT_ZERO,
@@ -247,7 +410,7 @@ static bool trans_divuw(DisasContext *ctx, arg_divuw *a)
static bool trans_remw(DisasContext *ctx, arg_remw *a)
{
- REQUIRE_64BIT(ctx);
+ REQUIRE_64_OR_128BIT(ctx);
REQUIRE_EXT(ctx, RVM);
ctx->w = true;
return gen_arith(ctx, a, EXT_SIGN,
@@ -256,7 +419,7 @@ static bool trans_remw(DisasContext *ctx, arg_remw *a)
static bool trans_remuw(DisasContext *ctx, arg_remuw *a)
{
- REQUIRE_64BIT(ctx);
+ REQUIRE_64_OR_128BIT(ctx);
REQUIRE_EXT(ctx, RVM);
ctx->w = true;
return gen_arith(ctx, a, EXT_ZERO,
--
2.33.0
- [PATCH v2 02/27] Int128.h: addition of a few 128-bit operations, (continued)
- [PATCH v2 02/27] Int128.h: addition of a few 128-bit operations, Frédéric Pétrot, 2021/10/06
- [PATCH v2 09/27] target/riscv: setup everything so that riscv128-softmmu compiles, Frédéric Pétrot, 2021/10/06
- [PATCH v2 06/27] target/riscv: separation of bitwise logic and aritmetic helpers, Frédéric Pétrot, 2021/10/06
- [PATCH v2 07/27] target/riscv: refactoring calls to gen_arith, Frédéric Pétrot, 2021/10/06
- [PATCH v2 11/27] target/riscv: handling 128-bit part in logic/arith/shift gen helpers, Frédéric Pétrot, 2021/10/06
- [PATCH v2 12/27] target/riscv: moving some insns close to similar insns, Frédéric Pétrot, 2021/10/06
- [PATCH v2 13/27] target/riscv: rename a few gen function helpers, Frédéric Pétrot, 2021/10/06
- [PATCH v2 18/27] target/riscv: 128-bit double word integer shift instructions, Frédéric Pétrot, 2021/10/06
- [PATCH v2 15/27] target/riscv: 128-bit support for instructions using gen_shift, Frédéric Pétrot, 2021/10/06
- [PATCH v2 16/27] target/riscv: support for 128-bit loads and store, Frédéric Pétrot, 2021/10/06
- [PATCH v2 19/27] target/riscv: support for 128-bit base multiplications insns,
Frédéric Pétrot <=
- [PATCH v2 14/27] target/riscv: 128-bit support for instructions using gen_arith/gen_logic, Frédéric Pétrot, 2021/10/06
- [PATCH v2 17/27] target/riscv: 128-bit double word integer arithmetic instructions, Frédéric Pétrot, 2021/10/06
- [PATCH v2 24/27] target/riscv: modification of the trans_csrxx for 128-bit support, Frédéric Pétrot, 2021/10/06
- [PATCH v2 20/27] target/riscv: addition of the 'd' insns for 128-bit mult/div/rem, Frédéric Pétrot, 2021/10/06
- [PATCH v2 21/27] target/riscv: div and rem insns on 128-bit, Frédéric Pétrot, 2021/10/06
- [PATCH v2 22/27] target/riscv: adding high part of some csrs, Frédéric Pétrot, 2021/10/06
- [PATCH v2 25/27] target/riscv: actual functions to realize crs 128-bit insns, Frédéric Pétrot, 2021/10/06
- [PATCH v2 23/27] target/riscv: helper functions to wrap calls to 128-bit csr insns, Frédéric Pétrot, 2021/10/06
- [PATCH v2 26/27] target/riscv: adding 128-bit access functions for some csrs, Frédéric Pétrot, 2021/10/06
- [PATCH v2 27/27] target/riscv: support for 128-bit satp, Frédéric Pétrot, 2021/10/06