[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v6 10/61] target/riscv: vector single-width integer add and subtract
From: Alistair Francis
Subject: Re: [PATCH v6 10/61] target/riscv: vector single-width integer add and subtract
Date: Fri, 20 Mar 2020 11:31:37 -0700
On Tue, Mar 17, 2020 at 8:27 AM LIU Zhiwei <address@hidden> wrote:
>
> Signed-off-by: LIU Zhiwei <address@hidden>
Reviewed-by: Alistair Francis <address@hidden>
Alistair
> ---
> target/riscv/helper.h | 21 ++
> target/riscv/insn32.decode | 10 +
> target/riscv/insn_trans/trans_rvv.inc.c | 251 ++++++++++++++++++++++++
> target/riscv/vector_helper.c | 149 ++++++++++++++
> 4 files changed, 431 insertions(+)
>
> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
> index 70a4b05f75..e73701d4bb 100644
> --- a/target/riscv/helper.h
> +++ b/target/riscv/helper.h
> @@ -269,3 +269,24 @@ DEF_HELPER_6(vamominw_v_w, void, ptr, ptr, tl, ptr, env, i32)
> DEF_HELPER_6(vamomaxw_v_w, void, ptr, ptr, tl, ptr, env, i32)
> DEF_HELPER_6(vamominuw_v_w, void, ptr, ptr, tl, ptr, env, i32)
> DEF_HELPER_6(vamomaxuw_v_w, void, ptr, ptr, tl, ptr, env, i32)
> +
> +DEF_HELPER_6(vadd_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vadd_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vadd_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vadd_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vsub_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vsub_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vsub_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vsub_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vadd_vx_b, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vadd_vx_h, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vadd_vx_w, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vadd_vx_d, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vsub_vx_b, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vsub_vx_h, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vsub_vx_w, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vsub_vx_d, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vrsub_vx_b, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vrsub_vx_h, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vrsub_vx_w, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vrsub_vx_d, void, ptr, ptr, tl, ptr, env, i32)
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index 1330703720..d1034a0e61 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -44,6 +44,7 @@
> &u imm rd
> &shift shamt rs1 rd
> &atomic aq rl rs2 rs1 rd
> +&rmrr vm rd rs1 rs2
> &rwdvm vm wd rd rs1 rs2
> &r2nfvm vm rd rs1 nf
> &rnfvm vm rd rs1 rs2 nf
> @@ -68,6 +69,7 @@
> @r2 ....... ..... ..... ... ..... ....... %rs1 %rd
> @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd
> @r_nfvm ... ... vm:1 ..... ..... ... ..... ....... &rnfvm %nf %rs2 %rs1 %rd
> +@r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd
> @r_wdvm ..... wd:1 vm:1 ..... ..... ... ..... ....... &rwdvm %rs2 %rs1 %rd
> @r2_zimm . zimm:11 ..... ... ..... ....... %rs1 %rd
>
> @@ -275,5 +277,13 @@ vamominuw_v 11000 . . ..... ..... 110 ..... 0101111 @r_wdvm
> vamomaxuw_v 11100 . . ..... ..... 110 ..... 0101111 @r_wdvm
>
> # *** new major opcode OP-V ***
> +vadd_vv 000000 . ..... ..... 000 ..... 1010111 @r_vm
> +vadd_vx 000000 . ..... ..... 100 ..... 1010111 @r_vm
> +vadd_vi 000000 . ..... ..... 011 ..... 1010111 @r_vm
> +vsub_vv 000010 . ..... ..... 000 ..... 1010111 @r_vm
> +vsub_vx 000010 . ..... ..... 100 ..... 1010111 @r_vm
> +vrsub_vx 000011 . ..... ..... 100 ..... 1010111 @r_vm
> +vrsub_vi 000011 . ..... ..... 011 ..... 1010111 @r_vm
> +
> vsetvli 0 ........... ..... 111 ..... 1010111 @r2_zimm
> vsetvl 1000000 ..... ..... 111 ..... 1010111 @r
> diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
> index a8722ed9d2..c68f6ffe3b 100644
> --- a/target/riscv/insn_trans/trans_rvv.inc.c
> +++ b/target/riscv/insn_trans/trans_rvv.inc.c
> @@ -740,3 +740,254 @@ GEN_VEXT_TRANS(vamomaxd_v, 15, rwdvm, amo_op, amo_check)
> GEN_VEXT_TRANS(vamominud_v, 16, rwdvm, amo_op, amo_check)
> GEN_VEXT_TRANS(vamomaxud_v, 17, rwdvm, amo_op, amo_check)
> #endif
> +
> +/*
> + *** Vector Integer Arithmetic Instructions
> + */
> +#define MAXSZ(s) (s->vlen >> (3 - s->lmul))
> +
> +static bool opivv_check(DisasContext *s, arg_rmrr *a)
> +{
> + return (vext_check_isa_ill(s) &&
> + vext_check_overlap_mask(s, a->rd, a->vm, false) &&
> + vext_check_reg(s, a->rd, false) &&
> + vext_check_reg(s, a->rs2, false) &&
> + vext_check_reg(s, a->rs1, false));
> +}
> +
> +typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
> + uint32_t, uint32_t, uint32_t);
> +
> +static inline bool
> +do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
> + gen_helper_gvec_4_ptr *fn)
> +{
> + if (!opivv_check(s, a)) {
> + return false;
> + }
> +
> + if (a->vm && s->vl_eq_vlmax) {
> + gvec_fn(s->sew, vreg_ofs(s, a->rd),
> + vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
> + MAXSZ(s), MAXSZ(s));
> + } else {
> + uint32_t data = 0;
> +
> + data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
> + data = FIELD_DP32(data, VDATA, VM, a->vm);
> + data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
> + tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
> + vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
> + cpu_env, 0, s->vlen / 8, data, fn);
> + }
> + return true;
> +}
> +
> +/* OPIVV with GVEC IR */
> +#define GEN_OPIVV_GVEC_TRANS(NAME, SUF) \
> +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
> +{ \
> + static gen_helper_gvec_4_ptr * const fns[4] = { \
> + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \
> + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \
> + }; \
> + return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \
> +}
> +
> +GEN_OPIVV_GVEC_TRANS(vadd_vv, add)
> +GEN_OPIVV_GVEC_TRANS(vsub_vv, sub)
> +
> +typedef void gen_helper_opivx(TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr,
> + TCGv_env, TCGv_i32);
> +
> +static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
> + gen_helper_opivx *fn, DisasContext *s)
> +{
> + TCGv_ptr dest, src2, mask;
> + TCGv src1;
> + TCGv_i32 desc;
> + uint32_t data = 0;
> +
> + dest = tcg_temp_new_ptr();
> + mask = tcg_temp_new_ptr();
> + src2 = tcg_temp_new_ptr();
> + src1 = tcg_temp_new();
> + gen_get_gpr(src1, rs1);
> +
> + data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
> + data = FIELD_DP32(data, VDATA, VM, vm);
> + data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
> + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
> +
> + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
> + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2));
> + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
> +
> + fn(dest, mask, src1, src2, cpu_env, desc);
> +
> + tcg_temp_free_ptr(dest);
> + tcg_temp_free_ptr(mask);
> + tcg_temp_free_ptr(src2);
> + tcg_temp_free(src1);
> + tcg_temp_free_i32(desc);
> + return true;
> +}
> +
> +static bool opivx_check(DisasContext *s, arg_rmrr *a)
> +{
> + return (vext_check_isa_ill(s) &&
> + vext_check_overlap_mask(s, a->rd, a->vm, false) &&
> + vext_check_reg(s, a->rd, false) &&
> + vext_check_reg(s, a->rs2, false));
> +}
> +
> +typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t, TCGv_i64,
> + uint32_t, uint32_t);
> +
> +static inline bool
> +do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn,
> + gen_helper_opivx *fn)
> +{
> + if (!opivx_check(s, a)) {
> + return false;
> + }
> +
> + if (a->vm && s->vl_eq_vlmax) {
> + TCGv_i64 src1 = tcg_temp_new_i64();
> + TCGv tmp = tcg_temp_new();
> +
> + gen_get_gpr(tmp, a->rs1);
> + tcg_gen_ext_tl_i64(src1, tmp);
> + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
> + src1, MAXSZ(s), MAXSZ(s));
> +
> + tcg_temp_free_i64(src1);
> + tcg_temp_free(tmp);
> + return true;
> + } else {
> + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
> + }
> + return true;
> +}
> +
> +/* OPIVX with GVEC IR */
> +#define GEN_OPIVX_GVEC_TRANS(NAME, SUF) \
> +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
> +{ \
> + static gen_helper_opivx * const fns[4] = { \
> + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \
> + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \
> + }; \
> + return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \
> +}
> +
> +GEN_OPIVX_GVEC_TRANS(vadd_vx, adds)
> +GEN_OPIVX_GVEC_TRANS(vsub_vx, subs)
> +
> +/* OPIVX without GVEC IR */
> +#define GEN_OPIVX_TRANS(NAME, CHECK) \
> +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
> +{ \
> + if (CHECK(s, a)) { \
> + static gen_helper_opivx * const fns[4] = { \
> + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \
> + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \
> + }; \
> + \
> + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\
> + } \
> + return false; \
> +}
> +
> +GEN_OPIVX_TRANS(vrsub_vx, opivx_check)
> +
> +static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
> + gen_helper_opivx *fn, DisasContext *s, int zx)
> +{
> + TCGv_ptr dest, src2, mask;
> + TCGv src1;
> + TCGv_i32 desc;
> + uint32_t data = 0;
> +
> + dest = tcg_temp_new_ptr();
> + mask = tcg_temp_new_ptr();
> + src2 = tcg_temp_new_ptr();
> + if (zx) {
> + src1 = tcg_const_tl(imm);
> + } else {
> + src1 = tcg_const_tl(sextract64(imm, 0, 5));
> + }
> + data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
> + data = FIELD_DP32(data, VDATA, VM, vm);
> + data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
> + desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
> +
> + tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
> + tcg_gen_addi_ptr(src2, cpu_env, vreg_ofs(s, vs2));
> + tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));
> +
> + fn(dest, mask, src1, src2, cpu_env, desc);
> +
> + tcg_temp_free_ptr(dest);
> + tcg_temp_free_ptr(mask);
> + tcg_temp_free_ptr(src2);
> + tcg_temp_free(src1);
> + tcg_temp_free_i32(desc);
> + return true;
> +}
> +
> +typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
> + uint32_t, uint32_t);
> +
> +static inline bool
> +do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn,
> + gen_helper_opivx *fn, int zx)
> +{
> + if (!opivx_check(s, a)) {
> + return false;
> + }
> +
> + if (a->vm && s->vl_eq_vlmax) {
> + if (zx) {
> + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
> + extract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s));
> + } else {
> + gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
> + sextract64(a->rs1, 0, 5), MAXSZ(s), MAXSZ(s));
> + }
> + } else {
> + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, zx);
> + }
> + return true;
> +}
> +
> +/* OPIVI with GVEC IR */
> +#define GEN_OPIVI_GVEC_TRANS(NAME, ZX, OPIVX, SUF) \
> +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
> +{ \
> + static gen_helper_opivx * const fns[4] = { \
> + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \
> + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \
> + }; \
> + return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, \
> + fns[s->sew], ZX); \
> +}
> +
> +GEN_OPIVI_GVEC_TRANS(vadd_vi, 0, vadd_vx, addi)
> +
> +/* OPIVI without GVEC IR */
> +#define GEN_OPIVI_TRANS(NAME, ZX, OPIVX, CHECK) \
> +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
> +{ \
> + if (CHECK(s, a)) { \
> + static gen_helper_opivx * const fns[4] = { \
> + gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \
> + gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \
> + }; \
> + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \
> + fns[s->sew], s, ZX); \
> + } \
> + return false; \
> +}
> +
> +GEN_OPIVI_TRANS(vrsub_vi, 0, vrsub_vx, opivx_check)
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index 45da43ade9..27934e291b 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -827,3 +827,152 @@ GEN_VEXT_AMO(vamominw_v_w, int32_t, int32_t, idx_w, clearl)
> GEN_VEXT_AMO(vamomaxw_v_w, int32_t, int32_t, idx_w, clearl)
> GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl)
> GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl)
> +
> +/*
> + *** Vector Integer Arithmetic Instructions
> + */
> +
> +/* expand macro args before macro */
> +#define RVVCALL(macro, ...) macro(__VA_ARGS__)
> +
> +/* (TD, T1, T2, TX1, TX2) */
> +#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
> +#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
> +#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
> +#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
> +
> +/* operation of two vector elements */
> +typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
> +
> +#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
> +static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
> +{ \
> + TX1 s1 = *((T1 *)vs1 + HS1(i)); \
> + TX2 s2 = *((T2 *)vs2 + HS2(i)); \
> + *((TD *)vd + HD(i)) = OP(s2, s1); \
> +}
> +#define DO_SUB(N, M) (N - M)
> +#define DO_RSUB(N, M) (M - N)
> +
> +RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
> +RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
> +RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
> +RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
> +RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
> +RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
> +RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
> +RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
> +
> +static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
> + CPURISCVState *env, uint32_t desc,
> + uint32_t esz, uint32_t dsz,
> + opivv2_fn *fn, clear_fn *clearfn)
> +{
> + uint32_t vlmax = vext_maxsz(desc) / esz;
> + uint32_t mlen = vext_mlen(desc);
> + uint32_t vm = vext_vm(desc);
> + uint32_t vl = env->vl;
> + uint32_t i;
> +
> + if (vl == 0) {
> + return;
> + }
> + for (i = 0; i < vl; i++) {
> + if (!vm && !vext_elem_mask(v0, mlen, i)) {
> + continue;
> + }
> + fn(vd, vs1, vs2, i);
> + }
> + clearfn(vd, vl, vl * dsz, vlmax * dsz);
> +}
> +
> +/* generate the helpers for OPIVV */
> +#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN) \
> +void HELPER(NAME)(void *vd, void *v0, void *vs1, \
> + void *vs2, CPURISCVState *env, \
> + uint32_t desc) \
> +{ \
> + do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
> + do_##NAME, CLEAR_FN); \
> +}
> +
> +GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb)
> +GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh)
> +GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl)
> +GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq)
> +GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb)
> +GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh)
> +GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl)
> +GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq)
> +
> +typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
> +
> +/*
> + * (T1)s1 gives the real operator type.
> + * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
> + */
> +#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
> +static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
> +{ \
> + TX2 s2 = *((T2 *)vs2 + HS2(i)); \
> + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
> +}
> +
> +RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
> +RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
> +RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
> +RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
> +RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
> +RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
> +RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
> +RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
> +RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
> +RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
> +RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
> +RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
> +
> +static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
> + CPURISCVState *env, uint32_t desc,
> + uint32_t esz, uint32_t dsz,
> + opivx2_fn fn, clear_fn *clearfn)
> +{
> + uint32_t vlmax = vext_maxsz(desc) / esz;
> + uint32_t mlen = vext_mlen(desc);
> + uint32_t vm = vext_vm(desc);
> + uint32_t vl = env->vl;
> + uint32_t i;
> +
> + if (vl == 0) {
> + return;
> + }
> + for (i = 0; i < vl; i++) {
> + if (!vm && !vext_elem_mask(v0, mlen, i)) {
> + continue;
> + }
> + fn(vd, s1, vs2, i);
> + }
> + clearfn(vd, vl, vl * dsz, vlmax * dsz);
> +}
> +
> +/* generate the helpers for OPIVX */
> +#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN) \
> +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
> + void *vs2, CPURISCVState *env, \
> + uint32_t desc) \
> +{ \
> + do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \
> + do_##NAME, CLEAR_FN); \
> +}
> +
> +GEN_VEXT_VX(vadd_vx_b, 1, 1, clearb)
> +GEN_VEXT_VX(vadd_vx_h, 2, 2, clearh)
> +GEN_VEXT_VX(vadd_vx_w, 4, 4, clearl)
> +GEN_VEXT_VX(vadd_vx_d, 8, 8, clearq)
> +GEN_VEXT_VX(vsub_vx_b, 1, 1, clearb)
> +GEN_VEXT_VX(vsub_vx_h, 2, 2, clearh)
> +GEN_VEXT_VX(vsub_vx_w, 4, 4, clearl)
> +GEN_VEXT_VX(vsub_vx_d, 8, 8, clearq)
> +GEN_VEXT_VX(vrsub_vx_b, 1, 1, clearb)
> +GEN_VEXT_VX(vrsub_vx_h, 2, 2, clearh)
> +GEN_VEXT_VX(vrsub_vx_w, 4, 4, clearl)
> +GEN_VEXT_VX(vrsub_vx_d, 8, 8, clearq)
> --
> 2.23.0
>
- Re: [PATCH v6 05/61] target/riscv: add an internals.h header, (continued)
- [PATCH v6 06/61] target/riscv: add vector stride load and store instructions, LIU Zhiwei, 2020/03/17
- [PATCH v6 07/61] target/riscv: add vector index load and store instructions, LIU Zhiwei, 2020/03/17
- [PATCH v6 08/61] target/riscv: add fault-only-first unit stride load, LIU Zhiwei, 2020/03/17
- [PATCH v6 09/61] target/riscv: add vector amo operations, LIU Zhiwei, 2020/03/17
- [PATCH v6 10/61] target/riscv: vector single-width integer add and subtract, LIU Zhiwei, 2020/03/17
- [PATCH v6 11/61] target/riscv: vector widening integer add and subtract, LIU Zhiwei, 2020/03/17
- [PATCH v6 12/61] target/riscv: vector integer add-with-carry / subtract-with-borrow instructions, LIU Zhiwei, 2020/03/17
- [PATCH v6 13/61] target/riscv: vector bitwise logical instructions, LIU Zhiwei, 2020/03/17
- [PATCH v6 14/61] target/riscv: vector single-width bit shift instructions, LIU Zhiwei, 2020/03/17