[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v4 31/47] tcg/tcg-op-gvec.c: Introduce tcg_gen_gvec_4i
From: |
matheus . ferst |
Subject: |
[PATCH v4 31/47] tcg/tcg-op-gvec.c: Introduce tcg_gen_gvec_4i |
Date: |
Tue, 22 Feb 2022 11:36:29 -0300 |
From: Matheus Ferst <matheus.ferst@eldorado.org.br>
Following the implementation of tcg_gen_gvec_3i, add a four-vector and
immediate operand expansion method.
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
---
include/tcg/tcg-op-gvec.h | 22 ++++++
tcg/tcg-op-gvec.c | 146 ++++++++++++++++++++++++++++++++++++++
2 files changed, 168 insertions(+)
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index da55fed870..28cafbcc5c 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -218,6 +218,25 @@ typedef struct {
bool write_aofs;
} GVecGen4;
+typedef struct {
+ /*
+ * Expand inline as a 64-bit or 32-bit integer. Only one of these will be
+ * non-NULL.
+ */
+ void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, int64_t);
+ void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, int32_t);
+ /* Expand inline with a host vector type. */
+ void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec, int64_t);
+ /* Expand out-of-line helper w/descriptor, data in descriptor. */
+ gen_helper_gvec_4 *fno;
+ /* The optional opcodes, if any, utilized by .fniv. */
+ const TCGOpcode *opt_opc;
+ /* The vector element size, if applicable. */
+ uint8_t vece;
+ /* Prefer i64 to v64. */
+ bool prefer_i64;
+} GVecGen4i;
+
void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
uint32_t oprsz, uint32_t maxsz, const GVecGen2 *);
void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
@@ -231,6 +250,9 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t
bofs,
const GVecGen3i *);
void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
uint32_t oprsz, uint32_t maxsz, const GVecGen4 *);
+void tcg_gen_gvec_4i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t
cofs,
+ uint32_t oprsz, uint32_t maxsz, int64_t c,
+ const GVecGen4i *);
/* Expand a specific vector operation. */
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index ffe55e908f..079a761b04 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -836,6 +836,30 @@ static void expand_4_i32(uint32_t dofs, uint32_t aofs,
uint32_t bofs,
tcg_temp_free_i32(t0);
}
+static void expand_4i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t cofs, uint32_t oprsz, int32_t c,
+ void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32,
+ int32_t))
+{
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+ TCGv_i32 t3 = tcg_temp_new_i32();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 4) {
+ tcg_gen_ld_i32(t1, cpu_env, aofs + i);
+ tcg_gen_ld_i32(t2, cpu_env, bofs + i);
+ tcg_gen_ld_i32(t3, cpu_env, cofs + i);
+ fni(t0, t1, t2, t3, c);
+ tcg_gen_st_i32(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i32(t3);
+ tcg_temp_free_i32(t2);
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t0);
+}
+
/* Expand OPSZ bytes worth of two-operand operations using i64 elements. */
static void expand_2_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
bool load_dest, void (*fni)(TCGv_i64, TCGv_i64))
@@ -971,6 +995,30 @@ static void expand_4_i64(uint32_t dofs, uint32_t aofs,
uint32_t bofs,
tcg_temp_free_i64(t0);
}
+static void expand_4i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t cofs, uint32_t oprsz, int64_t c,
+ void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64,
+ int64_t))
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ TCGv_i64 t3 = tcg_temp_new_i64();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i64(t1, cpu_env, aofs + i);
+ tcg_gen_ld_i64(t2, cpu_env, bofs + i);
+ tcg_gen_ld_i64(t3, cpu_env, cofs + i);
+ fni(t0, t1, t2, t3, c);
+ tcg_gen_st_i64(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i64(t3);
+ tcg_temp_free_i64(t2);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t0);
+}
+
/* Expand OPSZ bytes worth of two-operand operations using host vectors. */
static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t oprsz, uint32_t tysz, TCGType type,
@@ -1121,6 +1169,35 @@ static void expand_4_vec(unsigned vece, uint32_t dofs,
uint32_t aofs,
tcg_temp_free_vec(t0);
}
+/*
+ * Expand OPSZ bytes worth of four-vector operands and an immediate operand
+ * using host vectors.
+ */
+static void expand_4i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t cofs, uint32_t oprsz,
+ uint32_t tysz, TCGType type, int64_t c,
+ void (*fni)(unsigned, TCGv_vec, TCGv_vec,
+ TCGv_vec, TCGv_vec, int64_t))
+{
+ TCGv_vec t0 = tcg_temp_new_vec(type);
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ TCGv_vec t2 = tcg_temp_new_vec(type);
+ TCGv_vec t3 = tcg_temp_new_vec(type);
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += tysz) {
+ tcg_gen_ld_vec(t1, cpu_env, aofs + i);
+ tcg_gen_ld_vec(t2, cpu_env, bofs + i);
+ tcg_gen_ld_vec(t3, cpu_env, cofs + i);
+ fni(vece, t0, t1, t2, t3, c);
+ tcg_gen_st_vec(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_vec(t3);
+ tcg_temp_free_vec(t2);
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t0);
+}
+
/* Expand a vector two-operand operation. */
void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g)
@@ -1533,6 +1610,75 @@ void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs,
uint32_t bofs, uint32_t cofs,
}
}
+/* Expand a vector four-operand operation. */
+void tcg_gen_gvec_4i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t
cofs,
+ uint32_t oprsz, uint32_t maxsz, int64_t c,
+ const GVecGen4i *g)
+{
+ const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty;
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list);
+ TCGType type;
+ uint32_t some;
+
+ check_size_align(oprsz, maxsz, dofs | aofs | bofs | cofs);
+ check_overlap_4(dofs, aofs, bofs, cofs, maxsz);
+
+ type = 0;
+ if (g->fniv) {
+ type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64);
+ }
+ switch (type) {
+ case TCG_TYPE_V256:
+ /*
+ * Recall that ARM SVE allows vector sizes that are not a
+ * power of 2, but always a multiple of 16. The intent is
+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+ */
+ some = QEMU_ALIGN_DOWN(oprsz, 32);
+ expand_4i_vec(g->vece, dofs, aofs, bofs, cofs, some,
+ 32, TCG_TYPE_V256, c, g->fniv);
+ if (some == oprsz) {
+ break;
+ }
+ dofs += some;
+ aofs += some;
+ bofs += some;
+ cofs += some;
+ oprsz -= some;
+ maxsz -= some;
+ /* fallthru */
+ case TCG_TYPE_V128:
+ expand_4i_vec(g->vece, dofs, aofs, bofs, cofs, oprsz,
+ 16, TCG_TYPE_V128, c, g->fniv);
+ break;
+ case TCG_TYPE_V64:
+ expand_4i_vec(g->vece, dofs, aofs, bofs, cofs, oprsz,
+ 8, TCG_TYPE_V64, c, g->fniv);
+ break;
+
+ case 0:
+ if (g->fni8 && check_size_impl(oprsz, 8)) {
+ expand_4i_i64(dofs, aofs, bofs, cofs, oprsz, c, g->fni8);
+ } else if (g->fni4 && check_size_impl(oprsz, 4)) {
+ expand_4i_i32(dofs, aofs, bofs, cofs, oprsz, c, g->fni4);
+ } else {
+ assert(g->fno != NULL);
+ tcg_gen_gvec_4_ool(dofs, aofs, bofs, cofs,
+ oprsz, maxsz, c, g->fno);
+ oprsz = maxsz;
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ tcg_swap_vecop_list(hold_list);
+
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
/*
* Expand specific vector operations.
*/
--
2.25.1
- [PATCH v4 26/47] target/ppc: Move vsel and vperm/vpermr to decodetree, (continued)
- [PATCH v4 26/47] target/ppc: Move vsel and vperm/vpermr to decodetree, matheus . ferst, 2022/02/22
- [PATCH v4 27/47] target/ppc: Move xxsel to decodetree, matheus . ferst, 2022/02/22
- [PATCH v4 28/47] target/ppc: move xxperm/xxpermr to decodetree, matheus . ferst, 2022/02/22
- [PATCH v4 29/47] target/ppc: Move xxpermdi to decodetree, matheus . ferst, 2022/02/22
- [PATCH v4 30/47] target/ppc: Implement xxpermx instruction, matheus . ferst, 2022/02/22
- [PATCH v4 31/47] tcg/tcg-op-gvec.c: Introduce tcg_gen_gvec_4i,
matheus . ferst <=
- [PATCH v4 32/47] target/ppc: Implement xxeval, matheus . ferst, 2022/02/22
- [PATCH v4 33/47] target/ppc: Implement xxgenpcv[bhwd]m instruction, matheus . ferst, 2022/02/22
- [PATCH v4 34/47] target/ppc: move xs[n]madd[am][ds]p/xs[n]msub[am][ds]p to decodetree, matheus . ferst, 2022/02/22
- [PATCH v4 35/47] target/ppc: implement xs[n]maddqp[o]/xs[n]msubqp[o], matheus . ferst, 2022/02/22
- [PATCH v4 36/47] target/ppc: Implement xvtlsbb instruction, matheus . ferst, 2022/02/22