[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 12/12] target/ppc: Use gvec to decode XVTSTDC[DS]P
From: |
Lucas Mateus Castro(alqotel) |
Subject: |
[PATCH 12/12] target/ppc: Use gvec to decode XVTSTDC[DS]P |
Date: |
Fri, 23 Sep 2022 18:47:54 -0300 |
From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
Use gvec to translate XVTSTDCSP and XVTSTDCDP.
xvtstdcsp:
rept loop patch10 patch12
8 12500 2,70288900 1,24050300 (-54.1%)
25 4000 2,65665700 1,14078900 (-57.1%)
100 1000 2,82795400 1,53337200 (-45.8%)
500 200 3,62225400 3,91718000 (+8.1%)
2500 40 6,45658000 12,60683700 (+95.3%)
8000 12 17,48091900 44,15384000 (+152.6%)
xvtstdcdp:
rept loop patch10 patch12
8 12500 1,56435900 1,24554800 (-20.4%)
25 4000 1,53789500 1,14177800 (-25.8%)
100 1000 1,67964600 1,54280000 (-8.1%)
500 200 2,46777100 3,96816000 (+60.8%)
2500 40 5,21938900 12,79937800 (+145.2%)
8000 12 15,97600500 45,44233000 (+184.4%)
Overall, these instructions are the hardest ones to measure performance
for, as the implementation is affected by the immediate. For example,
in a worst-case scenario (high REPT, LOOP = 1, immediate 127) the gvec
implementation took 13x longer, and in a best-case scenario (low REPT,
high LOOP, only 1 bit set in the immediate) the execution took 21.8% of
the time with gvec (-78.2%).
The results shown here are the sum over every possible immediate.
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
target/ppc/translate/vsx-impl.c.inc | 73 ++++++++++++++++++++++++++++-
1 file changed, 71 insertions(+), 2 deletions(-)
diff --git a/target/ppc/translate/vsx-impl.c.inc
b/target/ppc/translate/vsx-impl.c.inc
index c3c179723b..dc95e8fdf4 100644
--- a/target/ppc/translate/vsx-impl.c.inc
+++ b/target/ppc/translate/vsx-impl.c.inc
@@ -1121,16 +1121,85 @@ GEN_VSX_HELPER_X2(xscvhpdp, 0x16, 0x15, 0x10,
PPC2_ISA300)
GEN_VSX_HELPER_R2(xscvsdqp, 0x04, 0x1A, 0x0A, PPC2_ISA300)
GEN_VSX_HELPER_X2(xscvspdp, 0x12, 0x14, 0, PPC2_VSX)
+static void do_xvtstdc_vec(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t imm)
+{ /* Set each element of t to all-ones if b matches a class selected by imm */
+ TCGv_vec match = tcg_const_ones_vec_matching(t); /* all-ones "hit" value */
+ TCGv_vec temp;
+ TCGv_vec mask;
+ uint64_t exp_msk = (vece == MO_32) ? (uint32_t)EXP_MASK_SP : EXP_MASK_DP;
+ uint64_t sgn_msk = (vece == MO_32) ? (uint32_t)SGN_MASK_SP : SGN_MASK_DP;
+ uint64_t frc_msk = ~(exp_msk | sgn_msk);
+ mask = tcg_constant_vec_matching(t, vece, 0);
+ tcg_gen_mov_vec(t, mask); /* start with no element matching */
+ if (imm & (0x3 << 0)) {
+ /* test if Denormal: zero exponent, non-zero fraction */
+ temp = tcg_temp_new_vec_matching(t);
+ mask = tcg_constant_vec_matching(t, vece, ~sgn_msk);
+ tcg_gen_and_vec(vece, t, b, mask); /* t = b with sign cleared */
+ mask = tcg_constant_vec_matching(t, vece, frc_msk);
+ tcg_gen_cmp_vec(TCG_COND_LE, vece, temp, t, mask);
+ mask = tcg_constant_vec_matching(t, vece, 0);
+ tcg_gen_cmpsel_vec(TCG_COND_NE, vece, temp, t, mask, temp, mask);
+
+ tcg_gen_mov_vec(t, mask); /* t was used as scratch; zero it again */
+ mask = tcg_constant_vec_matching(t, vece, sgn_msk);
+ if (imm & (0x1)) {
+ /* test if negative: sign bit set, so b > sgn_msk unsigned */
+ tcg_gen_cmpsel_vec(TCG_COND_GTU, vece, t, b, mask, temp, t);
+ }
+ if (imm & (0x2)) {
+ /* test if positive: sign bit clear, so b < sgn_msk unsigned */
+ tcg_gen_cmpsel_vec(TCG_COND_LTU, vece, t, b, mask, temp, t);
+ }
+ tcg_temp_free_vec(temp);
+ }
+ if (imm & (1 << 2)) {
+ /* test if -0: only the sign bit is set */
+ mask = tcg_constant_vec_matching(t, vece, sgn_msk);
+ tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, b, mask, match, t);
+ }
+ if (imm & (1 << 3)) {
+ /* test if +0: all bits are zero */
+ mask = tcg_constant_vec_matching(t, vece, 0);
+ tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, b, mask, match, t);
+ }
+ if (imm & (1 << 4)) {
+ /* test if -Inf: sign and exponent bits set, fraction zero */
+ mask = tcg_constant_vec_matching(t, vece, exp_msk | sgn_msk);
+ tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, b, mask, match, t);
+ }
+ if (imm & (1 << 5)) {
+ /* test if +Inf: only the exponent bits are set */
+ mask = tcg_constant_vec_matching(t, vece, exp_msk);
+ tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, b, mask, match, t);
+ }
+ if (imm & (1 << 6)) {
+ /* test if NaN: sign-cleared value is greater than the Inf pattern */
+ mask = tcg_constant_vec_matching(t, vece, ~sgn_msk);
+ tcg_gen_and_vec(vece, b, b, mask); /* clears sign in b itself */
+ mask = tcg_constant_vec_matching(t, vece, exp_msk);
+ tcg_gen_cmpsel_vec(TCG_COND_GT, vece, t, b, mask, match, t);
+ }
+ tcg_temp_free_vec(match);
+}
+
static bool do_xvtstdc(DisasContext *ctx, arg_XX2_uim *a, unsigned vece)
{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
+ };
static const GVecGen2i op[] = {
{
.fnoi = gen_helper_XVTSTDCSP,
- .vece = MO_32
+ .fniv = do_xvtstdc_vec,
+ .vece = MO_32,
+ .opt_opc = vecop_list
},
{
.fnoi = gen_helper_XVTSTDCDP,
- .vece = MO_64
+ .fniv = do_xvtstdc_vec,
+ .vece = MO_64,
+ .opt_opc = vecop_list
},
};
--
2.31.1
- [PATCH 06/12] target/ppc: Move VAVG[SU][BHW] to decodetree and use gvec, (continued)
- [PATCH 06/12] target/ppc: Move VAVG[SU][BHW] to decodetree and use gvec, Lucas Mateus Castro(alqotel), 2022/09/23
- [PATCH 07/12] target/ppc: Move VABSDU[BHW] to decodetree and use gvec, Lucas Mateus Castro(alqotel), 2022/09/23
- [PATCH 08/12] target/ppc: Use gvec to decode XV[N]ABS[DS]P/XVNEG[DS]P, Lucas Mateus Castro(alqotel), 2022/09/23
- [PATCH 09/12] target/ppc: Use gvec to decode XVCPSGN[SD]P, Lucas Mateus Castro(alqotel), 2022/09/23
- [PATCH 10/12] target/ppc: Moved XVTSTDC[DS]P to decodetree, Lucas Mateus Castro(alqotel), 2022/09/23
- [PATCH 12/12] target/ppc: Use gvec to decode XVTSTDC[DS]P,
Lucas Mateus Castro(alqotel) <=
- [PATCH 11/12] target/ppc: Moved XSTSTDC[QDS]P to decodetree, Lucas Mateus Castro(alqotel), 2022/09/23