qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v7 3/3] target/ppc: Optimize emulation of vupkhpx and vupklpx


From: Aleksandar Markovic
Subject: Re: [PATCH v7 3/3] target/ppc: Optimize emulation of vupkhpx and vupklpx instructions
Date: Sat, 19 Oct 2019 21:24:29 +0200



On Thursday, October 17, 2019, Stefan Brankovic <address@hidden> wrote:
'trans_vupkpx' function implements both vupkhpx and vupklpx instructions with

implements both -> implements emulation of both
 
with -> , while its

argument 'high' determine which 

determine -> determines
 
instruction is processed. Instructions are
implemented in two 'for' loops. Outer 'for' loop repeats unpacking two times,
since both doubleword elements of destination register are formed the same way.
It also stores result of every iteration in temporary register, that is later
transferred to destination register. Inner 'for' loop does unpacking of pixels
and forms resulting doubleword 32 by 32 bits.

temporary register -> a temporary variable

destination register -> the destination register

'forms resulting doubleword 32 by 32 bits' is unclear, reword.


Signed-off-by: Stefan Brankovic <address@hidden>
---
 target/ppc/helper.h                 |  2 -
 target/ppc/int_helper.c             | 20 --------
 target/ppc/translate/vmx-impl.inc.c | 91 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 89 insertions(+), 24 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index b489b38..fd06b56 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -233,8 +233,6 @@ DEF_HELPER_2(vextsh2d, void, avr, avr)
 DEF_HELPER_2(vextsw2d, void, avr, avr)
 DEF_HELPER_2(vnegw, void, avr, avr)
 DEF_HELPER_2(vnegd, void, avr, avr)
-DEF_HELPER_2(vupkhpx, void, avr, avr)
-DEF_HELPER_2(vupklpx, void, avr, avr)
 DEF_HELPER_2(vupkhsb, void, avr, avr)
 DEF_HELPER_2(vupkhsh, void, avr, avr)
 DEF_HELPER_2(vupkhsw, void, avr, avr)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index f910c11..9ee667d 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1737,26 +1737,6 @@ void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 #define UPKHI 0
 #define UPKLO 1
 #endif
-#define VUPKPX(suffix, hi)                                              \
-    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
-    {                                                                   \
-        int i;                                                          \
-        ppc_avr_t result;                                               \
-                                                                        \
-        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
-            uint16_t e = b->u16[hi ? i : i + 4];                        \
-            uint8_t a = (e >> 15) ? 0xff : 0;                           \
-            uint8_t r = (e >> 10) & 0x1f;                               \
-            uint8_t g = (e >> 5) & 0x1f;                                \
-            uint8_t b = e & 0x1f;                                       \
-                                                                        \
-            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
-        }                                                               \
-        *r = result;                                                    \
-    }
-VUPKPX(lpx, UPKLO)
-VUPKPX(hpx, UPKHI)
-#undef VUPKPX

 #define VUPK(suffix, unpacked, packee, hi)                              \
     void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index 3550ffa..09d80d6 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -1031,6 +1031,95 @@ static void trans_vclzd(DisasContext *ctx)
     tcg_temp_free_i64(avr);
 }

+/*
+ * vupkhpx VRT,VRB - Vector Unpack High Pixel
+ * vupklpx VRT,VRB - Vector Unpack Low Pixel
+ *
+ * Unpacks 4 pixels coded in 1-5-5-5 pattern from high/low doubleword element
+ * of source register into contigous array of bits in the destination register.
+ * Argument 'high' determines if high or low doubleword element of source
+ * register is processed.
+ */
+static void trans_vupkpx(DisasContext *ctx, int high)
+{
+    int VT = rD(ctx->opcode);
+    int VB = rB(ctx->opcode);
+    TCGv_i64 tmp = tcg_temp_new_i64();
+    TCGv_i64 avr = tcg_temp_new_i64();
+    TCGv_i64 result = tcg_temp_new_i64();
+    TCGv_i64 result1 = tcg_temp_new_i64();
+    TCGv_i64 result2 = tcg_temp_new_i64();
+    int64_t mask1 = 0x1fULL;
+    int64_t mask2 = 0x1fULL << 8;
+    int64_t mask3 = 0x1fULL << 16;
+    int64_t mask4 = 0xffULL << 56;
+    int i, j;
+
+    if (high == 1) {
+        get_avr64(avr, VB, true);

before this line, insert comment: /* vupkhpx */
 
+    } else {
+        get_avr64(avr, VB, false);

before this line, insert comment: /* vupklpx */
 
+    }
+
+    tcg_gen_movi_i64(result, 0x0ULL);
+    for (i = 0; i < 2; i++) {
+        for (j = 0; j < 2; j++) {
+            tcg_gen_shli_i64(tmp, avr, (j * 16));
+            tcg_gen_andi_i64(tmp, tmp, mask1 << (j * 32));
+            tcg_gen_or_i64(result, result, tmp);
+
+            tcg_gen_shli_i64(tmp, avr, 3 + (j * 16));
+            tcg_gen_andi_i64(tmp, tmp, mask2 << (j * 32));
+            tcg_gen_or_i64(result, result, tmp);
+
+            tcg_gen_shli_i64(tmp, avr, 6 + (j * 16));
+            tcg_gen_andi_i64(tmp, tmp, mask3 << (j * 32));
+            tcg_gen_or_i64(result, result, tmp);
+
+            tcg_gen_shri_i64(tmp, avr, (j * 16));
+            tcg_gen_ext16s_i64(tmp, tmp);
+            tcg_gen_andi_i64(tmp, tmp, mask4);
+            tcg_gen_shri_i64(tmp, tmp, (32 * (1 - j)));
+            tcg_gen_or_i64(result, result, tmp);
+        }
+        if (i == 0) {
+            tcg_gen_mov_i64(result1, result);
+            tcg_gen_movi_i64(result, 0x0ULL);
+            tcg_gen_shri_i64(avr, avr, 32);
+        }
+        if (i == 1) {
+            tcg_gen_mov_i64(result2, result);
+        }
+    }
+
+    set_avr64(VT, result1, false);
+    set_avr64(VT, result2, true);
+
+    tcg_temp_free_i64(tmp);
+    tcg_temp_free_i64(avr);
+    tcg_temp_free_i64(result);
+    tcg_temp_free_i64(result1);
+    tcg_temp_free_i64(result2);
+}
+
+static void gen_vupkhpx(DisasContext *ctx)
+{
+    if (unlikely(!ctx->altivec_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_VPU);
+        return;
+    }
+    trans_vupkpx(ctx, 1);
+}
+
+static void gen_vupklpx(DisasContext *ctx)
    +{
+    if (unlikely(!ctx->altivec_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_VPU);
+        return;
+    }
+    trans_vupkpx(ctx, 0);
+}
+
 GEN_VXFORM(vmuloub, 4, 0);
 GEN_VXFORM(vmulouh, 4, 1);
 GEN_VXFORM(vmulouw, 4, 2);
@@ -1348,8 +1437,6 @@ GEN_VXFORM_NOA(vupkhsw, 7, 25);
 GEN_VXFORM_NOA(vupklsb, 7, 10);
 GEN_VXFORM_NOA(vupklsh, 7, 11);
 GEN_VXFORM_NOA(vupklsw, 7, 27);
-GEN_VXFORM_NOA(vupkhpx, 7, 13);
-GEN_VXFORM_NOA(vupklpx, 7, 15);
 GEN_VXFORM_NOA_ENV(vrefp, 5, 4);
 GEN_VXFORM_NOA_ENV(vrsqrtefp, 5, 5);
 GEN_VXFORM_NOA_ENV(vexptefp, 5, 6);
--
2.7.4



reply via email to

[Prev in Thread] Current Thread [Next in Thread]