From: Richard Henderson
Subject: Re: [PATCH 19/33] target/ppc: Implement Vector Extract Double to VSR using GPR index insns
Date: Sat, 23 Oct 2021 13:01:35 -0700
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Thunderbird/78.13.0

On 10/21/21 12:45 PM, matheus.ferst@eldorado.org.br wrote:
From: Matheus Ferst <matheus.ferst@eldorado.org.br>

Implement the following PowerISA v3.1 instructions:
vextdubvlx: Vector Extract Double Unsigned Byte to VSR using
             GPR-specified Left-Index
vextduhvlx: Vector Extract Double Unsigned Halfword to VSR using
             GPR-specified Left-Index
vextduwvlx: Vector Extract Double Unsigned Word to VSR using
             GPR-specified Left-Index
vextddvlx: Vector Extract Double Doubleword to VSR using
            GPR-specified Left-Index
vextdubvrx: Vector Extract Double Unsigned Byte to VSR using
             GPR-specified Right-Index
vextduhvrx: Vector Extract Double Unsigned Halfword to VSR using
             GPR-specified Right-Index
vextduwvrx: Vector Extract Double Unsigned Word to VSR using
             GPR-specified Right-Index
vextddvrx: Vector Extract Double Doubleword to VSR using
            GPR-specified Right-Index

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
---
  target/ppc/helper.h                 |  4 +++
  target/ppc/insn32.decode            | 12 +++++++++
  target/ppc/int_helper.c             | 41 ++++++++++++++++++++++++++++-
  target/ppc/translate/vmx-impl.c.inc | 37 ++++++++++++++++++++++++++
  4 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 53c65ca1c7..ac8ab7e436 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -336,6 +336,10 @@ DEF_HELPER_2(vextuwlx, tl, tl, avr)
  DEF_HELPER_2(vextubrx, tl, tl, avr)
  DEF_HELPER_2(vextuhrx, tl, tl, avr)
  DEF_HELPER_2(vextuwrx, tl, tl, avr)
+DEF_HELPER_5(VEXTDUBVLX, void, env, avr, avr, avr, tl)
+DEF_HELPER_5(VEXTDUHVLX, void, env, avr, avr, avr, tl)
+DEF_HELPER_5(VEXTDUWVLX, void, env, avr, avr, avr, tl)
+DEF_HELPER_5(VEXTDDVLX, void, env, avr, avr, avr, tl)
DEF_HELPER_2(vsbox, void, avr, avr)
  DEF_HELPER_3(vcipher, void, avr, avr, avr)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 2eb7fb4e92..e438177b32 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -38,6 +38,9 @@
  %dx_d           6:s10 16:5 0:1
  @DX             ...... rt:5  ..... .......... ..... .   &DX d=%dx_d
+&VA vrt vra vrb rc
+@VA             ...... vrt:5 vra:5 vrb:5 rc:5 ......    &VA
+
  &VN             vrt vra vrb sh
  @VN             ...... vrt:5 vra:5 vrb:5 .. sh:3 ......         &VN
@@ -347,6 +350,15 @@ VPEXTD          000100 ..... ..... ..... 10110001101    @VX
 
 ## Vector Permute and Formatting Instruction
 
+VEXTDUBVLX      000100 ..... ..... ..... ..... 011000   @VA
+VEXTDUBVRX      000100 ..... ..... ..... ..... 011001   @VA
+VEXTDUHVLX      000100 ..... ..... ..... ..... 011010   @VA
+VEXTDUHVRX      000100 ..... ..... ..... ..... 011011   @VA
+VEXTDUWVLX      000100 ..... ..... ..... ..... 011100   @VA
+VEXTDUWVRX      000100 ..... ..... ..... ..... 011101   @VA
+VEXTDDVLX       000100 ..... ..... ..... ..... 011110   @VA
+VEXTDDVRX       000100 ..... ..... ..... ..... 011111   @VA
+
  VINSERTB        000100 ..... - .... ..... 01100001101   @VX_uim4
  VINSERTH        000100 ..... - .... ..... 01101001101   @VX_uim4
  VINSERTW        000100 ..... - .... ..... 01110001101   @VX_uim4
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 5a925a564d..1577ea8788 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1673,8 +1673,47 @@ VINSX(B, uint8_t)
  VINSX(H, uint16_t)
  VINSX(W, uint32_t)
  VINSX(D, uint64_t)
-#undef ELEM_ADDR
  #undef VINSX
+#define VEXTDVLX(NAME, TYPE) \
+void glue(glue(helper_VEXTD, NAME), VLX)(CPUPPCState *env, ppc_avr_t *t, \
+                                         ppc_avr_t *a, ppc_avr_t *b, \
+                                         target_ulong index) \
+{ \
+    const int array_size = ARRAY_SIZE(t->u8), elem_size = sizeof(TYPE); \
+    const target_long idx = index; \
+ \
+    if (idx < 0) { \
+        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for VEXTD" #NAME "VRX at" \
+            " 0x" TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", env->nip, \
+            32 - elem_size - idx, 32 - elem_size); \
+    } else if (idx + elem_size <= array_size) { \
+        t->VsrD(0) = *(TYPE *)ELEM_ADDR(a, idx, elem_size); \

You need an unaligned load here.

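A minimal sketch of what that would look like, reusing the TYPE and ELEM_ADDR
names from the quoted macro: go through memcpy into a properly typed temporary
instead of dereferencing a possibly misaligned (TYPE *) pointer, which is
undefined behaviour in C.  QEMU's ld*_he_p() helpers in include/qemu/bswap.h
wrap the same pattern.

    /* sketch only: portable unaligned load of one element from 'a' */
    TYPE val;
    memcpy(&val, ELEM_ADDR(a, idx, elem_size), sizeof(val));
    t->VsrD(0) = val;
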
+        t->VsrD(1) = 0; \
+    } else if (idx < array_size) { \
+        ppc_avr_t tmp = { .u64 = { 0, 0 } }; \
+        const int len_a = array_size - idx, len_b = elem_size - len_a; \
+ \
+        memmove(ELEM_ADDR(&tmp, array_size / 2 - elem_size, len_a), \
+                ELEM_ADDR(a, idx, len_a), len_a); \
+        memmove(ELEM_ADDR(&tmp, array_size / 2 - len_b, len_b), \
+                ELEM_ADDR(b, 0, len_b), len_b); \

You know tmp does not overlap the source; memcpy will do.

+ \
+        *t = tmp; \
+    } else if (idx + elem_size <= 2 * array_size) { \
+        t->VsrD(0) = *(TYPE *)ELEM_ADDR(b, idx - array_size, elem_size); \

Another unaligned load.

Or... we could set this up as

    ppc_avr_t tmp[2] = { *a, *b };
    memset(t, 0, sizeof(*t));
    if (idx >= 0 && idx + elem_size <= sizeof(tmp)) {
        memcpy(t + 8 - elem_size, (char *)&tmp + idx, elem_size);
    }

... with some sort of host-endian adjustment which I'm too lazy to work out at 
the moment.
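
One way to spell that adjustment out, as a sketch only (it assumes nothing
beyond the quoted macro's variables a, b, t, idx and elem_size, and is not
necessarily what should be committed): gather the 32-byte concatenation a||b
through the VsrB() accessor, which already hides the host byte order, then
deposit the element right-justified into doubleword 0 of the target.  As in
the snippet above, the result is zeroed whenever the index is out of range.

    uint8_t buf[32];
    ppc_avr_t tmp = { .u64 = { 0, 0 } };
    int i;

    /* a || b in ISA (big-endian) byte order, independent of host endianness */
    for (i = 0; i < 16; i++) {
        buf[i] = a->VsrB(i);
        buf[i + 16] = b->VsrB(i);
    }
    /* right-justify the element in doubleword 0; tmp stays zero otherwise */
    if (idx >= 0 && idx + elem_size <= (int)sizeof(buf)) {
        for (i = 0; i < elem_size; i++) {
            tmp.VsrB(8 - elem_size + i) = buf[idx + i];
        }
    }
    *t = tmp;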


r~


