[PATCH v2 3/7] target/s390x: vxeh2: vector shift {double by bit, left, r

qemu-s390x

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 3/7] target/s390x: vxeh2: vector shift {double by bit, left, r

From:	David Miller
Subject:	[PATCH v2 3/7] target/s390x: vxeh2: vector shift {double by bit, left, right {logical, arithmetic}}
Date:	Sun, 6 Mar 2022 21:03:23 -0500

Signed-off-by: David Miller <dmiller423@gmail.com>
---
 include/qemu/bitops.h               |  25 +++++++
 target/s390x/helper.h               |   3 +
 target/s390x/tcg/insn-data.def      |   6 +-
 target/s390x/tcg/translate_vx.c.inc | 108 ++++++++++++++++++----------
 target/s390x/tcg/vec_int_helper.c   |  58 +++++++++++++++
 5 files changed, 163 insertions(+), 37 deletions(-)

diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
index 03213ce952..72426deea0 100644
--- a/include/qemu/bitops.h
+++ b/include/qemu/bitops.h
@@ -445,6 +445,31 @@ static inline int64_t sextract64(uint64_t value, int 
start, int length)
      */
     return ((int64_t)(value << (64 - length - start))) >> (64 - length);
 }
+/**
+ * deposit8:
+ * @value: initial value to insert bit field into
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ * @fieldval: the value to insert into the bit field
+ *
+ * Deposit @fieldval into the 8 bit @value at the bit field specified
+ * by the @start and @length parameters, and return the modified
+ * @value. Bits of @value outside the bit field are not modified.
+ * Bits of @fieldval above the least significant @length bits are
+ * ignored. The bit field must lie entirely within the 8 bit byte.
+ * It is valid to request that all 8 bits are modified (ie @length
+ * 8 and @start 0).
+ *
+ * Returns: the modified @value.
+ */
+static inline uint8_t deposit8(uint8_t value, int start, int length,
+                               uint8_t fieldval)
+{
+    uint8_t mask;
+    assert(start >= 0 && length > 0 && length <= 8 - start);
+    mask = (~0ULL >> (8 - length)) << start;
+    return (value & ~mask) | ((fieldval << start) & mask);
+}
 
 /**
  * deposit32:
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 1e38ee2e4e..a36308d651 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -203,8 +203,11 @@ DEF_HELPER_FLAGS_3(gvec_vpopct16, TCG_CALL_NO_RWG, void, 
ptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_verim8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_vsl_ve2, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vsra, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_vsra_ve2, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vsrl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_vsrl_ve2, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vscbi8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_FLAGS_4(gvec_vscbi16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
 DEF_HELPER_4(gvec_vtm, void, ptr, cptr, env, i32)
diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index 46add91a0e..1bfe88a4ac 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1207,12 +1207,16 @@
     F(0xe774, VSL,     VRR_c, V,   0, 0, 0, 0, vsl, 0, IF_VEC)
 /* VECTOR SHIFT LEFT BY BYTE */
     F(0xe775, VSLB,    VRR_c, V,   0, 0, 0, 0, vsl, 0, IF_VEC)
+/* VECTOR SHIFT LEFT DOUBLE BY BIT */
+       F(0xe786, VSLD,    VRI_d, VE2, 0, 0, 0, 0, vsld, 0, IF_VEC)
 /* VECTOR SHIFT LEFT DOUBLE BY BYTE */
-    F(0xe777, VSLDB,   VRI_d, V,   0, 0, 0, 0, vsldb, 0, IF_VEC)
+    F(0xe777, VSLDB,   VRI_d, V,   0, 0, 0, 0, vsld, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT ARITHMETIC */
     F(0xe77e, VSRA,    VRR_c, V,   0, 0, 0, 0, vsra, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT ARITHMETIC BY BYTE */
     F(0xe77f, VSRAB,   VRR_c, V,   0, 0, 0, 0, vsra, 0, IF_VEC)
+/* VECTOR SHIFT RIGHT DOUBLE BY BIT */
+       F(0xe787, VSRD,    VRI_d, VE2, 0, 0, 0, 0, vsrd, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT LOGICAL */
     F(0xe77c, VSRL,    VRR_c, V,   0, 0, 0, 0, vsrl, 0, IF_VEC)
 /* VECTOR SHIFT RIGHT LOGICAL BY BYTE */
diff --git a/target/s390x/tcg/translate_vx.c.inc 
b/target/s390x/tcg/translate_vx.c.inc
index db86d9b87d..60e1efdbfa 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -2020,26 +2020,33 @@ static DisasJumpType op_ves(DisasContext *s, DisasOps 
*o)
 
 static DisasJumpType op_vsl(DisasContext *s, DisasOps *o)
 {
-    TCGv_i64 shift = tcg_temp_new_i64();
-
-    read_vec_element_i64(shift, get_field(s, v3), 7, ES_8);
-    if (s->fields.op2 == 0x74) {
-        tcg_gen_andi_i64(shift, shift, 0x7);
+    const bool B = 0x75 == s->fields.op2;
+    if (!B && s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+        gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
+                       get_field(s, v3), 0,  gen_helper_gvec_vsl_ve2);
     } else {
-        tcg_gen_andi_i64(shift, shift, 0x78);
-    }
+        TCGv_i64 shift = tcg_temp_new_i64();
 
-    gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2),
-                    shift, 0, gen_helper_gvec_vsl);
-    tcg_temp_free_i64(shift);
+        read_vec_element_i64(shift, get_field(s, v3), 7, ES_8);
+        tcg_gen_andi_i64(shift, shift, B ? 0x78 : 7);
+        gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2),
+                        shift, 0, gen_helper_gvec_vsl);
+        tcg_temp_free_i64(shift);
+    }
     return DISAS_NEXT;
 }
 
-static DisasJumpType op_vsldb(DisasContext *s, DisasOps *o)
+static DisasJumpType op_vsld(DisasContext *s, DisasOps *o)
 {
-    const uint8_t i4 = get_field(s, i4) & 0xf;
-    const int left_shift = (i4 & 7) * 8;
-    const int right_shift = 64 - left_shift;
+    const uint8_t mask = (0x86 == s->fields.op2) ? 7 : 15;
+    const uint8_t mul  = (0x86 == s->fields.op2) ? 1 : 8;
+    const uint8_t i4   = get_field(s, i4);
+    const int shift = 64 - (i4 & 7) * mul;
+
+    if (i4 & ~mask) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
     TCGv_i64 t0 = tcg_temp_new_i64();
     TCGv_i64 t1 = tcg_temp_new_i64();
     TCGv_i64 t2 = tcg_temp_new_i64();
@@ -2053,8 +2060,8 @@ static DisasJumpType op_vsldb(DisasContext *s, DisasOps 
*o)
         read_vec_element_i64(t1, get_field(s, v3), 0, ES_64);
         read_vec_element_i64(t2, get_field(s, v3), 1, ES_64);
     }
-    tcg_gen_extract2_i64(t0, t1, t0, right_shift);
-    tcg_gen_extract2_i64(t1, t2, t1, right_shift);
+    tcg_gen_extract2_i64(t0, t1, t0, shift);
+    tcg_gen_extract2_i64(t1, t2, t1, shift);
     write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
     write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
 
@@ -2064,37 +2071,66 @@ static DisasJumpType op_vsldb(DisasContext *s, DisasOps 
*o)
     return DISAS_NEXT;
 }
 
-static DisasJumpType op_vsra(DisasContext *s, DisasOps *o)
+static DisasJumpType op_vsrd(DisasContext *s, DisasOps *o)
 {
-    TCGv_i64 shift = tcg_temp_new_i64();
+    const uint8_t i4 = get_field(s, i4);
+    const int left_shift = (i4 & 7);
+    if (i4 & ~7) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+
+    read_vec_element_i64(t0, get_field(s, v2), 1, ES_64);
+    read_vec_element_i64(t1, get_field(s, v3), 0, ES_64);
+    read_vec_element_i64(t2, get_field(s, v3), 1, ES_64);
+
+    tcg_gen_extract2_i64(t0, t1, t0, left_shift);
+    tcg_gen_extract2_i64(t1, t2, t1, left_shift);
+    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
+    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
 
-    read_vec_element_i64(shift, get_field(s, v3), 7, ES_8);
-    if (s->fields.op2 == 0x7e) {
-        tcg_gen_andi_i64(shift, shift, 0x7);
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(t2);
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vsra(DisasContext *s, DisasOps *o)
+{
+    const bool B = 0x7f == s->fields.op2;
+    if (!B && s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+        gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
+                       get_field(s, v3), 0, gen_helper_gvec_vsra_ve2);
     } else {
-        tcg_gen_andi_i64(shift, shift, 0x78);
-    }
+        TCGv_i64 shift = tcg_temp_new_i64();
 
-    gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2),
-                    shift, 0, gen_helper_gvec_vsra);
-    tcg_temp_free_i64(shift);
+        read_vec_element_i64(shift, get_field(s, v3), 7, ES_8);
+        tcg_gen_andi_i64(shift, shift, B ? 0x78 : 7);
+        gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2),
+                        shift, 0, gen_helper_gvec_vsra);
+        tcg_temp_free_i64(shift);
+    }
     return DISAS_NEXT;
 }
 
 static DisasJumpType op_vsrl(DisasContext *s, DisasOps *o)
 {
-    TCGv_i64 shift = tcg_temp_new_i64();
-
-    read_vec_element_i64(shift, get_field(s, v3), 7, ES_8);
-    if (s->fields.op2 == 0x7c) {
-        tcg_gen_andi_i64(shift, shift, 0x7);
+    const bool B = 0x7d == s->fields.op2;
+    if (!B && s390_has_feat(S390_FEAT_VECTOR_ENH2)) {
+        gen_gvec_3_ool(get_field(s, v1), get_field(s, v2),
+                       get_field(s, v3), 0, gen_helper_gvec_vsrl_ve2);
     } else {
-        tcg_gen_andi_i64(shift, shift, 0x78);
-    }
+        TCGv_i64 shift = tcg_temp_new_i64();
 
-    gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2),
-                    shift, 0, gen_helper_gvec_vsrl);
-    tcg_temp_free_i64(shift);
+        read_vec_element_i64(shift, get_field(s, v3), 7, ES_8);
+        tcg_gen_andi_i64(shift, shift, B ? 0x78 : 7);
+        gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2),
+                        shift, 0, gen_helper_gvec_vsrl);
+        tcg_temp_free_i64(shift);
+    }
     return DISAS_NEXT;
 }
 
diff --git a/target/s390x/tcg/vec_int_helper.c 
b/target/s390x/tcg/vec_int_helper.c
index 5561b3ed90..02d0a1cc94 100644
--- a/target/s390x/tcg/vec_int_helper.c
+++ b/target/s390x/tcg/vec_int_helper.c
@@ -540,18 +540,76 @@ void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t 
count,
     s390_vec_shl(v1, v2, count);
 }
 
+void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3,
+                          uint32_t desc)
+{
+    uint8_t i, v;
+    S390Vector tmp = {};
+    for (i = 0; i < 16; i++) {
+        const uint8_t shift = s390_vec_read_element8(v3, i) & 7;
+        v = s390_vec_read_element8(v2, i);
+
+        if (shift) {
+            v <<= shift;
+            if (i < 15) {
+                v |= extract8(s390_vec_read_element8(v2, i + 1),
+                              8 - shift, shift);
+            }
+        }
+        s390_vec_write_element8(&tmp, i, v);
+    }
+    *(S390Vector *)v1 = tmp;
+}
+
 void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count,
                        uint32_t desc)
 {
     s390_vec_sar(v1, v2, count);
 }
 
+void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3,
+                           uint32_t desc)
+{
+    int i;
+    uint8_t t, v;
+    S390Vector tmp = {};
+    for (i = 0; i < 16; i++) {
+        const uint8_t shift = s390_vec_read_element8(v3, i) & 7;
+        v = s390_vec_read_element8(v2, i);
+        if (shift) {
+            t = i > 0 ? s390_vec_read_element8(v2, i - 1)
+                      : ((v & 0x80) ? ~0 : 0);
+            v = deposit8(v >> shift, 8 - shift, shift, t);
+        }
+        s390_vec_write_element8(&tmp, i, v);
+    }
+    *(S390Vector *)v1 = tmp;
+}
+
 void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count,
                        uint32_t desc)
 {
     s390_vec_shr(v1, v2, count);
 }
 
+void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3,
+                           uint32_t desc)
+{
+    int i;
+    uint8_t t, v;
+    S390Vector tmp = {};
+    for (i = 0; i < 16; i++) {
+        const uint8_t shift = s390_vec_read_element8(v3, i) & 7;
+        v = s390_vec_read_element8(v2, i) >> shift;
+        if (shift) {
+            t = (0 == i ? 0 : s390_vec_read_element8(v2, i - 1));
+            v = deposit8(v, 8 - shift, shift, t);
+        }
+        s390_vec_write_element8(&tmp, i, v);
+    }
+    *(S390Vector *)v1 = tmp;
+}
+
 #define DEF_VSCBI(BITS)                                                        
\
 void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3,        
\
                               uint32_t desc)                                   
\
-- 
2.34.1

[Prev in Thread]

Current Thread

[Next in Thread]

[PATCH v2 0/7] s390x/tcg: Implement Vector-Enhancements Facility 2, David Miller, 2022/03/06
- [PATCH v2 1/7] target/s390x: vxeh2: vector convert short/32b, David Miller, 2022/03/06
  - Re: [PATCH v2 1/7] target/s390x: vxeh2: vector convert short/32b, Richard Henderson, 2022/03/07
  - Re: [PATCH v2 1/7] target/s390x: vxeh2: vector convert short/32b, David Hildenbrand, 2022/03/11
- [PATCH v2 3/7] target/s390x: vxeh2: vector shift {double by bit, left, right {logical, arithmetic}}, David Miller <=
  - Re: [PATCH v2 3/7] target/s390x: vxeh2: vector shift {double by bit, left, right {logical,arithmetic}}, Richard Henderson, 2022/03/07
- [PATCH v2 4/7] target/s390x: vxeh2: vector {load, store} elements reversed, David Miller, 2022/03/06
- [PATCH v2 7/7] tests/tcg/s390x: Tests for Vector Enhancements Facility 2, David Miller, 2022/03/06
- [PATCH v2 6/7] target/s390x: add S390_FEAT_VECTOR_ENH2 to cpu max, David Miller, 2022/03/06
  - Re: [PATCH v2 6/7] target/s390x: add S390_FEAT_VECTOR_ENH2 to cpu max, Richard Henderson, 2022/03/07
- [PATCH v2 2/7] target/s390x: vxeh2: vector string search, David Miller, 2022/03/06
  - Re: [PATCH v2 2/7] target/s390x: vxeh2: vector string search, Richard Henderson, 2022/03/07
- [PATCH v2 5/7] target/s390x: vxeh2: vector {load, store} reversed elements [and {zero, replicate}], David Miller, 2022/03/06

Prev by Date: [PATCH v2 1/7] target/s390x: vxeh2: vector convert short/32b
Next by Date: [PATCH v2 4/7] target/s390x: vxeh2: vector {load, store} elements reversed
Previous by thread: Re: [PATCH v2 1/7] target/s390x: vxeh2: vector convert short/32b
Next by thread: Re: [PATCH v2 3/7] target/s390x: vxeh2: vector shift {double by bit, left, right {logical,arithmetic}}
Index(es):
- Date
- Thread