[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v3 03/11] target/s390x: vxeh2: vector string search
From: |
David Hildenbrand |
Subject: |
Re: [PATCH v3 03/11] target/s390x: vxeh2: vector string search |
Date: |
Mon, 21 Mar 2022 11:31:10 +0100 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.6.2 |
On 08.03.22 02:53, Richard Henderson wrote:
> From: David Miller <dmiller423@gmail.com>
>
> Signed-off-by: David Miller <dmiller423@gmail.com>
> Message-Id: <20220307020327.3003-3-dmiller423@gmail.com>
> [rth: Rewrite helpers; fix validation of m6.]
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>
> The substring search was incorrect, in that it didn't properly
> restart the search when a match failed. Split the helper into
> multiple, so that the memory accesses can be optimized.
> ---
> target/s390x/helper.h | 6 ++
> target/s390x/tcg/translate.c | 3 +-
> target/s390x/tcg/vec_string_helper.c | 101 +++++++++++++++++++++++++++
> target/s390x/tcg/translate_vx.c.inc | 26 +++++++
> target/s390x/tcg/insn-data.def | 2 +
> 5 files changed, 137 insertions(+), 1 deletion(-)
>
> diff --git a/target/s390x/helper.h b/target/s390x/helper.h
> index 7cbcbd7f0b..7412130883 100644
> --- a/target/s390x/helper.h
> +++ b/target/s390x/helper.h
> @@ -246,6 +246,12 @@ DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr,
> cptr, env, i32)
> DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32)
> DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32)
> DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32)
> +DEF_HELPER_6(gvec_vstrs_8, void, ptr, cptr, cptr, cptr, env, i32)
> +DEF_HELPER_6(gvec_vstrs_16, void, ptr, cptr, cptr, cptr, env, i32)
> +DEF_HELPER_6(gvec_vstrs_32, void, ptr, cptr, cptr, cptr, env, i32)
> +DEF_HELPER_6(gvec_vstrs_zs8, void, ptr, cptr, cptr, cptr, env, i32)
> +DEF_HELPER_6(gvec_vstrs_zs16, void, ptr, cptr, cptr, cptr, env, i32)
> +DEF_HELPER_6(gvec_vstrs_zs32, void, ptr, cptr, cptr, cptr, env, i32)
>
> /* === Vector Floating-Point Instructions */
> DEF_HELPER_FLAGS_5(gvec_vfa32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env,
> i32)
> diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
> index 904b51542f..d9ac29573d 100644
> --- a/target/s390x/tcg/translate.c
> +++ b/target/s390x/tcg/translate.c
> @@ -6222,7 +6222,8 @@ enum DisasInsnEnum {
> #define FAC_PCI S390_FEAT_ZPCI /* z/PCI facility */
> #define FAC_AIS S390_FEAT_ADAPTER_INT_SUPPRESSION
> #define FAC_V S390_FEAT_VECTOR /* vector facility */
> -#define FAC_VE S390_FEAT_VECTOR_ENH /* vector enhancements facility
> 1 */
> +#define FAC_VE S390_FEAT_VECTOR_ENH /* vector enhancements
> facility 1 */
> +#define FAC_VE2 S390_FEAT_VECTOR_ENH2 /* vector enhancements
> facility 2 */
> #define FAC_MIE2 S390_FEAT_MISC_INSTRUCTION_EXT2 /*
> miscellaneous-instruction-extensions facility 2 */
> #define FAC_MIE3 S390_FEAT_MISC_INSTRUCTION_EXT3 /*
> miscellaneous-instruction-extensions facility 3 */
>
> diff --git a/target/s390x/tcg/vec_string_helper.c
> b/target/s390x/tcg/vec_string_helper.c
> index ac315eb095..6c0476ecc1 100644
> --- a/target/s390x/tcg/vec_string_helper.c
> +++ b/target/s390x/tcg/vec_string_helper.c
> @@ -471,3 +471,104 @@ void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const
> void *v2, const void *v3, \
> DEF_VSTRC_CC_RT_HELPER(8)
> DEF_VSTRC_CC_RT_HELPER(16)
> DEF_VSTRC_CC_RT_HELPER(32)
> +
> +static int vstrs(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
> + const S390Vector *v4, uint8_t es, bool zs)
> +{
> + int substr_elen, substr_0, str_elen, i, j, k, cc;
> + int nelem = 16 >> es;
> + bool eos = false;
> +
> + substr_elen = s390_vec_read_element8(v4, 7) >> es;
> +
> + /* If ZS, bound substr length by min(nelem, strlen(v3)). */
> + if (zs) {
> + int i;
You can drop this "int i;"
> + for (i = 0; i < nelem; i++) {
> + if (s390_vec_read_element(v3, i, es) == 0) {
> + break;
> + }
> + }
> + if (i < substr_elen) {
> + substr_elen = i;
> + }
Maybe combine both, I guess there is no need to search beyond substr_elen.
substr_elen = MIN(substr_elen, nelem);
for (i = 0; i < substr_elen; i++) {
if (s390_vec_read_element(v3, i, es) == 0) {
substr_elen = i;
break;
}
}
We should do the MIN(substr_elen, nelem) maybe right when reading it
from v4.
> + }
> +
> + if (substr_elen == 0) {
> + cc = 2; /* full match for degenerate case of empty substr */
> + k = 0;
> + goto done;
> + }
> +
> + /* If ZS, look for eos in the searched string. */
> + if (zs) {
> + for (k = 0; k < nelem; k++) {
> + if (s390_vec_read_element(v2, k, es) == 0) {
> + eos = true;
> + break;
> + }
> + }
I guess we could move that into the main search loop and avoid parsing
the string twice. Not sure what's better.
> + str_elen = k;
> + } else {
> + str_elen = nelem;
> + }
> +
> + substr_0 = s390_vec_read_element(v3, 0, es);
> +
> + for (k = 0; ; k++) {
> + for (; k < str_elen; k++) {
> + if (s390_vec_read_element(v2, k, es) == substr_0) {
> + break;
> + }
> + }
> +
> + /* If we reached the end of the string, no match. */
> + if (k == str_elen) {
> + cc = eos; /* no match (with or without zero char) */
> + goto done;
> + }
> +
> + /* If the substring is only one char, match. */
> + if (substr_elen == 1) {
> + cc = 2; /* full match */
> + goto done;
> + }
> +
> + /* If the match begins at the last char, we have a partial match. */
> + if (k == str_elen - 1) {
> + cc = 3; /* partial match */
> + goto done;
> + }
> +
> + i = MIN(nelem, k + substr_elen);
> + for (j = k + 1; j < i; j++) {
> + uint32_t e2 = s390_vec_read_element(v2, j, es);
> + uint32_t e3 = s390_vec_read_element(v3, j - k, es);
> + if (e2 != e3) {
> + break;
> + }
> + }
> + if (j == i) {
> + /* Matched up until "end". */
> + cc = i - k == substr_elen ? 2 : 3; /* full or partial match */
> + goto done;
> + }
> + }
> +
> + done:
> + s390_vec_write_element64(v1, 0, k << es);
> + s390_vec_write_element64(v1, 1, 0);
> + return cc;
> +}
> +
> +#define DEF_VSTRS_HELPER(BITS) \
> +void QEMU_FLATTEN HELPER(gvec_vstrs_##BITS)(void *v1, const void *v2, \
> + const void *v3, const void *v4, CPUS390XState *env, uint32_t desc) \
> + { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, false); } \
> +void QEMU_FLATTEN HELPER(gvec_vstrs_zs##BITS)(void *v1, const void *v2, \
> + const void *v3, const void *v4, CPUS390XState *env, uint32_t desc) \
> + { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, true); }
> +
> +DEF_VSTRS_HELPER(8)
> +DEF_VSTRS_HELPER(16)
> +DEF_VSTRS_HELPER(32)
> diff --git a/target/s390x/tcg/translate_vx.c.inc
> b/target/s390x/tcg/translate_vx.c.inc
> index ea28e40d4f..d514e8b218 100644
> --- a/target/s390x/tcg/translate_vx.c.inc
> +++ b/target/s390x/tcg/translate_vx.c.inc
> @@ -2497,6 +2497,32 @@ static DisasJumpType op_vstrc(DisasContext *s,
> DisasOps *o)
> return DISAS_NEXT;
> }
>
> +static DisasJumpType op_vstrs(DisasContext *s, DisasOps *o)
> +{
> + typedef void (*helper_vstrs)(TCGv_ptr, TCGv_ptr, TCGv_ptr,
> + TCGv_ptr, TCGv_ptr, TCGv_i32);
> + static const helper_vstrs fns[3][2] = {
> + { gen_helper_gvec_vstrs_8, gen_helper_gvec_vstrs_zs8 },
> + { gen_helper_gvec_vstrs_16, gen_helper_gvec_vstrs_zs16 },
> + { gen_helper_gvec_vstrs_32, gen_helper_gvec_vstrs_zs32 },
> + };
> +
Superfluous empty line.
> + const uint8_t m5 = get_field(s, m5);
Could so a s/m5/es/ , as we do it in other handlers.
> + const uint8_t m6 = get_field(s, m6);
> + bool zs = m6 & 2;
I remember we wanted to use extract32() for such bit-tests, at least we
do it in most of the other handlers :)
const bool zs = extract32(m6, 1, 1);
?
> +
> + if (m5 > ES_32 || m6 & ~2) {
> + gen_program_exception(s, PGM_SPECIFICATION);
> + return DISAS_NORETURN;
> + }
> +
--
Thanks,
David / dhildenb
- [PATCH v3 00/11] s390x/tcg: Implement Vector-Enhancements Facility 2, Richard Henderson, 2022/03/07
- [PATCH v3 04/11] target/s390x: vxeh2: Update for changes to vector shifts, Richard Henderson, 2022/03/07
- [PATCH v3 05/11] target/s390x: vxeh2: vector shift double by bit, Richard Henderson, 2022/03/07
- [PATCH v3 07/11] target/s390x: vxeh2: vector {load, store} byte reversed elements, Richard Henderson, 2022/03/07
- [PATCH v3 06/11] target/s390x: vxeh2: vector {load, store} elements reversed, Richard Henderson, 2022/03/07