qemu-s390x
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v3 03/11] target/s390x: vxeh2: vector string search


From: David Hildenbrand
Subject: Re: [PATCH v3 03/11] target/s390x: vxeh2: vector string search
Date: Mon, 21 Mar 2022 11:31:10 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.6.2

On 08.03.22 02:53, Richard Henderson wrote:
> From: David Miller <dmiller423@gmail.com>
> 
> Signed-off-by: David Miller <dmiller423@gmail.com>
> Message-Id: <20220307020327.3003-3-dmiller423@gmail.com>
> [rth: Rewrite helpers; fix validation of m6.]
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> 
> The substring search was incorrect, in that it didn't properly
> restart the search when a match failed.  Split the helper into
> multiple, so that the memory accesses can be optimized.
> ---
>  target/s390x/helper.h                |   6 ++
>  target/s390x/tcg/translate.c         |   3 +-
>  target/s390x/tcg/vec_string_helper.c | 101 +++++++++++++++++++++++++++
>  target/s390x/tcg/translate_vx.c.inc  |  26 +++++++
>  target/s390x/tcg/insn-data.def       |   2 +
>  5 files changed, 137 insertions(+), 1 deletion(-)
> 
> diff --git a/target/s390x/helper.h b/target/s390x/helper.h
> index 7cbcbd7f0b..7412130883 100644
> --- a/target/s390x/helper.h
> +++ b/target/s390x/helper.h
> @@ -246,6 +246,12 @@ DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, 
> cptr, env, i32)
>  DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32)
>  DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32)
>  DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32)
> +DEF_HELPER_6(gvec_vstrs_8, void, ptr, cptr, cptr, cptr, env, i32)
> +DEF_HELPER_6(gvec_vstrs_16, void, ptr, cptr, cptr, cptr, env, i32)
> +DEF_HELPER_6(gvec_vstrs_32, void, ptr, cptr, cptr, cptr, env, i32)
> +DEF_HELPER_6(gvec_vstrs_zs8, void, ptr, cptr, cptr, cptr, env, i32)
> +DEF_HELPER_6(gvec_vstrs_zs16, void, ptr, cptr, cptr, cptr, env, i32)
> +DEF_HELPER_6(gvec_vstrs_zs32, void, ptr, cptr, cptr, cptr, env, i32)
>  
>  /* === Vector Floating-Point Instructions */
>  DEF_HELPER_FLAGS_5(gvec_vfa32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
> i32)
> diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
> index 904b51542f..d9ac29573d 100644
> --- a/target/s390x/tcg/translate.c
> +++ b/target/s390x/tcg/translate.c
> @@ -6222,7 +6222,8 @@ enum DisasInsnEnum {
>  #define FAC_PCI         S390_FEAT_ZPCI /* z/PCI facility */
>  #define FAC_AIS         S390_FEAT_ADAPTER_INT_SUPPRESSION
>  #define FAC_V           S390_FEAT_VECTOR /* vector facility */
> -#define FAC_VE          S390_FEAT_VECTOR_ENH /* vector enhancements facility 
> 1 */
> +#define FAC_VE          S390_FEAT_VECTOR_ENH  /* vector enhancements 
> facility 1 */
> +#define FAC_VE2         S390_FEAT_VECTOR_ENH2 /* vector enhancements 
> facility 2 */
>  #define FAC_MIE2        S390_FEAT_MISC_INSTRUCTION_EXT2 /* 
> miscellaneous-instruction-extensions facility 2 */
>  #define FAC_MIE3        S390_FEAT_MISC_INSTRUCTION_EXT3 /* 
> miscellaneous-instruction-extensions facility 3 */
>  
> diff --git a/target/s390x/tcg/vec_string_helper.c 
> b/target/s390x/tcg/vec_string_helper.c
> index ac315eb095..6c0476ecc1 100644
> --- a/target/s390x/tcg/vec_string_helper.c
> +++ b/target/s390x/tcg/vec_string_helper.c
> @@ -471,3 +471,104 @@ void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const 
> void *v2, const void *v3,  \
>  DEF_VSTRC_CC_RT_HELPER(8)
>  DEF_VSTRC_CC_RT_HELPER(16)
>  DEF_VSTRC_CC_RT_HELPER(32)
> +
> +static int vstrs(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
> +                 const S390Vector *v4, uint8_t es, bool zs)
> +{
> +    int substr_elen, substr_0, str_elen, i, j, k, cc;
> +    int nelem = 16 >> es;
> +    bool eos = false;
> +
> +    substr_elen = s390_vec_read_element8(v4, 7) >> es;
> +
> +    /* If ZS, bound substr length by min(nelem, strlen(v3)). */
> +    if (zs) {
> +        int i;

You can drop this "int i;"

> +        for (i = 0; i < nelem; i++) {
> +            if (s390_vec_read_element(v3, i, es) == 0) {
> +                break;
> +            }
> +        }
> +        if (i < substr_elen) {
> +            substr_elen = i;
> +        }

Maybe combine both, I guess there is no need to search beyond substr_elen.

substr_elen = MIN(substr_elen, nelem);
for (i = 0; i < substr_elen; i++) {
    if (s390_vec_read_element(v3, i, es) == 0) {
        substr_elen = i;
        break;
    }
}


We should do the MIN(substr_elen, nelem) maybe right when reading it
from v4.

> +    }
> +
> +    if (substr_elen == 0) {
> +        cc = 2; /* full match for degenerate case of empty substr */
> +        k = 0;
> +        goto done;
> +    }
> +
> +    /* If ZS, look for eos in the searched string. */
> +    if (zs) {
> +        for (k = 0; k < nelem; k++) {
> +            if (s390_vec_read_element(v2, k, es) == 0) {
> +                eos = true;
> +                break;
> +            }
> +        }

I guess we could move that into the main search loop and avoid parsing
the string twice. Not sure what's better.

> +        str_elen = k;
> +    } else {
> +        str_elen = nelem;
> +    }
> +
> +    substr_0 = s390_vec_read_element(v3, 0, es);
> +
> +    for (k = 0; ; k++) {
> +        for (; k < str_elen; k++) {
> +            if (s390_vec_read_element(v2, k, es) == substr_0) {
> +                break;
> +            }
> +        }
> +
> +        /* If we reached the end of the string, no match. */
> +        if (k == str_elen) {
> +            cc = eos; /* no match (with or without zero char) */
> +            goto done;
> +        }
> +
> +        /* If the substring is only one char, match. */
> +        if (substr_elen == 1) {
> +            cc = 2; /* full match */
> +            goto done;
> +        }
> +
> +        /* If the match begins at the last char, we have a partial match. */
> +        if (k == str_elen - 1) {
> +            cc = 3; /* partial match */
> +            goto done;
> +        }
> +
> +        i = MIN(nelem, k + substr_elen);
> +        for (j = k + 1; j < i; j++) {
> +            uint32_t e2 = s390_vec_read_element(v2, j, es);
> +            uint32_t e3 = s390_vec_read_element(v3, j - k, es);
> +            if (e2 != e3) {
> +                break;
> +            }
> +        }
> +        if (j == i) {
> +            /* Matched up until "end". */
> +            cc = i - k == substr_elen ? 2 : 3; /* full or partial match */
> +            goto done;
> +        }
> +    }
> +
> + done:
> +    s390_vec_write_element64(v1, 0, k << es);
> +    s390_vec_write_element64(v1, 1, 0);
> +    return cc;
> +}
> +
> +#define DEF_VSTRS_HELPER(BITS)                                             \
> +void QEMU_FLATTEN HELPER(gvec_vstrs_##BITS)(void *v1, const void *v2,      \
> +    const void *v3, const void *v4, CPUS390XState *env, uint32_t desc)     \
> +    { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, false); }              \
> +void QEMU_FLATTEN HELPER(gvec_vstrs_zs##BITS)(void *v1, const void *v2,    \
> +    const void *v3, const void *v4, CPUS390XState *env, uint32_t desc)     \
> +    { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, true); }
> +
> +DEF_VSTRS_HELPER(8)
> +DEF_VSTRS_HELPER(16)
> +DEF_VSTRS_HELPER(32)
> diff --git a/target/s390x/tcg/translate_vx.c.inc 
> b/target/s390x/tcg/translate_vx.c.inc
> index ea28e40d4f..d514e8b218 100644
> --- a/target/s390x/tcg/translate_vx.c.inc
> +++ b/target/s390x/tcg/translate_vx.c.inc
> @@ -2497,6 +2497,32 @@ static DisasJumpType op_vstrc(DisasContext *s, 
> DisasOps *o)
>      return DISAS_NEXT;
>  }
>  
> +static DisasJumpType op_vstrs(DisasContext *s, DisasOps *o)
> +{
> +    typedef void (*helper_vstrs)(TCGv_ptr, TCGv_ptr, TCGv_ptr,
> +                                 TCGv_ptr, TCGv_ptr, TCGv_i32);
> +    static const helper_vstrs fns[3][2] = {
> +        { gen_helper_gvec_vstrs_8, gen_helper_gvec_vstrs_zs8 },
> +        { gen_helper_gvec_vstrs_16, gen_helper_gvec_vstrs_zs16 },
> +        { gen_helper_gvec_vstrs_32, gen_helper_gvec_vstrs_zs32 },
> +    };
> +

Superfluous empty line.

> +    const uint8_t m5 = get_field(s, m5);

Could so a s/m5/es/ , as we do it in other handlers.

> +    const uint8_t m6 = get_field(s, m6);
> +    bool zs = m6 & 2;

I remember we wanted to use extract32() for such bit-tests, at least we
do it in most of the other handlers :)

const bool zs = extract32(m6, 1, 1);

?

> +
> +    if (m5 > ES_32 || m6 & ~2) {
> +        gen_program_exception(s, PGM_SPECIFICATION);
> +        return DISAS_NORETURN;
> +    }
> +



-- 
Thanks,

David / dhildenb




reply via email to

[Prev in Thread] Current Thread [Next in Thread]