qemu-s390x
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v3 07/11] target/s390x: vxeh2: vector {load, store} byte reversed elements


From: David Hildenbrand
Subject: Re: [PATCH v3 07/11] target/s390x: vxeh2: vector {load, store} byte reversed elements
Date: Mon, 21 Mar 2022 12:45:06 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.6.2

On 08.03.22 02:53, Richard Henderson wrote:
> From: David Miller <dmiller423@gmail.com>
> 
> Signed-off-by: David Miller <dmiller423@gmail.com>
> Message-Id: <20220307020327.3003-6-dmiller423@gmail.com>
> [rth: Split out elements (plural) from element (scalar)
>       Use tcg little-endian memory ops, plus hswap and wswap.]
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/s390x/tcg/translate_vx.c.inc | 101 ++++++++++++++++++++++++++++
>  target/s390x/tcg/insn-data.def      |   4 ++
>  2 files changed, 105 insertions(+)
> 
> diff --git a/target/s390x/tcg/translate_vx.c.inc 
> b/target/s390x/tcg/translate_vx.c.inc
> index ac807122a3..9a82401d71 100644
> --- a/target/s390x/tcg/translate_vx.c.inc
> +++ b/target/s390x/tcg/translate_vx.c.inc
> @@ -457,6 +457,56 @@ static DisasJumpType op_vlrep(DisasContext *s, DisasOps 
> *o)
>      return DISAS_NEXT;
>  }
>  
> +static DisasJumpType op_vlbr(DisasContext *s, DisasOps *o)
> +{
> +    const uint8_t es = get_field(s, m3);
> +    TCGv_i64 t0, t1, tt;
> +
> +    if (es < ES_16 || es > ES_128) {
> +        gen_program_exception(s, PGM_SPECIFICATION);
> +        return DISAS_NORETURN;
> +    }
> +
> +    t0 = tcg_temp_new_i64();
> +    t1 = tcg_temp_new_i64();
> +
> +    /* Begin with byte reversed doublewords... */
> +    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
> +    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
> +    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
> +

Would it make sense to just special-case ES_128, by loading the two
doublewords into the proper t0/t1 right away?

if (es == ES_128) {
    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
    goto write;
}

/* Begin with byte reversed doublewords... */
tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);

/*
 * For 16 and 32-bit elements, the doubleword bswap also reversed
 * the order of the elements.  Perform a larger order swap to put
 * them back into place.
 */
switch (es) {
...
}

write:
write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);

> +    /*
> +     * For 16 and 32-bit elements, the doubleword bswap also reversed
> +     * the order of the elements.  Perform a larger order swap to put
> +     * them back into place.  For the 128-bit "element", finish the
> +     * bswap by swapping the doublewords.
> +     */
> +    switch (es) {
> +    case ES_16:
> +        tcg_gen_hswap_i64(t0, t0);
> +        tcg_gen_hswap_i64(t1, t1);
> +        break;
> +    case ES_32:
> +        tcg_gen_wswap_i64(t0, t0);
> +        tcg_gen_wswap_i64(t1, t1);
> +        break;
> +    case ES_64:
> +        break;
> +    case ES_128:
> +        tt = t0, t0 = t1, t1 = tt;
> +        break;
> +    default:
> +        g_assert_not_reached();
> +    }
> +
> +    write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
> +    write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
> +
> +    tcg_temp_free(t0);
> +    tcg_temp_free(t1);
> +    return DISAS_NEXT;
> +}
> +
>  static DisasJumpType op_vle(DisasContext *s, DisasOps *o)
>  {
>      const uint8_t es = s->insn->data;
> @@ -998,6 +1048,57 @@ static DisasJumpType op_vst(DisasContext *s, DisasOps 
> *o)
>      return DISAS_NEXT;
>  }
>  
> +static DisasJumpType op_vstbr(DisasContext *s, DisasOps *o)
> +{
> +    const uint8_t es = get_field(s, m3);
> +    TCGv_i64 t0, t1, tt;
> +
> +    if (es < ES_16 || es > ES_128) {
> +        gen_program_exception(s, PGM_SPECIFICATION);
> +        return DISAS_NORETURN;
> +    }
> +
> +    /* Probe write access before actually modifying memory */
> +    gen_helper_probe_write_access(cpu_env, o->addr1, tcg_constant_i64(16));
> +
> +    t0 = tcg_temp_new_i64();
> +    t1 = tcg_temp_new_i64();
> +    read_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
> +    read_vec_element_i64(t1, get_field(s, v1), 1, ES_64);


Ditto, maybe just special-case MO_128 directly.

> +
> +    /*
> +     * For 16 and 32-bit elements, the doubleword bswap below will
> +     * reverse the order of the elements.  Perform a larger order
> +     * swap to put them back into place.  For the 128-bit "element",
> +     * finish the bswap by swapping the doublewords.
> +     */
> +    switch (es) {
> +    case MO_16:
> +        tcg_gen_hswap_i64(t0, t0);
> +        tcg_gen_hswap_i64(t1, t1);
> +        break;
> +    case MO_32:
> +        tcg_gen_wswap_i64(t0, t0);
> +        tcg_gen_wswap_i64(t1, t1);
> +        break;
> +    case MO_64:
> +        break;
> +    case MO_128:
> +        tt = t0, t0 = t1, t1 = tt;
> +        break;
> +    default:
> +        g_assert_not_reached();
> +    }
> +
> +    tcg_gen_qemu_st_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
> +    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
> +    tcg_gen_qemu_st_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
> +
> +    tcg_temp_free(t0);
> +    tcg_temp_free(t1);
> +    return DISAS_NEXT;
> +}
> +
>  static DisasJumpType op_vste(DisasContext *s, DisasOps *o)
>  {
>      const uint8_t es = s->insn->data;
> diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
> index b524541a7d..ee6e1dc9e5 100644
> --- a/target/s390x/tcg/insn-data.def
> +++ b/target/s390x/tcg/insn-data.def
> @@ -1027,6 +1027,8 @@
>      F(0xe756, VLR,     VRR_a, V,   0, 0, 0, 0, vlr, 0, IF_VEC)
>  /* VECTOR LOAD AND REPLICATE */
>      F(0xe705, VLREP,   VRX,   V,   la2, 0, 0, 0, vlrep, 0, IF_VEC)
> +/* VECTOR LOAD BYTE REVERSED ELEMENTS */
> +    F(0xe606, VLBR,    VRX,   VE2, la2, 0, 0, 0, vlbr, 0, IF_VEC)
>  /* VECTOR LOAD ELEMENT */
>      E(0xe700, VLEB,    VRX,   V,   la2, 0, 0, 0, vle, 0, ES_8, IF_VEC)
>      E(0xe701, VLEH,    VRX,   V,   la2, 0, 0, 0, vle, 0, ES_16, IF_VEC)
> @@ -1079,6 +1081,8 @@
>      F(0xe75f, VSEG,    VRR_a, V,   0, 0, 0, 0, vseg, 0, IF_VEC)
>  /* VECTOR STORE */
>      F(0xe70e, VST,     VRX,   V,   la2, 0, 0, 0, vst, 0, IF_VEC)
> +/* VECTOR STORE BYTE REVERSED ELEMENTS */
> +    F(0xe60e, VSTBR,    VRX,   VE2, la2, 0, 0, 0, vstbr, 0, IF_VEC)
>  /* VECTOR STORE ELEMENT */
>      E(0xe708, VSTEB,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_8, IF_VEC)
>      E(0xe709, VSTEH,   VRX,   V,   la2, 0, 0, 0, vste, 0, ES_16, IF_VEC)


-- 
Thanks,

David / dhildenb




reply via email to

[Prev in Thread] Current Thread [Next in Thread]