qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC PATCH v1 06/22] target/i386: introduce gen_gvec_ld


From: Richard Henderson
Subject: Re: [Qemu-devel] [RFC PATCH v1 06/22] target/i386: introduce gen_gvec_ld_modrm_* helpers
Date: Wed, 31 Jul 2019 15:47:30 -0700
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.8.0

On 7/31/19 10:56 AM, Jan Bobek wrote:
> +static inline void gen_gvec_ld_modrm_2(CPUX86State *env, DisasContext *s,
> +                                       int modrm, unsigned vece,
> +                                       uint32_t oprsz, uint32_t maxsz,
> +                                       gen_ld_modrm_2_fp_t gen_ld_modrm_2_fp,
> +                                       gen_gvec_2_fp_t gen_gvec_2_fp,
> +                                       int opctl)
> +{
> +    uint32_t ofss[2];
> +
> +    const int opd = ((opctl >> 6) & 7) - 1;
> +    const int opa = ((opctl >> 3) & 7) - 1;
> +    const int opb = ((opctl >> 0) & 7) - 1;
> +
> +    assert(0 <= opd && opd < 2);
> +    assert(0 <= opa && opa < 2);
> +    assert(0 <= opb && opb < 2);
> +
> +    (*gen_ld_modrm_2_fp)(env, s, modrm, &ofss[0], &ofss[1]);
> +    (*gen_gvec_2_fp)(vece, ofss[opd], ofss[opa], ofss[opb], oprsz, maxsz);
> +}
> +
> +static inline void gen_gvec_ld_modrm_3(CPUX86State *env, DisasContext *s,
> +                                       int modrm, unsigned vece,
> +                                       uint32_t oprsz, uint32_t maxsz,
> +                                       gen_ld_modrm_3_fp_t gen_ld_modrm_3_fp,
> +                                       gen_gvec_2_fp_t gen_gvec_2_fp,
> +                                       int opctl)
> +{
> +    uint32_t ofss[3];
> +
> +    const int opd = ((opctl >> 6) & 7) - 1;
> +    const int opa = ((opctl >> 3) & 7) - 1;
> +    const int opb = ((opctl >> 0) & 7) - 1;
> +
> +    assert(0 <= opd && opd < 3);
> +    assert(0 <= opa && opa < 3);
> +    assert(0 <= opb && opb < 3);
> +
> +    (*gen_ld_modrm_3_fp)(env, s, modrm, &ofss[0], &ofss[1], &ofss[2]);
> +    (*gen_gvec_2_fp)(vece, ofss[opd], ofss[opa], ofss[opb], oprsz, maxsz);
> +}
> +> +#define gen_gvec_ld_modrm_mm(env, s, modrm, vece,                       
> \> +
                            gen_gvec_2_fp, opctl)                      \> +
gen_gvec_ld_modrm_2((env), (s), (modrm), (vece),                    \> +
                 sizeof(MMXReg), sizeof(MMXReg),                 \> +
              gen_ld_modrm_PqQq,                              \> +
           gen_gvec_2_fp, (opctl))> +> +#define gen_gvec_ld_modrm_xmm(env, s,
modrm, vece,                      \> +
gen_gvec_2_fp, opctl)                     \> +    gen_gvec_ld_modrm_2((env),
(s), (modrm), (vece),                    \> +
sizeof(XMMReg), sizeof(XMMReg),                 \> +
gen_ld_modrm_VxWx,                              \> +
gen_gvec_2_fp, (opctl))> +> +#define gen_gvec_ld_modrm_vxmm(env, s, modrm,
vece,                     \> +                               gen_gvec_2_fp,
opctl)                    \> +    gen_gvec_ld_modrm_3((env), (s), (modrm),
(vece),                    \> +                        sizeof(XMMReg),
sizeof(ZMMReg),                 \> +
gen_ld_modrm_VxHxWx,                            \> +
gen_gvec_2_fp, (opctl))> +> +#define gen_gvec_ld_modrm_vymm(env, s, modrm,
vece,                     \> +                               gen_gvec_2_fp,
opctl)                    \> +    gen_gvec_ld_modrm_3((env), (s), (modrm),
(vece),                    \> +                        sizeof(YMMReg),
sizeof(ZMMReg),                 \> +
gen_ld_modrm_VxHxWx,                            \> +
gen_gvec_2_fp, (opctl))

I suppose there aren't so many different combinations, but did you consider
separate callbacks per operand?  If you have

typedef unsigned (*gen_offset)(CPUX86State *, DisasContext *, int);

static unsigned offset_Pq(CPUX86State *env, DisasContext *s, int modrm)
{
    int reg = (modrm >> 3) & 7; /* Ignore REX_R */
    return offsetof(CPUX86State, fpregs[reg].mmx);
}

static unsigned offset_Qq(CPUX86State *env, DisasContext *s, int modrm)
{
    int mod = (modrm >> 6) & 3;
    unsigned ret;

    if (mod == 3) {
        int rm = modrm & 7; /* Ignore REX_B */
        ret = offsetof(CPUX86State, fpregs[rm].mmx);
    } else {
        ret = offsetof(CPUX86State, mmx_t0);
        gen_lea_modrm(env, s, modrm);
        gen_ldq_env_A0(s, ret);
    }
    return ret;
}

static unsigned offset_Vx(CPUX86State *env, DisasContext *s, int modrm)
{
    int reg = ((modrm >> 3) & 7) | REX_R(s);
    return offsetof(CPUX86State, xmm_regs[reg]);
}

static unsigned offset_Wx(CPUX86State *env, DisasContext *s, int modrm)
{
    int mod = (modrm >> 6) & 3;
    unsigned ret;

    if (mod == 3) {
        int rm = (modrm & 7) | REX_B(s);
        ret = offsetof(CPUX86State, xmm_regs[rm]);
    } else {
        ret = offsetof(CPUX86State, xmm_t0);
        gen_lea_modrm(env, s, modrm);
        gen_ldo_env_A0(s, ret);
    }
    return ret;
}

static unsigned offset_Hx(CPUX86State *env, DisasContext *s, int modrm)
{
    return offsetof(CPUX86State, xmm_regs[s->vex_v]);
}

Then you can have

#define GEN_GVEC_3(OP0, OP1, OP2, OPRSZ, MAXSZ)
static void gen_gvec_ld_modrm_##OP0##OP1##OP2(CPUX86State *env,      \
    DisasContext *s, int modrm, unsigned vece,  gen_gvec_2_fp_t gen) \
{                                               \
    int ofd = offset_##OP0(env, s, modrm);      \
    int of1 = offset_##OP1(env, s, modrm);      \
    int of2 = offset_##OP2(env, s, modrm);      \
    gen(vece, opd, opa, opb, OPRSZ, MAXSZ);     \
}

GEN_GVEC_3(Pq, Pq, Qq, sizeof(MMXReg), sizeof(MMXReg))
GEN_GVEC_3(Vx, Vx, Wx, sizeof(XMMReg), max_vec_size(s))
GEN_GVEC_3(Vx, Hx, Wx, sizeof(XMMReg), max_vec_size(s))

The PqPqQq and VxVxWx sub-strings aren't quite canonical, but imo a better fit
to the actual format of the instruction, with 2 inputs and 1 output.

You can also do

GEN_GVEC_3(Pq, Qq, Pq, sizeof(MMXReg), sizeof(MMXReg))

for those rare "reversed" operations like PANDN.  Now you don't need to carry
around the OPCTL argument, which I initially found non-obvious.

I initially thought you'd be able to infer maxsz from the set of arguments, but
since there are vex encoded operations that do not use vex.vvvv that is not
always the case.  Thus I suggest

static size_t max_vec_size(DisasContext *s)
{
    if (s->prefixes & PREFIX_VEX) {
        /*
         * TODO: When avx512 is supported and enabled, sizeof(ZMMReg).
         * In the meantime don't waste time zeroing data that is not
         * architecturally present.
         */
        return sizeof(YMMReg);
    } else {
        /* Without vex encoding, only the low 128 bits are modified. */
        return sizeof(XMMReg);
    }
}


r~



reply via email to

[Prev in Thread] Current Thread [Next in Thread]