qemu-s390x
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] s390x/tcg: Implement Miscellaneous-Instruction-Extensions Fa


From: David Hildenbrand
Subject: Re: [PATCH] s390x/tcg: Implement Miscellaneous-Instruction-Extensions Facility 3 for the s390x
Date: Tue, 15 Feb 2022 09:34:33 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.4.0

> 
> +/* move right to left */
> +void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t 
> src)
> +{
> +    const int mmu_idx = cpu_mmu_index(env, false);
> +    const uint64_t ra = GETPC();
> +    S390Access srca, desta;
> +    int32_t i;
> +
> +    /* MVCRL always copies one more byte than specified - maximum is 256 */
> +    l++;
> +
> +    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
> +    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
> +

... thinking again, this might not be correct.

Problem 1: is_destructive_overlap()

is_destructive_overlap() essentially checks for
        dest > src && dest <= src + len - 1;

but the rules for right-to-left are inverted I think
        src > dst && src <= dst + len - 1;

Could fix by inverting the parameters supplied to is_destructive_overlap(). 
Or we should just drop special casing completely because we don't care about
destructive overlaps. :)


Problem 2: access_memmove()

It essentially does a left-to-right copy
* On the slow path
* On the fast path by handling data on the left pages always first

The second part can be observed when crossing page boundaries.


To fix, the simplest solution is a simple unconditional

for (i = l - 1; i >= 0; i--) {
        uint8_t byte = access_get_byte(env, &srca, i, ra);
        access_set_byte(env, &desta, i, byte, ra);
}

The alternative is providing an access_memmove_rl and using that 
unconditionally:

/*
 * Move data with right-to-left semantics. Undefined behavior on destructive 
overlaps.
 */
static void access_memmove_rl(CPUS390XState *env, S390Access *desta,
                              S390Access *srca, uintptr_t ra)
{
    int diff;

    g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);

    /* Fallback to slow access in case we don't have access to all host pages */
    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
        int i;

        for (i = desta->size1 + desta->size2 - 1; i >= 0; i--) {
            uint8_t byte = access_get_byte(env, srca, i, ra);

            access_set_byte(env, desta, i, byte, ra);
        }
        return;
    }

    if (srca->size1 == desta->size1) {
        if (unlikely(srca->size2)) {
            memmove(desta->haddr2, srca->haddr2, srca->size2);
        }
        memmove(desta->haddr1, srca->haddr1, srca->size1);
    } else if (srca->size1 < desta->size1) {
        diff = desta->size1 - srca->size1;
        if (likely(desta->size2)) {
            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
        }
        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
        memmove(desta->haddr1, srca->haddr1, srca->size1);
    } else {
        diff = srca->size1 - desta->size1;
        if (likely(srca->size2)) {
            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
        }
        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
        memmove(desta->haddr1, srca->haddr1, desta->size1);
    }
}

Please double check :)

You can adjust your test program to cross a page boundary to see if it works as 
expected.


> +    if (!is_destructive_overlap(env, dest, src, l)) {
> +        access_memmove(env, &desta, &srca, ra);
> +    } else {
> +        for (i = l - 1; i >= 0; i--) {
> +            uint8_t byte = access_get_byte(env, &srca, i, ra);
> +            access_set_byte(env, &desta, i, byte, ra);
> +        }
> +    }
> +}
> +

[...]

>  static DisasJumpType op_mvcin(DisasContext *s, DisasOps *o)
>  {
>      TCGv_i32 l = tcg_const_i32(get_field(s, l1));
> @@ -3744,7 +3790,17 @@ static DisasJumpType op_pku(DisasContext *s, DisasOps 
> *o)
> 
>  static DisasJumpType op_popcnt(DisasContext *s, DisasOps *o)
>  {
> -    gen_helper_popcnt(o->out, o->in2);
> +    const uint8_t m3 = get_field(s, m3);
> +
> +    if (!m3) {
> +        gen_helper_popcnt(o->out, o->in2);
> +        return DISAS_NEXT;
> +    }
> +    if ((m3 & ~1) || !s390_has_feat(S390_FEAT_MISC_INSTRUCTION_EXT3)) {
> +        gen_program_exception(s, PGM_SPECIFICATION);
> +        return DISAS_NORETURN;
> +    }

Actually the instruction doesn't raise specification exceptions.


if (s390_has_feat(S390_FEAT_MISC_INSTRUCTION_EXT3 && (m3 & 1)) {
        tcg_gen_ctpop_i64(o->out, o->in2)
} else {
        gen_helper_popcnt(o->out, o->in2);
}
return DISAS_NEXT;

> +    tcg_gen_ctpop_i64(o->out, o->in2);
>      return DISAS_NEXT;


-- 
Thanks,

David / dhildenb




reply via email to

[Prev in Thread] Current Thread [Next in Thread]