[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v3 19/23] i386: Destructive FP helpers for AVX
From: |
Paolo Bonzini |
Subject: |
[PATCH v3 19/23] i386: Destructive FP helpers for AVX |
Date: |
Thu, 1 Sep 2022 09:48:38 +0200 |
From: Paul Brook <paul@nowt.org>
Prepare the horizontal arithmetic vector helpers for AVX.
These currently use a dummy Reg-typed variable to store the result and then
assign the whole register. This will cause 128-bit operations to corrupt
the upper half of the register, so replace it with explicit temporaries
and element assignments.
Signed-off-by: Paul Brook <paul@nowt.org>
Message-Id: <20220424220204.2493824-18-paul@nowt.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/ops_sse.h | 93 ++++++++++++++++++-------------------------
1 file changed, 39 insertions(+), 54 deletions(-)
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 59ed30071e..61722fe4a2 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -945,45 +927,49 @@ void helper_insertq_i(CPUX86State *env, ZMMReg *d, int
index, int length)
d->ZMM_Q(0) = helper_insertq(d->ZMM_Q(0), index, length);
}
-void glue(helper_haddps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
- ZMMReg r;
-
- r.ZMM_S(0) = float32_add(d->ZMM_S(0), d->ZMM_S(1), &env->sse_status);
- r.ZMM_S(1) = float32_add(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status);
- r.ZMM_S(2) = float32_add(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
- r.ZMM_S(3) = float32_add(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
- MOVE(*d, r);
+#define SSE_HELPER_HPS(name, F) \
+void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \
+{ \
+ Reg *v = d; \
+ float32 r[2 << SHIFT]; \
+ int i, j, k; \
+ for (k = 0; k < 2 << SHIFT; k += LANE_WIDTH / 4) { \
+ for (i = j = 0; j < 4; i++, j += 2) { \
+ r[i + k] = F(v->ZMM_S(j + k), v->ZMM_S(j + k + 1),
&env->sse_status); \
+ } \
+ for (j = 0; j < 4; i++, j += 2) { \
+ r[i + k] = F(s->ZMM_S(j + k), s->ZMM_S(j + k + 1),
&env->sse_status); \
+ } \
+ } \
+ for (i = 0; i < 2 << SHIFT; i++) { \
+ d->ZMM_S(i) = r[i]; \
+ } \
}
-void glue(helper_haddpd, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
- ZMMReg r;
+SSE_HELPER_HPS(haddps, float32_add)
+SSE_HELPER_HPS(hsubps, float32_sub)
- r.ZMM_D(0) = float64_add(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status);
- r.ZMM_D(1) = float64_add(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
- MOVE(*d, r);
+#define SSE_HELPER_HPD(name, F) \
+void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \
+{ \
+ Reg *v = d; \
+ float64 r[1 << SHIFT]; \
+ int i, j, k; \
+ for (k = 0; k < 1 << SHIFT; k += LANE_WIDTH / 8) { \
+ for (i = j = 0; j < 2; i++, j += 2) { \
+ r[i + k] = F(v->ZMM_D(j + k), v->ZMM_D(j + k + 1),
&env->sse_status); \
+ } \
+ for (j = 0; j < 2; i++, j += 2) { \
+ r[i + k] = F(s->ZMM_D(j + k), s->ZMM_D(j + k + 1),
&env->sse_status); \
+ } \
+ } \
+ for (i = 0; i < 1 << SHIFT; i++) { \
+ d->ZMM_D(i) = r[i]; \
+ } \
}
-void glue(helper_hsubps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
- ZMMReg r;
-
- r.ZMM_S(0) = float32_sub(d->ZMM_S(0), d->ZMM_S(1), &env->sse_status);
- r.ZMM_S(1) = float32_sub(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status);
- r.ZMM_S(2) = float32_sub(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
- r.ZMM_S(3) = float32_sub(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
- MOVE(*d, r);
-}
-
-void glue(helper_hsubpd, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
- ZMMReg r;
-
- r.ZMM_D(0) = float64_sub(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status);
- r.ZMM_D(1) = float64_sub(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
- MOVE(*d, r);
-}
+SSE_HELPER_HPD(haddpd, float64_add)
+SSE_HELPER_HPD(hsubpd, float64_sub)
void glue(helper_addsubps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
{
--
2.37.1
- Re: [PATCH v3 10/23] i386: do not cast gen_helper_* function pointers, (continued)
- [PATCH v3 11/23] i386: Add CHECK_NO_VEX, Paolo Bonzini, 2022/09/01
- [PATCH v3 18/23] i386: Dot product AVX helper prep, Paolo Bonzini, 2022/09/01
- [PATCH v3 21/23] i386: Rewrite blendv helpers, Paolo Bonzini, 2022/09/01
- [PATCH v3 12/23] i386: Rewrite vector shift helper, Paolo Bonzini, 2022/09/01
- [PATCH v3 14/23] i386: Misc integer AVX helper prep, Paolo Bonzini, 2022/09/01
- [PATCH v3 13/23] i386: Rewrite simple integer vector helpers, Paolo Bonzini, 2022/09/01
- [PATCH v3 17/23] i386: reimplement AVX comparison helpers, Paolo Bonzini, 2022/09/01
- [PATCH v3 20/23] i386: Misc AVX helper prep, Paolo Bonzini, 2022/09/01
- [PATCH v3 19/23] i386: Destructive FP helpers for AVX,
Paolo Bonzini <=
- [PATCH v3 15/23] i386: Destructive vector helpers for AVX, Paolo Bonzini, 2022/09/01
- [PATCH v3 16/23] i386: Floating point arithmetic helper AVX prep, Paolo Bonzini, 2022/09/01
- [PATCH v3 22/23] i386: AVX pclmulqdq prep, Paolo Bonzini, 2022/09/01
- [PATCH v3 23/23] i386: AVX+AES helpers prep, Paolo Bonzini, 2022/09/01