[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-ppc] [PATCH 12/14] VSX Stage 4: Add Scalar SP Fused Multiply-Adds
From: |
Tom Musta |
Subject: |
[Qemu-ppc] [PATCH 12/14] VSX Stage 4: Add Scalar SP Fused Multiply-Adds |
Date: |
Wed, 6 Nov 2013 14:31:54 -0600 |
This patch adds the Single Precision VSX Scalar Fused Multiply-Add
instructions: xsmaddasp, xsmaddmsp, xssubasp, xssubmsp, xsnmaddasp,
xsnmaddmsp, xsnmsubasp, xsnmsubmsp.
The existing VSX_MADD() macro is modified to support rounding of the
intermediate double precision result to single precision.
Signed-off-by: Tom Musta <address@hidden>
---
target-ppc/fpu_helper.c | 70 +++++++++++++++++++++++++++++------------------
target-ppc/helper.h | 8 +++++
target-ppc/translate.c | 16 +++++++++++
3 files changed, 67 insertions(+), 27 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index e08f317..c86778f 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -2198,7 +2198,7 @@ VSX_TSQRT(xvtsqrtsp, 4, float32, f32, -126, 23)
* afrm - A form (1=A, 0=M)
* sfprf - set FPRF
*/
-#define VSX_MADD(op, nels, tp, fld, maddflgs, afrm, sfprf) \
+#define VSX_MADD(op, nels, tp, fld, maddflgs, afrm, sfprf, r2sp) \
void helper_##op(CPUPPCState *env, uint32_t opcode) \
{ \
ppc_vsr_t xt_in, xa, xb, xt_out; \
@@ -2248,6 +2248,13 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)
\
fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, sfprf); \
} \
} \
+ \
+ if (r2sp) { \
+ float32 tmp32 = float64_to_float32(xt_out.fld[i], \
+ &env->fp_status); \
+ xt_out.fld[i] = float32_to_float64(tmp32, &env->fp_status); \
+ } \
+ \
if (sfprf) { \
helper_compute_fprf(env, xt_out.fld[i], sfprf); \
} \
@@ -2261,32 +2268,41 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)
\
#define NMADD_FLGS float_muladd_negate_result
#define NMSUB_FLGS (float_muladd_negate_c | float_muladd_negate_result)
-VSX_MADD(xsmaddadp, 1, float64, f64, MADD_FLGS, 1, 1)
-VSX_MADD(xsmaddmdp, 1, float64, f64, MADD_FLGS, 0, 1)
-VSX_MADD(xsmsubadp, 1, float64, f64, MSUB_FLGS, 1, 1)
-VSX_MADD(xsmsubmdp, 1, float64, f64, MSUB_FLGS, 0, 1)
-VSX_MADD(xsnmaddadp, 1, float64, f64, NMADD_FLGS, 1, 1)
-VSX_MADD(xsnmaddmdp, 1, float64, f64, NMADD_FLGS, 0, 1)
-VSX_MADD(xsnmsubadp, 1, float64, f64, NMSUB_FLGS, 1, 1)
-VSX_MADD(xsnmsubmdp, 1, float64, f64, NMSUB_FLGS, 0, 1)
-
-VSX_MADD(xvmaddadp, 2, float64, f64, MADD_FLGS, 1, 0)
-VSX_MADD(xvmaddmdp, 2, float64, f64, MADD_FLGS, 0, 0)
-VSX_MADD(xvmsubadp, 2, float64, f64, MSUB_FLGS, 1, 0)
-VSX_MADD(xvmsubmdp, 2, float64, f64, MSUB_FLGS, 0, 0)
-VSX_MADD(xvnmaddadp, 2, float64, f64, NMADD_FLGS, 1, 0)
-VSX_MADD(xvnmaddmdp, 2, float64, f64, NMADD_FLGS, 0, 0)
-VSX_MADD(xvnmsubadp, 2, float64, f64, NMSUB_FLGS, 1, 0)
-VSX_MADD(xvnmsubmdp, 2, float64, f64, NMSUB_FLGS, 0, 0)
-
-VSX_MADD(xvmaddasp, 4, float32, f32, MADD_FLGS, 1, 0)
-VSX_MADD(xvmaddmsp, 4, float32, f32, MADD_FLGS, 0, 0)
-VSX_MADD(xvmsubasp, 4, float32, f32, MSUB_FLGS, 1, 0)
-VSX_MADD(xvmsubmsp, 4, float32, f32, MSUB_FLGS, 0, 0)
-VSX_MADD(xvnmaddasp, 4, float32, f32, NMADD_FLGS, 1, 0)
-VSX_MADD(xvnmaddmsp, 4, float32, f32, NMADD_FLGS, 0, 0)
-VSX_MADD(xvnmsubasp, 4, float32, f32, NMSUB_FLGS, 1, 0)
-VSX_MADD(xvnmsubmsp, 4, float32, f32, NMSUB_FLGS, 0, 0)
+VSX_MADD(xsmaddadp, 1, float64, f64, MADD_FLGS, 1, 1, 0)
+VSX_MADD(xsmaddmdp, 1, float64, f64, MADD_FLGS, 0, 1, 0)
+VSX_MADD(xsmsubadp, 1, float64, f64, MSUB_FLGS, 1, 1, 0)
+VSX_MADD(xsmsubmdp, 1, float64, f64, MSUB_FLGS, 0, 1, 0)
+VSX_MADD(xsnmaddadp, 1, float64, f64, NMADD_FLGS, 1, 1, 0)
+VSX_MADD(xsnmaddmdp, 1, float64, f64, NMADD_FLGS, 0, 1, 0)
+VSX_MADD(xsnmsubadp, 1, float64, f64, NMSUB_FLGS, 1, 1, 0)
+VSX_MADD(xsnmsubmdp, 1, float64, f64, NMSUB_FLGS, 0, 1, 0)
+
+VSX_MADD(xsmaddasp, 1, float64, f64, MADD_FLGS, 1, 1, 1)
+VSX_MADD(xsmaddmsp, 1, float64, f64, MADD_FLGS, 0, 1, 1)
+VSX_MADD(xsmsubasp, 1, float64, f64, MSUB_FLGS, 1, 1, 1)
+VSX_MADD(xsmsubmsp, 1, float64, f64, MSUB_FLGS, 0, 1, 1)
+VSX_MADD(xsnmaddasp, 1, float64, f64, NMADD_FLGS, 1, 1, 1)
+VSX_MADD(xsnmaddmsp, 1, float64, f64, NMADD_FLGS, 0, 1, 1)
+VSX_MADD(xsnmsubasp, 1, float64, f64, NMSUB_FLGS, 1, 1, 1)
+VSX_MADD(xsnmsubmsp, 1, float64, f64, NMSUB_FLGS, 0, 1, 1)
+
+VSX_MADD(xvmaddadp, 2, float64, f64, MADD_FLGS, 1, 0, 0)
+VSX_MADD(xvmaddmdp, 2, float64, f64, MADD_FLGS, 0, 0, 0)
+VSX_MADD(xvmsubadp, 2, float64, f64, MSUB_FLGS, 1, 0, 0)
+VSX_MADD(xvmsubmdp, 2, float64, f64, MSUB_FLGS, 0, 0, 0)
+VSX_MADD(xvnmaddadp, 2, float64, f64, NMADD_FLGS, 1, 0, 0)
+VSX_MADD(xvnmaddmdp, 2, float64, f64, NMADD_FLGS, 0, 0, 0)
+VSX_MADD(xvnmsubadp, 2, float64, f64, NMSUB_FLGS, 1, 0, 0)
+VSX_MADD(xvnmsubmdp, 2, float64, f64, NMSUB_FLGS, 0, 0, 0)
+
+VSX_MADD(xvmaddasp, 4, float32, f32, MADD_FLGS, 1, 0, 0)
+VSX_MADD(xvmaddmsp, 4, float32, f32, MADD_FLGS, 0, 0, 0)
+VSX_MADD(xvmsubasp, 4, float32, f32, MSUB_FLGS, 1, 0, 0)
+VSX_MADD(xvmsubmsp, 4, float32, f32, MSUB_FLGS, 0, 0, 0)
+VSX_MADD(xvnmaddasp, 4, float32, f32, NMADD_FLGS, 1, 0, 0)
+VSX_MADD(xvnmaddmsp, 4, float32, f32, NMADD_FLGS, 0, 0, 0)
+VSX_MADD(xvnmsubasp, 4, float32, f32, NMSUB_FLGS, 1, 0, 0)
+VSX_MADD(xvnmsubmsp, 4, float32, f32, NMSUB_FLGS, 0, 0, 0)
#define VSX_SCALAR_CMP(op, ordered) \
void helper_##op(CPUPPCState *env, uint32_t opcode) \
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 84c6ee1..655b670 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -293,6 +293,14 @@ DEF_HELPER_2(xsdivsp, void, env, i32)
DEF_HELPER_2(xsresp, void, env, i32)
DEF_HELPER_2(xssqrtsp, void, env, i32)
DEF_HELPER_2(xsrsqrtesp, void, env, i32)
+DEF_HELPER_2(xsmaddasp, void, env, i32)
+DEF_HELPER_2(xsmaddmsp, void, env, i32)
+DEF_HELPER_2(xsmsubasp, void, env, i32)
+DEF_HELPER_2(xsmsubmsp, void, env, i32)
+DEF_HELPER_2(xsnmaddasp, void, env, i32)
+DEF_HELPER_2(xsnmaddmsp, void, env, i32)
+DEF_HELPER_2(xsnmsubasp, void, env, i32)
+DEF_HELPER_2(xsnmsubmsp, void, env, i32)
DEF_HELPER_2(xvadddp, void, env, i32)
DEF_HELPER_2(xvsubdp, void, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index ae80289..672cf0a 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7348,6 +7348,14 @@ GEN_VSX_HELPER_2(xsdivsp, 0x00, 0x03, 0, PPC2_VSX207)
GEN_VSX_HELPER_2(xsresp, 0x14, 0x01, 0, PPC2_VSX207)
GEN_VSX_HELPER_2(xssqrtsp, 0x16, 0x00, 0, PPC2_VSX207)
GEN_VSX_HELPER_2(xsrsqrtesp, 0x14, 0x00, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsmaddasp, 0x04, 0x00, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsmaddmsp, 0x04, 0x01, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsmsubasp, 0x04, 0x02, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsmsubmsp, 0x04, 0x03, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsnmaddasp, 0x04, 0x10, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsnmaddmsp, 0x04, 0x11, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsnmsubasp, 0x04, 0x12, 0, PPC2_VSX207)
+GEN_VSX_HELPER_2(xsnmsubmsp, 0x04, 0x13, 0, PPC2_VSX207)
GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
@@ -10163,6 +10171,14 @@ GEN_XX3FORM(xsdivsp, 0x00, 0x03, PPC2_VSX207),
GEN_XX2FORM(xsresp, 0x14, 0x01, PPC2_VSX207),
GEN_XX2FORM(xssqrtsp, 0x16, 0x00, PPC2_VSX207),
GEN_XX2FORM(xsrsqrtesp, 0x14, 0x00, PPC2_VSX207),
+GEN_XX3FORM(xsmaddasp, 0x04, 0x00, PPC2_VSX207),
+GEN_XX3FORM(xsmaddmsp, 0x04, 0x01, PPC2_VSX207),
+GEN_XX3FORM(xsmsubasp, 0x04, 0x02, PPC2_VSX207),
+GEN_XX3FORM(xsmsubmsp, 0x04, 0x03, PPC2_VSX207),
+GEN_XX3FORM(xsnmaddasp, 0x04, 0x10, PPC2_VSX207),
+GEN_XX3FORM(xsnmaddmsp, 0x04, 0x11, PPC2_VSX207),
+GEN_XX3FORM(xsnmsubasp, 0x04, 0x12, PPC2_VSX207),
+GEN_XX3FORM(xsnmsubmsp, 0x04, 0x13, PPC2_VSX207),
GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
--
1.7.1
- [Qemu-ppc] [PATCH 02/14] VSX Stage 4: Refactor lxsdx, (continued)
- [Qemu-ppc] [PATCH 02/14] VSX Stage 4: Refactor lxsdx, Tom Musta, 2013/11/06
- [Qemu-ppc] [PATCH 04/14] VSX Stage 4: Refactor stxsdx, Tom Musta, 2013/11/06
- [Qemu-ppc] [PATCH 05/14] VSX Stage 4: Add stxsiwx and stxsspx, Tom Musta, 2013/11/06
- [Qemu-ppc] [PATCH 07/14] VSX Stage 4: Add xsmulsp, Tom Musta, 2013/11/06
- [Qemu-ppc] [PATCH 06/14] VSX Stage 4: Add xsaddsp and xssubsp, Tom Musta, 2013/11/06
- [Qemu-ppc] [PATCH 08/14] VSX Stage 4: Add xsdivsp, Tom Musta, 2013/11/06
- [Qemu-ppc] [PATCH 10/14] VSX Stage 4: Add xssqrtsp, Tom Musta, 2013/11/06
- [Qemu-ppc] [PATCH 09/14] VSX Stage 4: Add xsresp, Tom Musta, 2013/11/06
- [Qemu-ppc] [PATCH 11/14] VSX Stage 4: add xsrsqrtesp, Tom Musta, 2013/11/06
- [Qemu-ppc] [PATCH 13/14] VSX Stage 4: Add xscvsxdsp and xscvuxdsp, Tom Musta, 2013/11/06
- [Qemu-ppc] [PATCH 12/14] VSX Stage 4: Add Scalar SP Fused Multiply-Adds,
Tom Musta <=
- Re: [Qemu-ppc] [Qemu-devel] [PATCH 12/14] VSX Stage 4: Add Scalar SP Fused Multiply-Adds, Richard Henderson, 2013/11/07
- Re: [Qemu-ppc] [Qemu-devel] [PATCH 12/14] VSX Stage 4: Add Scalar SP Fused Multiply-Adds, Richard Henderson, 2013/11/07
- Re: [Qemu-ppc] [Qemu-devel] [PATCH 12/14] VSX Stage 4: Add Scalar SP Fused Multiply-Adds, Richard Henderson, 2013/11/07
- Re: [Qemu-ppc] [Qemu-devel] [PATCH 12/14] VSX Stage 4: Add Scalar SP Fused Multiply-Adds, Tom Musta, 2013/11/13
- Re: [Qemu-ppc] [Qemu-devel] [PATCH 12/14] VSX Stage 4: Add Scalar SP Fused Multiply-Adds, Richard Henderson, 2013/11/13
- Re: [Qemu-ppc] [Qemu-devel] [PATCH 12/14] VSX Stage 4: Add Scalar SP Fused Multiply-Adds, Tom Musta, 2013/11/14
[Qemu-ppc] [PATCH 14/14] VSX Stage 4: Add xxleqv, xxlnand and xxlorc, Tom Musta, 2013/11/06
Re: [Qemu-ppc] [Qemu-devel] [PATCH 00/14] VSX Stage 4, Richard Henderson, 2013/11/07