[RFC v2 63/76] target/riscv: rvv-0.9: widening floating-point reduction instructions

qemu-riscv

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[RFC v2 63/76] target/riscv: rvv-0.9: widening floating-point reduction instructions

From:	frank . chang
Subject:	[RFC v2 63/76] target/riscv: rvv-0.9: widening floating-point reduction instructions
Date:	Wed, 22 Jul 2020 17:16:26 +0800

From: Frank Chang <frank.chang@sifive.com>

Separate the implementation of vfwredsum.vs and vfwredosum.vs.

Add NaN propagation to vfwredsum.vs, since implementations are permitted
to canonicalize the NaN and, if the NaN is signaling, to set the invalid
exception flag.

Signed-off-by: Frank Chang <frank.chang@sifive.com>
---
 target/riscv/helper.h                   |  2 +
 target/riscv/insn32.decode              |  3 +-
 target/riscv/insn_trans/trans_rvv.inc.c |  3 +-
 target/riscv/vector_helper.c            | 71 +++++++++++++++++++++++--
 4 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 6957a98237..cfe9baa253 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1078,6 +1078,8 @@ DEF_HELPER_6(vfredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
 
 DEF_HELPER_6(vfwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vfwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vfwredosum_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vfwredosum_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
 
 DEF_HELPER_6(vmand_mm, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vmnand_mm, void, ptr, ptr, ptr, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 0fe46c10c2..e32946b1f5 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -580,7 +580,8 @@ vfredosum_vs    000011 . ..... ..... 001 ..... 1010111 @r_vm
 vfredmin_vs     000101 . ..... ..... 001 ..... 1010111 @r_vm
 vfredmax_vs     000111 . ..... ..... 001 ..... 1010111 @r_vm
 # Vector widening ordered and unordered float reduction sum
-vfwredsum_vs    1100-1 . ..... ..... 001 ..... 1010111 @r_vm
+vfwredsum_vs    110001 . ..... ..... 001 ..... 1010111 @r_vm
+vfwredosum_vs   110011 . ..... ..... 001 ..... 1010111 @r_vm
 vmand_mm        011001 - ..... ..... 010 ..... 1010111 @r
 vmnand_mm       011101 - ..... ..... 010 ..... 1010111 @r
 vmandnot_mm     011000 - ..... ..... 010 ..... 1010111 @r
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
index 37eee6cf97..10d8b8b00d 100644
--- a/target/riscv/insn_trans/trans_rvv.inc.c
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
@@ -3016,7 +3016,8 @@ GEN_OPFVV_TRANS(vfredmax_vs, reduction_check)
 GEN_OPFVV_TRANS(vfredmin_vs, reduction_check)
 
 /* Vector Widening Floating-Point Reduction Instructions */
-GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, reduction_check)
+GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, reduction_widen_check)
+GEN_OPFVV_WIDEN_TRANS(vfwredosum_vs, reduction_widen_check)
 
 /*
  *** Vector Mask Operations
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 8465aec94e..2b2b1f521f 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -4805,6 +4805,51 @@ GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, false,
               float64_minnum_noprop, clearq)
 
 /* Vector Widening Floating-Point Reduction Instructions */
+/* Ordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
+void HELPER(vfwredosum_vs_h)(void *vd, void *v0, void *vs1,
+                             void *vs2, CPURISCVState *env, uint32_t desc)
+{
+    uint32_t vm = vext_vm(desc);
+    uint32_t vta = vext_vta(desc);
+    uint32_t vl = env->vl;
+    uint32_t i;
+    uint32_t tot = env_archcpu(env)->cfg.vlen >> 3;
+    uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
+
+    for (i = 0; i < vl; i++) {
+        uint16_t s2 = *((uint16_t *)vs2 + H2(i));
+        if (!vm && !vext_elem_mask(v0, i)) {
+            continue;
+        }
+        s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
+                         &env->fp_status);
+    }
+    *((uint32_t *)vd + H4(0)) = s1;
+    clearl(vd, vta, 1, sizeof(uint32_t), tot);
+}
+
+void HELPER(vfwredosum_vs_w)(void *vd, void *v0, void *vs1,
+                             void *vs2, CPURISCVState *env, uint32_t desc)
+{
+    uint32_t vm = vext_vm(desc);
+    uint32_t vta = vext_vta(desc);
+    uint32_t vl = env->vl;
+    uint32_t i;
+    uint32_t tot = env_archcpu(env)->cfg.vlen >> 3;
+    uint64_t s1 =  *((uint64_t *)vs1);
+
+    for (i = 0; i < vl; i++) {
+        uint32_t s2 = *((uint32_t *)vs2 + H4(i));
+        if (!vm && !vext_elem_mask(v0, i)) {
+            continue;
+        }
+        s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
+                         &env->fp_status);
+    }
+    *((uint64_t *)vd) = s1;
+    clearq(vd, vta, 1, sizeof(uint64_t), tot);
+}
+
 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
                             void *vs2, CPURISCVState *env, uint32_t desc)
@@ -4813,18 +4858,27 @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
     uint32_t vta = vext_vta(desc);
     uint32_t vl = env->vl;
     uint32_t i;
-    uint32_t tot = env_archcpu(env)->cfg.vlen / 8;
+    uint32_t tot = env_archcpu(env)->cfg.vlen >> 3;
     uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
+    bool active = false;
 
     for (i = 0; i < vl; i++) {
         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
         if (!vm && !vext_elem_mask(v0, i)) {
             continue;
         }
+        active = true;
         s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
                          &env->fp_status);
     }
-    *((uint32_t *)vd + H4(0)) = s1;
+
+    if (vl > 0) {
+        if (!active) {
+            *((uint32_t *)vd + H4(0)) = propagate_nan(s1, 32, &env->fp_status);
+        } else {
+            *((uint32_t *)vd + H4(0)) = s1;
+        }
+    }
     clearl(vd, vta, 1, sizeof(uint32_t), tot);
 }
 
@@ -4835,18 +4889,27 @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
     uint32_t vta = vext_vta(desc);
     uint32_t vl = env->vl;
     uint32_t i;
-    uint32_t tot = env_archcpu(env)->cfg.vlen / 8;
+    uint32_t tot = env_archcpu(env)->cfg.vlen >> 3;
     uint64_t s1 =  *((uint64_t *)vs1);
+    bool active = false;
 
     for (i = 0; i < vl; i++) {
         uint32_t s2 = *((uint32_t *)vs2 + H4(i));
         if (!vm && !vext_elem_mask(v0, i)) {
             continue;
         }
+        active = true;
         s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
                          &env->fp_status);
     }
-    *((uint64_t *)vd) = s1;
+
+    if (vl > 0) {
+        if (!active) {
+            *((uint64_t *)vd) = propagate_nan(s1, 64, &env->fp_status);
+        } else {
+            *((uint64_t *)vd) = s1;
+        }
+    }
     clearq(vd, vta, 1, sizeof(uint64_t), tot);
 }
 
-- 
2.17.1

[Prev in Thread]

Current Thread

[Next in Thread]

[RFC v2 58/76] target/riscv: rvv-0.9: slide instructions, (continued)
- [RFC v2 58/76] target/riscv: rvv-0.9: slide instructions, frank . chang, 2020/07/22
  - Re: [RFC v2 58/76] target/riscv: rvv-0.9: slide instructions, Richard Henderson, 2020/07/31
- [RFC v2 59/76] target/riscv: rvv-0.9: floating-point slide instructions, frank . chang, 2020/07/22
  - Re: [RFC v2 59/76] target/riscv: rvv-0.9: floating-point slide instructions, Richard Henderson, 2020/07/31
- [RFC v2 60/76] target/riscv: rvv-0.9: narrowing fixed-point clip instructions, frank . chang, 2020/07/22
  - Re: [RFC v2 60/76] target/riscv: rvv-0.9: narrowing fixed-point clip instructions, Richard Henderson, 2020/07/31
- [RFC v2 61/76] target/riscv: rvv-0.9: floating-point/integer type-convert instructions, frank . chang, 2020/07/22
  - Re: [RFC v2 61/76] target/riscv: rvv-0.9: floating-point/integer type-convert instructions, Richard Henderson, 2020/07/31
- [RFC v2 62/76] target/riscv: rvv-0.9: single-width floating-point reduction, frank . chang, 2020/07/22
  - Re: [RFC v2 62/76] target/riscv: rvv-0.9: single-width floating-point reduction, Richard Henderson, 2020/07/31
- [RFC v2 63/76] target/riscv: rvv-0.9: widening floating-point reduction instructions, frank . chang <=
- [RFC v2 64/76] target/riscv: rvv-0.9: single-width scaling shift instructions, frank . chang, 2020/07/22
  - Re: [RFC v2 64/76] target/riscv: rvv-0.9: single-width scaling shift instructions, Richard Henderson, 2020/07/31
- [RFC v2 65/76] target/riscv: rvv-0.9: remove widening saturating scaled multiply-add, frank . chang, 2020/07/22
  - Re: [RFC v2 65/76] target/riscv: rvv-0.9: remove widening saturating scaled multiply-add, Richard Henderson, 2020/07/31
- [RFC v2 66/76] target/riscv: rvv-0.9: remove vmford.vv and vmford.vf, frank . chang, 2020/07/22
  - Re: [RFC v2 66/76] target/riscv: rvv-0.9: remove vmford.vv and vmford.vf, Richard Henderson, 2020/07/31
- [RFC v2 67/76] target/riscv: rvv-0.9: remove integer extract instruction, frank . chang, 2020/07/22
  - Re: [RFC v2 67/76] target/riscv: rvv-0.9: remove integer extract instruction, Richard Henderson, 2020/07/31
- [RFC v2 68/76] fpu: add api to handle alternative sNaN propagation, frank . chang, 2020/07/22
- [RFC v2 69/76] target/riscv: rvv-0.9: floating-point min/max instructions, frank . chang, 2020/07/22

Prev by Date: [RFC v2 62/76] target/riscv: rvv-0.9: single-width floating-point reduction
Next by Date: [RFC v2 64/76] target/riscv: rvv-0.9: single-width scaling shift instructions
Previous by thread: Re: [RFC v2 62/76] target/riscv: rvv-0.9: single-width floating-point reduction
Next by thread: [RFC v2 64/76] target/riscv: rvv-0.9: single-width scaling shift instructions
Index(es):
- Date
- Thread