[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 09/10] tcg/s390x: Use vector ctz for integer ctz
From: Richard Henderson
Subject: [PATCH 09/10] tcg/s390x: Use vector ctz for integer ctz
Date: Thu, 24 Feb 2022 05:43:32 -1000
There is no integer version of ctz, but there is a vector one.
Push the values to and fro, then fix up as required for the
semantics of the tcg operation.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target.h | 4 ++--
tcg/s390x/tcg-target.c.inc | 35 +++++++++++++++++++++++++++++++++++
2 files changed, 37 insertions(+), 2 deletions(-)
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 53c4da7730..4aff59b7c0 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -90,7 +90,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_nand_i32 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_clz_i32 0
-#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_ctz_i32 HAVE_FACILITY(VECTOR)
#define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_deposit_i32 HAVE_FACILITY(GEN_INST_EXT)
#define TCG_TARGET_HAS_extract_i32 HAVE_FACILITY(GEN_INST_EXT)
@@ -127,7 +127,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_nand_i64 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_clz_i64 HAVE_FACILITY(EXT_IMM)
-#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctz_i64 HAVE_FACILITY(VECTOR)
#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_deposit_i64 HAVE_FACILITY(GEN_INST_EXT)
#define TCG_TARGET_HAS_extract_i64 HAVE_FACILITY(GEN_INST_EXT)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index e32eddf584..9c3f8f365e 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -64,6 +64,7 @@
/* A scratch register that may be be used throughout the backend. */
#define TCG_TMP0 TCG_REG_R1
+#define TCG_TMPV TCG_REG_V31
/* A scratch register that holds a pointer to the beginning of the TB.
We don't need this when we have pc-relative loads with the general
@@ -291,6 +292,7 @@ typedef enum S390Opcode {
VRIb_VGM = 0xe746,
VRIc_VREP = 0xe74d,
+ VRRa_VCTZ = 0xe752,
VRRa_VLC = 0xe7de,
VRRa_VLP = 0xe7df,
VRRa_VLR = 0xe756,
@@ -1669,6 +1671,29 @@ static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2);
}
+static void tgen_ctz(TCGContext *s, TCGType type, TCGReg dest,
+ TCGReg a1, TCGArg a2, int a2const)
+{
+ MemOp vece = type == TCG_TYPE_I32 ? MO_32 : MO_64;
+ int width = 8 << vece;
+ int cc, inv_cc;
+ TCGReg src;
+
+ tcg_out_mov(s, type, TCG_TMPV, a1);
+ tcg_out_insn(s, VRRa, VCTZ, TCG_TMPV, TCG_TMPV, vece);
+
+ if (a2const && a2 == width) {
+ tcg_out_mov(s, type, dest, TCG_TMPV);
+ return;
+ }
+
+ cc = tgen_cmp2(s, type, TCG_COND_EQ, a1, 0, true, false, &inv_cc);
+
+ src = (a2const || dest != a2 ? dest : TCG_TMP0);
+ tcg_out_mov(s, type, src, TCG_TMPV);
+ tgen_movcond_int(s, type, dest, a2, a2const, src, cc, inv_cc);
+}
+
static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
int ofs, int len, int z)
{
@@ -2826,6 +2851,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tgen_clz(s, args[0], args[1], args[2], const_args[2]);
break;
+ case INDEX_op_ctz_i32:
+ tgen_ctz(s, TCG_TYPE_I32, args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_ctz_i64:
+ tgen_ctz(s, TCG_TYPE_I64, args[0], args[1], args[2], const_args[2]);
+ break;
+
case INDEX_op_mb:
/* The host memory model is quite strong, we simply need to
serialize the instruction stream. */
@@ -3303,6 +3335,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
return C_O1_I2(r, r, ri);
case INDEX_op_clz_i64:
+ case INDEX_op_ctz_i32:
+ case INDEX_op_ctz_i64:
return C_O1_I2(r, r, rI);
case INDEX_op_sub_i32:
@@ -3557,6 +3591,7 @@ static void tcg_target_init(TCGContext *s)
s->reserved_regs = 0;
tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMPV);
/* XXX many insns can't be used with R0, so we better avoid it for now */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
--
2.25.1
- [PATCH 00/10] tcg/s390x: updates for mie2 and mie3, Richard Henderson, 2022/02/24
- [PATCH 01/10] tcg/s390x: Distinguish RRF-a and RRF-c formats, Richard Henderson, 2022/02/24
- [PATCH 02/10] tcg/s390x: Distinguish RIE formats, Richard Henderson, 2022/02/24
- [PATCH 03/10] tcg/s390x: Support MIE2 multiply single instructions, Richard Henderson, 2022/02/24
- [PATCH 06/10] tcg/s390x: Create tgen_cmp2 to simplify movcond, Richard Henderson, 2022/02/24
- [PATCH 05/10] tcg/s390x: Support MIE3 logical operations, Richard Henderson, 2022/02/24
- [PATCH 04/10] tcg/s390x: Support MIE2 MGRK instruction, Richard Henderson, 2022/02/24
- [PATCH 08/10] tcg/s390x: Use tgen_movcond_int in tgen_clz, Richard Henderson, 2022/02/24
- [PATCH 07/10] tcg/s390x: Support SELGR instruction in MOVCOND, Richard Henderson, 2022/02/24
- [PATCH 09/10] tcg/s390x: Use vector ctz for integer ctz, Richard Henderson <=
- [PATCH 10/10] tcg/s390x: Implement ctpop operation, Richard Henderson, 2022/02/24