[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 14/15] tcg-mips: Use mipsr6 instructions in bran
From: |
James Hogan |
Subject: |
Re: [Qemu-devel] [PATCH 14/15] tcg-mips: Use mipsr6 instructions in branches |
Date: |
Tue, 9 Feb 2016 16:22:34 +0000 |
User-agent: |
Mutt/1.5.23 (2014-03-12) |
Hi Richard,
On Tue, Feb 09, 2016 at 09:40:02PM +1100, Richard Henderson wrote:
> Using compact branches, when possible, avoids a delay slot nop.
>
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> include/elf.h | 4 +
> tcg/mips/tcg-target.c | 216
> +++++++++++++++++++++++++++++++++++++++-----------
> 2 files changed, 172 insertions(+), 48 deletions(-)
>
> diff --git a/include/elf.h b/include/elf.h
> index 1098d21..6e52ba0 100644
> --- a/include/elf.h
> +++ b/include/elf.h
> @@ -352,6 +352,10 @@ typedef struct {
> #define R_MIPS_CALLHI16 30
> #define R_MIPS_CALLLO16 31
> /*
> + * Incomplete list of MIPS R6 relocation types.
> + */
> +#define R_MIPS_PC26_S2 61
> +/*
> * This range is reserved for vendor specific relocations.
> */
> #define R_MIPS_LOVENDOR 100
> diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
> index e0972ba..06e15d4 100644
> --- a/tcg/mips/tcg-target.c
> +++ b/tcg/mips/tcg-target.c
> @@ -152,6 +152,19 @@ static inline void reloc_pc16(tcg_insn_unit *pc,
> tcg_insn_unit *target)
> *pc = deposit32(*pc, 0, 16, reloc_pc16_val(pc, target));
> }
>
> +static inline uint32_t reloc_pc26_val(tcg_insn_unit *pc, tcg_insn_unit
> *target)
> +{
> + /* Let the compiler perform the right-shift as part of the arithmetic.
> */
> + ptrdiff_t disp = target - (pc + 1);
> + assert(disp == sextract32(disp, 0, 26));
> + return disp & 0x1ffffff;
> +}
> +
> +static inline void reloc_pc26(tcg_insn_unit *pc, tcg_insn_unit *target)
> +{
> + *pc = deposit32(*pc, 0, 26, reloc_pc16_val(pc, target));
> +}
> +
> static inline uint32_t reloc_26_val(tcg_insn_unit *pc, tcg_insn_unit *target)
> {
> assert((((uintptr_t)pc ^ (uintptr_t)target) & 0xf0000000) == 0);
> @@ -166,9 +179,17 @@ static inline void reloc_26(tcg_insn_unit *pc,
> tcg_insn_unit *target)
> static void patch_reloc(tcg_insn_unit *code_ptr, int type,
> intptr_t value, intptr_t addend)
> {
> - assert(type == R_MIPS_PC16);
> assert(addend == 0);
> - reloc_pc16(code_ptr, (tcg_insn_unit *)value);
> + switch (type) {
> + case R_MIPS_PC16:
> + reloc_pc16(code_ptr, (tcg_insn_unit *)value);
> + break;
> + case R_MIPS_PC26_S2:
> + reloc_pc26(code_ptr, (tcg_insn_unit *)value);
> + break;
> + default:
> + tcg_abort();
> + }
> }
>
> #define TCG_CT_CONST_ZERO 0x100
> @@ -309,7 +330,10 @@ typedef enum {
> OPC_BEQ = 004 << 26,
> OPC_BNE = 005 << 26,
> OPC_BLEZ = 006 << 26,
> + OPC_BGEUC = OPC_BLEZ, /* R6: rs != 0, rt != 0, rs != rt */
> OPC_BGTZ = 007 << 26,
> + OPC_BLTUC = OPC_BGTZ, /* R6: rs != 0, rt != 0, rs != rt */
> + OPC_BEQC = 010 << 26, /* R6: rs > rt */
> OPC_ADDIU = 011 << 26,
> OPC_SLTI = 012 << 26,
> OPC_SLTIU = 013 << 26,
> @@ -318,6 +342,9 @@ typedef enum {
> OPC_XORI = 016 << 26,
> OPC_LUI = 017 << 26,
> OPC_AUI = OPC_LUI,
> + OPC_BGEC = 026 << 26,
> + OPC_BLTC = 027 << 26,
> + OPC_BNEC = 030 << 26, /* R6: rs > rt */
> OPC_DADDIU = 031 << 26,
> OPC_DAUI = 035 << 26,
> OPC_LB = 040 << 26,
> @@ -329,6 +356,7 @@ typedef enum {
> OPC_SB = 050 << 26,
> OPC_SH = 051 << 26,
> OPC_SW = 053 << 26,
> + OPC_BC = 062 << 26,
> OPC_LD = 067 << 26,
> OPC_SD = 077 << 26,
>
> @@ -527,6 +555,17 @@ static inline void tcg_out_opc_br(TCGContext *s,
> MIPSInsn opc,
> tcg_out_opc_imm(s, opc, rt, rs, offset);
> }
>
> +static void tcg_out_opc_br_pc16(TCGContext *s, MIPSInsn opc,
> + TCGReg arg1, TCGReg arg2, TCGLabel *l)
> +{
> + tcg_out_opc_br(s, opc, arg1, arg2);
> + if (l->has_value) {
> + reloc_pc16(s->code_ptr - 1, l->u.value_ptr);
> + } else {
> + tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, l, 0);
> + }
> +}
> +
> /*
> * Type sa
> */
> @@ -1002,59 +1041,129 @@ static void tcg_out_brcond(TCGContext *s, TCGCond
> cond, TCGReg arg1,
> [TCG_COND_GE] = OPC_BGEZ,
> };
>
> - MIPSInsn s_opc = OPC_SLTU;
> - MIPSInsn b_opc;
> - int cmp_map;
> + MIPSInsn b_opc = 0;
> + bool compact = false;
> + int cmp_map, t;
> +
> + /* We shouldn't expect to have arg1 == arg2, as the TCG optimizer
> + should have eliminated all such. However, the R6 encodings do
> + not allow this situation, so e.g. if the optimizer is disabled
> + we must fall back to normal compares. */
> + if (use_mips32r6_instructions && arg1 != arg2) {
> + switch (cond) {
> + case TCG_COND_EQ:
> + case TCG_COND_NE:
> + if (arg1 < arg2) {
> + t = arg1, arg1 = arg2, arg2 = t;
> + }
> + b_opc = cond == TCG_COND_EQ ? OPC_BEQC : OPC_BNEC;
> + compact = true;
> + break;
>
> - switch (cond) {
> - case TCG_COND_EQ:
> - b_opc = OPC_BEQ;
> - break;
> - case TCG_COND_NE:
> - b_opc = OPC_BNE;
> - break;
> + case TCG_COND_LE:
> + case TCG_COND_GT:
> + if (arg1 == TCG_REG_ZERO) {
> + break;
> + }
> + /* Swap arguments to turn LE to GE or GT to LT.
> + This also produces BLEZC/BGTZC when arg2 = 0. */
> + t = arg1, arg1 = arg2, arg2 = t;
> + b_opc = cond == TCG_COND_LE ? OPC_BGEC : OPC_BLTC;
> + compact = true;
> + break;
>
> - case TCG_COND_LT:
> - case TCG_COND_GT:
> - case TCG_COND_LE:
> - case TCG_COND_GE:
> - if (arg2 == 0) {
> - b_opc = b_zero[cond];
> - arg2 = arg1;
> - arg1 = 0;
> + case TCG_COND_GE:
> + case TCG_COND_LT:
> + if (arg1 == TCG_REG_ZERO) {
> + break;
> + }
> + /* The encoding of BGEZC/BLTZC requires rs = rt. */
> + if (arg2 == TCG_REG_ZERO) {
> + arg2 = arg1;
> + }
> + b_opc = cond == TCG_COND_GE ? OPC_BGEC : OPC_BLTC;
> + compact = true;
> break;
> - }
> - s_opc = OPC_SLT;
> - /* FALLTHRU */
>
> - case TCG_COND_LTU:
> - case TCG_COND_GTU:
> - case TCG_COND_LEU:
> - case TCG_COND_GEU:
> - cmp_map = mips_cmp_map[cond];
> - if (cmp_map & MIPS_CMP_SWAP) {
> - TCGReg t = arg1;
> - arg1 = arg2;
> - arg2 = t;
> + case TCG_COND_LEU:
> + /* Swap arguments to turn LE to GE. */
> + t = arg1, arg1 = arg2, arg2 = t;
> + /* FALLTHRU */
> + case TCG_COND_GEU:
> + b_opc = OPC_BGEUC;
> + compact = true;
> + break;
> +
> + case TCG_COND_GTU:
> + /* Swap arguments to turn GT to LT. */
> + t = arg1, arg1 = arg2, arg2 = t;
> + /* FALLTHRU */
> + case TCG_COND_LTU:
> + b_opc = OPC_BLTUC;
> + compact = true;
> + break;
> +
> + default:
> + tcg_abort();
> + break;
> }
> - tcg_out_opc_reg(s, s_opc, TCG_TMP0, arg1, arg2);
> - b_opc = (cmp_map & MIPS_CMP_INV ? OPC_BEQ : OPC_BNE);
> - arg1 = TCG_TMP0;
> - arg2 = TCG_REG_ZERO;
> - break;
> + }
>
> - default:
> - tcg_abort();
> - break;
> + if (b_opc == 0) {
> + MIPSInsn s_opc = OPC_SLTU;
> +
> + switch (cond) {
> + case TCG_COND_EQ:
> + b_opc = OPC_BEQ;
> + break;
> + case TCG_COND_NE:
> + b_opc = OPC_BNE;
> + break;
> +
> + case TCG_COND_LT:
> + case TCG_COND_GT:
> + case TCG_COND_LE:
> + case TCG_COND_GE:
> + if (arg2 == 0) {
> + b_opc = b_zero[cond];
> + arg2 = arg1;
> + arg1 = 0;
> + break;
> + }
> + s_opc = OPC_SLT;
> + /* FALLTHRU */
> +
> + case TCG_COND_LTU:
> + case TCG_COND_GTU:
> + case TCG_COND_LEU:
> + case TCG_COND_GEU:
> + cmp_map = mips_cmp_map[cond];
> + if (cmp_map & MIPS_CMP_SWAP) {
> + TCGReg t = arg1;
> + arg1 = arg2;
> + arg2 = t;
> + }
> + tcg_out_opc_reg(s, s_opc, TCG_TMP0, arg1, arg2);
> + if (use_mips32r6_instructions) {
> + b_opc = (cmp_map & MIPS_CMP_INV ? OPC_BEQC : OPC_BNEC);
> + compact = true;
> + } else {
> + b_opc = (cmp_map & MIPS_CMP_INV ? OPC_BEQ : OPC_BNE);
> + }
> + arg1 = TCG_TMP0;
> + arg2 = TCG_REG_ZERO;
> + break;
> +
> + default:
> + tcg_abort();
> + break;
> + }
> }
>
> - tcg_out_opc_br(s, b_opc, arg1, arg2);
> - if (l->has_value) {
> - reloc_pc16(s->code_ptr - 1, l->u.value_ptr);
> - } else {
> - tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, l, 0);
> + tcg_out_opc_br_pc16(s, b_opc, arg1, arg2, l);
> + if (!compact) {
> + tcg_out_nop(s);
Unfortunately this isn't quite right. As far as I understand them,
conditional compact branches have a forbidden slot after them which
isn't permitted to contain a control transfer instruction (CTI).
Executing a conditional compact branch with a CTI in the forbidden slot
is required to signal a reserved instruction, but only if the branch is
not taken (giving user process a SIGILL).
E.g.
Program received signal SIGILL, Illegal instruction.
[Switching to Thread 0xfff1c32e00 (LWP 204)]
0x000000fff30b0068 in code_gen_buffer ()
(gdb) disas/r
Dump of assembler code for function code_gen_buffer:
0x000000fff30b0064 <+0>: f8 ff 11 8e lw s1,-8(s0)
=> 0x000000fff30b0068 <+4>: 08 00 11 60 bnezalc s1,0xfff30b008c
<code_gen_buffer+40>
0x000000fff30b006c <+8>: 1d c0 c2 08 j 0xfff30b0074
<code_gen_buffer+16>
0x000000fff30b0070 <+12>: 00 00 00 00 nop
(gdb) set *0x000000fff30b006c=0
(gdb) disas/r
Dump of assembler code for function code_gen_buffer:
0x000000fff30b0064 <+0>: f8 ff 11 8e lw s1,-8(s0)
=> 0x000000fff30b0068 <+4>: 08 00 11 60 bnezalc s1,0xfff30b008c
<code_gen_buffer+40>
0x000000fff30b006c <+8>: 00 00 00 00 nop
0x000000fff30b0070 <+12>: 00 00 00 00 nop
(gdb) stepi
0x000000fff30b0070 in code_gen_buffer ()
(gdb) disas/r
Dump of assembler code for function code_gen_buffer:
0x000000fff30b0064 <+0>: f8 ff 11 8e lw s1,-8(s0)
0x000000fff30b0068 <+4>: 08 00 11 60 bnezalc s1,0xfff30b008c
<code_gen_buffer+40>
0x000000fff30b006c <+8>: 00 00 00 00 nop
=> 0x000000fff30b0070 <+12>: 00 00 00 00 nop
So to be correct + efficient, it should only put the nop in if the next
generated instruction is a CTI. I imagine that would be a bit messy /
fragile, but maybe doable? I haven't looked too deeply.
Cheers
James
> }
> - tcg_out_nop(s);
> }
>
> static TCGReg tcg_out_reduce_eq2(TCGContext *s, TCGReg tmp0, TCGReg tmp1,
> @@ -1826,8 +1935,19 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode
> opc,
> s->tb_next_offset[a0] = tcg_current_code_size(s);
> break;
> case INDEX_op_br:
> - tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO,
> - arg_label(a0));
> + {
> + TCGLabel *l = arg_label(a0);
> + if (use_mips32r6_instructions) {
> + tcg_out32(s, OPC_BC);
> + if (l->has_value) {
> + reloc_pc26(s->code_ptr - 1, l->u.value_ptr);
> + } else {
> + tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC26_S2, l, 0);
> + }
> + } else {
> + tcg_out_opc_br_pc16(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO,
> l);
> + }
> + }
> break;
>
> case INDEX_op_ld8u_i32:
> --
> 2.5.0
>
signature.asc
Description: Digital signature
- [Qemu-devel] [PATCH 07/15] tcg-mips: Adjust qemu_ld/st for mips64, (continued)
- [Qemu-devel] [PATCH 07/15] tcg-mips: Adjust qemu_ld/st for mips64, Richard Henderson, 2016/02/09
- [Qemu-devel] [PATCH 12/15] tcg-mips: Use mips64r6 instructions in tcg_out_ldst, Richard Henderson, 2016/02/09
- [Qemu-devel] [PATCH 13/15] tcg-mips: Use mips64r6 instructions in constant addition, Richard Henderson, 2016/02/09
- [Qemu-devel] [PATCH 11/15] tcg-mips: Use mips64r6 instructions in tcg_out_movi, Richard Henderson, 2016/02/09
- [Qemu-devel] [PATCH 15/15] tcg-mips: Use mipsr6 instructions in calls, Richard Henderson, 2016/02/09
- [Qemu-devel] [PATCH 14/15] tcg-mips: Use mipsr6 instructions in branches, Richard Henderson, 2016/02/09