[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 13/21] target-mips: add Compact Branches
From: |
Aurelien Jarno |
Subject: |
Re: [Qemu-devel] [PATCH 13/21] target-mips: add Compact Branches |
Date: |
Mon, 2 Jun 2014 21:16:12 +0200 |
User-agent: |
Mutt/1.5.21 (2010-09-15) |
On Fri, May 30, 2014 at 03:47:51PM +0100, Leon Alrae wrote:
> From: Yongbok Kim <address@hidden>
>
> Introduce MIPS32R6 Compact Branch instructions which do not have delay slot -
> they have forbidden slot instead. However, current implementation does not
> support forbidden slot yet.
>
> Signed-off-by: Yongbok Kim <address@hidden>
> Signed-off-by: Leon Alrae <address@hidden>
> ---
> disas/mips.c | 67 ++++++-
> target-mips/cpu.h | 5 +-
> target-mips/translate.c | 510
> +++++++++++++++++++++++++++++++++++++++++++++--
> 3 files changed, 565 insertions(+), 17 deletions(-)
>
> diff --git a/disas/mips.c b/disas/mips.c
> index 127a7d5..bee39d8 100644
> --- a/disas/mips.c
> +++ b/disas/mips.c
> @@ -1250,6 +1250,34 @@ const struct mips_opcode mips_builtin_opcodes[] =
> {"dalign", "d,v,t", 0x7c000224, 0xfc00063f, WR_d|RD_s|RD_t, 0,
> I64R6},
> {"bitswap", "d,w", 0x7c000020, 0xffe007ff, WR_d|RD_t, 0,
> I32R6},
> {"dbitswap","d,w", 0x7c000024, 0xffe007ff, WR_d|RD_t, 0,
> I64R6},
> +{"balc", "+p", 0xe8000000, 0xfc000000, UBD|WR_31, 0,
> I32R6},
> +{"bc", "+p", 0xc8000000, 0xfc000000, UBD|WR_31, 0,
> I32R6},
> +{"jic", "t,o", 0xd8000000, 0xffe00000, UBD|RD_t, 0,
> I32R6},
> +{"beqzc", "s,+p", 0xd8000000, 0xfc000000, CBD|RD_s, 0,
> I32R6},
> +{"jialc", "t,o", 0xf8000000, 0xffe00000, UBD|RD_t, 0,
> I32R6},
> +{"bnezc", "s,+p", 0xf8000000, 0xfc000000, CBD|RD_s, 0,
> I32R6},
> +{"beqzalc", "s,t,p", 0x20000000, 0xffe00000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"bovc", "s,t,p", 0x20000000, 0xfc000000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"beqc", "s,t,p", 0x20000000, 0xfc000000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"bnezalc", "s,t,p", 0x60000000, 0xffe00000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"bnvc", "s,t,p", 0x60000000, 0xfc000000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"bnec", "s,t,p", 0x60000000, 0xfc000000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"blezc", "s,t,p", 0x58000000, 0xffe00000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"bgezc", "s,t,p", 0x58000000, 0xfc000000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"bgec", "s,t,p", 0x58000000, 0xfc000000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"bgtzc", "s,t,p", 0x5c000000, 0xffe00000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"bltzc", "s,t,p", 0x5c000000, 0xfc000000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"bltc", "s,t,p", 0x5c000000, 0xfc000000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"blezalc", "s,t,p", 0x18000000, 0xffe00000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"bgezalc", "s,t,p", 0x18000000, 0xfc000000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"bgeuc", "s,t,p", 0x18000000, 0xfc000000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"bgtzalc", "s,t,p", 0x1c000000, 0xffe00000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"bltzalc", "s,t,p", 0x1c000000, 0xfc000000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"bltuc", "s,t,p", 0x1c000000, 0xfc000000, CBD|RD_s|RD_t, 0,
> I32R6},
> +{"bc1eqz", "T,p", 0x45200000, 0xffe00000, CBD|RD_T|FP_S|FP_D, 0,
> I32R6},
> +{"bc1nez", "T,p", 0x45a00000, 0xffe00000, CBD|RD_T|FP_S|FP_D, 0,
> I32R6},
> +{"bc2eqz", "E,p", 0x49200000, 0xffe00000, CBD|RD_C2, 0,
> I32R6},
> +{"bc2nez", "E,p", 0x49a00000, 0xffe00000, CBD|RD_C2, 0,
> I32R6},
> {"pref", "k,o(b)", 0xcc000000, 0xfc000000, RD_b, 0,
> I4|I32|G3 },
> {"prefx", "h,t(b)", 0x4c00000f, 0xfc0007ff, RD_b|RD_t,
> 0, I4|I33 },
> {"nop", "", 0x00000000, 0xffffffff, 0,
> INSN2_ALIAS, I1 }, /* sll */
> @@ -3616,6 +3644,24 @@ print_insn_args (const char *d,
> (*info->fprintf_func) (info->stream, "0x%x", msbd + 1);
> break;
>
> + case 'o':
> + delta = (l >> OP_SH_DELTA_R6) & OP_MASK_DELTA_R6;
> + if (delta & 0x8000) {
> + delta |= ~0xffff;
> + }
> + (*info->fprintf_func) (info->stream, "%d", delta);
> + break;
> +
> + case 'p':
> + /* Sign extend the displacement with 26 bits. */
> + delta = (l >> OP_SH_DELTA) & OP_MASK_TARGET;
> + if (delta & 0x2000000) {
> + delta |= ~0x3FFFFFF;
> + }
> + info->target = (delta << 2) + pc + INSNLEN;
> + (*info->print_address_func) (info->target, info);
> + break;
> +
> case 't': /* Coprocessor 0 reg name */
> (*info->fprintf_func) (info->stream, "%s",
> mips_cp0_names[(l >> OP_SH_RT) &
> @@ -3767,9 +3813,7 @@ print_insn_args (const char *d,
>
> case 'j': /* Same as i, but sign-extended. */
> case 'o':
> - delta = (opp->membership == I32R6) ?
> - (l >> OP_SH_DELTA_R6) & OP_MASK_DELTA_R6 :
> - (l >> OP_SH_DELTA) & OP_MASK_DELTA;
> + delta = (l >> OP_SH_DELTA) & OP_MASK_DELTA;
>
> if (delta & 0x8000)
> delta |= ~0xffff;
> @@ -4096,6 +4140,23 @@ print_insn_mips (bfd_vma memaddr,
> && strcmp (op->name, "jalx"))
> continue;
>
> + if (strcmp(op->name, "bovc") == 0
> + || strcmp(op->name, "bnvc") == 0) {
> + if (((word >> OP_SH_RS) & OP_MASK_RS) <
> + ((word >> OP_SH_RT) & OP_MASK_RT)) {
> + continue;
> + }
> + }
> + if (strcmp(op->name, "bgezc") == 0
> + || strcmp(op->name, "bltzc") == 0
> + || strcmp(op->name, "bgezalc") == 0
> + || strcmp(op->name, "bltzalc") == 0) {
> + if (((word >> OP_SH_RS) & OP_MASK_RS) !=
> + ((word >> OP_SH_RT) & OP_MASK_RT)) {
> + continue;
> + }
> + }
> +
> /* Figure out instruction type and branch delay information. */
> if ((op->pinfo & INSN_UNCOND_BRANCH_DELAY) != 0)
> {
> diff --git a/target-mips/cpu.h b/target-mips/cpu.h
> index 3dbc219..ccefa62 100644
> --- a/target-mips/cpu.h
> +++ b/target-mips/cpu.h
> @@ -465,10 +465,13 @@ struct CPUMIPSState {
> #define MIPS_HFLAG_BDS16 0x08000 /* branch requires 16-bit delay slot */
> #define MIPS_HFLAG_BDS32 0x10000 /* branch requires 32-bit delay slot */
> #define MIPS_HFLAG_BX 0x20000 /* branch exchanges execution mode */
> -#define MIPS_HFLAG_BMASK (MIPS_HFLAG_BMASK_BASE | MIPS_HFLAG_BMASK_EXT)
> +#define MIPS_HFLAG_BMASK (MIPS_HFLAG_BMASK_BASE | MIPS_HFLAG_BMASK_EXT | \
> + MIPS_HFLAG_CB)
> /* MIPS DSP resources access. */
> #define MIPS_HFLAG_DSP 0x40000 /* Enable access to MIPS DSP resources. */
> #define MIPS_HFLAG_DSPR2 0x80000 /* Enable access to MIPS DSPR2 resources.
> */
> +
> +#define MIPS_HFLAG_CB 0x100000 /* Compact branch */
> target_ulong btarget; /* Jump / branch target */
> target_ulong bcond; /* Branch condition (if needed) */
>
> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index cce17cd..1820ebb 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -107,6 +107,31 @@ enum {
> OPC_SWC2 = (0x3A << 26),
> OPC_SDC1 = (0x3D << 26),
> OPC_SDC2 = (0x3E << 26),
> + /* Compact Branches */
> + OPC_BLEZALC = (0x06 << 26),
> + OPC_BGEZALC = (0x06 << 26),
> + OPC_BGEUC = (0x06 << 26),
> + OPC_BGTZALC = (0x07 << 26),
> + OPC_BLTZALC = (0x07 << 26),
> + OPC_BLTUC = (0x07 << 26),
> + OPC_BOVC = (0x08 << 26),
> + OPC_BEQZALC = (0x08 << 26),
> + OPC_BEQC = (0x08 << 26),
> + OPC_BLEZC = (0x16 << 26),
> + OPC_BGEZC = (0x16 << 26),
> + OPC_BGEC = (0x16 << 26),
> + OPC_BGTZC = (0x17 << 26),
> + OPC_BLTZC = (0x17 << 26),
> + OPC_BLTC = (0x17 << 26),
> + OPC_BNVC = (0x18 << 26),
> + OPC_BNEZALC = (0x18 << 26),
> + OPC_BNEC = (0x18 << 26),
> + OPC_BC = (0x32 << 26),
> + OPC_BEQZC = (0x36 << 26),
> + OPC_JIC = (0x36 << 26),
> + OPC_BALC = (0x3A << 26),
> + OPC_BNEZC = (0x3E << 26),
> + OPC_JIALC = (0x3E << 26),
> /* MDMX ASE specific */
> OPC_MDMX = (0x1E << 26),
> /* Cache and prefetch */
> @@ -895,6 +920,8 @@ enum {
> OPC_W_FMT = (FMT_W << 21) | OPC_CP1,
> OPC_L_FMT = (FMT_L << 21) | OPC_CP1,
> OPC_PS_FMT = (FMT_PS << 21) | OPC_CP1,
> + OPC_BC1EQZ = (0x09 << 21) | OPC_CP1,
> + OPC_BC1NEZ = (0x0D << 21) | OPC_CP1,
> };
>
> #define MASK_CP1_FUNC(op) MASK_CP1(op) | (op & 0x3F)
> @@ -929,6 +956,8 @@ enum {
> OPC_CTC2 = (0x06 << 21) | OPC_CP2,
> OPC_MTHC2 = (0x07 << 21) | OPC_CP2,
> OPC_BC2 = (0x08 << 21) | OPC_CP2,
> + OPC_BC2EQZ = (0x09 << 21) | OPC_CP2,
> + OPC_BC2NEZ = (0x0D << 21) | OPC_CP2,
> };
>
> #define MASK_LMI(op) (MASK_OP_MAJOR(op) | (op & (0x1F << 21)) | (op & 0x1F))
> @@ -1405,6 +1434,20 @@ static inline void gen_op_addr_add (DisasContext *ctx,
> TCGv ret, TCGv arg0, TCGv
> #endif
> }
>
> +/* Addresses computation (translation time) */
> +static target_long addr_add(DisasContext *ctx, target_long base,
> + target_long offset)
> +{
> + target_long sum = base + offset;
> +
> +#if defined(TARGET_MIPS64)
> + if (is_wrapping_needed(ctx)) {
> + sum = (int32_t)sum;
> + }
Same comment as in the previous patch applies.
> +#endif
> + return sum;
> +}
> +
> static inline void check_cp0_enabled(DisasContext *ctx)
> {
> if (unlikely(!(ctx->hflags & MIPS_HFLAG_CP0)))
> @@ -3929,7 +3972,13 @@ static void gen_compute_branch (DisasContext *ctx,
> uint32_t opc,
>
> if (ctx->hflags & MIPS_HFLAG_BMASK) {
> #ifdef MIPS_DEBUG_DISAS
> - LOG_DISAS("Branch in delay slot at PC 0x" TARGET_FMT_lx "\n",
> ctx->pc);
> + if (ctx->hflags & MIPS_HFLAG_CB) {
> + LOG_DISAS("Branch in forbidden slot at PC 0x" TARGET_FMT_lx "\n",
> + ctx->pc);
> + } else {
> + LOG_DISAS("Branch in delay slot at PC 0x" TARGET_FMT_lx "\n",
> + ctx->pc);
> + }
> #endif
> generate_exception(ctx, EXCP_RI);
> goto out;
> @@ -4223,6 +4272,277 @@ static void gen_compute_branch (DisasContext *ctx,
> uint32_t opc,
> tcg_temp_free(t1);
> }
>
> +#ifdef TARGET_MIPS64
> +static void gen_detect_input_overflow(TCGv_i64 ret, TCGv_i64 input1,
> + TCGv_i64 input2)
> +{
> + TCGv_i64 input1_overflow = tcg_temp_local_new_i64();
> + TCGv_i64 input2_overflow = tcg_temp_local_new_i64();
> + TCGv_i64 above_max = tcg_temp_local_new_i64();
> + TCGv_i64 below_min = tcg_temp_local_new_i64();
You don't need local temps here, just normal temps, as they don't
cross branches.
> +
> + tcg_gen_setcondi_i64(TCG_COND_LT, below_min, input1, (int32_t)(1U <<
> 31));
> + tcg_gen_setcondi_i64(TCG_COND_GT, above_max, input1, (1U << 31) - 1);
> + tcg_gen_or_i64(input1_overflow, above_max, below_min);
> +
> + tcg_gen_setcondi_i64(TCG_COND_LT, below_min, input2, (int32_t)(1U <<
> 31));
> + tcg_gen_setcondi_i64(TCG_COND_GT, above_max, input2, (1U << 31) - 1);
> + tcg_gen_or_i64(input2_overflow, above_max, below_min);
> +
> + tcg_gen_or_i64(ret, input1_overflow, input2_overflow);
> + tcg_temp_free_i64(below_min);
> + tcg_temp_free_i64(above_max);
> + tcg_temp_free_i64(input2_overflow);
> + tcg_temp_free_i64(input1_overflow);
This looks quite complicated, and setcond with this kind of constant is
going to generate quite a lot of instructions, while you just want to
check that ext32s(value) == value.
You can therefore use something like this:
t0 = tcg_temp_new();
tcg_gen_ext32s_tl(ret, input1);
tcg_gen_setcond_tl(TCG_COND_NE, ret, ret, input1);
tcg_gen_ext32s_tl(t0, input2);
tcg_gen_setcond_tl(TCG_COND_NE, t0, t0, input2);
tcg_gen_or_tl(ret, ret, t0);
tcg_temp_free(t0);
I haven't tried, but on MIPS32 this should be changed by the optimizer
into tcg_gen_movi_tl(ret, 0);
> +}
> +#else
> +static void gen_detect_input_overflow(TCGv_i32 ret, TCGv_i32 input1,
> + TCGv_i32 input2)
> +{
> + tcg_gen_movi_i32(ret, 0);
> +}
> +#endif
> +
> +/* Compact Branches */
> +static void gen_compute_compact_branch(DisasContext *ctx, uint32_t opc,
> + int rs, int rt, int32_t offset)
> +{
> + target_ulong btgt = -1;
> + int blink = 0;
> + int bcond_compute = 0;
> + TCGv t0 = tcg_temp_new();
> + TCGv t1 = tcg_temp_new();
> +
> + if (ctx->hflags & MIPS_HFLAG_BMASK) {
> +#ifdef MIPS_DEBUG_DISAS
> + if (ctx->hflags & MIPS_HFLAG_CB) {
> + LOG_DISAS("Branch in forbidden slot at PC 0x" TARGET_FMT_lx "\n",
> + ctx->pc);
> + } else {
> + LOG_DISAS("Branch in delay slot at PC 0x" TARGET_FMT_lx "\n",
> + ctx->pc);
> + }
> +#endif
> + generate_exception(ctx, EXCP_RI);
> + goto out;
> + }
> +
> + /* Load needed operands */
> + switch (opc) {
> + /* compact branch */
> + case OPC_BOVC: /* OPC_BEQZALC, OPC_BEQC */
> + case OPC_BNVC: /* OPC_BNEZALC, OPC_BNEC */
> + gen_load_gpr(t0, rs);
> + gen_load_gpr(t1, rt);
> + bcond_compute = 1;
> + btgt = addr_add(ctx, ctx->pc + 4, offset);
> + if (rs <= rt && rs == 0) {
> + /* OPC_BEQZALC, OPC_BNEZALC */
> + blink = 31;
> + }
> + break;
> + case OPC_BLEZC: /* OPC_BGEZC, OPC_BGEC */
> + case OPC_BGTZC: /* OPC_BLTZC, OPC_BLTC */
> + gen_load_gpr(t0, rs);
> + gen_load_gpr(t1, rt);
> + bcond_compute = 1;
> + btgt = addr_add(ctx, ctx->pc + 4, offset);
> + break;
> + case OPC_BLEZALC: /* OPC_BGEZALC, OPC_BGEUC */
> + case OPC_BGTZALC: /* OPC_BLTZALC, OPC_BLTUC */
> + if (rs == 0 || rs == rt) {
> + /* OPC_BLEZALC, OPC_BGEZALC */
> + /* OPC_BGTZALC, OPC_BLTZALC */
> + blink = 31;
> + }
> + gen_load_gpr(t0, rs);
> + gen_load_gpr(t1, rt);
> + bcond_compute = 1;
> + btgt = addr_add(ctx, ctx->pc + 4, offset);
> + break;
> + case OPC_BC:
> + case OPC_BALC:
> + btgt = addr_add(ctx, ctx->pc + 4, offset);
> + break;
> + case OPC_BEQZC:
> + case OPC_BNEZC:
> + if (rs != 0) {
> + /* OPC_BEQZC, OPC_BNEZC */
> + gen_load_gpr(t0, rs);
> + bcond_compute = 1;
> + btgt = addr_add(ctx, ctx->pc + 4, offset);
> + } else {
> + /* OPC_JIC, OPC_JIALC */
> + TCGv tbase = tcg_temp_local_new();
tcg_temp_new()
> + TCGv toffset = tcg_temp_local_new();
tcg_temp_new()
> +
> + gen_load_gpr(tbase, rt);
> + tcg_gen_movi_tl(toffset, offset);
> + gen_op_addr_add(ctx, btarget, tbase, toffset);
> + tcg_temp_free(tbase);
> + tcg_temp_free(toffset);
> + }
> + break;
> + default:
> + MIPS_INVAL("branch/jump");
> + generate_exception(ctx, EXCP_RI);
> + goto out;
> + }
> +
> + ctx->hflags |= MIPS_HFLAG_CB;
> +
> + if (bcond_compute == 0) {
> + /* No condition to be computed */
> + switch (opc) {
> + /* compact branches */
> + case OPC_JIALC:
> + blink = 31;
> + /* Fallthrough */
> + case OPC_JIC:
> + ctx->hflags |= MIPS_HFLAG_BR;
> + break;
> + case OPC_BALC:
> + blink = 31;
> + /* Fallthrough */
> + case OPC_BC:
> + ctx->hflags |= MIPS_HFLAG_B;
> + /* Always take and link */
> + break;
> + default:
> + MIPS_INVAL("branch/jump");
> + generate_exception(ctx, EXCP_RI);
> + goto out;
> + }
> + } else {
> + ctx->hflags |= MIPS_HFLAG_BC;
> +
> + switch (opc) {
> + case OPC_BLEZALC: /* OPC_BGEZALC, OPC_BGEUC */
> + if (rs == 0 && rt != 0) {
> + /* OPC_BLEZALC */
> + tcg_gen_setcondi_tl(TCG_COND_LE, bcond, t1, 0);
> + } else if (rs != 0 && rt != 0 && rs == rt) {
> + /* OPC_BGEZALC */
> + tcg_gen_setcondi_tl(TCG_COND_GE, bcond, t1, 0);
> + } else {
> + /* OPC_BGEUC */
> + tcg_gen_setcond_tl(TCG_COND_GEU, bcond, t0, t1);
> + }
> + break;
> + case OPC_BGTZALC: /* OPC_BLTZALC, OPC_BLTUC */
> + if (rs == 0 && rt != 0) {
> + /* OPC_BGTZALC */
> + tcg_gen_setcondi_tl(TCG_COND_GT, bcond, t1, 0);
> + } else if (rs != 0 && rt != 0 && rs == rt) {
> + /* OPC_BLTZALC */
> + tcg_gen_setcondi_tl(TCG_COND_LT, bcond, t1, 0);
> + } else {
> + /* OPC_BLTUC */
> + tcg_gen_setcond_tl(TCG_COND_LTU, bcond, t0, t1);
> + }
> + break;
> + case OPC_BLEZC: /* OPC_BGEZC, OPC_BGEC */
> + if (rs == 0 && rt != 0) {
> + /* OPC_BLEZC */
> + tcg_gen_setcondi_tl(TCG_COND_LE, bcond, t1, 0);
> + } else if (rs != 0 && rt != 0 && rs == rt) {
> + /* OPC_BGEZC */
> + tcg_gen_setcondi_tl(TCG_COND_GE, bcond, t1, 0);
> + } else {
> + /* OPC_BGEC */
> + tcg_gen_setcond_tl(TCG_COND_GE, bcond, t0, t1);
> + }
> + break;
> + case OPC_BGTZC: /* OPC_BLTZC, OPC_BLTC */
> + if (rs == 0 && rt != 0) {
> + /* OPC_BGTZC */
> + tcg_gen_setcondi_tl(TCG_COND_GT, bcond, t1, 0);
> + } else if (rs != 0 && rt != 0 && rs == rt) {
> + /* OPC_BLTZC */
> + tcg_gen_setcondi_tl(TCG_COND_LT, bcond, t1, 0);
> + } else {
> + /* OPC_BLTC */
> + tcg_gen_setcond_tl(TCG_COND_LT, bcond, t0, t1);
> + }
> + break;
> + case OPC_BOVC: /* OPC_BEQZALC, OPC_BEQC */
> + case OPC_BNVC: /* OPC_BNEZALC, OPC_BNEC */
> + if (rs >= rt) {
> + /* OPC_BOVC, OPC_BNVC */
> + TCGv t0 = tcg_temp_local_new();
> + TCGv t1 = tcg_temp_local_new();
> + TCGv tadd = tcg_temp_local_new();
> + TCGv input_overflow = tcg_temp_local_new();
> + gen_load_gpr(t0, rs);
> + gen_load_gpr(t1, rt);
> + gen_detect_input_overflow(input_overflow, t0, t1);
> + tcg_gen_ext32s_tl(t0, t0);
> + tcg_gen_ext32s_tl(t1, t1);
Given gen_detect_input_overflow is used exactly once, you can probably
move it here and reuse the result of tcg_gen_ext32s_tl. That said, at
this point either both values are already sign-extended or the overflow
has already been detected, so they can even be ignored.
> + tcg_gen_add_tl(tadd, t0, t1);
> + tcg_gen_ext32s_tl(tadd, tadd);
> + tcg_gen_xor_tl(t0, t0, t1);
> + tcg_gen_xor_tl(t1, tadd, t1);
> + tcg_gen_andc_tl(t0, t1, t0);
> + tcg_gen_setcondi_tl(TCG_COND_LT, tadd, t0, 0);
> + if (opc == OPC_BOVC) {
> + /* OPC_BOVC */
> + tcg_gen_or_tl(bcond, tadd, input_overflow);
> + } else {
> + /* OPC_BNVC */
> + tcg_gen_nor_tl(tadd, tadd, input_overflow);
> + tcg_gen_andi_tl(bcond, tadd, 1);
nor is often implemented as or + not, so using or + xori with 1 looks like
a better alternative.
> + }
> + /* operands of same sign, result different sign */
> + tcg_temp_free(input_overflow);
> + tcg_temp_free(t0);
> + tcg_temp_free(t1);
> + tcg_temp_free(tadd);
> + } else if (rs < rt && rs == 0) {
> + /* OPC_BEQZALC, OPC_BNEZALC */
> + if (opc == OPC_BEQZALC) {
> + /* OPC_BEQZALC */
> + tcg_gen_setcondi_tl(TCG_COND_EQ, bcond, t1, 0);
> + } else {
> + /* OPC_BNEZALC */
> + tcg_gen_setcondi_tl(TCG_COND_NE, bcond, t1, 0);
> + }
> + } else {
> + /* OPC_BEQC, OPC_BNEC */
> + if (opc == OPC_BEQC) {
> + /* OPC_BEQC */
> + tcg_gen_setcond_tl(TCG_COND_EQ, bcond, t0, t1);
> + } else {
> + /* OPC_BNEC */
> + tcg_gen_setcond_tl(TCG_COND_NE, bcond, t0, t1);
> + }
> + }
> + break;
> + case OPC_BEQZC:
> + tcg_gen_setcondi_tl(TCG_COND_EQ, bcond, t0, 0);
> + break;
> + case OPC_BNEZC:
> + tcg_gen_setcondi_tl(TCG_COND_NE, bcond, t0, 0);
> + break;
> + default:
> + MIPS_INVAL("conditional branch/jump");
> + generate_exception(ctx, EXCP_RI);
> + goto out;
> + }
> + }
> + MIPS_DEBUG("enter ds: link %d cond %02x target " TARGET_FMT_lx,
> + blink, ctx->hflags, btgt);
> +
> + ctx->btarget = btgt;
> + if (blink > 0) {
> + tcg_gen_movi_tl(cpu_gpr[blink], ctx->pc + 4);
> + }
> +
> +out:
> + tcg_temp_free(t0);
> + tcg_temp_free(t1);
> +}
> +
> /* special3 bitfield operations */
> static void gen_bitops (DisasContext *ctx, uint32_t opc, int rt,
> int rs, int lsb, int msb)
> @@ -7412,6 +7732,49 @@ static void gen_compute_branch1(DisasContext *ctx,
> uint32_t op,
> tcg_temp_free_i32(t0);
> }
>
> +/* R6 CP1 Branches (before delay slot) */
> +static void gen_compute_branch1_r6(DisasContext *ctx, uint32_t op,
> + int32_t ft, int32_t offset)
> +{
> + target_ulong btarget;
> + const char *opn = "cp1 cond branch";
> +
> + TCGv_i64 t0 = tcg_temp_new_i64();
> + gen_load_fpr64(ctx, t0, ft);
> + tcg_gen_andi_i64(t0, t0, 1);
> +
> + btarget = addr_add(ctx, ctx->pc + 4, offset);
> +
> + switch (op) {
> + case OPC_BC1EQZ:
> + tcg_gen_setcondi_i64(TCG_COND_EQ, t0, t0, 0);
> + opn = "bc1eqz";
> + ctx->hflags |= MIPS_HFLAG_BC;
> + break;
This is just tcg_gen_xori_i64(t0, t0, 1);
> + case OPC_BC1NEZ:
> + tcg_gen_setcondi_i64(TCG_COND_NE, t0, t0, 0);
> + opn = "bc1nez";
> + ctx->hflags |= MIPS_HFLAG_BC;
> + break;
This is just tcg_gen_mov_i64(t0, t0), i.e. a no-op.
> + default:
> + MIPS_INVAL(opn);
> + generate_exception(ctx, EXCP_RI);
> + return;
> + }
> +
> +#ifdef TARGET_MIPS64
> + tcg_gen_mov_tl(bcond, t0);
> +#else
> + tcg_gen_trunc_i64_i32(bcond, t0);
> +#endif
You should use tcg_gen_trunc_i64_tl instead of the #ifdef.
> + tcg_temp_free_i64(t0);
> +
> + (void)opn; /* avoid a compiler warning */
> + MIPS_DEBUG("%s: cond %02x target " TARGET_FMT_lx, opn,
> + ctx->hflags, btarget);
> + ctx->btarget = btarget;
> +}
> +
> /* Coprocessor 1 (FPU) */
>
> #define FOP(func, fmt) (((fmt) << 21) | (func))
> @@ -16013,7 +16376,16 @@ static void decode_opc (CPUMIPSState *env,
> DisasContext *ctx)
> break;
> }
> break;
> - case OPC_ADDI: /* Arithmetic with immediate opcode */
> + case OPC_BOVC: /* OPC_BEQZALC, OPC_BEQC, OPC_ADDI */
> + if (ctx->insn_flags & ISA_MIPS32R6) {
> + /* OPC_BOVC, OPC_BEQZALC, OPC_BEQC */
> + gen_compute_compact_branch(ctx, op, rs, rt, imm << 2);
> + } else {
> + /* OPC_ADDI */
> + /* Arithmetic with immediate opcode */
> + gen_arith_imm(ctx, op, rt, rs, imm);
> + }
> + break;
> case OPC_ADDIU:
> gen_arith_imm(ctx, op, rt, rs, imm);
> break;
> @@ -16031,9 +16403,58 @@ static void decode_opc (CPUMIPSState *env,
> DisasContext *ctx)
> offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2;
> gen_compute_branch(ctx, op, 4, rs, rt, offset);
> break;
> - case OPC_BEQL ... OPC_BGTZL: /* Branch */
> + /* Branch */
> + case OPC_BLEZC: /* OPC_BGEZC, OPC_BGEC, OPC_BLEZL */
> + if (ctx->insn_flags & ISA_MIPS32R6) {
> + if (rt == 0) {
> + generate_exception(ctx, EXCP_RI);
> + break;
> + }
> + /* OPC_BLEZC, OPC_BGEZC, OPC_BGEC */
> + gen_compute_compact_branch(ctx, op, rs, rt, imm << 2);
> + } else {
> + /* OPC_BLEZL */
> + gen_compute_branch(ctx, op, 4, rs, rt, imm << 2);
> + }
> + break;
> + case OPC_BGTZC: /* OPC_BLTZC, OPC_BLTC, OPC_BGTZL */
> + if (ctx->insn_flags & ISA_MIPS32R6) {
> + if (rt == 0) {
> + generate_exception(ctx, EXCP_RI);
> + break;
> + }
> + /* OPC_BGTZC, OPC_BLTZC, OPC_BLTC */
> + gen_compute_compact_branch(ctx, op, rs, rt, imm << 2);
> + } else {
> + /* OPC_BGTZL */
> + gen_compute_branch(ctx, op, 4, rs, rt, imm << 2);
> + }
> + break;
> + case OPC_BLEZALC: /* OPC_BGEZALC, OPC_BGEUC, OPC_BLEZ */
> + if (rt == 0) {
> + /* OPC_BLEZ */
> + gen_compute_branch(ctx, op, 4, rs, rt, imm << 2);
> + } else {
> + check_insn(ctx, ISA_MIPS32R6);
> + /* OPC_BLEZALC, OPC_BGEZALC, OPC_BGEUC */
> + gen_compute_compact_branch(ctx, op, rs, rt, imm << 2);
> + }
> + break;
> + case OPC_BGTZALC: /* OPC_BLTZALC, OPC_BLTUC, OPC_BGTZ */
> + if (rt == 0) {
> + /* OPC_BGTZ */
> + gen_compute_branch(ctx, op, 4, rs, rt, imm << 2);
> + } else {
> + check_insn(ctx, ISA_MIPS32R6);
> + /* OPC_BGTZALC, OPC_BLTZALC, OPC_BLTUC */
> + gen_compute_compact_branch(ctx, op, rs, rt, imm << 2);
> + }
> + break;
> + case OPC_BEQL:
> + case OPC_BNEL:
> check_insn_opc_removed(ctx, ISA_MIPS32R6);
> - case OPC_BEQ ... OPC_BGTZ:
> + case OPC_BEQ:
> + case OPC_BNE:
> gen_compute_branch(ctx, op, 4, rs, rt, imm << 2);
> break;
> case OPC_LWL: /* Load and stores */
> @@ -16096,7 +16517,24 @@ static void decode_opc (CPUMIPSState *env,
> DisasContext *ctx)
> gen_cp1(ctx, op1, rt, rd);
> break;
> #endif
> - case OPC_BC1ANY2:
> + case OPC_BC1EQZ: /* OPC_BC1ANY2 */
> + if (ctx->insn_flags & ISA_MIPS32R6) {
> + /* OPC_BC1EQZ */
> + gen_compute_branch1_r6(ctx, MASK_CP1(ctx->opcode),
> + rt, imm << 2);
> + } else {
> + /* OPC_BC1ANY2 */
> + check_cop1x(ctx);
> + check_insn(ctx, ASE_MIPS3D);
> + gen_compute_branch1(ctx, MASK_BC1(ctx->opcode),
> + (rt >> 2) & 0x7, imm << 2);
> + }
> + break;
> + case OPC_BC1NEZ:
> + check_insn(ctx, ISA_MIPS32R6);
> + gen_compute_branch1_r6(ctx, MASK_CP1(ctx->opcode),
> + rt, imm << 2);
> + break;
> case OPC_BC1ANY4:
> check_insn_opc_removed(ctx, ISA_MIPS32R6);
> check_cop1x(ctx);
> @@ -16126,13 +16564,35 @@ static void decode_opc (CPUMIPSState *env,
> DisasContext *ctx)
> }
> break;
>
> - /* COP2. */
> - case OPC_LWC2:
> - case OPC_LDC2:
> - case OPC_SWC2:
> - case OPC_SDC2:
> - /* COP2: Not implemented. */
> - generate_exception_err(ctx, EXCP_CpU, 2);
> + /* Compact branches [R6] and COP2 [non-R6] */
> + case OPC_BC: /* OPC_LWC2 */
> + case OPC_BALC: /* OPC_SWC2 */
> + if (ctx->insn_flags & ISA_MIPS32R6) {
> + /* OPC_BC, OPC_BALC */
> + gen_compute_compact_branch(ctx, op, 0, 0,
> + SIMM((ctx->opcode & 0x3FFFFFF) << 2, 0,
> 28));
> + } else {
> + /* OPC_LWC2, OPC_SWC2 */
> + /* COP2: Not implemented. */
> + generate_exception_err(ctx, EXCP_CpU, 2);
> + }
> + break;
> + case OPC_BEQZC: /* OPC_JIC, OPC_LDC2 */
> + case OPC_BNEZC: /* OPC_JIALC, OPC_SDC2 */
> + if (ctx->insn_flags & ISA_MIPS32R6) {
> + if (rs != 0) {
> + /* OPC_BEQZC, OPC_BNEZC */
> + gen_compute_compact_branch(ctx, op, rs, 0,
> + SIMM((ctx->opcode & 0x1FFFFF) << 2, 0,
> 23));
> + } else {
> + /* OPC_JIC, OPC_JIALC */
> + gen_compute_compact_branch(ctx, op, 0, rt, imm);
> + }
> + } else {
> + /* OPC_LWC2, OPC_SWC2 */
> + /* COP2: Not implemented. */
> + generate_exception_err(ctx, EXCP_CpU, 2);
> + }
> break;
> case OPC_CP2:
> check_insn(ctx, INSN_LOONGSON2F);
> @@ -16206,12 +16666,31 @@ static void decode_opc (CPUMIPSState *env,
> DisasContext *ctx)
> check_mips_64(ctx);
> gen_st_cond(ctx, op, rt, rs, imm);
> break;
> - case OPC_DADDI:
> + case OPC_DADDI: /* OPC_BNVC, OPC_BNEZALC, OPC_BNEC */
> + if (ctx->insn_flags & ISA_MIPS32R6) {
> + /* OPC_BNVC, OPC_BNEZALC, OPC_BNEC */
> + gen_compute_compact_branch(ctx, op, rs, rt, imm << 2);
> + } else {
> + /* OPC_DADDI */
> + check_insn(ctx, ISA_MIPS3);
> + check_mips_64(ctx);
> + gen_arith_imm(ctx, op, rt, rs, imm);
> + }
> + break;
> case OPC_DADDIU:
> check_insn(ctx, ISA_MIPS3);
> check_mips_64(ctx);
> gen_arith_imm(ctx, op, rt, rs, imm);
> break;
> +#else
> + case OPC_BNVC: /* OPC_BNEZALC, OPC_BNEC */
> + if (ctx->insn_flags & ISA_MIPS32R6) {
> + gen_compute_compact_branch(ctx, op, rs, rt, imm << 2);
> + } else {
> + MIPS_INVAL("major opcode");
> + generate_exception(ctx, EXCP_RI);
> + }
> + break;
Why is that introduced only in the MIPS32 case, but not in the MIPS64
case?
> #endif
> case OPC_JALX:
> check_insn(ctx, ASE_MIPS16 | ASE_MICROMIPS);
> @@ -16316,6 +16795,11 @@ gen_intermediate_code_internal(MIPSCPU *cpu,
> TranslationBlock *tb,
> ctx.bstate = BS_STOP;
> break;
> }
> + if (ctx.hflags & MIPS_HFLAG_CB) {
> + /* Compact branch, execute a branch now.
> + TODO: implement forbidden slot */
> + is_delay = 1;
> + }
> if (is_delay) {
> handle_delay_slot(&ctx, insn_bytes);
> }
> --
> 1.7.5.4
>
>
--
Aurelien Jarno GPG: 4096R/1DDD8C9B
address@hidden http://www.aurel32.net
- Re: [Qemu-devel] [PATCH 13/21] target-mips: add Compact Branches,
Aurelien Jarno <=