qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 20/37] target/i386: reimplement 0x0f 0x60-0x6f, add AVX


From: Paolo Bonzini
Subject: [PATCH 20/37] target/i386: reimplement 0x0f 0x60-0x6f, add AVX
Date: Mon, 12 Sep 2022 01:04:00 +0200

These are both MMX and SSE/AVX instructions, except for vmovdqu.  In both
cases the inputs and output are in s->ptr{0,1,2}, so the only difference
between MMX, SSE, and AVX is which helper to call.

PCMPGT, MOVD and MOVQ are implemented using gvec.

The amount of macro magic for generating functions is kept to a minimum.
In particular, the gvec cases are easy enough and have no duplication within
each function, so they are spelled out one by one.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/decode-new.c.inc |  35 ++++++++
 target/i386/tcg/emit.c.inc       | 148 +++++++++++++++++++++++++++++++
 target/i386/tcg/translate.c      |   3 +-
 3 files changed, 185 insertions(+), 1 deletion(-)

diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index b31daecb90..f20587c096 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -142,6 +142,23 @@ static void decode_group17(DisasContext *s, CPUX86State 
*env, X86OpEntry *entry,
     entry->gen = group17_gen[op];
 }
 
+static void decode_0F6F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, 
uint8_t *b)
+{
+    if (s->prefix & PREFIX_REPNZ) {
+        entry->gen = NULL;      /* F2 prefix: no generator is installed */
+    } else if (s->prefix & PREFIX_REPZ) {
+        /* F3 prefix: movdqu, the unaligned form (vex_class 4) */
+        entry->gen = gen_MOVDQ;
+        entry->vex_class = 4;
+        entry->vex_special = X86_VEX_SSEUnaligned;
+    } else {
+        /* no prefix or 66 prefix: MMX movq, movdqa (vex_class 1) */
+        entry->gen = gen_MOVDQ;
+        entry->vex_class = 1;
+        entry->special = X86_SPECIAL_MMX;
+    }
+}
+
 static const X86OpEntry opcodes_0F38_00toEF[240] = {
 };
 
@@ -227,8 +244,26 @@ static void decode_0F3A(DisasContext *s, CPUX86State *env, 
X86OpEntry *entry, ui
 }
 
 static const X86OpEntry opcodes_0F[256] = {
+    [0x60] = X86_OP_ENTRY3(PUNPCKLBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 
p_00_66),
+    [0x61] = X86_OP_ENTRY3(PUNPCKLWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 
p_00_66),
+    [0x62] = X86_OP_ENTRY3(PUNPCKLDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 
p_00_66),
+    [0x63] = X86_OP_ENTRY3(PACKSSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 
p_00_66),
+    [0x64] = X86_OP_ENTRY3(PCMPGTB,    V,x, H,x, W,x,  vex4 mmx avx2_256 
p_00_66),
+    [0x65] = X86_OP_ENTRY3(PCMPGTW,    V,x, H,x, W,x,  vex4 mmx avx2_256 
p_00_66),
+    [0x66] = X86_OP_ENTRY3(PCMPGTD,    V,x, H,x, W,x,  vex4 mmx avx2_256 
p_00_66),
+    [0x67] = X86_OP_ENTRY3(PACKUSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 
p_00_66),
+
     [0x38] = X86_OP_GROUP0(0F38),
     [0x3a] = X86_OP_GROUP0(0F3A),
+
+    [0x68] = X86_OP_ENTRY3(PUNPCKHBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 
p_00_66),
+    [0x69] = X86_OP_ENTRY3(PUNPCKHWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 
p_00_66),
+    [0x6a] = X86_OP_ENTRY3(PUNPCKHDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 
p_00_66),
+    [0x6b] = X86_OP_ENTRY3(PACKSSDW,   V,x, H,x, W,x,  vex4 mmx avx2_256 
p_00_66),
+    [0x6c] = X86_OP_ENTRY3(PUNPCKLQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
+    [0x6d] = X86_OP_ENTRY3(PUNPCKHQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
+    [0x6e] = X86_OP_ENTRY3(MOVD_to,    V,x, None,None, E,y, vex5 mmx p_00_66), 
 /* the SDM wrongly gives the destination as Vy; Vx is used here instead */
+    [0x6f] = X86_OP_GROUP3(0F6F,       V,x, None,None, W,x, vex5 mmx 
p_00_66_f3),
 };
 
 static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, 
uint8_t *b)
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 36b963a0d3..3f89d3cf50 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -212,6 +212,97 @@ static void gen_writeback(DisasContext *s, X86DecodedOp 
*op)
     }
 }
 
+static inline int sse_vec_len(DisasContext *s, X86DecodedInsn *decode)
+{
+    if (decode->e.special == X86_SPECIAL_MMX &&
+        !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
+        return 8;               /* MMX-tagged entry with no 66/F3/F2 prefix */
+    }
+    return s->vex_l ? 32 : 16;  /* otherwise 32 if vex_l is set, else 16 */
+}
+
+static void gen_store_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn 
*decode, int src_ofs)
+{
+    MemOp ot = decode->op[0].ot;
+    int vec_len = sse_vec_len(s, decode);
+
+    if (!decode->op[0].has_ea) {    /* op[0] has no effective address: gvec copy */
+        tcg_gen_gvec_mov(MO_64, decode->op[0].offset, src_ofs, vec_len, 
vec_len);
+        return;
+    }
+
+    switch (ot) {                   /* op[0] has an address: store helper picked by size */
+    case MO_64:
+        gen_stq_env_A0(s, src_ofs);
+        break;
+    case MO_128:
+        gen_sto_env_A0(s, src_ofs);
+        break;
+    case MO_256:
+        gen_sty_env_A0(s, src_ofs);
+        break;
+    default:
+        abort();                    /* only MO_64, MO_128 and MO_256 are handled */
+    }
+}
+
+/*
+ * 00 = p*  Pq, Qq (if mmx not NULL; no VEX)
+ * 66 = vp* Vx, Hx, Wx (xmm helper if vex_l is clear, ymm helper otherwise)
+ * These are really the same encoding, because 1) V is the same as P when VEX.V
+ * is not present; 2) P and Q are the same as H and W apart from MM/XMM.
+ * If mmx is NULL the insn must require the 66 prefix: the no-66 path calls mmx.
+ */
+static inline void gen_binary_int_sse(DisasContext *s, CPUX86State *env, 
X86DecodedInsn *decode,
+                                      SSEFunc_0_eppp mmx, SSEFunc_0_eppp xmm, 
SSEFunc_0_eppp ymm)
+{
+    assert (!!mmx == !!(decode->e.special == X86_SPECIAL_MMX));  /* mmx helper iff entry is tagged MMX */
+
+    if (mmx && (s->prefix & PREFIX_VEX) && !(s->prefix & PREFIX_DATA)) {
+        /* VEX encoding is not applicable to MMX instructions.  */
+        gen_illegal_opcode(s);
+        return;
+    }
+    if (!(s->prefix & PREFIX_DATA)) {
+        mmx(cpu_env, s->ptr0, s->ptr1, s->ptr2);    /* no 66 prefix */
+    } else if (!s->vex_l) {
+        xmm(cpu_env, s->ptr0, s->ptr1, s->ptr2);    /* 66 prefix, vex_l clear */
+    } else {
+        ymm(cpu_env, s->ptr0, s->ptr1, s->ptr2);    /* 66 prefix, vex_l set */
+    }
+}
+
+#define BINARY_INT_MMX(uname, lname)                                           
    \
+static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn 
*decode) \
+{                                                                              
    \
+    gen_binary_int_sse(s, env, decode,                                         
    \
+                          gen_helper_##lname##_mmx,                            
    \
+                          gen_helper_##lname##_xmm,                            
    \
+                          gen_helper_##lname##_ymm);                           
    \
+}   /* each use below defines gen_<uname>() from gen_helper_<lname>_{mmx,xmm,ymm} */
+BINARY_INT_MMX(PUNPCKLBW,  punpcklbw)
+BINARY_INT_MMX(PUNPCKLWD,  punpcklwd)
+BINARY_INT_MMX(PUNPCKLDQ,  punpckldq)
+BINARY_INT_MMX(PACKSSWB,   packsswb)
+BINARY_INT_MMX(PACKUSWB,   packuswb)
+BINARY_INT_MMX(PUNPCKHBW,  punpckhbw)
+BINARY_INT_MMX(PUNPCKHWD,  punpckhwd)
+BINARY_INT_MMX(PUNPCKHDQ,  punpckhdq)
+BINARY_INT_MMX(PACKSSDW,   packssdw)
+
+/* Instructions with no MMX equivalent: mmx is NULL, so entries must be p_66.  */
+#define BINARY_INT_SSE(uname, lname)                                           
    \
+static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn 
*decode) \
+{                                                                              
    \
+    gen_binary_int_sse(s, env, decode,                                         
    \
+                          NULL,                                                
    \
+                          gen_helper_##lname##_xmm,                            
    \
+                          gen_helper_##lname##_ymm);                           
    \
+}
+
+BINARY_INT_SSE(PUNPCKLQDQ, punpcklqdq)
+BINARY_INT_SSE(PUNPCKHQDQ, punpckhqdq)
+
 static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
 {
     TCGv carry_in = NULL;
@@ -382,6 +473,36 @@ static void gen_MOVBE(DisasContext *s, CPUX86State *env, 
X86DecodedInsn *decode)
     }
 }
 
+static void gen_MOVD_to(DisasContext *s, CPUX86State *env, X86DecodedInsn 
*decode)
+{
+    MemOp ot = decode->op[2].ot;
+    int vec_len = sse_vec_len(s, decode);
+    int lo_ofs = decode->op[0].offset
+        - xmm_offset(decode->op[0].ot)
+        + xmm_offset(ot);   /* NOTE(review): presumably the low ot-sized element of op[0] -- confirm */
+
+    tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0);
+    /* Destination is now all zeroes; only the element at lo_ofs receives T1.  */
+    switch (ot) {
+    case MO_32:             /* without TARGET_X86_64 this uses the st_tl below */
+#ifdef TARGET_X86_64
+        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
+        tcg_gen_st_i32(s->tmp3_i32, cpu_env, lo_ofs);
+        break;
+    case MO_64:
+#endif
+        tcg_gen_st_tl(s->T1, cpu_env, lo_ofs);
+        break;
+    default:
+        abort();
+    }
+}
+
+static void gen_MOVDQ(DisasContext *s, CPUX86State *env, X86DecodedInsn 
*decode)
+{
+    gen_store_sse(s, env, decode, decode->op[2].offset);    /* op[2] is the source, op[0] the destination */
+}
+
 static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -405,6 +526,33 @@ static void gen_MULX(DisasContext *s, CPUX86State *env, 
X86DecodedInsn *decode)
 
 }
 
+static void gen_PCMPGTB(DisasContext *s, CPUX86State *env, X86DecodedInsn 
*decode)
+{
+    int vec_len = sse_vec_len(s, decode);
+
+    tcg_gen_gvec_cmp(TCG_COND_GT, MO_8,     /* signed >, per 8-bit element */
+                     decode->op[0].offset, decode->op[1].offset,
+                     decode->op[2].offset, vec_len, vec_len);   /* op[0] = op[1] > op[2] */
+}
+
+static void gen_PCMPGTW(DisasContext *s, CPUX86State *env, X86DecodedInsn 
*decode)
+{
+    int vec_len = sse_vec_len(s, decode);
+
+    tcg_gen_gvec_cmp(TCG_COND_GT, MO_16,    /* signed >, per 16-bit element */
+                     decode->op[0].offset, decode->op[1].offset,
+                     decode->op[2].offset, vec_len, vec_len);   /* op[0] = op[1] > op[2] */
+}
+
+static void gen_PCMPGTD(DisasContext *s, CPUX86State *env, X86DecodedInsn 
*decode)
+{
+    int vec_len = sse_vec_len(s, decode);
+
+    tcg_gen_gvec_cmp(TCG_COND_GT, MO_32,    /* signed >, per 32-bit element */
+                     decode->op[0].offset, decode->op[1].offset,
+                     decode->op[2].offset, vec_len, vec_len);   /* op[0] = op[1] > op[2] */
+}
+
 static void gen_PDEP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index e147a95c5f..cf18e12d38 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -23,6 +23,7 @@
 #include "disas/disas.h"
 #include "exec/exec-all.h"
 #include "tcg/tcg-op.h"
+#include "tcg/tcg-op-gvec.h"
 #include "exec/cpu_ldst.h"
 #include "exec/translator.h"
 
@@ -4665,7 +4666,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 #ifndef CONFIG_USER_ONLY
         use_new &= b <= limit;
 #endif
-        if (use_new && 0) {
+        if (use_new && (b >= 0x160 && b <= 0x16f)) {
             return disas_insn_new(s, cpu, b + 0x100);
         }
         break;
-- 
2.37.2





reply via email to

[Prev in Thread] Current Thread [Next in Thread]