qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 30/37] target/i386: reimplement 0x0f 0x10-0x17, add AVX


From: Richard Henderson
Subject: Re: [PATCH 30/37] target/i386: reimplement 0x0f 0x10-0x17, add AVX
Date: Tue, 13 Sep 2022 11:14:06 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.11.0

On 9/12/22 00:04, Paolo Bonzini wrote:
+static void gen_VMOVHPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn 
*decode)
+{
+    if (decode->op[0].offset != decode->op[1].offset) {
+        tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[1].offset + 
offsetof(XMMReg, XMM_Q(0)));
+        tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + 
offsetof(XMMReg, XMM_Q(0)));
+    }
+    gen_ldq_env_A0(s, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1)));
+}

Don't modify op0 before the load fault.

+static void gen_VMOVHLPS(DisasContext *s, CPUX86State *env, X86DecodedInsn 
*decode)
+{
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[2].offset + 
offsetof(XMMReg, XMM_Q(1)));
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + 
offsetof(XMMReg, XMM_Q(0)));
+    if (decode->op[0].offset != decode->op[1].offset) {
+        tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[1].offset + 
offsetof(XMMReg, XMM_Q(1)));
+        tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + 
offsetof(XMMReg, XMM_Q(1)));
+    }
+}
+
+static void gen_VMOVLHPS(DisasContext *s, CPUX86State *env, X86DecodedInsn 
*decode)
+{
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[2].offset + 
offsetof(XMMReg, XMM_Q(0)));
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + 
offsetof(XMMReg, XMM_Q(1)));
+    if (decode->op[0].offset != decode->op[1].offset) {
+        tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[1].offset + 
offsetof(XMMReg, XMM_Q(0)));
+        tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + 
offsetof(XMMReg, XMM_Q(0)));
+    }
+}
+
+static void gen_VMOVLPx(DisasContext *s, CPUX86State *env, X86DecodedInsn 
*decode)
+{
+    int vec_len = sse_vec_len(s, decode);
+
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, decode->op[2].offset + 
offsetof(XMMReg, XMM_Q(0)));
+    tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, 
vec_len, vec_len);
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, decode->op[0].offset + 
offsetof(XMMReg, XMM_Q(0)));
+}

You've just been moving i64 pieces in the other functions; why is this one different, using a gvec move in the middle? I do wonder whether a generic helper that moves offset->offset, including the comparison, wouldn't be helpful within these functions, even when you know off1 != off2, due to Q(0) vs Q(1).

+static void gen_VMOVLPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn 
*decode)
+{
+    int vec_len = sse_vec_len(s, decode);
+
+    tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, 
vec_len, vec_len);
+    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_64);
+    tcg_gen_st_i64(s->tmp1_i64, s->ptr0, offsetof(ZMMReg, ZMM_Q(0)));
+}

Don't modify op0 before the load fault.

+static void gen_VMOVSD_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn 
*decode)
+{
+    TCGv zero = tcg_const_i64(0);
+
+    tcg_gen_st_i64(zero, s->ptr0, offsetof(ZMMReg, ZMM_Q(1)));
+    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_64);
+    tcg_gen_st_i64(s->tmp1_i64, s->ptr0, offsetof(ZMMReg, ZMM_Q(0)));
+    tcg_temp_free_i64(zero);
+}

Likewise.

+static void gen_VMOVSS_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn 
*decode)
+{
+    int vec_len = sse_vec_len(s, decode);
+
+    tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0);
+    tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_32);
+    tcg_gen_st_i32(s->tmp2_i32, s->ptr0, offsetof(ZMMReg, ZMM_L(0)));
+}

Likewise.


r~



reply via email to

[Prev in Thread] Current Thread [Next in Thread]