qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 09/10] target/i386: implement 32-bit SYSENTER for linux-user


From: Paolo Bonzini
Subject: [PATCH v2 09/10] target/i386: implement 32-bit SYSENTER for linux-user
Date: Tue, 20 Jun 2023 17:16:33 +0200

TCG reports the SEP feature (SYSENTER/SYSEXIT) in user mode emulation,
but does not plumb it into the linux-user run loop.  Split the helper into
system emulation and user-mode emulation cases and implement the latter.

SYSENTER does not have the best design for a kernel-mode entry
instruction, and therefore Linux always makes it return to the
vsyscall page.  Because QEMU does not provide the _contents_ of
the vsyscall page, the instructions executed after SYSEXIT have
to be emulated by hand until the first RET.

Some corner cases, such as restarting the system call after the
system call has rewritten the SYSENTER instruction, are not emulated
correctly.  On Linux, the system call restart uses the SYSENTER
call in the vsyscall page, while on QEMU it uses the emulated
program's instruction.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 linux-user/i386/cpu_loop.c          | 51 +++++++++++++++++++++++++++--
 target/i386/cpu.c                   |  9 ++++-
 target/i386/cpu.h                   |  1 +
 target/i386/helper.h                |  2 +-
 target/i386/tcg/seg_helper.c        | 33 -------------------
 target/i386/tcg/sysemu/seg_helper.c | 33 +++++++++++++++++++
 target/i386/tcg/translate.c         |  2 +-
 target/i386/tcg/user/seg_helper.c   | 16 +++++++++
 8 files changed, 109 insertions(+), 38 deletions(-)

diff --git a/linux-user/i386/cpu_loop.c b/linux-user/i386/cpu_loop.c
index 6908bad14aa..690d9a42ee0 100644
--- a/linux-user/i386/cpu_loop.c
+++ b/linux-user/i386/cpu_loop.c
@@ -197,6 +197,41 @@ static bool maybe_handle_vm86_trap(CPUX86State *env, int 
trapnr)
     return false;
 }
 
+static void emulate_vsyscall_sysexit(CPUX86State *env)
+{
+    /*
+     * Emulate the pop and ret instructions after the sysenter instruction
+     * in the vsyscall page.  Any sysenter returns there, because sysenter
+     * does not save the old EIP!
+     */
+    abi_ulong word;
+    if (get_user_ual(word, env->regs[R_ESP])) {
+        goto segv;
+    }
+    env->regs[R_EBP] = word;
+    env->regs[R_ESP] += sizeof(target_ulong);
+    if (get_user_ual(word, env->regs[R_ESP])) {
+        goto segv;
+    }
+    env->regs[R_EDX] = word;
+    env->regs[R_ESP] += sizeof(target_ulong);
+    if (get_user_ual(word, env->regs[R_ESP])) {
+        goto segv;
+    }
+    env->regs[R_ECX] = word;
+    env->regs[R_ESP] += sizeof(target_ulong);
+    if (get_user_ual(word, env->regs[R_ESP])) {
+        goto segv;
+    }
+    env->eip = word;
+    env->regs[R_ESP] += sizeof(target_ulong);
+    return;
+
+segv:
+    env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
+    force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, env->regs[R_ESP]);
+}
+
 void cpu_loop(CPUX86State *env)
 {
     CPUState *cs = env_cpu(env);
@@ -213,6 +248,7 @@ void cpu_loop(CPUX86State *env)
         case 0x80:
 #ifdef TARGET_ABI32
         case EXCP_SYSCALL:
+        case EXCP_SYSENTER:
 #endif
             /* linux syscall from int $0x80 */
             ret = do_syscall(env,
@@ -226,12 +262,18 @@ void cpu_loop(CPUX86State *env)
                              0, 0);
             if (ret == -QEMU_ERESTARTSYS) {
                 env->eip -= 2;
-            } else if (ret != -QEMU_ESIGRETURN) {
+                break;
+            }
+            if (ret != -QEMU_ESIGRETURN) {
                 env->regs[R_EAX] = ret;
             }
+            if (trapnr == EXCP_SYSENTER) {
+                emulate_vsyscall_sysexit(env);
+            }
             break;
 #ifndef TARGET_ABI32
         case EXCP_SYSCALL:
+        case EXCP_SYSENTER:
             /* linux syscall from syscall instruction */
             ret = do_syscall(env,
                              env->regs[R_EAX],
@@ -244,9 +286,14 @@ void cpu_loop(CPUX86State *env)
                              0, 0);
             if (ret == -QEMU_ERESTARTSYS) {
                 env->eip -= 2;
-            } else if (ret != -QEMU_ESIGRETURN) {
+                break;
+            }
+            if (ret != -QEMU_ESIGRETURN) {
                 env->regs[R_EAX] = ret;
             }
+            if (trapnr == EXCP_SYSENTER) {
+                emulate_vsyscall_sysexit(env);
+            }
             break;
 #endif
 #ifdef TARGET_X86_64
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 934360e4091..2c71c3ea32b 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -614,11 +614,18 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
           CPUID_PAT | CPUID_FXSR | CPUID_MMX | CPUID_SSE | CPUID_SSE2 | \
           CPUID_PAE | CPUID_SEP | CPUID_APIC)
 
+#if defined CONFIG_SOFTMMU || defined CONFIG_LINUX_USER
+#define TCG_NOBSD_FEATURES CPUID_SEP
+#else
+#define TCG_NOBSD_FEATURES 0
+#endif
+
 #define TCG_FEATURES (CPUID_FP87 | CPUID_PSE | CPUID_TSC | CPUID_MSR | \
           CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | \
           CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | \
           CPUID_PSE36 | CPUID_CLFLUSH | CPUID_ACPI | CPUID_MMX | \
-          CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE)
+          CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE | \
+          TCG_NOBSD_FEATURES)
           /* partly implemented:
           CPUID_MTRR, CPUID_MCA, CPUID_CLFLUSH (needed for Win64) */
           /* missing:
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 7201a71de86..bc7d10bf863 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1185,6 +1185,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
 #define EXCP_VMEXIT     0x100 /* only for system emulation */
 #define EXCP_SYSCALL    0x101 /* only for user emulation */
 #define EXCP_VSYSCALL   0x102 /* only for user emulation */
+#define EXCP_SYSENTER   0x103 /* only for user emulation */
 
 /* i386-specific interrupt pending bits.  */
 #define CPU_INTERRUPT_POLL      CPU_INTERRUPT_TGT_EXT_1
diff --git a/target/i386/helper.h b/target/i386/helper.h
index c2e86c6119c..49d2f537557 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -49,7 +49,7 @@ DEF_HELPER_FLAGS_3(set_dr, TCG_CALL_NO_WG, void, env, int, tl)
 DEF_HELPER_FLAGS_2(get_dr, TCG_CALL_NO_WG, tl, env, int)
 #endif /* !CONFIG_USER_ONLY */
 
-DEF_HELPER_1(sysenter, void, env)
+DEF_HELPER_2(sysenter, void, env, int)
 DEF_HELPER_2(sysexit, void, env, int)
 DEF_HELPER_2(syscall, void, env, int)
 #ifdef TARGET_X86_64
diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c
index 03b58e94a2d..6899b8f6890 100644
--- a/target/i386/tcg/seg_helper.c
+++ b/target/i386/tcg/seg_helper.c
@@ -2147,39 +2147,6 @@ void helper_lret_protected(CPUX86State *env, int shift, 
int addend)
     helper_ret_protected(env, shift, 0, addend, GETPC());
 }
 
-void helper_sysenter(CPUX86State *env)
-{
-    if (env->sysenter_cs == 0) {
-        raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC());
-    }
-    env->eflags &= ~(VM_MASK | IF_MASK | RF_MASK);
-
-#ifdef TARGET_X86_64
-    if (env->hflags & HF_LMA_MASK) {
-        cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
-                               0, 0xffffffff,
-                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                               DESC_S_MASK |
-                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK |
-                               DESC_L_MASK);
-    } else
-#endif
-    {
-        cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
-                               0, 0xffffffff,
-                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                               DESC_S_MASK |
-                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
-    }
-    cpu_x86_load_seg_cache(env, R_SS, (env->sysenter_cs + 8) & 0xfffc,
-                           0, 0xffffffff,
-                           DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                           DESC_S_MASK |
-                           DESC_W_MASK | DESC_A_MASK);
-    env->regs[R_ESP] = env->sysenter_esp;
-    env->eip = env->sysenter_eip;
-}
-
 void helper_sysexit(CPUX86State *env, int dflag)
 {
     int cpl;
diff --git a/target/i386/tcg/sysemu/seg_helper.c 
b/target/i386/tcg/sysemu/seg_helper.c
index 2c9bd007adb..967882b6c69 100644
--- a/target/i386/tcg/sysemu/seg_helper.c
+++ b/target/i386/tcg/sysemu/seg_helper.c
@@ -215,3 +215,36 @@ void helper_check_io(CPUX86State *env, uint32_t addr, 
uint32_t size)
         raise_exception_err_ra(env, EXCP0D_GPF, 0, retaddr);
     }
 }
+
+void helper_sysenter(CPUX86State *env, int next_eip_addend)
+{
+    if (env->sysenter_cs == 0) {
+        raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC());
+    }
+    env->eflags &= ~(VM_MASK | IF_MASK | RF_MASK);
+
+#ifdef TARGET_X86_64
+    if (env->hflags & HF_LMA_MASK) {
+        cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK |
+                               DESC_L_MASK);
+    } else
+#endif
+    {
+        cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+    }
+    cpu_x86_load_seg_cache(env, R_SS, (env->sysenter_cs + 8) & 0xfffc,
+                           0, 0xffffffff,
+                           DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                           DESC_S_MASK |
+                           DESC_W_MASK | DESC_A_MASK);
+    env->regs[R_ESP] = env->sysenter_esp;
+    env->eip = env->sysenter_eip;
+}
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 0ddb689444e..af74c842f96 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -5667,7 +5667,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
         if (!PE(s)) {
             gen_exception_gpf(s);
         } else {
-            gen_helper_sysenter(cpu_env);
+            gen_helper_sysenter(cpu_env, cur_insn_len_i32(s));
             s->base.is_jmp = DISAS_EOB_ONLY;
         }
         break;
diff --git a/target/i386/tcg/user/seg_helper.c 
b/target/i386/tcg/user/seg_helper.c
index c45f2ac2ba6..1ac3ee39b5b 100644
--- a/target/i386/tcg/user/seg_helper.c
+++ b/target/i386/tcg/user/seg_helper.c
@@ -36,6 +36,22 @@ void helper_syscall(CPUX86State *env, int next_eip_addend)
     cpu_loop_exit(cs);
 }
 
+void helper_sysenter(CPUX86State *env, int next_eip_addend)
+{
+    CPUState *cs = env_cpu(env);
+
+    /*
+     * sysenter returns to the landing pad of the vDSO, which pops
+     * ebp/edx/ecx before executing a "ret".
+     */
+    cs->exception_index = EXCP_SYSENTER;
+    env->exception_is_int = 0;
+
+    /* Used for ERESTARTSYS.  */
+    env->exception_next_eip = env->eip + next_eip_addend;
+    cpu_loop_exit(cs);
+}
+
 /*
  * fake user mode interrupt. is_int is TRUE if coming from the int
  * instruction. next_eip is the env->eip value AFTER the interrupt
-- 
2.40.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]