qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] coroutine-asm: add x86 CET shadow stack support


From: Marc-André Lureau
Subject: Re: [PATCH] coroutine-asm: add x86 CET shadow stack support
Date: Mon, 12 Jun 2023 15:30:40 +0200

Hi Paolo

On Wed, May 10, 2023 at 6:05 PM Paolo Bonzini <pbonzini@redhat.com> wrote:
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

The patch looks generally good, but it no longer applies cleanly.

Are you planning to update it later to support the more secure "map_shadow_stack" syscall, if/when it is added?

---
 meson.build          | 16 +++++++--
 util/coroutine-asm.c | 82 ++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 93 insertions(+), 5 deletions(-)

diff --git a/meson.build b/meson.build
index 0121ccab78dd..17e4a3bc582e 100644
--- a/meson.build
+++ b/meson.build
@@ -328,6 +328,10 @@ elif coroutine_backend not in supported_backends
         .format(coroutine_backend, ', '.join(supported_backends)))
 endif

+if cfi_mode == 'hw' and coroutine_backend != 'asm'
+  error('Hardware control-flow integrity requires the "asm" coroutine backend.')
+endif
+
 # Compiles if SafeStack *not* enabled
 safe_stack_probe = '''
   int main(void)
@@ -469,16 +473,22 @@ if cfi_mode == 'sw'
     endif
   endif
 elif cfi_mode in ['hw', 'auto']
-  if cfi_mode == 'hw'
-    error('Hardware CFI is not supported yet')
+  if cpu in ['x86', 'x86_64']
+    cfi_flags += cc.get_supported_arguments('-fcf-protection=full')
+    if cfi_flags == [] and cfi_mode == 'hw'
+      error('C compiler does not support -fcf-protection')
+    endif
+  elif cfi_mode == 'hw'
+    error('Hardware CFI is only supported on x86')
   endif
   if cfi_flags == [] and cfi_mode == 'auto'
     cfi_mode = 'disabled'
   endif
 endif
-if cpu in ['x86', 'x86_64']
+if cpu in ['x86', 'x86_64'] and cfi_mode != 'hw'
   cfi_flags += cc.get_supported_arguments('-fcf-protection=branch')
 endif
+
 add_global_arguments(cfi_flags, native: false, language: all_languages)
 add_global_link_arguments(cfi_flags, native: false, language: all_languages)

diff --git a/util/coroutine-asm.c b/util/coroutine-asm.c
index a06ecbcb0a07..771b1d4a0fc9 100644
--- a/util/coroutine-asm.c
+++ b/util/coroutine-asm.c
@@ -22,6 +22,13 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "qemu/coroutine_int.h"
+#include "qemu/error-report.h"
+
+#ifdef CONFIG_CF_PROTECTION
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+int arch_prctl(int code, unsigned long addr);
+#endif

 #ifdef CONFIG_VALGRIND_H
 #include <valgrind/valgrind.h>
@@ -39,10 +46,14 @@
 typedef struct {
     Coroutine base;
     void *sp;
+    void *ssp;

     void *stack;
     size_t stack_size;

+    /* x86: CET shadow stack */
+    void *sstack;
+    size_t sstack_size;
 #ifdef CONFIG_VALGRIND_H
     unsigned int valgrind_stack_id;
 #endif
@@ -77,6 +88,35 @@ static void start_switch_fiber(void **fake_stack_save,
 #endif
 }

+static bool have_sstack(void)
+{
+#if defined CONFIG_CF_PROTECTION && defined __x86_64__
+    uint64_t ssp;
+    asm ("xor %0, %0; rdsspq %0\n" : "=r" (ssp));
+    return !!ssp;
+#else
+    return 0;
+#endif
+}
+
+static void *alloc_sstack(size_t sz)
+{
+#if defined CONFIG_CF_PROTECTION && defined __x86_64__
+#ifndef ARCH_X86_CET_ALLOC_SHSTK
+#define ARCH_X86_CET_ALLOC_SHSTK 0x3004
+#endif
+
+    uint64_t arg = sz;
+    if (arch_prctl(ARCH_X86_CET_ALLOC_SHSTK, (unsigned long) &arg) < 0) {
+        abort();
+    }
+
+    return (void *)arg;
+#else
+    abort();
+#endif
+}
+
 #ifdef __x86_64__
 /*
  * We hardcode all operands to specific registers so that we can write down all the
@@ -88,6 +128,26 @@ static void start_switch_fiber(void **fake_stack_save,
  * Note that push and call would clobber the red zone.  Makefile.objs compiles this
  * file with -mno-red-zone.  The alternative is to subtract/add 128 bytes from rsp
  * around the switch, with slightly lower cache performance.
+ *
+ * The RSTORSSP and SAVEPREVSSP instructions are intricate.  In a nutshell they are:
+ *
+ *      RSTORSSP(mem):    oldSSP = SSP
+ *                        SSP = mem
+ *                        *SSP = oldSSP
+ *
+ *      SAVEPREVSSP:      oldSSP = shadow_stack_pop()
+ *                        *(oldSSP - 8) = oldSSP       # "push" to old shadow stack
+ *
+ * Therefore, RSTORSSP(mem) followed by SAVEPREVSSP is the same as
+ *
+ *     shadow_stack_push(SSP)
+ *     SSP = mem
+ *     shadow_stack_pop()
+ *
+ * From the simplified description you can see that co->ssp, being stored before
+ * the RSTORSSP+SAVEPREVSSP sequence, points to the top actual entry of the shadow
+ * stack, not to the restore token.  Hence we use an offset of -8 in the operand
+ * of rstorssp.
  */
 #define CO_SWITCH(from, to, action, jump) ({                                          \
     int action_ = action;                                                             \
@@ -100,7 +160,15 @@ static void start_switch_fiber(void **fake_stack_save,
         "jmp 2f\n"                          /* switch back continues at label 2 */    \
                                                                                       \
         "1: .cfi_adjust_cfa_offset 8\n"                                               \
-        "movq %%rsp, %c[SP](%[FROM])\n"     /* save source SP */                      \
+        "xor %%rbp, %%rbp\n"                /* use old frame pointer as scratch reg */ \
+        "rdsspq %%rbp\n"                                                              \
+        "test %%rbp, %%rbp\n"               /* if CET is enabled... */                \
+        "jz 9f\n"                                                                     \
+        "movq %%rbp, %c[SSP](%[FROM])\n"    /* ... save source shadow SP, */         \
+        "movq %c[SSP](%[TO]), %%rbp\n"      /* restore destination shadow stack, */  \
+        "rstorssp -8(%%rbp)\n"                                                        \
+        "saveprevssp\n"                     /* and save source shadow SP token */     \
+        "9: movq %%rsp, %c[SP](%[FROM])\n"  /* save source SP */                      \
         "movq %c[SP](%[TO]), %%rsp\n"       /* load destination SP */                 \
         jump "\n"                           /* coroutine switch */                    \
                                                                                       \
@@ -108,7 +176,8 @@ static void start_switch_fiber(void **fake_stack_save,
         "popq %%rbp\n"                                                                \
         ".cfi_adjust_cfa_offset -8\n"                                                 \
         : "+a" (action_), [FROM] "+b" (from_), [TO] "+D" (to_)                        \
-        : [SP] "i" (offsetof(CoroutineAsm, sp))                                       \
+        : [SP] "i" (offsetof(CoroutineAsm, sp)),                                      \
+          [SSP] "i" (offsetof(CoroutineAsm, ssp))                                     \
         : "rcx", "rdx", "rsi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",  \
           "memory");                                                                  \
     action_;                                                                          \
@@ -141,6 +210,12 @@ Coroutine *qemu_coroutine_new(void)
     co->stack = qemu_alloc_stack(&co->stack_size);
     co->sp = co->stack + co->stack_size;

+    if (have_sstack()) {
+        co->sstack_size = COROUTINE_SHADOW_STACK_SIZE;
+        co->sstack = alloc_sstack(co->sstack_size);
+        co->ssp = co->sstack + co->sstack_size;
+    }
+
 #ifdef CONFIG_VALGRIND_H
     co->valgrind_stack_id =
         VALGRIND_STACK_REGISTER(co->stack, co->stack + co->stack_size);
@@ -186,6 +261,9 @@ void qemu_coroutine_delete(Coroutine *co_)
 #endif

     qemu_free_stack(co->stack, co->stack_size);
+    if (co->sstack) {
+        munmap(co->sstack, co->sstack_size);
+    }
     g_free(co);
 }

--
2.40.1




--
Marc-André Lureau

reply via email to

[Prev in Thread] Current Thread [Next in Thread]