libunwind-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Libunwind-devel] [PATCH] Add stack alignment prologue tdep_trace fastpath


From: Dave Watson
Subject: [Libunwind-devel] [PATCH] Add stack alignment prologue tdep_trace fastpath
Date: Mon, 28 Nov 2016 15:50:58 -0800
User-agent: Mutt/1.6.0 (2016-04-01)

GCC versions 4.9~current will often generate stack alignment prologues like:

lea 0x8(%rsp),%r10
and $0xfffffffffffffff0,%rsp
...
push %rbp
mov %rsp, %rbp
push %r10

resulting in dwarf expressions:
DW_CFA_def_cfa_expression (DW_OP_breg6: -8; DW_OP_deref)
DW_CFA_expression: r6 (rbp) (DW_OP_breg6: 0)

These prologues seem to be generated mostly for SSE/AVX code, but they
sometimes appear in other cases as well.

tdep_trace fastpath currently falls back to the slow dwarf parsing path
if it encounters any cfa_expressions. Unfortunately this is happening
often enough in our codebase to cause perf issues.  We could also fix the
fallback path (make the rs cache bigger, lock-free instead of locking, etc),
but that seems like a separate issue, and it will never be as fast as the tracing
code.   Our binaries each have at least ~100 functions in them like this.

This patch teaches tdep_trace about these two specific cfa_expressions,
which really just result in a single extra memory dereference of the stack
at a fixed offset from rbp.
---
 include/dwarf.h                   |  5 ++++
 include/tdep-x86_64/libunwind_i.h |  5 ++--
 src/dwarf/Gexpr.c                 | 48 +++++++++++++++++++++++++++++++++++++++
 src/x86_64/Gstash_frame.c         | 23 ++++++++++++++++++-
 src/x86_64/Gtrace.c               | 18 +++++++++++++++
 5 files changed, 96 insertions(+), 3 deletions(-)

diff --git a/include/dwarf.h b/include/dwarf.h
index 633868b..f493de8 100644
--- a/include/dwarf.h
+++ b/include/dwarf.h
@@ -387,6 +387,7 @@ struct dwarf_callback_data
 #define dwarf_put_unwind_info           UNW_OBJ (dwarf_put_unwind_info)
 #define dwarf_put_unwind_info           UNW_OBJ (dwarf_put_unwind_info)
 #define dwarf_eval_expr                 UNW_OBJ (dwarf_eval_expr)
+#define dwarf_stack_aligned             UNW_OBJ (dwarf_stack_aligned)
 #define dwarf_extract_proc_info_from_fde \
                 UNW_OBJ (dwarf_extract_proc_info_from_fde)
 #define dwarf_find_save_locs            UNW_OBJ (dwarf_find_save_locs)
@@ -419,6 +420,10 @@ extern void dwarf_put_unwind_info (unw_addr_space_t as,
 extern int dwarf_eval_expr (struct dwarf_cursor *c, unw_word_t *addr,
                             unw_word_t len, unw_word_t *valp,
                             int *is_register);
+extern int
+dwarf_stack_aligned(struct dwarf_cursor *c, unw_word_t cfa_addr,
+                    unw_word_t rbp_addr, unw_word_t *offset);
+
 extern int dwarf_extract_proc_info_from_fde (unw_addr_space_t as,
                                              unw_accessors_t *a,
                                              unw_word_t *fde_addr,
diff --git a/include/tdep-x86_64/libunwind_i.h 
b/include/tdep-x86_64/libunwind_i.h
index d19c705..e1271c1 100644
--- a/include/tdep-x86_64/libunwind_i.h
+++ b/include/tdep-x86_64/libunwind_i.h
@@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
SOFTWARE.  */
 
 typedef enum
   {
+    UNW_X86_64_FRAME_ALIGNED = -3,       /* frame stack pointer aligned */
     UNW_X86_64_FRAME_STANDARD = -2,     /* regular rbp, rsp +/- offset */
     UNW_X86_64_FRAME_SIGRETURN = -1,    /* special sigreturn frame */
     UNW_X86_64_FRAME_OTHER = 0,         /* not cacheable (special or 
unrecognised) */
@@ -50,10 +51,10 @@ unw_tdep_frame_type_t;
 typedef struct
   {
     uint64_t virtual_address;
-    int64_t frame_type     : 2;  /* unw_tdep_frame_type_t classification */
+    int64_t frame_type     : 3;  /* unw_tdep_frame_type_t classification */
     int64_t last_frame     : 1;  /* non-zero if last frame in chain */
     int64_t cfa_reg_rsp    : 1;  /* cfa dwarf base register is rsp vs. rbp */
-    int64_t cfa_reg_offset : 30; /* cfa is at this offset from base register 
value */
+    int64_t cfa_reg_offset : 29; /* cfa is at this offset from base register 
value */
     int64_t rbp_cfa_offset : 15; /* rbp saved at this offset from cfa (-1 = 
not saved) */
     int64_t rsp_cfa_offset : 15; /* rsp saved at this offset from cfa (-1 = 
not saved) */
   }
diff --git a/src/dwarf/Gexpr.c b/src/dwarf/Gexpr.c
index b56bb31..1d4974e 100644
--- a/src/dwarf/Gexpr.c
+++ b/src/dwarf/Gexpr.c
@@ -187,6 +187,54 @@ read_operand (unw_addr_space_t as, unw_accessors_t *a,
 }
 
 HIDDEN int
+dwarf_stack_aligned(struct dwarf_cursor *c, unw_word_t cfa_addr,
+                    unw_word_t rbp_addr, unw_word_t *cfa_offset) {
+  unw_accessors_t *a;
+  int ret;
+  void *arg;
+  unw_word_t len;
+  uint8_t opcode;
+  unw_word_t operand1;
+
+  a = unw_get_accessors (c->as);
+  arg = c->as_arg;
+
+  ret = dwarf_read_uleb128(c->as, a, &rbp_addr, &len, arg);
+  if (len != 2 || ret < 0)
+    return 0;
+
+  ret = dwarf_readu8(c->as, a, &rbp_addr, &opcode, arg);
+  if (ret < 0 || opcode != DW_OP_breg6)
+    return 0;
+
+  ret = read_operand(c->as, a, &rbp_addr,
+                     OPND1_TYPE(operands[opcode]), &operand1, arg);
+
+  if (ret < 0 || operand1 != 0)
+    return 0;
+
+  ret = dwarf_read_uleb128(c->as, a, &cfa_addr, &len, arg);
+  if (ret < 0 || len != 3)
+    return 0;
+
+  ret = dwarf_readu8(c->as, a, &cfa_addr, &opcode, arg);
+  if (ret < 0 || opcode != DW_OP_breg6)
+    return 0;
+
+  ret = read_operand(c->as, a, &cfa_addr,
+                     OPND1_TYPE(operands[opcode]), &operand1, arg);
+  if (ret < 0)
+    return 0;
+
+  ret = dwarf_readu8(c->as, a, &cfa_addr, &opcode, arg);
+  if (ret < 0 || opcode != DW_OP_deref)
+    return 0;
+
+  *cfa_offset = operand1;
+  return 1;
+}
+
+HIDDEN int
 dwarf_eval_expr (struct dwarf_cursor *c, unw_word_t *addr, unw_word_t len,
                  unw_word_t *valp, int *is_register)
 {
diff --git a/src/x86_64/Gstash_frame.c b/src/x86_64/Gstash_frame.c
index dc6c7c8..451b9fe 100644
--- a/src/x86_64/Gstash_frame.c
+++ b/src/x86_64/Gstash_frame.c
@@ -41,6 +41,23 @@ tdep_stash_frame (struct dwarf_cursor *d, struct 
dwarf_reg_state *rs)
          rs->reg[RBP].where, rs->reg[RBP].val, DWARF_GET_LOC(d->loc[RBP]),
          rs->reg[RSP].where, rs->reg[RSP].val, DWARF_GET_LOC(d->loc[RSP]));
 
+  if (rs->reg[DWARF_CFA_REG_COLUMN].where == DWARF_WHERE_EXPR &&
+    rs->reg[RBP].where == DWARF_WHERE_EXPR) {
+    /* Check for GCC generated alignment frame for rsp.  A simple
+     * def_cfa_expr that loads a constant offset from rbp, where the
+     * address of the rip was pushed on the stack */
+    unw_word_t cfa_addr = rs->reg[DWARF_CFA_REG_COLUMN].val;
+    unw_word_t rbp_addr = rs->reg[RBP].val;
+    unw_word_t cfa_offset;
+
+    int ret = dwarf_stack_aligned(d, cfa_addr, rbp_addr, &cfa_offset);
+    if (ret) {
+      f->frame_type = UNW_X86_64_FRAME_ALIGNED;
+      f->cfa_reg_offset = cfa_offset;
+      f->cfa_reg_rsp = 0;
+    }
+  }
+
   /* A standard frame is defined as:
       - CFA is register-relative offset off RBP or RSP;
       - Return address is saved at CFA-8;
@@ -50,7 +67,7 @@ tdep_stash_frame (struct dwarf_cursor *d, struct 
dwarf_reg_state *rs)
       && (rs->reg[DWARF_CFA_REG_COLUMN].where == DWARF_WHERE_REG)
       && (rs->reg[DWARF_CFA_REG_COLUMN].val == RBP
           || rs->reg[DWARF_CFA_REG_COLUMN].val == RSP)
-      && labs((long) rs->reg[DWARF_CFA_OFF_COLUMN].val) < (1 << 29)
+      && labs((long) rs->reg[DWARF_CFA_OFF_COLUMN].val) < (1 << 28)
       && DWARF_GET_LOC(d->loc[d->ret_addr_column]) == d->cfa-8
       && (rs->reg[RBP].where == DWARF_WHERE_UNDEF
           || rs->reg[RBP].where == DWARF_WHERE_SAME
@@ -92,6 +109,10 @@ tdep_stash_frame (struct dwarf_cursor *d, struct 
dwarf_reg_state *rs)
     Debug (4, " sigreturn frame\n");
   }
 
+  else if (f->frame_type == UNW_X86_64_FRAME_ALIGNED) {
+    Debug (4, " aligned frame, offset %li\n", f->cfa_reg_offset);
+  }
+
   /* PLT and guessed RBP-walked frames are handled in unw_step(). */
   else
     Debug (4, " unusual frame\n");
diff --git a/src/x86_64/Gtrace.c b/src/x86_64/Gtrace.c
index 833d7a7..7412271 100644
--- a/src/x86_64/Gtrace.c
+++ b/src/x86_64/Gtrace.c
@@ -506,6 +506,24 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
       d->use_prev_instr = 0;
       break;
 
+    case UNW_X86_64_FRAME_ALIGNED:
+      /* Address of RIP was pushed on the stack via a simple
+       * def_cfa_expr - result stack offset stored in cfa_reg_offset */
+      cfa = (f->cfa_reg_rsp ? rsp : rbp) + f->cfa_reg_offset;
+      ACCESS_MEM_FAST(ret, c->validate, d, cfa, cfa);
+      if (likely(ret >= 0))
+        ACCESS_MEM_FAST(ret, c->validate, d, cfa - 8, rip);
+      if (likely(ret >= 0))
+        ACCESS_MEM_FAST(ret, c->validate, d, rbp, rbp);
+
+      /* Don't bother reading RSP from DWARF, CFA becomes new RSP. */
+      rsp = cfa;
+
+      /* Next frame needs to back up for unwind info lookup. */
+      d->use_prev_instr = 1;
+
+      break;
+
     default:
       /* We cannot trace through this frame, give up and tell the
          caller we had to stop.  Data collected so far may still be
-- 
2.8.0-rc2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]