qemu-trivial
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] target/i386: log guest name and memory error type AO, AR for MCEs


From: Philippe Mathieu-Daudé
Subject: Re: [PATCH] target/i386: log guest name and memory error type AO, AR for MCEs
Date: Mon, 7 Oct 2019 12:27:29 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Thunderbird/68.1.0

Hi Mario,

On 10/5/19 1:53 AM, Mario Smarduch wrote:
In a large VPC environment we want to log memory error occurrences
together with the guest name and error type. There are a few use cases:


- if VM crashes on AR mce inform the user about the reason and
   resolve the case
- if VM hangs notify the user to reboot and resume processing
- if VM continues to run let the user know, he/she may be able to
   correlate it to a VM-internal outage
- Rowhammer attacks - isolate/determine the attacker, possibly
   migrating it off the hypervisor
- In general track memory errors on a hypervisor over time to determine
   trends

Monitoring our fleet we have come across quite a few of these and have been
able to take action where before there were no clues to the causes.

When memory error occurs we get a log entry in qemu log:

Guest [Droplet-12345678] 2019-08-02T05:00:11.940270Z qemu-system-x86_64:
Guest MCE Memory Error at qemu addr 0x7f3c7622f000 and guest 78e42f000
addr of type BUS_MCEERR_AR injected

With an enterprise logging environment we can then take further actions.

Signed-off-by: Mario Smarduch <address@hidden>
---
  target/i386/kvm.c | 27 ++++++++++++++++++++++-----
  util/qemu-error.c | 24 ++++++++++++++++++++++++
  2 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 92069099ab..79ebccc684 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -555,9 +555,9 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr
paddr, int code)
                         (MCM_ADDR_PHYS << 6) | 0xc, flags);
  }

-static void hardware_memory_error(void)
+static void hardware_memory_error(void *addr)

Maybe rename addr -> host_addr.

  {
-    fprintf(stderr, "Hardware memory error!\n");
+    error_report("QEMU got Hardware memory error at addr %p", addr);
      exit(1);
  }

@@ -581,15 +581,32 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int
code, void *addr)
              kvm_physical_memory_addr_from_host(c->kvm_state, addr,
&paddr)) {
              kvm_hwpoison_page_add(ram_addr);
              kvm_mce_inject(cpu, paddr, code);
+            /*
+             * Use different logging severity based on error type.
+             * If mcelog is running qemu va addr will help debug via
mcelog.
+             */
+            if (code == BUS_MCEERR_AR) {
+                error_report("Guest MCE Memory Error at qemu addr %p and "
+                    "guest %lx addr of type %s injected", addr, paddr,

"qemu addr" is not clear IMO, 'addr' is in the host (and is virtual... how does this help you?).

For the guest paddr you should use "0x%"HWADDR_PRIx format.

+                     "BUS_MCEERR_AR");
+            } else {
+                 warn_report("Guest MCE Memory Error at qemu addr %p and "
+                     "guest %lx addr of type %s injected", addr,
+                     paddr, "BUS_MCEERR_AO");
+            }
+
              return;
          }

-        fprintf(stderr, "Hardware memory error for memory used by "
-                "QEMU itself instead of guest system!\n");
+        if (code == BUS_MCEERR_AO) {
+            warn_report("Hardware memory error at addr %p of type %s "
+                "for memory used by QEMU itself instead of guest system!",
+                addr, "BUS_MCEERR_AO");
+        }
      }

      if (code == BUS_MCEERR_AR) {
-        hardware_memory_error();
+        hardware_memory_error(addr);
      }

      /* Hope we are lucky for AO MCE */
diff --git a/util/qemu-error.c b/util/qemu-error.c
index f373f3b3b0..2ebafd4405 100644
--- a/util/qemu-error.c
+++ b/util/qemu-error.c
@@ -11,6 +11,8 @@
   */

  #include "qemu/osdep.h"
+#include "qemu/option.h"
+#include "qemu/config-file.h"
  #include "monitor/monitor.h"
  #include "qemu/error-report.h"

@@ -35,11 +37,31 @@ int error_printf(const char *fmt, ...)
      return ret;
  }

+static const char *error_get_guestname(void)
+{
+    QemuOpts *opts = qemu_opts_find(qemu_find_opts("name"), NULL);
+    return qemu_opt_get(opts, "guest");
+}
+
+/*
+ * Print guest name associated with error, to aid debugging errors from
+ * multiple guests in centralized logging environment.
+ */
+static void error_print_guestname(void)
+{
+    const char *name;
+    name = error_get_guestname();
+    if (name != NULL && !cur_mon) {
+        error_printf("Guest [%s] ", name);
+    }
+}
+
  int error_printf_unless_qmp(const char *fmt, ...)
  {
      va_list ap;
      int ret;

+    error_print_guestname();
      va_start(ap, fmt);
      ret = error_vprintf_unless_qmp(fmt, ap);
      va_end(ap);
@@ -274,6 +296,7 @@ void error_report(const char *fmt, ...)
  {
      va_list ap;

+    error_print_guestname();
      va_start(ap, fmt);
      vreport(REPORT_TYPE_ERROR, fmt, ap);
      va_end(ap);
@@ -289,6 +312,7 @@ void warn_report(const char *fmt, ...)
  {
      va_list ap;

+    error_print_guestname();
      va_start(ap, fmt);
      vreport(REPORT_TYPE_WARNING, fmt, ap);
      va_end(ap);
--
2.17.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]