qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v5 6/8] hmat acpi: Build Memory Subsystem Address Range Structure(s) in ACPI HMAT


From: Tao Xu
Subject: Re: [Qemu-devel] [PATCH v5 6/8] hmat acpi: Build Memory Subsystem Address Range Structure(s) in ACPI HMAT
Date: Tue, 2 Jul 2019 09:14:40 +0800
User-agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101 Thunderbird/60.7.2

On 7/1/2019 7:25 PM, Igor Mammedov wrote:
On Fri, 14 Jun 2019 23:56:24 +0800
Tao Xu <address@hidden> wrote:

From: Liu Jingqi <address@hidden>

HMAT is defined in ACPI 6.2: 5.2.27 Heterogeneous Memory Attribute Table (HMAT).
The specification is available at the link below:
http://www.uefi.org/sites/default/files/resources/ACPI_6_2.pdf

It describes the memory attributes, such as memory side cache
attributes and bandwidth and latency details, related to the
System Physical Address (SPA) Memory Ranges. The software is
expected to use this information as a hint for optimization.

This structure describes the System Physical Address(SPA) range
occupied by memory subsystem and its associativity with processor
proximity domain, as well as a hint for memory usage.

Signed-off-by: Liu Jingqi <address@hidden>
Signed-off-by: Tao Xu <address@hidden>
---

Changes in v4 -> v5:
     - Add more descriptions from ACPI spec (Igor)
     - Remove all the dependency on PCMachineState (Igor)
---
  hw/acpi/Kconfig       |   5 ++
  hw/acpi/Makefile.objs |   1 +
  hw/acpi/hmat.c        | 153 ++++++++++++++++++++++++++++++++++++++++++
  hw/acpi/hmat.h        |  43 ++++++++++++
  hw/core/machine.c     |   2 +
  hw/i386/acpi-build.c  |   3 +
  include/sysemu/numa.h |   2 +
  numa.c                |   6 ++
  8 files changed, 215 insertions(+)
  create mode 100644 hw/acpi/hmat.c
  create mode 100644 hw/acpi/hmat.h

diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
index 7c59cf900b..039bb99efa 100644
--- a/hw/acpi/Kconfig
+++ b/hw/acpi/Kconfig
@@ -7,6 +7,7 @@ config ACPI_X86
      select ACPI_NVDIMM
      select ACPI_CPU_HOTPLUG
      select ACPI_MEMORY_HOTPLUG
+    select ACPI_HMAT
config ACPI_X86_ICH
      bool
@@ -31,3 +32,7 @@ config ACPI_VMGENID
      bool
      default y
      depends on PC
+
+config ACPI_HMAT
+    bool
+    depends on ACPI
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 661a9b8c2f..20cc2fb124 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -6,6 +6,7 @@ common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o
  common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
  common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
  common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
+common-obj-$(CONFIG_ACPI_HMAT) += hmat.o
  common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
common-obj-y += acpi_interface.o
diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
new file mode 100644
index 0000000000..6fd434c4d9
--- /dev/null
+++ b/hw/acpi/hmat.c
@@ -0,0 +1,153 @@
+/*
+ * HMAT ACPI Implementation
+ *
+ * Copyright(C) 2019 Intel Corporation.
+ *
+ * Author:
+ *  Liu jingqi <address@hidden>
+ *  Tao Xu <address@hidden>
+ *
+ * HMAT is defined in ACPI 6.2: 5.2.27 Heterogeneous Memory Attribute Table
+ * (HMAT)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/numa.h"
+#include "hw/acpi/hmat.h"
+#include "hw/mem/pc-dimm.h"
+
+/* ACPI 6.2: 5.2.27.3 Memory Subsystem Address Range Structure: Table 5-141 */
+static void build_hmat_spa(GArray *table_data, uint16_t flags,
+                           uint64_t base, uint64_t length, int node)
+{
+
+    /* Memory Subsystem Address Range Structure */
+    /* Type */
+    build_append_int_noprefix(table_data, 0, 2);
+    /* Reserved */
+    build_append_int_noprefix(table_data, 0, 2);
+    /* Length */
+    build_append_int_noprefix(table_data, 40, 4);
+    /* Flags */
+    build_append_int_noprefix(table_data, flags, 2);
+    /* Reserved */
+    build_append_int_noprefix(table_data, 0, 2);
+    /* Processor Proximity Domain */
+    build_append_int_noprefix(table_data, node, 4);
+    /* Memory Proximity Domain */
+    build_append_int_noprefix(table_data, node, 4);
+    /* Reserved */
+    build_append_int_noprefix(table_data, 0, 4);
+    /* System Physical Address Range Base */
+    build_append_int_noprefix(table_data, base, 8);
+    /* System Physical Address Range Length */
+    build_append_int_noprefix(table_data, length, 8);
+}
+
+static int pc_dimm_device_list(Object *obj, void *opaque)
+{
+    GSList **list = opaque;
+
+    if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
+        DeviceState *dev = DEVICE(obj);
+        if (dev->realized) { /* only realized memory devices matter */
+            *list = g_slist_append(*list, DEVICE(obj));
+        }
+    }
+
+    object_child_foreach(obj, pc_dimm_device_list, opaque);
+    return 0;
+}
+
+/* Build HMAT sub table structures */
+static void hmat_build_table_structs(GArray *table_data, MachineState *ms)
+{
+    GSList *device_list = NULL;
+    uint16_t flags;
+    uint64_t mem_base, mem_len;
+    int i;
+    NumaState *nstat = ms->numa_state;
+    NumaMemRange *mem_range;
+
+    Object *obj = object_resolve_path_type("", TYPE_ACPI_DEVICE_IF, NULL);
+    AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(obj);
+    AcpiDeviceIf *adev = ACPI_DEVICE_IF(obj);
+
+    /*
+     * ACPI 6.2: 5.2.27.3 Memory Subsystem Address Range Structure:
+     * Table 5-141. The Proximity Domain of System Physical Address
+     * ranges defined in the HMAT, NFIT and SRAT tables shall match
+     * each other.
+     */
+    if (nstat->num_nodes && !nstat->mem_ranges_num) {
+        nstat->mem_ranges = g_array_new(false, true /* clear */,
+                                        sizeof *mem_range);
+        adevc->build_mem_ranges(adev, ms);
This is another place where you are trying to initialize nstat->mem_ranges;
do the initialization in the generic NUMA init code instead.

+    }
+
+    for (i = 0; i < nstat->mem_ranges_num; i++) {
+        mem_range = &g_array_index(nstat->mem_ranges, NumaMemRange, i);
+        flags = 0;
+
+        if (nstat->nodes[mem_range->node].is_initiator) {
+            flags |= HMAT_SPA_PROC_VALID;
+        }
+        if (nstat->nodes[mem_range->node].is_target) {
+            flags |= HMAT_SPA_MEM_VALID;
+        }
+
+        build_hmat_spa(table_data, flags, mem_range->base,
+                       mem_range->length,
+                       mem_range->node);
+    }
+
+    /* Build HMAT SPA structures for PC-DIMM devices. */
+    object_child_foreach(OBJECT(ms), pc_dimm_device_list, &device_list);
+
+    for (; device_list; device_list = device_list->next) {
+        PCDIMMDevice *dimm = device_list->data;
+        mem_base = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP,
+                                            NULL);
+        mem_len = object_property_get_uint(OBJECT(dimm), PC_DIMM_SIZE_PROP,
+                                           NULL);
+        i = object_property_get_uint(OBJECT(dimm), PC_DIMM_NODE_PROP, NULL);
+        flags = 0;
+
+        if (nstat->nodes[i].is_initiator) {
+            flags |= HMAT_SPA_PROC_VALID;
+        }
+        if (nstat->nodes[i].is_target) {
+            flags |= HMAT_SPA_MEM_VALID;
+        }
+        build_hmat_spa(table_data, flags, mem_base, mem_len, i);
+    }
Don't you need to free device_list at this point?


Thank you for your suggestion, I will correct it.
+}
+
+void build_hmat(GArray *table_data, BIOSLinker *linker, MachineState *ms)
+{
+    uint64_t hmat_start;
+
+    hmat_start = table_data->len;
+
+    /* reserve space for HMAT header  */
+    acpi_data_push(table_data, 40);
+
+    hmat_build_table_structs(table_data, ms);
+
+    build_header(linker, table_data,
+                 (void *)(table_data->data + hmat_start),
+                 "HMAT", table_data->len - hmat_start, 1, NULL, NULL);
+}
diff --git a/hw/acpi/hmat.h b/hw/acpi/hmat.h
new file mode 100644
index 0000000000..e24b673fad
--- /dev/null
+++ b/hw/acpi/hmat.h
@@ -0,0 +1,43 @@
+/*
+ * HMAT ACPI Implementation Header
+ *
+ * Copyright(C) 2019 Intel Corporation.
+ *
+ * Author:
+ *  Liu jingqi <address@hidden>
+ *  Tao Xu <address@hidden>
+ *
+ * HMAT is defined in ACPI 6.2.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#ifndef HMAT_H
+#define HMAT_H
+
+#include "hw/acpi/acpi-defs.h"
+#include "hw/acpi/acpi.h"
+#include "hw/acpi/bios-linker-loader.h"
+#include "hw/acpi/aml-build.h"
+
+/* the values of AcpiHmatSpaRange flag */
+enum {
+    HMAT_SPA_PROC_VALID       = 0x1,
+    HMAT_SPA_MEM_VALID        = 0x2,
+    HMAT_SPA_RESERVATION_HINT = 0x4,
+};
+
+void build_hmat(GArray *table_data, BIOSLinker *linker, MachineState *ms);
+
+#endif
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 14b29de0a9..2ad09ec23e 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -646,6 +646,7 @@ void machine_set_cpu_numa_node(MachineState *machine,
                                 const CpuInstanceProperties *props, Error 
**errp)
  {
      MachineClass *mc = MACHINE_GET_CLASS(machine);
+    NodeInfo *numa_info = machine->numa_state->nodes;
      bool match = false;
      int i;
@@ -706,6 +707,7 @@ void machine_set_cpu_numa_node(MachineState *machine,
          match = true;
          slot->props.node_id = props->node_id;
          slot->props.has_node_id = props->has_node_id;
+        numa_info[props->node_id].is_initiator = true;
      }
if (!match) {
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 44dd447fa5..6584eac76e 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -66,6 +66,7 @@
  #include "hw/i386/intel_iommu.h"
#include "hw/acpi/ipmi.h"
+#include "hw/acpi/hmat.h"
/* These are used to size the ACPI tables for -M pc-i440fx-1.7 and
   * -M pc-i440fx-2.0.  Even if the actual amount of AML generated grows
@@ -2710,6 +2711,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState 
*machine)
              acpi_add_table(table_offsets, tables_blob);
              build_slit(tables_blob, tables->linker, machine);
          }
+        acpi_add_table(table_offsets, tables_blob);
+        build_hmat(tables_blob, tables->linker, machine);
I'm not sure if we should add it unconditionally.
Is this table used in any meaningful manner by guest when
it's incomplete (i.e. populated only with SPA records)?

      }
      if (acpi_get_mcfg(&mcfg)) {
          acpi_add_table(table_offsets, tables_blob);
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index e3c85b77bc..13cff59112 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -10,6 +10,8 @@ struct NodeInfo {
      uint64_t node_mem;
      struct HostMemoryBackend *node_memdev;
      bool present;
+    bool is_initiator;
+    bool is_target;
      uint8_t distance[MAX_NODES];
  };
diff --git a/numa.c b/numa.c
index d23e130bce..5556d118c3 100644
--- a/numa.c
+++ b/numa.c
@@ -102,6 +102,10 @@ static void parse_numa_node(MachineState *ms, 
NumaNodeOptions *node,
          }
      }
+ if (node->cpus) {
+        numa_info[nodenr].is_initiator = true;
+    }
+
      if (node->has_mem && node->has_memdev) {
          error_setg(errp, "cannot specify both mem= and memdev=");
          return;
@@ -118,6 +122,7 @@ static void parse_numa_node(MachineState *ms, 
NumaNodeOptions *node,
if (node->has_mem) {
          numa_info[nodenr].node_mem = node->mem;
+        numa_info[nodenr].is_target = true;
      }
      if (node->has_memdev) {
          Object *o;
@@ -130,6 +135,7 @@ static void parse_numa_node(MachineState *ms, 
NumaNodeOptions *node,
          object_ref(o);
          numa_info[nodenr].node_mem = object_property_get_uint(o, "size", 
NULL);
          numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
+        numa_info[nodenr].is_target = true;
      }
      numa_info[nodenr].present = true;
      max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1);





reply via email to

[Prev in Thread] Current Thread [Next in Thread]