[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v5 4/4] spapr: Add Hcalls to support PAPR NVDIMM device
From: David Gibson
Subject: Re: [PATCH v5 4/4] spapr: Add Hcalls to support PAPR NVDIMM device
Date: Tue, 4 Feb 2020 15:09:15 +1100
On Thu, Jan 30, 2020 at 05:48:28AM -0600, Shivaprasad G Bhat wrote:
> This patch implements a few of the necessary hcalls for NVDIMM support.
>
> PAPR semantics are such that each NVDIMM device is composed of multiple
> SCM (Storage Class Memory) blocks. The guest requests the hypervisor to
> bind each of the SCM blocks of the NVDIMM device using hcalls. There can
> be SCM block unbind requests in case of driver errors or unplug (not
> supported now) use cases. The NVDIMM label reads/writes are done through
> hcalls.
>
> Since each virtual NVDIMM device is divided into multiple SCM blocks,
> the bind, unbind, and queries using hcalls on those blocks can come
> independently. This doesn't fit well into the qemu device semantics,
> where the map/unmap are done at the (whole) device/object level granularity.
> The patch doesn't actually bind/unbind on hcalls but lets it happen at the
> device_add/del phase itself instead.
>
> The guest kernel makes bind/unbind requests for the virtual NVDIMM device
> at the region level granularity. Without interleaving, each virtual NVDIMM
> device is presented as a separate guest physical address range. So, there
> is no way a partial bind/unbind request can come for the vNVDIMM in a
> hcall for a subset of SCM blocks of a virtual NVDIMM. Hence it is safe to
> do bind/unbind everything during the device_add/del.
>
> Signed-off-by: Shivaprasad G Bhat <address@hidden>
LGTM, apart from some minor nits noted below.
> ---
> hw/ppc/Makefile.objs | 2
> hw/ppc/spapr_nvdimm.c | 327
> ++++++++++++++++++++++++++++++++++++++++++++++++
> include/hw/ppc/spapr.h | 8 +
> 3 files changed, 335 insertions(+), 2 deletions(-)
> create mode 100644 hw/ppc/spapr_nvdimm.c
>
> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> index a4bac57be6..c3d3cc56eb 100644
> --- a/hw/ppc/Makefile.objs
> +++ b/hw/ppc/Makefile.objs
> @@ -7,7 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr.o spapr_caps.o spapr_vio.o
> spapr_events.o
> obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
> obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o
> obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o spapr_irq.o
> -obj-$(CONFIG_PSERIES) += spapr_tpm_proxy.o
> +obj-$(CONFIG_PSERIES) += spapr_tpm_proxy.o spapr_nvdimm.o
> obj-$(CONFIG_SPAPR_RNG) += spapr_rng.o
> obj-$(call land,$(CONFIG_PSERIES),$(CONFIG_LINUX)) += spapr_pci_vfio.o
> spapr_pci_nvlink2.o
> # IBM PowerNV
> diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
> new file mode 100644
> index 0000000000..8d1c2dc009
> --- /dev/null
> +++ b/hw/ppc/spapr_nvdimm.c
It'd be nice to introduce this file in the previous patch and try to
keep as much of the NVDIMM code together as possible, rather than
bloating spapr.c even further.
> @@ -0,0 +1,327 @@
> +/*
> + * QEMU PAPR Storage Class Memory Interfaces
> + *
> + * Copyright (c) 2019-2020, IBM Corporation.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> copy
> + * of this software and associated documentation files (the "Software"), to
> deal
> + * in the Software without restriction, including without limitation the
> rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "hw/ppc/spapr.h"
> +#include "hw/ppc/spapr_drc.h"
> +#include "hw/mem/nvdimm.h"
> +#include "qemu/range.h"
> +#include "qemu/nvdimm-utils.h"
> +
> +static target_ulong h_scm_read_metadata(PowerPCCPU *cpu,
> + SpaprMachineState *spapr,
> + target_ulong opcode,
> + target_ulong *args)
> +{
> + uint32_t drc_index = args[0];
> + uint64_t offset = args[1];
> + uint64_t numBytesToRead = args[2];
That's a really long name for a local. How about just 'size' or 'len'?
> + SpaprDrc *drc = spapr_drc_by_index(drc_index);
> + NVDIMMDevice *nvdimm;
> + NVDIMMClass *ddc;
> + uint64_t data = 0;
> + uint8_t buf[8] = { 0 };
> +
> + if (!drc || !drc->dev ||
> + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
> + return H_PARAMETER;
> + }
> +
> + if (numBytesToRead != 1 && numBytesToRead != 2 &&
> + numBytesToRead != 4 && numBytesToRead != 8) {
> + return H_P3;
> + }
> +
> + nvdimm = NVDIMM(drc->dev);
> + if ((offset + numBytesToRead < offset) ||
> + (nvdimm->label_size < numBytesToRead + offset)) {
> + return H_P2;
> + }
> +
> + ddc = NVDIMM_GET_CLASS(nvdimm);
> + ddc->read_label_data(nvdimm, buf, numBytesToRead, offset);
> +
> + switch (numBytesToRead) {
> + case 1:
> + data = ldub_p(buf);
> + break;
> + case 2:
> + data = lduw_be_p(buf);
> + break;
> + case 4:
> + data = ldl_be_p(buf);
> + break;
> + case 8:
> + data = ldq_be_p(buf);
> + break;
> + default:
> + g_assert_not_reached();
> + }
> +
> + args[0] = data;
> +
> + return H_SUCCESS;
> +}
> +
> +static target_ulong h_scm_write_metadata(PowerPCCPU *cpu,
> + SpaprMachineState *spapr,
> + target_ulong opcode,
> + target_ulong *args)
> +{
> + uint32_t drc_index = args[0];
> + uint64_t offset = args[1];
> + uint64_t data = args[2];
> + uint64_t numBytesToWrite = args[3];
> + SpaprDrc *drc = spapr_drc_by_index(drc_index);
> + NVDIMMDevice *nvdimm;
> + NVDIMMClass *ddc;
> + uint8_t buf[8] = { 0 };
> +
> + if (!drc || !drc->dev ||
> + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
> + return H_PARAMETER;
> + }
> +
> + if (numBytesToWrite != 1 && numBytesToWrite != 2 &&
> + numBytesToWrite != 4 && numBytesToWrite != 8) {
> + return H_P4;
> + }
> +
> + nvdimm = NVDIMM(drc->dev);
> + if ((offset + numBytesToWrite < offset) ||
> + (nvdimm->label_size < numBytesToWrite + offset)) {
> + return H_P2;
> + }
> +
> + switch (numBytesToWrite) {
> + case 1:
> + if (data & 0xffffffffffffff00) {
> + return H_P2;
> + }
> + stb_p(buf, data);
> + break;
> + case 2:
> + if (data & 0xffffffffffff0000) {
> + return H_P2;
> + }
> + stw_be_p(buf, data);
> + break;
> + case 4:
> + if (data & 0xffffffff00000000) {
> + return H_P2;
> + }
> + stl_be_p(buf, data);
> + break;
> + case 8:
> + stq_be_p(buf, data);
> + break;
> + default:
> + g_assert_not_reached();
> + }
> +
> + ddc = NVDIMM_GET_CLASS(nvdimm);
> + ddc->write_label_data(nvdimm, buf, numBytesToWrite, offset);
> +
> + return H_SUCCESS;
> +}
> +
> +static target_ulong h_scm_bind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr,
> + target_ulong opcode, target_ulong *args)
> +{
> + uint32_t drc_index = args[0];
> + uint64_t starting_idx = args[1];
> + uint64_t no_of_scm_blocks_to_bind = args[2];
> + uint64_t target_logical_mem_addr = args[3];
> + uint64_t continue_token = args[4];
> + uint64_t size;
> + uint64_t total_no_of_scm_blocks;
> + SpaprDrc *drc = spapr_drc_by_index(drc_index);
> + hwaddr addr;
> + NVDIMMDevice *nvdimm;
> +
> + if (!drc || !drc->dev ||
> + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
> + return H_PARAMETER;
> + }
> +
> + /*
> + * Currently the continue token should be zero: qemu has already bound
> + * everything, so this hcall doesn't return H_BUSY.
> + */
> + if (continue_token > 0) {
> + return H_P5;
> + }
> +
> + /* Currently qemu assigns the address. */
> + if (target_logical_mem_addr != 0xffffffffffffffff) {
> + return H_OVERLAP;
> + }
> +
> + nvdimm = NVDIMM(drc->dev);
> +
> + size = object_property_get_uint(OBJECT(nvdimm),
> + PC_DIMM_SIZE_PROP, &error_abort);
> +
> + total_no_of_scm_blocks = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE;
> +
> + if (starting_idx > total_no_of_scm_blocks) {
> + return H_P2;
> + }
> +
> + if (((starting_idx + no_of_scm_blocks_to_bind) < starting_idx) ||
> + ((starting_idx + no_of_scm_blocks_to_bind) >
> total_no_of_scm_blocks)) {
> + return H_P3;
> + }
> +
> + addr = object_property_get_uint(OBJECT(nvdimm),
> + PC_DIMM_ADDR_PROP, &error_abort);
> +
> + addr += starting_idx * SPAPR_MINIMUM_SCM_BLOCK_SIZE;
> +
> + /* Already bound, Return target logical address in R5 */
> + args[1] = addr;
> + args[2] = no_of_scm_blocks_to_bind;
> +
> + return H_SUCCESS;
> +}
> +
> +static target_ulong h_scm_unbind_mem(PowerPCCPU *cpu, SpaprMachineState
> *spapr,
> + target_ulong opcode, target_ulong *args)
> +{
> + uint32_t drc_index = args[0];
> + uint64_t starting_scm_logical_addr = args[1];
> + uint64_t no_of_scm_blocks_to_unbind = args[2];
> + uint64_t continue_token = args[3];
> + uint64_t size_to_unbind;
> + Range blockrange = range_empty;
> + Range nvdimmrange = range_empty;
> + SpaprDrc *drc = spapr_drc_by_index(drc_index);
> + NVDIMMDevice *nvdimm;
> + uint64_t size, addr;
> +
> + if (!drc || !drc->dev ||
> + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
> + return H_PARAMETER;
> + }
> +
> + /* continue_token should be zero as this hcall doesn't return H_BUSY. */
> + if (continue_token > 0) {
> + return H_P4;
> + }
> +
> + /* Check if starting_scm_logical_addr is block aligned */
> + if (!QEMU_IS_ALIGNED(starting_scm_logical_addr,
> + SPAPR_MINIMUM_SCM_BLOCK_SIZE)) {
> + return H_P2;
> + }
> +
> + size_to_unbind = no_of_scm_blocks_to_unbind *
> SPAPR_MINIMUM_SCM_BLOCK_SIZE;
> + if (no_of_scm_blocks_to_unbind == 0 || no_of_scm_blocks_to_unbind !=
> + size_to_unbind /
> SPAPR_MINIMUM_SCM_BLOCK_SIZE) {
> + return H_P3;
> + }
> +
> + nvdimm = NVDIMM(drc->dev);
> + size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
> + &error_abort);
> + addr = object_property_get_int(OBJECT(nvdimm), PC_DIMM_ADDR_PROP,
> + &error_abort);
> +
> + range_init_nofail(&nvdimmrange, addr, size);
> + range_init_nofail(&blockrange, starting_scm_logical_addr,
> size_to_unbind);
> +
> + if (!range_contains_range(&nvdimmrange, &blockrange)) {
> + return H_P3;
> + }
> +
> + args[1] = no_of_scm_blocks_to_unbind;
> +
> + /* let unplug take care of actual unbind */
> + return H_SUCCESS;
> +}
> +
> +#define H_UNBIND_SCOPE_ALL 0x1
> +#define H_UNBIND_SCOPE_DRC 0x2
> +
> +static target_ulong h_scm_unbind_all(PowerPCCPU *cpu, SpaprMachineState
> *spapr,
> + target_ulong opcode, target_ulong *args)
> +{
> + uint64_t target_scope = args[0];
> + uint32_t drc_index = args[1];
> + uint64_t continue_token = args[2];
> + NVDIMMDevice *nvdimm;
> + uint64_t size;
> + uint64_t no_of_scm_blocks_unbound = 0;
> +
> + /* continue_token should be zero as this hcall doesn't return H_BUSY. */
> + if (continue_token > 0) {
> + return H_P4;
> + }
> +
> + if (target_scope == H_UNBIND_SCOPE_DRC) {
> + SpaprDrc *drc = spapr_drc_by_index(drc_index);
> +
> + if (!drc || !drc->dev ||
> + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
> + return H_P2;
> + }
> +
> + nvdimm = NVDIMM(drc->dev);
> + size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
> + &error_abort);
> +
> + no_of_scm_blocks_unbound = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE;
> + } else if (target_scope == H_UNBIND_SCOPE_ALL) {
> + GSList *list, *nvdimms;
> +
> + nvdimms = nvdimm_get_device_list();
> + for (list = nvdimms; list; list = list->next) {
> + nvdimm = list->data;
> + size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
> + &error_abort);
> +
> + no_of_scm_blocks_unbound += size / SPAPR_MINIMUM_SCM_BLOCK_SIZE;
> + }
> + g_slist_free(nvdimms);
> + } else {
> + return H_PARAMETER;
> + }
> +
> + args[1] = no_of_scm_blocks_unbound;
> +
> + /* let unplug take care of actual unbind */
> + return H_SUCCESS;
> +}
> +
> +static void spapr_scm_register_types(void)
> +{
> + /* qemu/scm specific hcalls */
> + spapr_register_hypercall(H_SCM_READ_METADATA, h_scm_read_metadata);
> + spapr_register_hypercall(H_SCM_WRITE_METADATA, h_scm_write_metadata);
> + spapr_register_hypercall(H_SCM_BIND_MEM, h_scm_bind_mem);
> + spapr_register_hypercall(H_SCM_UNBIND_MEM, h_scm_unbind_mem);
> + spapr_register_hypercall(H_SCM_UNBIND_ALL, h_scm_unbind_all);
> +}
> +
> +type_init(spapr_scm_register_types)
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index ed2de4bae5..633ff5202b 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -287,6 +287,7 @@ struct SpaprMachineState {
> #define H_P7 -60
> #define H_P8 -61
> #define H_P9 -62
> +#define H_OVERLAP -68
> #define H_UNSUPPORTED_FLAG -256
> #define H_MULTI_THREADS_ACTIVE -9005
>
> @@ -494,8 +495,13 @@ struct SpaprMachineState {
> #define H_INT_ESB 0x3C8
> #define H_INT_SYNC 0x3CC
> #define H_INT_RESET 0x3D0
> +#define H_SCM_READ_METADATA 0x3E4
> +#define H_SCM_WRITE_METADATA 0x3E8
> +#define H_SCM_BIND_MEM 0x3EC
> +#define H_SCM_UNBIND_MEM 0x3F0
> +#define H_SCM_UNBIND_ALL 0x3FC
>
> -#define MAX_HCALL_OPCODE H_INT_RESET
> +#define MAX_HCALL_OPCODE H_SCM_UNBIND_ALL
>
> /* The hcalls above are standardized in PAPR and implemented by pHyp
> * as well.
>
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
signature.asc
Description: PGP signature
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- Re: [PATCH v5 4/4] spapr: Add Hcalls to support PAPR NVDIMM device,
David Gibson <=