[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH 7/7] hw/mem/cxl_type3: Add CXL RAS Error Injection Support.
From: |
Mike Maslenkin |
Subject: |
Re: [PATCH 7/7] hw/mem/cxl_type3: Add CXL RAS Error Injection Support. |
Date: |
Sun, 15 Jan 2023 23:06:03 +0300 |
On Fri, Jan 13, 2023 at 7:43 PM Jonathan Cameron via
<qemu-devel@nongnu.org> wrote:
>
> CXL uses PCI AER Internal errors to signal to the host that an error has
> occurred. The host can then read more detailed status from the CXL RAS
> capability.
>
> For uncorrectable errors: support multiple injection in one operation
> as this is needed to reliably test multiple header logging support in an
> OS. The equivalent feature doesn't exist for correctable errors, so only
> one error need be injected at a time.
>
> Note:
> - Header content needs to be manually specified in a fashion that
> matches the specification for what can be in the header for each
> error type.
>
> Injection via QMP:
> { "execute": "qmp_capabilities" }
> ...
> { "execute": "cxl-inject-uncorrectable-errors",
> "arguments": {
> "path": "/machine/peripheral/cxl-pmem0",
> "errors": [
> {
> "type": "cache-address-parity",
> "header": [ 3, 4]
> },
> {
> "type": "cache-data-parity",
> "header":
> [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
> },
> {
> "type": "internal",
> "header": [ 1, 2, 4]
> }
> ]
> }}
> ...
> { "execute": "cxl-inject-correctable-error",
> "arguments": {
> "path": "/machine/peripheral/cxl-pmem0",
> "type": "physical",
> "header": [ 3, 4]
> } }
>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> ---
> hw/cxl/cxl-component-utils.c | 4 +-
> hw/mem/cxl_type3.c | 290 +++++++++++++++++++++++++++++++++
> hw/mem/cxl_type3_stubs.c | 10 ++
> hw/mem/meson.build | 2 +
> include/hw/cxl/cxl_component.h | 26 +++
> include/hw/cxl/cxl_device.h | 11 ++
> qapi/cxl.json | 113 +++++++++++++
> qapi/meson.build | 1 +
> qapi/qapi-schema.json | 1 +
> 9 files changed, 457 insertions(+), 1 deletion(-)
>
> diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
> index 3edd303a33..02fb6c17b9 100644
> --- a/hw/cxl/cxl-component-utils.c
> +++ b/hw/cxl/cxl-component-utils.c
> @@ -142,16 +142,18 @@ static void ras_init_common(uint32_t *reg_state,
> uint32_t *write_msk)
> * be handled as RO.
> */
> reg_state[R_CXL_RAS_UNC_ERR_STATUS] = 0;
> + write_msk[R_CXL_RAS_UNC_ERR_STATUS] = 0x1cfff;
> /* Bits 12-13 and 17-31 reserved in CXL 2.0 */
> reg_state[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff;
> write_msk[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff;
> reg_state[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff;
> write_msk[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff;
> reg_state[R_CXL_RAS_COR_ERR_STATUS] = 0;
> + write_msk[R_CXL_RAS_COR_ERR_STATUS] = 0x7f;
> reg_state[R_CXL_RAS_COR_ERR_MASK] = 0x7f;
> write_msk[R_CXL_RAS_COR_ERR_MASK] = 0x7f;
> /* CXL switches and devices must set */
> - reg_state[R_CXL_RAS_ERR_CAP_CTRL] = 0x00;
> + reg_state[R_CXL_RAS_ERR_CAP_CTRL] = 0x200;
> }
>
> static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 6cdd988d1d..ae8fd09e87 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -1,6 +1,7 @@
> #include "qemu/osdep.h"
> #include "qemu/units.h"
> #include "qemu/error-report.h"
> +#include "qapi/qapi-commands-cxl.h"
> #include "hw/mem/memory-device.h"
> #include "hw/mem/pc-dimm.h"
> #include "hw/pci/pci.h"
> @@ -323,6 +324,66 @@ static void hdm_decoder_commit(CXLType3Dev *ct3d, int
> which)
> ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1);
> }
>
> +static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err)
> +{
> + switch (qmp_err) {
> + case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_PARITY:
> + return CXL_RAS_UNC_ERR_CACHE_DATA_PARITY;
> + case CXL_UNCOR_ERROR_TYPE_CACHE_ADDRESS_PARITY:
> + return CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY;
> + case CXL_UNCOR_ERROR_TYPE_CACHE_BE_PARITY:
> + return CXL_RAS_UNC_ERR_CACHE_BE_PARITY;
> + case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_ECC:
> + return CXL_RAS_UNC_ERR_CACHE_DATA_ECC;
> + case CXL_UNCOR_ERROR_TYPE_MEM_DATA_PARITY:
> + return CXL_RAS_UNC_ERR_MEM_DATA_PARITY;
> + case CXL_UNCOR_ERROR_TYPE_MEM_ADDRESS_PARITY:
> + return CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY;
> + case CXL_UNCOR_ERROR_TYPE_MEM_BE_PARITY:
> + return CXL_RAS_UNC_ERR_MEM_BE_PARITY;
> + case CXL_UNCOR_ERROR_TYPE_MEM_DATA_ECC:
> + return CXL_RAS_UNC_ERR_MEM_DATA_ECC;
> + case CXL_UNCOR_ERROR_TYPE_REINIT_THRESHOLD:
> + return CXL_RAS_UNC_ERR_REINIT_THRESHOLD;
> + case CXL_UNCOR_ERROR_TYPE_RSVD_ENCODING:
> + return CXL_RAS_UNC_ERR_RSVD_ENCODING;
> + case CXL_UNCOR_ERROR_TYPE_POISON_RECEIVED:
> + return CXL_RAS_UNC_ERR_POISON_RECEIVED;
> + case CXL_UNCOR_ERROR_TYPE_RECEIVER_OVERFLOW:
> + return CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW;
> + case CXL_UNCOR_ERROR_TYPE_INTERNAL:
> + return CXL_RAS_UNC_ERR_INTERNAL;
> + case CXL_UNCOR_ERROR_TYPE_CXL_IDE_TX:
> + return CXL_RAS_UNC_ERR_CXL_IDE_TX;
> + case CXL_UNCOR_ERROR_TYPE_CXL_IDE_RX:
> + return CXL_RAS_UNC_ERR_CXL_IDE_RX;
> + default:
> + return -EINVAL;
> + }
> +}
> +
> +static int ct3d_qmp_cor_err_to_cxl(CxlUncorErrorType qmp_err)
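The parameter type here should be CxlCorErrorType, not CxlUncorErrorType.
The compiler reports an error here (the enum-conversion warning is promoted by -Werror):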
../hw/mem/cxl_type3.c:1263:44: error: implicit conversion from
enumeration type 'CxlCorErrorType' (aka 'enum CxlCorErrorType') to
different enumeration type 'CxlUncorErrorType' (aka 'enum
CxlUncorErrorType') [-Werror,-Wenum-conversion]
cxl_err_type = ct3d_qmp_cor_err_to_cxl(type);
~~~~~~~~~~~~~~~~~~~~~~~ ^~~~
1 error generated.
> +{
> + switch (qmp_err) {
> + case CXL_COR_ERROR_TYPE_CACHE_DATA_ECC:
> + return CXL_RAS_COR_ERR_CACHE_DATA_ECC;
> + case CXL_COR_ERROR_TYPE_MEM_DATA_ECC:
> + return CXL_RAS_COR_ERR_MEM_DATA_ECC;
> + case CXL_COR_ERROR_TYPE_CRC_THRESHOLD:
> + return CXL_RAS_COR_ERR_CRC_THRESHOLD;
> + case CXL_COR_ERROR_TYPE_RETRY_THRESHOLD:
> + return CXL_RAS_COR_ERR_RETRY_THRESHOLD;
> + case CXL_COR_ERROR_TYPE_CACHE_POISON_RECEIVED:
> + return CXL_RAS_COR_ERR_CACHE_POISON_RECEIVED;
> + case CXL_COR_ERROR_TYPE_MEM_POISON_RECEIVED:
> + return CXL_RAS_COR_ERR_MEM_POISON_RECEIVED;
> + case CXL_COR_ERROR_TYPE_PHYSICAL:
> + return CXL_RAS_COR_ERR_PHYSICAL;
> + default:
> + return -EINVAL;
> + }
> +}
> +
> static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
> unsigned size)
> {
> @@ -341,6 +402,84 @@ static void ct3d_reg_write(void *opaque, hwaddr offset,
> uint64_t value,
> should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
> which_hdm = 0;
> break;
> + case A_CXL_RAS_UNC_ERR_STATUS:
> + {
> + uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
> + uint32_t fe = FIELD_EX32(capctrl, CXL_RAS_ERR_CAP_CTRL,
> FIRST_ERROR_POINTER);
> + CXLError *cxl_err;
> + uint32_t unc_err;
> +
> + /*
> + * If single bit written that corresponds to the first error
> + * pointer being cleared, update the status and header log.
> + */
> + if (!QTAILQ_EMPTY(&ct3d->error_list)) {
> + CXLError *cxl_err = QTAILQ_FIRST(&ct3d->error_list);
Is it OK that this "CXLError *cxl_err" declaration shadows the one declared just above in the same case block?
- [PATCH 0/7] hw/cxl: RAS error emulation and injection, Jonathan Cameron, 2023/01/13
- [PATCH 1/7] hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register, Jonathan Cameron, 2023/01/13
- [PATCH 2/7] hw/pci/aer: Add missing routing for AER errors, Jonathan Cameron, 2023/01/13
- [PATCH 3/7] hw/pci-bridge/cxl_root_port: Wire up AER, Jonathan Cameron, 2023/01/13
- [PATCH 4/7] hw/pci-bridge/cxl_root_port: Wire up MSI, Jonathan Cameron, 2023/01/13
- [PATCH 5/7] hw/mem/cxl-type3: Add AER extended capability, Jonathan Cameron, 2023/01/13
- [PATCH 6/7] hw/pci/aer: Make PCIE AER error injection facility available for other emulation to use., Jonathan Cameron, 2023/01/13
- [PATCH 7/7] hw/mem/cxl_type3: Add CXL RAS Error Injection Support., Jonathan Cameron, 2023/01/13
- Re: [PATCH 7/7] hw/mem/cxl_type3: Add CXL RAS Error Injection Support.,
Mike Maslenkin <=