[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH V8 29/39] vfio-pci: cpr part 3 (intx)
From: |
Steven Sistare |
Subject: |
Re: [PATCH V8 29/39] vfio-pci: cpr part 3 (intx) |
Date: |
Wed, 6 Jul 2022 13:46:17 -0400 |
User-agent: |
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Thunderbird/91.11.0 |
On 6/29/2022 4:43 PM, Alex Williamson wrote:
> On Wed, 15 Jun 2022 07:52:16 -0700
> Steve Sistare <steven.sistare@oracle.com> wrote:
>
>> Preserve vfio INTX state across cpr restart. Preserve VFIOINTx fields as
>> follows:
>> pin : Recover this from the vfio config in kernel space
>> interrupt : Preserve its eventfd descriptor across exec.
>> unmask : Ditto
>> route.irq : This could perhaps be recovered in vfio_pci_post_load by
>> calling pci_device_route_intx_to_irq(pin), whose implementation reads
>> config space for a bridge device such as ich9. However, there is no
>> guarantee that the bridge vmstate is read before vfio vmstate. Rather
>> than fiddling with MigrationPriority for vmstate handlers, explicitly
>> save route.irq in vfio vmstate.
>> pending : save in vfio vmstate.
>> mmap_timeout, mmap_timer : Re-initialize
>> bool kvm_accel : Re-initialize
>>
>> In vfio_realize, defer calling vfio_intx_enable until the vmstate
>> is available, in vfio_pci_post_load. Modify vfio_intx_enable and
>> vfio_intx_kvm_enable to skip vfio initialization, but still perform
>> kvm initialization.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>> ---
>> hw/vfio/pci.c | 92
>> +++++++++++++++++++++++++++++++++++++++++++++++++++++------
>> 1 file changed, 83 insertions(+), 9 deletions(-)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 2fd7121..b8aee91 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -173,14 +173,45 @@ static void vfio_intx_eoi(VFIODevice *vbasedev)
>> vfio_unmask_single_irqindex(vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
>> }
>>
>> +#ifdef CONFIG_KVM
>> +static bool vfio_no_kvm_intx(VFIOPCIDevice *vdev)
>> +{
>> + return vdev->no_kvm_intx || !kvm_irqfds_enabled() ||
>> + vdev->intx.route.mode != PCI_INTX_ENABLED ||
>> + !kvm_resamplefds_enabled();
>> +}
>> +#endif
>> +
>> +static void vfio_intx_reenable_kvm(VFIOPCIDevice *vdev, Error **errp)
>> +{
>> +#ifdef CONFIG_KVM
>> + if (vfio_no_kvm_intx(vdev)) {
>> + return;
>> + }
>> +
>> + if (vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0)) {
>> + error_setg(errp, "vfio_notifier_init intx-unmask failed");
>> + return;
>> + }
>> +
>> + if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
>> + &vdev->intx.interrupt,
>> + &vdev->intx.unmask,
>> + vdev->intx.route.irq)) {
>> + error_setg_errno(errp, errno, "failed to setup resample irqfd");
>
>
> Does not unwind with vfio_notifier_cleanup(). This also exactly
> duplicates code in vfio_intx_enable_kvm(), which suggests it needs
> further refactoring to a common helper.
I will delete vfio_intx_reenable_kvm and add conditionals to
vfio_intx_enable_kvm.
That looks better.
>> + return;
>> + }
>> +
>> + vdev->intx.kvm_accel = true;
>> +#endif
>> +}
>> +
>> static void vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp)
>> {
>> #ifdef CONFIG_KVM
>> int irq_fd = event_notifier_get_fd(&vdev->intx.interrupt);
>>
>> - if (vdev->no_kvm_intx || !kvm_irqfds_enabled() ||
>> - vdev->intx.route.mode != PCI_INTX_ENABLED ||
>> - !kvm_resamplefds_enabled()) {
>> + if (vfio_no_kvm_intx(vdev)) {
>> return;
>> }
>>
>> @@ -328,7 +359,13 @@ static int vfio_intx_enable(VFIOPCIDevice *vdev, Error
>> **errp)
>> return 0;
>> }
>>
>> - vfio_disable_interrupts(vdev);
>> + /*
>> + * Do not alter interrupt state during vfio_realize and cpr-load. The
>> + * reused flag is cleared thereafter.
>> + */
>> + if (!vdev->vbasedev.reused) {
>> + vfio_disable_interrupts(vdev);
>> + }
>>
>> vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */
>> pci_config_set_interrupt_pin(vdev->pdev.config, pin);
>> @@ -353,6 +390,11 @@ static int vfio_intx_enable(VFIOPCIDevice *vdev, Error
>> **errp)
>> fd = event_notifier_get_fd(&vdev->intx.interrupt);
>> qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev);
>>
>> + if (vdev->vbasedev.reused) {
>> + vfio_intx_reenable_kvm(vdev, &err);
>> + goto finish;
>> + }
>> +
>
> This only jumps over the vfio_set_irq_signaling() and
> vfio_intx_enable_kvm(), largely replacing the latter with chunks of
> code taken from it. Doesn't seem like the right factoring.
Cleaned up in the next version.
>> if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
>> VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) {
>> qemu_set_fd_handler(fd, NULL, NULL, vdev);
>> @@ -365,6 +407,7 @@ static int vfio_intx_enable(VFIOPCIDevice *vdev, Error
>> **errp)
>> warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
>> }
>>
>> +finish:
>> vdev->interrupt = VFIO_INT_INTx;
>>
>> trace_vfio_intx_enable(vdev->vbasedev.name);
>> @@ -3195,9 +3238,13 @@ static void vfio_realize(PCIDevice *pdev, Error
>> **errp)
>> vfio_intx_routing_notifier);
>> vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
>> kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
>> - ret = vfio_intx_enable(vdev, errp);
>> - if (ret) {
>> - goto out_deregister;
>> +
>> + /* Wait until cpr-load reads intx routing data to enable */
>> + if (!vdev->vbasedev.reused) {
>> + ret = vfio_intx_enable(vdev, errp);
>> + if (ret) {
>> + goto out_deregister;
>> + }
>> }
>> }
>>
>> @@ -3474,6 +3521,7 @@ static int vfio_pci_post_load(void *opaque, int
>> version_id)
>> VFIOPCIDevice *vdev = opaque;
>> PCIDevice *pdev = &vdev->pdev;
>> int nr_vectors;
>> + int ret = 0;
>>
>> if (msix_enabled(pdev)) {
>> msix_set_vector_notifiers(pdev, vfio_msix_vector_use,
>> @@ -3486,10 +3534,35 @@ static int vfio_pci_post_load(void *opaque, int
>> version_id)
>> vfio_claim_vectors(vdev, nr_vectors, false);
>>
>> } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) {
>> - assert(0); /* completed in a subsequent patch */
>> + Error *err = 0;
>> + ret = vfio_intx_enable(vdev, &err);
>> + if (ret) {
>> + error_report_err(err);
>> + }
>> }
>>
>> - return 0;
>> + return ret;
>> +}
>> +
>> +static const VMStateDescription vfio_intx_vmstate = {
>> + .name = "vfio-intx",
>> + .unmigratable = 1,
>
>
> unmigratable-vmstates-to-interfere-with-migration+
A bug, will delete.
- Steve
>> + .version_id = 0,
>> + .minimum_version_id = 0,
>> + .fields = (VMStateField[]) {
>> + VMSTATE_BOOL(pending, VFIOINTx),
>> + VMSTATE_UINT32(route.mode, VFIOINTx),
>> + VMSTATE_INT32(route.irq, VFIOINTx),
>> + VMSTATE_END_OF_LIST()
>> + }
>> +};
>> +
>> +#define VMSTATE_VFIO_INTX(_field, _state) { \
>> + .name = (stringify(_field)), \
>> + .size = sizeof(VFIOINTx), \
>> + .vmsd = &vfio_intx_vmstate, \
>> + .flags = VMS_STRUCT, \
>> + .offset = vmstate_offset_value(_state, _field, VFIOINTx), \
>> }
>>
>> static bool vfio_pci_needed(void *opaque)
>> @@ -3509,6 +3582,7 @@ static const VMStateDescription vfio_pci_vmstate = {
>> .fields = (VMStateField[]) {
>> VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice),
>> VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, vfio_msix_present),
>> + VMSTATE_VFIO_INTX(intx, VFIOPCIDevice),
>> VMSTATE_END_OF_LIST()
>> }
>> };
>
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- Re: [PATCH V8 29/39] vfio-pci: cpr part 3 (intx),
Steven Sistare <=