qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH V8 29/39] vfio-pci: cpr part 3 (intx)


From: Steven Sistare
Subject: Re: [PATCH V8 29/39] vfio-pci: cpr part 3 (intx)
Date: Wed, 6 Jul 2022 13:46:17 -0400
User-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Thunderbird/91.11.0

On 6/29/2022 4:43 PM, Alex Williamson wrote:
> On Wed, 15 Jun 2022 07:52:16 -0700
> Steve Sistare <steven.sistare@oracle.com> wrote:
> 
>> Preserve vfio INTX state across cpr restart.  Preserve VFIOINTx fields as
>> follows:
>>   pin : Recover this from the vfio config in kernel space
>>   interrupt : Preserve its eventfd descriptor across exec.
>>   unmask : Ditto
>>   route.irq : This could perhaps be recovered in vfio_pci_post_load by
>>     calling pci_device_route_intx_to_irq(pin), whose implementation reads
>>     config space for a bridge device such as ich9.  However, there is no
>>     guarantee that the bridge vmstate is read before vfio vmstate.  Rather
>>     than fiddling with MigrationPriority for vmstate handlers, explicitly
>>     save route.irq in vfio vmstate.
>>   pending : save in vfio vmstate.
>>   mmap_timeout, mmap_timer : Re-initialize
>>   bool kvm_accel : Re-initialize
>>
>> In vfio_realize, defer calling vfio_intx_enable until the vmstate
>> is available, in vfio_pci_post_load.  Modify vfio_intx_enable and
>> vfio_intx_kvm_enable to skip vfio initialization, but still perform
>> kvm initialization.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>> ---
>>  hw/vfio/pci.c | 92 
>> +++++++++++++++++++++++++++++++++++++++++++++++++++++------
>>  1 file changed, 83 insertions(+), 9 deletions(-)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 2fd7121..b8aee91 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -173,14 +173,45 @@ static void vfio_intx_eoi(VFIODevice *vbasedev)
>>      vfio_unmask_single_irqindex(vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
>>  }
>>  
>> +#ifdef CONFIG_KVM
>> +static bool vfio_no_kvm_intx(VFIOPCIDevice *vdev)
>> +{
>> +    return vdev->no_kvm_intx || !kvm_irqfds_enabled() ||
>> +           vdev->intx.route.mode != PCI_INTX_ENABLED ||
>> +           !kvm_resamplefds_enabled();
>> +}
>> +#endif
>> +
>> +static void vfio_intx_reenable_kvm(VFIOPCIDevice *vdev, Error **errp)
>> +{
>> +#ifdef CONFIG_KVM
>> +    if (vfio_no_kvm_intx(vdev)) {
>> +        return;
>> +    }
>> +
>> +    if (vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0)) {
>> +        error_setg(errp, "vfio_notifier_init intx-unmask failed");
>> +        return;
>> +    }
>> +
>> +    if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
>> +                                           &vdev->intx.interrupt,
>> +                                           &vdev->intx.unmask,
>> +                                           vdev->intx.route.irq)) {
>> +        error_setg_errno(errp, errno, "failed to setup resample irqfd");
> 
> 
> Does not unwind with vfio_notifier_cleanup().  This also exactly
> duplicates code in vfio_intx_enable_kvm(), which suggests it needs
> further refactoring to a common helper.

I will delete vfio_intx_reenable_kvm and add conditionals to 
vfio_intx_enable_kvm.
That looks better.

>> +        return;
>> +    }
>> +
>> +    vdev->intx.kvm_accel = true;
>> +#endif
>> +}
>> +
>>  static void vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp)
>>  {
>>  #ifdef CONFIG_KVM
>>      int irq_fd = event_notifier_get_fd(&vdev->intx.interrupt);
>>  
>> -    if (vdev->no_kvm_intx || !kvm_irqfds_enabled() ||
>> -        vdev->intx.route.mode != PCI_INTX_ENABLED ||
>> -        !kvm_resamplefds_enabled()) {
>> +    if (vfio_no_kvm_intx(vdev)) {
>>          return;
>>      }
>>  
>> @@ -328,7 +359,13 @@ static int vfio_intx_enable(VFIOPCIDevice *vdev, Error 
>> **errp)
>>          return 0;
>>      }
>>  
>> -    vfio_disable_interrupts(vdev);
>> +    /*
>> +     * Do not alter interrupt state during vfio_realize and cpr-load.  The
>> +     * reused flag is cleared thereafter.
>> +     */
>> +    if (!vdev->vbasedev.reused) {
>> +        vfio_disable_interrupts(vdev);
>> +    }
>>  
>>      vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */
>>      pci_config_set_interrupt_pin(vdev->pdev.config, pin);
>> @@ -353,6 +390,11 @@ static int vfio_intx_enable(VFIOPCIDevice *vdev, Error 
>> **errp)
>>      fd = event_notifier_get_fd(&vdev->intx.interrupt);
>>      qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev);
>>  
>> +    if (vdev->vbasedev.reused) {
>> +        vfio_intx_reenable_kvm(vdev, &err);
>> +        goto finish;
>> +    }
>> +
> 
> This only jumps over the vfio_set_irq_signaling() and
> vfio_intx_enable_kvm(), largely replacing the latter with chunks of
> code taken from it.  Doesn't seem like the right factoring.
Cleaned up in the next version.

>>      if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
>>                                 VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) {
>>          qemu_set_fd_handler(fd, NULL, NULL, vdev);
>> @@ -365,6 +407,7 @@ static int vfio_intx_enable(VFIOPCIDevice *vdev, Error 
>> **errp)
>>          warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
>>      }
>>  
>> +finish:
>>      vdev->interrupt = VFIO_INT_INTx;
>>  
>>      trace_vfio_intx_enable(vdev->vbasedev.name);
>> @@ -3195,9 +3238,13 @@ static void vfio_realize(PCIDevice *pdev, Error 
>> **errp)
>>                                               vfio_intx_routing_notifier);
>>          vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
>>          kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
>> -        ret = vfio_intx_enable(vdev, errp);
>> -        if (ret) {
>> -            goto out_deregister;
>> +
>> +        /* Wait until cpr-load reads intx routing data to enable */
>> +        if (!vdev->vbasedev.reused) {
>> +            ret = vfio_intx_enable(vdev, errp);
>> +            if (ret) {
>> +                goto out_deregister;
>> +            }
>>          }
>>      }
>>  
>> @@ -3474,6 +3521,7 @@ static int vfio_pci_post_load(void *opaque, int 
>> version_id)
>>      VFIOPCIDevice *vdev = opaque;
>>      PCIDevice *pdev = &vdev->pdev;
>>      int nr_vectors;
>> +    int ret = 0;
>>  
>>      if (msix_enabled(pdev)) {
>>          msix_set_vector_notifiers(pdev, vfio_msix_vector_use,
>> @@ -3486,10 +3534,35 @@ static int vfio_pci_post_load(void *opaque, int 
>> version_id)
>>          vfio_claim_vectors(vdev, nr_vectors, false);
>>  
>>      } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) {
>> -        assert(0);      /* completed in a subsequent patch */
>> +        Error *err = 0;
>> +        ret = vfio_intx_enable(vdev, &err);
>> +        if (ret) {
>> +            error_report_err(err);
>> +        }
>>      }
>>  
>> -    return 0;
>> +    return ret;
>> +}
>> +
>> +static const VMStateDescription vfio_intx_vmstate = {
>> +    .name = "vfio-intx",
>> +    .unmigratable = 1,
> 
> 
> unmigratable-vmstates-to-interfere-with-migration+
A bug, will delete.

- Steve
 
>> +    .version_id = 0,
>> +    .minimum_version_id = 0,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_BOOL(pending, VFIOINTx),
>> +        VMSTATE_UINT32(route.mode, VFIOINTx),
>> +        VMSTATE_INT32(route.irq, VFIOINTx),
>> +        VMSTATE_END_OF_LIST()
>> +    }
>> +};
>> +
>> +#define VMSTATE_VFIO_INTX(_field, _state) {                         \
>> +    .name       = (stringify(_field)),                              \
>> +    .size       = sizeof(VFIOINTx),                                 \
>> +    .vmsd       = &vfio_intx_vmstate,                               \
>> +    .flags      = VMS_STRUCT,                                       \
>> +    .offset     = vmstate_offset_value(_state, _field, VFIOINTx),   \
>>  }
>>  
>>  static bool vfio_pci_needed(void *opaque)
>> @@ -3509,6 +3582,7 @@ static const VMStateDescription vfio_pci_vmstate = {
>>      .fields = (VMStateField[]) {
>>          VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice),
>>          VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, vfio_msix_present),
>> +        VMSTATE_VFIO_INTX(intx, VFIOPCIDevice),
>>          VMSTATE_END_OF_LIST()
>>      }
>>  };
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]