On 1 Sep 2023, at 8:00 PM, Markus Armbruster <armbru@redhat.com> wrote:
Li Feng <fengli@smartx.com> writes:If the backend crashes and restarts, the device is broken. This patch adds reconnect for vhost-user-scsi.
Tested with spdk backend.
Signed-off-by: Li Feng <fengli@smartx.com> --- hw/scsi/vhost-user-scsi.c | 199 +++++++++++++++++++++++++--- include/hw/virtio/vhost-user-scsi.h | 4 + 2 files changed, 184 insertions(+), 19 deletions(-)
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index ee99b19e7a..5bf012461b 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -43,26 +43,54 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, };
+static int vhost_user_scsi_start(VHostUserSCSI *s) +{ + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + int ret; + + ret = vhost_scsi_common_start(vsc); + s->started_vu = (ret < 0 ? false : true); + + return ret; +} + +static void vhost_user_scsi_stop(VHostUserSCSI *s) +{ + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + + if (!s->started_vu) { + return; + } + s->started_vu = false; + + vhost_scsi_common_stop(vsc); +} + static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserSCSI *s = (VHostUserSCSI *)vdev; + DeviceState *dev = &s->parent_obj.parent_obj.parent_obj.parent_obj; VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); - bool start = (status & VIRTIO_CONFIG_S_DRIVER_OK) && vdev->vm_running; + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); + bool should_start = virtio_device_should_start(vdev, status); + int ret;
- if (vhost_dev_is_started(&vsc->dev) == start) { + if (!s->connected) { return; }
- if (start) { - int ret; + if (vhost_dev_is_started(&vsc->dev) == should_start) { + return; + }
- ret = vhost_scsi_common_start(vsc); + if (should_start) { + ret = vhost_user_scsi_start(s); if (ret < 0) { error_report("unable to start vhost-user-scsi: %s", strerror(-ret)); - exit(1); + qemu_chr_fe_disconnect(&vs->conf.chardev); } } else { - vhost_scsi_common_stop(vsc); + vhost_user_scsi_stop(s); } }
@@ -89,14 +117,126 @@ static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) { }
+static int vhost_user_scsi_connect(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserSCSI *s = VHOST_USER_SCSI(vdev); + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); + int ret = 0; + + if (s->connected) { + return 0; + } + s->connected = true; + + vsc->dev.num_queues = vs->conf.num_queues; + vsc->dev.nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues; + vsc->dev.vqs = s->vhost_vqs; + vsc->dev.vq_index = 0; + vsc->dev.backend_features = 0; + + ret = vhost_dev_init(&vsc->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0, + errp); + if (ret < 0) { + return ret; + } + + /* restore vhost state */ + if (virtio_device_started(vdev, vdev->status)) { + ret = vhost_user_scsi_start(s); + if (ret < 0) { + return ret; + }
Fails without setting an error, violating the function's (tacit)postcondition. Callers:* vhost_user_scsi_event() Dereferences the null error and crashes.* vhost_user_scsi_realize_connect() Also fails without setting an error. Caller: - vhost_user_scsi_realize() 1. Doesn't emit the "Reconnecting after error: " message. See also below. 2. Succeeds instead of failing!
Ack. + } + + return 0; +} + +static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event); + +static void vhost_user_scsi_disconnect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserSCSI *s = VHOST_USER_SCSI(vdev); + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); + + if (!s->connected) { + return; + } + s->connected = false; + + vhost_user_scsi_stop(s); + + vhost_dev_cleanup(&vsc->dev); + + /* Re-instate the event handler for new connections */ + qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, + vhost_user_scsi_event, NULL, dev, NULL, true); +} + +static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event) +{ + DeviceState *dev = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserSCSI *s = VHOST_USER_SCSI(vdev); + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); + Error *local_err = NULL; + + switch (event) { + case CHR_EVENT_OPENED: + if (vhost_user_scsi_connect(dev, &local_err) < 0) { + error_report_err(local_err); + qemu_chr_fe_disconnect(&vs->conf.chardev); + return; + } + break; + case CHR_EVENT_CLOSED: + /* defer close until later to avoid circular close */ + vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev, + vhost_user_scsi_disconnect); + break; + case CHR_EVENT_BREAK: + case CHR_EVENT_MUX_IN: + case CHR_EVENT_MUX_OUT: + /* Ignore */ + break; + } +} + +static int vhost_user_scsi_realize_connect(VHostUserSCSI *s, Error **errp) +{ + DeviceState *dev = &s->parent_obj.parent_obj.parent_obj.parent_obj; + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); + int ret; + + s->connected = false; + + ret = qemu_chr_fe_wait_connected(&vs->conf.chardev, errp); + if (ret < 0) { + return ret; + } + + ret = vhost_user_scsi_connect(dev, errp); + if (ret < 0) { + qemu_chr_fe_disconnect(&vs->conf.chardev); + return ret; + } + assert(s->connected); + + return 0; +} + static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) { VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); VHostUserSCSI *s = VHOST_USER_SCSI(dev); VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); - struct vhost_virtqueue *vqs = NULL; Error *err = NULL; int ret; + int retries = VU_REALIZE_CONN_RETRIES;
if (!vs->conf.chardev.chr) { error_setg(errp, "vhost-user-scsi: missing chardev"); @@ -115,18 +255,28 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) goto free_virtio; }
- vsc->dev.nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues; - vsc->dev.vqs = g_new0(struct vhost_virtqueue, vsc->dev.nvqs); - vsc->dev.vq_index = 0; - vsc->dev.backend_features = 0; - vqs = vsc->dev.vqs; + vsc->inflight = g_new0(struct vhost_inflight, 1); + s->vhost_vqs = g_new0(struct vhost_virtqueue, + VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues); + + assert(!*errp);
Dereferencing *errp is wrong. Quoting qapi/error.h's big comment:* = Why, when and how to use ERRP_GUARD() =** Without ERRP_GUARD(), use of the @errp parameter is restricted:* - It must not be dereferenced, because it may be null.* - It should not be passed to error_prepend() or* error_append_hint(), because that doesn't work with &error_fatal.* ERRP_GUARD() lifts these restrictions.See there for how to use ERRP_GUARD().
Thanks, good catch. I have understood this knowledge. + do { + if (*errp) { + error_prepend(errp, "Reconnecting after error: "); + error_report_err(*errp); + *errp = NULL; + } + ret = vhost_user_scsi_realize_connect(s, errp); + } while (ret < 0 && retries--);
Reports "Reconnecting ..." to the HMP monitor when in HMP context, elseto stderr. I.e. reports to stderr when we're in QMP context. Is thisreally what we want?
The vhost-user-blk has the same code, so just keep it the same here is a good idea? - ret = vhost_dev_init(&vsc->dev, &s->vhost_user, - VHOST_BACKEND_TYPE_USER, 0, errp); if (ret < 0) { goto free_vhost; }
+ /* we're fully initialized, now we can operate, so add the handler */ + qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, + vhost_user_scsi_event, NULL, (void *)dev, + NULL, true); /* Channel and lun both are 0 for bootable vhost-user-scsi disk */ vsc->channel = 0; vsc->lun = 0; @@ -135,8 +285,12 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) return;
free_vhost: + g_free(s->vhost_vqs); + s->vhost_vqs = NULL; + g_free(vsc->inflight); + vsc->inflight = NULL; vhost_user_cleanup(&s->vhost_user); - g_free(vqs); + free_virtio: virtio_scsi_common_unrealize(dev); } @@ -146,16 +300,23 @@ static void vhost_user_scsi_unrealize(DeviceState *dev) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserSCSI *s = VHOST_USER_SCSI(dev); VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); - struct vhost_virtqueue *vqs = vsc->dev.vqs; + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
/* This will stop the vhost backend. */ vhost_user_scsi_set_status(vdev, 0); + qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, NULL, NULL, NULL, + NULL, false);
vhost_dev_cleanup(&vsc->dev); - g_free(vqs); + g_free(s->vhost_vqs); + s->vhost_vqs = NULL; + + vhost_dev_free_inflight(vsc->inflight); + g_free(vsc->inflight); + vsc->inflight = NULL;
- virtio_scsi_common_unrealize(dev); vhost_user_cleanup(&s->vhost_user); + virtio_scsi_common_unrealize(dev); }
static Property vhost_user_scsi_properties[] = { diff --git a/include/hw/virtio/vhost-user-scsi.h b/include/hw/virtio/vhost-user-scsi.h index 521b08e559..b405ec952a 100644 --- a/include/hw/virtio/vhost-user-scsi.h +++ b/include/hw/virtio/vhost-user-scsi.h @@ -29,6 +29,10 @@ OBJECT_DECLARE_SIMPLE_TYPE(VHostUserSCSI, VHOST_USER_SCSI) struct VHostUserSCSI { VHostSCSICommon parent_obj; VhostUserState vhost_user; + bool connected; + bool started_vu; + + struct vhost_virtqueue *vhost_vqs; };
#endif /* VHOST_USER_SCSI_H */
|