[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH V3] vhost: logs sharing
From: |
Jason Wang |
Subject: |
Re: [Qemu-devel] [PATCH V3] vhost: logs sharing |
Date: |
Mon, 01 Jun 2015 15:53:46 +0800 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.7.0 |
On 06/01/2015 03:26 PM, Michael S. Tsirkin wrote:
> On Mon, Jun 01, 2015 at 01:53:35PM +0800, Jason Wang wrote:
>> >
>> >
>> > On 06/01/2015 02:12 AM, Michael S. Tsirkin wrote:
>>> > > On Tue, May 26, 2015 at 01:54:09AM -0400, Jason Wang wrote:
>>>> > >> We allocate one vhost log per vhost device. This is sub optimal when:
>>>> > >>
>>>> > >> - Guest has several device with vhost as backend
>>>> > >> - Guest has multiqueue devices
>>>> > >>
>>>> > >> Since each vhost device will allocate and use their private log, this
>>>> > >> could cause very huge unnecessary memory allocation. We can in fact
>>>> > >> avoid extra cost by sharing a single vhost log among all the vhost
>>>> > >> devices. This is feasible since:
>>>> > >>
>>>> > >> - All vhost devices for a vm should see a consistent memory layout
>>>> > >> (memory slots).
>>>> > >> - Vhost log were design to be shared, all access function were
>>>> > >> synchronized.
>>>> > >>
>>>> > >> So this patch tries to implement the sharing through
>>>> > >>
>>>> > >> - Introducing a new vhost_log structure and refcnt it.
>>>> > >> - Using a global pointer of vhost_log structure to keep track the
>>>> > >> current log used.
>>>> > >> - If there's no resize, next vhost device will just use this log and
>>>> > >> increase the refcnt.
>>>> > >> - If resize happens, a new vhost_log structure will be allocated and
>>>> > >> each vhost device will use the new log then drop the refcnt of old
>>>> > >> log.
>>>> > >> - The old log will be synced and freed when reference count drops to
>>>> > >> zero.
>>>> > >>
>>>> > >> Tested by doing scp during migration for a 2 queues virtio-net-pci.
>>>> > >>
>>>> > >> Cc: Michael S. Tsirkin <address@hidden>
>>>> > >> Signed-off-by: Jason Wang <address@hidden>
>>> > > Almost there I think.
>>> > >
>>>> > >> ---
>>>> > >> Changes from V3:
>>>> > >> - only sync the old log on put
>>>> > >> Changes from V2:
>>>> > >> - rebase to HEAD
>>>> > >> - drop unused node field from vhost_log structure
>>>> > >> Changes from V1:
>>>> > >> - Drop the list of vhost log, instead, using a global pointer instead
>>>> > >> ---
>>>> > >> hw/virtio/vhost.c | 78
>>>> > >> ++++++++++++++++++++++++++++++++++++-----------
>>>> > >> include/hw/virtio/vhost.h | 8 ++++-
>>>> > >> 2 files changed, 68 insertions(+), 18 deletions(-)
>>>> > >>
>>>> > >> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
>>>> > >> index 54851b7..fef28d9 100644
>>>> > >> --- a/hw/virtio/vhost.c
>>>> > >> +++ b/hw/virtio/vhost.c
>>>> > >> @@ -22,15 +22,19 @@
>>>> > >> #include "hw/virtio/virtio-bus.h"
>>>> > >> #include "migration/migration.h"
>>>> > >>
>>>> > >> +static struct vhost_log *vhost_log;
>>>> > >> +
>>>> > >> static void vhost_dev_sync_region(struct vhost_dev *dev,
>>>> > >> MemoryRegionSection *section,
>>>> > >> uint64_t mfirst, uint64_t mlast,
>>>> > >> uint64_t rfirst, uint64_t rlast)
>>>> > >> {
>>>> > >> + vhost_log_chunk_t *log = dev->log->log;
>>>> > >> +
>>>> > >> uint64_t start = MAX(mfirst, rfirst);
>>>> > >> uint64_t end = MIN(mlast, rlast);
>>>> > >> - vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK;
>>>> > >> - vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1;
>>>> > >> + vhost_log_chunk_t *from = log + start / VHOST_LOG_CHUNK;
>>>> > >> + vhost_log_chunk_t *to = log + end / VHOST_LOG_CHUNK + 1;
>>>> > >> uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;
>>>> > >>
>>>> > >> if (end < start) {
>>>> > >> @@ -280,22 +284,57 @@ static uint64_t vhost_get_log_size(struct
>>>> > >> vhost_dev *dev)
>>>> > >> }
>>>> > >> return log_size;
>>>> > >> }
>>>> > >> +static struct vhost_log *vhost_log_alloc(uint64_t size)
>>>> > >> +{
>>>> > >> + struct vhost_log *log = g_malloc0(sizeof *log + size *
>>>> > >> sizeof(*(log->log)));
>>>> > >> +
>>>> > >> + log->size = size;
>>>> > >> + log->refcnt = 1;
>>>> > >> +
>>>> > >> + return log;
>>>> > >> +}
>>>> > >> +
>>>> > >> +static struct vhost_log *vhost_log_get(uint64_t size)
>>>> > >> +{
>>>> > >> + if (!vhost_log || vhost_log->size != size) {
>>>> > >> + vhost_log = vhost_log_alloc(size);
>>>> > >> + } else {
>>>> > >> + ++vhost_log->refcnt;
>>>> > >> + }
>>>> > >> +
>>>> > >> + return vhost_log;
>>>> > >> +}
>>>> > >> +
>>>> > >> +static void vhost_log_put(struct vhost_dev *dev, bool sync)
>>>> > >> +{
>>>> > >> + struct vhost_log *log = dev->log;
>>>> > >> +
>>>> > >> + if (!log) {
>>>> > >> + return;
>>>> > >> + }
>>>> > >> +
>>>> > >> + --log->refcnt;
>>>> > >> + if (log->refcnt == 0) {
>>>> > >> + /* Sync only the range covered by the old log */
>>>> > >> + if (dev->log_size && sync) {
>>>> > >> + vhost_log_sync_range(dev, 0, dev->log_size *
>>>> > >> VHOST_LOG_CHUNK - 1);
>>>> > >> + }
>>>> > >> + if (vhost_log == log) {
>>>> > >> + vhost_log = NULL;
>>>> > >> + }
>>>> > >> + g_free(log);
>>>> > >> + }
>>>> > >> +}
>>>> > >>
>>>> > >> static inline void vhost_dev_log_resize(struct vhost_dev* dev,
>>>> > >> uint64_t size)
>>>> > >> {
>>>> > >> - vhost_log_chunk_t *log;
>>>> > >> - uint64_t log_base;
>>>> > >> + struct vhost_log *log = vhost_log_get(size);
>>>> > >> + uint64_t log_base = (uintptr_t)log->log;
>>>> > >> int r;
>>>> > >>
>>>> > >> - log = g_malloc0(size * sizeof *log);
>>>> > >> - log_base = (uintptr_t)log;
>>>> > >> r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE,
>>>> > >> &log_base);
>>>> > >> assert(r >= 0);
>>>> > >> - /* Sync only the range covered by the old log */
>>>> > >> - if (dev->log_size) {
>>>> > >> - vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK
>>>> > >> - 1);
>>>> > >> - }
>>>> > >> - g_free(dev->log);
>>>> > >> + vhost_log_put(dev, true);
>>>> > >> dev->log = log;
>>>> > >> dev->log_size = size;
>>>> > >> }
>>>> > >> @@ -601,7 +640,7 @@ static int vhost_migration_log(MemoryListener
>>>> > >> *listener, int enable)
>>>> > >> if (r < 0) {
>>>> > >> return r;
>>>> > >> }
>>>> > >> - g_free(dev->log);
>>>> > >> + vhost_log_put(dev, false);
>>>> > >> dev->log = NULL;
>>>> > >> dev->log_size = 0;
>>>> > >> } else {
>>>> > >> @@ -1060,10 +1099,12 @@ int vhost_dev_start(struct vhost_dev *hdev,
>>>> > >> VirtIODevice *vdev)
>>>> > >> uint64_t log_base;
>>>> > >>
>>>> > >> hdev->log_size = vhost_get_log_size(hdev);
>>>> > >> - hdev->log = hdev->log_size ?
>>>> > >> - g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL;
>>>> > >> - log_base = (uintptr_t)hdev->log;
>>>> > >> - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE,
>>>> > >> &log_base);
>>>> > >> + if (hdev->log_size) {
>>>> > >> + hdev->log = vhost_log_get(hdev->log_size);
>>>> > >> + }
>>> > > Why is this conditional on hdev->log_size?
>>> > > What will the value be for log_size == 0?
>> >
>> > This is used to save an unnecessary allocation for a dummy vhost_log
>> > structure without any log.
> Then you need to go over all uses and make sure they
> are safe. A dummy vhost_log structure might be easier.
>
>>>> > >> + log_base = (uintptr_t)hdev->log->log;
>>> > > especially since we de-reference it here.
>> >
>> > Yes, this seems unsafe, will change this to
>> >
>> > log_base = hdev->log_size ? (uintptr_t) hdev->log->log : NULL
>>>> > >> + r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE,
>>>> > >> + hdev->log_size ? &log_base :
>>>> > >> NULL);
>>>> > >> if (r < 0) {
>>>> > >> r = -errno;
>>>> > >> goto fail_log;
>>>> > >> @@ -1072,6 +1113,9 @@ int vhost_dev_start(struct vhost_dev *hdev,
>>>> > >> VirtIODevice *vdev)
>>>> > >>
>>>> > >> return 0;
>>>> > >> fail_log:
>>>> > >> + if (hdev->log_size) {
>>>> > >> + vhost_log_put(hdev, false);
>>>> > >> + }
>>>> > >> fail_vq:
>>>> > >> while (--i >= 0) {
>>>> > >> vhost_virtqueue_stop(hdev,
>>>> > >> @@ -1101,7 +1145,7 @@ void vhost_dev_stop(struct vhost_dev *hdev,
>>>> > >> VirtIODevice *vdev)
>>>> > >> vhost_log_sync_range(hdev, 0, ~0x0ull);
>>>> > >>
>>>> > >> hdev->started = false;
>>>> > >> - g_free(hdev->log);
>>>> > >> + vhost_log_put(hdev, false);
>>>> > >> hdev->log = NULL;
>>>> > >> hdev->log_size = 0;
>>>> > >> }
>>> > > Why false? We better sync the dirty bitmap since log is getting
>>> > > cleared.
>> >
>> > We did a vhost_log_sync_range(hdev, 0, ~0x0ull) before. And we only sync
>> > 0 to dev->log_size * VHOST_LOG_CHUNK - 1 in vhost_log_put(). But looks
>> > like there's no difference, will remove vhost_log_sync_range() and use
>> > true for vhost_log_put() here.
>>> > >
>>> > > And if it's always true, we can just drop this flag.
>> >
>> > There's still other usage, e.g when fail to setting log base in
>> > vhost_dev_start() or migration end. In those cases, no need to sync.
> We definitely must sync on migration end.
>
Sorry for not being clear. I mean in vhost_log_global_stop which is
called by migration_end(). We don't sync there in the past since
ram_save_complete() will first synces dirty map and then calls
migration_end().