qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v12 16/19] multi-process: Synchronize remote memory


From: Jag Raman
Subject: Re: [PATCH v12 16/19] multi-process: Synchronize remote memory
Date: Thu, 10 Dec 2020 11:57:40 -0500


> On Dec 9, 2020, at 4:28 PM, Marc-André Lureau <marcandre.lureau@gmail.com> 
> wrote:
> 
> 
> 
> On Wed, Dec 9, 2020 at 8:20 PM Jag Raman <jag.raman@oracle.com> wrote:
> 
> 
> > On Dec 8, 2020, at 8:57 AM, Marc-André Lureau <marcandre.lureau@gmail.com> 
> > wrote:
> > 
> > Hi
> > 
> > On Wed, Dec 2, 2020 at 12:23 AM Jagannathan Raman <jag.raman@oracle.com> 
> > wrote:
> > Add memory-listener object which is used to keep the view of the RAM
> > in sync between QEMU and remote process.
> > A MemoryListener is registered for system-memory AddressSpace. The
> > listener sends SYNC_SYSMEM message to the remote process when memory
> > listener commits the changes to memory, the remote process receives
> > the message and processes it in the handler for SYNC_SYSMEM message.
> > 
> > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> > Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> > Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  include/hw/remote/memory-sync.h |  27 ++++++
> >  include/hw/remote/proxy.h       |   2 +
> >  hw/remote/memory-sync.c         | 210 
> > ++++++++++++++++++++++++++++++++++++++++
> >  hw/remote/message.c             |   5 +
> >  hw/remote/proxy.c               |   6 ++
> >  MAINTAINERS                     |   2 +
> >  hw/remote/meson.build           |   1 +
> >  7 files changed, 253 insertions(+)
> >  create mode 100644 include/hw/remote/memory-sync.h
> >  create mode 100644 hw/remote/memory-sync.c
> > 
> > diff --git a/include/hw/remote/memory-sync.h 
> > b/include/hw/remote/memory-sync.h
> > new file mode 100644
> > index 0000000..785f76a
> > --- /dev/null
> > +++ b/include/hw/remote/memory-sync.h
> > @@ -0,0 +1,27 @@
> > +/*
> > + * Copyright © 2018, 2020 Oracle and/or its affiliates.
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or 
> > later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#ifndef MEMORY_SYNC_H
> > +#define MEMORY_SYNC_H
> > +
> > +#include "exec/memory.h"
> > +#include "io/channel.h"
> > +
> > +typedef struct RemoteMemSync {
> > +    MemoryListener listener;
> > +
> > +    int n_mr_sections;
> > +    MemoryRegionSection *mr_sections;
> > +
> > +    QIOChannel *ioc;
> > +} RemoteMemSync;
> > +
> > +void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc);
> > +void deconfigure_memory_sync(RemoteMemSync *sync);
> > 
> > RemoteMemSync vs MemorySync, and functions with _memory_sync suffixes...
> > Naming things is hard, but trying to be consistent generally helps.
> > 
> > My understanding is that this is a proxy-dev helper to handle memory 
> > listening and sending SYNC_SYSMEM.
> > 
> > I would thus suggest naming it ProxyMemoryListener. It could eventually be 
> > folded into proxy.c.
> > 
> > Please try to be consistent with header naming, structure naming, type, 
> > functions and enum prefixes etc.
> > 
> > proxy_memory_listener isn't that long imho.
> > 
> > +
> > +#endif
> > diff --git a/include/hw/remote/proxy.h b/include/hw/remote/proxy.h
> > index e29c61b..a687b7d 100644
> > --- a/include/hw/remote/proxy.h
> > +++ b/include/hw/remote/proxy.h
> > @@ -11,6 +11,7 @@
> > 
> >  #include "hw/pci/pci.h"
> >  #include "io/channel.h"
> > +#include "hw/remote/memory-sync.h"
> > 
> >  #define TYPE_PCI_PROXY_DEV "x-pci-proxy-dev"
> > 
> > @@ -40,6 +41,7 @@ struct PCIProxyDev {
> >      QemuMutex io_mutex;
> >      QIOChannel *ioc;
> >      Error *migration_blocker;
> > +    RemoteMemSync sync;
> >      ProxyMemoryRegion region[PCI_NUM_REGIONS];
> >  };
> > 
> > diff --git a/hw/remote/memory-sync.c b/hw/remote/memory-sync.c
> > new file mode 100644
> > index 0000000..2365e69
> > --- /dev/null
> > +++ b/hw/remote/memory-sync.c
> > @@ -0,0 +1,210 @@
> > +/*
> > + * Copyright © 2018, 2020 Oracle and/or its affiliates.
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or 
> > later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "qemu-common.h"
> > +
> > +#include "qemu/compiler.h"
> > +#include "qemu/int128.h"
> > +#include "qemu/range.h"
> > +#include "exec/memory.h"
> > +#include "exec/cpu-common.h"
> > +#include "cpu.h"
> > +#include "exec/ram_addr.h"
> > +#include "exec/address-spaces.h"
> > +#include "hw/remote/mpqemu-link.h"
> > +#include "hw/remote/memory-sync.h"
> > +
> > +static void proxy_ml_begin(MemoryListener *listener)
> > 
> > I suggest renaming begin -> reset
> > 
> > +{
> > +    RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> > +    int mrs;
> > +
> > +    for (mrs = 0; mrs < sync->n_mr_sections; mrs++) {
> > +        memory_region_unref(sync->mr_sections[mrs].mr);
> > +    }
> > +
> > +    g_free(sync->mr_sections);
> > +    sync->mr_sections = NULL;
> > +    sync->n_mr_sections = 0;
> > +}
> > +
> > +static int get_fd_from_hostaddr(uint64_t host, ram_addr_t *offset)
> > 
> > This function is very similar to vhost_user_get_mr_data(). That suggests we 
> > could factor the code.
> > 
> > Perhaps a new memory_region_from_host_full(), or extend 
> > memory_region_from_host() with an extra optional "int *fd" argument.
> >  
> > +{
> > +    MemoryRegion *mr;
> > +    ram_addr_t off;
> > +
> > +    /**
> > +     * Assumes that the host address is a valid address as it's
> > +     * coming from the MemoryListener system. In the case host
> > +     * address is not valid, the following call would return
> > +     * the default subregion of "system_memory" region, and
> > +     * not NULL. So it's not possible to check for NULL here.
> > +     */
> > +    mr = memory_region_from_host((void *)(uintptr_t)host, &off);
> > +
> > +    if (offset) {
> > +        *offset = off;
> > +    }
> > +
> > +    return memory_region_get_fd(mr);
> > +}
> > +
> > +static bool proxy_mrs_can_merge(uint64_t host, uint64_t prev_host, size_t 
> > size)
> > +{
> > 
> > This seems similar to vhost_user_can_merge(). 
> > 
> > +    bool merge;
> > +    int fd1, fd2;
> > +
> > +    fd1 = get_fd_from_hostaddr(host, NULL);
> > +
> > +    fd2 = get_fd_from_hostaddr(prev_host, NULL);
> > +
> > +    merge = (fd1 == fd2);
> > 
> > This could be written in a simpler manner, e.g.:
> > 
> > if (get_fd_from_hostaddr(host, NULL) !=
> >     get_fd_from_hostaddr(prev_host, NULL)) {
> >     return false;
> > }
> > 
> > +
> > +    merge &= ((prev_host + size) == host);
> > 
> > That check could be done early on before doing the more expensive 
> > memory_region_from_host() calls
> > 
> > +
> > +    return merge;
> > +}
> > +
> > +static bool try_merge(RemoteMemSync *sync, MemoryRegionSection *section)
> > +{
> > +    uint64_t mrs_size, mrs_gpa, mrs_page;
> > +    MemoryRegionSection *prev_sec;
> > +    bool merged = false;
> > +    uintptr_t mrs_host;
> > +    RAMBlock *mrs_rb;
> > +
> > +    if (!sync->n_mr_sections) {
> > +        return false;
> > +    }
> > +
> > +    mrs_rb = section->mr->ram_block;
> > +    mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb);
> > +    mrs_size = int128_get64(section->size);
> > +    mrs_gpa = section->offset_within_address_space;
> > +    mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
> > +               section->offset_within_region;
> > +
> > +    if (get_fd_from_hostaddr(mrs_host, NULL) < 0) {
> > +        return true;
> > +    }
> > +
> > +    mrs_host = mrs_host & ~(mrs_page - 1);
> > +    mrs_gpa = mrs_gpa & ~(mrs_page - 1);
> > +    mrs_size = ROUND_UP(mrs_size, mrs_page);
> > +
> > +    prev_sec = sync->mr_sections + (sync->n_mr_sections - 1);
> > +    uint64_t prev_gpa_start = prev_sec->offset_within_address_space;
> > +    uint64_t prev_size = int128_get64(prev_sec->size);
> > +    uint64_t prev_gpa_end   = range_get_last(prev_gpa_start, prev_size);
> > +    uint64_t prev_host_start =
> > +        (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) +
> > +        prev_sec->offset_within_region;
> > +    uint64_t prev_host_end = range_get_last(prev_host_start, prev_size);
> > +
> > +    if (mrs_gpa <= (prev_gpa_end + 1)) {
> > +        g_assert(mrs_gpa > prev_gpa_start);
> > +
> > +        if ((section->mr == prev_sec->mr) &&
> > +            proxy_mrs_can_merge(mrs_host, prev_host_start,
> > +                                (mrs_gpa - prev_gpa_start))) {
> > +            uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
> > +            merged = true;
> > +            prev_sec->offset_within_address_space =
> > +                MIN(prev_gpa_start, mrs_gpa);
> > +            prev_sec->offset_within_region =
> > +                MIN(prev_host_start, mrs_host) -
> > +                (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr);
> > +            prev_sec->size = int128_make64(max_end - MIN(prev_host_start,
> > +                                                         mrs_host));
> > +        }
> > +    }
> > +
> > +    return merged;
> > +}
> > +
> > +static void proxy_ml_region_addnop(MemoryListener *listener,
> > +                                   MemoryRegionSection *section)
> > +{
> > +    RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> > +
> > +    if (!(memory_region_is_ram(section->mr) &&
> > +          !memory_region_is_rom(section->mr))) {
> > +        return;
> > 
> > A bit clearer in vhost.c:
> > if (memory_region_is_ram(mr) && !memory_region_is_rom(mr)) {
> >  
> > +    }
> > +
> > +    if (try_merge(sync, section)) {
> > +        return;
> > +    }
> > +
> > +    ++sync->n_mr_sections;
> > +    sync->mr_sections = g_renew(MemoryRegionSection, sync->mr_sections,
> > +                                sync->n_mr_sections);
> > +    sync->mr_sections[sync->n_mr_sections - 1] = *section;
> > +    sync->mr_sections[sync->n_mr_sections - 1].fv = NULL;
> > +    memory_region_ref(section->mr);
> > +}
> > +
> > +static void proxy_ml_commit(MemoryListener *listener)
> > +{
> > +    RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> > +    MPQemuMsg msg;
> > +    MemoryRegionSection *section;
> > +    ram_addr_t offset;
> > +    uintptr_t host_addr;
> > +    int region;
> > +    Error *local_err = NULL;
> > +
> > +    memset(&msg, 0, sizeof(MPQemuMsg));
> > +
> > +    msg.cmd = SYNC_SYSMEM;
> > +    msg.num_fds = sync->n_mr_sections;
> > +    msg.size = sizeof(SyncSysmemMsg);
> > +    if (msg.num_fds > REMOTE_MAX_FDS) {
> > +        error_report("Number of fds is more than %d", REMOTE_MAX_FDS);
> > +        return;
> > +    }
> > +
> > +    for (region = 0; region < sync->n_mr_sections; region++) {
> > +        section = &sync->mr_sections[region];
> > +        msg.data.sync_sysmem.gpas[region] =
> > +            section->offset_within_address_space;
> > +        msg.data.sync_sysmem.sizes[region] = int128_get64(section->size);
> > +        host_addr = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
> > +                    section->offset_within_region;
> > +        msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset);
> > +        msg.data.sync_sysmem.offsets[region] = offset;
> > +    }
> > +    mpqemu_msg_send(&msg, sync->ioc, &local_err);
> > +    if (local_err) {
> > +        error_report("Error in sending command %d", msg.cmd);
> > +    }
> > +}
> > 
> > That whole complex code above duplicates much of the logic in vhost.c. Can 
> > we try to factorize it instead?
> 
> Hi Marc-Andre,
> 
> Thank you for sharing your feedback!
> 
> Would it be alright if we addressed this item alone in a separate patch in
> the future? Since this refactoring affects vhost code, we’re wondering
> whether it would be better to address it in a future patch, to help with
> any regression analysis later on.
> 
> That's fine with me, but please leave a TODO note in the code then.
> 
> thanks

Thank you very much for confirming!

—
Jag

> 
> 
> Thank you!
>
> Jag
> 
> > 
> > +
> > +void deconfigure_memory_sync(RemoteMemSync *sync)
> > +{
> > +    memory_listener_unregister(&sync->listener);
> > +
> > +    proxy_ml_begin(&sync->listener);
> > +}
> > +
> > +void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc)
> > +{
> > +    sync->n_mr_sections = 0;
> > +    sync->mr_sections = NULL;
> > +
> > +    sync->ioc = ioc;
> > +
> > +    sync->listener.begin = proxy_ml_begin;
> > +    sync->listener.commit = proxy_ml_commit;
> > +    sync->listener.region_add = proxy_ml_region_addnop;
> > +    sync->listener.region_nop = proxy_ml_region_addnop;
> > +    sync->listener.priority = 10;
> > +
> > +    memory_listener_register(&sync->listener, &address_space_memory);
> > +}
> > diff --git a/hw/remote/message.c b/hw/remote/message.c
> > index 0f3e38a..454fd2d 100644
> > --- a/hw/remote/message.c
> > +++ b/hw/remote/message.c
> > @@ -17,6 +17,7 @@
> >  #include "sysemu/runstate.h"
> >  #include "hw/pci/pci.h"
> >  #include "exec/memattrs.h"
> > +#include "hw/remote/memory.h"
> > 
> >  static void process_config_write(QIOChannel *ioc, PCIDevice *dev,
> >                                   MPQemuMsg *msg);
> > @@ -64,6 +65,10 @@ void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
> >          case BAR_READ:
> >              process_bar_read(com->ioc, &msg, &local_err);
> >              break;
> > +        case SYNC_SYSMEM:
> > +            remote_sysmem_reconfig(&msg, &local_err);
> > +            break;
> > +
> >          default:
> >              error_setg(&local_err,
> >                         "Unknown command (%d) received for device %s 
> > (pid=%d)",
> > diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c
> > index 039347d..0f2d1aa 100644
> > --- a/hw/remote/proxy.c
> > +++ b/hw/remote/proxy.c
> > @@ -18,6 +18,8 @@
> >  #include "migration/blocker.h"
> >  #include "hw/remote/mpqemu-link.h"
> >  #include "qemu/error-report.h"
> > +#include "hw/remote/memory-sync.h"
> > +#include "qom/object.h"
> > 
> >  static void proxy_set_socket(PCIProxyDev *pdev, int fd, Error **errp)
> >  {
> > @@ -58,6 +60,8 @@ static void pci_proxy_dev_realize(PCIDevice *device, 
> > Error **errp)
> > 
> >      qemu_mutex_init(&dev->io_mutex);
> >      qio_channel_set_blocking(dev->ioc, true, NULL);
> > +
> > +    configure_memory_sync(&dev->sync, dev->ioc);
> >  }
> > 
> >  static void pci_proxy_dev_exit(PCIDevice *pdev)
> > @@ -69,6 +73,8 @@ static void pci_proxy_dev_exit(PCIDevice *pdev)
> >      migrate_del_blocker(dev->migration_blocker);
> > 
> >      error_free(dev->migration_blocker);
> > +
> > +    deconfigure_memory_sync(&dev->sync);
> >  }
> > 
> >  static int config_op_send(PCIProxyDev *pdev, uint32_t addr, uint32_t *val,
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index ebd1d1d..5d78b78 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -3150,6 +3150,8 @@ F: include/hw/remote/memory.h
> >  F: hw/remote/memory.c
> >  F: hw/remote/proxy.c
> >  F: include/hw/remote/proxy.h
> > +F: hw/remote/memory-sync.c
> > +F: include/hw/remote/memory-sync.h
> > 
> >  Build and test automation
> >  -------------------------
> > diff --git a/hw/remote/meson.build b/hw/remote/meson.build
> > index 569cd20..7d434a5 100644
> > --- a/hw/remote/meson.build
> > +++ b/hw/remote/meson.build
> > @@ -7,5 +7,6 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: 
> > files('remote-obj.c'))
> >  remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c'))
> > 
> >  specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('memory.c'))
> > +specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: 
> > files('memory-sync.c'))
> > 
> >  softmmu_ss.add_all(when: 'CONFIG_MULTIPROCESS', if_true: remote_ss)
> > -- 
> > 1.8.3.1
> > 
> > 
> > 
> > -- 
> > Marc-André Lureau
> 
> 
> 
> -- 
> Marc-André Lureau




reply via email to

[Prev in Thread] Current Thread [Next in Thread]