qemu-block
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v5] 9pfs: use GHashTable for fid table


From: Christian Schoenebeck
Subject: Re: [PATCH v5] 9pfs: use GHashTable for fid table
Date: Tue, 27 Sep 2022 21:07:35 +0200

On Dienstag, 27. September 2022 16:25:03 CEST Linus Heckemann wrote:
> The previous implementation would iterate over the fid table for
> lookup operations, resulting in an operation with O(n) complexity on
> the number of open files and poor cache locality -- for every open,
> stat, read, write, etc operation.
> 
> This change uses a hashtable for this instead, significantly improving
> the performance of the 9p filesystem. The runtime of NixOS's simple
> installer test, which copies ~122k files totalling ~1.8GiB from 9p,
> decreased by a factor of about 10.
> 
> Signed-off-by: Linus Heckemann <git@sphalerite.org>
> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
> Reviewed-by: Greg Kurz <groug@kaod.org>
> Message-Id: <20220908112353.289267-1-git@sphalerite.org>
> [CS: - Retain BUG_ON(f->clunked) in get_fid().
>      - Add TODO comment in clunk_fid(). ]
> Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
> ---
> This squashes the separately submitted patch "9pfs: avoid iterator
> invalidation in v9fs_mark_fids_unreclaim"
> (20220926124207.1325763-1-git@sphalerite.org) into the previous
> version of this change.
> 
> I've skipped v4, because the former is arguably a poorly submitted v4.
> 
> I've also addressed Christian Schoenebeck's comments on the former:
> 
> * (v9fs_mark_fids_unreclaim) switched to g_autoptr for the array
>   storing the fids intermediately in preparation for reopening
> 
> * (v9fs_mark_fids_unreclaim) restored the accidentally removed
>   FID_NON_RECLAIMABLE mark
> 
> * (v9fs_mark_fids_unreclaim) moved fid reference release into a third
>   loop, which is run even if an error is encountered during a reopen
>   operation, in order to avoid leaking references to fids.
> 
> * (v9fs_reset) implemented logic to avoid the same iterator
>   invalidation problem
> 
> I've also added a comment explaining the exact role of
> v9fs_mark_fids_unreclaim, since it's not entirely obvious at a glance.
> 
> 
>  hw/9pfs/9p.c | 188 ++++++++++++++++++++++++++++-----------------------
>  hw/9pfs/9p.h |   2 +-
>  2 files changed, 106 insertions(+), 84 deletions(-)
> 
> diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
> index aebadeaa03..0e485cb631 100644
> --- a/hw/9pfs/9p.c
> +++ b/hw/9pfs/9p.c
> @@ -282,33 +282,32 @@ static V9fsFidState *coroutine_fn get_fid(V9fsPDU
> *pdu, int32_t fid) V9fsFidState *f;
>      V9fsState *s = pdu->s;
> 
> -    QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
> +    f = g_hash_table_lookup(s->fids, GINT_TO_POINTER(fid));
> +    if (f) {
>          BUG_ON(f->clunked);
> -        if (f->fid == fid) {
> -            /*
> -             * Update the fid ref upfront so that
> -             * we don't get reclaimed when we yield
> -             * in open later.
> -             */
> -            f->ref++;
> -            /*
> -             * check whether we need to reopen the
> -             * file. We might have closed the fd
> -             * while trying to free up some file
> -             * descriptors.
> -             */
> -            err = v9fs_reopen_fid(pdu, f);
> -            if (err < 0) {
> -                f->ref--;
> -                return NULL;
> -            }
> -            /*
> -             * Mark the fid as referenced so that the LRU
> -             * reclaim won't close the file descriptor
> -             */
> -            f->flags |= FID_REFERENCED;
> -            return f;
> +        /*
> +         * Update the fid ref upfront so that
> +         * we don't get reclaimed when we yield
> +         * in open later.
> +         */
> +        f->ref++;
> +        /*
> +         * check whether we need to reopen the
> +         * file. We might have closed the fd
> +         * while trying to free up some file
> +         * descriptors.
> +         */
> +        err = v9fs_reopen_fid(pdu, f);
> +        if (err < 0) {
> +            f->ref--;
> +            return NULL;
>          }
> +        /*
> +         * Mark the fid as referenced so that the LRU
> +         * reclaim won't close the file descriptor
> +         */
> +        f->flags |= FID_REFERENCED;
> +        return f;
>      }
>      return NULL;
>  }
> @@ -317,12 +316,9 @@ static V9fsFidState *alloc_fid(V9fsState *s, int32_t
> fid) {
>      V9fsFidState *f;
> 
> -    QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
> +    if (g_hash_table_contains(s->fids, GINT_TO_POINTER(fid))) {
>          /* If fid is already there return NULL */
> -        BUG_ON(f->clunked);

Not a big deal, but I start thinking whether to keep BUG_ON() here as well. 
That would require using g_hash_table_lookup() here instead of 
g_hash_table_contains(). Not that I would insist.

> -        if (f->fid == fid) {
> -            return NULL;
> -        }
> +        return NULL;
>      }
>      f = g_new0(V9fsFidState, 1);
>      f->fid = fid;
> @@ -333,7 +329,7 @@ static V9fsFidState *alloc_fid(V9fsState *s, int32_t
> fid) * reclaim won't close the file descriptor
>       */
>      f->flags |= FID_REFERENCED;
> -    QSIMPLEQ_INSERT_TAIL(&s->fid_list, f, next);
> +    g_hash_table_insert(s->fids, GINT_TO_POINTER(fid), f);
> 
>      v9fs_readdir_init(s->proto_version, &f->fs.dir);
>      v9fs_readdir_init(s->proto_version, &f->fs_reclaim.dir);
> @@ -424,12 +420,12 @@ static V9fsFidState *clunk_fid(V9fsState *s, int32_t
> fid) {
>      V9fsFidState *fidp;
> 
> -    QSIMPLEQ_FOREACH(fidp, &s->fid_list, next) {
> -        if (fidp->fid == fid) {
> -            QSIMPLEQ_REMOVE(&s->fid_list, fidp, V9fsFidState, next);
> -            fidp->clunked = true;
> -            return fidp;
> -        }
> +    /* TODO: Use g_hash_table_steal_extended() instead? */
> +    fidp = g_hash_table_lookup(s->fids, GINT_TO_POINTER(fid));
> +    if (fidp) {
> +        g_hash_table_remove(s->fids, GINT_TO_POINTER(fid));
> +        fidp->clunked = true;
> +        return fidp;
>      }
>      return NULL;
>  }
> @@ -439,10 +435,15 @@ void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu)
>      int reclaim_count = 0;
>      V9fsState *s = pdu->s;
>      V9fsFidState *f;
> +    GHashTableIter iter;
> +    gpointer fid;
> +
> +    g_hash_table_iter_init(&iter, s->fids);
> +
>      QSLIST_HEAD(, V9fsFidState) reclaim_list =
>          QSLIST_HEAD_INITIALIZER(reclaim_list);
> 
> -    QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
> +    while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &f)) {
>          /*
>           * Unlink fids cannot be reclaimed. Check
>           * for them and skip them. Also skip fids
> @@ -514,72 +515,82 @@ void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu)
>      }
>  }
> 
> +/*
> + * This is used when a path is removed from the directory tree. Any
> + * fids that still reference it must not be closed from then on, since
> + * they cannot be reopened.
> + */
>  static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath
> *path) {
> -    int err;
> +    int err = 0;
>      V9fsState *s = pdu->s;
> -    V9fsFidState *fidp, *fidp_next;
> +    V9fsFidState *fidp;
> +    gpointer fid;
> +    GHashTableIter iter;
> +    /*
> +     * The most common case is probably that we have exactly one
> +     * fid for the given path, so preallocate exactly one.
> +     */
> +    g_autoptr(GArray) to_reopen = g_array_sized_new(FALSE, FALSE,
> sizeof(V9fsFidState*), 1);

Line length. I'm missing the QEMU code style bot that used to bark on this. :)

    ./scripts/checkpatch.pl your.patch

or

    ./scripts/checkpatch.pl -f hw/9pfs/9p.c

> +    gint i;
> 
> -    fidp = QSIMPLEQ_FIRST(&s->fid_list);
> -    if (!fidp) {
> -        return 0;
> -    }
> +    g_hash_table_iter_init(&iter, s->fids);
> 
>      /*
> -     * v9fs_reopen_fid() can yield : a reference on the fid must be held
> -     * to ensure its pointer remains valid and we can safely pass it to
> -     * QSIMPLEQ_NEXT(). The corresponding put_fid() can also yield so
> -     * we must keep a reference on the next fid as well. So the logic here
> -     * is to get a reference on a fid and only put it back during the next
> -     * iteration after we could get a reference on the next fid. Start with
> -     * the first one.
> +     * We iterate over the fid table looking for the entries we need
> +     * to reopen, and store them in to_reopen. This is because
> +     * reopening them happens asynchronously, allowing the fid table
> +     * to be modified in the meantime, invalidating our iterator.
>       */

OK, I get it that you squashed your previous patch and that you ended up with 
this comment instead. But if you read the old comment version here, you'll 
notice that it describes the actual problem more accurately: especially that 
v9fs_reopen_fid() and put_fid() are the 2 functions that may cause the 
coroutine to "yield" here. That's an important information to be retained in 
this comment here as it's not obvious.

> -    for (fidp->ref++; fidp; fidp = fidp_next) {
> +    while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &fidp)) {
>          if (fidp->path.size == path->size &&
>              !memcmp(fidp->path.data, path->data, path->size)) {
> -            /* Mark the fid non reclaimable. */
> -            fidp->flags |= FID_NON_RECLAIMABLE;

I would not move this to the 2nd loop. I would still set the 
FID_NON_RECLAIMABLE flag here, for the same reasons that you are bumping the 
refcount already in the first loop below.

> -
> -            /* reopen the file/dir if already closed */
> -            err = v9fs_reopen_fid(pdu, fidp);
> -            if (err < 0) {
> -                put_fid(pdu, fidp);
> -                return err;
> -            }
> -        }
> -
> -        fidp_next = QSIMPLEQ_NEXT(fidp, next);
> -
> -        if (fidp_next) {
>              /*
> -             * Ensure the next fid survives a potential clunk request
> during
> -             * put_fid() below and v9fs_reopen_fid() in the next
> iteration.
> +             * Ensure the fid survives a potential clunk
> request during
> +             * v9fs_reopen_fid or put_fid.
>               */
> -            fidp_next->ref++;
> +            fidp->ref++;
> +            g_array_append_val(to_reopen, fidp);
>          }
> +    }
> 
> -        /* We're done with this fid */
> -        put_fid(pdu, fidp);
> +    for (i=0; i < to_reopen->len; i++) {
> +        fidp = g_array_index(to_reopen, V9fsFidState*, i);
> +        fidp->flags |= FID_NON_RECLAIMABLE;

So setting the flag would go back to the 1st loop.

> +        /* reopen the file/dir if already closed */
> +        err = v9fs_reopen_fid(pdu, fidp);
> +        if (err < 0) {
> +            goto out;

There is something wrong here ...

> +        }
>      }
> 
> -    return 0;
> + out:
> +    for (i=0; i < to_reopen->len; i++) {
> +        put_fid(pdu, g_array_index(to_reopen, V9fsFidState*, i));

... that's not the same behaviour as before, is it? Old behaviour was to put 
the respective (last) fid only on error. And this would now put all fids.

> +    }
> +    return err;
>  }
> 
>  static void coroutine_fn virtfs_reset(V9fsPDU *pdu)
>  {
>      V9fsState *s = pdu->s;
>      V9fsFidState *fidp;
> +    GList *freeing;

Wasn't there something like GLIST_FOREACH()? Then you wouldn't need to add 
that variable.

> +    /* Get a list of all the values (fid states) in the table, which we
> then... */

Line length.

> +    g_autoptr(GList) fids = g_hash_table_get_values(s->fids);
> 
> -    /* Free all fids */
> -    while (!QSIMPLEQ_EMPTY(&s->fid_list)) {
> -        /* Get fid */
> -        fidp = QSIMPLEQ_FIRST(&s->fid_list);
> -        fidp->ref++;
> +    /* ... remove from the table, taking over ownership. */
> +    g_hash_table_steal_all(s->fids);
> 
> -        /* Clunk fid */
> -        QSIMPLEQ_REMOVE(&s->fid_list, fidp, V9fsFidState, next);
> +    /*
> +     * This allows us to release our references to them asynchronously
> without
> +     * iterating over the hash table and risking iterator
> invalidation
> +     * through concurrent modifications.
> +     */

Line length.

> +    for (freeing = fids; freeing; freeing = freeing->next) {

GLIST_FOREACH()? I mean if we don't have that macro somewhere, be it. Might be 
something for a future, separate cleanup patch then.

> +        fidp = freeing->data;
> +        fidp->ref++;
>          fidp->clunked = true;
> -
>          put_fid(pdu, fidp);
>      }
>  }
> @@ -3205,6 +3216,8 @@ static int coroutine_fn v9fs_complete_rename(V9fsPDU
> *pdu, V9fsFidState *fidp, V9fsFidState *tfidp;
>      V9fsState *s = pdu->s;
>      V9fsFidState *dirfidp = NULL;
> +    GHashTableIter iter;
> +    gpointer fid;
> 
>      v9fs_path_init(&new_path);
>      if (newdirfid != -1) {
> @@ -3238,11 +3251,13 @@ static int coroutine_fn v9fs_complete_rename(V9fsPDU
> *pdu, V9fsFidState *fidp, if (err < 0) {
>          goto out;
>      }
> +
>      /*
>       * Fixup fid's pointing to the old name to
>       * start pointing to the new name
>       */
> -    QSIMPLEQ_FOREACH(tfidp, &s->fid_list, next) {
> +    g_hash_table_iter_init(&iter, s->fids);
> +    while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &tfidp)) {
>          if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) {
>              /* replace the name */
>              v9fs_fix_path(&tfidp->path, &new_path,
> strlen(fidp->path.data)); @@ -3320,6 +3335,8 @@ static int coroutine_fn
> v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir, V9fsPath oldpath,
> newpath;
>      V9fsState *s = pdu->s;
>      int err;
> +    GHashTableIter iter;
> +    gpointer fid;
> 
>      v9fs_path_init(&oldpath);
>      v9fs_path_init(&newpath);
> @@ -3336,7 +3353,8 @@ static int coroutine_fn v9fs_fix_fid_paths(V9fsPDU
> *pdu, V9fsPath *olddir, * Fixup fid's pointing to the old name to
>       * start pointing to the new name
>       */
> -    QSIMPLEQ_FOREACH(tfidp, &s->fid_list, next) {
> +    g_hash_table_iter_init(&iter, s->fids);
> +    while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &tfidp)) {
>          if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) {
>              /* replace the name */
>              v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data));
> @@ -4226,7 +4244,7 @@ int v9fs_device_realize_common(V9fsState *s, const
> V9fsTransport *t, s->ctx.fmode = fse->fmode;
>      s->ctx.dmode = fse->dmode;
> 
> -    QSIMPLEQ_INIT(&s->fid_list);
> +    s->fids = g_hash_table_new(NULL, NULL);
>      qemu_co_rwlock_init(&s->rename_lock);
> 
>      if (s->ops->init(&s->ctx, errp) < 0) {
> @@ -4286,6 +4304,10 @@ void v9fs_device_unrealize_common(V9fsState *s)
>      if (s->ctx.fst) {
>          fsdev_throttle_cleanup(s->ctx.fst);
>      }
> +    if (s->fids) {
> +        g_hash_table_destroy(s->fids);
> +        s->fids = NULL;
> +    }
>      g_free(s->tag);
>      qp_table_destroy(&s->qpd_table);
>      qp_table_destroy(&s->qpp_table);
> diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
> index 994f952600..10fd2076c2 100644
> --- a/hw/9pfs/9p.h
> +++ b/hw/9pfs/9p.h
> @@ -339,7 +339,7 @@ typedef struct {
>  struct V9fsState {
>      QLIST_HEAD(, V9fsPDU) free_list;
>      QLIST_HEAD(, V9fsPDU) active_list;
> -    QSIMPLEQ_HEAD(, V9fsFidState) fid_list;
> +    GHashTable *fids;
>      FileOperations *ops;
>      FsContext ctx;
>      char *tag;





reply via email to

[Prev in Thread] Current Thread [Next in Thread]