qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [External] Re: [PULL 26/26] virtiofsd: Add -o allow_direct_io|no_allow_direct_io options


From: Jiachen Zhang
Subject: Re: [External] Re: [PULL 26/26] virtiofsd: Add -o allow_direct_io|no_allow_direct_io options
Date: Wed, 30 Sep 2020 10:14:19 +0800


On Wed, Sep 30, 2020 at 5:53 AM Vivek Goyal <vgoyal@redhat.com> wrote:
On Fri, Sep 25, 2020 at 01:06:55PM +0100, Dr. David Alan Gilbert (git) wrote:
> From: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
>
> Due to the commit 65da4539803373ec4eec97ffc49ee90083e56efd, the O_DIRECT
> open flag of guest applications will be discarded by virtiofsd. While
> this behavior makes it consistent with the virtio-9p scheme when guest
> applications use direct I/O, we no longer have any chance to bypass the
> host page cache.
>
> Therefore, we add a flag 'allow_direct_io' to lo_data. If '-o
>  no_allow_direct_io' option is added, or none of '-o allow_direct_io' or
>  '-o no_allow_direct_io' is added, the 'allow_direct_io' will be set to
>  0, and virtiofsd discards O_DIRECT as before. If '-o allow_direct_io'
> is added to the starting command-line, 'allow_direct_io' will be set to
> 1, so that the O_DIRECT flags will be retained and host page cache can
> be bypassed.

Hi Jiachen,

I'm curious: in what cases do you want to bypass the host page cache?

Thanks
Vivek
 
Hi Vivek,

Some apps, such as DBMSs, maintain their own file cache in userspace, so they
may want to bypass the kernel page cache by using O_DIRECT. When these apps
run in a guest and access virtio-fs files, we should honor that request.
This also reduces host memory usage, since the data no longer sits in the
host page cache.

Another case is file I/O benchmarking on different storage devices (such as
HDDs and SSDs), where RAM caches would skew the results. By using
"cache=none" together with "allow_direct_io", we can bypass both the guest
and host page caches and access the storage devices directly.

Best wishes,
Jiachen
 
>
> Signed-off-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> Message-Id: <20200824105957.61265-1-zhangjiachen.jaycee@bytedance.com>
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
>  tools/virtiofsd/helper.c         |  4 ++++
>  tools/virtiofsd/passthrough_ll.c | 20 ++++++++++++++------
>  2 files changed, 18 insertions(+), 6 deletions(-)
>
> diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
> index 7bc5d7dc5a..85770d63f1 100644
> --- a/tools/virtiofsd/helper.c
> +++ b/tools/virtiofsd/helper.c
> @@ -178,6 +178,10 @@ void fuse_cmdline_help(void)
>             "                               (0 leaves rlimit unchanged)\n"
>             "                               default: min(1000000, fs.file-max - 16384)\n"
>             "                                        if the current rlimit is lower\n"
> +           "    -o allow_direct_io|no_allow_direct_io\n"
> +           "                               retain/discard O_DIRECT flags passed down\n"
> +           "                               to virtiofsd from guest applications.\n"
> +           "                               default: no_allow_direct_io\n"
>             );
>  }

> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> index 784330e0e4..0b229ebd57 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -151,6 +151,7 @@ struct lo_data {
>      int timeout_set;
>      int readdirplus_set;
>      int readdirplus_clear;
> +    int allow_direct_io;
>      struct lo_inode root;
>      GHashTable *inodes; /* protected by lo->mutex */
>      struct lo_map ino_map; /* protected by lo->mutex */
> @@ -179,6 +180,8 @@ static const struct fuse_opt lo_opts[] = {
>      { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
>      { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 },
>      { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 },
> +    { "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 },
> +    { "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 },
>      FUSE_OPT_END
>  };
>  static bool use_syslog = false;
> @@ -1516,7 +1519,8 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino,
>      fuse_reply_err(req, 0);
>  }

> -static void update_open_flags(int writeback, struct fuse_file_info *fi)
> +static void update_open_flags(int writeback, int allow_direct_io,
> +                              struct fuse_file_info *fi)
>  {
>      /*
>       * With writeback cache, kernel may send read requests even
> @@ -1541,10 +1545,13 @@ static void update_open_flags(int writeback, struct fuse_file_info *fi)

>      /*
>       * O_DIRECT in guest should not necessarily mean bypassing page
> -     * cache on host as well. If somebody needs that behavior, it
> -     * probably should be a configuration knob in daemon.
> +     * cache on host as well. Therefore, we discard it by default
> +     * ('-o no_allow_direct_io'). If somebody needs that behavior,
> +     * the '-o allow_direct_io' option should be set.
>       */
> -    fi->flags &= ~O_DIRECT;
> +    if (!allow_direct_io) {
> +        fi->flags &= ~O_DIRECT;
> +    }
>  }

>  static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
> @@ -1576,7 +1583,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
>          goto out;
>      }

> -    update_open_flags(lo->writeback, fi);
> +    update_open_flags(lo->writeback, lo->allow_direct_io, fi);

>      fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
>                  mode);
> @@ -1786,7 +1793,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
>      fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino,
>               fi->flags);

> -    update_open_flags(lo->writeback, fi);
> +    update_open_flags(lo->writeback, lo->allow_direct_io, fi);

>      sprintf(buf, "%i", lo_fd(req, ino));
>      fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
> @@ -2823,6 +2830,7 @@ int main(int argc, char *argv[])
>          .debug = 0,
>          .writeback = 0,
>          .posix_lock = 0,
> +        .allow_direct_io = 0,
>          .proc_self_fd = -1,
>      };
>      struct lo_map_elem *root_elem;
> --
> 2.26.2
>


reply via email to

[Prev in Thread] Current Thread [Next in Thread]