qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v2 1/1] raw-posix.c - use max transfer length /


From: Maxim Levitsky
Subject: Re: [Qemu-devel] [PATCH v2 1/1] raw-posix.c - use max transfer length / max segment count only for SCSI passthrough
Date: Wed, 10 Jul 2019 16:43:00 +0300

On Thu, 2019-07-04 at 15:43 +0300, Maxim Levitsky wrote:
> Regular kernel block devices (/dev/sda*, /dev/nvme*, etc) don't have
> max segment size/max segment count hardware requirements exposed
> to the userspace, but rather the kernel block layer
> takes care to split the incoming requests that
> violate these requirements.
> 
> Allowing the kernel to do the splitting allows qemu to avoid
> various overheads that arise otherwise from this.
> 
> This is especially visible with an nbd server that exposes
> a mostly empty qcow2 image as a raw file over the network.
> In this case most of the reads by the remote user
> won't even hit the underlying kernel block device,
> and therefore most of the overhead will be in the
> nbd traffic, which increases significantly with a lower max transfer size.
> 
> In addition to that, even for local block device
> access the performance improves a bit due to less
> traffic between qemu and the kernel when large
> transfer sizes are used (e.g. for image conversion).
> 
> More info can be found at:
> https://bugzilla.redhat.com/show_bug.cgi?id=1647104
> 
> Signed-off-by: Maxim Levitsky <address@hidden>
> Reviewed-by: Stefan Hajnoczi <address@hidden>
> Reviewed-by: Eric Blake <address@hidden>
> ---
>  block/file-posix.c | 54 ++++++++++++++++++++++++----------------------
>  1 file changed, 28 insertions(+), 26 deletions(-)
> 
> diff --git a/block/file-posix.c b/block/file-posix.c
> index ab05b51a66..4479cc7ab4 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -1038,15 +1038,13 @@ static void raw_reopen_abort(BDRVReopenState *state)
>      s->reopen_state = NULL;
>  }
>  
> -static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
> +static int sg_get_max_transfer_length(int fd)
>  {
>  #ifdef BLKSECTGET
>      int max_bytes = 0;
> -    short max_sectors = 0;
> -    if (bs->sg && ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
> +
> +    if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
>          return max_bytes;
> -    } else if (!bs->sg && ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
> -        return max_sectors << BDRV_SECTOR_BITS;
>      } else {
>          return -errno;
>      }
> @@ -1055,25 +1053,31 @@ static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
>  #endif
>  }
>  
> -static int hdev_get_max_segments(const struct stat *st)
> +static int sg_get_max_segments(int fd)
>  {
>  #ifdef CONFIG_LINUX
>      char buf[32];
>      const char *end;
> -    char *sysfspath;
> +    char *sysfspath = NULL;
>      int ret;
> -    int fd = -1;
> +    int sysfd = -1;
>      long max_segments;
> +    struct stat st;
> +
> +    if (fstat(fd, &st)) {
> +        ret = -errno;
> +        goto out;
> +    }
>  
>      sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
> -                                major(st->st_rdev), minor(st->st_rdev));
> -    fd = open(sysfspath, O_RDONLY);
> -    if (fd == -1) {
> +                                major(st.st_rdev), minor(st.st_rdev));
> +    sysfd = open(sysfspath, O_RDONLY);
> +    if (sysfd == -1) {
>          ret = -errno;
>          goto out;
>      }
>      do {
> -        ret = read(fd, buf, sizeof(buf) - 1);
> +        ret = read(sysfd, buf, sizeof(buf) - 1);
>      } while (ret == -1 && errno == EINTR);
>      if (ret < 0) {
>          ret = -errno;
> @@ -1090,8 +1094,8 @@ static int hdev_get_max_segments(const struct stat *st)
>      }
>  
>  out:
> -    if (fd != -1) {
> -        close(fd);
> +    if (sysfd != -1) {
> +        close(sysfd);
>      }
>      g_free(sysfspath);
>      return ret;
> @@ -1103,19 +1107,17 @@ out:
>  static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
>  {
>      BDRVRawState *s = bs->opaque;
> -    struct stat st;
>  
> -    if (!fstat(s->fd, &st)) {
> -        if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) {
> -            int ret = hdev_get_max_transfer_length(bs, s->fd);
> -            if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
> -                bs->bl.max_transfer = pow2floor(ret);
> -            }
> -            ret = hdev_get_max_segments(&st);
> -            if (ret > 0) {
> -                bs->bl.max_transfer = MIN(bs->bl.max_transfer,
> -                                          ret * getpagesize());
> -            }
> +    if (bs->sg) {
> +        int ret = sg_get_max_transfer_length(s->fd);
> +
> +        if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
> +            bs->bl.max_transfer = pow2floor(ret);
> +        }
> +
> +        ret = sg_get_max_segments(s->fd);
> +        if (ret > 0) {
> +            bs->bl.max_transfer = MIN(bs->bl.max_transfer, ret * getpagesize());
>          }
>      }
>  


Ping.

Best regards,
        Maxim Levitsky




reply via email to

[Prev in Thread] Current Thread [Next in Thread]