bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#8061: Introduce SEEK_DATA/SEEK_HOLE to extent_scan module


From: Jeff liu
Subject: bug#8061: Introduce SEEK_DATA/SEEK_HOLE to extent_scan module
Date: Mon, 18 Apr 2011 22:15:13 +0800

Hi All,

Please ignore the current patch; I will submit another patch with a few fixes
soon.


Thanks,
-Jeff

在 2011-2-17,下午9:57, Jeff liu 写道:

> Hello All,
> 
> This is the first attempt to introduce SEEK_DATA/SEEK_HOLE support to the
> extent_scan module for efficient sparse file copy on ZFS.  I have delayed it
> for a long time, sorry for that!
> 
> Below is the list of code changes:
> src/extent-scan.h:  add a new member 'src_total_size' to "struct
> extent_scan", since I have to make use of this value to determine whether
> a file is sparse or not for the initial scan.  If the return value of
> lseek(fd, 0, SEEK_HOLE) is equal to or larger than src_total_size, it means
> the source file
> is definitely not a sparse file, or maybe it is a sparse file but it does not
> make sense to proceed with the scan read.
> Another change in this file is the signature of extent_scan_init(); just as I
> mentioned above, it needs to accept the src_total_size variable.
> src/extent-scan.c: implement the new extent_scan_read() through
> SEEK_DATA/SEEK_HOLE; it will be called if those two values are defined in
> <unistd.h>.
> src/copy.c: pass src_total_size to extent_scan_init().
> 
> In my test environment (Solaris 10, SunOS 5.10 Generic_142910-17), I have
> tried a few simple cases, and they work for me.
> 
> For now, I am using diff(1) to verify the copy result.  Does anyone know of
> utilities that can be used to write the test script?
> I sent an email to the ZFS dev mailing list to ask this question yesterday,
> and a nice guy suggested that I use ZDB (http://cuddletech.com/blog/?p=407)
> for that.
> I'm still studying this utility now.  I also noticed there is a patch to add
> SEEK_HOLE/SEEK_DATA support to the os module in the Python community; please
> refer to:
> http://bugs.python.org/file19566/z.patch
> but it requires a very recent Python build, I think, so could anyone give
> some other advice on this point?
> 
> The patch is shown below; any help with testing and any comments are
> appreciated!!
> 
> 
> Thanks,
> -Jeff
> 
> 
> From: Jie Liu <address@hidden>
> Date: Thu, 17 Feb 2011 21:14:23 +0800
> Subject: [PATCH 1/1] copy: add SEEK_DATA/SEEK_HOLE support to extent_scan 
> module
> 
> * src/extent-scan.h: add src_total_size to struct extent_scan, we need
>   to check the SEEK_HOLE result against it for the initial extent scan.
>   Modify the extent_scan_init() signature to add size_t src_total_size.
> * src/extent-scan.c: implement a new extent_scan_read() through SEEK_DATA
>   and SEEK_HOLE.
> * src/copy.c: pass src_total_size to extent_scan_init().
> 
> Signed-off-by: Jie Liu <address@hidden>
> ---
>  src/copy.c        |    2 +-
>  src/extent-scan.c |  113 
> ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  src/extent-scan.h |    9 +++-
>  3 files changed, 120 insertions(+), 4 deletions(-)
> 
> diff --git a/src/copy.c b/src/copy.c
> index 104652d..22b9911 100644
> --- a/src/copy.c
> +++ b/src/copy.c
> @@ -306,7 +306,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t 
> buf_size,
>       We may need this at the end, for a final ftruncate.  */
>    off_t dest_pos = 0;
>  
> -  extent_scan_init (src_fd, &scan);
> +  extent_scan_init (src_fd, src_total_size, &scan);
>  
>    *require_normal_copy = false;
>    bool wrote_hole_at_eof = true;
> diff --git a/src/extent-scan.c b/src/extent-scan.c
> index 1ba59db..ffeab7a 100644
> --- a/src/extent-scan.c
> +++ b/src/extent-scan.c
> @@ -32,13 +32,17 @@
>  /* Allocate space for struct extent_scan, initialize the entries if
>     necessary and return it as the input argument of extent_scan_read().  */
>  extern void
> -extent_scan_init (int src_fd, struct extent_scan *scan)
> +extent_scan_init (int src_fd, size_t src_total_size,
> +                  struct extent_scan *scan)
>  {
>    scan->fd = src_fd;
>    scan->ei_count = 0;
>    scan->scan_start = 0;
>    scan->initial_scan_failed = false;
>    scan->hit_final_extent = false;
> +#if defined(SEEK_HOLE) && defined(SEEK_DATA)
> +  scan->src_total_size = src_total_size;
> +#endif
>  }
>  
>  #ifdef __linux__
> @@ -106,6 +110,113 @@ extent_scan_read (struct extent_scan *scan)
>  
>    return true;
>  }
> +#elif defined(SEEK_HOLE) && defined(SEEK_DATA)
> +extern bool
> +extent_scan_read (struct extent_scan *scan)
> +{
> +  off_t data_pos, hole_pos;
> +  union { struct extent_info ei; char c[4096]; } extent_buf;
> +  struct extent_info *ext_info = &extent_buf.ei;
> +  enum { count = (sizeof extent_buf / sizeof *ext_info) };
> +  verify (count != 0);
> +
> +  memset (&extent_buf, 0, sizeof extent_buf);
> +
> +  if (scan->scan_start == 0)
> +    {
> +# ifdef _PC_MIN_HOLE_SIZE
> +      /* To determine if the underlaying file system support
> +         SEEK_HOLE, if not, fall back to the standard copy.  */
> +      if (fpathconf (scan->fd, _PC_MIN_HOLE_SIZE) < 0)
> +        {
> +          scan->initial_scan_failed = true;
> +          return false;
> +        }
> +# endif
> +
> +      /* If we have been compiled on an OS that supports SEEK_HOLE
> +         but run on an OS that does not support SEEK_HOLE, we get
> +         EINVAL.  If the underlying filesystem does not support the
> +         SEEK_HOLE call, we get ENOTSUP, fall back to standard copy
> +         in either case.  */
> +      hole_pos = lseek (scan->fd, (off_t) 0, SEEK_HOLE);
> +      if (hole_pos < 0)
> +        {
> +          if (errno == EINVAL || errno == ENOTSUP)
> +            scan->initial_scan_failed = true;
> +          return false;
> +        }
> +
> +      /* Seek back to position 0 first if we detected a real hole.  */
> +      if (hole_pos > 0)
> +        {
> +          off_t tmp_pos;
> +          tmp_pos = lseek (scan->fd, (off_t) 0, SEEK_SET);
> +          if (tmp_pos != (off_t) 0)
> +              return false;
> +
> +          /* The source file is definitely not a sparse file, or it
> +             maybe a sparse file but SEEK_HOLE returns the source file's
> +             total size, fall back to the standard copy too.  */
> +          if (hole_pos >= scan->src_total_size)
> +            {
> +              scan->initial_scan_failed = true;
> +              return false;
> +            }
> +        }
> +    }
> +
> +  unsigned int i = 0;
> +  /* If lseek(2) failed and the errno is set to ENXIO, for
> +     SEEK_DATA there are no more data regions past the supplied
> +     offset.  For SEEK_HOLE, there are no more holes past the 
> +     supplied offset.  Set scan->hit_final_extent to true for
> +     either case.  */
> +  do {
> +    data_pos = lseek (scan->fd, scan->scan_start, SEEK_DATA);
> +    if (data_pos < 0)
> +      {
> +        if (errno != ENXIO)
> +          return false;
> +        else
> +          {
> +            scan->hit_final_extent = true;
> +            return true;
> +          }
> +      }
> +
> +    hole_pos = lseek (scan->fd, data_pos, SEEK_HOLE);
> +    if (hole_pos < 0)
> +      {
> +        if (errno != ENXIO)
> +          return false;
> +        else
> +          {
> +            scan->hit_final_extent = true;
> +            return true;
> +          }
> +      }
> +
> +    ext_info[i].ext_logical = data_pos;
> +    ext_info[i].ext_length = hole_pos - data_pos;
> +    scan->scan_start = hole_pos;
> +    ++i;
> +  } while (scan->scan_start < scan->src_total_size && i < count);
> +
> +  i--;
> +  scan->ei_count = i;
> +  scan->ext_info = xnmalloc (scan->ei_count, sizeof (struct extent_info));
> +
> +  for (i = 0; i < scan->ei_count; i++)
> +    {
> +      assert (ext_info[i].ext_logical <= OFF_T_MAX);
> +
> +      scan->ext_info[i].ext_logical = ext_info[i].ext_logical;
> +      scan->ext_info[i].ext_length = ext_info[i].ext_length;
> +    }
> +
> +  return true; 
> +}
>  #else
>  extern bool
>  extent_scan_read (struct extent_scan *scan ATTRIBUTE_UNUSED)
> diff --git a/src/extent-scan.h b/src/extent-scan.h
> index 4724b25..a271b95 100644
> --- a/src/extent-scan.h
> +++ b/src/extent-scan.h
> @@ -18,7 +18,6 @@
>  
>  #ifndef EXTENT_SCAN_H
>  # define EXTENT_SCAN_H
> -
>  /* Structure used to store information of each extent.  */
>  struct extent_info
>  {
> @@ -38,6 +37,11 @@ struct extent_scan
>    /* File descriptor of extent scan run against.  */
>    int fd;
>  
> +#if defined(SEEK_DATA) && defined(SEEK_HOLE)
> +  /* Source file size, i.e, (struct stat) &statbuf.st_size.  */
> +  size_t src_total_size;
> +#endif
> +
>    /* Next scan start offset.  */
>    off_t scan_start;
>  
> @@ -55,7 +59,8 @@ struct extent_scan
>    struct extent_info *ext_info;
>  };
>  
> -void extent_scan_init (int src_fd, struct extent_scan *scan);
> +void extent_scan_init (int src_fd, size_t src_total_size,
> +                       struct extent_scan *scan);
>  
>  bool extent_scan_read (struct extent_scan *scan);
>  
> -- 
> 1.7.4



reply via email to

[Prev in Thread] Current Thread [Next in Thread]