bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Feature request: gzip/bzip support for split


From: Chandrakumar Muthaiah
Subject: Feature request: gzip/bzip support for split
Date: Thu, 29 Jan 2009 16:46:02 -0500
User-agent: Thunderbird 2.0.0.19 (X11/20090105)

I would like to propose a feature that lets split compress each output file
with gzip or bzip2 as it is written, and I am including a patch that implements it.

I believe this is going to be really useful when we are dealing with
very large files. I hope that it will be useful for people out there.

Below is the patch for coreutils 6.9

--- split.c    2007-03-18 17:36:43.000000000 -0400
+++ ../../coreutils-6/src/split.c    2009-01-28 22:26:45.000000000 -0500
@@ -75,6 +76,9 @@
    output file is opened. */
 static bool verbose;

+/* Output compression: 0 = none, 1 = gzip (-z), 2 = bzip2 (-j).  */
+static int zipoutfile = 0;
+
 /* For long options that have no equivalent short option, use a
    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
 enum
@@ -82,6 +86,14 @@
   VERBOSE_OPTION = CHAR_MAX + 1
 };

+struct strvars  /* Wrapper for one string-table entry.  */
+{
+    const char *val;
+};
+/* Indexed by zipoutfile: output-name suffix and compressor command.  */
+static struct strvars const zsuffixes[] = {{""},  {".gz"}, {".bz2"}};
+static struct strvars const zipcmds[]   = {{""},  {"gzip"}, {"bzip2"}};
+
 static struct option const longopts[] =
 {
   {"bytes", required_argument, NULL, 'b'},
@@ -90,6 +102,8 @@
   {"suffix-length", required_argument, NULL, 'a'},
   {"numeric-suffixes", no_argument, NULL, 'd'},
   {"verbose", no_argument, NULL, VERBOSE_OPTION},
+  {"gzip", no_argument, NULL, 'z'},
+  {"bzip2", no_argument, NULL, 'j'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {NULL, 0, NULL, 0}
@@ -122,6 +136,8 @@
   -C, --line-bytes=SIZE   put at most SIZE bytes of lines per output
file\n\
   -d, --numeric-suffixes  use numeric suffixes instead of alphabetic\n\
   -l, --lines=NUMBER      put NUMBER lines per output file\n\
+  -z, --gzip              gzip output files\n\
+  -j, --bzip2             bzip2 output files\n\
 "), DEFAULT_SUFFIX_LENGTH);
       fputs (_("\
       --verbose           print a diagnostic to standard error just\n\
@@ -194,21 +210,136 @@
     }
 }

-/* Write BYTES bytes at BP to an output file.
-   If NEW_FILE_FLAG is true, open the next output file.
-   Otherwise add to the same output file already in use.  */
+/* Run COMMAND with "/bin/sh -c", piped to its stdin (MODE "w") or from its
+   stdout (MODE "r"); "e" adds close-on-exec.  Return our pipe fd, or -1.  */

-static void
-cwrite (bool new_file_flag, const char *bp, size_t bytes)
+#define EXIT_FAILURE 1  /* NOTE(review): <stdlib.h> normally provides this */
+#define O_BINARY 0      /* 0 makes the O_BINARY open flag a no-op */
+
+static int
+fdpopen ( const char *command, const char *mode)
 {
-  if (new_file_flag)
+    int parent_end, child_end;
+    int pipe_fds[2];
+    pid_t child_pid;
+
+    int do_read = 0;
+    int do_write = 0;
+    int do_cloexec = 0;
+    /* Parse MODE; any character other than 'r', 'w' or 'e' is EINVAL.  */
+    while (*mode != '\0')
+    {
+        switch (*mode++)
+        {
+            case 'r':
+                do_read = 1;
+                break;
+            case 'w':
+                do_write = 1;
+                break;
+            case 'e':
+                do_cloexec = 1;
+                break;
+            default:
+            errout:
+                errno = EINVAL;
+                return -1;
+        }
+    }
+    /* Exactly one of 'r' and 'w' must have been given.  */
+    if ((do_read ^ do_write) == 0)
+        goto errout;
+
+    if (pipe (pipe_fds) < 0)
+        return -1;
+    /* pipe_fds[0] is the read end, pipe_fds[1] the write end.  */
+    if (do_read)
+    {
+        parent_end = pipe_fds[0];
+        child_end = pipe_fds[1];
+    }
+    else
+    {
+        parent_end = pipe_fds[1];
+        child_end = pipe_fds[0];
+    }
+    child_pid = fork ();
+
+    if (child_pid == 0)
     {
+        int child_std_end = do_read ? 1 : 0; /* fd the child's pipe end must become: stdout (1) or stdin (0) */
+        close (parent_end);
+
+        if (child_end != child_std_end)
+         {
+             dup2 (child_end, child_std_end);
+            close (child_end);
+        }
+        /* _exit (127) below runs only if execl returns, i.e. it failed.  */
+        execl ("/bin/sh", "sh", "-c", command, (char *) 0);
+        _exit (127);
+    }
+    /* Parent, or fork failure: the child's end is never used here.  */
+    close (child_end);
+    if (child_pid < 0)
+    {
+        close (parent_end);
+        return -1;
+    }
+
+    if (do_cloexec)
+        fcntl (parent_end, F_SETFD, FD_CLOEXEC);
+
+     return parent_end;
+}
+
+/* Open the next compressed output: run "ZIPCMD > OUTFILE.SUFFIX" through
+   fdpopen and store the pipe's write end in output_desc; exit on failure.
+   NOTE(review): outfile is passed to the shell unquoted.  */
+static void
+new_fd_pipe()
+{
+    const char* zipcmd = zipcmds[zipoutfile].val;
+    const char* zsuf   = zsuffixes[zipoutfile].val;
+    size_t outzlength = strlen (zipcmd);
+    size_t outlength  = strlen (outfile);
+    size_t zsuflength = strlen (zsuf);
+    size_t tlength    = outzlength + outlength + zsuflength + 3;
+    /* Shell command, e.g. 'gzip > /1/2/3/4/5/outputfile.gz'.  */
+    char* outfilez    = xmalloc (tlength + 1);
+    char* ptrpos = outfilez;
+
+    memcpy (ptrpos, zipcmd, outzlength);
+    ptrpos += outzlength;
+    memcpy (ptrpos, " > ", 3);
+    ptrpos += 3;
+    memcpy (ptrpos, outfile, outlength);
+    ptrpos += outlength;
+    memcpy (ptrpos, zsuf, zsuflength);
+    /* Terminate the command buffer itself, not the global outfile.  */
+    outfilez[tlength] = 0;
+
+    if (verbose)
+        fprintf (stderr, _("creating file %s\n"), quote (outfilez));
+    output_desc = fdpopen ( outfilez, "we");
+    if (output_desc < 0)
+        error (EXIT_FAILURE, errno, "%s", outfilez);
+    free (outfilez);
+}
+
+static void
+new_fd_file()
+{
       if (output_desc >= 0 && close (output_desc) < 0)
     error (EXIT_FAILURE, errno, "%s", outfile);

       next_file_name ();
+
+  if(!zipoutfile)
+    {
       if (verbose)
     fprintf (stderr, _("creating file %s\n"), quote (outfile));
+
       output_desc = open (outfile,
               O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
               (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP
@@ -216,6 +347,29 @@
       if (output_desc < 0)
     error (EXIT_FAILURE, errno, "%s", outfile);
     }
+    else
+    {
+        new_fd_pipe();
+    }
+}
+
+/* Advance to the next output file when NEW_FILE_FLAG is true;
+   otherwise leave the current output descriptor untouched.  */
+static void
+new_fd(bool new_file_flag)
+{
+  if (new_file_flag)
+    new_fd_file();
+}
+
+/* Write BYTES bytes at BP to output_desc, after new_fd has opened the next
+   output file if NEW_FILE_FLAG is true.  A short write is fatal.
+   NOTE(review): the error names outfile even when writing to a compressor.  */
+
+static void
+cwrite (bool new_file_flag, const char *bp, size_t bytes)
+{
+  new_fd(new_file_flag);
   if (full_write (output_desc, bp, bytes) != bytes)
     error (EXIT_FAILURE, errno, "%s", outfile);
 }
@@ -405,7 +559,7 @@
       /* This is the argv-index of the option we will read next.  */
       int this_optind = optind ? optind : 1;

-      c = getopt_long (argc, argv, "0123456789C:a:b:dl:", longopts, NULL);
+      c = getopt_long (argc, argv, "0123456789C:a:b:dl:zj", longopts,
NULL);
       if (c == -1)
     break;

@@ -489,6 +643,14 @@
         }
       break;

+  case 'z':
+    zipoutfile = 1;
+    break;
+
+  case 'j':
+    zipoutfile = 2;
+    break;
+
     case 'd':
       suffix_alphabet = "0123456789";
       break;











reply via email to

[Prev in Thread] Current Thread [Next in Thread]