[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#37859: [PATCH] shuf: fix bug with ‘-r -n 0’
From: |
Paul Eggert |
Subject: |
bug#37859: [PATCH] shuf: fix bug with ‘-r -n 0’ |
Date: |
Mon, 21 Oct 2019 13:21:08 -0700 |
‘shuf -r -n 0 file’ would mistakenly read from standard input.
Problem reported by my student Jingnong Qu while reimplementing a
shuf subset in Python as an exercise in UCLA Computer Science 35L:
https://web.cs.ucla.edu/classes/fall19/cs35L/assign/assign3.html
* NEWS: Mention the fix. Also, ASCIIfy a previous item.
* src/shuf.c (main): Fix bug.
* tests/misc/shuf.sh: Add a test case for the bug.
---
NEWS | 5 ++++-
src/shuf.c | 53 +++++++++++++++++++++++++---------------------
tests/misc/shuf.sh | 4 ++++
3 files changed, 37 insertions(+), 25 deletions(-)
diff --git a/NEWS b/NEWS
index fe38e80d4..476c02aed 100644
--- a/NEWS
+++ b/NEWS
@@ -29,6 +29,9 @@ GNU coreutils NEWS -*-
outline -*-
(like Solaris 10 and Solaris 11).
[bug introduced in coreutils-8.31]
+ 'shuf -r -n 0 file' no longer mistakenly reads from standard input.
+ [bug introduced with the --repeat feature in coreutils-8.22]
+
split no longer reports a "output file suffixes exhausted" error
when the specified number of files is evenly divisible by 10, 16, 26,
for --numeric, --hex, or default alphabetic suffixes respectively.
@@ -210,7 +213,7 @@ GNU coreutils NEWS -*-
outline -*-
'mv -n A B' no longer suffers from a race condition that can
overwrite a simultaneously-created B. This bug fix requires
platform support for the renameat2 or renameatx_np syscalls, found
- in recent Linux and macOS kernels. As a side effect, ‘mv -n A A’
+ in recent Linux and macOS kernels. As a side effect, 'mv -n A A'
now silently does nothing if A exists.
[bug introduced with coreutils-7.1]
diff --git a/src/shuf.c b/src/shuf.c
index 968d3641c..6a1aa0158 100644
--- a/src/shuf.c
+++ b/src/shuf.c
@@ -493,7 +493,12 @@ main (int argc, char **argv)
}
/* Prepare input. */
- if (echo)
+ if (head_lines == 0)
+ {
+ n_lines = 0;
+ line = NULL;
+ }
+ else if (echo)
{
input_from_argv (operand, n_operands, eolbyte);
n_lines = n_operands;
@@ -507,54 +512,54 @@ main (int argc, char **argv)
else
{
/* If an input file is specified, re-open it as stdin. */
- if (n_operands == 1)
- if (! (STREQ (operand[0], "-") || ! head_lines
- || freopen (operand[0], "r", stdin)))
- die (EXIT_FAILURE, errno, "%s", quotef (operand[0]));
+ if (n_operands == 1
+ && ! (STREQ (operand[0], "-")
+ || freopen (operand[0], "r", stdin)))
+ die (EXIT_FAILURE, errno, "%s", quotef (operand[0]));
fadvise (stdin, FADVISE_SEQUENTIAL);
- if (! repeat && head_lines != SIZE_MAX
- && (! head_lines || input_size () > RESERVOIR_MIN_INPUT))
+ if (repeat || head_lines == SIZE_MAX
+ || input_size () <= RESERVOIR_MIN_INPUT)
{
- use_reservoir_sampling = true;
- n_lines = SIZE_MAX; /* unknown number of input lines, for now. */
+ n_lines = read_input (stdin, eolbyte, &input_lines);
+ line = input_lines;
}
else
{
- n_lines = read_input (stdin, eolbyte, &input_lines);
- line = input_lines;
+ use_reservoir_sampling = true;
+ n_lines = SIZE_MAX; /* unknown number of input lines, for now. */
}
}
- if (! repeat)
- head_lines = MIN (head_lines, n_lines);
+ /* The adjusted head line count; can be less than HEAD_LINES if the
+ input is small and if not repeating. */
+ size_t ahead_lines = repeat || head_lines < n_lines ? head_lines : n_lines;
randint_source = randint_all_new (random_source,
(use_reservoir_sampling || repeat
? SIZE_MAX
- : randperm_bound (head_lines, n_lines)));
+ : randperm_bound (ahead_lines, n_lines)));
if (! randint_source)
die (EXIT_FAILURE, errno, "%s", quotef (random_source));
if (use_reservoir_sampling)
{
/* Instead of reading the entire file into 'line',
- use reservoir-sampling to store just "head_lines" random lines. */
- n_lines = read_input_reservoir_sampling (stdin, eolbyte, head_lines,
+ use reservoir-sampling to store just AHEAD_LINES random lines. */
+ n_lines = read_input_reservoir_sampling (stdin, eolbyte, ahead_lines,
randint_source, &reservoir);
- head_lines = n_lines;
+ ahead_lines = n_lines;
}
/* Close stdin now, rather than earlier, so that randint_all_new
doesn't have to worry about opening something other than
stdin. */
- if (! (echo || input_range)
- && (fclose (stdin) != 0))
+ if (! (head_lines == 0 || echo || input_range || fclose (stdin) == 0))
die (EXIT_FAILURE, errno, _("read error"));
if (!repeat)
- permutation = randperm_new (randint_source, head_lines, n_lines);
+ permutation = randperm_new (randint_source, ahead_lines, n_lines);
if (outfile && ! freopen (outfile, "w", stdout))
die (EXIT_FAILURE, errno, "%s", quotef (outfile));
@@ -569,10 +574,10 @@ main (int argc, char **argv)
if (n_lines == 0)
die (EXIT_FAILURE, 0, _("no lines to repeat"));
if (input_range)
- i = write_random_numbers (randint_source, head_lines,
+ i = write_random_numbers (randint_source, ahead_lines,
lo_input, hi_input, eolbyte);
else
- i = write_random_lines (randint_source, head_lines, line, n_lines);
+ i = write_random_lines (randint_source, ahead_lines, line,
n_lines);
}
}
else
@@ -580,10 +585,10 @@ main (int argc, char **argv)
if (use_reservoir_sampling)
i = write_permuted_output_reservoir (n_lines, reservoir, permutation);
else if (input_range)
- i = write_permuted_numbers (head_lines, lo_input,
+ i = write_permuted_numbers (ahead_lines, lo_input,
permutation, eolbyte);
else
- i = write_permuted_lines (head_lines, line, permutation);
+ i = write_permuted_lines (ahead_lines, line, permutation);
}
if (i != 0)
diff --git a/tests/misc/shuf.sh b/tests/misc/shuf.sh
index bbc54a9d2..c866f5de5 100755
--- a/tests/misc/shuf.sh
+++ b/tests/misc/shuf.sh
@@ -39,6 +39,10 @@ compare in out > /dev/null && { fail=1; echo "not random?"
1>&2; }
sort -n out > out1
compare in out1 || { fail=1; echo "not a permutation" 1>&2; }
+# Exercize shuf's -r -n 0 options, with no standard input.
+shuf -r -n 0 in <&- >out || fail=1
+compare /dev/null out || fail=1
+
# Exercise shuf's -e option.
t=$(shuf -e a b c d e | sort | fmt)
test "$t" = 'a b c d e' || { fail=1; echo "not a permutation" 1>&2; }
--
2.21.0
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- bug#37859: [PATCH] shuf: fix bug with ‘-r -n 0’,
Paul Eggert <=