[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[gawk-diffs] [SCM] gawk branch, master, updated. c0583c31b8d47bd55e9340e
From: |
Arnold Robbins |
Subject: |
[gawk-diffs] [SCM] gawk branch, master, updated. c0583c31b8d47bd55e9340e7434cf9ccf7336f6d |
Date: |
Thu, 07 Apr 2011 18:45:47 +0000 |
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".
The branch, master has been updated
via c0583c31b8d47bd55e9340e7434cf9ccf7336f6d (commit)
from 00068b77275315c8c16e32c39af376dfcf5a2374 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=c0583c31b8d47bd55e9340e7434cf9ccf7336f6d
commit c0583c31b8d47bd55e9340e7434cf9ccf7336f6d
Author: Arnold D. Robbins <address@hidden>
Date: Thu Apr 7 21:44:53 2011 +0300
More improvements to array sorting.
diff --git a/ChangeLog b/ChangeLog
index a307a16..01176a7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+Thu Apr 7 21:38:08 2011 Arnold D. Robbins <address@hidden>
+
+ * array.c (merge): Use sort_cmp_nodes for asort/asorti.
+ See test/arraysort.awk test 1.
+
+Thu Apr 7 10:48:21 2011 Pat Rankin <address@hidden>
+
+ * array.c (sort_cmp_nodes): New routine. Unlike cmp_nodes, numbers
+ are less than strings instead of being formatted and then compared.
+ (sort_up_value): Use it.
+
Sun Apr 3 22:18:26 2011 Arnold D. Robbins <address@hidden>
* README, FUTURE: Minor edits.
diff --git a/array.c b/array.c
index 6817a37..5d1c588 100644
--- a/array.c
+++ b/array.c
@@ -61,6 +61,7 @@ struct sort_num { AWKNUM sort_numbr; NODE *sort_index; };
static qsort_compfunc sort_selection(NODE *, const char *, int);
static size_t sort_match(const char *, size_t, const char *);
static int sort_ignorecase(const char *, size_t, const char *, size_t);
+static int sort_cmp_nodes(NODE *, NODE *);
/* qsort callback routines */
static int sort_up_index_string(const void *, const void *);
@@ -1035,11 +1036,13 @@ merge(NODE *left, NODE *right)
NODE *ans, *cur;
/*
- * The use of cmp_nodes() here means that IGNORECASE influences the
- * comparison. This is OK, but it may be surprising. This comment
- * serves to remind us that we know about this and that it's OK.
+ * Use sort_cmp_nodes(): see the comment there as to why. That
+ * function will call cmp_nodes() on strings, which means that
+ * IGNORECASE influences the comparison. This is OK, but it may
+ * be surprising. This comment serves to remind us that we
+ * know about this and that it's OK.
*/
- if (cmp_nodes(left->ahvalue, right->ahvalue) <= 0) {
+ if (sort_cmp_nodes(left->ahvalue, right->ahvalue) <= 0) {
ans = cur = left;
left = left->ahnext;
} else {
@@ -1300,6 +1303,56 @@ comp_func(const void *p1, const void *p2)
return sort_up_index_string(p1, p2);
}
+/*
+ * sort_cmp_nodes --- compare two nodes. Unlike cmp_nodes(), we don't
+ * mix numeric and string comparisons. Numbers compare as less than
+ * strings, which in turn compare as less than sub-arrays. All
+ * sub-arrays compare equal to each other, regardless of their contents.
+ */
+
+static int
+sort_cmp_nodes(NODE *n1, NODE *n2)
+{
+ int ret;
+
+ if (n1->type == Node_var_array) {
+ /* return 0 if n2 is a sub-array too, else return 1 */
+ ret = (n2->type != Node_var_array);
+ } else if (n2->type == Node_var_array) {
+ ret = -1; /* n1 (scalar) < n2 (sub-array) */
+ } else {
+ /*
+ * Both scalars; we can't settle for a regular
+ * MAYBE_NUM comparison because that can cause
+ * problems when strings fall between numbers
+ * whose value makes them be ordered differently
+ * when handled as strings than as numbers.
+ * For example, { 10, 5, "3D" } yields a cycle:
+ * 5 < 10, "10" < "3D", "3D" < "5", so would
+ * produce different sort results depending upon
+ * the order in which comparisons between pairs
+ * of values happen to be performed. We force
+ * numbers to be less than strings in order to
+ * avoid that: 5 < 10, 10 < "3D", 5 < "3D".
+ */
+ /* first resolve any undecided types */
+ if (n1->flags & MAYBE_NUM)
+ (void) force_number(n1);
+ if (n2->flags & MAYBE_NUM)
+ (void) force_number(n2);
+ /*
+ * If both types are the same, use cmp_nodes();
+ * otherwise, force the numeric value to be less
+ * than the string one.
+ */
+ if ((n1->flags & NUMBER) == (n2->flags & NUMBER))
+ ret = cmp_nodes(n1, n2);
+ else
+ ret = (n1->flags & NUMBER) ? -1 : 1;
+ }
+ return ret;
+}
+
/* sort_ignorecase --- case insensitive string comparison from cmp_nodes() */
static int
@@ -1446,14 +1499,7 @@ sort_down_index_number(const void *p1, const void *p2)
return -sort_up_index_number(p1, p2);
}
-/*
- * sort_up_value --- qsort comparison function; ascending values;
- * standard awk comparison rules apply, so might be by number or might
- * be by string depending upon the values involved; if a value happens
- * to be a sub-array, it will compare greater than any scalar (last in
- * ascending order) and equal with any other sub-array (disambiguated
- * by index string, the same as with equal scalar values)
- */
+/* sort_up_value --- qsort comparison function; ascending values */
static int
sort_up_value(const void *p1, const void *p2)
@@ -1471,17 +1517,8 @@ sort_up_value(const void *p1, const void *p2)
/* and we want to compare the element values they refer to */
val1 = idx1->hvalue;
val2 = idx2->hvalue;
- if (val1->type == Node_var_array) {
- if (val2->type == Node_var_array)
- ret = 0; /* both sub-arrays, hence tie */
- else
- ret = 1; /* val1 (sub-array) > val2 (scalar) */
- } else {
- if (val2->type == Node_var_array)
- ret = -1; /* val1 (scalar) < val2 (sub-array) */
- else
- ret = cmp_nodes(val1, val2); /* both scalars */
- }
+
+ ret = sort_cmp_nodes(val1, val2);
/* disambiguate ties to force a deterministic order */
if (ret == 0)
diff --git a/test/ChangeLog b/test/ChangeLog
index ee4c679..2f5a461 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,6 +1,10 @@
+Thu Apr 7 21:44:06 2011 Arnold D. Robbins <address@hidden>
+
+ * arraysort.awk, arraysort.ok: Added more test cases.
+
Fri Apr 1 11:56:54 2011 Arnold D. Robbins <address@hidden>
- * arraysort.awk, arraysort.ok: New files from John Haque,
+ * arraysort.awk, arraysort.ok: New files from John Haque,
edited somewhat.
* Makefile.am (arraysort): New test.
diff --git a/test/arraysort.awk b/test/arraysort.awk
index 2853552..912d588 100644
--- a/test/arraysort.awk
+++ b/test/arraysort.awk
@@ -1,11 +1,81 @@
-# Date: Thu, 31 Mar 2011 12:29:09 -0600
# From: address@hidden
+# March and April 2010
-BEGIN { a[100]=a[1]=a["x"]=a["y"]=1; PROCINFO["sorted_in"]="num";
-for (i in a) print i, a[i] }
+BEGIN {
+ print "--- test1 ---"
+ a[1] = b[3] = 5
+ a[2] = b[2] = "3D"
+ a[3] = b[1] = 10
+ asort(a)
+ asort(b)
+ for (i = 1; i <= 3; ++i)
+ printf(" %2s %-2s\n", (a[i] ""), (b[i] ""))
-BEGIN { a[100]=a[1]=a["x"]=1; PROCINFO["sorted_in"]="num";
-for (i in a) print i, a[i] }
+ delete a
+ delete b
+}
-BEGIN { a[0]=a[100]=a[1]=a["x"]=1; PROCINFO["sorted_in"]="num";
-for (i in a) print i, a[i] }
+BEGIN {
+ print "--- test2 ---"
+ a[100] = a[1] = a["x"] = a["y"] = 1
+ PROCINFO["sorted_in"] = "num"
+ for (i in a)
+ print i, a[i]
+ delete a
+}
+
+BEGIN {
+ print "--- test3 ---"
+ a[100] = a[1] = a["x"] = 1
+ PROCINFO["sorted_in"] = "num"
+ for (i in a)
+ print i, a[i]
+ delete a
+}
+
+BEGIN {
+ print "--- test4 ---"
+ a[0] = a[100] = a[1] = a["x"] = 1
+ PROCINFO["sorted_in"] = "num"
+ for (i in a)
+ print i, a[i]
+ delete a
+}
+
+BEGIN {
+ print "--- test5 ---"
+ a[""] = a["y"] = a[0] = 1
+ PROCINFO["sorted_in"] = "num"
+ for (i in a)
+ print i, a[i]
+ delete a
+}
+
+BEGIN {
+ print "--- test6 ---"
+ a[2] = a[1] = a[4] = a["3 "] = 1
+ PROCINFO["sorted_in"] = "num"
+ for (i in a)
+ print "\""i"\""
+ delete a
+}
+
+
+BEGIN {
+ print "--- test7 ---"
+ n = split(" 4 \n 3\n3D\nD3\n3\n0\n2\n4\n1\n5", a, "\n")
+ for (i = 1; i <= n; i++)
+ b[a[i]] = a[i]
+ print "--asc ind str--"
+ PROCINFO["sorted_in"] = "asc ind str"
+ for (i in b)
+ print "|"i"|"b[i]
+ print "--asc ind num--"
+ PROCINFO["sorted_in"] = "asc ind num"
+ for (i in b)
+ print "|"i"|"b[i]
+ print "--asc val num--"
+ PROCINFO["sorted_in"] = "asc val num"
+ for (i in b)
+ print "|"i"|"b[i]
+}
diff --git a/test/arraysort.ok b/test/arraysort.ok
index 2306e8a..bf48ecd 100644
--- a/test/arraysort.ok
+++ b/test/arraysort.ok
@@ -1,13 +1,62 @@
+--- test1 ---
+ 5 5
+ 10 10
+ 3D 3D
+--- test2 ---
x 1
y 1
1 1
100 1
+--- test3 ---
x 1
-y 1
1 1
100 1
+--- test4 ---
0 1
x 1
-y 1
1 1
100 1
+--- test5 ---
+ 1
+0 1
+y 1
+--- test6 ---
+"1"
+"2"
+"3 "
+"4"
+--- test7 ---
+--asc ind str--
+| 3| 3
+| 4 | 4
+|0|0
+|1|1
+|2|2
+|3|3
+|3D|3D
+|4|4
+|5|5
+|D3|D3
+--asc ind num--
+|0|0
+|D3|D3
+|1|1
+|2|2
+| 3| 3
+|3|3
+|3D|3D
+| 4 | 4
+|4|4
+|5|5
+--asc val num--
+gawk: arraysort.awk:79: warning: `PROCINFO["sorted_in"]': sorting by value
can't be forced to use numeric comparison
+|4|4
+|5|5
+|D3|D3
+|3D|3D
+| 3| 3
+|0|0
+|1|1
+|2|2
+|3|3
+| 4 | 4
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 11 +++++++
array.c | 83 +++++++++++++++++++++++++++++++++++++--------------
test/ChangeLog | 6 +++-
test/arraysort.awk | 84 +++++++++++++++++++++++++++++++++++++++++++++++----
test/arraysort.ok | 53 +++++++++++++++++++++++++++++++-
5 files changed, 204 insertions(+), 33 deletions(-)
hooks/post-receive
--
gawk
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [gawk-diffs] [SCM] gawk branch, master, updated. c0583c31b8d47bd55e9340e7434cf9ccf7336f6d,
Arnold Robbins <=