[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: casereader numbering
From: |
John Darrington |
Subject: |
Re: casereader numbering |
Date: |
Thu, 24 Jul 2008 20:25:39 +0800 |
User-agent: |
Mutt/1.5.18 (2008-05-17) |
On Thu, Jul 24, 2008 at 07:06:05AM +0800, John Darrington wrote:
On Wed, Jul 23, 2008 at 07:24:21AM -0700, Ben Pfaff wrote:
> On initial tests, it appears to work fine, except that I would have
> expected casereader_get_value_cnt on the new casereader to return 1 more
> than that of the old one. But that's not what I am experiencing.
Er, I would expect that too. On inspection, the code looks
correct; I don't see how casereader_get_value_cnt() could return
a value different from that. Huh.
Maybe I made a mistake then. I'll have a closer look later.
On closer inspection it turns out that the culprit is the function
sort_execute :
struct casereader *
sort_execute (struct casereader *input, struct case_ordering *ordering);
Instead of returning a casereader the same width as INPUT, it returns
one with the width associated with ORDERING.
I don't see any valid reason for a case_ordering to be aware of the
value_cnt, so I'm proposing this patch which seems to fix this
problem.
diff --git a/src/data/case-ordering.c b/src/data/case-ordering.c
index 7b3948c..c4a716e 100644
--- a/src/data/case-ordering.c
+++ b/src/data/case-ordering.c
@@ -37,8 +37,6 @@ struct sort_key
/* A set of criteria for ordering cases. */
struct case_ordering
{
- size_t value_cnt; /* Number of `union value's per case. */
-
/* Sort keys. */
struct sort_key *keys;
size_t key_cnt;
@@ -49,10 +47,9 @@ struct case_ordering
contains no variables, so that all cases will compare as
equal. */
struct case_ordering *
-case_ordering_create (const struct dictionary *dict)
+case_ordering_create (void)
{
struct case_ordering *co = xmalloc (sizeof *co);
- co->value_cnt = dict_get_next_value_idx (dict);
co->keys = NULL;
co->key_cnt = 0;
return co;
@@ -63,7 +60,6 @@ struct case_ordering *
case_ordering_clone (const struct case_ordering *orig)
{
struct case_ordering *co = xmalloc (sizeof *co);
- co->value_cnt = orig->value_cnt;
co->keys = xmemdup (orig->keys, orig->key_cnt * sizeof *orig->keys);
co->key_cnt = orig->key_cnt;
return co;
@@ -80,15 +76,6 @@ case_ordering_destroy (struct case_ordering *co)
}
}
-/* Returns the number of `union value's in the cases that case
- ordering CO compares (taken from the dictionary used to
- construct it). */
-size_t
-case_ordering_get_value_cnt (const struct case_ordering *co)
-{
- return co->value_cnt;
-}
-
/* Compares cases A and B given case ordering CO and returns a
strcmp()-type result. */
int
diff --git a/src/data/case-ordering.h b/src/data/case-ordering.h
index 026cd89..f49f265 100644
--- a/src/data/case-ordering.h
+++ b/src/data/case-ordering.h
@@ -32,7 +32,7 @@ enum sort_direction
};
/* Creation and destruction. */
-struct case_ordering *case_ordering_create (const struct dictionary *);
+struct case_ordering *case_ordering_create (void);
struct case_ordering *case_ordering_clone (const struct case_ordering *);
void case_ordering_destroy (struct case_ordering *);
diff --git a/src/language/stats/rank.q b/src/language/stats/rank.q
index 5bc88c4..cb63949 100644
--- a/src/language/stats/rank.q
+++ b/src/language/stats/rank.q
@@ -261,7 +261,7 @@ rank_cmd (struct dataset *ds, const struct case_ordering
*sc,
/* Sort this split group by the BY variables as primary
keys and the rank variable as secondary key. */
- ordering = case_ordering_create (d);
+ ordering = case_ordering_create ();
for (j = 0; j < n_group_vars; j++)
case_ordering_add_var (ordering, group_vars[j], SRT_ASCEND);
case_ordering_add_var (ordering,
@@ -778,7 +778,7 @@ cmd_rank (struct lexer *lexer, struct dataset *ds)
/* Put the active file back in its original order. Delete
our sort key, which we don't need anymore. */
{
- struct case_ordering *ordering = case_ordering_create (dataset_dict (ds));
+ struct case_ordering *ordering = case_ordering_create ();
struct casereader *sorted;
case_ordering_add_var (ordering, order, SRT_ASCEND);
/* FIXME: loses error conditions. */
diff --git a/src/language/stats/sort-criteria.c
b/src/language/stats/sort-criteria.c
index c84f71d..fd8c7c5 100644
--- a/src/language/stats/sort-criteria.c
+++ b/src/language/stats/sort-criteria.c
@@ -39,7 +39,7 @@ struct case_ordering *
parse_case_ordering (struct lexer *lexer, const struct dictionary *dict,
bool *saw_direction)
{
- struct case_ordering *ordering = case_ordering_create (dict);
+ struct case_ordering *ordering = case_ordering_create ();
const struct variable **vars = NULL;
size_t var_cnt = 0;
diff --git a/src/math/merge.c b/src/math/merge.c
index d56a78c..4fc7c8d 100644
--- a/src/math/merge.c
+++ b/src/math/merge.c
@@ -44,16 +44,18 @@ struct merge
struct case_ordering *ordering;
struct merge_input inputs[MAX_MERGE_ORDER];
size_t input_cnt;
+ size_t value_cnt;
};
static void do_merge (struct merge *m);
struct merge *
-merge_create (const struct case_ordering *ordering)
+merge_create (const struct case_ordering *ordering, size_t value_cnt)
{
struct merge *m = xmalloc (sizeof *m);
m->ordering = case_ordering_clone (ordering);
m->input_cnt = 0;
+ m->value_cnt = value_cnt;
return m;
}
@@ -95,8 +97,7 @@ merge_make_reader (struct merge *m)
}
else if (m->input_cnt == 0)
{
- size_t value_cnt = case_ordering_get_value_cnt (m->ordering);
- struct casewriter *writer = mem_writer_create (value_cnt);
+ struct casewriter *writer = mem_writer_create (m->value_cnt);
r = casewriter_make_reader (writer);
}
else
@@ -129,7 +130,7 @@ do_merge (struct merge *m)
assert (m->input_cnt > 1);
- w = tmpfile_writer_create (case_ordering_get_value_cnt (m->ordering));
+ w = tmpfile_writer_create (m->value_cnt);
for (i = 0; i < m->input_cnt; i++)
taint_propagate (casereader_get_taint (m->inputs[i].reader),
casewriter_get_taint (w));
diff --git a/src/math/merge.h b/src/math/merge.h
index c9c9c48..18322e8 100644
--- a/src/math/merge.h
+++ b/src/math/merge.h
@@ -18,11 +18,12 @@
#define MATH_MERGE_H 1
#include <stdbool.h>
+#include <stddef.h>
struct case_ordering;
struct casereader;
-struct merge *merge_create (const struct case_ordering *);
+struct merge *merge_create (const struct case_ordering *, size_t);
void merge_destroy (struct merge *);
void merge_append (struct merge *, struct casereader *);
struct casereader *merge_make_reader (struct merge *);
diff --git a/src/math/sort.c b/src/math/sort.c
index e03ef57..10b8a12 100644
--- a/src/math/sort.c
+++ b/src/math/sort.c
@@ -41,6 +41,7 @@ int max_buffers = INT_MAX;
struct sort_writer
{
+ size_t value_cnt;
struct case_ordering *ordering;
struct merge *merge;
struct pqueue *pqueue;
@@ -52,7 +53,7 @@ struct sort_writer
static struct casewriter_class sort_casewriter_class;
-static struct pqueue *pqueue_create (const struct case_ordering *);
+static struct pqueue *pqueue_create (const struct case_ordering *, size_t);
static void pqueue_destroy (struct pqueue *);
static bool pqueue_is_full (const struct pqueue *);
static bool pqueue_is_empty (const struct pqueue *);
@@ -62,15 +63,15 @@ static void pqueue_pop (struct pqueue *, struct ccase *,
casenumber *);
static void output_record (struct sort_writer *);
struct casewriter *
-sort_create_writer (struct case_ordering *ordering)
+sort_create_writer (struct case_ordering *ordering, size_t value_cnt)
{
- size_t value_cnt = case_ordering_get_value_cnt (ordering);
struct sort_writer *sort;
sort = xmalloc (sizeof *sort);
+ sort->value_cnt = value_cnt;
sort->ordering = case_ordering_clone (ordering);
- sort->merge = merge_create (ordering);
- sort->pqueue = pqueue_create (ordering);
+ sort->merge = merge_create (ordering, value_cnt);
+ sort->pqueue = pqueue_create (ordering, value_cnt);
sort->run = NULL;
sort->run_id = 0;
case_nullify (&sort->run_end);
@@ -118,8 +119,7 @@ sort_casewriter_convert_to_reader (struct casewriter
*writer, void *sort_)
if (sort->run == NULL && sort->run_id == 0)
{
/* In-core sort. */
- sort->run = mem_writer_create (case_ordering_get_value_cnt (
- sort->ordering));
+ sort->run = mem_writer_create (casewriter_get_value_cnt (writer));
sort->run_id = 1;
}
while (!pqueue_is_empty (sort->pqueue))
@@ -151,8 +151,7 @@ output_record (struct sort_writer *sort)
}
if (sort->run == NULL)
{
- sort->run = tmpfile_writer_create (case_ordering_get_value_cnt (
- sort->ordering));
+ sort->run = tmpfile_writer_create (sort->value_cnt);
sort->run_id = min_run_id;
}
@@ -176,7 +175,8 @@ static struct casewriter_class sort_casewriter_class =
struct casereader *
sort_execute (struct casereader *input, struct case_ordering *ordering)
{
- struct casewriter *output = sort_create_writer (ordering);
+ struct casewriter *output =
+ sort_create_writer (ordering, casereader_get_value_cnt (input));
casereader_transfer (input, output);
return casewriter_make_reader (output);
}
@@ -201,14 +201,14 @@ static int compare_pqueue_records_minheap (const void *a,
const void *b,
const void *pq_);
static struct pqueue *
-pqueue_create (const struct case_ordering *ordering)
+pqueue_create (const struct case_ordering *ordering, size_t value_cnt)
{
struct pqueue *pq;
pq = xmalloc (sizeof *pq);
pq->ordering = case_ordering_clone (ordering);
pq->record_cap
- = settings_get_workspace_cases (case_ordering_get_value_cnt (ordering));
+ = settings_get_workspace_cases (value_cnt);
if (pq->record_cap > max_buffers)
pq->record_cap = max_buffers;
else if (pq->record_cap < min_buffers)
diff --git a/src/math/sort.h b/src/math/sort.h
index 7f7b2f8..ea2c16b 100644
--- a/src/math/sort.h
+++ b/src/math/sort.h
@@ -25,7 +25,7 @@ struct case_ordering;
extern int min_buffers ;
extern int max_buffers ;
-struct casewriter *sort_create_writer (struct case_ordering *);
+struct casewriter *sort_create_writer (struct case_ordering *, size_t
value_cnt);
struct casereader *sort_execute (struct casereader *, struct case_ordering *);
#endif /* math/sort.h */
--
PGP Public key ID: 1024D/2DE827B3
fingerprint = 8797 A26D 0854 2EAB 0285 A290 8A67 719C 2DE8 27B3
See http://pgp.mit.edu or any PGP keyserver for public key.
signature.asc
Description: Digital signature