[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[pre-lexer 20/21] data-in: Make data_in() parameters more uniform.
From: Ben Pfaff
Subject: [pre-lexer 20/21] data-in: Make data_in() parameters more uniform.
Date: Thu, 23 Sep 2010 21:20:56 -0700
data_in() essentially takes an input encoding and an output encoding, but
the corresponding parameters were in inconsistent order, inconsistently
named, and had inconsistent types. This commit makes them uniform, which
I hope makes the function easier to understand.
---
perl-module/PSPP.xs | 5 ++---
src/data/data-in.c | 30 +++++++++++++++---------------
src/data/data-in.h | 7 +++----
src/language/data-io/data-parser.c | 27 ++++++++++++++-------------
src/language/expressions/operations.def | 2 +-
src/language/lexer/value-parser.c | 2 +-
src/language/stats/flip.c | 11 +++++------
src/language/xforms/recode.c | 9 ++++-----
src/ui/gui/helper.c | 3 ++-
src/ui/gui/psppire-data-store.c | 3 ++-
src/ui/gui/text-data-import-dialog.c | 4 ++--
src/ui/syntax-gen.c | 2 +-
12 files changed, 52 insertions(+), 53 deletions(-)
diff --git a/perl-module/PSPP.xs b/perl-module/PSPP.xs
index 0e918a9..bf90d98 100644
--- a/perl-module/PSPP.xs
+++ b/perl-module/PSPP.xs
@@ -655,9 +655,8 @@ CODE:
{
struct substring ss = ss_cstr (SvPV_nolen (sv));
if ( ! data_in (ss, LEGACY_NATIVE, ifmt->type, 0, 0,
- sfi->dict,
- case_data_rw (c, v),
- var_get_width (v)) )
+ case_data_rw (c, v), var_get_width (v),
+ dict_get_encoding (sfi->dict)))
{
RETVAL = 0;
goto finish;
diff --git a/src/data/data-in.c b/src/data/data-in.c
index b8226b3..de19d67 100644
--- a/src/data/data-in.c
+++ b/src/data/data-in.c
@@ -79,17 +79,16 @@ static bool trim_spaces_and_check_missing (struct data_in *);
static int hexit_value (int c);
/* Parses the characters in INPUT, which are encoded in the given
- ENCODING, according to FORMAT. Stores the parsed
- representation in OUTPUT, which the caller must have
- initialized with the given WIDTH (0 for a numeric field,
- otherwise the string width).
- Iff FORMAT is a string format, then DICT must be a pointer
- to the dictionary associated with OUTPUT. Otherwise, DICT
- may be null. */
+ INPUT_ENCODING, according to FORMAT.
+
+ Stores the parsed representation in OUTPUT, which the caller must have
+ initialized with the given WIDTH (0 for a numeric field, otherwise the
+ string width). If FORMAT is FMT_A, then OUTPUT_ENCODING must specify the
+ correct encoding for OUTPUT (normally obtained via dict_get_encoding()). */
bool
-data_in (struct substring input, const char *encoding,
+data_in (struct substring input, const char *input_encoding,
enum fmt_type format, int first_column, int last_column,
- const struct dictionary *dict, union value *output, int width)
+ union value *output, int width, const char *output_encoding)
{
static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] =
{
@@ -146,13 +145,14 @@ data_in (struct substring input, const char *encoding,
else
{
/* Use the final output encoding. */
- dest_encoding = dict_get_encoding (dict);
+ dest_encoding = output_encoding;
}
}
if (dest_encoding != NULL)
{
- i.input = recode_substring_pool (dest_encoding, encoding, input, NULL);
+ i.input = recode_substring_pool (dest_encoding, input_encoding, input,
+ NULL);
s = i.input.string;
}
else
@@ -209,7 +209,7 @@ number_has_implied_decimals (const char *s, enum fmt_type type)
}
static bool
-has_implied_decimals (struct substring input, const char *encoding,
+has_implied_decimals (struct substring input, const char *input_encoding,
enum fmt_type format)
{
bool retval;
@@ -237,7 +237,7 @@ has_implied_decimals (struct substring input, const char *encoding,
return false;
}
- s = recode_string (LEGACY_NATIVE, encoding,
+ s = recode_string (LEGACY_NATIVE, input_encoding,
ss_data (input), ss_length (input));
retval = (format == FMT_Z
? strchr (s, '.') == NULL
@@ -255,11 +255,11 @@ has_implied_decimals (struct substring input, const char *encoding,
If it is appropriate, this function modifies the numeric value in OUTPUT. */
void
-data_in_imply_decimals (struct substring input, const char *encoding,
+data_in_imply_decimals (struct substring input, const char *input_encoding,
enum fmt_type format, int d, union value *output)
{
if (d > 0 && output->f != SYSMIS
- && has_implied_decimals (input, encoding, format))
+ && has_implied_decimals (input, input_encoding, format))
output->f /= pow (10., d);
}
diff --git a/src/data/data-in.h b/src/data/data-in.h
index af62b3a..dbf2fa7 100644
--- a/src/data/data-in.h
+++ b/src/data/data-in.h
@@ -25,10 +25,9 @@
union value;
struct dictionary;
-bool data_in (struct substring input, const char *encoding,
- enum fmt_type, int first_column, int last_column,
- const struct dictionary *dict,
- union value *output, int width);
+bool data_in (struct substring input, const char *input_encoding,
+ enum fmt_type, int first_column, int last_column,
+ union value *output, int width, const char *output_encoding);
void data_in_imply_decimals (struct substring input, const char *encoding,
enum fmt_type format, int d, union value *output);
diff --git a/src/language/data-io/data-parser.c b/src/language/data-io/data-parser.c
index 0802bba..800ae57 100644
--- a/src/language/data-io/data-parser.c
+++ b/src/language/data-io/data-parser.c
@@ -509,7 +509,8 @@ static bool
parse_fixed (const struct data_parser *parser, struct dfm_reader *reader,
struct ccase *c)
{
- const char *encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *input_encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *output_encoding = dict_get_encoding (parser->dict);
struct field *f;
int row;
@@ -536,12 +537,12 @@ parse_fixed (const struct data_parser *parser, struct dfm_reader *reader,
f->format.w);
union value *value = case_data_rw_idx (c, f->case_idx);
- data_in (s, encoding, f->format.type,
+ data_in (s, input_encoding, f->format.type,
f->first_column, f->first_column + f->format.w,
- parser->dict, value, fmt_var_width (&f->format));
+ value, fmt_var_width (&f->format), output_encoding);
- data_in_imply_decimals (s, encoding, f->format.type, f->format.d,
- value);
+ data_in_imply_decimals (s, input_encoding, f->format.type,
+ f->format.d, value);
}
dfm_forward_record (reader);
@@ -557,7 +558,8 @@ static bool
parse_delimited_span (const struct data_parser *parser,
struct dfm_reader *reader, struct ccase *c)
{
- const char *encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *input_encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *output_encoding = dict_get_encoding (parser->dict);
struct string tmp = DS_EMPTY_INITIALIZER;
struct field *f;
@@ -582,10 +584,9 @@ parse_delimited_span (const struct data_parser *parser,
}
}
- data_in (s, encoding, f->format.type, first_column, last_column,
- parser->dict,
+ data_in (s, input_encoding, f->format.type, first_column, last_column,
case_data_rw_idx (c, f->case_idx),
- fmt_var_width (&f->format));
+ fmt_var_width (&f->format), output_encoding);
}
ds_destroy (&tmp);
return true;
@@ -598,7 +599,8 @@ static bool
parse_delimited_no_span (const struct data_parser *parser,
struct dfm_reader *reader, struct ccase *c)
{
- const char *encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *input_encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *output_encoding = dict_get_encoding (parser->dict);
struct string tmp = DS_EMPTY_INITIALIZER;
struct substring s;
struct field *f, *end;
@@ -623,10 +625,9 @@ parse_delimited_no_span (const struct data_parser *parser,
goto exit;
}
- data_in (s, encoding, f->format.type, first_column, last_column,
- parser->dict,
+ data_in (s, input_encoding, f->format.type, first_column, last_column,
case_data_rw_idx (c, f->case_idx),
- fmt_var_width (&f->format));
+ fmt_var_width (&f->format), output_encoding);
}
s = dfm_get_record (reader);
diff --git a/src/language/expressions/operations.def b/src/language/expressions/operations.def
index a945f7b..b778325 100644
--- a/src/language/expressions/operations.def
+++ b/src/language/expressions/operations.def
@@ -581,7 +581,7 @@ string function RTRIM (string s, string c)
function NUMBER (string s, ni_format f)
{
union value out;
- data_in (ss_head (s, f->w), LEGACY_NATIVE, f->type, 0, 0, NULL, &out, 0);
+ data_in (ss_head (s, f->w), LEGACY_NATIVE, f->type, 0, 0, &out, 0, NULL);
data_in_imply_decimals (s, LEGACY_NATIVE, f->type, f->d, &out);
return out.f;
}
diff --git a/src/language/lexer/value-parser.c b/src/language/lexer/value-parser.c
index c2020d3..8cbbab4 100644
--- a/src/language/lexer/value-parser.c
+++ b/src/language/lexer/value-parser.c
@@ -105,7 +105,7 @@ parse_number (struct lexer *lexer, double *x, const enum fmt_type *format)
union value v;
assert (! (fmt_get_category (*format) & ( FMT_CAT_STRING )));
data_in (ds_ss (lex_tokstr (lexer)), LEGACY_NATIVE, *format, 0, 0,
- NULL, &v, 0);
+ &v, 0, NULL);
lex_get (lexer);
*x = v.f;
if (*x == SYSMIS)
diff --git a/src/language/stats/flip.c b/src/language/stats/flip.c
index 3474979..7390b08 100644
--- a/src/language/stats/flip.c
+++ b/src/language/stats/flip.c
@@ -397,6 +397,7 @@ static struct ccase *
flip_casereader_read (struct casereader *reader, void *flip_)
{
struct flip_pgm *flip = flip_;
+ const char *encoding;
struct ccase *c;
size_t i;
@@ -404,12 +405,10 @@ flip_casereader_read (struct casereader *reader, void *flip_)
return false;
c = case_create (casereader_get_proto (reader));
- data_in (ss_cstr (flip->old_names.names[flip->cases_read]), dict_get_encoding (flip->dict),
- FMT_A,
- 0, 0,
- flip->dict,
- case_data_rw_idx (c, 0), 8);
-
+ encoding = dict_get_encoding (flip->dict);
+ data_in (ss_cstr (flip->old_names.names[flip->cases_read]), encoding,
+ FMT_A, 0, 0, case_data_rw_idx (c, 0), 8, encoding);
+
for (i = 0; i < flip->n_cases; i++)
{
double in;
diff --git a/src/language/xforms/recode.c b/src/language/xforms/recode.c
index 7f45865..c9787b1 100644
--- a/src/language/xforms/recode.c
+++ b/src/language/xforms/recode.c
@@ -159,6 +159,7 @@ cmd_recode (struct lexer *lexer, struct dataset *ds)
/* Create destination variables, if needed.
This must be the final step; otherwise we'd have to
delete destination variables on failure. */
+ trns->dst_dict = dataset_dict (ds);
if (trns->src_vars != trns->dst_vars)
create_dst_vars (trns, dataset_dict (ds));
@@ -545,8 +546,6 @@ create_dst_vars (struct recode_trns *trns, struct dictionary *dict)
{
size_t i;
- trns->dst_dict = dict;
-
for (i = 0; i < trns->var_cnt; i++)
{
const struct variable **var = &trns->dst_vars[i];
@@ -608,8 +607,9 @@ static const struct map_out *
find_src_string (struct recode_trns *trns, const uint8_t *value,
const struct variable *src_var)
{
- struct mapping *m;
+ const char *encoding = dict_get_encoding (trns->dst_dict);
int width = var_get_width (src_var);
+ struct mapping *m;
for (m = trns->mappings; m < trns->mappings + trns->map_cnt; m++)
{
@@ -632,8 +632,7 @@ find_src_string (struct recode_trns *trns, const uint8_t *value,
msg_disable ();
match = data_in (ss_buffer (CHAR_CAST_BUG (char *, value), width),
- LEGACY_NATIVE, FMT_F, 0, 0, trns->dst_dict,
- &uv, 0);
+ LEGACY_NATIVE, FMT_F, 0, 0, &uv, 0, encoding);
msg_enable ();
out->value.f = uv.f;
break;
diff --git a/src/ui/gui/helper.c b/src/ui/gui/helper.c
index 70e3c27..b8936e6 100644
--- a/src/ui/gui/helper.c
+++ b/src/ui/gui/helper.c
@@ -98,7 +98,8 @@ text_to_value (const gchar *text,
value_init (val, width);
msg_disable ();
- data_in (ss_cstr (text), UTF8, format->type, 0, 0, dict->dict, val, width);
+ data_in (ss_cstr (text), UTF8, format->type, 0, 0, val, width,
+ dict_get_encoding (dict->dict));
msg_enable ();
return val;
diff --git a/src/ui/gui/psppire-data-store.c b/src/ui/gui/psppire-data-store.c
index fb9c817..7d26f56 100644
--- a/src/ui/gui/psppire-data-store.c
+++ b/src/ui/gui/psppire-data-store.c
@@ -959,7 +959,8 @@ psppire_data_store_data_in (PsppireDataStore *ds, casenumber casenum, gint idx,
FALSE);
value_init (&value, width);
ok = (datasheet_get_value (ds->datasheet, casenum, idx, &value)
- && data_in (input, UTF8, fmt->type, 0, 0, dict->dict, &value, width)
+ && data_in (input, UTF8, fmt->type, 0, 0, &value, width,
+ dict_get_encoding (dict->dict))
&& datasheet_put_value (ds->datasheet, casenum, idx, &value));
value_destroy (&value, width);
diff --git a/src/ui/gui/text-data-import-dialog.c b/src/ui/gui/text-data-import-dialog.c
index 070c4f6..c9227ff 100644
--- a/src/ui/gui/text-data-import-dialog.c
+++ b/src/ui/gui/text-data-import-dialog.c
@@ -1777,8 +1777,8 @@ parse_field (struct import_assistant *ia,
{
msg_disable ();
- if (!data_in (field, LEGACY_NATIVE, in->type, 0, 0, ia->formats.dict,
- &val, var_get_width (var)))
+ if (!data_in (field, LEGACY_NATIVE, in->type, 0, 0, &val,
+ var_get_width (var), dict_get_encoding (ia->formats.dict)))
{
char fmt_string[FMT_STRING_LEN_MAX + 1];
fmt_to_string (in, fmt_string);
diff --git a/src/ui/syntax-gen.c b/src/ui/syntax-gen.c
index b204fbf..aae8c97 100644
--- a/src/ui/syntax-gen.c
+++ b/src/ui/syntax-gen.c
@@ -155,7 +155,7 @@ syntax_gen_number (struct string *output,
msg_disable ();
/* FIXME: UTF8 encoded strings will fail here */
ok = data_in (ss_cstr (s), LEGACY_NATIVE,
- format->type, 0, 0, NULL, &v_out, 0);
+ format->type, 0, 0, &v_out, 0, NULL);
msg_enable ();
if (ok && v_out.f == number)
{
--
1.7.1
- [pre-lexer 00/21] preparation for work on lexer, Ben Pfaff, 2010/09/24
- [pre-lexer 01/21] str: Make ss_alloc_substring() allocate null-terminated strings., Ben Pfaff, 2010/09/24
- [pre-lexer 13/21] command: Remove superfluous trailing spaces from command names., Ben Pfaff, 2010/09/24
- [pre-lexer 19/21] data-in: Rewrite logic for recoding input, and get rid of src_enc member., Ben Pfaff, 2010/09/24
- [pre-lexer 03/21] i18n: New function recode_substring_pool()., Ben Pfaff, 2010/09/24
- [pre-lexer 11/21] lexer: Use lex_is_string() more consistently., Ben Pfaff, 2010/09/24
- [pre-lexer 04/21] syntax-string-source: Fix format string problems., Ben Pfaff, 2010/09/24
- [pre-lexer 08/21] Make translation easier., Ben Pfaff, 2010/09/24
- [pre-lexer 14/21] command: Add specific DATASET unimplemented commands., Ben Pfaff, 2010/09/24
- [pre-lexer 12/21] command: Remove INSERT from list of unimplemented commands., Ben Pfaff, 2010/09/24
- [pre-lexer 20/21] data-in: Make data_in() parameters more uniform.,
Ben Pfaff <=
- [pre-lexer 02/21] i18n: Use UTF8 macro instead of "UTF8" literal string., Ben Pfaff, 2010/09/24
- [pre-lexer 09/21] lexer: Improve translatability of lex_error()., Ben Pfaff, 2010/09/24
- [pre-lexer 15/21] message: Consistently initialize locator; use 0 for "no line number"., Ben Pfaff, 2010/09/24
- [pre-lexer 06/21] AGGREGATE: Simplify code., Ben Pfaff, 2010/09/24
- [pre-lexer 17/21] calendar: Use sensible error reporting in calendar_gregorian_to_offset()., Ben Pfaff, 2010/09/24
- [pre-lexer 16/21] message: Add column range to struct msg_locator., Ben Pfaff, 2010/09/24
- [pre-lexer 05/21] PERMISSIONS: Add missing check for string token., Ben Pfaff, 2010/09/24
- [pre-lexer 10/21] lexer: Remove DUMP_TOKENS debugging feature., Ben Pfaff, 2010/09/24
- [pre-lexer 18/21] data-in: Eliminate "implied_decimals" parameter from data_in()., Ben Pfaff, 2010/09/24
- [pre-lexer 21/21] data-in: Get rid of first_column, last_column arguments., Ben Pfaff, 2010/09/24