gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gawk-diffs] [SCM] gawk branch, feature/api-parser, created. gawk-4.1.0-2442-g62fe07b


From: Andrew J. Schorr
Subject: [gawk-diffs] [SCM] gawk branch, feature/api-parser, created. gawk-4.1.0-2442-g62fe07b
Date: Sun, 5 Mar 2017 17:06:36 -0500 (EST)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, feature/api-parser has been created
        at  62fe07b69e522c909aad303b31443cc3c9bdf6c0 (commit)

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=62fe07b69e522c909aad303b31443cc3c9bdf6c0

commit 62fe07b69e522c909aad303b31443cc3c9bdf6c0
Author: Andrew J. Schorr <address@hidden>
Date:   Sun Mar 5 17:05:36 2017 -0500

    Enable an API input parser to supply an array of field widths to override the default gawk field parsing mechanism.

diff --git a/ChangeLog b/ChangeLog
index 8b87746..bc1fe39 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,37 @@
+2017-03-05         Andrew J. Schorr     <address@hidden>
+
+       * awk.h (set_record): Add a new argument containing a field-width
+       array returned by an API parser.
+       (field_sep_type): Add new enum value Using_API.
+       (current_field_sep_str): Declare new function.
+       * field.c (save_parse_field): New static variable to save the
+       parse_field value in cases where it's overridden by API parsing.
+       (api_fw): New static variable to hold pointer to API parser fieldwidth
+       array.
+       (set_record): Add new field-width array argument. If present, API
+       parsing will override the default parsing mechanism.
+       (api_parse_field): New field parser using field widths supplied by the
+       API. This is very similar to the existing fw_parse_field function.
+       (get_field): Fix typo in comment.
+       (set_parser): New function to set default parser and check whether
+       there's an API parser override in effect. Update PROCINFO["FS"] if
+       something has changed.
+       (set_FIELDWIDTHS): Use set_parser and stop updating PROCINFO["FS"].
+       (set_FS): Ditto.
+       (set_FPAT): Ditto.
+       (current_field_sep): Return Using_API when using the API field parsing
+       widths.
+       (current_field_sep_str): New function to return the proper string
+       value for PROCINFO["FS"].
+       * gawkapi.h (awk_input_buf_t): Add field_width array to enable the
+       parser get_record function to supply field widths to override the
+       default gawk field parsing mechanism.
+       * io.c (inrec): Pass iop->public.field_width to set_record as the
+       3rd argument to enable API field parsing overrides.
+       (do_getline_redir, do_getline): Ditto.
+       * main.c (load_procinfo): Use new current_field_sep_str function
+       instead of switching on the return value from current_field_sep.
+
 2017-02-23         Arnold D. Robbins     <address@hidden>
 
        * awk.h (boolval): Return bool instead of int.
diff --git a/awk.h b/awk.h
index aae5e7e..be6a647 100644
--- a/awk.h
+++ b/awk.h
@@ -1510,7 +1510,7 @@ extern NODE *get_actual_argument(NODE *, int, bool);
 #endif
 /* field.c */
 extern void init_fields(void);
-extern void set_record(const char *buf, int cnt);
+extern void set_record(const char *buf, int cnt, const int *);
 extern void reset_record(void);
 extern void rebuild_record(void);
 extern void set_NF(void);
@@ -1527,9 +1527,11 @@ extern void update_PROCINFO_num(const char *subscript, AWKNUM val);
 typedef enum {
        Using_FS,
        Using_FIELDWIDTHS,
-       Using_FPAT
+       Using_FPAT,
+       Using_API
 } field_sep_type;
 extern field_sep_type current_field_sep(void);
+extern const char *current_field_sep_str(void);
 
 /* gawkapi.c: */
 extern gawk_api_t api_impl;
diff --git a/field.c b/field.c
index 0799fb1..4a5884a 100644
--- a/field.c
+++ b/field.c
@@ -40,6 +40,8 @@ typedef void (* Setfunc)(long, char *, long, NODE *);
 
 static long (*parse_field)(long, char **, int, NODE *,
                             Regexp *, Setfunc, NODE *, NODE *, bool);
+static long (*save_parse_field)(long, char **, int, NODE *,
+                            Regexp *, Setfunc, NODE *, NODE *, bool);
 static long re_parse_field(long, char **, int, NODE *,
                             Regexp *, Setfunc, NODE *, NODE *, bool);
 static long def_parse_field(long, char **, int, NODE *,
@@ -50,6 +52,9 @@ static long sc_parse_field(long, char **, int, NODE *,
                             Regexp *, Setfunc, NODE *, NODE *, bool);
 static long fw_parse_field(long, char **, int, NODE *,
                             Regexp *, Setfunc, NODE *, NODE *, bool);
+static long api_parse_field(long, char **, int, NODE *,
+                            Regexp *, Setfunc, NODE *, NODE *, bool);
+static const int *api_fw = NULL;
 static long fpat_parse_field(long, char **, int, NODE *,
                             Regexp *, Setfunc, NODE *, NODE *, bool);
 static void set_element(long num, char * str, long len, NODE *arr);
@@ -252,7 +257,7 @@ rebuild_record()
  * but better correct than fast.
  */
 void
-set_record(const char *buf, int cnt)
+set_record(const char *buf, int cnt, const int *fw)
 {
        NODE *n;
        static char *databuf;
@@ -306,6 +311,20 @@ set_record(const char *buf, int cnt)
        n->stfmt = STFMT_UNUSED;
        n->flags = (STRING|STRCUR|USER_INPUT);  /* do not set MALLOC */
        fields_arr[0] = n;
+       if (fw != api_fw) {
+               if ((api_fw = fw) != NULL) {
+                       if (parse_field != api_parse_field) {
+                               parse_field = api_parse_field;
+                               update_PROCINFO_str("FS", "API");
+                       }
+               }
+               else {
+                       if (parse_field != save_parse_field) {
+                               parse_field = save_parse_field;
+                               update_PROCINFO_str("FS", current_field_sep_str());
+                       }
+               }
+       }
 
 #undef INITIAL_SIZE
 #undef MAX_SIZE
@@ -760,6 +779,49 @@ fw_parse_field(long up_to, /* parse only up to this field number */
        return nf;
 }
 
+/*
+ * api_parse_field --- field parsing using field widths returned by API parser.
+ *
+ * This is called from get_field() via (*parse_field)().
+ */
+static long
+api_parse_field(long up_to,    /* parse only up to this field number */
+       char **buf,     /* on input: string to parse; on output: point to start next */
+       int len,
+       NODE *fs ATTRIBUTE_UNUSED,
+       Regexp *rp ATTRIBUTE_UNUSED,
+       Setfunc set,    /* routine to set the value of the parsed field */
+       NODE *n,
+       NODE *dummy ATTRIBUTE_UNUSED, /* sep_arr not needed here: hence dummy */
+       bool in_middle ATTRIBUTE_UNUSED)
+{
+       char *scan = *buf;
+       long nf = parse_high_water;
+       char *end = scan + len;
+       int skiplen;
+
+       if (up_to == UNLIMITED)
+               nf = 0;
+       if (len == 0)
+               return nf;
+       while (nf < up_to) {
+               if (((skiplen = api_fw[2*nf]) < 0) ||
+                   ((len = api_fw[2*nf+1]) < 0)) {
+                       *buf = end;
+                       return nf;
+               }
+               if (skiplen > end - scan)
+                       skiplen = end - scan;
+               scan += skiplen;
+               if (len > end - scan)
+                       len = end - scan;
+               (*set)(++nf, scan, (long) len, n);
+               scan += len;
+       }
+       *buf = scan;
+       return nf;
+}
+
 /* invalidate_field0 --- $0 needs reconstruction */
 
 void
@@ -845,7 +907,7 @@ get_field(long requested, Func_ptr *assign)
                if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen)
                        NF = parse_high_water;
                else if (parse_field == fpat_parse_field) {
-                       /* FPAT parsing is wierd, isolate the special cases */
+                       /* FPAT parsing is weird, isolate the special cases */
                        char *rec_start = fields_arr[0]->stptr;
                        char *rec_end = fields_arr[0]->stptr + 
fields_arr[0]->stlen;
 
@@ -1057,6 +1119,18 @@ do_patsplit(int nargs)
        return tmp;
 }
 
+/* set_parser --- save the default (non-API) parser; install it unless API parsing is in effect */
+
+static void
+set_parser(long (*func)(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool))
+{
+       save_parse_field = func;
+       if (parse_field != api_parse_field && parse_field != func) {
+               parse_field = func;
+               update_PROCINFO_str("FS", current_field_sep_str());
+       }
+}
+
 /* set_FIELDWIDTHS --- handle an assignment to FIELDWIDTHS */
 
 void
@@ -1084,7 +1158,7 @@ set_FIELDWIDTHS()
        if (fields_arr != NULL)
                (void) get_field(UNLIMITED - 1, 0);
 
-       parse_field = fw_parse_field;
+       set_parser(fw_parse_field);
        tmp = force_string(FIELDWIDTHS_node->var_value);
        scan = tmp->stptr;
 
@@ -1134,7 +1208,6 @@ set_FIELDWIDTHS()
        }
        FIELDWIDTHS[i+1] = -1;
 
-       update_PROCINFO_str("FS", "FIELDWIDTHS");
        if (fatal_error)
                fatal(_("invalid FIELDWIDTHS value, near `%s'"),
                              scan);
@@ -1205,7 +1278,7 @@ choose_fs_function:
        if (! do_traditional && fs->stlen == 0) {
                static bool warned = false;
 
-               parse_field = null_parse_field;
+               set_parser(null_parse_field);
 
                if (do_lint && ! warned) {
                        warned = true;
@@ -1214,10 +1287,10 @@ choose_fs_function:
        } else if (fs->stlen > 1) {
                if (do_lint_old)
                        warning(_("old awk does not support regexps as value of 
`FS'"));
-               parse_field = re_parse_field;
+               set_parser(re_parse_field);
        } else if (RS_is_null) {
                /* we know that fs->stlen <= 1 */
-               parse_field = sc_parse_field;
+               set_parser(sc_parse_field);
                if (fs->stlen == 1) {
                        if (fs->stptr[0] == ' ') {
                                default_FS = true;
@@ -1233,7 +1306,7 @@ choose_fs_function:
                        }
                }
        } else {
-               parse_field = def_parse_field;
+               set_parser(def_parse_field);
 
                if (fs->stlen == 1) {
                        if (fs->stptr[0] == ' ')
@@ -1242,7 +1315,7 @@ choose_fs_function:
                                /* same special case */
                                strcpy(buf, "[\\\\]");
                        else
-                               parse_field = sc_parse_field;
+                               set_parser(sc_parse_field);
                }
        }
        if (remake_re) {
@@ -1254,7 +1327,7 @@ choose_fs_function:
                        FS_re_yes_case = make_regexp(buf, strlen(buf), false, 
true, true);
                        FS_re_no_case = make_regexp(buf, strlen(buf), true, 
true, true);
                        FS_regexp = (IGNORECASE ? FS_re_no_case : 
FS_re_yes_case);
-                       parse_field = re_parse_field;
+                       set_parser(re_parse_field);
                } else if (parse_field == re_parse_field) {
                        FS_re_yes_case = make_regexp(fs->stptr, fs->stlen, 
false, true, true);
                        FS_re_no_case = make_regexp(fs->stptr, fs->stlen, true, 
true, true);
@@ -1270,8 +1343,6 @@ choose_fs_function:
         */
        if (fs->stlen == 1 && parse_field == re_parse_field)
                FS_regexp = FS_re_yes_case;
-
-       update_PROCINFO_str("FS", "FS");
 }
 
 /* current_field_sep --- return what field separator is */
@@ -1283,10 +1354,27 @@ current_field_sep()
                return Using_FIELDWIDTHS;
        else if (parse_field == fpat_parse_field)
                return Using_FPAT;
+       else if (parse_field == api_parse_field)
+               return Using_API;
        else
                return Using_FS;
 }
 
+/* current_field_sep_str --- return what field separator is, as a string for PROCINFO["FS"] */
+
+const char *
+current_field_sep_str()
+{
+       if (parse_field == fw_parse_field)
+               return "FIELDWIDTHS";
+       else if (parse_field == fpat_parse_field)
+               return "FPAT";
+       else if (parse_field == api_parse_field)
+               return "API";
+       else
+               return "FS";
+}
+
 /* update_PROCINFO_str --- update PROCINFO[sub] with string value */
 
 void
@@ -1373,7 +1461,7 @@ set_FPAT()
 
 set_fpat_function:
        fpat = force_string(FPAT_node->var_value);
-       parse_field = fpat_parse_field;
+       set_parser(fpat_parse_field);
 
        if (remake_re) {
                refree(FPAT_re_yes_case);
@@ -1384,8 +1472,6 @@ set_fpat_function:
                FPAT_re_no_case = make_regexp(fpat->stptr, fpat->stlen, true, 
true, true);
                FPAT_regexp = (IGNORECASE ? FPAT_re_no_case : FPAT_re_yes_case);
        }
-
-       update_PROCINFO_str("FS", "FPAT");
 }
 
 /*
diff --git a/gawkapi.h b/gawkapi.h
index 5071adc..6d552b8 100644
--- a/gawkapi.h
+++ b/gawkapi.h
@@ -151,6 +151,18 @@ typedef struct awk_input {
                        char **rt_start, size_t *rt_len);
 
        /*
+        * If this pointer is non-NULL, then this record should be parsed
+        * using the supplied field widths instead of the default gawk
+        * field parsing mechanism. The field_width array should have
+        * at least 2*NF+1 elements, and the value of field_width[2*NF]
+        * must be negative. The first entry field_width[0] should contain
+        * the number of bytes to skip before $1; field_width[1] contains
+        * the number of bytes in $1. Note that these values are specified
+        * in bytes, not (potentially multi-byte) characters!
+        */
+       const int *field_width;
+
+       /*
         * No argument prototype on read_func to allow for older systems
         * whose headers are not up to date.
         */
diff --git a/io.c b/io.c
index cced126..040b485 100644
--- a/io.c
+++ b/io.c
@@ -604,7 +604,7 @@ inrec(IOBUF *iop, int *errcode)
        } else {
                INCREMENT_REC(NR);
                INCREMENT_REC(FNR);
-               set_record(begin, cnt);
+               set_record(begin, cnt, iop->public.field_width);
                if (*errcode > 0)
                        retval = false;
        }
@@ -2668,7 +2668,7 @@ do_getline_redir(int into_variable, enum redirval 
redirtype)
        }
 
        if (lhs == NULL)        /* no optional var. */
-               set_record(s, cnt);
+               set_record(s, cnt, iop->public.field_width);
        else {                  /* assignment to variable */
                unref(*lhs);
                *lhs = make_string(s, cnt);
@@ -2709,7 +2709,7 @@ do_getline(int into_variable, IOBUF *iop)
        INCREMENT_REC(FNR);
 
        if (! into_variable)    /* no optional var. */
-               set_record(s, cnt);
+               set_record(s, cnt, iop->public.field_width);
        else {                  /* assignment to variable */
                NODE **lhs;
                lhs = POP_ADDRESS();
diff --git a/main.c b/main.c
index 56482f5..8117552 100644
--- a/main.c
+++ b/main.c
@@ -1005,22 +1005,7 @@ load_procinfo()
        value = getegid();
        update_PROCINFO_num("egid", value);
 
-       switch (current_field_sep()) {
-       case Using_FIELDWIDTHS:
-               update_PROCINFO_str("FS", "FIELDWIDTHS");
-               break;
-       case Using_FPAT:
-               update_PROCINFO_str("FS", "FPAT");
-               break;
-       case Using_FS:
-               update_PROCINFO_str("FS", "FS");
-               break;
-       default:
-               fatal(_("unknown value for field spec: %d\n"),
-                               current_field_sep());
-               break;
-       }
-
+       update_PROCINFO_str("FS", current_field_sep_str());
 
 #if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
        for (i = 0; i < ngroups; i++) {

-----------------------------------------------------------------------


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]