[SCM] gawk branch, stable/printf-rework, updated. gawk-4.1.0-5491-gdde70cc7

gawk-diffs
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[SCM] gawk branch, stable/printf-rework, updated. gawk-4.1.0-5491-gdde70cc7

From:	Arnold Robbins
Subject:	[SCM] gawk branch, stable/printf-rework, updated. gawk-4.1.0-5491-gdde70cc7
Date:	Fri, 5 Jul 2024 09:56:44 -0400 (EDT)
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, stable/printf-rework has been updated
       via  dde70cc78f2cfc69439c35713575158d14366567 (commit)
      from  8d18169d7124cee926d1755e64c6eb5ae3edef20 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=dde70cc78f2cfc69439c35713575158d14366567

commit dde70cc78f2cfc69439c35713575158d14366567
Author: Arnold D. Robbins <arnold@skeeve.com>
Date:   Fri Jul 5 16:56:21 2024 +0300

    Progress with printf. Checkpoint commit.

diff --git a/ChangeLog b/ChangeLog
index 9108e64e..0f07fbab 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2024-07-05         Arnold D. Robbins     <arnold@skeeve.com>
+
+       * printf.c: Lots of cleanup. Floating point format pretty
+       much working. Signed integer printing getting there.
+       Unsigned integer printing still to do.
+
 2024-07-02         Arnold D. Robbins     <arnold@skeeve.com>
 
        * awk.h (format_tree_new, format_tree_old): Add declarations.
diff --git a/printf.c b/printf.c
index dadca64e..3cb5450c 100644
--- a/printf.c
+++ b/printf.c
@@ -51,12 +51,14 @@ extern FILE *output_fp;
 static size_t mbc_byte_count(const char *ptr, size_t numchars);
 static size_t mbc_char_count(const char *ptr, size_t numbytes);
 static void reverse(char *str);
+static bool compute_zero_flag(struct flags *flags);
+static void adjust_flags(struct flags *flags);
 static const char *add_thousands(const char *original);
-static const char *format_integer(NODE *arg, struct flags *flags);
+static const char *format_signed_integer(NODE *arg, struct flags *flags);
+static const char *format_unsigned_integer(NODE *arg, struct flags *flags);
 static const char *format_mpg_integer(NODE *arg, struct flags *flags);
 static const char *format_float(NODE *arg, struct flags *flags);
 static const char *format_out_of_range(NODE *arg, struct flags *flags);
-static bool compute_zero_flag(struct flags *flags);
 
 #ifdef HAVE_MPFR
 
@@ -1236,7 +1238,7 @@ format_tree_new(
        static const char zero_string[] = "0";
        static const char lchbuf[] = "0123456789abcdef";
        static const char Uchbuf[] = "0123456789ABCDEF";
-       static const char bad_modifiers[] = "hjlLtz";
+       static const char bad_modifiers[] = "DHhjLltwz";
        static bool warned[sizeof(bad_modifiers)-1];    // auto-init to zero
 
        bool modifier_seen[sizeof(bad_modifiers)-1];
@@ -1535,11 +1537,14 @@ check_pos:
 #else
                        goto retry;
 #endif
+               case 'D':
+               case 'H':
                case 'h':
                case 'j':
-               case 'l':
                case 'L':
+               case 'l':
                case 't':
+               case 'w':
                case 'z':
                        if (modifier_seen[modifier_index(cs1)])
                                break;
@@ -1563,9 +1568,6 @@ check_pos:
                                break;
                        magic_posix_flag = true;
                        goto retry;
-               case 'C':       // POSIX 2024
-                       cs1 = 'c';
-                       // FALL THROUGH
                case 'c':
                        need_format = false;
                        parse_next_arg();
@@ -1645,9 +1647,6 @@ out0:
                                }
                        }
                        goto pr_tail;
-               case 'S':       // POSIX 2024
-                       cs1 = 's';
-                       // FALL THROUGH
                case 's':
                        need_format = false;
                        parse_next_arg();
@@ -1666,6 +1665,35 @@ out0:
                        need_format = false;
                        parse_next_arg();
                        (void) force_number(arg);
+                       base = 10;
+
+#define set_flags() \
+       flags.left_just = lj; \
+       flags.alt = alt; \
+       flags.zero = zero_flag; \
+       flags.space = space_flag; \
+       flags.plus = plus_flag; \
+       flags.quote = quote_flag; \
+       flags.have_prec = have_prec; \
+       flags.format = cs1; \
+       flags.base = base; \
+       flags.field_width = fw; \
+       flags.precision = prec
+
+                       set_flags();
+                       adjust_flags(& flags);
+                       {
+                       const char *formatted = format_signed_integer(arg, & flags);
+                       len = strlen(formatted);
+                       chksize(len)
+                       strcpy(obufout, formatted);
+                       free((void *) formatted);
+
+                       ofre -= len;
+                       obufout += len;
+                       s0 = s1;
+                       break;
+                       }
 
                        /*
                         * Check for Nan or Inf.
@@ -1794,20 +1822,8 @@ out0:
                        parse_next_arg();
                        (void) force_number(arg);
 
-#define set_flags() \
-       flags.left_just = lj; \
-       flags.alt = alt; \
-       flags.zero = zero_flag; \
-       flags.space = space_flag; \
-       flags.plus = plus_flag; \
-       flags.quote = quote_flag; \
-       flags.have_prec = have_prec; \
-       flags.format = cs1; \
-       flags.base = base; \
-       flags.field_width = fw; \
-       flags.precision = prec
-
                        set_flags();
+                       adjust_flags(& flags);
 
 
                        if (out_of_range(arg))
@@ -2024,16 +2040,6 @@ mpf1:
                                break;
                        }
 
-               case 'F':
-#if ! defined(PRINTF_HAS_F_FORMAT) || PRINTF_HAS_F_FORMAT != 1
-                       cs1 = 'f';
-                       /* FALL THROUGH */
-#endif
-               case 'g':
-               case 'G':
-               case 'e':
-               case 'f':
-               case 'E':
 #if defined(PRINTF_HAS_A_FORMAT) && PRINTF_HAS_A_FORMAT == 1
                case 'A':
                case 'a':
@@ -2044,13 +2050,26 @@ mpf1:
                                warned = true;
                                lintwarn(_("%%%c format is POSIX standard but not portable to other awks"), cs1);
                        }
+                       base = 6;
                }
 #endif
+               case 'F':
+#if ! defined(PRINTF_HAS_F_FORMAT) || PRINTF_HAS_F_FORMAT != 1
+                       cs1 = 'f';
+                       /* FALL THROUGH */
+#endif
+               case 'g':
+               case 'G':
+               case 'e':
+               case 'f':
+               case 'E':
+                       base += 10;
                        need_format = false;
                        parse_next_arg();
                        (void) force_number(arg);
 
                        set_flags();
+                       adjust_flags(& flags);
                        {
                        const char *formatted = format_float(arg, & flags);
                        len = strlen(formatted);
@@ -2326,289 +2345,175 @@ do_printf(int nargs, int redirtype)
                gawk_exit(EXIT_FATAL);
 }
 
-/* mbc_byte_count --- return number of bytes for corresponding numchars multibyte characters */
+/* format_integer_digits --- format just the actual value of an integer. caller frees return value */
 
-static size_t
-mbc_byte_count(const char *ptr, size_t numchars)
+static const char *
+format_integer_digits(NODE *arg, struct flags *flags)
 {
-       mbstate_t cur_state;
-       size_t sum = 0;
-       int mb_len;
-
-       memset(& cur_state, 0, sizeof(cur_state));
-
-       assert(gawk_mb_cur_max > 1);
-       mb_len = mbrlen(ptr, numchars * gawk_mb_cur_max, &cur_state);
-       if (mb_len <= 0)
-               return numchars;        /* no valid m.b. char */
+#define VALUE_SIZE 40
+       char *buf = NULL;
+       size_t buflen;
+       static const char lchbuf[] = "0123456789abcdef";
+       static const char Uchbuf[] = "0123456789ABCDEF";
+       const char *chbuf;
+       char *cp;
+       bool quote_flag = false;
+       bool negative = false;
+       uintmax_t uval;
 
-       for (; numchars > 0; numchars--) {
-               mb_len = mbrlen(ptr, numchars * gawk_mb_cur_max, &cur_state);
-               if (mb_len <= 0)
-                       break;
-               sum += mb_len;
-               ptr += mb_len;
+#define growbuffer(buf, buflen, cp) { \
+               erealloc(buf, char *, buflen * 2, "format_integer_xxx"); \
+               cp = buf + buflen; \
+               buflen *= 2; \
        }
 
-       return sum;
-}
-
-/* mbc_char_count --- return number of m.b. chars in string, up to numbytes bytes */
+#if defined(HAVE_LOCALE_H)
+       quote_flag = (flags->quote && loc.thousands_sep[0] != '\0');
+#endif
 
-static size_t
-mbc_char_count(const char *ptr, size_t numbytes)
-{
-       mbstate_t cur_state;
-       size_t sum = 0;
-       int mb_len;
+       chbuf = (flags->format == 'X' ? Uchbuf : lchbuf);
+       emalloc(buf, char *, VALUE_SIZE, "format_integer_digits");
+       buflen = VALUE_SIZE;
+       cp = buf;
 
-       if (gawk_mb_cur_max == 1)
-               return numbytes;
+       // C 2023 says negative zeros get a minus sign
+       // for floating conversions.
+       if (flags->base == 10 && arg->numbr < 0) {
+               negative = true;
+               arg->numbr = -arg->numbr;
+       }
+       uval = get_number_uj(arg);
 
-       memset(& cur_state, 0, sizeof(cur_state));
+       // generate the digits backwards.
+       do {
+               if (cp >= buf + buflen)
+                       growbuffer(buf, buflen, cp);
 
-       mb_len = mbrlen(ptr, numbytes, &cur_state);
-       if (mb_len <= 0)
-               return numbytes;        /* no valid m.b. char */
+               *cp++ = chbuf[uval % flags->base];
+               uval /= flags->base;
+       } while (uval > 0);
+       if (negative)
+               *cp++ = '-';
+       *cp = '\0';
 
-       while (numbytes > 0) {
-               mb_len = mbrlen(ptr, numbytes, &cur_state);
-               if (mb_len <= 0)
-                       break;
-               sum++;
-               ptr += mb_len;
-               numbytes -= mb_len;
-       }
+       // turn it back around
+       reverse(buf);
 
-       return sum;
-}
+       if (flags->base == 10 && quote_flag) {
+               const char *with_commas = add_thousands(buf);
 
-/* out_of_range --- return true if a value is out of range */
+               free((void *) buf);
+               buf = (char *) with_commas;
+       }
 
-bool
-out_of_range(NODE *n)
-{
-#ifdef HAVE_MPFR
-       if (is_mpg_integer(n))
-               return false;
-       else if (is_mpg_float(n))
-               return (! mpfr_number_p(n->mpg_numbr));
-       else
-#endif
-               return (isnan(n->numbr) || isinf(n->numbr));
+       return buf;
 }
 
-/* format_nan_inf --- format NaN and INF values. return value is to a static buffer */
+/* format_signed_integer --- format a signed integer (decimal) value. caller frees return value */
 
-char *
-format_nan_inf(NODE *n, char format)
+static const char *
+format_signed_integer(NODE *arg, struct flags *flags)
 {
-       static char buf[100];
-       double val = n->numbr;
+       const char *number_value;
+       size_t val_len;
+       char *buf1 = NULL;
+       char fill[] = " ";
 
-#ifdef HAVE_MPFR
-       if (is_mpg_integer(n))
-               return NULL;
-       else if (is_mpg_float(n)) {
-               if (mpfr_nan_p(n->mpg_numbr)) {
-                       strcpy(buf, mpfr_signbit(n->mpg_numbr) != 0 ? "-nan" : "+nan");
+       if (out_of_range(arg))
+               return format_out_of_range(arg, flags);
 
-                       goto fmt;
-               } else if (mpfr_inf_p(n->mpg_numbr)) {
-                       strcpy(buf, mpfr_signbit(n->mpg_numbr) ? "-inf" : "+inf");
+       if (is_mpg_integer(arg) || is_mpg_float(arg))
+               return format_mpg_integer(arg, flags);
 
-                       goto fmt;
-               } else
-                       return NULL;
-       }
-       /* else
-               fallthrough */
-#endif
+       number_value = format_integer_digits(arg, flags);       // just digits, possible leading '-'
+       val_len = strlen(number_value);
 
-       if (isnan(val)) {
-               strcpy(buf, signbit(val) != 0 ? "-nan" : "+nan");
+       // We now have the initial *integer* decimal value in hand.
+       // If it's decimal, we've added commas if appropriate. If it's negative
+       // and decimal, it has a minus sign.
+       
+       // The next step is to deal with the rest of the printf flags.
 
-               // fall through to end
-       } else if (isinf(val)) {
-               strcpy(buf, val < 0 ? "-inf" : "+inf");
+       // add more output digits to match the precision
+       if (flags->have_prec && val_len < flags->precision) {
+               char *cp;
+               const char *src;
+               int prec = flags->precision;
+
+               emalloc(buf1, char *, flags->precision + 1 + (flags->plus || flags->space),
+                               "format_signed_integer");
+               cp = buf1;
+               src = number_value;
+               if (number_value[0] == '-') {
+                       *cp++ = '-';
+                       src++;
+                       val_len--;
+               } else if (flags->plus) {
+                       *cp++ = '+';
+               } else if (flags->space) {
+                       *cp++ = ' ';
+               }
 
-               // fall through to end
-       } else
-               return NULL;
+               for (; prec > val_len; prec--)
+                       *cp++ = '0';
 
-#ifdef HAVE_MPFR
-fmt:
-#endif
-       if (isupper(format)) {
-               int i;
+               strcpy(cp, src);
+               free((void *) number_value);
+       } else if (flags->field_width > val_len) {
+               char *cp;
+               const char *src;
+               int fw = flags->field_width;
 
-               for (i = 0; buf[i] != '\0'; i++)
-                       buf[i] = toupper(buf[i]);
-       }
-       return buf;
-}
+               emalloc(buf1, char *, flags->field_width + 1 + (flags->plus || flags->space),
+                               "format_signed_integer");
+
+               cp = buf1;
+               src = number_value;
+               if (number_value[0] == '-') {
+                       *cp++ = '-';
+                       src++;
+                       fw--;
+                       val_len--;
+               } else if (flags->plus) {
+                       *cp++ = '+';
+                       fw--;
+               } else if (flags->space) {
+                       *cp++ = ' ';
+                       fw--;
+               }
 
+               if (compute_zero_flag(flags))
+                       fill[0] = '0';
 
-/* reverse --- reverse the contents of a string in place */
+               if (flags->left_just) {
+                       strcpy(cp, src);
+                       cp += val_len;
+                       for (; fw > val_len; fw--)
+                               *cp++ = fill[0];
+               } else {
+                       for (; fw > val_len; fw--)
+                               *cp++ = fill[0];
+                       strcpy(cp, src);
+               }
 
-static void
-reverse(char *str)
-{
-       int i, j;
-       char tmp;
+               free((void *) number_value);
+       } else
+               buf1 = (char *) number_value;
 
-       for (i = 0, j = strlen(str) - 1; j > i; i++, j--) {
-               tmp = str[i];
-               str[i] = str[j];
-               str[j] = tmp;
-       }
+       return buf1;
 }
 
-/* add_thousands --- add the thousands separator. caller free the return value */
-
-/*
- * Copy the source string into the destination string, backwards,
- * adding the thousands separator at the right points. Then reverse
- * the string when done. This gives us much cleaner code than trying
- * to work through the string backwards. (We tried it, it was yucky.)
- */
+/* format_unsigned_integer --- format an unsigned integer value. caller frees return value */
 
 static const char *
-add_thousands(const char *original)
-{
-       size_t orig_len = strlen(original);
-       size_t new_len = orig_len + 1;
-       char *newbuf;
-       const char *src;
-       char *dest;
-
-       emalloc(newbuf, char *, new_len, "add_thousands");
-       memset(newbuf, '\0', new_len);
-
-#if defined(HAVE_LOCALE_H)
-       new_len = orig_len + (orig_len * strlen(loc.thousands_sep)) + 1;        // worst case
-       src = original + strlen(original) - 1;
-       dest = newbuf;
-
-       if (loc.decimal_point[0] != '\0') {
-               const char *dec = NULL;
-
-               if ((dec = strchr(original, loc.decimal_point[0])) != NULL) {
-                       while (src >= dec)
-                               *dest++ = *src--;
-               }
-       }
-
-
-       int ii = 0;
-       int jj = 0;
-       do {
-               *dest++ = *src--;
-               if (loc.grouping[ii] && ++jj == loc.grouping[ii]) {
-                       if (src >= original) {  /* only add if more digits coming */
-                               const char *ts = loc.thousands_sep;
-                               int k;
-
-                               for (k = strlen(ts) - 1; k >= 0; k--)
-                                       *dest++ = ts[k];
-                       }
-                       if (loc.grouping[ii+1] == 0)
-                               jj = 0;         /* keep using current val in loc.grouping[ii] */
-                       else if (loc.grouping[ii+1] == CHAR_MAX) {
-                               // copy in the rest and be done
-                               while (src >= original)
-                                       *dest++ = *src--;
-                               break;
-                       } else {
-                               ii++;
-                               jj = 0;
-                       }
-               }
-       } while (src >= original);
-
-       *dest++ = '\0';
-       reverse(newbuf);
-#else
-       strcpy(newbuf, original);
-#endif
-
-       return newbuf;
-}
-
-/* format_integer_value --- format just the actual value of an integer. caller frees return value */
-
-static const char *
-format_integer_value(NODE *arg, struct flags *flags)
-{
-#define VALUE_SIZE 40
-       char *buf = NULL;
-       size_t buflen;
-       static const char lchbuf[] = "0123456789abcdef";
-       static const char Uchbuf[] = "0123456789ABCDEF";
-       const char *chbuf;
-       char *cp;
-       bool quote_flag = false;
-       bool negative = false;
-       uintmax_t uval;
-
-#define growbuffer(buf, buflen, cp) { \
-               erealloc(buf, char *, buflen * 2, "format_integer"); \
-               cp = buf + buflen; \
-               buflen *= 2; \
-       }
-
-#if defined(HAVE_LOCALE_H)
-       quote_flag = (flags->quote && loc.thousands_sep[0] != '\0');
-#endif
-
-       chbuf = (flags->format == 'X' ? Uchbuf : lchbuf);
-       emalloc(buf, char *, VALUE_SIZE, "format_integer_value");
-       buflen = VALUE_SIZE;
-       cp = buf;
-
-       // C 2023 says negative zeros get a minus sign
-       if (flags->base == 10 && (arg->numbr < 0 || arg->numbr == -0)) {
-               negative = true;
-               arg->numbr = -arg->numbr;
-       }
-       uval = get_number_uj(arg);
-
-       // generate the digits backwards.
-       do {
-               if (cp >= buf + buflen)
-                       growbuffer(buf, buflen, cp);
-
-               *cp++ = chbuf[uval % flags->base];
-               uval /= flags->base;
-       } while (uval > 0);
-       if (negative)
-               *cp++ = '-';
-       *cp = '\0';
-
-       // turn it back around
-       reverse(buf);
-
-       if (flags->base == 10 && quote_flag) {
-               const char *with_commas = add_thousands(buf);
-
-               free((void *) buf);
-               buf = (char *) with_commas;
-       }
-
-       return buf;
-}
-
-/* format_integer --- format a signed or unsigned integer value. caller frees return value */
-
-static const char *
-format_integer(NODE *arg, struct flags *flags)
+format_unsigned_integer(NODE *arg, struct flags *flags)
 {
+       // FIXME: still needs work
        const char *number_value;
        double tmpval;
        char *buf1 = NULL;
        size_t buflen;
        char *buf2 = NULL;
-       uintmax_t uval;
        char fill[] = " ";
 
        if (out_of_range(arg))
@@ -2617,19 +2522,9 @@ format_integer(NODE *arg, struct flags *flags)
        if (is_mpg_integer(arg) || is_mpg_float(arg))
                return format_mpg_integer(arg, flags);
 
+       number_value = format_integer_digits(arg, flags);       // just digits, possible leading '-'
        tmpval = arg->numbr;
-       if (tmpval < 0) {
-               uval = (uintmax_t) (intmax_t) tmpval;
-               if ((AWKNUM)(intmax_t)uval != double_to_int(tmpval))
-                       return format_out_of_range(arg, flags);
-       } else {
-               uval = (intmax_t) tmpval;
-               if ((AWKNUM)uval != double_to_int(tmpval))
-                       return format_out_of_range(arg, flags);
-       }
 
-       // we now have an integer we can format, do so
-       number_value = format_integer_value(arg, flags);
 
        // We now have the initial *integer* decimal, octal, or hex value in hand.
        // If it's decimal, we've added commas if appropriate. If it's negative
@@ -2813,6 +2708,8 @@ pr_tail:
        return buf1;
 }
 
+////////////////// below here all is good /////////////////////////////////
+
 /* format_out_of_range --- format an out of range value as %g. caller frees return value */
 
 static const char *
@@ -2845,10 +2742,11 @@ format_out_of_range(NODE *arg, struct flags *flags)
        // A NaN or Inf, deal with a field width, if any
        size_t len = strlen(nan_inf_val);
        if (flags->field_width > len) {
-               char *cp, *buf;
+               char *cp = NULL, *buf = NULL;
                int fw = flags->field_width;
 
                emalloc(buf, char *, fw + 1, "format_out_of_range");
+               cp = buf;
                if (flags->left_just) {
                        strcpy(cp, nan_inf_val);
                        cp += len;
@@ -2867,6 +2765,105 @@ format_out_of_range(NODE *arg, struct flags *flags)
        return strdup(nan_inf_val);
 }
 
+/* compute_zero_flag --- return true if we want to fill with zeros */
+
+static bool
+compute_zero_flag(struct flags *flags)
+{
+       bool zero_flag;
+
+       /*
+        * When to fill with zeroes is of course not simple.
+        * First: No zero fill if left-justifying.
+        * Next: There seem to be two cases:
+        *      A '0' without a precision, e.g. %06d
+        *      A precision with no field width, e.g. %.10d
+        * Any other case, we don't want to fill with zeroes.
+        */
+       zero_flag = (! flags->left_just
+                   && ((flags->zero && ! flags->have_prec)
+                        || (flags->field_width == 0 && flags->have_prec)));
+
+       return zero_flag;
+}
+
+/* format_mpg_integer --- format an MPZ or MPFR integer. caller frees return value */
+
+static const char *
+format_mpg_integer(NODE *arg, struct flags *flags)
+{
+       return strdup("mpg_int");
+}
+
+
+/* adjust_flags --- take care of conflicts between flags */
+
+/*
+ * The flag characters and their meanings are:
+ * 
+ * - The result of the conversion is left-justified within the field. (It is
+ *   right-justified if this flag is not specified.)
+ * 
+ * + The result of a signed conversion always begins with a plus or minus
+ *   sign. (It begins with a sign only when a value with a negative sign
+ *   is converted if this flag is not specified.)
+ *
+ * space   If the first character of a signed conversion is not a sign,
+ *   or if a signed conversion results in no characters, a space is prefixed
+ *   to the result. If the space and + flags both appear, the space flag is ignored.
+ * 
+ * # The result is converted to an "alternative form". For o conversion,
+ *   it increases the precision, if and only if necessary, to force the
+ *   first digit of the result to be a zero (if the value and precision are
+ *   both 0, a single 0 is printed). For b conversion, a nonzero result has
+ *   0b prefixed to it. For the optional B conversion as described below,
+ *   a nonzero result has 0B prefixed to it. For x (or X) conversion,
+ *   a nonzero result has 0x (or 0X) prefixed to it. For a, A, e, E, f,
+ *   F, g, and G conversions, the result of converting a floating-point
+ *   number always contains a decimal-point character, even if no digits
+ *   follow it. (Normally, a decimal-point character appears in the result of
+ *   these conversions only if a digit follows it.) For g and G conversions,
+ *   trailing zeros are not removed from the result. For other conversions,
+ *   the behavior is undefined.
+ * 
+ * 0 For b, d, i, o, u, x, X, a, A, e, E, f, F, g, and G conversions,
+ *   leading zeros (following any indication of sign or base) are used to
+ *   pad to the field width rather than performing space padding, except
+ *   when converting an infinity or NaN. If the 0 and - flags both appear,
+ *   the 0 flag is ignored. For d, i, o, u, x, and X conversions, if a
+ *   precision is specified, the 0 flag is ignored. For other conversions,
+ *   the behavior is undefined.
+ */
+
+static void
+adjust_flags(struct flags *flags)
+{
+       if (flags->base == 10 || strchr("cdisu", flags->format) != NULL)
+               flags->alt = false;
+
+       if (flags->field_width < 0) {
+               flags->field_width = -flags->field_width;
+               flags->left_just = true;
+       }
+
+       if (flags->base != 10) {
+               flags->quote = false;
+               flags->plus = false;
+       }
+
+       if (flags->plus)
+               flags->space = false;
+
+       if (flags->format == 'u')
+               flags->plus = false;
+
+       if (flags->left_just)
+               flags->zero = false;
+
+       if (strchr("diouxX", flags->format) != NULL && flags->have_prec)
+               flags->zero = false;
+}
+
 /* format_float --- format a floating point number. caller frees return value */
 
 static const char *
@@ -2880,7 +2877,6 @@ format_float(NODE *arg, struct flags *flags)
        
        double tmpval;
 #ifdef HAVE_MPFR
-       mpz_ptr zi;
        mpfr_ptr mf;
 #endif
        bool quote_flag = false;
@@ -2978,6 +2974,7 @@ format_float(NODE *arg, struct flags *flags)
        if (quote_flag && ! use_lc_numeric)
                setlocale(LC_NUMERIC, "C");
 #endif
+
        if (quote_flag && need_to_add_thousands) {
                const char *new_text = add_thousands(buf);
 
@@ -2987,32 +2984,211 @@ format_float(NODE *arg, struct flags *flags)
        return buf;
 }
 
-/* compute_zero_flag --- return true if we want to fill with zeros */
+/* mbc_byte_count --- return number of bytes for corresponding numchars multibyte characters */
 
-static bool
-compute_zero_flag(struct flags *flags)
+static size_t
+mbc_byte_count(const char *ptr, size_t numchars)
 {
-       bool zero_flag;
+       mbstate_t cur_state;
+       size_t sum = 0;
+       int mb_len;
 
-       /*
-        * When to fill with zeroes is of course not simple.
-        * First: No zero fill if left-justifying.
-        * Next: There seem to be two cases:
-        *      A '0' without a precision, e.g. %06d
-        *      A precision with no field width, e.g. %.10d
-        * Any other case, we don't want to fill with zeroes.
-        */
-       zero_flag = (! flags->left_just
-                   && ((flags->zero && ! flags->have_prec)
-                        || (flags->field_width == 0 && flags->have_prec)));
+       memset(& cur_state, 0, sizeof(cur_state));
 
-       return zero_flag;
+       assert(gawk_mb_cur_max > 1);
+       mb_len = mbrlen(ptr, numchars * gawk_mb_cur_max, &cur_state);
+       if (mb_len <= 0)
+               return numchars;        /* no valid m.b. char */
+
+       for (; numchars > 0; numchars--) {
+               mb_len = mbrlen(ptr, numchars * gawk_mb_cur_max, &cur_state);
+               if (mb_len <= 0)
+                       break;
+               sum += mb_len;
+               ptr += mb_len;
+       }
+
+       return sum;
 }
 
-/* format_mpg_integer --- format an MPZ or MPFR integer. caller frees return value */
+/* mbc_char_count --- return number of m.b. chars in string, up to numbytes bytes */
+
+static size_t
+mbc_char_count(const char *ptr, size_t numbytes)
+{
+       mbstate_t cur_state;
+       size_t sum = 0;
+       int mb_len;
+
+       if (gawk_mb_cur_max == 1)
+               return numbytes;
+
+       memset(& cur_state, 0, sizeof(cur_state));
+
+       mb_len = mbrlen(ptr, numbytes, &cur_state);
+       if (mb_len <= 0)
+               return numbytes;        /* no valid m.b. char */
+
+       while (numbytes > 0) {
+               mb_len = mbrlen(ptr, numbytes, &cur_state);
+               if (mb_len <= 0)
+                       break;
+               sum++;
+               ptr += mb_len;
+               numbytes -= mb_len;
+       }
+
+       return sum;
+}
+
+/* out_of_range --- return true if a value is out of range */
+
+bool
+out_of_range(NODE *n)
+{
+#ifdef HAVE_MPFR
+       if (is_mpg_integer(n))
+               return false;
+       else if (is_mpg_float(n))
+               return (! mpfr_number_p(n->mpg_numbr));
+       else
+#endif
+               return (isnan(n->numbr) || isinf(n->numbr));
+}
+
+/* format_nan_inf --- format NaN and INF values. return value is to a static buffer */
+
+char *
+format_nan_inf(NODE *n, char format)
+{
+       static char buf[100];
+       double val = n->numbr;
+
+#ifdef HAVE_MPFR
+       if (is_mpg_integer(n))
+               return NULL;
+       else if (is_mpg_float(n)) {
+               if (mpfr_nan_p(n->mpg_numbr)) {
+                       strcpy(buf, mpfr_signbit(n->mpg_numbr) != 0 ? "-nan" : "+nan");
+
+                       goto fmt;
+               } else if (mpfr_inf_p(n->mpg_numbr)) {
+                       strcpy(buf, mpfr_signbit(n->mpg_numbr) ? "-inf" : "+inf");
+
+                       goto fmt;
+               } else
+                       return NULL;
+       }
+       /* else
+               fallthrough */
+#endif
+
+       if (isnan(val)) {
+               strcpy(buf, signbit(val) != 0 ? "-nan" : "+nan");
+
+               // fall through to end
+       } else if (isinf(val)) {
+               strcpy(buf, val < 0 ? "-inf" : "+inf");
+
+               // fall through to end
+       } else
+               return NULL;
+
+#ifdef HAVE_MPFR
+fmt:
+#endif
+       if (isupper(format)) {
+               int i;
+
+               for (i = 0; buf[i] != '\0'; i++)
+                       buf[i] = toupper(buf[i]);
+       }
+       return buf;
+}
+
+
+/* reverse --- reverse the contents of a string in place */
+
+static void
+reverse(char *str)
+{
+       int i, j;
+       char tmp;
+
+       for (i = 0, j = strlen(str) - 1; j > i; i++, j--) {
+               tmp = str[i];
+               str[i] = str[j];
+               str[j] = tmp;
+       }
+}
+
+/* add_thousands --- add the thousands separator. caller free the return value */
+
+/*
+ * Copy the source string into the destination string, backwards,
+ * adding the thousands separator at the right points. Then reverse
+ * the string when done. This gives us much cleaner code than trying
+ * to work through the string backwards. (We tried it, it was yucky.)
+ */
 
 static const char *
-format_mpg_integer(NODE *arg, struct flags *flags)
+add_thousands(const char *original)
 {
-       return strdup("mpg_int");
+       size_t orig_len = strlen(original);
+       size_t new_len = orig_len + 1;
+       char *newbuf;
+       const char *src;
+       char *dest;
+
+       emalloc(newbuf, char *, new_len, "add_thousands");
+       memset(newbuf, '\0', new_len);
+
+#if defined(HAVE_LOCALE_H)
+       new_len = orig_len + (orig_len * strlen(loc.thousands_sep)) + 1;        // worst case
+       src = original + strlen(original) - 1;
+       dest = newbuf;
+
+       if (loc.decimal_point[0] != '\0') {
+               const char *dec = NULL;
+
+               if ((dec = strchr(original, loc.decimal_point[0])) != NULL) {
+                       while (src >= dec)
+                               *dest++ = *src--;
+               }
+       }
+
+
+       int ii = 0;
+       int jj = 0;
+       do {
+               *dest++ = *src--;
+               if (loc.grouping[ii] && ++jj == loc.grouping[ii]) {
+                       if (src >= original) {  /* only add if more digits coming */
+                               const char *ts = loc.thousands_sep;
+                               int k;
+
+                               for (k = strlen(ts) - 1; k >= 0; k--)
+                                       *dest++ = ts[k];
+                       }
+                       if (loc.grouping[ii+1] == 0)
+                               jj = 0;         /* keep using current val in loc.grouping[ii] */
+                       else if (loc.grouping[ii+1] == CHAR_MAX) {
+                               // copy in the rest and be done
+                               while (src >= original)
+                                       *dest++ = *src--;
+                               break;
+                       } else {
+                               ii++;
+                               jj = 0;
+                       }
+               }
+       } while (src >= original);
+
+       *dest++ = '\0';
+       reverse(newbuf);
+#else
+       strcpy(newbuf, original);
+#endif
+
+       return newbuf;
 }

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog |   6 +
 printf.c  | 794 ++++++++++++++++++++++++++++++++++++++------------------------
 2 files changed, 491 insertions(+), 309 deletions(-)


hooks/post-receive
-- 
gawk
[Prev in Thread]
Current Thread
[Next in Thread]
[SCM] gawk branch, stable/printf-rework, updated. gawk-4.1.0-5491-gdde70cc7, Arnold Robbins <=
Prev by Date: [SCM] gawk branch, stable/printf-rework, updated. gawk-4.1.0-5490-g8d18169d
Next by Date: [SCM] gawk branch, stable/printf-rework, updated. gawk-4.1.0-5492-g7b9e9bca
Previous by thread: [SCM] gawk branch, stable/printf-rework, updated. gawk-4.1.0-5490-g8d18169d
Next by thread: [SCM] gawk branch, stable/printf-rework, updated. gawk-4.1.0-5492-g7b9e9bca
Index(es):
- Date
- Thread