m4-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: minor regex speedup


From: Eric Blake
Subject: Re: minor regex speedup
Date: Mon, 18 Feb 2008 06:28:58 -0700
User-agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.9) Gecko/20071031 Thunderbird/2.0.0.9 Mnenhy/0.7.5.666

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

According to Ralf Wildenhues on 2/18/2008 5:23 AM:
|
| I haven't otherwise looked at this patch at all, but it contains several
| instances of the number 256.  As it is unequal to 0, 1, or 2, please make
| that one or more #defines or consts, and use them throughout.

Thanks for the idea.  In most cases, 256 was being used in the context of
(UCHAR_MAX + 1); M4 currently relies heavily on the POSIX
requirement that CHAR_BIT==8 (and I doubt that it would be very easy to
port M4 to a non-POSIX system with 9-bit or 32-bit char, even though I am
aware that such systems exist).  I'm installing this (more than just the
patch you mentioned was affected), for both branch and head.

- --
Don't work too hard, make some time for fun as well!

Eric Blake             address@hidden
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.5 (Cygwin)
Comment: Public key at home.comcast.net/~ericblake/eblake.gpg
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iD8DBQFHuYga84KuGfSFAYARAhlxAKCDI/QYL/LwyGjP+RxKXzyBeM3cCwCgo8r1
x5OVOe6LgQft60B2smOeBmo=
=K22h
-----END PGP SIGNATURE-----
From 6e45bac29917da4289b841d1f339851e1def72d9 Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Mon, 18 Feb 2008 06:09:45 -0700
Subject: [PATCH] Avoid some magic numbers.

* src/m4.h (DEBUG_TRACE_ARGS, DEBUG_TRACE_EXPANSION)
(DEBUG_TRACE_QUOTE, DEBUG_TRACE_ALL, DEBUG_TRACE_LINE)
(DEBUG_TRACE_FILE, DEBUG_TRACE_PATH, DEBUG_TRACE_CALL)
(DEBUG_TRACE_INPUT, DEBUG_TRACE_CALLID, DEBUG_TRACE_VERBOSE)
(DEBUG_TRACE_DEFAULT): Use hex constants, to make it obvious these
are bit fields.
* src/input.c (CHAR_EOF, CHAR_MACRO, CHAR_QUOTE, CHAR_ARGV):
Define in terms of UCHAR_MAX.
(set_word_regexp): Likewise.
* src/builtin.c (compile_pattern, m4_translit): Likewise.
Reported by Ralf Wildenhues.

Signed-off-by: Eric Blake <address@hidden>
---
 ChangeLog     |   15 +++++++++++++++
 src/builtin.c |    6 +++---
 src/input.c   |   10 +++++-----
 src/m4.h      |   38 +++++++++++++++++++-------------------
 4 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 76fcac3..86f8cb8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2008-02-18  Eric Blake  <address@hidden>
+
+       Avoid some magic numbers.
+       * src/m4.h (DEBUG_TRACE_ARGS, DEBUG_TRACE_EXPANSION)
+       (DEBUG_TRACE_QUOTE, DEBUG_TRACE_ALL, DEBUG_TRACE_LINE)
+       (DEBUG_TRACE_FILE, DEBUG_TRACE_PATH, DEBUG_TRACE_CALL)
+       (DEBUG_TRACE_INPUT, DEBUG_TRACE_CALLID, DEBUG_TRACE_VERBOSE)
+       (DEBUG_TRACE_DEFAULT): Use hex constants, to make it obvious these
+       are bit fields.
+       * src/input.c (CHAR_EOF, CHAR_MACRO, CHAR_QUOTE, CHAR_ARGV):
+       Define in terms of UCHAR_MAX.
+       (set_word_regexp): Likewise.
+       * src/builtin.c (compile_pattern, m4_translit): Likewise.
+       Reported by Ralf Wildenhues.
+
 2008-02-16  Eric Blake  <address@hidden>
 
        Add regression test for multi-character quote recursion.
diff --git a/src/builtin.c b/src/builtin.c
index a48e7a0..d4a0fee 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -299,7 +299,7 @@ compile_pattern (const char *str, size_t len, struct re_pattern_buffer **buf,
       return msg;
     }
   /* Use a fastmap for speed; it is freed by regfree.  */
-  new_buf->fastmap = xcharalloc (256);
+  new_buf->fastmap = xcharalloc (UCHAR_MAX + 1);
 
   /* Now, find a victim slot.  Decrease the count of all entries, then
      prime the count of the victim slot at REGEX_CACHE_SIZE.  This
@@ -1880,8 +1880,8 @@ m4_translit (struct obstack *obs, int argc, macro_arguments *argv)
   const char *data;
   const char *from;
   const char *to;
-  char map[256] = {0};
-  char found[256] = {0};
+  char map[UCHAR_MAX + 1] = {0};
+  char found[UCHAR_MAX + 1] = {0};
   unsigned char ch;
 
   if (bad_argc (ARG (0), argc, 2, 3))
diff --git a/src/input.c b/src/input.c
index e320c72..5c3b345 100644
--- a/src/input.c
+++ b/src/input.c
@@ -151,10 +151,10 @@ static bool start_of_input_line;
 /* Flag for next_char () to recognize change in input block.  */
 static bool input_change;
 
-#define CHAR_EOF       256     /* Character return on EOF.  */
-#define CHAR_MACRO     257     /* Character return for MACRO token.  */
-#define CHAR_QUOTE     258     /* Character return for quoted string.  */
-#define CHAR_ARGV      259     /* Character return for $@ reference.  */
+#define CHAR_EOF       (UCHAR_MAX + 1) /* Return on EOF.  */
+#define CHAR_MACRO     (UCHAR_MAX + 2) /* Return for MACRO token.  */
+#define CHAR_QUOTE     (UCHAR_MAX + 3) /* Return for quoted string.  */
+#define CHAR_ARGV      (UCHAR_MAX + 4) /* Return for $@ reference.  */
 
 /* Quote chars.  */
 string_pair curr_quote;
@@ -1303,7 +1303,7 @@ set_word_regexp (const char *caller, const char *regexp)
      The fastmap can be reused between compilations, and will be freed
      by the final regfree.  */
   if (!word_regexp.fastmap)
-    word_regexp.fastmap = xcharalloc (256);
+    word_regexp.fastmap = xcharalloc (UCHAR_MAX + 1);
   msg = re_compile_pattern (regexp, strlen (regexp), &word_regexp);
   assert (!msg);
   re_set_registers (&word_regexp, &regs, regs.num_regs, regs.start, regs.end);
diff --git a/src/m4.h b/src/m4.h
index 7df29b8..e1da7a7 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -161,32 +161,32 @@ extern FILE *debug;
 /* The value of debug_level is a bitmask of the following.  */
 
 /* a: show arglist in trace output */
-#define DEBUG_TRACE_ARGS 1
+#define DEBUG_TRACE_ARGS 0x001
 /* e: show expansion in trace output */
-#define DEBUG_TRACE_EXPANSION 2
+#define DEBUG_TRACE_EXPANSION 0x002
 /* q: quote args and expansion in trace output */
-#define DEBUG_TRACE_QUOTE 4
+#define DEBUG_TRACE_QUOTE 0x004
 /* t: trace all macros -- overrides trace{on,off} */
-#define DEBUG_TRACE_ALL 8
+#define DEBUG_TRACE_ALL 0x008
 /* l: add line numbers to trace output */
-#define DEBUG_TRACE_LINE 16
+#define DEBUG_TRACE_LINE 0x010
 /* f: add file name to trace output */
-#define DEBUG_TRACE_FILE 32
+#define DEBUG_TRACE_FILE 0x020
 /* p: trace path search of include files */
-#define DEBUG_TRACE_PATH 64
+#define DEBUG_TRACE_PATH 0x040
 /* c: show macro call before args collection */
-#define DEBUG_TRACE_CALL 128
+#define DEBUG_TRACE_CALL 0x080
 /* i: trace changes of input files */
-#define DEBUG_TRACE_INPUT 256
+#define DEBUG_TRACE_INPUT 0x100
 /* x: add call id to trace output */
-#define DEBUG_TRACE_CALLID 512
+#define DEBUG_TRACE_CALLID 0x200
 
 /* V: very verbose --  print everything */
-#define DEBUG_TRACE_VERBOSE 1023
+#define DEBUG_TRACE_VERBOSE 0x3FF
 /* default flags -- equiv: aeq */
-#define DEBUG_TRACE_DEFAULT 7
+#define DEBUG_TRACE_DEFAULT 0x007
 
-#define DEBUG_PRINT1(Fmt, Arg1)                                 \
+#define DEBUG_PRINT1(Fmt, Arg1)                                        \
   do                                                           \
     {                                                          \
       if (debug != NULL)                                       \
-- 
1.5.4

From 2e81b080376fcc4f3362a0c4810de084371c87d0 Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Mon, 18 Feb 2008 06:24:08 -0700
Subject: [PATCH] Avoid some magic numbers.

* m4/m4private.h (CHAR_EOF, CHAR_BUILTIN, CHAR_QUOTE, CHAR_ARGV)
(CHAR_RETRY): Define in terms of UCHAR_MAX.
* m4/syntax.c (m4_syntax_create, set_syntax_set)
(reset_syntax_set, check_is_single_quotes)
(check_is_single_comments, check_is_macro_escaped)
(m4_set_quotes, m4_set_comment): Likewise.
* modules/gnu.c (regexp_compile): Likewise.
* modules/m4.c (translit): Likewise.
* src/freeze.c (produce_syntax_dump): Likewise.
Reported by Ralf Wildenhues.

Signed-off-by: Eric Blake <address@hidden>
---
 ChangeLog      |   14 ++++++++++++++
 m4/m4private.h |   12 ++++++------
 m4/syntax.c    |   24 ++++++++++++------------
 modules/gnu.c  |    2 +-
 modules/m4.c   |    4 ++--
 src/freeze.c   |    4 ++--
 6 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 6a89c56..ba56df5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2008-02-18  Eric Blake  <address@hidden>
+
+       Avoid some magic numbers.
+       * m4/m4private.h (CHAR_EOF, CHAR_BUILTIN, CHAR_QUOTE, CHAR_ARGV)
+       (CHAR_RETRY): Define in terms of UCHAR_MAX.
+       * m4/syntax.c (m4_syntax_create, set_syntax_set)
+       (reset_syntax_set, check_is_single_quotes)
+       (check_is_single_comments, check_is_macro_escaped)
+       (m4_set_quotes, m4_set_comment): Likewise.
+       * modules/gnu.c (regexp_compile): Likewise.
+       * modules/m4.c (translit): Likewise.
+       * src/freeze.c (produce_syntax_dump): Likewise.
+       Reported by Ralf Wildenhues.
+
 2008-02-16  Eric Blake  <address@hidden>
 
        Add regression test for multi-character quote recursion.
diff --git a/m4/m4private.h b/m4/m4private.h
index a2b78b8..2201703 100644
--- a/m4/m4private.h
+++ b/m4/m4private.h
@@ -395,11 +395,11 @@ extern void m4__symtab_remove_module_references (m4_symbol_table*,
 
 /* CHAR_RETRY must be last, because we size the syntax table to hold
    all other characters and sentinels. */
-#define CHAR_EOF       256     /* Character return on EOF.  */
-#define CHAR_BUILTIN   257     /* Character return for BUILTIN token.  */
-#define CHAR_QUOTE     258     /* Character return for quoted string.  */
-#define CHAR_ARGV      259     /* Character return for $@ reference.  */
-#define CHAR_RETRY     260     /* Character return for end of input block.  */
+#define CHAR_EOF       (UCHAR_MAX + 1) /* Return on EOF.  */
+#define CHAR_BUILTIN   (UCHAR_MAX + 2) /* Return for BUILTIN token.  */
+#define CHAR_QUOTE     (UCHAR_MAX + 3) /* Return for quoted string.  */
+#define CHAR_ARGV      (UCHAR_MAX + 4) /* Return for $@ reference.  */
+#define CHAR_RETRY     (UCHAR_MAX + 5) /* Return for end of input block.  */
 
 #define DEF_LQUOTE     "`"     /* Default left quote delimiter.  */
 #define DEF_RQUOTE     "\'"    /* Default right quote delimiter.  */
diff --git a/m4/syntax.c b/m4/syntax.c
index 8a7b0d1..115884e 100644
--- a/m4/syntax.c
+++ b/m4/syntax.c
@@ -116,7 +116,7 @@ m4_syntax_create (void)
   int ch;
 
   /* Set up default table.  This table never changes during operation.  */
-  for (ch = 256; --ch >= 0;)
+  for (ch = UCHAR_MAX + 1; --ch >= 0; )
     switch (ch)
       {
       case '(':
@@ -309,7 +309,7 @@ set_syntax_set (m4_syntax_table *syntax, const char *chars, int code)
   /* Explicit set of characters to install with this category; all
      other characters that used to have the category get reset to
      OTHER.  */
-  for (ch = 256; --ch >= 0; )
+  for (ch = UCHAR_MAX + 1; --ch >= 0; )
     {
       if (code == M4_SYNTAX_RQUOTE || code == M4_SYNTAX_ECOMM)
        remove_syntax_attribute (syntax, ch, code);
@@ -329,7 +329,7 @@ static void
 reset_syntax_set (m4_syntax_table *syntax, int code)
 {
   int ch;
-  for (ch = 256; --ch >= 0; )
+  for (ch = UCHAR_MAX + 1; --ch >= 0; )
     {
       /* Reset the category back to its default state.  All other
         characters that used to have this category get reset to
@@ -443,7 +443,7 @@ check_is_single_quotes (m4_syntax_table *syntax)
      on the syntax table, then update lquote/rquote accordingly.
      Otherwise, keep lquote/rquote, but we no longer have single
      quotes.  */
-  for (ch = 256; --ch >= 0; )
+  for (ch = UCHAR_MAX + 1; --ch >= 0; )
     {
       if (m4_has_syntax (syntax, ch, M4_SYNTAX_LQUOTE))
        {
@@ -496,7 +496,7 @@ check_is_single_comments (m4_syntax_table *syntax)
      on the syntax table, then update bcomm/ecomm accordingly.
      Otherwise, keep bcomm/ecomm, but we no longer have single
      comments.  */
-  for (ch = 256; --ch >= 0; )
+  for (ch = UCHAR_MAX + 1; --ch >= 0; )
     {
       if (m4_has_syntax (syntax, ch, M4_SYNTAX_BCOMM))
        {
@@ -535,7 +535,7 @@ check_is_macro_escaped (m4_syntax_table *syntax)
   int ch;
 
   syntax->is_macro_escaped = false;
-  for (ch = 256; --ch >= 0; )
+  for (ch = UCHAR_MAX + 1; --ch >= 0; )
     if (m4_has_syntax (syntax, ch, M4_SYNTAX_ESCAPE))
       {
        syntax->is_macro_escaped = true;
@@ -593,7 +593,7 @@ m4_set_quotes (m4_syntax_table *syntax, const char *lq, const char *rq)
                          (M4_SYNTAX_IGNORE | M4_SYNTAX_ESCAPE
                           | M4_SYNTAX_ALPHA | M4_SYNTAX_NUM)));
 
-  for (ch = 256; --ch >= 0;)
+  for (ch = UCHAR_MAX + 1; --ch >= 0; )
     {
       if (m4_has_syntax (syntax, ch, M4_SYNTAX_LQUOTE))
        add_syntax_attribute (syntax, ch,
@@ -656,7 +656,7 @@ m4_set_comment (m4_syntax_table *syntax, const char *bc, const char *ec)
                           | M4_SYNTAX_ALPHA | M4_SYNTAX_NUM
                           | M4_SYNTAX_LQUOTE)));
 
-  for (ch = 256; --ch >= 0;)
+  for (ch = UCHAR_MAX + 1; --ch >= 0; )
     {
       if (m4_has_syntax (syntax, ch, M4_SYNTAX_BCOMM))
        add_syntax_attribute (syntax, ch,
diff --git a/modules/gnu.c b/modules/gnu.c
index f3b7fb7..97b263b 100644
--- a/modules/gnu.c
+++ b/modules/gnu.c
@@ -176,7 +176,7 @@ regexp_compile (m4 *context, const char *caller, const char *regexp,
       return NULL;
     }
   /* Use a fastmap for speed; it is freed by regfree.  */
-  pat->fastmap = xcharalloc (256);
+  pat->fastmap = xcharalloc (UCHAR_MAX + 1);
 
   /* Now, find a victim slot.  Decrease the count of all entries, then
      prime the count of the victim slot at REGEX_CACHE_SIZE.  This
diff --git a/modules/m4.c b/modules/m4.c
index ccc847c..afb9d0c 100644
--- a/modules/m4.c
+++ b/modules/m4.c
@@ -998,8 +998,8 @@ M4BUILTIN_HANDLER (translit)
   const char *data;
   const char *from;
   const char *to;
-  char map[256] = {0};
-  char found[256] = {0};
+  char map[UCHAR_MAX + 1] = {0};
+  char found[UCHAR_MAX + 1] = {0};
   unsigned char ch;
 
   if (argc <= 2)
diff --git a/src/freeze.c b/src/freeze.c
index 941b761..7976bec 100644
--- a/src/freeze.c
+++ b/src/freeze.c
@@ -103,12 +103,12 @@ produce_resyntax_dump (m4 *context, FILE *file)
 static void
 produce_syntax_dump (FILE *file, m4_syntax_table *syntax, char ch)
 {
-  char buf[256];
+  char buf[UCHAR_MAX + 1];
   int code = m4_syntax_code (ch);
   int count = 0;
   int i;
 
-  for (i = 0; i < 256; ++i)
+  for (i = 0; i < UCHAR_MAX + 1; ++i)
     if (m4_has_syntax (syntax, i, code) && code != syntax->orig[i])
       buf[count++] = i;
 
-- 
1.5.4


reply via email to

[Prev in Thread] Current Thread [Next in Thread]