[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Patch to gperf 2.7.2 for case-independent keyword matching
From: |
Bruce Lilly |
Subject: |
Patch to gperf 2.7.2 for case-independent keyword matching |
Date: |
Fri, 15 Feb 2002 01:04:40 -0500 |
In several important situations, keywords must be matched
independent of case (email header field names, domain names,
etc.).
The attached patch to gperf 2.7.2 provides the option to
generate output which does case-independent matching and
documents that option.
Best regards,
Bruce Lilly
*** doc/gperf.1.orig Fri Feb 15 00:33:37 2002
--- doc/gperf.1 Fri Feb 15 00:37:29 2002
***************
*** 54,62 ****
\fB\-7\fR, \fB\-\-seven\-bit\fR
Assume 7-bit characters.
.TP
\fB\-c\fR, \fB\-\-compare\-strncmp\fR
Generate comparison code using strncmp rather than
! strcmp.
.TP
\fB\-C\fR, \fB\-\-readonly\-tables\fR
Make the contents of generated lookup tables
--- 54,65 ----
\fB\-7\fR, \fB\-\-seven\-bit\fR
Assume 7-bit characters.
.TP
+ \fB\-A\fR, \fB\-\-ignore\-case\fR
+ Match keys independent of case.
+ .TP
\fB\-c\fR, \fB\-\-compare\-strncmp\fR
Generate comparison code using strncmp rather than
! strcmp (strncasecmp if -A is specified).
.TP
\fB\-C\fR, \fB\-\-readonly\-tables\fR
Make the contents of generated lookup tables
*** doc/gperf.texi.orig Fri Feb 15 00:33:59 2002
--- doc/gperf.texi Fri Feb 15 00:40:19 2002
***************
*** 671,680 ****
default in versions of @code{gperf} earlier than 2.7; now the default is
to assume 8-bit characters.
@item -c
@itemx --compare-strncmp
Generates C code that uses the @code{strncmp} function to perform
! string comparisons. The default action is to use @code{strcmp}.
@item -C
@itemx --readonly-tables
--- 671,685 ----
default in versions of @code{gperf} earlier than 2.7; now the default is
to assume 8-bit characters.
+ @item -A
+ @itemx --ignore-case
+ Matches keywords in a case-independent manner.
+
@item -c
@itemx --compare-strncmp
Generates C code that uses the @code{strncmp} function to perform
! string comparisons (@code{strncasecmp} if -A is specified). The default
! action is to use @code{strcmp} (@code{strcasecmp} if -A is specified).
@item -C
@itemx --readonly-tables
*** src/gen-perf.cc.orig Thu Feb 14 22:19:30 2002
--- src/gen-perf.cc Thu Feb 14 23:06:54 2002
***************
*** 19,24 ****
--- 19,25 ----
along with GNU GPERF; see the file COPYING. If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+ #include <ctype.h>
#include <stdio.h>
#include <stdlib.h> /* declares rand(), srand() */
#include <time.h> /* declares time() */
***************
*** 61,67 ****
srand ((long) time (0));
for (int i = 0; i < ALPHA_SIZE; i++)
! asso_values[i] = (rand () & asso_value_max - 1);
}
else
{
--- 62,71 ----
srand ((long) time (0));
for (int i = 0; i < ALPHA_SIZE; i++)
! if ((option[NOCASE] != 0) && (islower(i) != 0))
! asso_values[i] = asso_values[toupper(i)];
! else
! asso_values[i] = (rand () & asso_value_max - 1);
}
else
{
***************
*** 202,207 ****
--- 206,216 ----
fewest_collisions = collisions;
if (option[DEBUG])
fprintf (stderr, "- resolved after %d iterations",
total_iterations - i);
+ if (option[NOCASE])
+ if (isupper((unsigned char)c))
+ asso_values[tolower(c)] = asso_values[(unsigned char)c];
+ else if (islower((unsigned char)c))
+ asso_values[toupper(c)] = asso_values[(unsigned char)c];
return 0;
}
}
*** src/key-list.cc.orig Thu Feb 14 22:10:58 2002
--- src/key-list.cc Fri Feb 15 00:00:09 2002
***************
*** 972,977 ****
--- 972,1023 ----
printf ("[len] == '\\0'");
}
+ /* This class outputs a comparison using strcasecmp. */
+
+ struct Output_Compare_Strcasecmp : public Output_Compare
+ {
+ virtual void output_comparison (const Output_Expr& expr1,
+ const Output_Expr& expr2) const;
+ Output_Compare_Strcasecmp () {}
+ virtual ~Output_Compare_Strcasecmp () {}
+ };
+
+ void Output_Compare_Strcasecmp::output_comparison (const Output_Expr& expr1,
+ const Output_Expr& expr2) const
+ {
+ T (Trace t ("Output_Compare_Strcasecmp::output_comparison");)
+ printf ("!strcasecmp (");
+ expr1.output_expr ();
+ printf (", ");
+ expr2.output_expr ();
+ printf (")");
+ }
+
+ /* This class outputs a comparison using strncasecmp.
+ Note that the length of expr1 will be available through the local variable
+ `len'. */
+
+ struct Output_Compare_Strncasecmp : public Output_Compare
+ {
+ virtual void output_comparison (const Output_Expr& expr1,
+ const Output_Expr& expr2) const;
+ Output_Compare_Strncasecmp () {}
+ virtual ~Output_Compare_Strncasecmp () {}
+ };
+
+ void Output_Compare_Strncasecmp::output_comparison (const Output_Expr& expr1,
+ const Output_Expr& expr2)
const
+ {
+ T (Trace t ("Output_Compare_Strncasecmp::output_comparison");)
+ printf ("!strncasecmp (");
+ expr1.output_expr ();
+ printf (", ");
+ expr2.output_expr ();
+ printf (", len) && ");
+ expr2.output_expr ();
+ printf ("[len] == '\\0'");
+ }
+
/* This class outputs a comparison using memcmp.
Note that the length of expr1 (available through the local variable `len')
must be verified to be equal to the length of expr2 prior to this
***************
*** 1009,1015 ****
Key_List::output_hash_function (void)
{
T (Trace t ("Key_List::output_hash_function");)
! const int max_column = 10;
int field_width;
/* Calculate maximum number of digits required for MAX_HASH_VALUE. */
--- 1055,1061 ----
Key_List::output_hash_function (void)
{
T (Trace t ("Key_List::output_hash_function");)
! const int max_column = 8;
int field_width;
/* Calculate maximum number of digits required for MAX_HASH_VALUE. */
***************
*** 1063,1070 ****
{
if (count > 0)
printf (",");
! if (!(count % max_column))
printf ("\n ");
printf ("%*d", field_width,
occurrences[count] ? asso_values[count] : max_hash_value + 1);
}
--- 1109,1119 ----
{
if (count > 0)
printf (",");
! if (!(count % max_column)) {
! if (isalnum(count) && isalnum(count+1-max_column))
! printf ("\t/* %c - %c */", count+1-max_column, count);
printf ("\n ");
+ }
printf ("%*d", field_width,
occurrences[count] ? asso_values[count] : max_hash_value + 1);
}
***************
*** 2014,2021 ****
output_lookup_function_body (Output_Compare_Memcmp ());
else
{
! if (option[COMP])
! output_lookup_function_body (Output_Compare_Strncmp ());
else
output_lookup_function_body (Output_Compare_Strcmp ());
}
--- 2063,2075 ----
output_lookup_function_body (Output_Compare_Memcmp ());
else
{
! if (option[COMP]) {
! if (option[NOCASE])
! output_lookup_function_body (Output_Compare_Strncasecmp ());
! else
! output_lookup_function_body (Output_Compare_Strncmp ());
! } else if (option[NOCASE])
! output_lookup_function_body (Output_Compare_Strcasecmp ());
else
output_lookup_function_body (Output_Compare_Strcmp ());
}
*** src/list-node.cc.orig Thu Feb 14 23:09:40 2002
--- src/list-node.cc Fri Feb 15 00:27:05 2002
***************
*** 20,25 ****
--- 20,26 ----
#include "list-node.h"
+ #include <ctype.h>
#include <stdio.h>
#include <stdlib.h> /* declares exit() */
#include "options.h"
***************
*** 66,73 ****
int i;
if (option[ALLCHARS]) /* Use all the character positions in the
KEY. */
! for (i = len; i > 0; k++, ptr++, i--)
++occurrences[(unsigned char)(*ptr = *k)];
else /* Only use those character positions
specified by the user. */
{
/* Iterate through the list of key_positions, initializing occurrences
table
--- 67,80 ----
int i;
if (option[ALLCHARS]) /* Use all the character positions in the
KEY. */
! for (i = len; i > 0; k++, ptr++, i--) {
++occurrences[(unsigned char)(*ptr = *k)];
+ if (option[NOCASE])
+ if (isupper(*k))
+ ++occurrences[tolower(*k)];
+ else if (islower(*k))
+ ++occurrences[toupper(*k)];
+ }
else /* Only use those character positions
specified by the user. */
{
/* Iterate through the list of key_positions, initializing occurrences
table
***************
*** 81,87 ****
*ptr = key[i - 1];
else /* Out of range of KEY length, so we'll just
skip it. */
continue;
! ++occurrences[(unsigned char)(*ptr++)];
}
/* Didn't get any hits and user doesn't want to consider the
--- 88,100 ----
*ptr = key[i - 1];
else /* Out of range of KEY length, so we'll just
skip it. */
continue;
! ++occurrences[(unsigned char)(*ptr)];
! if (option[NOCASE])
! if (isupper(*ptr))
! ++occurrences[tolower(*ptr)];
! else if (islower(*ptr))
! ++occurrences[toupper(*ptr)];
! ++ptr;
}
/* Didn't get any hits and user doesn't want to consider the
*** src/options.cc.orig Thu Feb 14 21:02:42 2002
--- src/options.cc Thu Feb 14 21:19:56 2002
***************
*** 83,89 ****
Options::short_usage (FILE * strm)
{
T (Trace t ("Options::short_usage");)
! fprintf (strm, "Usage: %s
[-cCdDef[num]F<initializers>GhH<hashname>i<init>Ijk<keys>K<keyname>lL<language>nN<function
name>ors<size>S<switches>tTvW<wordlistname>Z<class name>7] [input-file]\n"
"Try `%s --help' for more information.\n",
program_name, program_name);
}
--- 83,89 ----
Options::short_usage (FILE * strm)
{
T (Trace t ("Options::short_usage");)
! fprintf (strm, "Usage: %s
[-aAcCdDef[num]F<initializers>GhH<hashname>i<init>Ijk<keys>K<keyname>lL<language>nN<function
name>ors<size>S<switches>tTvW<wordlistname>Z<class name>7] [input-file]\n"
"Try `%s --help' for more information.\n",
program_name, program_name);
}
***************
*** 133,139 ****
" `Perfect_Hash'.\n"
" -7, --seven-bit Assume 7-bit characters.\n"
" -c, --compare-strncmp Generate comparison code using strncmp
rather than\n"
! " strcmp.\n"
" -C, --readonly-tables Make the contents of generated lookup
tables\n"
" constant, i.e., readonly.\n"
" -E, --enum Define constant values using an enum
local to the\n"
--- 133,140 ----
" `Perfect_Hash'.\n"
" -7, --seven-bit Assume 7-bit characters.\n"
" -c, --compare-strncmp Generate comparison code using strncmp
rather than\n"
! " strcmp, or strncasecmp rather than
strcasecmp if -A\n"
! " is specified.\n"
" -C, --readonly-tables Make the contents of generated lookup
tables\n"
" constant, i.e., readonly.\n"
" -E, --enum Define constant values using an enum
local to the\n"
***************
*** 160,165 ****
--- 161,170 ----
" Prevents the transfer of the type
declaration to the\n"
" output file. Use this option if the type
is already\n"
" defined elsewhere.\n"
+ " -A, --ignore-case Match keywords in a case-independent
manner. String\n"
+ " comparisons are made with strcasecmp
(strncasecmp if\n"
+ " -c is specified) and the associate
values are symmetric\n"
+ " with respect to lower-ase and upper-case
ASCII indices.\n"
"\n"
"Algorithm employed by gperf:\n"
" -k, --key-positions=KEYS\n"
***************
*** 355,360 ****
--- 360,366 ----
"\nENUM is........: %s"
"\nINCLUDE is.....: %s"
"\nSEVENBIT is....: %s"
+ "\nNOCASE is......: %s"
"\niterations = %d"
"\nlookup function name = %s"
"\nhash function name = %s"
***************
*** 387,392 ****
--- 393,399 ----
option_word & ENUM ? "enabled" : "disabled",
option_word & INCLUDE ? "enabled" : "disabled",
option_word & SEVENBIT ? "enabled" : "disabled",
+ option_word & NOCASE ? "enabled" : "disabled",
iterations,
function_name, hash_name, wordlist_name, key_name,
initializer_suffix, jump, size - 1, initial_asso_value,
***************
*** 421,426 ****
--- 428,434 ----
{ "lookup-fn-name", required_argument, 0, 'N' },
{ "class-name", required_argument, 0, 'Z' },
{ "seven-bit", no_argument, 0, '7' },
+ { "ignore-case", no_argument, 0, 'A' },
{ "compare-strncmp", no_argument, 0, 'c' },
{ "readonly-tables", no_argument, 0, 'C' },
{ "enum", no_argument, 0, 'E' },
***************
*** 457,463 ****
while ((option_char =
getopt_long (argument_count, argument_vector,
! "adcCDe:Ef:F:gGhH:i:Ij:k:K:lL:nN:oprs:S:tTvW:Z:7",
long_options, (int *)0))
!= -1)
{
--- 465,471 ----
while ((option_char =
getopt_long (argument_count, argument_vector,
! "aAdcCDe:Ef:F:gGhH:i:Ij:k:K:lL:nN:oprs:S:tTvW:Z:7",
long_options, (int *)0))
!= -1)
{
***************
*** 465,470 ****
--- 473,483 ----
{
case 'a': /* Generated code uses the ANSI prototype
format. */
break; /* This is now the default. */
+ case 'A': /* Generate strcasecmp rather than strcmp,
make associate values case-symmetric. */
+ {
+ option_word |= NOCASE;
+ break;
+ }
case 'c': /* Generate strncmp rather than strcmp. */
{
option_word |= COMP;
*** src/options.h.orig Thu Feb 14 21:09:03 2002
--- src/options.h Thu Feb 14 21:11:09 2002
***************
*** 59,65 ****
CPLUSPLUS = 01000000, /* Generate C++ code: prototypes, const,
class, inline, enum. */
ENUM = 02000000, /* Use enum for constants. */
INCLUDE = 04000000, /* Generate #include statements. */
! SEVENBIT = 010000000 /* Assume 7-bit, not 8-bit, characters. */
};
/* Define some useful constants (these don't really belong here, but I'm
--- 59,66 ----
CPLUSPLUS = 01000000, /* Generate C++ code: prototypes, const,
class, inline, enum. */
ENUM = 02000000, /* Use enum for constants. */
INCLUDE = 04000000, /* Generate #include statements. */
! SEVENBIT = 010000000, /* Assume 7-bit, not 8-bit, characters. */
! NOCASE = 020000000 /* Case-independent comparisons */
};
/* Define some useful constants (these don't really belong here, but I'm
- Patch to gperf 2.7.2 for case-independent keyword matching,
Bruce Lilly <=