[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
master 56468b52b23 1/3: Speed up skip-chars-{forward|reverse} with char classes
From: Mattias Engdegård
Subject: master 56468b52b23 1/3: Speed up skip-chars-{forward|reverse} with char classes
Date: Tue, 9 May 2023 08:15:36 -0400 (EDT)
branch: master
commit 56468b52b2355a00c1dff2137c54136dbb031922
Author: Mattias Engdegård <mattiase@acm.org>
Commit: Mattias Engdegård <mattiase@acm.org>
Speed up skip-chars-{forward|reverse} with char classes
* src/regex-emacs.h (re_wctype_t): Add RECC_NUM_CLASSES.
* src/syntax.c (skip_chars, in_classes): Use an array on the stack
instead of a Lisp list for storing character classes.
Don't check all classes if there is a match in one.
Remove useless handle_iso_classes argument.
---
src/regex-emacs.h | 3 +-
src/syntax.c | 87 +++++++++++++++++++++++--------------------------------
2 files changed, 38 insertions(+), 52 deletions(-)
diff --git a/src/regex-emacs.h b/src/regex-emacs.h
index 1bc973363e9..bc357633135 100644
--- a/src/regex-emacs.h
+++ b/src/regex-emacs.h
@@ -187,7 +187,8 @@ typedef enum { RECC_ERROR = 0,
RECC_DIGIT, RECC_XDIGIT,
RECC_BLANK, RECC_SPACE,
RECC_MULTIBYTE, RECC_NONASCII,
- RECC_ASCII, RECC_UNIBYTE
+ RECC_ASCII, RECC_UNIBYTE,
+ RECC_NUM_CLASSES = RECC_UNIBYTE
} re_wctype_t;
extern bool re_iswctype (int ch, re_wctype_t cc);
diff --git a/src/syntax.c b/src/syntax.c
index e9e04e2d638..839ab36bb2f 100644
--- a/src/syntax.c
+++ b/src/syntax.c
@@ -178,14 +178,14 @@ static ptrdiff_t find_start_begv;
static modiff_count find_start_modiff;
-static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool);
+static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object);
static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object);
static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool);
static void scan_sexps_forward (struct lisp_parse_state *,
ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
bool, int);
static void internalize_parse_state (Lisp_Object, struct lisp_parse_state *);
-static bool in_classes (int, Lisp_Object);
+static bool in_classes (int c, int num_classes, const unsigned char *classes);
static void parse_sexp_propertize (ptrdiff_t charpos);
/* This setter is used only in this file, so it can be private. */
@@ -1607,7 +1607,7 @@ Char classes, e.g. `[:alpha:]', are supported.
Returns the distance traveled, either zero or positive. */)
(Lisp_Object string, Lisp_Object lim)
{
- return skip_chars (1, string, lim, 1);
+ return skip_chars (1, string, lim);
}
DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1,
2, 0,
@@ -1616,7 +1616,7 @@ See `skip-chars-forward' for details.
Returns the distance traveled, either zero or negative. */)
(Lisp_Object string, Lisp_Object lim)
{
- return skip_chars (0, string, lim, 1);
+ return skip_chars (0, string, lim);
}
DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1,
2, 0,
@@ -1643,8 +1643,7 @@ of this is the distance traveled. */)
}
static Lisp_Object
-skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
- bool handle_iso_classes)
+skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim)
{
int c;
char fastmap[0400];
@@ -1661,11 +1660,9 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
ptrdiff_t size_byte;
const unsigned char *str;
int len;
- Lisp_Object iso_classes;
USE_SAFE_ALLOCA;
CHECK_STRING (string);
- iso_classes = Qnil;
if (NILP (lim))
XSETINT (lim, forwardp ? ZV : BEGV);
@@ -1700,6 +1697,8 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
If STRING contains non-ASCII characters, setup char_ranges for
them and use fastmap only for their leading codes. */
+ int nclasses = 0;
+ unsigned char classes[RECC_NUM_CLASSES];
if (! string_multibyte)
{
bool string_has_eight_bit = 0;
@@ -1707,18 +1706,16 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
/* At first setup fastmap. */
while (i_byte < size_byte)
{
- if (handle_iso_classes)
+ const unsigned char *ch = str + i_byte;
+ re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
+ if (cc == 0)
+ error ("Invalid ISO C character class");
+ if (cc != -1)
{
- const unsigned char *ch = str + i_byte;
- re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
- if (cc == 0)
- error ("Invalid ISO C character class");
- if (cc != -1)
- {
- iso_classes = Fcons (make_fixnum (cc), iso_classes);
- i_byte = ch - str;
- continue;
- }
+ if (!(nclasses && memchr (classes, cc, nclasses)))
+ classes[nclasses++] = cc;
+ i_byte = ch - str;
+ continue;
}
c = str[i_byte++];
@@ -1803,18 +1800,16 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
{
int leading_code = str[i_byte];
- if (handle_iso_classes)
+ const unsigned char *ch = str + i_byte;
+ re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
+ if (cc == 0)
+ error ("Invalid ISO C character class");
+ if (cc != -1)
{
- const unsigned char *ch = str + i_byte;
- re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
- if (cc == 0)
- error ("Invalid ISO C character class");
- if (cc != -1)
- {
- iso_classes = Fcons (make_fixnum (cc), iso_classes);
- i_byte = ch - str;
- continue;
- }
+ if (!(nclasses && memchr (classes, cc, nclasses)))
+ classes[nclasses++] = cc;
+ i_byte = ch - str;
+ continue;
}
if (leading_code== '\\')
@@ -1960,7 +1955,7 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
stop = endp;
}
c = string_char_and_length (p, &nbytes);
- if (! NILP (iso_classes) && in_classes (c, iso_classes))
+ if (nclasses && in_classes (c, nclasses, classes))
{
if (negate)
break;
@@ -2001,7 +1996,7 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
stop = endp;
}
- if (!NILP (iso_classes) && in_classes (*p, iso_classes))
+ if (nclasses && in_classes (*p, nclasses, classes))
{
if (negate)
break;
@@ -2035,7 +2030,7 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
c = STRING_CHAR (p);
- if (! NILP (iso_classes) && in_classes (c, iso_classes))
+ if (nclasses && in_classes (c, nclasses, classes))
{
if (negate)
break;
@@ -2069,7 +2064,7 @@ skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
stop = endp;
}
- if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
+ if (nclasses && in_classes (p[-1], nclasses, classes))
{
if (negate)
break;
@@ -2253,26 +2248,16 @@ skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim)
}
}
-/* Return true if character C belongs to one of the ISO classes
- in the list ISO_CLASSES. Each class is represented by an
- integer which is its type according to re_wctype. */
+/* Return true if character C belongs to one of the ISO classes in the
+ array. */
static bool
-in_classes (int c, Lisp_Object iso_classes)
+in_classes (int c, int nclasses, const unsigned char *classes)
{
- bool fits_class = 0;
-
- while (CONSP (iso_classes))
- {
- Lisp_Object elt;
- elt = XCAR (iso_classes);
- iso_classes = XCDR (iso_classes);
-
- if (re_iswctype (c, XFIXNAT (elt)))
- fits_class = 1;
- }
-
- return fits_class;
+ for (int i = 0; i < nclasses; i++)
+ if (re_iswctype (c, classes[i]))
+ return true;
+ return false;
}
/* Jump over a comment, assuming we are at the beginning of one.