texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[7698] implement lazy search match lists


From: gavinsmith0123
Subject: [7698] implement lazy search match lists
Date: Tue, 21 Mar 2017 16:09:46 -0400 (EDT)

Revision: 7698
          http://svn.sv.gnu.org/viewvc/?view=rev&root=texinfo&revision=7698
Author:   gavin
Date:     2017-03-21 16:09:44 -0400 (Tue, 21 Mar 2017)
Log Message:
-----------
implement lazy search match lists

Modified Paths:
--------------
    trunk/ChangeLog
    trunk/info/search.c
    trunk/info/window.h

Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog     2017-03-21 14:15:24 UTC (rev 7697)
+++ trunk/ChangeLog     2017-03-21 20:09:44 UTC (rev 7698)
@@ -1,5 +1,18 @@
 2017-03-21  Gavin Smith  <address@hidden>
 
+       Implement lazy search match lists.
+
+       * info/window.h (MATCH_STATE): Add 'regex' field of type regex_t.
+       Add fields 'match_alloc', 'finished', 'buffer' and 'buflen'.
+       * info/search.c (free_matches): Update for new fields.
+       (regexp_search): Do not call regfree on regex_t variable; copy
+       it to MATCH_STATE object instead.  Set new fields on MATCH_STATE.
+       (extend_matches): New function, split out from regexp_search.
+       (regexp_search, match_in_match_list, match_by_index)
+       (at_end_of_matches): Call extend_matches as necessary.
+
+2017-03-21  Gavin Smith  <address@hidden>
+
        * info/search.c (copy_binding): Remove this function, which is
        not used much.  Remove comment before this function.
        * info/nodes.c (get_nodes_of_tags_table): Use struct assignment 

Modified: trunk/info/search.c
===================================================================
--- trunk/info/search.c 2017-03-21 14:15:24 UTC (rev 7697)
+++ trunk/info/search.c 2017-03-21 20:09:44 UTC (rev 7698)
@@ -26,16 +26,6 @@
 #include "info-utils.h"
 #include "search.h"
 
-/* The search functions take two arguments:
-
-     1) a string to search for, and
-
-     2) a pointer to a SEARCH_BINDING which contains the buffer, start,
-        and end of the search.
-
-   They return a long, which is the offset from the start of the buffer
-   at which the match was found.  An offset of -1 indicates failure. */
-
 
 /* **************************************************************** */
 /*                                                                  */
@@ -118,50 +108,40 @@
 }
 
 
-/* Search BUFFER for REGEXP.  Pass back the list of matches in MATCHES. */
-enum search_result
-regexp_search (char *regexp, int is_literal, int is_insensitive,
-               char *buffer, size_t buflen,
-               MATCH_STATE *match_state)
+static void
+extend_matches (MATCH_STATE *state)
 {
-  regmatch_t *matches = 0; /* List of found matches. */
-  size_t match_alloc = 0;
-  size_t match_count;
+  regmatch_t *matches = state->matches;
+  size_t match_alloc = state->match_alloc;
+  size_t match_count = state->match_count;
+  char *buffer = state->buffer;
+  size_t buflen = state->buflen;
 
-  regex_t preg; /* Compiled pattern buffer for regexp. */
-  int result;
-  char *regexp_str;
+  regoff_t offset = 0;
   char saved_char;
-  regoff_t offset = 0;
+  size_t initial_match_count = match_count;
 
-  if (!is_literal)
-    regexp_str = regexp_expand_newlines_and_tabs (regexp);
-  else
-    regexp_str = regexp_escape_string (regexp);
+  if (state->finished)
+    return;
 
-  result = regcomp (&preg, regexp_str,
-                    REG_EXTENDED | REG_NEWLINE
-                    | (is_insensitive ? REG_ICASE : 0));
-  free (regexp_str);
+  saved_char = buffer[buflen];
+  buffer[buflen] = '\0';
 
-  if (result != 0)
+  if (match_count > 0)
     {
-      int size = regerror (result, &preg, NULL, 0);
-      char *buf = xmalloc (size);
-      regerror (result, &preg, buf, size);
-      info_error (_("regexp error: %s"), buf);
-      return search_invalid;
+      offset = matches[match_count - 1].rm_eo;
+
+      /* move past zero-length match */
+      if (offset == matches[match_count - 1].rm_so)
+        offset++;
     }
 
-  saved_char = buffer[buflen];
-  buffer[buflen] = '\0';
-
-  for (match_count = 0; offset < buflen; )
+  while (offset < buflen && match_count < initial_match_count + 5)
     {
       int result = 0;
       regmatch_t m;
 
-      result = regexec (&preg, &buffer[offset], 1, &m, REG_NOTBOL);
+      result = regexec (&state->regex, &buffer[offset], 1, &m, REG_NOTBOL);
       if (result == 0)
         {
           if (match_count == match_alloc)
@@ -182,15 +162,59 @@
             offset++; /* Avoid finding match again for a pattern of "$". */
         }
       else
-        break;
+        {
+          state->finished = 1;
+          break;
+        }
     }
   buffer[buflen] = saved_char;
-  regfree (&preg);
 
-  match_state->matches = matches;
-  match_state->match_count = match_count;
+  state->matches = matches;
+  state->match_alloc = match_alloc;
+  state->match_count = match_count;
+}
 
-  if (match_count == 0)
+/* Search BUFFER for REGEXP.  Pass back the list of matches
+   in MATCH_STATE. */
+enum search_result
+regexp_search (char *regexp, int is_literal, int is_insensitive,
+               char *buffer, size_t buflen,
+               MATCH_STATE *match_state)
+{
+  regex_t preg; /* Compiled pattern buffer for regexp. */
+  int result;
+  char *regexp_str;
+
+  if (!is_literal)
+    regexp_str = regexp_expand_newlines_and_tabs (regexp);
+  else
+    regexp_str = regexp_escape_string (regexp);
+
+  result = regcomp (&preg, regexp_str,
+                    REG_EXTENDED | REG_NEWLINE
+                    | (is_insensitive ? REG_ICASE : 0));
+  free (regexp_str);
+
+  if (result != 0)
+    {
+      int size = regerror (result, &preg, NULL, 0);
+      char *buf = xmalloc (size);
+      regerror (result, &preg, buf, size);
+      info_error (_("regexp error: %s"), buf);
+      return search_invalid;
+    }
+
+  match_state->matches = 0;
+  match_state->match_count = 0;
+  match_state->match_alloc = 0;
+  match_state->finished = 0;
+  match_state->regex = preg;
+  match_state->buffer = buffer;
+  match_state->buflen = buflen;
+
+  extend_matches (match_state);
+
+  if (match_state->match_count == 0)
     return search_not_found;
   else
     return search_success;
@@ -417,6 +441,14 @@
     {
       /* searching backward */
       int i;
+
+      /* get all matches */
+      while (!match_state->finished)
+        extend_matches (match_state);
+
+      matches = match_state->matches;
+      match_count = match_state->match_count;
+
       for (i = match_count - 1; i >= 0; i--)
         {
           if (matches[i].rm_so < start)
@@ -433,8 +465,19 @@
     {
       /* searching forward */
       int i;
-      for (i = 0; i < match_count; i++)
+      for (i = 0; i < match_count || !match_state->finished; i++)
         {
+          /* get more matches as we need them */
+          if (i == match_count)
+            {
+              extend_matches (match_state);
+              matches = match_state->matches;
+              match_count = match_state->match_count;
+
+              if (i == match_count)
+                break;
+            }
+
           if (matches[i].rm_so >= end)
             break; /* No matches found in search area. */
 
@@ -450,18 +493,25 @@
   return search_not_found;
 }
 
+/* Return match INDEX in STATE.  INDEX must be a valid index. */
 regmatch_t
 match_by_index (MATCH_STATE *state, int index)
 {
+  while (state->match_alloc <= index)
+    extend_matches (state);
   return state->matches[index];
 }
 
+/* Free and clear all data in STATE. */
 void
 free_matches (MATCH_STATE *state)
 {
   free (state->matches);
   state->matches = 0;
-  state->match_count = 0;
+  state->match_count = state->match_alloc = state->finished = 0;
+  state->buffer = 0; /* do not free as it is kept elsewhere */
+  state->buflen = 0;
+  regfree (&state->regex);
 }
 
 int
@@ -500,7 +550,13 @@
 int
 at_end_of_matches (MATCH_STATE *state, int index)
 {
-  return (state->match_count == index) ? 1 : 0;
+  if (!state->finished)
+    extend_matches (state);
+
+  if (state->finished)
+    return (state->match_count == index) ? 1 : 0;
+  else
+    return 0;
 }
 
 

Modified: trunk/info/window.h
===================================================================
--- trunk/info/window.h 2017-03-21 14:15:24 UTC (rev 7697)
+++ trunk/info/window.h 2017-03-21 20:09:44 UTC (rev 7698)
@@ -65,8 +65,13 @@
 
 typedef struct match_struct
 {
-  regmatch_t *matches;
+  regmatch_t *matches; /* Array of matches */
   size_t match_count;
+  size_t match_alloc;
+  int finished;        /* Non-zero if all possible matches are stored. */
+  regex_t regex;
+  char *buffer;
+  size_t buflen;
 } MATCH_STATE;
 
 /* Structure which defines a window.  Windows are doubly linked, next




reply via email to

[Prev in Thread] Current Thread [Next in Thread]