texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[no subject]


From: Patrice Dumas
Date: Sat, 13 Jan 2024 14:34:24 -0500 (EST)

branch: master
commit 114e10b2a1cb5ee07ae6b9d1228d6d016c9f86e6
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Sat Jan 13 20:34:17 2024 +0100

    * tp/Texinfo/XS/main/targets.c (set_labels_identifiers_target)
    tp/Texinfo/XS/main/tree_types (LABEL),
    tp/Texinfo/XS/parsetexi/labels.c (register_label): if a label element
    is a duplicate, add a reference to the element used in the LABEL
    structure used in labels_lists.
    
    * tp/Texinfo/XS/convert/convert_html.c
    (set_root_commands_targets_node_files): use labels_lists instead of
    identifiers_target to set up targets to process in the document order,
    to have less sorting to do later on in sort_cmd_targets.
---
 ChangeLog                               | 13 ++++++
 tp/Texinfo/XS/convert/convert_html.c    | 74 +++++++++++++++++++++++++++------
 tp/Texinfo/XS/main/errors.c             |  2 +-
 tp/Texinfo/XS/main/errors.h             |  2 +-
 tp/Texinfo/XS/main/targets.c            |  2 +
 tp/Texinfo/XS/main/tree_types.h         |  2 +
 tp/Texinfo/XS/parsetexi/errors_parser.c |  2 +-
 tp/Texinfo/XS/parsetexi/errors_parser.h |  3 +-
 tp/Texinfo/XS/parsetexi/labels.c        |  1 +
 9 files changed, 85 insertions(+), 16 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 26a1192c84..6b8e71008e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2024-01-13  Patrice Dumas  <pertusus@free.fr>
+
+       * tp/Texinfo/XS/main/targets.c (set_labels_identifiers_target)
+       tp/Texinfo/XS/main/tree_types (LABEL),
+       tp/Texinfo/XS/parsetexi/labels.c (register_label): if a label element
+       is a duplicate, add a reference to the element used in the LABEL
+       structure used in labels_lists.
+
+       * tp/Texinfo/XS/convert/convert_html.c
+       (set_root_commands_targets_node_files): use labels_lists instead of
+       identifiers_target to set up targets to process in the document order,
+       to have less sorting to do later on in sort_cmd_targets.
+
 2024-01-12  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Texinfo/Convert/HTML.pm (_convert_no_arg_command): use
diff --git a/tp/Texinfo/XS/convert/convert_html.c b/tp/Texinfo/XS/convert/convert_html.c
index 3350caec98..5dbe41f50f 100644
--- a/tp/Texinfo/XS/convert/convert_html.c
+++ b/tp/Texinfo/XS/convert/convert_html.c
@@ -2078,7 +2078,9 @@ set_root_commands_targets_node_files (CONVERTER *self)
 
       if (self->conf->EXTENSION.string)
         extension = self->conf->EXTENSION.string;
-      LABEL_LIST *label_targets = self->document->identifiers_target;
+      /* use labels_list and not identifiers_target to process in the
+         document order */
+      LABEL_LIST *label_targets = self->document->labels_list;
       int i;
       for (i = 0; i < label_targets->number; i++)
         {
@@ -2086,9 +2088,15 @@ set_root_commands_targets_node_files (CONVERTER *self)
           char *target;
           char *node_filename;
           char *user_node_filename;
+          const ELEMENT *label_element;
+          const ELEMENT *target_element;
           LABEL *label = &label_targets->list[i];
-          const ELEMENT *target_element = label->element;
-          const ELEMENT *label_element = get_label_element (target_element);
+
+          if (!label->identifier || label->reference)
+            continue;
+
+          target_element = label->element;
+          label_element = get_label_element (target_element);
 
           TARGET_FILENAME *target_filename =
            normalized_label_id_file (self, label->identifier, label_element);
@@ -3808,7 +3816,6 @@ html_internal_command_tree (CONVERTER *self, const ELEMENT *command,
           else if (command->args.number <= 0
                    || command->args.list[0]->contents.number <= 0)
             { /* no argument, nothing to do */
-              /* TODO check if possible */
               tree->status = tree_added_status_no_tree;
             }
           else
@@ -4917,12 +4924,50 @@ set_heading_commands_targets (CONVERTER *self)
     }
 }
 
-/* It may not be efficient to sort and find back with bsearch
-   if there is a small number of elements.  However, some target
-   elements should already be ordered when they are accessed in
-   their order of appearance in the document.
-   TODO check in which case it is not true and use another data
-   source if possible  */
+/* For debug/check/optimization
+   used to check to what extent the targets are already ordered.
+   Return the number of elements ordered ok with respect to the
+   previous element
+ */
+size_t
+check_targets_order (enum command_id cmd, HTML_TARGET_LIST *element_targets)
+{
+  size_t i;
+  size_t result = 0;
+  if (element_targets->number <= 1)
+    return result;
+  for (i = 1; i < element_targets->number; i++)
+    {
+      if (compare_element_target (&element_targets->list[i-1],
+                                  &element_targets->list[i]) > 0)
+        {
+          fprintf (stderr, "no %s %zu %ld %p %s %zu %ld %p %s\n",
+           builtin_command_name (cmd), i-1,
+           (uintptr_t)element_targets->list[i-1].element,
+           element_targets->list[i-1].element, element_targets->list[i-1].target,
+           i, (uintptr_t)element_targets->list[i].element,
+           element_targets->list[i].element, element_targets->list[i].target);
+        }
+      else
+        result++;
+    }
+  return result;
+}
+
+/* It may not be efficient to sort and find back with bsearch if there is
+   a small number of elements.  However, some target elements are more
+   likely to already be ordered when they are accessed in their order of
+   appearance in the document.  There is no guarantee, as it is only in the
+   same array that adresses are guaranteed to be increasing.  A check done
+   in 2024 with gcc, using check_targets_order, and also looking at the
+   address of newly allocated elements shows that elements are
+   not that much allocated in order.  However, overall, the addresses are
+   more in order when elements are accessed in the document order.
+   For indices, it is not really possible to get them in document order,
+   within an index they are in document order, but not across indices.
+   The other data are in document order, for nodes and similar because
+   the labels list is used instead of identifiers_target on purpose.
+ */
 void
 sort_cmd_targets (CONVERTER *self)
 {
@@ -4934,6 +4979,11 @@ sort_cmd_targets (CONVERTER *self)
       if (self->html_targets[cmd].number > 0)
         {
           HTML_TARGET_LIST *element_targets = &self->html_targets[cmd];
+           /* to check the order
+          size_t ordered_items = check_targets_order (cmd, element_targets);
+          fprintf (stderr, "ORDER %s %zu / %zu\n", builtin_command_name (cmd),
+                   ordered_items, element_targets->number -1);
+            */
           qsort (element_targets->list,
                  element_targets->number,
                  sizeof (HTML_TARGET), compare_element_target);
@@ -7906,7 +7956,7 @@ word_number_more_than_level (const char *text, int level)
   int count = 0;
 
   while (*p)
-    {
+    {/* FIXME in perl unicode spaces are also matched */
       int n = strspn (p, whitespace_chars);
       if (n)
         {
@@ -8525,7 +8575,7 @@ convert_email_command (CONVERTER *self, const enum command_id cmd,
       text = mail_string;
     }
 
-  /* FIXME match unicode spaces in perl */
+  /* FIXME in perl unicode spaces are also matched */
   if (!mail || mail[strspn (mail, whitespace_chars)] == '\0')
     {
       if (text)
diff --git a/tp/Texinfo/XS/main/errors.c b/tp/Texinfo/XS/main/errors.c
index a7c019630f..99eebd875e 100644
--- a/tp/Texinfo/XS/main/errors.c
+++ b/tp/Texinfo/XS/main/errors.c
@@ -235,7 +235,7 @@ void
 message_list_line_error_ext (ERROR_MESSAGE_LIST *error_messages,
                              OPTIONS *conf,
                              enum error_type type, int continuation,
-                     SOURCE_INFO *cmd_source_info, const char *format, ...)
+              const SOURCE_INFO *cmd_source_info, const char *format, ...)
 {
   va_list v;
 
diff --git a/tp/Texinfo/XS/main/errors.h b/tp/Texinfo/XS/main/errors.h
index 07e8b58c37..f2fdfbbf79 100644
--- a/tp/Texinfo/XS/main/errors.h
+++ b/tp/Texinfo/XS/main/errors.h
@@ -22,7 +22,7 @@ void vmessage_list_line_error (ERROR_MESSAGE_LIST *error_messages,
 void message_list_line_error_ext (ERROR_MESSAGE_LIST *error_messages,
                              OPTIONS *conf,
                              enum error_type type, int continuation,
-                     SOURCE_INFO *cmd_source_info, const char *format, ...);
+                const SOURCE_INFO *cmd_source_info, const char *format, ...);
 void message_list_command_error (ERROR_MESSAGE_LIST *error_messages,
                             OPTIONS *conf,
                             const ELEMENT *e, const char *format, ...);
diff --git a/tp/Texinfo/XS/main/targets.c b/tp/Texinfo/XS/main/targets.c
index b19d8f97c3..235b3fbb5f 100644
--- a/tp/Texinfo/XS/main/targets.c
+++ b/tp/Texinfo/XS/main/targets.c
@@ -132,6 +132,8 @@ set_labels_identifiers_target (LABEL *list_of_labels, size_t labels_number)
           while (j < targets_number - 1 && targets[j+1].identifier
                  && !strcmp (targets[i].identifier, targets[j+1].identifier))
             {
+              list_of_labels[targets[j+1].label_number].reference
+                                   = targets[i].element;
               j++;
             }
           if (j > i)
diff --git a/tp/Texinfo/XS/main/tree_types.h b/tp/Texinfo/XS/main/tree_types.h
index b82ec24fbe..9cef300923 100644
--- a/tp/Texinfo/XS/main/tree_types.h
+++ b/tp/Texinfo/XS/main/tree_types.h
@@ -279,6 +279,8 @@ typedef struct {
     size_t label_number;
     char *identifier;
     ELEMENT *element;
+ /* for label that is a duplicate, points to the element used in links */
+    const ELEMENT *reference;
 } LABEL;
 
 typedef struct {
diff --git a/tp/Texinfo/XS/parsetexi/errors_parser.c b/tp/Texinfo/XS/parsetexi/errors_parser.c
index 3484b9e861..8d5d348018 100644
--- a/tp/Texinfo/XS/parsetexi/errors_parser.c
+++ b/tp/Texinfo/XS/parsetexi/errors_parser.c
@@ -51,7 +51,7 @@ line_error_internal (enum error_type type, int continuation,
 
 void
 line_error_ext (enum error_type type, int continuation,
-                SOURCE_INFO *cmd_source_info,
+                const SOURCE_INFO *cmd_source_info,
                 const char *format, ...)
 {
   va_list v;
diff --git a/tp/Texinfo/XS/parsetexi/errors_parser.h b/tp/Texinfo/XS/parsetexi/errors_parser.h
index 7ba06aa5c5..45477720b9 100644
--- a/tp/Texinfo/XS/parsetexi/errors_parser.h
+++ b/tp/Texinfo/XS/parsetexi/errors_parser.h
@@ -14,7 +14,8 @@ void command_warn (const ELEMENT *e, const char *format, ...);
 void wipe_errors (void);
 void forget_errors (void);
 void line_error_ext (enum error_type type, int continuation,
-                     SOURCE_INFO *cmd_source_info, const char *format, ...);
+                     const SOURCE_INFO *cmd_source_info,
+                     const char *format, ...);
 void bug_message (char *format, ...);
 
 extern ERROR_MESSAGE_LIST error_messages_list;
diff --git a/tp/Texinfo/XS/parsetexi/labels.c b/tp/Texinfo/XS/parsetexi/labels.c
index 7d0ec84e5e..2f2d20518b 100644
--- a/tp/Texinfo/XS/parsetexi/labels.c
+++ b/tp/Texinfo/XS/parsetexi/labels.c
@@ -61,6 +61,7 @@ register_label (ELEMENT *target_element, char *normalized)
   labels_list[labels_number].element = target_element;
   labels_list[labels_number].label_number = labels_number;
   labels_list[labels_number].identifier = normalized;
+  labels_list[labels_number].reference = 0;
   labels_number++;
 }
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]