texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[no subject]


From: Gavin D. Smith
Date: Fri, 18 Oct 2024 10:55:21 -0400 (EDT)

branch: master
commit 55ab4547df6d452a23ee7be1bd0241a4d6eea2be
Author: Gavin Smith <gavinsmith0123@gmail.com>
AuthorDate: Fri Oct 18 15:53:46 2024 +0100

    C hashmap implementation
    
    * tp/Texinfo/XS/convert/hashmap.c
    (init_registered_ids_c_hashmap, is_c_hashmap_registered_id)
    (c_hashmap_register_id, clear_registered_ids_c_hashmap)
    (free_registered_ids_c_hashmap): New functions.
    
    * tp/Texinfo/XS/main/converter_types.h
    (enum ids_data_type): add IDT_hashmap
    (CONVF_hashmap): add symbol
    (CONVERTER): add field 'registered_ids_c_hashmap'.
    
    * tp/Texinfo/XS/convert/html_prepare_converter.c
    (html_prepare_conversion_units_targets, html_converter_customize):
    Move initialisation of hash to html_prepare_conversion_units_targets.
    (Suggestion from Patrice.)  Add code to use C hashmap implementation
    if selected.  Call ids_hashmap_predicted_values for the number of
    bins.
    
    * tp/Texinfo/XS/convert/html_prepare_converter.c
    (html_register_id, html_id_is_registered) <IDT_hashmap>:
    Call appropriate functions for C hashmap implementation.
    
    * tp/Texinfo/XS/convert/html_converter_finish.c
    (html_reset_converter, html_free_converter) <IDT_hashmap>:
    Call appropriate functions for C hashmap implementation.
    
    * tp/Texinfo/XS/convert/converter.c (new_converter),
    * tp/Texinfo/XS/convert/get_converter_perl_info.c
    (get_or_create_sv_converter): Use C hashmap implementation by default.
    * tp/Texinfo/XS/teximakehtml.c (main): Use CONVF_hashmap.
---
 ChangeLog                                       |  34 +++++
 tp/Texinfo/XS/Makefile.am                       |   2 +
 tp/Texinfo/XS/convert/converter.c               |   8 +-
 tp/Texinfo/XS/convert/get_converter_perl_info.c |   2 +-
 tp/Texinfo/XS/convert/hashmap.c                 | 170 ++++++++++++++++++++++++
 tp/Texinfo/XS/convert/hashmap.h                 |  27 ++++
 tp/Texinfo/XS/convert/html_converter_finish.c   |   5 +
 tp/Texinfo/XS/convert/html_prepare_converter.c  |  28 ++--
 tp/Texinfo/XS/main/converter_types.h            |   3 +
 tp/Texinfo/XS/teximakehtml.c                    |   3 +-
 10 files changed, 266 insertions(+), 16 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 7c3672824c..34026280d9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,37 @@
+2024-10-18  Gavin Smith <gavinsmith0123@gmail.com>
+
+       C hashmap implementation
+
+       * tp/Texinfo/XS/convert/hashmap.c
+       (init_registered_ids_c_hashmap, is_c_hashmap_registered_id)
+       (c_hashmap_register_id, clear_registered_ids_c_hashmap)
+       (free_registered_ids_c_hashmap): New functions.
+
+       * tp/Texinfo/XS/main/converter_types.h
+       (enum ids_data_type): add IDT_hashmap
+       (CONVF_hashmap): add symbol
+       (CONVERTER): add field 'registered_ids_c_hashmap'.
+
+       * tp/Texinfo/XS/convert/html_prepare_converter.c
+       (html_prepare_conversion_units_targets, html_converter_customize):
+       Move initialisation of hash to html_prepare_conversion_units_targets.
+       (Suggestion from Patrice.)  Add code to use C hashmap implementation
+       if selected.  Call ids_hashmap_predicted_values for the number of
+       bins.
+
+       * tp/Texinfo/XS/convert/html_prepare_converter.c
+       (html_register_id, html_id_is_registered) <IDT_hashmap>:
+       Call appropriate functions for C hashmap implementation.
+
+       * tp/Texinfo/XS/convert/html_converter_finish.c
+       (html_reset_converter, html_free_converter) <IDT_hashmap>:
+       Call appropriate functions for C hashmap implementation.
+
+       * tp/Texinfo/XS/convert/converter.c (new_converter),
+       * tp/Texinfo/XS/convert/get_converter_perl_info.c
+       (get_or_create_sv_converter): Use C hashmap implementation by default.
+       * tp/Texinfo/XS/teximakehtml.c (main): Use CONVF_hashmap.
+
 2024-10-18  Patrice Dumas  <pertusus@free.fr>
 
        Function to estimate number of "targets" for a manual
diff --git a/tp/Texinfo/XS/Makefile.am b/tp/Texinfo/XS/Makefile.am
index 101038877b..9a9fcc622e 100644
--- a/tp/Texinfo/XS/Makefile.am
+++ b/tp/Texinfo/XS/Makefile.am
@@ -404,6 +404,8 @@ C_libtexinfo_convert_sources = \
                        convert/convert_html.c \
                        convert/format_html.h \
                        convert/format_html.c \
+                       convert/hashmap.c \
+                       convert/hashmap.h \
                        convert/html_converter_types.h \
                        convert/html_converter_init_options.c \
                        convert/html_converter_finish.c \
diff --git a/tp/Texinfo/XS/convert/converter.c b/tp/Texinfo/XS/convert/converter.c
index e5d38c25ea..1633282ab2 100644
--- a/tp/Texinfo/XS/convert/converter.c
+++ b/tp/Texinfo/XS/convert/converter.c
@@ -287,8 +287,10 @@ new_converter (enum converter_format format, unsigned long flags)
   else if (flags & CONVF_cxx_hashmap)
     converter->ids_data_type = IDT_cxx_hashmap;
 #endif
-  else
+  else if (flags & CONVF_perl_hashmap)
     converter->ids_data_type = IDT_perl_hashmap;
+  else /* default */
+    converter->ids_data_type = IDT_hashmap;
 
   init_generic_converter (converter);
 
@@ -454,10 +456,8 @@ converter_converter (enum converter_format format,
   CONVERTER_INITIALIZATION_INFO *format_defaults;
   unsigned long flags;
 
-  /* NOTE if HAVE_CXX_HASHMAP is not set, even with CONVF_cxx_hashmap
-     string lists will be used */
   if (!converter_flags)
-    flags = CONVF_cxx_hashmap;
+    flags = CONVF_hashmap;
    /*
    To use a string list.  Slower.
     flags = CONVF_string_list;
diff --git a/tp/Texinfo/XS/convert/get_converter_perl_info.c b/tp/Texinfo/XS/convert/get_converter_perl_info.c
index 364bcb2af2..54f6ddc537 100644
--- a/tp/Texinfo/XS/convert/get_converter_perl_info.c
+++ b/tp/Texinfo/XS/convert/get_converter_perl_info.c
@@ -105,7 +105,7 @@ get_or_create_sv_converter (SV *converter_in, const char *input_class)
         }
 
       converter_descriptor = new_converter (converter_format,
-                                            CONVF_perl_hashmap);
+                                            CONVF_hashmap);
                                              /*
                                             CONVF_string_list);
                                               */
diff --git a/tp/Texinfo/XS/convert/hashmap.c b/tp/Texinfo/XS/convert/hashmap.c
new file mode 100644
index 0000000000..7003e666d1
--- /dev/null
+++ b/tp/Texinfo/XS/convert/hashmap.c
@@ -0,0 +1,170 @@
+/* Copyright 2024 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "converter_types.h"
+
+#include "hashmap.h"
+
+typedef struct BUCKET {
+  /* Linked list of strings. */
+  char *string;
+  struct BUCKET *next;
+} BUCKET;
+
+/* Allocator for bucket object. */
+#define BUCKETS_PER_ARENA 64
+
+typedef struct BUCKET_ARENA {
+  BUCKET buckets[BUCKETS_PER_ARENA];
+  int used;
+  struct BUCKET_ARENA *next;
+} BUCKET_ARENA;
+
+typedef struct C_HASHMAP {
+  BUCKET **bucket;
+  size_t num_buckets;
+  size_t count;
+
+  BUCKET_ARENA *arena;
+} C_HASHMAP;
+
+static BUCKET *
+new_bucket (C_HASHMAP *H)
+{
+  if (H->arena->used < BUCKETS_PER_ARENA)
+    return &H->arena->buckets[H->arena->used++];
+
+  BUCKET_ARENA *new_arena = malloc (sizeof (BUCKET_ARENA));
+  memset (new_arena, 0, sizeof (BUCKET_ARENA));
+
+  /* Add to front of list. */
+  new_arena->next = H->arena;
+  H->arena = new_arena;
+
+  return &H->arena->buckets[H->arena->used++];
+}
+
+
+static unsigned long
+hash_string (const char *string, C_HASHMAP *H)
+{
+  unsigned int hash = 0;
+
+  char c;
+  const char *pc = string;
+
+  while ((c = *pc))
+    {
+      hash *= 127; /* prime */
+      hash += c;
+      pc++;
+    }
+
+  hash %= H->num_buckets;
+  return hash;
+}
+
+#define NBUCKETS 256
+void
+init_registered_ids_c_hashmap (CONVERTER *self, size_t nbuckets)
+{
+  C_HASHMAP *H = malloc (sizeof (C_HASHMAP));
+  memset (H, 0, sizeof (C_HASHMAP));
+
+  H->arena = malloc (sizeof (BUCKET_ARENA));
+  memset (H->arena, 0, sizeof (BUCKET_ARENA));
+
+  if (nbuckets == 0)
+    nbuckets = NBUCKETS;
+
+  H->num_buckets = nbuckets;
+  H->bucket = malloc (sizeof (BUCKET *) * nbuckets);
+  memset (H->bucket, 0, sizeof (BUCKET *) * nbuckets);
+
+  self->registered_ids_c_hashmap = H;
+}
+
+int
+is_c_hashmap_registered_id (CONVERTER *self, const char *in_string)
+{
+  C_HASHMAP *H = (C_HASHMAP *)self->registered_ids_c_hashmap;
+  unsigned int hash = hash_string(in_string, H);
+  BUCKET *B = H->bucket[hash];
+
+  while (B)
+    {
+      if (!strcmp(B->string, in_string))
+        return 1;
+      B = B->next;
+    }
+
+  return 0;
+}
+
+void
+c_hashmap_register_id (CONVERTER *self, const char *in_string)
+{
+  C_HASHMAP *H = (C_HASHMAP *)self->registered_ids_c_hashmap;
+
+  BUCKET *new = new_bucket(H);
+  new->string = strdup (in_string);
+  unsigned int hash = hash_string(in_string, H);
+
+  /* Add to front of linked list. */
+  new->next =  H->bucket[hash];
+  H->bucket[hash] = new;
+
+  H->count++;
+}
+
+void
+clear_registered_ids_c_hashmap (CONVERTER *self)
+{
+  C_HASHMAP *H = (C_HASHMAP *)self->registered_ids_c_hashmap;
+  int i;
+
+  BUCKET_ARENA *arena, *next;
+  /* Free chain. */
+  next = H->arena;
+  while (next)
+    {
+      arena = next;
+      next = arena->next;
+
+      for (i = 0; i < arena->used; i++)
+        {
+          free (arena->buckets[i].string);
+        }
+      free (arena);
+    }
+
+  free (H->bucket);
+  memset (H, 0, sizeof (C_HASHMAP));
+}
+
+void
+free_registered_ids_c_hashmap (CONVERTER *self)
+{
+  C_HASHMAP *H = (C_HASHMAP *)self->registered_ids_c_hashmap;
+  clear_registered_ids_c_hashmap (self);
+  free (H);
+}
+
diff --git a/tp/Texinfo/XS/convert/hashmap.h b/tp/Texinfo/XS/convert/hashmap.h
new file mode 100644
index 0000000000..ad7d04edb3
--- /dev/null
+++ b/tp/Texinfo/XS/convert/hashmap.h
@@ -0,0 +1,27 @@
+/* hashmap.h - declarations for hashmap.c */
+#ifndef HASHMAP_H
+#define HASHMAP_H
+
+/* Copyright 2024 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+void init_registered_ids_c_hashmap (CONVERTER *self, size_t nbuckets);
+int is_c_hashmap_registered_id (CONVERTER *self, const char *in_string);
+void c_hashmap_register_id (CONVERTER *self, const char *in_string);
+void clear_registered_ids_c_hashmap (CONVERTER *self);
+void free_registered_ids_c_hashmap (CONVERTER *self);
+
+
+#endif
diff --git a/tp/Texinfo/XS/convert/html_converter_finish.c b/tp/Texinfo/XS/convert/html_converter_finish.c
index 9ecbf80e63..d5bdf7eb33 100644
--- a/tp/Texinfo/XS/convert/html_converter_finish.c
+++ b/tp/Texinfo/XS/convert/html_converter_finish.c
@@ -31,6 +31,7 @@
 #include "api_to_perl.h"
 #include "call_html_perl_function.h"
 #include "call_html_cxx_function.h"
+#include "hashmap.h"
 /* html_reset_translated_special_unit_info_tree
    html_clear_direction_string_type */
 #include "convert_html.h"
@@ -161,6 +162,8 @@ html_reset_converter (CONVERTER *self)
 
   if (self->ids_data_type == IDT_perl_hashmap)
     clear_registered_ids_hv (self);
+  else if (self->ids_data_type == IDT_hashmap)
+    clear_registered_ids_c_hashmap (self);
 #ifdef HAVE_CXX_HASHMAP
   else if (self->ids_data_type == IDT_cxx_hashmap)
     clear_registered_ids_hashmap (self);
@@ -342,6 +345,8 @@ html_free_converter (CONVERTER *self)
 
   if (self->ids_data_type == IDT_perl_hashmap)
     free_registered_ids_hv (self);
+  else if (self->ids_data_type == IDT_hashmap)
+    free_registered_ids_c_hashmap (self);
 #ifdef HAVE_CXX_HASHMAP
   else if (self->ids_data_type == IDT_cxx_hashmap)
     free_registered_ids_hashmap (self);
diff --git a/tp/Texinfo/XS/convert/html_prepare_converter.c b/tp/Texinfo/XS/convert/html_prepare_converter.c
index 3faa81de6c..577025f9f1 100644
--- a/tp/Texinfo/XS/convert/html_prepare_converter.c
+++ b/tp/Texinfo/XS/convert/html_prepare_converter.c
@@ -55,6 +55,7 @@
 #include "converter.h"
 #include "call_html_perl_function.h"
 #include "call_html_cxx_function.h"
+#include "hashmap.h"
 #include "format_html.h"
 /* html_complete_no_arg_commands_formatting html_run_stage_handlers
    html_add_to_files_source_info html_find_file_source_info
@@ -1719,15 +1720,6 @@ html_converter_customize (CONVERTER *self)
   int external_type_open_function = 0;
   int external_formatting_function = 0;
 
-  if (self->ids_data_type == IDT_perl_hashmap)
-    init_registered_ids_hv (self);
-#ifdef HAVE_CXX_HASHMAP
-  else if (self->ids_data_type == IDT_cxx_hashmap)
-    init_registered_ids_hashmap (self);
-#endif
-  else
-    self->registered_ids = new_string_list ();
-
   /* for @sc */
   for (l = 0; default_upper_case_commands[l]; l++)
     self->upper_case[default_upper_case_commands[l]] = 1;
@@ -3768,6 +3760,8 @@ html_id_is_registered (CONVERTER *self, const char *string)
 {
   if (self->ids_data_type == IDT_perl_hashmap)
     return is_hv_registered_id (self, string);
+  else if (self->ids_data_type == IDT_hashmap)
+    return is_c_hashmap_registered_id (self, string);
 #ifdef HAVE_CXX_HASHMAP
   else if (self->ids_data_type == IDT_cxx_hashmap)
     return is_hashmap_registered_id (self, string);
@@ -3781,6 +3775,8 @@ html_register_id (CONVERTER *self, const char *string)
 {
   if (self->ids_data_type == IDT_perl_hashmap)
     hv_register_id (self, string);
+  else if (self->ids_data_type == IDT_hashmap)
+    c_hashmap_register_id (self, string);
 #ifdef HAVE_CXX_HASHMAP
   else if (self->ids_data_type == IDT_cxx_hashmap)
     hashmap_register_id (self, string);
@@ -4579,6 +4575,20 @@ void
 html_prepare_conversion_units_targets (CONVERTER *self,
                                        const char *document_name)
 {
+  if (self->ids_data_type == IDT_perl_hashmap)
+    init_registered_ids_hv (self);
+  else if (self->ids_data_type == IDT_hashmap)
+    {
+      size_t predicted_values = ids_hashmap_predicted_values (self);
+      init_registered_ids_c_hashmap (self, predicted_values);
+    }
+#ifdef HAVE_CXX_HASHMAP
+  else if (self->ids_data_type == IDT_cxx_hashmap)
+    init_registered_ids_hashmap (self);
+#endif
+  else
+    self->registered_ids = new_string_list ();
+
   /*
    Do that before the other elements, to be sure that special page ids
    are registered before elements id are.
diff --git a/tp/Texinfo/XS/main/converter_types.h b/tp/Texinfo/XS/main/converter_types.h
index 16a14328ee..cca8cf20b9 100644
--- a/tp/Texinfo/XS/main/converter_types.h
+++ b/tp/Texinfo/XS/main/converter_types.h
@@ -42,12 +42,14 @@ enum ids_data_type {
    IDT_perl_hashmap,
    IDT_cxx_hashmap,
    IDT_string_list,
+   IDT_hashmap,
 };
 
 /* converter low level customization */
 #define CONVF_perl_hashmap        0x0001
 #define CONVF_string_list         0x0002
 #define CONVF_cxx_hashmap         0x0004
+#define CONVF_hashmap             0x0008
 
 /* for string information passing to/from perl */
 enum sv_string_type {
@@ -883,6 +885,7 @@ typedef struct CONVERTER {
     STRING_LIST *registered_ids;
     /* actually HV * but we do not want to drag in Perl headers */
     void *registered_ids_hv;
+    void *registered_ids_c_hashmap;
 #ifdef HAVE_CXX_HASHMAP
     /* a pointer on C++ data */
     void *registered_ids_hashmap;
diff --git a/tp/Texinfo/XS/teximakehtml.c b/tp/Texinfo/XS/teximakehtml.c
index 16029d4bd8..08d0185ef8 100644
--- a/tp/Texinfo/XS/teximakehtml.c
+++ b/tp/Texinfo/XS/teximakehtml.c
@@ -312,8 +312,7 @@ main (int argc, char *argv[])
                                    program_file,
                                    &converter_texinfo_language_config_dirs,
                                    &convert_options,
-   /* default, use C++ hashmap if available */
-                                   0);
+                                   CONVF_hashmap);
    /* to test linear search
                                    CONVF_string_list);
     */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]