texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[no subject]


From: Patrice Dumas
Date: Fri, 27 Dec 2024 04:41:59 -0500 (EST)

branch: master
commit c9f58db5767bc0e22525b851ab8f78c95f8e6773
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Fri Dec 27 10:41:33 2024 +0100

    Raw text converter in C
    
    * tp/Texinfo/XS/convert/converter.c, tp/Texinfo/XS/main/utils.c
    (STRING_AND_LEN, texinfo_extensions, remove_extension, canonpath)
    (determine_files_and_directory, texinfo_input_file_basename): move
    remove_extension and canonpath to utils.c.  Create
    texinfo_input_file_basename in utils.c based on
    determine_files_and_directory code, move STRING_AND_LEN and
    texinfo_extensions to utils.c.
    
    * tp/Texinfo/XS/convert/converter.c (apply_converter_info)
    (write_or_return), tp/Texinfo/XS/main/converter_types.h
    (ENCODING_CONVERSION): make apply_converter_info and write_or_return
    public, move ENCODING_CONVERSION to converter_types.h.
    
    * tp/Texinfo/XS/convert/converter.c (init_generic_converter)
    (copy_translated_commands, free_translated_commands)
    (apply_converter_info, clear_converter_initialization_info)
    (destroy_converter_initialization_info)
    (copy_converter_initialization_info),
    tp/Texinfo/XS/main/converter_types.h (TRANSLATED_COMMAND_LIST)
    (CONVERTER, CONVERTER_INITIALIZATION_INFO), tp/Texinfo/XS/main/utils.c
    (add_translated_command, clear_translated_commands),
    tp/Texinfo/XS/main/get_perl_info.c (set_translated_commands,
    get_converter_info_from_sv): add TRANSLATED_COMMAND_LIST for
    translated command list with the number of items.  Use it instead of
    TRANSLATED_COMMAND arrays in CONVERTER_INITIALIZATION_INFO and
    CONVERTER.  Add add_translated_command and clear_translated_commands
    and modify the other functions.  Update callers.
    
    * tp/Texinfo/XS/Makefile.am (C_libtexinfo_convert_sources),
    tp/Texinfo/XS/convert/rawtext_converter_api.c (rawtext_converter)
    (initialize_options_encoding, rawtext_output, rawtext_convert)
    (rawtext_convert_tree), tp/Texinfo/XS/convert/converter.c
    (converter_format_data), tp/Texinfo/XS/main/converter_types.h (enum
    converter_format), tp/Texinfo/XS/texi2any.c (formats_table): add
    rawtext converter.
    
    * tp/Texinfo/Convert/Text.pm (_initialize_options_encoding): change a
    variable name.
---
 ChangeLog                            |  43 ++++++++
 tp/Texinfo/Convert/Text.pm           |   6 +-
 tp/Texinfo/XS/Makefile.am            |   2 +
 tp/Texinfo/XS/convert/converter.c    | 190 +++++++----------------------------
 tp/Texinfo/XS/convert/converter.h    |   7 ++
 tp/Texinfo/XS/main/build_perl_info.c |  10 +-
 tp/Texinfo/XS/main/convert_to_text.c |   2 +-
 tp/Texinfo/XS/main/convert_utils.c   |   4 +-
 tp/Texinfo/XS/main/converter_types.h |  18 +++-
 tp/Texinfo/XS/main/get_perl_info.c   |  25 ++---
 tp/Texinfo/XS/main/utils.c           | 141 +++++++++++++++++++++++++-
 tp/Texinfo/XS/main/utils.h           |  13 ++-
 tp/Texinfo/XS/texi2any.c             |   1 +
 13 files changed, 273 insertions(+), 189 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 9e6a995648..1fa0365cf5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,46 @@
+2024-12-27  Patrice Dumas  <pertusus@free.fr>
+
+       Raw text converter in C
+
+       * tp/Texinfo/XS/convert/converter.c, tp/Texinfo/XS/main/utils.c
+       (STRING_AND_LEN, texinfo_extensions, remove_extension, canonpath)
+       (determine_files_and_directory, texinfo_input_file_basename): move
+       remove_extension and canonpath to utils.c.  Create
+       texinfo_input_file_basename in utils.c based on
+       determine_files_and_directory code, move STRING_AND_LEN and
+       texinfo_extensions to utils.c.
+
+       * tp/Texinfo/XS/convert/converter.c (apply_converter_info)
+       (write_or_return), tp/Texinfo/XS/main/converter_types.h
+       (ENCODING_CONVERSION): make apply_converter_info and write_or_return
+       public, move ENCODING_CONVERSION to converter_types.h.
+
+       * tp/Texinfo/XS/convert/converter.c (init_generic_converter)
+       (copy_translated_commands, free_translated_commands)
+       (apply_converter_info, clear_converter_initialization_info)
+       (destroy_converter_initialization_info)
+       (copy_converter_initialization_info),
+       tp/Texinfo/XS/main/converter_types.h (TRANSLATED_COMMAND_LIST)
+       (CONVERTER, CONVERTER_INITIALIZATION_INFO), tp/Texinfo/XS/main/utils.c
+       (add_translated_command, clear_translated_commands),
+       tp/Texinfo/XS/main/get_perl_info.c (set_translated_commands,
+       get_converter_info_from_sv): add TRANSLATED_COMMAND_LIST for
+       translated command list with the number of items.  Use it instead of
+       TRANSLATED_COMMAND arrays in CONVERTER_INITIALIZATION_INFO and
+       CONVERTER.  Add add_translated_command and clear_translated_commands
+       and modify the other functions.  Update callers.
+
+       * tp/Texinfo/XS/Makefile.am (C_libtexinfo_convert_sources),
+       tp/Texinfo/XS/convert/rawtext_converter_api.c (rawtext_converter)
+       (initialize_options_encoding, rawtext_output, rawtext_convert)
+       (rawtext_convert_tree), tp/Texinfo/XS/convert/converter.c
+       (converter_format_data), tp/Texinfo/XS/main/converter_types.h (enum
+       converter_format), tp/Texinfo/XS/texi2any.c (formats_table): add
+       rawtext converter.
+
+       * tp/Texinfo/Convert/Text.pm (_initialize_options_encoding): change a
+       variable name.
+
 2024-12-26  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Texinfo/XS/convert/converter.c (converter_format_data)
diff --git a/tp/Texinfo/Convert/Text.pm b/tp/Texinfo/Convert/Text.pm
index 2de601196b..344619d744 100644
--- a/tp/Texinfo/Convert/Text.pm
+++ b/tp/Texinfo/Convert/Text.pm
@@ -134,11 +134,12 @@ my @text_indicator_converter_options
 sub _initialize_options_encoding($$)
 {
   my $self = shift;
-  my $options = shift;
+  my $text_options = shift;
 
   if ($self->get_conf('ENABLE_ENCODING')
        and defined($self->get_conf('OUTPUT_ENCODING_NAME'))) {
-    $options->{'enabled_encoding'} = $self->get_conf('OUTPUT_ENCODING_NAME');
+    $text_options->{'enabled_encoding'}
+       = $self->get_conf('OUTPUT_ENCODING_NAME');
   }
 }
 
@@ -982,6 +983,7 @@ sub output($$)
       $outfile .= '.txt';
     }
     if (defined($self->{'SUBDIR'})) {
+      # FIXME in theory here $outfile could be undef.  Check if possible
       my $destination_directory = File::Spec->canonpath($self->{'SUBDIR'});
       my ($encoded_destination_directory, $destination_directory_encoding)
         = Texinfo::Convert::Utils::encoded_output_file_name($self,
diff --git a/tp/Texinfo/XS/Makefile.am b/tp/Texinfo/XS/Makefile.am
index d787b6aa03..049bd1d423 100644
--- a/tp/Texinfo/XS/Makefile.am
+++ b/tp/Texinfo/XS/Makefile.am
@@ -507,6 +507,8 @@ C_libtexinfo_convert_sources = \
                        convert/html_converter_api.h \
                        convert/plaintexinfo_converter_api.c \
                        convert/plaintexinfo_converter_api.h \
+                       convert/rawtext_converter_api.c \
+                       convert/rawtext_converter_api.h \
                        convert/texinfo.c \
                        convert/texinfo.h
 
diff --git a/tp/Texinfo/XS/convert/converter.c b/tp/Texinfo/XS/convert/converter.c
index 089e40fe13..616a1696c7 100644
--- a/tp/Texinfo/XS/convert/converter.c
+++ b/tp/Texinfo/XS/convert/converter.c
@@ -44,7 +44,7 @@
 #include "tree.h"
 #include "extra.h"
 /* for COMMAND_OPTION_DEFAULT ACCENTS_STACK
-   fatal xasprintf get_command_option ... */
+   fatal xasprintf get_command_option texinfo_input_file_basename ... */
 #include "utils.h"
 #include "customization_options.h"
 #include "errors.h"
@@ -63,6 +63,7 @@
 #include "api_to_perl.h"
 #include "html_converter_api.h"
 #include "plaintexinfo_converter_api.h"
+#include "rawtext_converter_api.h"
 #include "converter.h"
 
 /* table used to dispatch format specific functions.
@@ -73,6 +74,9 @@ CONVERTER_FORMAT_DATA converter_format_data[] = {
   {"html", "Texinfo::Convert::HTML", 0, &html_converter_defaults,
    &html_converter_initialize, &html_output, &html_convert, 0,
    &html_reset_converter, &html_free_converter},
+  {"rawtext", "Texinfo::Convert::Text", &rawtext_converter,
+   0, 0, &rawtext_output,
+   &rawtext_convert, &rawtext_convert_tree, 0, 0},
   {"plaintexinfo", "Texinfo::Convert::PlainTexinfo", 0,
    &plaintexinfo_converter_defaults, 0, &plaintexinfo_output,
    &plaintexinfo_convert, &plaintexinfo_convert_tree, 0, 0},
@@ -279,13 +283,8 @@ init_generic_converter (CONVERTER *self)
 
   /* set 'translated_commands'  => {'error' => 'error@arrow{}',}, */
 
-  self->translated_commands = (TRANSLATED_COMMAND *)
-        malloc ((1 +1) * sizeof (TRANSLATED_COMMAND));
-  memset (self->translated_commands, 0,
-              (1 +1) * sizeof (TRANSLATED_COMMAND));
-
-  self->translated_commands[0].cmd = CM_error;
-  self->translated_commands[0].translation = strdup ("error@arrow{}");
+  add_translated_command (&self->translated_commands, CM_error,
+                          "error@arrow{}");
 }
 
 /* Allocate a converter without any initialization such as to leave
@@ -334,52 +333,32 @@ new_converter (enum converter_format format)
   return converter_index +1;
 }
 
-static TRANSLATED_COMMAND *
-copy_translated_commands (const TRANSLATED_COMMAND *translated_commands)
+void
+copy_translated_commands (TRANSLATED_COMMAND_LIST *dst_translated_commands,
+                    const TRANSLATED_COMMAND_LIST *translated_commands)
 {
-  size_t translated_cmds_nr, i;
-  TRANSLATED_COMMAND *result;
-
-  for (translated_cmds_nr = 0; translated_commands[translated_cmds_nr].cmd;
-       translated_cmds_nr++)
-    {}
-
-  result = (TRANSLATED_COMMAND *)
-        malloc ((translated_cmds_nr +1) * sizeof (TRANSLATED_COMMAND));
-  memset (result, 0,
-              (translated_cmds_nr +1) * sizeof (TRANSLATED_COMMAND));
+  size_t i;
 
-  if (translated_cmds_nr)
+  for (i = 0; i < translated_commands->number; i++)
     {
-      for (i = 0; i < translated_cmds_nr; i++)
-        {
-          const TRANSLATED_COMMAND *reference_translated_command
-            = &translated_commands[i];
-          TRANSLATED_COMMAND *translated_command_copy = &result[i];
+      const TRANSLATED_COMMAND *reference_translated_command
+            = &translated_commands->list[i];
 
-          translated_command_copy->cmd = reference_translated_command->cmd;
-          translated_command_copy->translation
-            = strdup (reference_translated_command->translation);
-        }
+      add_translated_command (dst_translated_commands,
+                              reference_translated_command->cmd,
+                              reference_translated_command->translation);
     }
-  return result;
 }
 
 void
-destroy_translated_commands (TRANSLATED_COMMAND *translated_commands)
+free_translated_commands (TRANSLATED_COMMAND_LIST *translated_commands)
 {
-  TRANSLATED_COMMAND *translated_command;
-
-  for (translated_command = translated_commands;
-       translated_command->translation; translated_command++)
-    {
-      free (translated_command->translation);
-    }
-  free (translated_commands);
+  clear_translated_commands (translated_commands);
+  free (translated_commands->list);
 }
 
 /* apply initialization information from one source */
-static void
+void
 apply_converter_info (CONVERTER *converter,
          const CONVERTER_INITIALIZATION_INFO *init_info, int set_configured)
 {
@@ -387,11 +366,11 @@ apply_converter_info (CONVERTER *converter,
                                     converter->sorted_options,
                                     &init_info->conf, set_configured);
 
-  if (init_info->translated_commands)
+  if (init_info->translated_commands.number)
     {
-      destroy_translated_commands (converter->translated_commands);
-      converter->translated_commands
-        = copy_translated_commands (init_info->translated_commands);
+      clear_translated_commands (&converter->translated_commands);
+      copy_translated_commands (&converter->translated_commands,
+                                &init_info->translated_commands);
     }
 
   copy_deprecated_dirs (&converter->deprecated_config_directories,
@@ -445,11 +424,7 @@ set_converter_init_information (CONVERTER *converter,
 void
 clear_converter_initialization_info (CONVERTER_INITIALIZATION_INFO *init_info)
 {
-  if (init_info->translated_commands)
-    {
-      destroy_translated_commands (init_info->translated_commands);
-      init_info->translated_commands = 0;
-    }
+  clear_translated_commands (&init_info->translated_commands);
 
   clear_options_list (&init_info->conf);
 
@@ -461,8 +436,7 @@ clear_converter_initialization_info (CONVERTER_INITIALIZATION_INFO *init_info)
 void
 destroy_converter_initialization_info (CONVERTER_INITIALIZATION_INFO *init_info)
 {
-  if (init_info->translated_commands)
-    destroy_translated_commands (init_info->translated_commands);
+  free_translated_commands (&init_info->translated_commands);
 
   free_options_list (&init_info->conf);
 
@@ -482,11 +456,11 @@ copy_converter_initialization_info (CONVERTER_INITIALIZATION_INFO *dst_info,
 
   copy_options_list (&dst_info->conf, &src_info->conf);
 
-  if (src_info->translated_commands)
+  if (src_info->translated_commands.number)
     {
-      destroy_translated_commands (dst_info->translated_commands);
-      dst_info->translated_commands
-        = copy_translated_commands (src_info->translated_commands);
+      clear_translated_commands (&dst_info->translated_commands);
+      copy_translated_commands (&dst_info->translated_commands,
+                                &src_info->translated_commands);
     }
 }
 
@@ -630,7 +604,7 @@ converter_set_document (CONVERTER *converter, DOCUMENT *document)
     = copy_converter_options_for_convert_text (converter);
 }
 
-/* default implementation */
+/* default implementation used in converter_output_tree */
 void
 converter_conversion_initialization (CONVERTER *converter, DOCUMENT *document)
 {
@@ -821,7 +795,7 @@ converter_output_tree (CONVERTER *converter, DOCUMENT *document,
   if (file_fh && !strcmp (output_file, "-"))
     {
        output_files_register_closed
-                         (&converter->output_files_information, 
+                         (&converter->output_files_information,
                           encoded_out_filepath);
       if (fclose (file_fh))
         {
@@ -856,82 +830,6 @@ converter_output_tree (CONVERTER *converter, DOCUMENT *document,
 
 
 
-/* result to be freed */
-static char *
-remove_extension (const char *input_string)
-{
-  char *result;
-  const char *p = strchr (input_string, '.');
-  if (p)
-    {
-      while (1)
-        {
-          const char *q = strchr (p + 1, '.');
-          if (q)
-            p = q;
-          else
-            break;
-        }
-      result = strndup (input_string, p - input_string);
-    }
-  else result = strdup (input_string);
-
-  return result;
-}
-
-/* try to do at least part of what File::Spec->canonpath does to have
-   tests passing */
-static char *
-canonpath (const char *input_file)
-{
-  TEXT result;
-  const char *p = strchr (input_file, '/');
-
-  if (p)
-    {
-      text_init (&result);
-      text_append_n (&result, input_file, p - input_file);
-      while (1)
-        {
-          const char *q;
-          p++;
-          while (*p == '/')
-            p++;
-          /* omit a / at the end of the path */
-          if (!*p)
-            return (result.text);
-          text_append_n (&result, "/", 1);
-          q = strchr (p, '/');
-          if (q)
-            {
-              text_append_n (&result, p, q - p);
-              p = q;
-            }
-          else
-            {
-              text_append (&result, p);
-              return (result.text);
-            }
-        }
-    }
-  else
-    return strdup (input_file);
-}
-
-typedef struct STRING_AND_LEN {
-    const char *string;
-    int len;
-} STRING_AND_LEN;
-
-/* in perl there is also .tx matched, but it is incorrect */
-static const STRING_AND_LEN texinfo_extensions[5] = {
-  {".texi", 5},
-  {".texinfo", 8},
-  {".txinfo", 7},
-  {".txi", 4},
-  {".tex", 4}
-};
-
 /* RESULT should be a char * array of dimension 5 */
 /* results to be freed by the caller */
 void
@@ -995,22 +893,7 @@ determine_files_and_directory (CONVERTER *self, const char *output_format,
     input_basename = strdup ("stdin");
   else
     {
-      int i;
-      int basefile_len = strlen (input_basefile);
-      for (i = 0; i < 5; i++)
-        {
-          int len = texinfo_extensions[i].len;
-          if (basefile_len >= len
-              && !memcmp (input_basefile + basefile_len - len,
-                          texinfo_extensions[i].string, len))
-            {
-              input_basename = strndup (input_basefile,
-                                        basefile_len - len);
-              break;
-            }
-        }
-      if (!input_basename)
-        input_basename = strdup (input_basefile);
+      input_basename = texinfo_input_file_basename (input_basefile);
     }
 
   if (self->conf->setfilename.o.string)
@@ -2094,10 +1977,7 @@ free_generic_converter (CONVERTER *self)
         }
     }
 
-  if (self->translated_commands)
-    {
-      destroy_translated_commands (self->translated_commands);
-    }
+  free_translated_commands (&self->translated_commands);
 
   free_deprecated_dirs_list (&self->deprecated_config_directories);
 
diff --git a/tp/Texinfo/XS/convert/converter.h b/tp/Texinfo/XS/convert/converter.h
index 6b8aa8af7d..f67e1e9220 100644
--- a/tp/Texinfo/XS/convert/converter.h
+++ b/tp/Texinfo/XS/convert/converter.h
@@ -4,6 +4,7 @@
 
 #include <stddef.h>
 
+#include "text.h"
 #include "command_ids.h"
 #include "tree_types.h"
 #include "converter_types.h"
@@ -158,6 +159,8 @@ void clear_converter_initialization_info (
 void destroy_converter_initialization_info (
                             CONVERTER_INITIALIZATION_INFO *defaults);
 
+void apply_converter_info (CONVERTER *converter,
+         const CONVERTER_INITIALIZATION_INFO *init_info, int set_configured);
 void converter_set_document (CONVERTER *converter, DOCUMENT *document);
 
 char *
@@ -189,6 +192,10 @@ void set_global_document_commands (CONVERTER *converter,
 char *node_information_filename (CONVERTER *self, const char *normalized,
                                  const ELEMENT *label_element);
 
+void write_or_return (const ENCODING_CONVERSION *conversion,
+                 const char *encoded_out_filepath,
+                 FILE *file_fh, TEXT *result, char *text);
+
 TREE_ADDED_ELEMENTS *new_tree_added_elements
                       (enum tree_added_elements_status status);
 ELEMENT *new_element_added (TREE_ADDED_ELEMENTS *added_elements,
diff --git a/tp/Texinfo/XS/main/build_perl_info.c b/tp/Texinfo/XS/main/build_perl_info.c
index f9e58f46c0..02efc3ef42 100644
--- a/tp/Texinfo/XS/main/build_perl_info.c
+++ b/tp/Texinfo/XS/main/build_perl_info.c
@@ -3063,7 +3063,7 @@ build_sv_options_from_options_list (const OPTIONS_LIST *options_list,
 /* pass generic converter information to Perl */
 
 static HV *
-build_translated_commands (const TRANSLATED_COMMAND *translated_commands)
+build_translated_commands (const TRANSLATED_COMMAND_LIST *translated_commands)
 {
   size_t i;
   HV *translated_hv;
@@ -3071,10 +3071,10 @@ build_translated_commands (const TRANSLATED_COMMAND *translated_commands)
   dTHX;
 
   translated_hv = newHV ();
-  for (i = 0; translated_commands[i].cmd; i++)
+  for (i = 0; i < translated_commands->number; i++)
     {
-      enum command_id cmd = translated_commands[i].cmd;
-      const char *translation = translated_commands[i].translation;
+      enum command_id cmd = translated_commands->list[i].cmd;
+      const char *translation = translated_commands->list[i].translation;
       const char *command_name = builtin_command_name (cmd);
       hv_store (translated_hv, command_name, strlen (command_name),
                 newSVpv_utf8 (translation, 0), 0);
@@ -3116,7 +3116,7 @@ pass_generic_converter_to_converter_sv (SV *converter_sv,
   STORE("expanded_formats", newRV_noinc ((SV *) expanded_formats_hv));
 
   translated_commands_hv
-    = build_translated_commands (converter->translated_commands);
+    = build_translated_commands (&converter->translated_commands);
   STORE("translated_commands", newRV_noinc ((SV *) translated_commands_hv));
 
   /* store converter_descriptor in perl converter */
diff --git a/tp/Texinfo/XS/main/convert_to_text.c b/tp/Texinfo/XS/main/convert_to_text.c
index 065f494d88..f2eb9bf92c 100644
--- a/tp/Texinfo/XS/main/convert_to_text.c
+++ b/tp/Texinfo/XS/main/convert_to_text.c
@@ -114,7 +114,7 @@ copy_options_for_convert_text (OPTIONS *options)
   copy_strings (&text_options->include_directories,
                 options->INCLUDE_DIRECTORIES.o.strlist);
 
-  /* not a copy , but a reference to the options */
+  /* not a copy but a reference to the options */
   text_options->other_converter_options = options;
 
   return text_options;
diff --git a/tp/Texinfo/XS/main/convert_utils.c b/tp/Texinfo/XS/main/convert_utils.c
index a235578175..6e5db18540 100644
--- a/tp/Texinfo/XS/main/convert_utils.c
+++ b/tp/Texinfo/XS/main/convert_utils.c
@@ -667,10 +667,10 @@ ELEMENT *
 translated_command_tree (CONVERTER *self, enum command_id cmd)
 {
   size_t i;
-  for (i = 0; self->translated_commands[i].cmd; i++)
+  for (i = 0; i < self->translated_commands.number; i++)
     {
       TRANSLATED_COMMAND *translated_command
-        = &self->translated_commands[i];
+        = &self->translated_commands.list[i];
       if (translated_command->cmd == cmd
           && translated_command->translation)
         {
diff --git a/tp/Texinfo/XS/main/converter_types.h b/tp/Texinfo/XS/main/converter_types.h
index 79386f96c9..686b80d8c1 100644
--- a/tp/Texinfo/XS/main/converter_types.h
+++ b/tp/Texinfo/XS/main/converter_types.h
@@ -20,6 +20,8 @@
 #include <stddef.h>
 /* for FILE */
 #include <stdio.h>
+/* for iconv_t */
+#include <iconv.h>
 
 /* for enum special_unit_info_type SPECIAL_UNIT_INFO_TYPE_NR ... */
 #include "html_conversion_data.h"
@@ -36,6 +38,7 @@ struct TEXT_OPTIONS;
 enum converter_format {
    COF_none = -1,
    COF_html,
+   COF_rawtext,
    COF_plaintexinfo,
 };
 
@@ -392,6 +395,12 @@ typedef struct TRANSLATED_COMMAND {
     char *translation;
 } TRANSLATED_COMMAND;
 
+typedef struct TRANSLATED_COMMAND_LIST {
+    size_t number;
+    size_t space;
+    TRANSLATED_COMMAND *list;
+} TRANSLATED_COMMAND_LIST;
+
 typedef struct COMMAND_INTEGER_INFORMATION {
     enum command_id cmd;
     int integer;
@@ -764,7 +773,7 @@ typedef struct DEPRECATED_DIRS_LIST {
 /* information on converter configuration from a source of configuration
    (either output format or user customization) */
 typedef struct CONVERTER_INITIALIZATION_INFO {
-    TRANSLATED_COMMAND *translated_commands;
+    TRANSLATED_COMMAND_LIST translated_commands;
     OPTIONS_LIST conf;
     DEPRECATED_DIRS_LIST deprecated_config_directories;
     /* gather strings that are not customization options */
@@ -790,7 +799,7 @@ typedef struct CONVERTER {
     OPTIONS *init_conf;
     OPTIONS *format_defaults_conf;
     EXPANDED_FORMAT *expanded_formats;
-    TRANSLATED_COMMAND *translated_commands;
+    TRANSLATED_COMMAND_LIST translated_commands;
 
     ERROR_MESSAGE_LIST error_messages;
     /* for error messages registered in the converter */
@@ -981,6 +990,11 @@ typedef struct TARGET_DIRECTORY_FILENAME {
     char *target;
 } TARGET_DIRECTORY_FILENAME;
 
+typedef struct ENCODING_CONVERSION {
+    char *encoding_name;
+    iconv_t iconv;
+} ENCODING_CONVERSION;
+
 
 #endif
 
diff --git a/tp/Texinfo/XS/main/get_perl_info.c b/tp/Texinfo/XS/main/get_perl_info.c
index 81895200fb..ad6f5b72c8 100644
--- a/tp/Texinfo/XS/main/get_perl_info.c
+++ b/tp/Texinfo/XS/main/get_perl_info.c
@@ -1347,13 +1347,14 @@ get_language_document_hv_sorted_indices (HV *document_hv, const char *key,
 
 /* the following is only needed in converters, but we still define here
    such that it is available for functions called from C */
-static TRANSLATED_COMMAND *
-set_translated_commands (SV *translated_commands_sv)
+static void
+set_translated_commands (SV *translated_commands_sv,
+                         TRANSLATED_COMMAND_LIST *translated_commands)
 {
-  TRANSLATED_COMMAND *translated_commands = 0;
-
   dTHX;
 
+  clear_translated_commands (translated_commands);
+
   if (translated_commands_sv)
     {
       HV *translated_commands_hv = 0;
@@ -1370,11 +1371,6 @@ set_translated_commands (SV *translated_commands_sv)
           hv_number = hv_iterinit (translated_commands_hv);
         }
 
-      translated_commands = (TRANSLATED_COMMAND *)
-        non_perl_malloc ((hv_number +1) * sizeof (TRANSLATED_COMMAND));
-      memset (translated_commands, 0,
-              (hv_number +1) * sizeof (TRANSLATED_COMMAND));
-
       for (i = 0; i < hv_number; i++)
         {
           char *cmdname;
@@ -1390,15 +1386,12 @@ set_translated_commands (SV *translated_commands_sv)
               else
                 {
                   char *tmp_spec = (char *) SvPVutf8_nolen (translation_sv);
-                  TRANSLATED_COMMAND *translated_command
-                    = &translated_commands[i];
-                  translated_command->translation = non_perl_strdup (tmp_spec);
-                  translated_command->cmd = cmd;
+                  add_translated_command (translated_commands, cmd,
+                                          tmp_spec);
                 }
             }
         }
     }
-  return translated_commands;
 }
 
 static void
@@ -1491,8 +1484,8 @@ get_converter_info_from_sv (SV *conf_sv, const char *class_name,
                 &initialization_info->non_valid_customization);
 
               if (!strcmp (key, "translated_commands"))
-                initialization_info->translated_commands
-                  = set_translated_commands (value_sv);
+                set_translated_commands (value_sv,
+                       &initialization_info->translated_commands);
               else if (!strcmp (key, "deprecated_config_directories"))
                 {
                   get_deprecated_config_directories_sv (value_sv,
diff --git a/tp/Texinfo/XS/main/utils.c b/tp/Texinfo/XS/main/utils.c
index 0df8ececd7..93e1c7565d 100644
--- a/tp/Texinfo/XS/main/utils.c
+++ b/tp/Texinfo/XS/main/utils.c
@@ -629,6 +629,43 @@ set_expanded_formats_from_options (EXPANDED_FORMAT *formats,
 }
 
 
+
+void
+add_translated_command (TRANSLATED_COMMAND_LIST *translated_commands,
+                        enum command_id cmd,
+                        const char *translation)
+{
+  TRANSLATED_COMMAND *translated_command;
+  if (translated_commands->number >= translated_commands->space)
+    {
+      translated_commands->space += 5;
+      translated_commands->list
+         = realloc (translated_commands->list,
+                    translated_commands->space * sizeof (TRANSLATED_COMMAND));
+      if (!translated_commands->list)
+        fatal ("realloc failed");
+    }
+  translated_command = &translated_commands->list[translated_commands->number];
+  translated_command->cmd = cmd;
+  translated_command->translation = strdup (translation);
+
+  translated_commands->number++;
+}
+
+void
+clear_translated_commands (TRANSLATED_COMMAND_LIST *translated_commands)
+{
+  size_t i;
+
+  for (i = 0; i < translated_commands->number; i++)
+    {
+      free (translated_commands->list[i].translation);
+    }
+  translated_commands->number = 0;
+}
+
+
+
 /* Return the parent if in an item_line command, @*table */
 ELEMENT *
 item_line_parent (ELEMENT *current)
@@ -708,8 +745,110 @@ index_number_index_by_name (const SORTED_INDEX_NAMES *sorted_indices,
   return 0;
 }
 
-
 
+
+typedef struct STRING_AND_LEN {
+    const char *string;
+    int len;
+} STRING_AND_LEN;
+
+/* in perl there is also .tx matched, but it is incorrect */
+static const STRING_AND_LEN texinfo_extensions[5] = {
+  {".texi", 5},
+  {".texinfo", 8},
+  {".txinfo", 7},
+  {".txi", 4},
+  {".tex", 4}
+};
+
+/* similar to s/\.te?x(i|info)?$// in Perl */
+char *
+texinfo_input_file_basename (const char *input_basefile)
+{
+  char *input_basename;
+
+  int i;
+  int basefile_len = strlen (input_basefile);
+  for (i = 0; i < 5; i++)
+    {
+      int len = texinfo_extensions[i].len;
+      if (basefile_len >= len
+          && !memcmp (input_basefile + basefile_len - len,
+                      texinfo_extensions[i].string, len))
+        {
+          input_basename = strndup (input_basefile,
+                                    basefile_len - len);
+          return input_basename;
+        }
+    }
+
+  input_basename = strdup (input_basefile);
+
+  return input_basename;
+}
+
+/* result to be freed */
+char *
+remove_extension (const char *input_string)
+{
+  char *result;
+  const char *p = strchr (input_string, '.');
+  if (p)
+    {
+      while (1)
+        {
+          const char *q = strchr (p + 1, '.');
+          if (q)
+            p = q;
+          else
+            break;
+        }
+      result = strndup (input_string, p - input_string);
+    }
+  else result = strdup (input_string);
+
+  return result;
+}
+
+/* try to do at least part of what File::Spec->canonpath does to have
+   tests passing */
+char *
+canonpath (const char *input_file)
+{
+  TEXT result;
+  const char *p = strchr (input_file, '/');
+
+  if (p)
+    {
+      text_init (&result);
+      text_append_n (&result, input_file, p - input_file);
+      while (1)
+        {
+          const char *q;
+          p++;
+          while (*p == '/')
+            p++;
+          /* omit a / at the end of the path */
+          if (!*p)
+            return (result.text);
+          text_append_n (&result, "/", 1);
+          q = strchr (p, '/');
+          if (q)
+            {
+              text_append_n (&result, p, q - p);
+              p = q;
+            }
+          else
+            {
+              text_append (&result, p);
+              return (result.text);
+            }
+        }
+    }
+  else
+    return strdup (input_file);
+}
+
 /* text parsing functions used in diverse situations */
 /* Determine if there is a name used for @set, @value and translations
    arguments and its length. */
diff --git a/tp/Texinfo/XS/main/utils.h b/tp/Texinfo/XS/main/utils.h
index b1220326af..49de666420 100644
--- a/tp/Texinfo/XS/main/utils.h
+++ b/tp/Texinfo/XS/main/utils.h
@@ -60,11 +60,6 @@ extern char *html_command_text_type_name[];
 
 extern const EXPANDED_FORMAT default_expanded_formats[];
 
-typedef struct ENCODING_CONVERSION {
-    char *encoding_name;
-    iconv_t iconv;
-} ENCODING_CONVERSION;
-
 typedef struct ENCODING_CONVERSION_LIST {
     ENCODING_CONVERSION *list;
     size_t number;
@@ -129,6 +124,9 @@ void wipe_values (VALUE_LIST *values);
 void delete_global_info (GLOBAL_INFO *global_info_ref);
 void delete_global_commands (GLOBAL_COMMANDS *global_commands_ref);
 
+char *texinfo_input_file_basename (const char *input_basefile);
+char *remove_extension (const char *input_string);
+char *canonpath (const char *input_file);
 char *normalize_encoding_name (const char *text, int *possible_encoding);
 ELEMENT *item_line_parent (ELEMENT *current);
 ELEMENT *get_label_element (const ELEMENT *e);
@@ -205,6 +203,11 @@ size_t expanded_formats_number (void);
 void set_expanded_formats_from_options (EXPANDED_FORMAT *formats,
                                         const OPTIONS *options);
 
+void add_translated_command (TRANSLATED_COMMAND_LIST *translated_commands,
+                        enum command_id cmd,
+                        const char *translation);
+void clear_translated_commands (TRANSLATED_COMMAND_LIST *translated_commands);
+
 char *enumerate_item_representation (char *specification, int number);
 
 const ELEMENT *get_global_document_command (
diff --git a/tp/Texinfo/XS/texi2any.c b/tp/Texinfo/XS/texi2any.c
index 2eb6d3fe4d..c1350e2f81 100644
--- a/tp/Texinfo/XS/texi2any.c
+++ b/tp/Texinfo/XS/texi2any.c
@@ -130,6 +130,7 @@ static FORMAT_SPECIFICATION formats_table[] = {
    NULL, "Texinfo::DebugTree", NULL},
   {"textcontent", 0, NULL, "Texinfo::Convert::TextContent", NULL},
   {"plaintexinfo", 0, NULL, NULL, NULL},
+  {"rawtext", 0, NULL, NULL, NULL},
   {"parse", 0, NULL, NULL, NULL},
   {"structure", STTF_nodes_tree | STTF_floats | STTF_split, NULL, NULL, NULL},
   {NULL, 0, NULL, NULL, NULL}



reply via email to

[Prev in Thread] Current Thread [Next in Thread]