[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[no subject]
From: |
Patrice Dumas |
Date: |
Fri, 27 Dec 2024 04:41:59 -0500 (EST) |
branch: master
commit c9f58db5767bc0e22525b851ab8f78c95f8e6773
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Fri Dec 27 10:41:33 2024 +0100
Raw text converter in C
* tp/Texinfo/XS/convert/converter.c, tp/Texinfo/XS/main/utils.c
(STRING_AND_LEN, texinfo_extensions, remove_extension, canonpath)
(determine_files_and_directory, texinfo_input_file_basename): move
remove_extension and canonpath to utils.c. Create
texinfo_input_file_basename in utils.c based on
determine_files_and_directory code, move STRING_AND_LEN and
texinfo_extensions to utils.c.
* tp/Texinfo/XS/convert/converter.c (apply_converter_info)
(write_or_return), tp/Texinfo/XS/main/converter_types.h
(ENCODING_CONVERSION): make apply_converter_info and write_or_return
public, move ENCODING_CONVERSION to converter_types.h.
* tp/Texinfo/XS/convert/converter.c (init_generic_converter)
(copy_translated_commands, free_translated_commands)
(apply_converter_info, clear_converter_initialization_info)
(destroy_converter_initialization_info)
(copy_converter_initialization_info),
tp/Texinfo/XS/main/converter_types.h (TRANSLATED_COMMAND_LIST)
(CONVERTER, CONVERTER_INITIALIZATION_INFO), tp/Texinfo/XS/main/utils.c
(add_translated_command, clear_translated_commands),
tp/Texinfo/XS/main/get_perl_info.c (set_translated_commands,
get_converter_info_from_sv): add TRANSLATED_COMMAND_LIST for
translated command list with the number of items. Use it instead of
TRANSLATED_COMMAND arrays in CONVERTER_INITIALIZATION_INFO and
CONVERTER. Add add_translated_command and clear_translated_commands
and modify the other functions. Update callers.
* tp/Texinfo/XS/Makefile.am (C_libtexinfo_convert_sources),
tp/Texinfo/XS/convert/rawtext_converter_api.c (rawtext_converter)
(initialize_options_encoding, rawtext_output, rawtext_convert)
(rawtext_convert_tree), tp/Texinfo/XS/convert/converter.c
(converter_format_data), tp/Texinfo/XS/main/converter_types.h (enum
converter_format), tp/Texinfo/XS/texi2any.c (formats_table): add
rawtext converter.
* tp/Texinfo/Convert/Text.pm (_initialize_options_encoding): change a
variable name.
---
ChangeLog | 43 ++++++++
tp/Texinfo/Convert/Text.pm | 6 +-
tp/Texinfo/XS/Makefile.am | 2 +
tp/Texinfo/XS/convert/converter.c | 190 +++++++----------------------------
tp/Texinfo/XS/convert/converter.h | 7 ++
tp/Texinfo/XS/main/build_perl_info.c | 10 +-
tp/Texinfo/XS/main/convert_to_text.c | 2 +-
tp/Texinfo/XS/main/convert_utils.c | 4 +-
tp/Texinfo/XS/main/converter_types.h | 18 +++-
tp/Texinfo/XS/main/get_perl_info.c | 25 ++---
tp/Texinfo/XS/main/utils.c | 141 +++++++++++++++++++++++++-
tp/Texinfo/XS/main/utils.h | 13 ++-
tp/Texinfo/XS/texi2any.c | 1 +
13 files changed, 273 insertions(+), 189 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 9e6a995648..1fa0365cf5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,46 @@
+2024-12-27 Patrice Dumas <pertusus@free.fr>
+
+ Raw text converter in C
+
+ * tp/Texinfo/XS/convert/converter.c, tp/Texinfo/XS/main/utils.c
+ (STRING_AND_LEN, texinfo_extensions, remove_extension, canonpath)
+ (determine_files_and_directory, texinfo_input_file_basename): move
+ remove_extension and canonpath to utils.c. Create
+ texinfo_input_file_basename in utils.c based on
+ determine_files_and_directory code, move STRING_AND_LEN and
+ texinfo_extensions to utils.c.
+
+ * tp/Texinfo/XS/convert/converter.c (apply_converter_info)
+ (write_or_return), tp/Texinfo/XS/main/converter_types.h
+ (ENCODING_CONVERSION): make apply_converter_info and write_or_return
+ public, move ENCODING_CONVERSION to converter_types.h.
+
+ * tp/Texinfo/XS/convert/converter.c (init_generic_converter)
+ (copy_translated_commands, free_translated_commands)
+ (apply_converter_info, clear_converter_initialization_info)
+ (destroy_converter_initialization_info)
+ (copy_converter_initialization_info),
+ tp/Texinfo/XS/main/converter_types.h (TRANSLATED_COMMAND_LIST)
+ (CONVERTER, CONVERTER_INITIALIZATION_INFO), tp/Texinfo/XS/main/utils.c
+ (add_translated_command, clear_translated_commands),
+ tp/Texinfo/XS/main/get_perl_info.c (set_translated_commands,
+ get_converter_info_from_sv): add TRANSLATED_COMMAND_LIST for
+ translated command list with the number of items. Use it instead of
+ TRANSLATED_COMMAND arrays in CONVERTER_INITIALIZATION_INFO and
+ CONVERTER. Add add_translated_command and clear_translated_commands
+ and modify the other functions. Update callers.
+
+ * tp/Texinfo/XS/Makefile.am (C_libtexinfo_convert_sources),
+ tp/Texinfo/XS/convert/rawtext_converter_api.c (rawtext_converter)
+ (initialize_options_encoding, rawtext_output, rawtext_convert)
+ (rawtext_convert_tree), tp/Texinfo/XS/convert/converter.c
+ (converter_format_data), tp/Texinfo/XS/main/converter_types.h (enum
+ converter_format), tp/Texinfo/XS/texi2any.c (formats_table): add
+ rawtext converter.
+
+ * tp/Texinfo/Convert/Text.pm (_initialize_options_encoding): change a
+ variable name.
+
2024-12-26 Patrice Dumas <pertusus@free.fr>
* tp/Texinfo/XS/convert/converter.c (converter_format_data)
diff --git a/tp/Texinfo/Convert/Text.pm b/tp/Texinfo/Convert/Text.pm
index 2de601196b..344619d744 100644
--- a/tp/Texinfo/Convert/Text.pm
+++ b/tp/Texinfo/Convert/Text.pm
@@ -134,11 +134,12 @@ my @text_indicator_converter_options
sub _initialize_options_encoding($$)
{
my $self = shift;
- my $options = shift;
+ my $text_options = shift;
if ($self->get_conf('ENABLE_ENCODING')
and defined($self->get_conf('OUTPUT_ENCODING_NAME'))) {
- $options->{'enabled_encoding'} = $self->get_conf('OUTPUT_ENCODING_NAME');
+ $text_options->{'enabled_encoding'}
+ = $self->get_conf('OUTPUT_ENCODING_NAME');
}
}
@@ -982,6 +983,7 @@ sub output($$)
$outfile .= '.txt';
}
if (defined($self->{'SUBDIR'})) {
+ # FIXME in theory here $outfile could be undef. Check if possible
my $destination_directory = File::Spec->canonpath($self->{'SUBDIR'});
my ($encoded_destination_directory, $destination_directory_encoding)
= Texinfo::Convert::Utils::encoded_output_file_name($self,
diff --git a/tp/Texinfo/XS/Makefile.am b/tp/Texinfo/XS/Makefile.am
index d787b6aa03..049bd1d423 100644
--- a/tp/Texinfo/XS/Makefile.am
+++ b/tp/Texinfo/XS/Makefile.am
@@ -507,6 +507,8 @@ C_libtexinfo_convert_sources = \
convert/html_converter_api.h \
convert/plaintexinfo_converter_api.c \
convert/plaintexinfo_converter_api.h \
+ convert/rawtext_converter_api.c \
+ convert/rawtext_converter_api.h \
convert/texinfo.c \
convert/texinfo.h
diff --git a/tp/Texinfo/XS/convert/converter.c b/tp/Texinfo/XS/convert/converter.c
index 089e40fe13..616a1696c7 100644
--- a/tp/Texinfo/XS/convert/converter.c
+++ b/tp/Texinfo/XS/convert/converter.c
@@ -44,7 +44,7 @@
#include "tree.h"
#include "extra.h"
/* for COMMAND_OPTION_DEFAULT ACCENTS_STACK
- fatal xasprintf get_command_option ... */
+ fatal xasprintf get_command_option texinfo_input_file_basename ... */
#include "utils.h"
#include "customization_options.h"
#include "errors.h"
@@ -63,6 +63,7 @@
#include "api_to_perl.h"
#include "html_converter_api.h"
#include "plaintexinfo_converter_api.h"
+#include "rawtext_converter_api.h"
#include "converter.h"
/* table used to dispatch format specific functions.
@@ -73,6 +74,9 @@ CONVERTER_FORMAT_DATA converter_format_data[] = {
{"html", "Texinfo::Convert::HTML", 0, &html_converter_defaults,
&html_converter_initialize, &html_output, &html_convert, 0,
&html_reset_converter, &html_free_converter},
+ {"rawtext", "Texinfo::Convert::Text", &rawtext_converter,
+ 0, 0, &rawtext_output,
+ &rawtext_convert, &rawtext_convert_tree, 0, 0},
{"plaintexinfo", "Texinfo::Convert::PlainTexinfo", 0,
&plaintexinfo_converter_defaults, 0, &plaintexinfo_output,
&plaintexinfo_convert, &plaintexinfo_convert_tree, 0, 0},
@@ -279,13 +283,8 @@ init_generic_converter (CONVERTER *self)
/* set 'translated_commands' => {'error' => 'error@arrow{}',}, */
- self->translated_commands = (TRANSLATED_COMMAND *)
- malloc ((1 +1) * sizeof (TRANSLATED_COMMAND));
- memset (self->translated_commands, 0,
- (1 +1) * sizeof (TRANSLATED_COMMAND));
-
- self->translated_commands[0].cmd = CM_error;
- self->translated_commands[0].translation = strdup ("error@arrow{}");
+ add_translated_command (&self->translated_commands, CM_error,
+ "error@arrow{}");
}
/* Allocate a converter without any initialization such as to leave
@@ -334,52 +333,32 @@ new_converter (enum converter_format format)
return converter_index +1;
}
-static TRANSLATED_COMMAND *
-copy_translated_commands (const TRANSLATED_COMMAND *translated_commands)
+void
+copy_translated_commands (TRANSLATED_COMMAND_LIST *dst_translated_commands,
+ const TRANSLATED_COMMAND_LIST *translated_commands)
{
- size_t translated_cmds_nr, i;
- TRANSLATED_COMMAND *result;
-
- for (translated_cmds_nr = 0; translated_commands[translated_cmds_nr].cmd;
- translated_cmds_nr++)
- {}
-
- result = (TRANSLATED_COMMAND *)
- malloc ((translated_cmds_nr +1) * sizeof (TRANSLATED_COMMAND));
- memset (result, 0,
- (translated_cmds_nr +1) * sizeof (TRANSLATED_COMMAND));
+ size_t i;
- if (translated_cmds_nr)
+ for (i = 0; i < translated_commands->number; i++)
{
- for (i = 0; i < translated_cmds_nr; i++)
- {
- const TRANSLATED_COMMAND *reference_translated_command
- = &translated_commands[i];
- TRANSLATED_COMMAND *translated_command_copy = &result[i];
+ const TRANSLATED_COMMAND *reference_translated_command
+ = &translated_commands->list[i];
- translated_command_copy->cmd = reference_translated_command->cmd;
- translated_command_copy->translation
- = strdup (reference_translated_command->translation);
- }
+ add_translated_command (dst_translated_commands,
+ reference_translated_command->cmd,
+ reference_translated_command->translation);
}
- return result;
}
void
-destroy_translated_commands (TRANSLATED_COMMAND *translated_commands)
+free_translated_commands (TRANSLATED_COMMAND_LIST *translated_commands)
{
- TRANSLATED_COMMAND *translated_command;
-
- for (translated_command = translated_commands;
- translated_command->translation; translated_command++)
- {
- free (translated_command->translation);
- }
- free (translated_commands);
+ clear_translated_commands (translated_commands);
+ free (translated_commands->list);
}
/* apply initialization information from one source */
-static void
+void
apply_converter_info (CONVERTER *converter,
const CONVERTER_INITIALIZATION_INFO *init_info, int set_configured)
{
@@ -387,11 +366,11 @@ apply_converter_info (CONVERTER *converter,
converter->sorted_options,
&init_info->conf, set_configured);
- if (init_info->translated_commands)
+ if (init_info->translated_commands.number)
{
- destroy_translated_commands (converter->translated_commands);
- converter->translated_commands
- = copy_translated_commands (init_info->translated_commands);
+ clear_translated_commands (&converter->translated_commands);
+ copy_translated_commands (&converter->translated_commands,
+ &init_info->translated_commands);
}
copy_deprecated_dirs (&converter->deprecated_config_directories,
@@ -445,11 +424,7 @@ set_converter_init_information (CONVERTER *converter,
void
clear_converter_initialization_info (CONVERTER_INITIALIZATION_INFO *init_info)
{
- if (init_info->translated_commands)
- {
- destroy_translated_commands (init_info->translated_commands);
- init_info->translated_commands = 0;
- }
+ clear_translated_commands (&init_info->translated_commands);
clear_options_list (&init_info->conf);
@@ -461,8 +436,7 @@ clear_converter_initialization_info (CONVERTER_INITIALIZATION_INFO *init_info)
void
destroy_converter_initialization_info (CONVERTER_INITIALIZATION_INFO *init_info)
{
- if (init_info->translated_commands)
- destroy_translated_commands (init_info->translated_commands);
+ free_translated_commands (&init_info->translated_commands);
free_options_list (&init_info->conf);
@@ -482,11 +456,11 @@ copy_converter_initialization_info (CONVERTER_INITIALIZATION_INFO *dst_info,
copy_options_list (&dst_info->conf, &src_info->conf);
- if (src_info->translated_commands)
+ if (src_info->translated_commands.number)
{
- destroy_translated_commands (dst_info->translated_commands);
- dst_info->translated_commands
- = copy_translated_commands (src_info->translated_commands);
+ clear_translated_commands (&dst_info->translated_commands);
+ copy_translated_commands (&dst_info->translated_commands,
+ &src_info->translated_commands);
}
}
@@ -630,7 +604,7 @@ converter_set_document (CONVERTER *converter, DOCUMENT *document)
= copy_converter_options_for_convert_text (converter);
}
-/* default implementation */
+/* default implementation used in converter_output_tree */
void
converter_conversion_initialization (CONVERTER *converter, DOCUMENT *document)
{
@@ -821,7 +795,7 @@ converter_output_tree (CONVERTER *converter, DOCUMENT *document,
if (file_fh && !strcmp (output_file, "-"))
{
output_files_register_closed
- (&converter->output_files_information,
+ (&converter->output_files_information,
encoded_out_filepath);
if (fclose (file_fh))
{
@@ -856,82 +830,6 @@ converter_output_tree (CONVERTER *converter, DOCUMENT *document,
-/* result to be freed */
-static char *
-remove_extension (const char *input_string)
-{
- char *result;
- const char *p = strchr (input_string, '.');
- if (p)
- {
- while (1)
- {
- const char *q = strchr (p + 1, '.');
- if (q)
- p = q;
- else
- break;
- }
- result = strndup (input_string, p - input_string);
- }
- else result = strdup (input_string);
-
- return result;
-}
-
-/* try to do at least part of what File::Spec->canonpath does to have
- tests passing */
-static char *
-canonpath (const char *input_file)
-{
- TEXT result;
- const char *p = strchr (input_file, '/');
-
- if (p)
- {
- text_init (&result);
- text_append_n (&result, input_file, p - input_file);
- while (1)
- {
- const char *q;
- p++;
- while (*p == '/')
- p++;
- /* omit a / at the end of the path */
- if (!*p)
- return (result.text);
- text_append_n (&result, "/", 1);
- q = strchr (p, '/');
- if (q)
- {
- text_append_n (&result, p, q - p);
- p = q;
- }
- else
- {
- text_append (&result, p);
- return (result.text);
- }
- }
- }
- else
- return strdup (input_file);
-}
-
-typedef struct STRING_AND_LEN {
- const char *string;
- int len;
-} STRING_AND_LEN;
-
-/* in perl there is also .tx matched, but it is incorrect */
-static const STRING_AND_LEN texinfo_extensions[5] = {
- {".texi", 5},
- {".texinfo", 8},
- {".txinfo", 7},
- {".txi", 4},
- {".tex", 4}
-};
-
/* RESULT should be a char * array of dimension 5 */
/* results to be freed by the caller */
void
@@ -995,22 +893,7 @@ determine_files_and_directory (CONVERTER *self, const char *output_format,
input_basename = strdup ("stdin");
else
{
- int i;
- int basefile_len = strlen (input_basefile);
- for (i = 0; i < 5; i++)
- {
- int len = texinfo_extensions[i].len;
- if (basefile_len >= len
- && !memcmp (input_basefile + basefile_len - len,
- texinfo_extensions[i].string, len))
- {
- input_basename = strndup (input_basefile,
- basefile_len - len);
- break;
- }
- }
- if (!input_basename)
- input_basename = strdup (input_basefile);
+ input_basename = texinfo_input_file_basename (input_basefile);
}
if (self->conf->setfilename.o.string)
@@ -2094,10 +1977,7 @@ free_generic_converter (CONVERTER *self)
}
}
- if (self->translated_commands)
- {
- destroy_translated_commands (self->translated_commands);
- }
+ free_translated_commands (&self->translated_commands);
free_deprecated_dirs_list (&self->deprecated_config_directories);
diff --git a/tp/Texinfo/XS/convert/converter.h b/tp/Texinfo/XS/convert/converter.h
index 6b8aa8af7d..f67e1e9220 100644
--- a/tp/Texinfo/XS/convert/converter.h
+++ b/tp/Texinfo/XS/convert/converter.h
@@ -4,6 +4,7 @@
#include <stddef.h>
+#include "text.h"
#include "command_ids.h"
#include "tree_types.h"
#include "converter_types.h"
@@ -158,6 +159,8 @@ void clear_converter_initialization_info (
void destroy_converter_initialization_info (
CONVERTER_INITIALIZATION_INFO *defaults);
+void apply_converter_info (CONVERTER *converter,
+ const CONVERTER_INITIALIZATION_INFO *init_info, int set_configured);
void converter_set_document (CONVERTER *converter, DOCUMENT *document);
char *
@@ -189,6 +192,10 @@ void set_global_document_commands (CONVERTER *converter,
char *node_information_filename (CONVERTER *self, const char *normalized,
const ELEMENT *label_element);
+void write_or_return (const ENCODING_CONVERSION *conversion,
+ const char *encoded_out_filepath,
+ FILE *file_fh, TEXT *result, char *text);
+
TREE_ADDED_ELEMENTS *new_tree_added_elements
(enum tree_added_elements_status status);
ELEMENT *new_element_added (TREE_ADDED_ELEMENTS *added_elements,
diff --git a/tp/Texinfo/XS/main/build_perl_info.c b/tp/Texinfo/XS/main/build_perl_info.c
index f9e58f46c0..02efc3ef42 100644
--- a/tp/Texinfo/XS/main/build_perl_info.c
+++ b/tp/Texinfo/XS/main/build_perl_info.c
@@ -3063,7 +3063,7 @@ build_sv_options_from_options_list (const OPTIONS_LIST *options_list,
/* pass generic converter information to Perl */
static HV *
-build_translated_commands (const TRANSLATED_COMMAND *translated_commands)
+build_translated_commands (const TRANSLATED_COMMAND_LIST *translated_commands)
{
size_t i;
HV *translated_hv;
@@ -3071,10 +3071,10 @@ build_translated_commands (const TRANSLATED_COMMAND *translated_commands)
dTHX;
translated_hv = newHV ();
- for (i = 0; translated_commands[i].cmd; i++)
+ for (i = 0; i < translated_commands->number; i++)
{
- enum command_id cmd = translated_commands[i].cmd;
- const char *translation = translated_commands[i].translation;
+ enum command_id cmd = translated_commands->list[i].cmd;
+ const char *translation = translated_commands->list[i].translation;
const char *command_name = builtin_command_name (cmd);
hv_store (translated_hv, command_name, strlen (command_name),
newSVpv_utf8 (translation, 0), 0);
@@ -3116,7 +3116,7 @@ pass_generic_converter_to_converter_sv (SV *converter_sv,
STORE("expanded_formats", newRV_noinc ((SV *) expanded_formats_hv));
translated_commands_hv
- = build_translated_commands (converter->translated_commands);
+ = build_translated_commands (&converter->translated_commands);
STORE("translated_commands", newRV_noinc ((SV *) translated_commands_hv));
/* store converter_descriptor in perl converter */
diff --git a/tp/Texinfo/XS/main/convert_to_text.c b/tp/Texinfo/XS/main/convert_to_text.c
index 065f494d88..f2eb9bf92c 100644
--- a/tp/Texinfo/XS/main/convert_to_text.c
+++ b/tp/Texinfo/XS/main/convert_to_text.c
@@ -114,7 +114,7 @@ copy_options_for_convert_text (OPTIONS *options)
copy_strings (&text_options->include_directories,
options->INCLUDE_DIRECTORIES.o.strlist);
- /* not a copy , but a reference to the options */
+ /* not a copy but a reference to the options */
text_options->other_converter_options = options;
return text_options;
diff --git a/tp/Texinfo/XS/main/convert_utils.c b/tp/Texinfo/XS/main/convert_utils.c
index a235578175..6e5db18540 100644
--- a/tp/Texinfo/XS/main/convert_utils.c
+++ b/tp/Texinfo/XS/main/convert_utils.c
@@ -667,10 +667,10 @@ ELEMENT *
translated_command_tree (CONVERTER *self, enum command_id cmd)
{
size_t i;
- for (i = 0; self->translated_commands[i].cmd; i++)
+ for (i = 0; i < self->translated_commands.number; i++)
{
TRANSLATED_COMMAND *translated_command
- = &self->translated_commands[i];
+ = &self->translated_commands.list[i];
if (translated_command->cmd == cmd
&& translated_command->translation)
{
diff --git a/tp/Texinfo/XS/main/converter_types.h b/tp/Texinfo/XS/main/converter_types.h
index 79386f96c9..686b80d8c1 100644
--- a/tp/Texinfo/XS/main/converter_types.h
+++ b/tp/Texinfo/XS/main/converter_types.h
@@ -20,6 +20,8 @@
#include <stddef.h>
/* for FILE */
#include <stdio.h>
+/* for iconv_t */
+#include <iconv.h>
/* for enum special_unit_info_type SPECIAL_UNIT_INFO_TYPE_NR ... */
#include "html_conversion_data.h"
@@ -36,6 +38,7 @@ struct TEXT_OPTIONS;
enum converter_format {
COF_none = -1,
COF_html,
+ COF_rawtext,
COF_plaintexinfo,
};
@@ -392,6 +395,12 @@ typedef struct TRANSLATED_COMMAND {
char *translation;
} TRANSLATED_COMMAND;
+typedef struct TRANSLATED_COMMAND_LIST {
+ size_t number;
+ size_t space;
+ TRANSLATED_COMMAND *list;
+} TRANSLATED_COMMAND_LIST;
+
typedef struct COMMAND_INTEGER_INFORMATION {
enum command_id cmd;
int integer;
@@ -764,7 +773,7 @@ typedef struct DEPRECATED_DIRS_LIST {
/* information on converter configuration from a source of configuration
(either output format or user customization) */
typedef struct CONVERTER_INITIALIZATION_INFO {
- TRANSLATED_COMMAND *translated_commands;
+ TRANSLATED_COMMAND_LIST translated_commands;
OPTIONS_LIST conf;
DEPRECATED_DIRS_LIST deprecated_config_directories;
/* gather strings that are not customization options */
@@ -790,7 +799,7 @@ typedef struct CONVERTER {
OPTIONS *init_conf;
OPTIONS *format_defaults_conf;
EXPANDED_FORMAT *expanded_formats;
- TRANSLATED_COMMAND *translated_commands;
+ TRANSLATED_COMMAND_LIST translated_commands;
ERROR_MESSAGE_LIST error_messages;
/* for error messages registered in the converter */
@@ -981,6 +990,11 @@ typedef struct TARGET_DIRECTORY_FILENAME {
char *target;
} TARGET_DIRECTORY_FILENAME;
+typedef struct ENCODING_CONVERSION {
+ char *encoding_name;
+ iconv_t iconv;
+} ENCODING_CONVERSION;
+
#endif
diff --git a/tp/Texinfo/XS/main/get_perl_info.c b/tp/Texinfo/XS/main/get_perl_info.c
index 81895200fb..ad6f5b72c8 100644
--- a/tp/Texinfo/XS/main/get_perl_info.c
+++ b/tp/Texinfo/XS/main/get_perl_info.c
@@ -1347,13 +1347,14 @@ get_language_document_hv_sorted_indices (HV *document_hv, const char *key,
/* the following is only needed in converters, but we still define here
such that it is available for functions called from C */
-static TRANSLATED_COMMAND *
-set_translated_commands (SV *translated_commands_sv)
+static void
+set_translated_commands (SV *translated_commands_sv,
+ TRANSLATED_COMMAND_LIST *translated_commands)
{
- TRANSLATED_COMMAND *translated_commands = 0;
-
dTHX;
+ clear_translated_commands (translated_commands);
+
if (translated_commands_sv)
{
HV *translated_commands_hv = 0;
@@ -1370,11 +1371,6 @@ set_translated_commands (SV *translated_commands_sv)
hv_number = hv_iterinit (translated_commands_hv);
}
- translated_commands = (TRANSLATED_COMMAND *)
- non_perl_malloc ((hv_number +1) * sizeof (TRANSLATED_COMMAND));
- memset (translated_commands, 0,
- (hv_number +1) * sizeof (TRANSLATED_COMMAND));
-
for (i = 0; i < hv_number; i++)
{
char *cmdname;
@@ -1390,15 +1386,12 @@ set_translated_commands (SV *translated_commands_sv)
else
{
char *tmp_spec = (char *) SvPVutf8_nolen (translation_sv);
- TRANSLATED_COMMAND *translated_command
- = &translated_commands[i];
- translated_command->translation = non_perl_strdup (tmp_spec);
- translated_command->cmd = cmd;
+ add_translated_command (translated_commands, cmd,
+ tmp_spec);
}
}
}
}
- return translated_commands;
}
static void
@@ -1491,8 +1484,8 @@ get_converter_info_from_sv (SV *conf_sv, const char *class_name,
&initialization_info->non_valid_customization);
if (!strcmp (key, "translated_commands"))
- initialization_info->translated_commands
- = set_translated_commands (value_sv);
+ set_translated_commands (value_sv,
+ &initialization_info->translated_commands);
else if (!strcmp (key, "deprecated_config_directories"))
{
get_deprecated_config_directories_sv (value_sv,
diff --git a/tp/Texinfo/XS/main/utils.c b/tp/Texinfo/XS/main/utils.c
index 0df8ececd7..93e1c7565d 100644
--- a/tp/Texinfo/XS/main/utils.c
+++ b/tp/Texinfo/XS/main/utils.c
@@ -629,6 +629,43 @@ set_expanded_formats_from_options (EXPANDED_FORMAT *formats,
}
+/* append CMD with a copy of TRANSLATION to the list, growing it if needed */
+void
+add_translated_command (TRANSLATED_COMMAND_LIST *translated_commands,
+ enum command_id cmd,
+ const char *translation)
+{
+ TRANSLATED_COMMAND *translated_command;
+ if (translated_commands->number >= translated_commands->space)
+ {
+ translated_commands->space += 5; /* room for 5 more items */
+ translated_commands->list
+ = realloc (translated_commands->list,
+ translated_commands->space * sizeof (TRANSLATED_COMMAND));
+ if (!translated_commands->list)
+ fatal ("realloc failed");
+ }
+ translated_command = &translated_commands->list[translated_commands->number];
+ translated_command->cmd = cmd;
+ translated_command->translation = strdup (translation); /* list owns the copy */
+
+ translated_commands->number++;
+}
+/* free the translations and reset the number of items; the list is kept */
+void
+clear_translated_commands (TRANSLATED_COMMAND_LIST *translated_commands)
+{
+ size_t i;
+
+ for (i = 0; i < translated_commands->number; i++)
+ {
+ free (translated_commands->list[i].translation);
+ }
+ translated_commands->number = 0; /* list and space are left untouched */
+}
+
+
+
/* Return the parent if in an item_line command, @*table */
ELEMENT *
item_line_parent (ELEMENT *current)
@@ -708,8 +745,110 @@ index_number_index_by_name (const SORTED_INDEX_NAMES *sorted_indices,
return 0;
}
-
+
+typedef struct STRING_AND_LEN {
+ const char *string;
+ int len; /* length of string, without the terminating NUL */
+} STRING_AND_LEN;
+
+/* Texinfo file extensions. In Perl .tx is matched too, but it is incorrect */
+static const STRING_AND_LEN texinfo_extensions[5] = {
+ {".texi", 5},
+ {".texinfo", 8},
+ {".txinfo", 7},
+ {".txi", 4},
+ {".tex", 4}
+};
+
+/* similar to s/\.te?x(i|info)?$// in Perl. Result to be freed by the caller */
+char *
+texinfo_input_file_basename (const char *input_basefile)
+{
+ char *input_basename;
+
+ int i;
+ int basefile_len = strlen (input_basefile);
+ for (i = 0; i < 5; i++) /* 5 is the size of texinfo_extensions */
+ {
+ int len = texinfo_extensions[i].len;
+ if (basefile_len >= len /* compare the end of the name to the extension */
+ && !memcmp (input_basefile + basefile_len - len,
+ texinfo_extensions[i].string, len))
+ {
+ input_basename = strndup (input_basefile,
+ basefile_len - len);
+ return input_basename;
+ }
+ }
+
+ input_basename = strdup (input_basefile); /* no known extension, copy all */
+
+ return input_basename;
+}
+
+/* remove the last dot and what follows, if any. Result to be freed */
+char *
+remove_extension (const char *input_string)
+{
+ char *result;
+ const char *p = strchr (input_string, '.');
+ if (p)
+ {
+ while (1) /* advance p to the last dot of the string */
+ {
+ const char *q = strchr (p + 1, '.');
+ if (q)
+ p = q;
+ else
+ break;
+ }
+ result = strndup (input_string, p - input_string);
+ }
+ else result = strdup (input_string); /* no dot, copy the whole string */
+
+ return result;
+}
+
+/* try to do at least part of what File::Spec->canonpath does to have
+ tests passing: collapse repeated / and omit a / at the end of the path */
+char *
+canonpath (const char *input_file)
+{
+ TEXT result;
+ const char *p = strchr (input_file, '/');
+
+ if (p)
+ {
+ text_init (&result);
+ text_append_n (&result, input_file, p - input_file); /* before first / */
+ while (1)
+ {
+ const char *q;
+ p++;
+ while (*p == '/') /* skip consecutive / */
+ p++;
+ /* omit a / at the end of the path */
+ if (!*p)
+ return (result.text);
+ text_append_n (&result, "/", 1);
+ q = strchr (p, '/');
+ if (q)
+ {
+ text_append_n (&result, p, q - p);
+ p = q;
+ }
+ else
+ {
+ text_append (&result, p); /* last path component */
+ return (result.text);
+ }
+ }
+ }
+ else
+ return strdup (input_file); /* no / in the path, return a plain copy */
+}
+
/* text parsing functions used in diverse situations */
/* Determine if there is a name used for @set, @value and translations
arguments and its length. */
diff --git a/tp/Texinfo/XS/main/utils.h b/tp/Texinfo/XS/main/utils.h
index b1220326af..49de666420 100644
--- a/tp/Texinfo/XS/main/utils.h
+++ b/tp/Texinfo/XS/main/utils.h
@@ -60,11 +60,6 @@ extern char *html_command_text_type_name[];
extern const EXPANDED_FORMAT default_expanded_formats[];
-typedef struct ENCODING_CONVERSION {
- char *encoding_name;
- iconv_t iconv;
-} ENCODING_CONVERSION;
-
typedef struct ENCODING_CONVERSION_LIST {
ENCODING_CONVERSION *list;
size_t number;
@@ -129,6 +124,9 @@ void wipe_values (VALUE_LIST *values);
void delete_global_info (GLOBAL_INFO *global_info_ref);
void delete_global_commands (GLOBAL_COMMANDS *global_commands_ref);
+char *texinfo_input_file_basename (const char *input_basefile);
+char *remove_extension (const char *input_string);
+char *canonpath (const char *input_file);
char *normalize_encoding_name (const char *text, int *possible_encoding);
ELEMENT *item_line_parent (ELEMENT *current);
ELEMENT *get_label_element (const ELEMENT *e);
@@ -205,6 +203,11 @@ size_t expanded_formats_number (void);
void set_expanded_formats_from_options (EXPANDED_FORMAT *formats,
const OPTIONS *options);
+void add_translated_command (TRANSLATED_COMMAND_LIST *translated_commands,
+ enum command_id cmd,
+ const char *translation);
+void clear_translated_commands (TRANSLATED_COMMAND_LIST *translated_commands);
+
char *enumerate_item_representation (char *specification, int number);
const ELEMENT *get_global_document_command (
diff --git a/tp/Texinfo/XS/texi2any.c b/tp/Texinfo/XS/texi2any.c
index 2eb6d3fe4d..c1350e2f81 100644
--- a/tp/Texinfo/XS/texi2any.c
+++ b/tp/Texinfo/XS/texi2any.c
@@ -130,6 +130,7 @@ static FORMAT_SPECIFICATION formats_table[] = {
NULL, "Texinfo::DebugTree", NULL},
{"textcontent", 0, NULL, "Texinfo::Convert::TextContent", NULL},
{"plaintexinfo", 0, NULL, NULL, NULL},
+ {"rawtext", 0, NULL, NULL, NULL},
{"parse", 0, NULL, NULL, NULL},
{"structure", STTF_nodes_tree | STTF_floats | STTF_split, NULL, NULL, NULL},
{NULL, 0, NULL, NULL, NULL}