texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[no subject]


From: Patrice Dumas
Date: Fri, 4 Oct 2024 11:31:25 -0400 (EDT)

branch: master
commit c09853aab37a3a36090d5701876a46213433360d
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Wed Aug 14 15:28:53 2024 +0200

    * tp/Texinfo/XS/convert/convert_html.c (html_run_stage_handlers):
    prefix run_stage_handlers with html_ and declare it in header file.
    Update callers.
    
    * tp/Texinfo/XS/convert/convert_html.c,
    tp/Texinfo/XS/convert/html_prepare_converter.c
    (init_conversion_after_setup_handler, html_process_css_file)
    (html_prepare_css, fill_jslicense_file_info, html_setup_output)
    (html_setup_convert): move to html_prepare_converter.c.
---
 ChangeLog                                      |  12 +
 tp/Texinfo/XS/convert/convert_html.c           | 623 +------------------------
 tp/Texinfo/XS/convert/convert_html.h           |   7 +-
 tp/Texinfo/XS/convert/html_prepare_converter.c | 618 ++++++++++++++++++++++++
 tp/Texinfo/XS/convert/html_prepare_converter.h |   3 +
 5 files changed, 642 insertions(+), 621 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 9b27f27f23..d09ccaaeac 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -9,6 +9,18 @@
        * tp/Texinfo/XS/convert/convert_html.h: declare node_part_command
        open function.
 
+2024-08-14  Patrice Dumas  <pertusus@free.fr>
+
+       * tp/Texinfo/XS/convert/convert_html.c (html_run_stage_handlers):
+       prefix run_stage_handlers with html_ and declare it in header file.
+       Update callers.
+
+       * tp/Texinfo/XS/convert/convert_html.c,
+       tp/Texinfo/XS/convert/html_prepare_converter.c
+       (init_conversion_after_setup_handler, html_process_css_file)
+       (html_prepare_css, fill_jslicense_file_info, html_setup_output)
+       (html_setup_convert): move to html_prepare_converter.c.
+
 2024-08-14  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Texinfo/Convert/HTML.pm (_html_convert_output, _setup_output),
diff --git a/tp/Texinfo/XS/convert/convert_html.c b/tp/Texinfo/XS/convert/convert_html.c
index 0855864145..b13ba13a17 100644
--- a/tp/Texinfo/XS/convert/convert_html.c
+++ b/tp/Texinfo/XS/convert/convert_html.c
@@ -733,9 +733,6 @@ add_special_target (CONVERTER *self, enum special_target_type type,
 }
 
 
-static const enum command_id contents_elements_options[]
-             = {CM_contents, CM_shortcontents, CM_summarycontents, 0};
-
 static const enum command_id conf_for_documentlanguage[]
                           = {CM_documentlanguage, 0};
 
@@ -1202,7 +1199,8 @@ set_root_commands_targets_node_files (CONVERTER *self)
 }
 
 int
-run_stage_handlers (CONVERTER *self, enum html_stage_handler_stage_type stage)
+html_run_stage_handlers (CONVERTER *self,
+                         enum html_stage_handler_stage_type stage)
 {
   size_t i;
   HTML_STAGE_HANDLER_INFO_LIST *stage_handlers
@@ -14856,7 +14854,8 @@ html_prepare_converted_output_info (CONVERTER *self, const char *output_file,
   int handler_fatal_error_level
      = self->conf->HANDLER_FATAL_ERROR_LEVEL.o.integer;
 
-  int structure_handler_status = run_stage_handlers (self, HSHT_type_structure);
+  int structure_handler_status
+    = html_run_stage_handlers (self, HSHT_type_structure);
 
   if (structure_handler_status < handler_fatal_error_level
       && structure_handler_status > -handler_fatal_error_level)
@@ -15005,7 +15004,7 @@ html_prepare_converted_output_info (CONVERTER *self, const char *output_file,
       self->documentdescription_string = documentdescription_string;
     }
 
-  init_handler_status = run_stage_handlers (self, HSHT_type_init);
+  init_handler_status = html_run_stage_handlers (self, HSHT_type_init);
 
   if (init_handler_status < handler_fatal_error_level
       && init_handler_status > -handler_fatal_error_level)
@@ -15180,20 +15179,6 @@ reset_html_targets (CONVERTER *self, HTML_TARGET_LIST *targets)
     }
 }
 
-void
-init_conversion_after_setup_handler (CONVERTER *self)
-{
-  /* the presence of contents elements in the document is used in diverse
-     places, set it once for all here */
-  set_global_document_commands (self, CL_last, contents_elements_options);
-
-  if (self->conf->OUTPUT_CHARACTERS.o.integer > 0
-      && self->conf->OUTPUT_ENCODING_NAME.o.string
-      /* not sure if strcasecmp is needed or not */
-      && !strcasecmp (self->conf->OUTPUT_ENCODING_NAME.o.string, "utf-8"))
-    self->use_unicode_text = 1;
-}
-
 static void
 reset_unset_no_arg_commands_formatting_context (CONVERTER *self,
                enum command_id cmd, enum conversion_context reset_context,
@@ -15396,602 +15381,6 @@ html_conversion_finalization (CONVERTER *self)
     }
 }
 
-static void
-html_process_css_file (CONVERTER *self, FILE *fh, char *filename,
-                       STRING_LIST *imports, STRING_LIST *rules)
-{
-  TEXT text;
-  int in_rules = 0;
-  int in_comment = 0;
-  int in_import = 0;
-  int in_string = 0;
-  int line_nr = 0;
-
-  /* the rule is to assume utf-8.  There could also be a BOM, and
-     the Content-Type: HTTP header but it is not relevant here.
-     https://developer.mozilla.org/en-US/docs/Web/CSS/@charset
-   */
-  const char *input_encoding = "utf-8";
-  ENCODING_CONVERSION *conversion
-    = get_encoding_conversion (input_encoding, &input_conversions);
-
-  text_init (&text);
-
-  while (1)
-    {
-      size_t n;
-      char *input_line = 0;
-      char *line;
-      const char *p;
-      char in_string_string;
-
-      ssize_t status = getline (&input_line, &n, fh);
-      if (status == -1)
-        {
-          free (input_line);
-          break;
-        }
-      if (!conversion)
-        line = strdup (input_line);
-      else
-        line = encode_with_iconv (conversion->iconv, input_line, 0);
-
-      free (input_line);
-      line_nr ++;
-      /*
-      char *protected_line = debug_protect_eol (line);
-      fprintf (stderr, "NL(%d) '%s'\n", line_nr, protected_line);
-      free (protected_line);
-       */
-      if (line_nr == 1)
-        {
-          int line_len = strlen (line);
-          if (line_len > 13)
-            {
-              size_t n_charset;
-              const char *q;
-              char *charset;
-              if (memcmp (line, "@charset ", 9))
-                goto nocharset;
-              p = line + 9;
-              p += strspn (p, " ");
-              if (*p != '"')
-                goto nocharset;
-              p++;
-              q = p;
-              n_charset = strcspn (p, "\"");
-              if (!n_charset)
-                goto nocharset;
-              p += n_charset;
-              if (*p != '"')
-                goto nocharset;
-              p++;
-              p += strspn (p, " ");
-              if (*p != ';')
-                goto nocharset;
-              p++;
-              p += strspn (p, " ");
-              if (*p && !strchr ("\n\r", *p))
-                goto nocharset;
-              charset = strndup (q, n_charset);
-              conversion
-               = get_encoding_conversion (charset, &input_conversions);
-              free (charset);
-              free (line);
-              continue;
-            }
-          nocharset:
-        }
-
-      if (in_rules)
-        {
-          add_string (line, rules);
-          free (line);
-          continue;
-        }
-
-      text_reset (&text);
-      text_append (&text, "");
-
-      p = line;
-      while (1)
-        {
-          /*
-          char *protected_p = debug_protect_eol (p);
-          char *protected_text = debug_protect_eol (text.text);
-          fprintf (stderr,
-            "%s!in_comment %d in_rules %d in_import %d in_string %d: '%s'\n",
-             protected_text, in_comment, in_rules,
-             in_import, in_string,protected_p);
-          free (protected_p);
-          free (protected_text);
-           */
-
-          if (in_comment)
-            {
-              const char *q = p;
-              while (1)
-                {
-                  const char *k = strchr (q, '*');
-                  if (k)
-                    {
-                      k++;
-                      if (*k == '/')
-                        {
-                          k++;
-                          text_append_n (&text, p, k - p);
-                          p = k;
-                          in_comment = 0;
-                          break;
-                        }
-                      else if (!*k)
-                        break;
-                      else
-                        q = k;
-                    }
-                  else
-                    break;
-                }
-              if (in_comment)
-                {
-                  text_append (&text, p);
-                  add_string (text.text, imports);
-                  break;
-                }
-            }
-          else if (!in_string && *p == '/')
-            {
-              p++;
-              if (*p == '*')
-                {
-                  p++;
-                  text_append_n (&text, "/*", 2);
-                  in_comment = 1;
-                }
-              else
-                {
-                  if (text.end > 0)
-                    {
-                      text_append_n (&text, "\n", 1);
-                      add_string (text.text, imports);
-                    }
-                  p--; /* back on / */
-                  add_string (p, rules);
-                  in_rules = 1;
-                  break;
-                }
-            }
-          else if (!in_string && in_import && *p && strchr ("\"'", *p))
-            {
-              /* strings outside of import start rules */
-              text_append_n (&text, p, 1);
-              in_string_string = *p;
-              p++;
-              in_string = 1;
-            }
-          else if (in_string && *p == '\\' && *(p+1) == in_string_string)
-            {
-              text_append_n (&text, p, 2);
-              p += 2;
-            }
-          else if (in_string && *p == in_string_string)
-            {
-              text_append_n (&text, p, 1);
-              p++;
-              in_string = 0;
-            }
-          else
-            {
-              int matched_import = 0;
-              if (!in_string && !in_import)
-                {
-                  const char *q = p;
-                  if (*q == '\\')
-                    q++;
-                  if (strlen (q) >= 7 && !memcmp (q, "@import", 7))
-                    {
-                      q += 7;
-                      if (!*q || strchr (whitespace_chars, *q))
-                        {
-                          /* spaces except newlines */
-                          q += strspn (q, " \t\v\f");
-                          text_append_n (&text, p, q - p);
-                          in_import = 1;
-                          p = q;
-                          matched_import = 1;
-                        }
-                    }
-                }
-              if (!matched_import)
-                {
-                  if (!in_string && in_import && *p == ';')
-                    {
-                      text_append_n (&text, ";", 1);
-                      p++;
-                      in_import = 0;
-                    }
-                  else if ((in_import || in_string) && *p && !strchr ("\n\r", *p))
-                    {
-                      /* Count any UTF-8 continuation bytes. */
-                      int char_len = 1;
-                      while ((p[char_len] & 0xC0) == 0x80)
-                        char_len++;
-                      text_append_n (&text, p, char_len);
-                      p += char_len;
-                    }
-                  else if (!in_import && *p && !strchr (whitespace_chars, *p))
-                    {
-                      if (text.end > 0)
-                        {
-                          text_append_n (&text, "\n", 1);
-                          add_string (text.text, imports);
-                        }
-                      add_string (p, rules);
-                      in_rules = 1;
-                      break;
-                    }
-                  else if (*p && strchr (whitespace_chars, *p))
-                    {
-                      text_append_n (&text, p, 1);
-                      p++;
-                    }
-                  else if (!*p)
-                    {
-                      add_string (text.text, imports);
-                      break;
-                    }
-                }
-            }
-        }
-      free (line);
-    }
-
-  free (text.text);
-
-  if (in_string || in_string || in_import)
-    {
-      SOURCE_INFO source_info;
-      source_info.macro = 0;
-      source_info.file_name = filename;
-      source_info.line_nr = line_nr;
-
-      if (in_string)
-        {
-          message_list_line_error_ext (&self->error_messages,
-                                     self->conf, MSG_warning, 0,
-                                     &source_info,
-                                "string not closed in css file");
-        }
-      if (in_comment)
-        {
-          message_list_line_error_ext (&self->error_messages,
-                                     self->conf, MSG_warning, 0,
-                                     &source_info,
-                                "--css-include ended in comment");
-        }
-      if (in_import && !in_comment && !in_string)
-        {
-          message_list_line_error_ext (&self->error_messages,
-                                     self->conf, MSG_warning, 0,
-                                     &source_info,
-                                "@import not finished in css file");
-        }
-    }
-}
-
-static void
-html_prepare_css (CONVERTER *self)
-{
-  const STRING_LIST *css_files;
-  STRING_LIST *css_import_lines;
-  STRING_LIST *css_rule_lines;
-  size_t i;
-
-  if (self->conf->NO_CSS.o.integer > 0)
-    return;
-
-  css_files = self->conf->CSS_FILES.o.strlist;
-
-  if (!css_files || css_files->number <= 0)
-    return;
-
-  css_import_lines = new_string_list ();
-  css_rule_lines = new_string_list ();
-
-  for (i = 0; i < css_files->number; i++)
-    {
-      FILE *css_file_fh;
-      char *css_file_path;
-      char *css_file = css_files->list[i];
-      if (!strcmp (css_file, "-"))
-        {
-          css_file_fh = stdin;
-          css_file_path = strdup ("-");
-        }
-      else
-        {
-          css_file_path = locate_include_file (css_file,
-                             self->conf->INCLUDE_DIRECTORIES.o.strlist);
-          if (!css_file_path)
-            {
-              char *css_input_file_name;
-              const char *encoding
-                = self->conf->COMMAND_LINE_ENCODING.o.string;
-              if (encoding)
-                {
-                  int status;
-                  css_input_file_name
-                   = decode_string (css_file, encoding, &status, 0);
-                }
-              else
-                css_input_file_name = strdup (css_file);
-              message_list_document_warn (&self->error_messages,
-                      self->conf, 0, "CSS file %s not found",
-                      css_input_file_name);
-              free (css_input_file_name);
-              continue;
-            }
-
-          css_file_fh = fopen (css_file_path, "r");
-          if (!css_file_fh)
-            {
-              char *css_file_name;
-              const char *encoding
-                = self->conf->COMMAND_LINE_ENCODING.o.string;
-              if (encoding)
-                {
-                  int status;
-                  css_file_name
-                   = decode_string (css_file_path, encoding, &status, 0);
-                }
-              else
-                css_file_name = strdup (css_file_path);
-              message_list_document_warn (&self->error_messages,
-                                          self->conf, 0,
-                         "could not open --include-file %s: %s",
-                               css_file_name, strerror (errno));
-
-              free (css_file_name);
-              free (css_file_path);
-              continue;
-            }
-        }
-
-      html_process_css_file (self, css_file_fh, css_file_path,
-                             css_import_lines, css_rule_lines);
-
-      if (fclose (css_file_fh))
-        {
-          char *css_file_name;
-          const char *encoding
-            = self->conf->COMMAND_LINE_ENCODING.o.string;
-          if (encoding)
-            {
-              int status;
-              css_file_name
-               = decode_string (css_file_path, encoding, &status, 0);
-            }
-          else
-            css_file_name = strdup (css_file_path);
-
-          message_list_document_error (&self->error_messages, self->conf, 0,
-                         "error on closing CSS file  %s: %s",
-                         css_file_name, strerror (errno));
-
-          free (css_file_path);
-        }
-
-      free (css_file_path);
-    }
-
-  if (self->conf->DEBUG.o.integer > 0)
-    {
-      if (css_import_lines->number > 0)
-        {
-          fprintf (stderr, "# css import lines\n");
-          for (i = 0; i < css_import_lines->number; i++)
-            fprintf (stderr, "%s", css_import_lines->list[i]);
-        }
-      if (css_rule_lines->number > 0)
-        {
-          fprintf (stderr, "# css rule lines\n");
-          for (i = 0; i < css_rule_lines->number; i++)
-            fprintf (stderr, "%s", css_rule_lines->list[i]);
-        }
-    }
-
-  for (i = 0; i < css_import_lines->number; i++)
-    html_css_add_info (self, CI_css_info_imports, css_import_lines->list[i]);
-
-  for (i = 0; i < css_rule_lines->number; i++)
-    html_css_add_info (self, CI_css_info_rules, css_rule_lines->list[i]);
-
-  destroy_strings_list (css_import_lines);
-  destroy_strings_list (css_rule_lines);
-}
-
-static void
-fill_jslicense_file_info (JSLICENSE_FILE_INFO *jslicense_file_info,
-                          const char *filename, const char *license,
-                          const char *url, const char *source)
-{
-  jslicense_file_info->filename = strdup (filename);
-  jslicense_file_info->license = strdup (license);
-  jslicense_file_info->url = strdup (url);
-  jslicense_file_info->source = strdup (source);
-}
-
-/* first function to call a stage handler */
-int
-html_setup_output (CONVERTER *self, char **paths)
-{
-  int handler_fatal_error_level;
-  int setup_handler_status;
-  int js_categories_list_nr = 0;
-  const char *structure_preamble_document_language;
-
-  if (self->conf->OUTFILE.o.string)
-    {
-      int i;
-      int need_unsplit = 0;
-      const char *outfile = self->conf->OUTFILE.o.string;
-      if (!strlen(outfile) || !strcmp (outfile, "-"))
-        need_unsplit = 1;
-      else
-        {
-          for (i = 0; null_device_names[i]; i++)
-            {
-              if (!strcmp (null_device_names[i], outfile))
-                {
-                  need_unsplit = 1;
-                  break;
-                }
-            }
-        }
-      if (need_unsplit)
-        {
-          option_force_conf (&self->conf->SPLIT, 0, "");
-          option_force_conf (&self->conf->MONOLITHIC, 1, 0);
-        }
-    }
-
-  if (self->conf->SPLIT.o.string && strlen (self->conf->SPLIT.o.string))
-    option_set_conf (&self->conf->NODE_FILES, 1, 0);
-
-  option_set_conf (&self->conf->EXTERNAL_CROSSREF_SPLIT, 0,
-            self->conf->SPLIT.o.string);
-
-  handler_fatal_error_level = self->conf->HANDLER_FATAL_ERROR_LEVEL.o.integer;
-  if (handler_fatal_error_level < 0)
-    {
-      handler_fatal_error_level = 100;
-      /* see options_data.txt. TODO automate? */
-      option_force_conf (&self->conf->HANDLER_FATAL_ERROR_LEVEL,
-                         handler_fatal_error_level, 0);
-    }
-
-  if (self->conf->HTML_MATH.o.string
-      && !strcmp (self->conf->HTML_MATH.o.string, "mathjax"))
-    {
-     /*
-     See https://www.gnu.org/licenses/javascript-labels.html
-
-     The link to the source for mathjax does not strictly follow the advice
-     there: instead we link to instructions for obtaining the full source in
-     its preferred form of modification.
-      */
-       if (!self->conf->MATHJAX_SCRIPT.o.string)
-         option_set_conf (&self->conf->MATHJAX_SCRIPT, 0,
-            "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-svg.js");
-
-       if (!self->conf->MATHJAX_SOURCE.o.string)
-         option_set_conf (&self->conf->MATHJAX_SOURCE, 0,
- "http://docs.mathjax.org/en/latest/web/hosting.html#getting-mathjax-via-git");
-
-       if (!self->conf->MATHJAX_CONFIGURATION.o.string)
-         option_set_conf (&self->conf->MATHJAX_CONFIGURATION, 0,
- "  options: {\n"
- "    skipHtmlTags: {'[-]': ['pre']},       // do not skip pre\n"
- "    ignoreHtmlClass: 'tex2jax_ignore',\n"
- "    processHtmlClass: 'tex2jax_process'\n"
- "  },\n"
- "  tex: {\n"
- "    processEscapes: false,      // do not use \\$ to produce a literal dollar sign\n"
- "    processEnvironments: false, // do not process \\begin{xxx}...\\end{xxx} outside math mode\n"
- "    processRefs: false,         // do not process \\ref{...} outside of math mode\n"
- "    displayMath: [             // start/end delimiter pairs for display math\n"
- "      ['\\\\[', '\\\\]']\n"
- "    ],\n"
- "  },");
-    }
-
-  setup_handler_status = run_stage_handlers (self, HSHT_type_setup);
-
-  if (setup_handler_status < handler_fatal_error_level
-      && setup_handler_status > -handler_fatal_error_level)
-    {}
-  else
-    return 0;
-
-  set_global_document_commands (self, CL_preamble, conf_for_documentlanguage);
-
-  structure_preamble_document_language
-    = self->conf->documentlanguage.o.string;
-
-  if (structure_preamble_document_language
-      && strlen (structure_preamble_document_language))
-    {
-      char *body_element_attributes;
-      xasprintf (&body_element_attributes, "lang=\"%s\"",
-                 structure_preamble_document_language);
-      option_set_conf (&self->conf->BODY_ELEMENT_ATTRIBUTES,
-                0, body_element_attributes);
-      free (body_element_attributes);
-    }
-  set_global_document_commands (self, CL_before, conf_for_documentlanguage);
-
-  init_conversion_after_setup_handler (self);
-
-  copy_options (self->init_conf, self->conf);
-
-  if (self->conf->HTML_MATH.o.string
-      && !strcmp (self->conf->HTML_MATH.o.string, "mathjax"))
-    js_categories_list_nr++;
-
-  if (self->conf->INFO_JS_DIR.o.string)
-    js_categories_list_nr++;
-
-  if (js_categories_list_nr > 0)
-    {
-      int i = 0;
-      initialize_js_categories_list (&self->jslicenses, js_categories_list_nr);
-      if (self->conf->HTML_MATH.o.string
-          && !strcmp (self->conf->HTML_MATH.o.string, "mathjax"))
-        {
-          JSLICENSE_FILE_INFO_LIST *jslicences_files_info
-            = &self->jslicenses.list[i];
-          initialize_jslicense_files (jslicences_files_info, "mathjax", 1);
-          fill_jslicense_file_info (&jslicences_files_info->list[0],
-                                    self->conf->MATHJAX_SCRIPT.o.string,
-                                    "Apache License, Version 2.0.",
-                                "https://www.apache.org/licenses/LICENSE-2.0",
-                                    self->conf->MATHJAX_SOURCE.o.string);
-          i++;
-        }
-      if (self->conf->INFO_JS_DIR.o.string)
-        {
-          JSLICENSE_FILE_INFO_LIST *jslicences_files_info
-            = &self->jslicenses.list[i];
-          initialize_jslicense_files (jslicences_files_info, "infojs", 2);
-          fill_jslicense_file_info (&jslicences_files_info->list[0],
-                                    "js/info.js",
-                                    "GNU General Public License 3.0 or later",
-                                    "http://www.gnu.org/licenses/gpl-3.0.html",
-                                    "js/info.js");
-          fill_jslicense_file_info (&jslicences_files_info->list[1],
-                                    "js/modernizr.js", "Expat",
-                                    "http://www.jclark.com/xml/copying.txt",
-                                    "js/modernizr.js");
-        }
-    }
-
-  html_prepare_css (self);
-
-  /* ($output_file, $destination_directory, $output_filename, $document_name) */
-  determine_files_and_directory (self, self->output_format, paths);
-
-  self->document_name = strdup (paths[3]);
-  self->destination_directory = strdup (paths[1]);
-
-  return 1;
-}
-
-void
-html_setup_convert (CONVERTER *self)
-{
-  init_conversion_after_setup_handler (self);
-}
-
 static void
 clear_type_explanations (EXPLAINED_COMMAND_TYPE_LIST *type_explanations)
 {
@@ -18405,7 +17794,7 @@ html_finish_output (CONVERTER *self, const char *output_file,
 
   html_do_js_files (self);
 
-  finish_handler_status = run_stage_handlers (self, HSHT_type_finish);
+  finish_handler_status = html_run_stage_handlers (self, HSHT_type_finish);
 
   if (finish_handler_status < handler_fatal_error_level
       && finish_handler_status > -handler_fatal_error_level)
diff --git a/tp/Texinfo/XS/convert/convert_html.h b/tp/Texinfo/XS/convert/convert_html.h
index c256365ffa..aeb6a8dd3b 100644
--- a/tp/Texinfo/XS/convert/convert_html.h
+++ b/tp/Texinfo/XS/convert/convert_html.h
@@ -45,12 +45,8 @@ extern const char *direction_string_context_names[];
 extern const char *html_stage_handler_stage_type_names[];
 extern COMMAND_ID_LIST no_arg_formatted_cmd;
 
-void init_conversion_after_setup_handler (CONVERTER *self);
 void html_conversion_finalization (CONVERTER *self);
 
-int html_setup_output (CONVERTER *self, char **paths);
-void html_setup_convert (CONVERTER *self);
-
 ROOT_AND_UNIT *html_get_tree_root_element (CONVERTER *self,
                                            const ELEMENT *command,
                                            int find_container);
@@ -62,6 +58,9 @@ const char *html_special_unit_info (const CONVERTER *self,
                                     enum special_unit_info_type type,
                                     const char *special_unit_variety);
 
+int html_run_stage_handlers (CONVERTER *self,
+                             enum html_stage_handler_stage_type stage);
+
 void html_default_format_protect_text (const char *text, TEXT *result);
 
 HTMLXREF_MANUAL *new_htmlxref_manual_list (size_t size);
diff --git a/tp/Texinfo/XS/convert/html_prepare_converter.c b/tp/Texinfo/XS/convert/html_prepare_converter.c
index 6972d2fa3a..f78c0bfdc3 100644
--- a/tp/Texinfo/XS/convert/html_prepare_converter.c
+++ b/tp/Texinfo/XS/convert/html_prepare_converter.c
@@ -19,6 +19,7 @@
 #include <stdio.h>
 #include <stddef.h>
 #include <string.h>
+#include <errno.h>
 
 #include "text.h"
 #include "command_ids.h"
@@ -29,6 +30,7 @@
 /* new_element */
 #include "tree.h"
 #include "utils.h"
+#include "errors.h"
 /* unicode_character_brace_no_arg_commands */
 #include "unicode.h"
 #include "builtin_commands.h"
@@ -138,6 +140,12 @@ CMD_VARIETY command_special_unit_variety[] = {
 static const enum command_id conf_for_special_units[]
                           = {CM_footnotestyle, 0};
 
+static const enum command_id contents_elements_options[]
+             = {CM_contents, CM_shortcontents, CM_summarycontents, 0};
+
+static const enum command_id conf_for_documentlanguage[]
+                          = {CM_documentlanguage, 0};
+
 static enum element_type ignored_types[] = {
     ET_ignorable_spaces_after_command,
     ET_postamble_after_end,
@@ -2171,6 +2179,616 @@ html_initialize_output_state (CONVERTER *self, const char *context)
     }
 }
 
+/* Initialization that has to be redone once the customization is settled;
+   called by both html_setup_output and html_setup_convert. */
+static void
+init_conversion_after_setup_handler (CONVERTER *self)
+{
+  const char *output_encoding;
+
+  /* whether contents-like commands appear in the document is consulted
+     in many places, so record it a single time here */
+  set_global_document_commands (self, CL_last, contents_elements_options);
+
+  if (self->conf->OUTPUT_CHARACTERS.o.integer <= 0)
+    return;
+
+  output_encoding = self->conf->OUTPUT_ENCODING_NAME.o.string;
+  if (!output_encoding)
+    return;
+
+  /* it is unclear whether a case-insensitive comparison is required */
+  if (strcasecmp (output_encoding, "utf-8") == 0)
+    self->use_unicode_text = 1;
+}
+
+/* Read the CSS file open as FH line by line and split its content in two.
+   The leading part (@import directives, comments and whitespace) is added
+   to IMPORTS; everything starting at the first other construct is added
+   to RULES.  If the first line is a well-formed @charset directive, it
+   selects the input encoding and is not output.
+   FILENAME is only used as the file name of the warnings registered in
+   SELF when the file ends within a string, a comment or an @import. */
+static void
+html_process_css_file (CONVERTER *self, FILE *fh, char *filename,
+                       STRING_LIST *imports, STRING_LIST *rules)
+{
+  TEXT text;
+  int in_rules = 0;
+  int in_comment = 0;
+  int in_import = 0;
+  int in_string = 0;
+  /* quote character that opened the current string.  Declared outside of
+     the per-line loop, like in_string, as a string can span lines */
+  char in_string_string = 0;
+  int line_nr = 0;
+
+  /* the rule is to assume utf-8.  There could also be a BOM, and
+     the Content-Type: HTTP header but it is not relevant here.
+     https://developer.mozilla.org/en-US/docs/Web/CSS/@charset
+   */
+  const char *input_encoding = "utf-8";
+  ENCODING_CONVERSION *conversion
+    = get_encoding_conversion (input_encoding, &input_conversions);
+
+  text_init (&text);
+
+  while (1)
+    {
+      size_t n = 0;
+      char *input_line = 0;
+      char *line;
+      const char *p;
+
+      ssize_t status = getline (&input_line, &n, fh);
+      if (status == -1)
+        {
+          free (input_line);
+          break;
+        }
+      if (!conversion)
+        line = strdup (input_line);
+      else
+        line = encode_with_iconv (conversion->iconv, input_line, 0);
+
+      free (input_line);
+      line_nr ++;
+      /*
+      char *protected_line = debug_protect_eol (line);
+      fprintf (stderr, "NL(%d) '%s'\n", line_nr, protected_line);
+      free (protected_line);
+       */
+      if (line_nr == 1)
+        {
+          /* only the first line may hold @charset "NAME"; */
+          size_t line_len = strlen (line);
+          if (line_len > 13)
+            {
+              size_t n_charset;
+              const char *q;
+              char *charset;
+              if (memcmp (line, "@charset ", 9))
+                goto nocharset;
+              p = line + 9;
+              p += strspn (p, " ");
+              if (*p != '"')
+                goto nocharset;
+              p++;
+              q = p;
+              n_charset = strcspn (p, "\"");
+              if (!n_charset)
+                goto nocharset;
+              p += n_charset;
+              if (*p != '"')
+                goto nocharset;
+              p++;
+              p += strspn (p, " ");
+              if (*p != ';')
+                goto nocharset;
+              p++;
+              p += strspn (p, " ");
+              if (*p && !strchr ("\n\r", *p))
+                goto nocharset;
+              charset = strndup (q, n_charset);
+              conversion
+               = get_encoding_conversion (charset, &input_conversions);
+              free (charset);
+              free (line);
+              continue;
+            }
+          /* a label has to be followed by a statement before C23 */
+          nocharset: ;
+        }
+
+      if (in_rules)
+        {
+          /* once rules have started, lines are passed through as is */
+          add_string (line, rules);
+          free (line);
+          continue;
+        }
+
+      text_reset (&text);
+      text_append (&text, "");
+
+      p = line;
+      while (1)
+        {
+          /*
+          char *protected_p = debug_protect_eol (p);
+          char *protected_text = debug_protect_eol (text.text);
+          fprintf (stderr,
+            "%s!in_comment %d in_rules %d in_import %d in_string %d: '%s'\n",
+             protected_text, in_comment, in_rules,
+             in_import, in_string,protected_p);
+          free (protected_p);
+          free (protected_text);
+           */
+
+          if (in_comment)
+            {
+              /* look for the end of comment on the line */
+              const char *q = p;
+              while (1)
+                {
+                  const char *k = strchr (q, '*');
+                  if (k)
+                    {
+                      k++;
+                      if (*k == '/')
+                        {
+                          k++;
+                          text_append_n (&text, p, k - p);
+                          p = k;
+                          in_comment = 0;
+                          break;
+                        }
+                      else if (!*k)
+                        break;
+                      else
+                        q = k;
+                    }
+                  else
+                    break;
+                }
+              if (in_comment)
+                {
+                  /* comment continues on the next line */
+                  text_append (&text, p);
+                  add_string (text.text, imports);
+                  break;
+                }
+            }
+          else if (!in_string && *p == '/')
+            {
+              p++;
+              if (*p == '*')
+                {
+                  p++;
+                  text_append_n (&text, "/*", 2);
+                  in_comment = 1;
+                }
+              else
+                {
+                  if (text.end > 0)
+                    {
+                      text_append_n (&text, "\n", 1);
+                      add_string (text.text, imports);
+                    }
+                  p--; /* back on / */
+                  add_string (p, rules);
+                  in_rules = 1;
+                  break;
+                }
+            }
+          else if (!in_string && in_import && *p && strchr ("\"'", *p))
+            {
+              /* strings outside of import start rules */
+              text_append_n (&text, p, 1);
+              in_string_string = *p;
+              p++;
+              in_string = 1;
+            }
+          else if (in_string && *p == '\\' && *(p+1) == in_string_string)
+            {
+              /* escaped quote, does not end the string */
+              text_append_n (&text, p, 2);
+              p += 2;
+            }
+          else if (in_string && *p == in_string_string)
+            {
+              text_append_n (&text, p, 1);
+              p++;
+              in_string = 0;
+            }
+          else
+            {
+              int matched_import = 0;
+              if (!in_string && !in_import)
+                {
+                  const char *q = p;
+                  if (*q == '\\')
+                    q++;
+                  if (strlen (q) >= 7 && !memcmp (q, "@import", 7))
+                    {
+                      q += 7;
+                      if (!*q || strchr (whitespace_chars, *q))
+                        {
+                          /* spaces except newlines */
+                          q += strspn (q, " \t\v\f");
+                          text_append_n (&text, p, q - p);
+                          in_import = 1;
+                          p = q;
+                          matched_import = 1;
+                        }
+                    }
+                }
+              if (!matched_import)
+                {
+                  if (!in_string && in_import && *p == ';')
+                    {
+                      text_append_n (&text, ";", 1);
+                      p++;
+                      in_import = 0;
+                    }
+                  else if ((in_import || in_string) && *p
+                           && !strchr ("\n\r", *p))
+                    {
+                      /* Count any UTF-8 continuation bytes. */
+                      int char_len = 1;
+                      while ((p[char_len] & 0xC0) == 0x80)
+                        char_len++;
+                      text_append_n (&text, p, char_len);
+                      p += char_len;
+                    }
+                  else if (!in_import && *p && !strchr (whitespace_chars, *p))
+                    {
+                      /* anything else than an @import starts the rules */
+                      if (text.end > 0)
+                        {
+                          text_append_n (&text, "\n", 1);
+                          add_string (text.text, imports);
+                        }
+                      add_string (p, rules);
+                      in_rules = 1;
+                      break;
+                    }
+                  else if (*p && strchr (whitespace_chars, *p))
+                    {
+                      text_append_n (&text, p, 1);
+                      p++;
+                    }
+                  else if (!*p)
+                    {
+                      add_string (text.text, imports);
+                      break;
+                    }
+                }
+            }
+        }
+      free (line);
+    }
+
+  free (text.text);
+
+  /* warn on constructs left open at the end of the file */
+  if (in_string || in_comment || in_import)
+    {
+      SOURCE_INFO source_info;
+      source_info.macro = 0;
+      source_info.file_name = filename;
+      source_info.line_nr = line_nr;
+
+      if (in_string)
+        {
+          message_list_line_error_ext (&self->error_messages,
+                                     self->conf, MSG_warning, 0,
+                                     &source_info,
+                                "string not closed in css file");
+        }
+      if (in_comment)
+        {
+          message_list_line_error_ext (&self->error_messages,
+                                     self->conf, MSG_warning, 0,
+                                     &source_info,
+                                "--css-include ended in comment");
+        }
+      if (in_import && !in_comment && !in_string)
+        {
+          message_list_line_error_ext (&self->error_messages,
+                                     self->conf, MSG_warning, 0,
+                                     &source_info,
+                                "@import not finished in css file");
+        }
+    }
+}
+
+/* Return a newly allocated copy of FILE_NAME to be used in messages:
+   decoded from the COMMAND_LINE_ENCODING if that option is set, a plain
+   copy otherwise.  To be freed by the caller. */
+static char *
+css_file_name_for_message (const CONVERTER *self, char *file_name)
+{
+  const char *encoding = self->conf->COMMAND_LINE_ENCODING.o.string;
+
+  if (encoding)
+    {
+      int status;
+      return decode_string (file_name, encoding, &status, 0);
+    }
+  return strdup (file_name);
+}
+
+/* Process the files listed in the CSS_FILES option, unless NO_CSS is set,
+   and register the resulting import and rule lines in SELF with
+   html_css_add_info.  The file name "-" is read from stdin; other names
+   are looked up with locate_include_file in INCLUDE_DIRECTORIES.  A file
+   that is not found or cannot be opened is skipped with a warning. */
+static void
+html_prepare_css (CONVERTER *self)
+{
+  const STRING_LIST *css_files;
+  STRING_LIST *css_import_lines;
+  STRING_LIST *css_rule_lines;
+  size_t i;
+
+  if (self->conf->NO_CSS.o.integer > 0)
+    return;
+
+  css_files = self->conf->CSS_FILES.o.strlist;
+
+  if (!css_files || css_files->number <= 0)
+    return;
+
+  css_import_lines = new_string_list ();
+  css_rule_lines = new_string_list ();
+
+  for (i = 0; i < css_files->number; i++)
+    {
+      FILE *css_file_fh;
+      char *css_file_path;
+      char *css_file = css_files->list[i];
+      if (!strcmp (css_file, "-"))
+        {
+          css_file_fh = stdin;
+          css_file_path = strdup ("-");
+        }
+      else
+        {
+          css_file_path = locate_include_file (css_file,
+                             self->conf->INCLUDE_DIRECTORIES.o.strlist);
+          if (!css_file_path)
+            {
+              char *css_input_file_name
+                = css_file_name_for_message (self, css_file);
+              message_list_document_warn (&self->error_messages,
+                      self->conf, 0, "CSS file %s not found",
+                      css_input_file_name);
+              free (css_input_file_name);
+              continue;
+            }
+
+          css_file_fh = fopen (css_file_path, "r");
+          if (!css_file_fh)
+            {
+              /* save errno before calls that may modify it */
+              int saved_errno = errno;
+              char *css_file_name
+                = css_file_name_for_message (self, css_file_path);
+              message_list_document_warn (&self->error_messages,
+                                          self->conf, 0,
+                         "could not open --include-file %s: %s",
+                               css_file_name, strerror (saved_errno));
+
+              free (css_file_name);
+              free (css_file_path);
+              continue;
+            }
+        }
+
+      html_process_css_file (self, css_file_fh, css_file_path,
+                             css_import_lines, css_rule_lines);
+
+      if (fclose (css_file_fh))
+        {
+          /* save errno before calls that may modify it */
+          int saved_errno = errno;
+          char *css_file_name
+            = css_file_name_for_message (self, css_file_path);
+
+          message_list_document_error (&self->error_messages, self->conf, 0,
+                         "error on closing CSS file  %s: %s",
+                         css_file_name, strerror (saved_errno));
+
+          /* css_file_path is freed below, on every path */
+          free (css_file_name);
+        }
+
+      free (css_file_path);
+    }
+
+  if (self->conf->DEBUG.o.integer > 0)
+    {
+      if (css_import_lines->number > 0)
+        {
+          fprintf (stderr, "# css import lines\n");
+          for (i = 0; i < css_import_lines->number; i++)
+            fprintf (stderr, "%s", css_import_lines->list[i]);
+        }
+      if (css_rule_lines->number > 0)
+        {
+          fprintf (stderr, "# css rule lines\n");
+          for (i = 0; i < css_rule_lines->number; i++)
+            fprintf (stderr, "%s", css_rule_lines->list[i]);
+        }
+    }
+
+  for (i = 0; i < css_import_lines->number; i++)
+    html_css_add_info (self, CI_css_info_imports, css_import_lines->list[i]);
+
+  for (i = 0; i < css_rule_lines->number; i++)
+    html_css_add_info (self, CI_css_info_rules, css_rule_lines->list[i]);
+
+  destroy_strings_list (css_import_lines);
+  destroy_strings_list (css_rule_lines);
+}
+
+/* Set the four fields of JSLICENSE_FILE_INFO to newly allocated copies
+   of FILENAME, LICENSE, URL and SOURCE. */
+static void
+fill_jslicense_file_info (JSLICENSE_FILE_INFO *jslicense_file_info,
+                          const char *filename, const char *license,
+                          const char *url, const char *source)
+{
+  JSLICENSE_FILE_INFO *info = jslicense_file_info;
+  char *filename_copy = strdup (filename);
+  char *license_copy = strdup (license);
+  char *url_copy = strdup (url);
+  char *source_copy = strdup (source);
+
+  info->filename = filename_copy;
+  info->license = license_copy;
+  info->url = url_copy;
+  info->source = source_copy;
+}
+
+/* first function to call a stage handler */
+/* Prepare SELF for output.  In order:
+    - force unsplit monolithic output if OUTFILE is empty, "-" or one of
+      the null_device_names;
+    - set options depending on SPLIT, the default HANDLER_FATAL_ERROR_LEVEL
+      and, if HTML_MATH is "mathjax", the unset MATHJAX_* options;
+    - run the setup stage handlers;
+    - set BODY_ELEMENT_ATTRIBUTES from the preamble @documentlanguage;
+    - set up the javascript licences information and the CSS;
+    - call determine_files_and_directory with PATHS, then copy PATHS[3]
+      to SELF->document_name and PATHS[1] to SELF->destination_directory.
+   Return 0 if the status returned by the setup stage handlers reaches
+   the handler fatal error level, 1 otherwise. */
+int
+html_setup_output (CONVERTER *self, char **paths)
+{
+  int handler_fatal_error_level;
+  int setup_handler_status;
+  int js_categories_list_nr = 0;
+  const char *structure_preamble_document_language;
+
+  if (self->conf->OUTFILE.o.string)
+    {
+      int i;
+      int need_unsplit = 0;
+      const char *outfile = self->conf->OUTFILE.o.string;
+      if (!strlen(outfile) || !strcmp (outfile, "-"))
+        need_unsplit = 1;
+      else
+        {
+          for (i = 0; null_device_names[i]; i++)
+            {
+              if (!strcmp (null_device_names[i], outfile))
+                {
+                  need_unsplit = 1;
+                  break;
+                }
+            }
+        }
+      if (need_unsplit)
+        {
+          option_force_conf (&self->conf->SPLIT, 0, "");
+          option_force_conf (&self->conf->MONOLITHIC, 1, 0);
+        }
+    }
+
+  if (self->conf->SPLIT.o.string && strlen (self->conf->SPLIT.o.string))
+    option_set_conf (&self->conf->NODE_FILES, 1, 0);
+
+  option_set_conf (&self->conf->EXTERNAL_CROSSREF_SPLIT, 0,
+            self->conf->SPLIT.o.string);
+
+  handler_fatal_error_level = self->conf->HANDLER_FATAL_ERROR_LEVEL.o.integer;
+  if (handler_fatal_error_level < 0)
+    {
+      handler_fatal_error_level = 100;
+      /* see options_data.txt. TODO automate? */
+      option_force_conf (&self->conf->HANDLER_FATAL_ERROR_LEVEL,
+                         handler_fatal_error_level, 0);
+    }
+
+  if (self->conf->HTML_MATH.o.string
+      && !strcmp (self->conf->HTML_MATH.o.string, "mathjax"))
+    {
+     /*
+     See https://www.gnu.org/licenses/javascript-labels.html
+
+     The link to the source for mathjax does not strictly follow the advice
+     there: instead we link to instructions for obtaining the full source in
+     its preferred form of modification.
+      */
+       if (!self->conf->MATHJAX_SCRIPT.o.string)
+         option_set_conf (&self->conf->MATHJAX_SCRIPT, 0,
+            "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-svg.js");
+
+       if (!self->conf->MATHJAX_SOURCE.o.string)
+         option_set_conf (&self->conf->MATHJAX_SOURCE, 0,
+ "http://docs.mathjax.org/en/latest/web/hosting.html#getting-mathjax-via-git");
+
+       if (!self->conf->MATHJAX_CONFIGURATION.o.string)
+         option_set_conf (&self->conf->MATHJAX_CONFIGURATION, 0,
+ "  options: {\n"
+ "    skipHtmlTags: {'[-]': ['pre']},       // do not skip pre\n"
+ "    ignoreHtmlClass: 'tex2jax_ignore',\n"
+ "    processHtmlClass: 'tex2jax_process'\n"
+ "  },\n"
+ "  tex: {\n"
+ "    processEscapes: false,      // do not use \\$ to produce a literal dollar sign\n"
+ "    processEnvironments: false, // do not process \\begin{xxx}...\\end{xxx} outside math mode\n"
+ "    processRefs: false,         // do not process \\ref{...} outside of math mode\n"
+ "    displayMath: [             // start/end delimiter pairs for display math\n"
+ "      ['\\\\[', '\\\\]']\n"
+ "    ],\n"
+ "  },");
+    }
+
+  setup_handler_status = html_run_stage_handlers (self, HSHT_type_setup);
+
+  /* give up if the handlers status is outside of the open interval
+     ]-handler_fatal_error_level, handler_fatal_error_level[ */
+  if (setup_handler_status >= handler_fatal_error_level
+      || setup_handler_status <= -handler_fatal_error_level)
+    return 0;
+
+  /* the lang attribute is based on the preamble @documentlanguage */
+  set_global_document_commands (self, CL_preamble, conf_for_documentlanguage);
+
+  structure_preamble_document_language
+    = self->conf->documentlanguage.o.string;
+
+  if (structure_preamble_document_language
+      && strlen (structure_preamble_document_language))
+    {
+      char *body_element_attributes;
+      xasprintf (&body_element_attributes, "lang=\"%s\"",
+                 structure_preamble_document_language);
+      option_set_conf (&self->conf->BODY_ELEMENT_ATTRIBUTES,
+                0, body_element_attributes);
+      free (body_element_attributes);
+    }
+  set_global_document_commands (self, CL_before, conf_for_documentlanguage);
+
+  init_conversion_after_setup_handler (self);
+
+  /* NOTE(review): presumably saves the current options in init_conf;
+     confirm the argument order of copy_options */
+  copy_options (self->init_conf, self->conf);
+
+  /* one javascript licences category for mathjax, one for info.js */
+  if (self->conf->HTML_MATH.o.string
+      && !strcmp (self->conf->HTML_MATH.o.string, "mathjax"))
+    js_categories_list_nr++;
+
+  if (self->conf->INFO_JS_DIR.o.string)
+    js_categories_list_nr++;
+
+  if (js_categories_list_nr > 0)
+    {
+      int i = 0;
+      initialize_js_categories_list (&self->jslicenses, js_categories_list_nr);
+      if (self->conf->HTML_MATH.o.string
+          && !strcmp (self->conf->HTML_MATH.o.string, "mathjax"))
+        {
+          JSLICENSE_FILE_INFO_LIST *jslicences_files_info
+            = &self->jslicenses.list[i];
+          initialize_jslicense_files (jslicences_files_info, "mathjax", 1);
+          fill_jslicense_file_info (&jslicences_files_info->list[0],
+                                    self->conf->MATHJAX_SCRIPT.o.string,
+                                    "Apache License, Version 2.0.",
+                                "https://www.apache.org/licenses/LICENSE-2.0",
+                                    self->conf->MATHJAX_SOURCE.o.string);
+          i++;
+        }
+      if (self->conf->INFO_JS_DIR.o.string)
+        {
+          JSLICENSE_FILE_INFO_LIST *jslicences_files_info
+            = &self->jslicenses.list[i];
+          initialize_jslicense_files (jslicences_files_info, "infojs", 2);
+          fill_jslicense_file_info (&jslicences_files_info->list[0],
+                                    "js/info.js",
+                                    "GNU General Public License 3.0 or later",
+                                    "http://www.gnu.org/licenses/gpl-3.0.html",
+                                    "js/info.js");
+          fill_jslicense_file_info (&jslicences_files_info->list[1],
+                                    "js/modernizr.js", "Expat",
+                                    "http://www.jclark.com/xml/copying.txt",
+                                    "js/modernizr.js");
+        }
+    }
+
+  html_prepare_css (self);
+
+  /* ($output_file, $destination_directory, $output_filename, $document_name) */
+  determine_files_and_directory (self, self->output_format, paths);
+
+  self->document_name = strdup (paths[3]);
+  self->destination_directory = strdup (paths[1]);
+
+  return 1;
+}
+
+/* Setup before a conversion: only runs the initialization shared with
+   html_setup_output, init_conversion_after_setup_handler. */
+void
+html_setup_convert (CONVERTER *self)
+{
+  init_conversion_after_setup_handler (self);
+}
+
 static OUTPUT_UNIT *
 register_special_unit (CONVERTER *self, char *special_unit_variety)
 {
diff --git a/tp/Texinfo/XS/convert/html_prepare_converter.h b/tp/Texinfo/XS/convert/html_prepare_converter.h
index 08bd005b01..aa51827359 100644
--- a/tp/Texinfo/XS/convert/html_prepare_converter.h
+++ b/tp/Texinfo/XS/convert/html_prepare_converter.h
@@ -15,6 +15,9 @@ char ***new_directions_strings_type (int nr_string_directions,
 
 void html_initialize_output_state (CONVERTER *self, const char *context);
 
+int html_setup_output (CONVERTER *self, char **paths);
+void html_setup_convert (CONVERTER *self);
+
 void html_prepare_conversion_units (CONVERTER *self);
 
 #endif



reply via email to

[Prev in Thread] Current Thread [Next in Thread]