groff-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[groff] 19/80: [grohtml]: Revise and document new option.


From: G. Branden Robinson
Subject: [groff] 19/80: [grohtml]: Revise and document new option.
Date: Sat, 30 Nov 2024 04:02:15 -0500 (EST)

gbranden pushed a commit to branch master
in repository groff.

commit 98a00440a34a0492b2b0854a7ff5426a2a30ef92
Author: G. Branden Robinson <g.branden.robinson@gmail.com>
AuthorDate: Wed Nov 27 03:56:28 2024 -0600

    [grohtml]: Revise and document new option.
    
    Rename the `-U` option provided by Takuji to `-k`, and document it.
    
    * src/preproc/html/pre-html.cpp (scanArguments): Recognize but ignore
      `-k` (rather than `-U`) option.  Also, make the argument mandatory.
      Nothing else in groff supports an optional option argument, which can
      be ambiguous to parse, especially in light of groff(1)'s `-P` option.
    
    * src/devices/grohtml/post-html.cpp (main): Recognize and interpret `-k`
      option, renamed from `-U`.  Make the argument mandatory, and expect
      values of "ascii", "mixed", or "utf-8" (case-insensitively).  Throw
      warning if argument unrecognized.
    
      (usage): Update.
    
    * src/devices/grohtml/grohtml.1.man (Synopsis, Options): Document it.
    
    Also annotate potential code simplification.
---
 ChangeLog                         | 19 ++++++++++++++++
 src/devices/grohtml/grohtml.1.man | 14 ++++++++++++
 src/devices/grohtml/post-html.cpp | 46 ++++++++++++++++++++-------------------
 src/preproc/html/pre-html.cpp     | 10 ++++-----
 4 files changed, 61 insertions(+), 28 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 1a42e9f5c..862ee14e3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+2024-11-27  G. Branden Robinson <g.branden.robinson@gmail.com>
+
+       [grohtml]: Rename the `-U` option provided by Takuji to `-k`,
+       and document it.
+
+       * src/preproc/html/pre-html.cpp (scanArguments): Recognize but
+       ignore `-k` (rather than `-U`) option.  Also, make the argument
+       mandatory.  Nothing else in groff supports an optional option
+       argument, which can be ambiguous to parse, especially in light
+       of groff(1)'s `-P` option.
+       * src/devices/grohtml/post-html.cpp (main): Recognize and
+       interpret `-k` option, renamed from `-U`.  Make the argument
+       mandatory, and expect values of "ascii", "mixed", or "utf-8"
+       {case-insensitively}.  Throw warning if argument unrecognized.
+       (usage): Update.
+
+       * src/devices/grohtml/grohtml.1.man (Synopsis, Options):
+       Document it.
+
 2024-11-27  G. Branden Robinson <g.branden.robinson@gmail.com>
 
        Fix Savannah #66479 (2/2).
diff --git a/src/devices/grohtml/grohtml.1.man b/src/devices/grohtml/grohtml.1.man
index 590bdd7cd..6b2ee1c68 100644
--- a/src/devices/grohtml/grohtml.1.man
+++ b/src/devices/grohtml/grohtml.1.man
@@ -98,6 +98,8 @@ output driver for HTML
 .IR font-directory ]
 .RB [ \-j
 .IR output-stem ]
+.RB [ \-k
+.IR encoding ]
 .RB [ \-s
 .IR base-point-size ]
 .RB [ \-S
@@ -490,6 +492,18 @@ named
 .
 .
 .TP
+.BI \-k \~encoding
+Select the character encoding used in the generated document,
+affecting the declared encoding in the preamble
+and the form of character entity references.
+.
+.\" XXX: Don't present "mixed" until we know what it's for.
+Valid values are \[lq]ASCII\[rq] and \[lq]UTF\-8\[rq].
+.
+The default is \[lq]UTF\-8\[rq].
+.
+.
+.TP
 .B \-l
 Turn off the production of automatic section links at the top of the
 document.
diff --git a/src/devices/grohtml/post-html.cpp b/src/devices/grohtml/post-html.cpp
index a8c040e56..1c8d399d6 100644
--- a/src/devices/grohtml/post-html.cpp
+++ b/src/devices/grohtml/post-html.cpp
@@ -101,7 +101,12 @@ static const int CHARSET_UTF8  = 2;
 static int charset_encoding = CHARSET_MIXED;/* The character set may be plain ASCII,    */
                                             /* pure UTF-8, or a mixture of character    */
                                             /* entity references.                       */
-
+/*
+ * TODO: CHARSET_MIXED doesn't seem to govern the output in any way.
+ * Everywhere `charset_encoding` is tested, it is cast to a `bool`, so
+ * all we're really choosing between is ASCII and UTF-8.  Can we get rid
+ * of CHARSET_MIXED?
+ */
 
 /*
  *  start with a few favorites
@@ -5575,10 +5580,8 @@ int main(int argc, char **argv)
     { NULL, 0, 0, 0 }
   };
   opterr = 0;
-  // TODO: Rename `U` option, which generally means "unsafe mode" in
-  // groff, to `u`.
   while ((c = getopt_long(argc, argv,
-         "a:bCdD:eF:g:Ghi:I:j:lno:prs:S:U::vVx:y", long_options, NULL))
+         "a:bCdD:eF:g:Ghi:I:j:k:lno:prs:S:vVx:y", long_options, NULL))
         != EOF)
     switch(c) {
     case 'a':
@@ -5626,6 +5629,20 @@ int main(int argc, char **argv)
       multiple_files = TRUE;
       job_name = optarg;
       break;
+    case 'k':
+      if (strcasecmp(optarg, "ascii") == 0)
+       charset_encoding = CHARSET_ASCII;
+      else if (strcasecmp(optarg, "mixed") == 0)
+       charset_encoding = CHARSET_MIXED;
+      else if ((strcasecmp(optarg, "utf8") == 0)
+              || (strcasecmp(optarg, "utf-8") == 0))
+       charset_encoding = CHARSET_UTF8;
+      else {
+       warning("unsupported character encoding '%1'; assuming UTF-8",
+               optarg);
+       charset_encoding = CHARSET_UTF8;
+      }
+      break;
     case 'l':
       auto_links = FALSE;
       break;
@@ -5647,22 +5664,6 @@ int main(int argc, char **argv)
     case 'S':
       split_level = atoi(optarg) + 1;
       break;
-    case 'U':
-      if (optarg) {
-       // TODO: This argument semantic scheme seems unergonomic to GBR;
-       // come up with an alternative.
-        if ((strcmp(optarg, "0") == 0 || strcmp(optarg, "-") == 0))
-          charset_encoding = CHARSET_ASCII;
-        else if ((strcmp(optarg, "1") == 0))
-          charset_encoding = CHARSET_MIXED;
-        else if (optarg && ((strcmp(optarg, "2") == 0)
-                            || strcmp(optarg, "+") == 0))
-          charset_encoding = CHARSET_UTF8;
-        else
-          charset_encoding = CHARSET_UTF8;
-      } else
-        charset_encoding = CHARSET_UTF8;
-      break;
     case 'v':
       printf("GNU post-grohtml (groff) version %s\n", Version_string);
       exit(0);
@@ -5706,8 +5707,9 @@ int main(int argc, char **argv)
 static void usage(FILE *stream)
 {
   fprintf(stream,
-"usage: %s [-bCGhlnrUVy] [-F font-directory] [-j output-stem]"
-" [-s base-type-size] [-S heading-level] [-x html-dialect] [file ...]\n"
+"usage: %s [-bCGhlnrVy] [-F font-directory] [-j output-stem]"
+" [-k encoding] [-s base-type-size] [-S heading-level]"
+" [-x html-dialect] [file ...]\n"
 "usage: %s {-v | --version}\n"
 "usage: %s --help\n",
          program_name, program_name, program_name);
diff --git a/src/preproc/html/pre-html.cpp b/src/preproc/html/pre-html.cpp
index c11598fcc..a3b933406 100644
--- a/src/preproc/html/pre-html.cpp
+++ b/src/preproc/html/pre-html.cpp
@@ -1604,10 +1604,8 @@ static int scanArguments(int argc, char **argv)
     { 0 /* nullptr */, 0, 0, 0 }
   };
   opterr = 0;
-  // TODO: Rename `U` option, which generally means "unsafe mode" in
-  // groff, to `u`.
   while ((c = getopt_long(argc, argv,
-         "+a:bCdD:eF:g:Ghi:I:j:lno:prs:S:U::vVx:y", long_options,
+         "+a:bCdD:eF:g:Ghi:I:j:k:lno:prs:S:vVx:y", long_options,
          0 /* nullptr */))
         != EOF)
     switch(c) {
@@ -1658,6 +1656,9 @@ static int scanArguments(int argc, char **argv)
     case 'j':
       // handled by post-grohtml (set job name for multiple file output)
       break;
+    case 'k':
+      // handled by post-grohtml (charset ASCII/mixed/UTF-8)
+      break;
     case 'l':
       // handled by post-grohtml (no automatic section links)
       break;
@@ -1679,9 +1680,6 @@ static int scanArguments(int argc, char **argv)
     case 'S':
       // handled by post-grohtml (set file split level)
       break;
-    case 'U':
-      // handled by post-grohtml (charset UTF-8)
-      break;
     case 'v':
       printf("GNU pre-grohtml (groff) version %s\n", Version_string);
       exit(EXIT_SUCCESS);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]