[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[groff] 19/80: [grohtml]: Revise and document new option.
From: G. Branden Robinson
Subject: [groff] 19/80: [grohtml]: Revise and document new option.
Date: Sat, 30 Nov 2024 04:02:15 -0500 (EST)
gbranden pushed a commit to branch master
in repository groff.
commit 98a00440a34a0492b2b0854a7ff5426a2a30ef92
Author: G. Branden Robinson <g.branden.robinson@gmail.com>
AuthorDate: Wed Nov 27 03:56:28 2024 -0600
[grohtml]: Revise and document new option.
Rename the `-U` option provided by Takuji to `-k`, and document it.
* src/preproc/html/pre-html.cpp (scanArguments): Recognize but ignore
`-k` (rather than `-U`) option. Also, make the argument mandatory.
Nothing else in groff supports an optional option argument, which can
be ambiguous to parse, especially in light of groff(1)'s `-P` option.
* src/devices/grohtml/post-html.cpp (main): Recognize and interpret `-k`
option, renamed from `-U`. Make the argument mandatory, and expect
values of "ascii", "mixed", or "utf-8" (case-insensitively). Throw
warning if argument unrecognized.
(usage): Update.
* src/devices/grohtml/grohtml.1.man (Synopsis, Options): Document it.
Also annotate potential code simplification.
---
ChangeLog | 19 ++++++++++++++++
src/devices/grohtml/grohtml.1.man | 14 ++++++++++++
src/devices/grohtml/post-html.cpp | 46 ++++++++++++++++++++-------------------
src/preproc/html/pre-html.cpp | 10 ++++-----
4 files changed, 61 insertions(+), 28 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 1a42e9f5c..862ee14e3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+2024-11-27 G. Branden Robinson <g.branden.robinson@gmail.com>
+
+ [grohtml]: Rename the `-U` option provided by Takuji to `-k`,
+ and document it.
+
+ * src/preproc/html/pre-html.cpp (scanArguments): Recognize but
+ ignore `-k` (rather than `-U`) option. Also, make the argument
+ mandatory. Nothing else in groff supports an optional option
+ argument, which can be ambiguous to parse, especially in light
+ of groff(1)'s `-P` option.
+ * src/devices/grohtml/post-html.cpp (main): Recognize and
+ interpret `-k` option, renamed from `-U`. Make the argument
+ mandatory, and expect values of "ascii", "mixed", or "utf-8"
+ (case-insensitively). Throw warning if argument unrecognized.
+ (usage): Update.
+
+ * src/devices/grohtml/grohtml.1.man (Synopsis, Options):
+ Document it.
+
2024-11-27 G. Branden Robinson <g.branden.robinson@gmail.com>
Fix Savannah #66479 (2/2).
diff --git a/src/devices/grohtml/grohtml.1.man b/src/devices/grohtml/grohtml.1.man
index 590bdd7cd..6b2ee1c68 100644
--- a/src/devices/grohtml/grohtml.1.man
+++ b/src/devices/grohtml/grohtml.1.man
@@ -98,6 +98,8 @@ output driver for HTML
.IR font-directory ]
.RB [ \-j
.IR output-stem ]
+.RB [ \-k
+.IR encoding ]
.RB [ \-s
.IR base-point-size ]
.RB [ \-S
@@ -490,6 +492,18 @@ named
.
.
.TP
+.BI \-k \~encoding
+Select the character encoding used in the generated document,
+affecting the declared encoding in the preamble
+and the form of character entity references.
+.
+.\" XXX: Don't present "mixed" until we know what it's for.
+Valid values are \[lq]ASCII\[rq] and \[lq]UTF\-8\[rq].
+.
+The default is \[lq]UTF\-8\[rq].
+.
+.
+.TP
.B \-l
Turn off the production of automatic section links at the top of the
document.
diff --git a/src/devices/grohtml/post-html.cpp b/src/devices/grohtml/post-html.cpp
index a8c040e56..1c8d399d6 100644
--- a/src/devices/grohtml/post-html.cpp
+++ b/src/devices/grohtml/post-html.cpp
@@ -101,7 +101,12 @@ static const int CHARSET_UTF8 = 2;
static int charset_encoding = CHARSET_MIXED;/* The character set may be plain ASCII, */
/* pure UTF-8, or a mixture of character */
/* entity references. */
-
+/*
+ * TODO: CHARSET_MIXED doesn't seem to govern the output in any way.
+ * Everywhere `charset_encoding` is tested, it is cast to a `bool`, so
+ * all we're really choosing between is ASCII and UTF-8. Can we get rid
+ * of CHARSET_MIXED?
+ */
/*
* start with a few favorites
@@ -5575,10 +5580,8 @@ int main(int argc, char **argv)
{ NULL, 0, 0, 0 }
};
opterr = 0;
- // TODO: Rename `U` option, which generally means "unsafe mode" in
- // groff, to `u`.
while ((c = getopt_long(argc, argv,
- "a:bCdD:eF:g:Ghi:I:j:lno:prs:S:U::vVx:y", long_options, NULL))
+ "a:bCdD:eF:g:Ghi:I:j:k:lno:prs:S:vVx:y", long_options, NULL))
!= EOF)
switch(c) {
case 'a':
@@ -5626,6 +5629,20 @@ int main(int argc, char **argv)
multiple_files = TRUE;
job_name = optarg;
break;
+ case 'k':
+ if (strcasecmp(optarg, "ascii") == 0)
+ charset_encoding = CHARSET_ASCII;
+ else if (strcasecmp(optarg, "mixed") == 0)
+ charset_encoding = CHARSET_MIXED;
+ else if ((strcasecmp(optarg, "utf8") == 0)
+ || (strcasecmp(optarg, "utf-8") == 0))
+ charset_encoding = CHARSET_UTF8;
+ else {
+ warning("unsupported character encoding '%1'; assuming UTF-8",
+ optarg);
+ charset_encoding = CHARSET_UTF8;
+ }
+ break;
case 'l':
auto_links = FALSE;
break;
@@ -5647,22 +5664,6 @@ int main(int argc, char **argv)
case 'S':
split_level = atoi(optarg) + 1;
break;
- case 'U':
- if (optarg) {
- // TODO: This argument semantic scheme seems unergonomic to GBR;
- // come up with an alternative.
- if ((strcmp(optarg, "0") == 0 || strcmp(optarg, "-") == 0))
- charset_encoding = CHARSET_ASCII;
- else if ((strcmp(optarg, "1") == 0))
- charset_encoding = CHARSET_MIXED;
- else if (optarg && ((strcmp(optarg, "2") == 0)
- || strcmp(optarg, "+") == 0))
- charset_encoding = CHARSET_UTF8;
- else
- charset_encoding = CHARSET_UTF8;
- } else
- charset_encoding = CHARSET_UTF8;
- break;
case 'v':
printf("GNU post-grohtml (groff) version %s\n", Version_string);
exit(0);
@@ -5706,8 +5707,9 @@ int main(int argc, char **argv)
static void usage(FILE *stream)
{
fprintf(stream,
-"usage: %s [-bCGhlnrUVy] [-F font-directory] [-j output-stem]"
-" [-s base-type-size] [-S heading-level] [-x html-dialect] [file ...]\n"
+"usage: %s [-bCGhlnrVy] [-F font-directory] [-j output-stem]"
+" [-k encoding] [-s base-type-size] [-S heading-level]"
+" [-x html-dialect] [file ...]\n"
"usage: %s {-v | --version}\n"
"usage: %s --help\n",
program_name, program_name, program_name);
diff --git a/src/preproc/html/pre-html.cpp b/src/preproc/html/pre-html.cpp
index c11598fcc..a3b933406 100644
--- a/src/preproc/html/pre-html.cpp
+++ b/src/preproc/html/pre-html.cpp
@@ -1604,10 +1604,8 @@ static int scanArguments(int argc, char **argv)
{ 0 /* nullptr */, 0, 0, 0 }
};
opterr = 0;
- // TODO: Rename `U` option, which generally means "unsafe mode" in
- // groff, to `u`.
while ((c = getopt_long(argc, argv,
- "+a:bCdD:eF:g:Ghi:I:j:lno:prs:S:U::vVx:y", long_options,
+ "+a:bCdD:eF:g:Ghi:I:j:k:lno:prs:S:vVx:y", long_options,
0 /* nullptr */))
!= EOF)
switch(c) {
@@ -1658,6 +1656,9 @@ static int scanArguments(int argc, char **argv)
case 'j':
// handled by post-grohtml (set job name for multiple file output)
break;
+ case 'k':
+ // handled by post-grohtml (charset ASCII/mixed/UTF-8)
+ break;
case 'l':
// handled by post-grohtml (no automatic section links)
break;
@@ -1679,9 +1680,6 @@ static int scanArguments(int argc, char **argv)
case 'S':
// handled by post-grohtml (set file split level)
break;
- case 'U':
- // handled by post-grohtml (charset UTF-8)
- break;
case 'v':
printf("GNU pre-grohtml (groff) version %s\n", Version_string);
exit(EXIT_SUCCESS);
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [groff] 19/80: [grohtml]: Revise and document new option.,
G. Branden Robinson <=