groff-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[groff] 18/23: Support CJK fonts encoded in UTF-16 (4/6).


From: G. Branden Robinson
Subject: [groff] 18/23: Support CJK fonts encoded in UTF-16 (4/6).
Date: Thu, 21 Nov 2024 14:47:49 -0500 (EST)

gbranden pushed a commit to branch master
in repository groff.

commit 7d91bcb4c29a8ba882149422f8e227edef4678ac
Author: TANAKA Takuji <ttk@t-lab.opal.ne.jp>
AuthorDate: Fri Dec 29 13:56:37 2023 +0000

    Support CJK fonts encoded in UTF-16 (4/6).
    
    * src/devices/grops/ps.h:
    * src/devices/grops/ps.cpp: Include C99 "stdint.h" header for desired
      `uint16_t` data type.
    
      (class ps_output): Change type of `put_string` member function's first
      argument from `const char *` to `const uint16_t *`.  Add third
      argument of Boolean type, `is_utf16le`.
    
    * src/devices/grops/ps.cpp (ps_output::put_string): Adjust computations
      of `len` and `col` locals if the font in use is UTF-16LE-encoded, and
      write out 4-digit instead of 2-digit hexadecimal numeric literals when
      that is the case.
    
      (class ps_printer): Change type of `sbuf` member variable from `char`
      to `uint16_t`.  Change type of third argument to `set_subencoding`
      member function from `unsigned char *` to `uint16_t *`.
    
      (ps_printer::set_subencoding): Rename third argument from `codep` to
      `code`--it's no longer an indirect reference to a single `char`, but a
      2-element `uint16_t` array.  If the font's "internalname" directive
      contains the substring "-UTF16-", populate `code` argument with
      little-endian 16-bit value.
    
      (ps_printer::set_char): Declare `code` as above: a 2-element
      `uint16_t` array instead of an unsigned char.  Handle case of `code`
      using surrogate pairs (`code[1] > 0`).
    
      (ps_printer::flush_sbuf): Conditionalize form of output on font
      encoding.  Set the Boolean argument to `ps_output::put_string()` per the
      font's "internalname" directive matching the substring "-UTF16-".
---
 ChangeLog                | 31 +++++++++++++++++++
 src/devices/grops/ps.cpp | 79 ++++++++++++++++++++++++++++++++++++------------
 src/devices/grops/ps.h   |  6 ++--
 3 files changed, 94 insertions(+), 22 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 5fcfb050d..1ebb08f5c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,34 @@
+2024-11-20  TANAKA Takuji <ttk@t-lab.opal.ne.jp>
+
+       Support CJK fonts encoded in UTF-16 (4/6).
+
+       * src/devices/grops/ps.h:
+       * src/devices/grops/ps.cpp: Include C99 "stdint.h" header for
+       desired `uint16_t` data type.
+       (class ps_output): Change type of `put_string` member function's
+       first argument from `const char *` to `const uint16_t *`.  Add
+       third argument of Boolean type, `is_utf16le`.
+       * src/devices/grops/ps.cpp (ps_output::put_string): Adjust
+       computations of `len` and `col` locals if the font in use is
+       UTF-16LE-encoded, and write out 4-digit instead of 2-digit
+       hexadecimal numeric literals when that is the case.
+       (class ps_printer): Change type of `sbuf` member variable from
+       `char` to `uint16_t`.  Change type of third argument to
+       `set_subencoding` member function from `unsigned char *` to
+       `uint16_t *`.
+       (ps_printer::set_subencoding): Rename third argument from
+       `codep` to `code`--it's no longer an indirect reference to a
+       single `char`, but a 2-element `uint16_t` array.  If the font's
+       "internalname" directive contains the substring "-UTF16-",
+       populate `code` argument with little-endian 16-bit value.
+       (ps_printer::set_char): Declare `code` as above: a 2-element
+       `uint16_t` array instead of an unsigned char.  Handle case of
+       `code` using surrogate pairs (`code[1] > 0`).
+       (ps_printer::flush_sbuf): Conditionalize form of output on font
+       encoding.  Set the Boolean argument to `ps_output::put_string()` per
+       the font's "internalname" directive matching the substring
+       "-UTF16-".
+
 2024-11-20  TANAKA Takuji <ttk@t-lab.opal.ne.jp>
 
        Support CJK fonts encoded in UTF-16 (3/6).
diff --git a/src/devices/grops/ps.cpp b/src/devices/grops/ps.cpp
index 8e601a200..ab31de08a 100644
--- a/src/devices/grops/ps.cpp
+++ b/src/devices/grops/ps.cpp
@@ -33,6 +33,7 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>. */
 #include "ps.h"
 
 #include <errno.h> // errno
+#include <stdint.h> // uint16_t
 #include <time.h>
 
 #ifdef NEED_DECLARATION_PUTENV
@@ -202,13 +203,16 @@ ps_output &ps_output::put_delimiter(char c)
   return *this;
 }
 
-ps_output &ps_output::put_string(const char *s, size_t n)
+ps_output &ps_output::put_string(const uint16_t *s, size_t n,
+                                bool is_utf16le)
 {
   size_t len = 0;
   size_t i;
   for (i = 0; i < n; i++) {
-    char c = s[i];
-    if (is_ascii(c) && csprint(c)) {
+    uint16_t c = s[i];
+    if (is_utf16le) {
+      len = (i + 1) * 4;
+    } else if (is_ascii(c) && csprint(c)) {
       if (c == '(' || c == ')' || c == '\\')
        len += 2;
       else
@@ -217,7 +221,7 @@ ps_output &ps_output::put_string(const char *s, size_t n)
     else
       len += 4;
   }
-  if (len > (n * 2)) {
+  if ((len > (n * 2)) || is_utf16le) {
     if (((col + (n * 2) + 2) > max_line_length)
        && (((n * 2) + 2) <= max_line_length)) {
       putc('\n', fp);
@@ -234,8 +238,13 @@ ps_output &ps_output::put_string(const char *s, size_t n)
        putc('\n', fp);
        col = 0;
       }
-      fprintf(fp, "%02x", s[i] & 0377);
-      col += 2;
+      if (is_utf16le) {
+        fprintf(fp, "%04X", s[i] & 0xFFFF);
+        col += 4;
+      } else {
+        fprintf(fp, "%02x", s[i] & 0377);
+        col += 2;
+      }
     }
     putc('>', fp);
     col++;
@@ -531,7 +540,7 @@ class ps_printer : public printer {
   int paper_length;
   int equalise_spaces;
   enum { SBUF_SIZE = 256 };
-  char sbuf[SBUF_SIZE];
+  uint16_t sbuf[SBUF_SIZE];
   int sbuf_len;
   int sbuf_start_hpos;
   int sbuf_vpos;
@@ -565,7 +574,7 @@ class ps_printer : public printer {
   void set_style(const style &);
   void set_space_code(unsigned char);
   int set_encoding_index(ps_font *);
-  subencoding *set_subencoding(font *, glyph *, unsigned char *);
+  subencoding *set_subencoding(font *, glyph *, uint16_t *);
   char *get_subfont(subencoding *, const char *);
   void do_exec(char *, const environment *);
   void do_import(char *, const environment *);
@@ -663,10 +672,26 @@ int ps_printer::set_encoding_index(ps_font *f)
 }
 
 subencoding *ps_printer::set_subencoding(font *f, glyph *g,
-                                        unsigned char *codep)
+                                        uint16_t *code)
 {
   unsigned int idx = f->get_code(g);
-  *codep = idx % 256;
+  const char *psname = f->get_internal_name();
+
+  if (psname && strstr(psname, "-UTF16-")) {
+    /* Unicode, convert to UTF-16LE */
+    if (idx < 0x10000) {
+      code[0] = idx;
+      code[1] = 0;
+    } else {
+      // Encode surrogate pairs.
+      code[0] = (idx - 0x10000) / 0x400 + 0xD800;
+      code[1] = (idx - 0x10000) % 0x400 + 0xDC00;
+    }
+    return 0 /* nullptr */;
+  }
+
+  code[0] = idx % 256;
+  code[1] = 0;
   unsigned int num = idx >> 8;
   if (num == 0)
     return 0 /* nullptr */;
@@ -677,7 +702,7 @@ subencoding *ps_printer::set_subencoding(font *f, glyph *g,
   if (0 /* nullptr */ == p)
     p = subencodings = new subencoding(f, num, next_subencoding_index++,
                                       subencodings);
-  p->glyphs[*codep] = f->get_special_device_encoding(g);
+  p->glyphs[*code] = f->get_special_device_encoding(g);
   return p;
 }
 
@@ -697,8 +722,8 @@ void ps_printer::set_char(glyph *g, font *f, const environment *env, int w,
 {
   if (g == space_glyph || invis_count > 0)
     return;
-  unsigned char code;
-  subencoding *sub = set_subencoding(f, g, &code);
+  uint16_t code[2];
+  subencoding *sub = set_subencoding(f, g, code);
   style sty(f, sub, env->size, env->height, env->slant);
   if (sty.slant != 0) {
     if (sty.slant > 80 || sty.slant < -80) {
@@ -712,14 +737,18 @@ void ps_printer::set_char(glyph *g, font *f, const environment *env, int w,
        && sbuf_vpos == env->vpos
        && sbuf_color == *env->col) {
       if (sbuf_end_hpos == env->hpos) {
-       sbuf[sbuf_len++] = code;
+       sbuf[sbuf_len++] = code[0];
+       if (code[1] > 0)
+         sbuf[sbuf_len++] = code[1];
        sbuf_end_hpos += w + sbuf_kern;
        return;
       }
-      if (sbuf_len == 1 && sbuf_kern == 0) {
+      if ((sbuf_len == 1) && (sbuf_kern == 0)) {
        sbuf_kern = env->hpos - sbuf_end_hpos;
        sbuf_end_hpos = env->hpos + sbuf_kern + w;
-       sbuf[sbuf_len++] = code;
+       sbuf[sbuf_len++] = code[0];
+       if (code[1] > 0)
+         sbuf[sbuf_len++] = code[1];
        return;
       }
       /* If sbuf_end_hpos - sbuf_kern == env->hpos, we are better off
@@ -732,7 +761,9 @@ void ps_printer::set_char(glyph *g, font *f, const environment *env, int w,
            sbuf_space_width = env->hpos - sbuf_end_hpos;
            sbuf_end_hpos = env->hpos + w + sbuf_kern;
            sbuf[sbuf_len++] = sbuf_space_code;
-           sbuf[sbuf_len++] = code;
+           sbuf[sbuf_len++] = code[0];
+           if (code[1] > 0)
+             sbuf[sbuf_len++] = code[1];
            sbuf_space_count++;
            return;
          }
@@ -742,7 +773,9 @@ void ps_printer::set_char(glyph *g, font *f, const environment *env, int w,
          if (diff == 0 || (equalise_spaces && (diff == 1 || diff == -1))) {
            sbuf_end_hpos = env->hpos + w + sbuf_kern;
            sbuf[sbuf_len++] = sbuf_space_code;
-           sbuf[sbuf_len++] = code;
+           sbuf[sbuf_len++] = code[0];
+           if (code[1] > 0)
+             sbuf[sbuf_len++] = code[1];
            sbuf_space_count++;
            if (diff == 1)
              sbuf_space_diff_count++;
@@ -756,7 +789,9 @@ void ps_printer::set_char(glyph *g, font *f, const environment *env, int w,
     flush_sbuf();
   }
   sbuf_len = 1;
-  sbuf[0] = code;
+  sbuf[0] = code[0];
+  if (code[1] > 0)
+    sbuf[sbuf_len++] = code[1];
   sbuf_end_hpos = env->hpos + w;
   sbuf_start_hpos = env->hpos;
   sbuf_vpos = env->vpos;
@@ -1028,7 +1063,11 @@ void ps_printer::flush_sbuf()
     out.put_fix_number(extra_space);
   if (sbuf_kern != 0)
     out.put_fix_number(sbuf_kern);
-  out.put_string(sbuf, sbuf_len);
+  const char *psname = sbuf_style.f->get_internal_name();
+  bool is_utf16le = false;
+  if ((psname != 0 /* nullptr */) && strstr(psname, "-UTF16-"))
+    is_utf16le = true;
+  out.put_string(sbuf, sbuf_len, is_utf16le);
   char command_array[] = {'A', 'B', 'C', 'D',
                          'E', 'F', 'G', 'H',
                          'I', 'J', 'K', 'L',
diff --git a/src/devices/grops/ps.h b/src/devices/grops/ps.h
index d2e0fb347..e1ca84eab 100644
--- a/src/devices/grops/ps.h
+++ b/src/devices/grops/ps.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1989-2020 Free Software Foundation, Inc.
+/* Copyright (C) 1989-2024 Free Software Foundation, Inc.
      Written by James Clark (jjc@jclark.com)
 
 This file is part of groff.
@@ -16,10 +16,12 @@ for more details.
 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>. */
 
+#include <stdint.h> // uint16_t
+
 class ps_output {
 public:
   ps_output(FILE *, int max_line_length);
-  ps_output &put_string(const char *, size_t n);
+  ps_output &put_string(const uint16_t *, size_t, bool);
   ps_output &put_number(int);
   ps_output &put_fix_number(int);
   ps_output &put_float(double);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]