[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[7532] parsetexi more on input encoding
From: gavinsmith0123
Subject: [7532] parsetexi more on input encoding
Date: Tue, 22 Nov 2016 11:55:43 +0000 (UTC)
Revision: 7532
http://svn.sv.gnu.org/viewvc/?view=rev&root=texinfo&revision=7532
Author: gavin
Date: 2016-11-22 11:55:42 +0000 (Tue, 22 Nov 2016)
Log Message:
-----------
parsetexi more on input encoding
Modified Paths:
--------------
trunk/tp/parsetexi/api.c
trunk/tp/parsetexi/end_line.c
trunk/tp/parsetexi/input.c
Modified: trunk/tp/parsetexi/api.c
===================================================================
--- trunk/tp/parsetexi/api.c 2016-11-22 07:41:01 UTC (rev 7531)
+++ trunk/tp/parsetexi/api.c 2016-11-22 11:55:42 UTC (rev 7532)
@@ -68,7 +68,7 @@
void
parse_file (char *filename)
{
- debug_output = 1;
+ debug_output = 0;
parse_texi_file (filename);
}
Modified: trunk/tp/parsetexi/end_line.c
===================================================================
--- trunk/tp/parsetexi/end_line.c 2016-11-22 07:41:01 UTC (rev 7531)
+++ trunk/tp/parsetexi/end_line.c 2016-11-22 11:55:42 UTC (rev 7532)
@@ -1572,15 +1572,34 @@
/* Get perl_encoding. */
perl_encoding = 0;
if (texinfo_encoding)
+ perl_encoding = texinfo_encoding;
+ else
{
+ int i;
+ static char *known_encodings[] = {
+ "shift_jis",
+ 0
+ };
+ for (i = 0; (known_encodings[i]); i++)
+ {
+ if (!strcmp (text2, known_encodings[i]))
+ {
+ perl_encoding = known_encodings[i];
+ break;
+ }
+ }
+ }
+
+ if (perl_encoding)
+ {
struct encoding_map {
char *from; char *to;
};
static struct encoding_map map[] = {
"utf-8", "utf-8-strict",
- "us-ascii", "ascii"
+ "us-ascii", "ascii",
+ "shift_jis", "shiftjis"
};
- perl_encoding = texinfo_encoding;
for (i = 0; i < sizeof map / sizeof *map; i++)
{
if (!strcmp (perl_encoding, map[i].from))
@@ -1589,9 +1608,6 @@
break;
}
}
- }
- if (perl_encoding)
- {
add_extra_string (current, "input_perl_encoding",
perl_encoding);
}
@@ -1621,9 +1637,9 @@
"ascii", "us-ascii",
"shiftjis", "shift_jis",
"latin-1", "iso-8859-1",
- "iso-8859-1", "iso8859_1",
- "iso-8859-2", "iso8859_2",
- "iso-8859-15", "iso8859_15",
+ "iso-8859-1", "iso-8859-1",
+ "iso-8859-2", "iso-8859-2",
+ "iso-8859-15", "iso-8859-15",
"koi8-r", "koi8",
"koi8-u", "koi8",
};
@@ -1647,10 +1663,8 @@
input_encoding);
global_info.input_encoding_name = text; // 3210
+ set_input_encoding (input_encoding);
}
-
- // TODO: Need to convert input in input.c from this encoding.
- // (INPUT_PERL_ENCODING in Perl version)
}
else if (current->cmd == CM_documentlanguage) // 3223
{
Modified: trunk/tp/parsetexi/input.c
===================================================================
--- trunk/tp/parsetexi/input.c 2016-11-22 07:41:01 UTC (rev 7531)
+++ trunk/tp/parsetexi/input.c 2016-11-22 11:55:42 UTC (rev 7532)
@@ -31,14 +31,16 @@
enum character_encoding {
ce_latin1,
- ce_utf8
+ ce_latin2,
+ ce_utf8,
+ ce_shiftjis
};
typedef struct {
enum input_type type;
FILE *file;
- enum character_encoding input_encoding;
+ char *input_encoding;
LINE_NR line_nr;
char *text; /* Input text to be parsed as Texinfo. */
@@ -92,6 +94,8 @@
#define ICONV_CONST
static iconv_t iconv_from_latin1 = (iconv_t) 0;
+static iconv_t iconv_from_latin2;
+static iconv_t iconv_from_shiftjis;
/* Run iconv using text buffer as output buffer. */
size_t
@@ -121,12 +125,13 @@
/* Return conversion of S according to ENC. This function frees S. */
static char *
-convert_to_utf8 (char *s, enum character_encoding enc)
+convert_to_utf8 (char *s, char *input_encoding)
{
iconv_t our_iconv;
static TEXT t;
char *inptr; size_t bytes_left;
size_t iconv_ret;
+ enum character_encoding enc;
/* Convert from @documentencoding to UTF-8.
It might be possible not to convert to UTF-8 and use an 8-bit encoding
@@ -152,15 +157,45 @@
return s;
}
}
+ if (iconv_from_latin2 == (iconv_t) 0)
+ {
+ /* Initialize the conversion for the first time. */
+ iconv_from_latin2 = iconv_open ("UTF-8", "ISO-8859-2");
+ if (iconv_from_latin2 == (iconv_t) -1)
+ iconv_from_latin2 = iconv_from_latin1;
+ }
+ if (iconv_from_shiftjis == (iconv_t) 0)
+ {
+ /* Initialize the conversion for the first time. */
+ iconv_from_shiftjis = iconv_open ("UTF-8", "SHIFT-JIS");
+ if (iconv_from_shiftjis == (iconv_t) -1)
+ iconv_from_shiftjis = iconv_from_latin1;
+ }
+ enc = ce_latin1;
+ if (!input_encoding)
+ ;
+ else if (!strcmp (input_encoding, "utf-8"))
+ enc = ce_utf8;
+ else if (!strcmp (input_encoding, "iso-8859-2"))
+ enc = ce_latin2;
+ else if (!strcmp (input_encoding, "shift_jis"))
+ enc = ce_shiftjis;
+
switch (enc)
{
+ case ce_utf8:
+ return s; /* no conversion required. */
+ break;
case ce_latin1:
our_iconv = iconv_from_latin1;
break;
- case ce_utf8:
- return s; /* no conversion required. */
+ case ce_latin2:
+ our_iconv = iconv_from_latin2;
break;
+ case ce_shiftjis:
+ our_iconv = iconv_from_shiftjis;
+ break;
}
t.end = 0;
@@ -279,7 +314,7 @@
i->line_nr.line_nr++;
line_nr = i->line_nr;
- return convert_to_utf8 (line, 0); // i->input_encoding);
+ return convert_to_utf8 (line, i->input_encoding);
}
free (line); line = 0;
break;
@@ -319,6 +354,7 @@
input_stack[input_number].file = 0;
input_stack[input_number].text = text;
input_stack[input_number].ptext = text;
+ input_stack[input_number].input_encoding = 0;
if (!macro)
line_number--;
@@ -354,6 +390,19 @@
/* TODO: free the memory */
}
+void
+set_input_encoding (char *encoding)
+{
+ int i;
+
+ /* Set encoding of top file in stack. */
+ i = input_number - 1;
+ while (i >= 0 && input_stack[i].type != IN_file)
+ i--;
+ if (i >= 0)
+ input_stack[i].input_encoding = encoding;
+}
+
static char **include_dirs;
static size_t include_dirs_number;
@@ -413,6 +462,7 @@
input_stack[input_number].line_nr.macro = 0;
input_stack[input_number].text = 0;
input_stack[input_number].ptext = 0;
+ input_stack[input_number].input_encoding = 0;
input_number++;
return;
[Prev in Thread] Current Thread [Next in Thread]
- [7532] parsetexi more on input encoding, gavinsmith0123 <=