[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[7201] comments in \DeclareUnicodeCharacterUTFviii
From: |
gavinsmith0123 |
Subject: |
[7201] comments in \DeclareUnicodeCharacterUTFviii |
Date: |
Fri, 3 Jun 2016 19:02:37 +0000 (UTC) |
Revision: 7201
http://svn.sv.gnu.org/viewvc/?view=rev&root=texinfo&revision=7201
Author: gavin
Date: 2016-06-03 19:02:37 +0000 (Fri, 03 Jun 2016)
Log Message:
-----------
comments in \DeclareUnicodeCharacterUTFviii
Modified Paths:
--------------
trunk/ChangeLog
trunk/doc/texinfo.tex
Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog 2016-06-01 20:39:07 UTC (rev 7200)
+++ trunk/ChangeLog 2016-06-03 19:02:37 UTC (rev 7201)
@@ -1,3 +1,11 @@
+2016-06-03 Gavin Smith <address@hidden>
+
+ * doc/texinfo.tex (\UTFviiLoop): Add a missing % character in
+ definition of non-initial bytes in UTF-8 sequences.
+ (\DeclareUnicodeCharacterUTFviii): Move definition of
+ \UTFviiiTwoOctets (and others) out of this macro and rename
+ them. Add more comments.
+
2016-06-01 Gavin Smith <address@hidden>
* util/texi2dvi (run_tex): Only check for certain unusual
@@ -2,3 +10,3 @@
characters in the filename, using 'sed'. (Suggestion from
- Vincent Bela\xEFche.) Loop through any that occured and change the
+ Vincent Bela\xEFche.) Loop through any that occurred and change the
catcode to 12 within a TeX group.
Modified: trunk/doc/texinfo.tex
===================================================================
--- trunk/doc/texinfo.tex 2016-06-01 20:39:07 UTC (rev 7200)
+++ trunk/doc/texinfo.tex 2016-06-03 19:02:37 UTC (rev 7201)
@@ -3,7 +3,7 @@
% Load plain if necessary, i.e., if running under initex.
\expandafter\ifx\csname fmtname\endcsname\relax\input plain\fi
%
-\def\texinfoversion{2016-05-28.16}
+\def\texinfoversion{2016-06-03.20}
%
% Copyright 1985, 1986, 1988, 1990, 1991, 1992, 1993, 1994, 1995,
% 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
@@ -10250,7 +10250,7 @@
\countUTFx = "80
\countUTFy = "C2
\def\UTFviiiTmp{%
- \gdef~{
+ \gdef~{%
\ifpassthroughchars $\fi}}%
\UTFviiiLoop
@@ -10301,6 +10301,15 @@
\fi
}
+% These macros are used here to construct the name of a control
+% sequence to be defined.
+\def\UTFviiiTwoOctetsName#1#2{%
+ \csname u8:#1\string #2\endcsname}%
+\def\UTFviiiThreeOctetsName#1#2#3{%
+ \csname u8:#1\string #2\string #3\endcsname}%
+\def\UTFviiiFourOctetsName#1#2#3#4{%
+ \csname u8:#1\string #2\string #3\string #4\endcsname}%
+
% For UTF-8 byte sequence (TeX, e-TeX and pdfTeX)
% Definition macro to replace the Unicode character
% Definition macro that is used by @U command
@@ -10317,17 +10326,18 @@
\countUTFz = "#1\relax
\begingroup
\parseXMLCharref
+
+ % Give \u8:... its definition. The sequence of seven \expandafter's
+ % expands after the \gdef three times, e.g.
%
- % Access definitions of characters given UTF-8 sequences
- \def\UTFviiiTwoOctets##1##2{%
- \csname u8:##1\string ##2\endcsname}%
- \def\UTFviiiThreeOctets##1##2##3{%
- \csname u8:##1\string ##2\string ##3\endcsname}%
- \def\UTFviiiFourOctets##1##2##3##4{%
- \csname u8:##1\string ##2\string ##3\string ##4\endcsname}%
- \expandafter\expandafter\expandafter\expandafter
- \expandafter\expandafter\expandafter
- \gdef\UTFviiiTmp{#2}%
+ % 1. \UTFviiTwoOctetsName B1 B2
+ % 2. \csname u8:B1 \string B2 \endcsname
+ % 3. \u8: B1 B2 (a single control sequence token)
+ %
+ \expandafter\expandafter
+ \expandafter\expandafter
+ \expandafter\expandafter
+ \expandafter\gdef \UTFviiiTmp{#2}%
%
\expandafter\ifx\csname uni:#1\endcsname \relax \else
\message{Internal error, already defined: #1}%
@@ -10337,37 +10347,53 @@
\expandafter\globallet\csname uni:#1\endcsname \UTFviiiTmp
\endgroup}
%
- % Given the value in \countUTFz as a Unicode code point, set \UTFviiiTmp.
+ % Given the value in \countUTFz as a Unicode code point, set \UTFviiiTmp
+ % to the corresponding UTF-8 sequence.
\gdef\parseXMLCharref{%
\ifnum\countUTFz < "A0\relax
\errhelp = \EMsimple
\errmessage{Cannot define Unicode char value < 00A0}%
\else\ifnum\countUTFz < "800\relax
\parseUTFviiiA,%
- \parseUTFviiiB C\UTFviiiTwoOctets.,%
+ \parseUTFviiiB C\UTFviiiTwoOctetsName.,%
\else\ifnum\countUTFz < "10000\relax
\parseUTFviiiA;%
\parseUTFviiiA,%
- \parseUTFviiiB E\UTFviiiThreeOctets.{,;}%
+ \parseUTFviiiB E\UTFviiiThreeOctetsName.{,;}%
\else
\parseUTFviiiA;%
\parseUTFviiiA,%
\parseUTFviiiA!%
- \parseUTFviiiB F\UTFviiiFourOctets.{!,;}%
+ \parseUTFviiiB F\UTFviiiFourOctetsName.{!,;}%
\fi\fi\fi
}
+ % Extract a byte from the end of the UTF-8 representation of \countUTFx.
+ % It must be a non-initial byte in the sequence.
+ % Change \uccode of #1 for it to be used in \parseUTFviiiB as one
+ % of the bytes.
\gdef\parseUTFviiiA#1{%
\countUTFx = \countUTFz
\divide\countUTFz by 64
- \countUTFy = \countUTFz
+ \countUTFy = \countUTFz % Save to be the future value of \countUTFz.
\multiply\countUTFz by 64
+
+ % \countUTFz is now \countUTFx with the last 5 bits cleared. Subtract
+ % in order to get the last five bits.
\advance\countUTFx by -\countUTFz
+
+ % Convert this to the byte in the UTF-8 sequence.
\advance\countUTFx by 128
\uccode `#1\countUTFx
\countUTFz = \countUTFy}
- % Used to set \UTFviiiTmp to a UTF-8 byte sequence
+ % Used to put a UTF-8 byte sequence into \UTFviiiTmp
+ % #1 is the increment for \countUTFz to yield a the first byte of the UTF-8
+ % sequence.
+ % #2 is one of the \UTFviii*OctetsName macros.
+ % #3 is always a full stop (.)
+ % #4 is a template for the other bytes in the sequence. The values for these
+ % bytes is substituted in here with \uppercase using the \uccode's.
\gdef\parseUTFviiiB#1#2#3#4{%
\advance\countUTFz by "#10\relax
\uccode `#3\countUTFz
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [7201] comments in \DeclareUnicodeCharacterUTFviii,
gavinsmith0123 <=