[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: diff -y + UTF-8 = irregular columns
From: |
Bruno Haible |
Subject: |
Re: diff -y + UTF-8 = irregular columns |
Date: |
Wed, 23 Jan 2008 17:06:56 +0100 |
User-agent: |
KMail/1.9.1 |
Paul Eggert wrote:
> It is indeed a bug, one that it would be nice to fix.
Thanks for acknowledging this bug, already reported in
http://lists.gnu.org/archive/html/bug-gnu-utils/2002-01/msg00448.html
Here is a proposed fix that
- works in all locales, not only UTF-8 locales,
- also considers the case of input that is not valid in the current
locale (e.g. ISO-8859-1 input in a UTF-8 locale),
- does not introduce code duplication.
2008-01-23 Bruno Haible <address@hidden>
* bootstrap.conf (gnulib_modules): Add mbchar and mbiter.
* src/side-half.h: New file, extracted from src/side.c, generalized
to use multibyte aware macros.
* src/side.c (print_half_line): Include side-half.h twice. Dispatch
between unibyte case and multibyte case.
* src/Makefile.am (diff_SOURCES): Add side-half.h.
*** bootstrap.conf 17 Aug 2007 23:35:47 -0000 1.4
--- bootstrap.conf 23 Jan 2008 15:58:04 -0000
***************
*** 1,6 ****
# Bootstrap configuration.
! # Copyright (C) 2006, 2007 Free Software Foundation, Inc.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
--- 1,6 ----
# Bootstrap configuration.
! # Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
***************
*** 20,27 ****
gnulib_modules='
c-stack config-h diffseq dirname dup2 error exclude exit exitfail
extensions fcntl fdl file-type fnmatch-gnu getopt gettext
! gettime hard-locale inttostr inttypes mkstemp regex sh-quote
! stat-macros stat-time strcase strftime strtoumax unistd
unlocked-io verify version-etc version-etc-fsf xalloc
xstrtoumax
'
--- 20,27 ----
gnulib_modules='
c-stack config-h diffseq dirname dup2 error exclude exit exitfail
extensions fcntl fdl file-type fnmatch-gnu getopt gettext
! gettime hard-locale inttostr inttypes mbchar mbiter mkstemp regex
! sh-quote stat-macros stat-time strcase strftime strtoumax unistd
unlocked-io verify version-etc version-etc-fsf xalloc
xstrtoumax
'
*** /dev/null 2006-05-02 08:46:16.000000000 +0200
--- src/side-half.h 2008-01-23 16:46:50.000000000 +0100
***************
*** 0 ****
--- 1,100 ----
+ /* sdiff-format output routines for GNU DIFF.
+
+ Copyright (C) 1991, 1992, 1993, 1998, 2001, 2002, 2004, 2008 Free
+ Software Foundation, Inc.
+
+ This file is part of GNU DIFF.
+
+ GNU DIFF is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY. No author or distributor
+ accepts responsibility to anyone for the consequences of using it
+ or for whether it serves any particular purpose or works at all,
+ unless he says so in writing. Refer to the GNU General Public
+ License for full details.
+
+ Everyone is granted permission to copy, modify and redistribute
+ GNU DIFF, but only under the conditions described in the
+ GNU General Public License. A copy of this license is
+ supposed to have been given to you along with GNU DIFF so you
+ can know your rights and responsibilities. It should be in a
+ file named COPYING. Among other things, the copyright notice
+ and this notice must be preserved on all copies. */
+
+ static size_t
+ PRINT_HALF_LINE (char const *const *line, size_t indent, size_t out_bound)
+ {
+ FILE *out = outfile;
+ register size_t in_position = 0;
+ register size_t out_position = 0;
+ char const *text_start = line[0];
+ char const *text_limit = line[1];
+ MBI_ITERATOR_T text_pointer;
+
+ for (MBI_INIT (text_pointer, text_start, text_limit - text_start);
+ MBI_AVAIL (text_pointer, text_limit);
+ MBI_ADVANCE (text_pointer))
+ {
+ if (MB_ISEQ (MBI_CUR (text_pointer), '\t'))
+ {
+ size_t spaces = tabsize - in_position % tabsize;
+ if (in_position == out_position)
+ {
+ size_t tabstop = out_position + spaces;
+ if (expand_tabs)
+ {
+ if (out_bound < tabstop)
+ tabstop = out_bound;
+ for (; out_position < tabstop; out_position++)
+ putc (' ', out);
+ }
+ else
+ if (tabstop < out_bound)
+ {
+ out_position = tabstop;
+ fwrite (MB_PTR (MBI_CUR (text_pointer)), 1,
+ MB_LEN (MBI_CUR (text_pointer)), out);
+ }
+ }
+ in_position += spaces;
+ }
+ else if (MB_ISEQ (MBI_CUR (text_pointer), '\r'))
+ {
+ fwrite (MB_PTR (MBI_CUR (text_pointer)), 1,
+ MB_LEN (MBI_CUR (text_pointer)), out);
+ tab_from_to (0, indent);
+ in_position = out_position = 0;
+ }
+ else if (MB_ISEQ (MBI_CUR (text_pointer), '\b'))
+ {
+ if (in_position != 0 && --in_position < out_bound)
+ {
+ if (out_position <= in_position)
+ /* Add spaces to make up for suppressed tab past out_bound. */
+ for (; out_position < in_position; out_position++)
+ putc (' ', out);
+ else
+ {
+ out_position = in_position;
+ fwrite (MB_PTR (MBI_CUR (text_pointer)), 1,
+ MB_LEN (MBI_CUR (text_pointer)), out);
+ }
+ }
+ }
+ else if (MB_ISEQ (MBI_CUR (text_pointer), '\n'))
+ break;
+ else
+ {
+ int width = MB_WIDTH (MBI_CUR (text_pointer));
+
+ in_position += width;
+ if (in_position <= out_bound)
+ {
+ out_position = in_position;
+ fwrite (MB_PTR (MBI_CUR (text_pointer)), 1,
+ MB_LEN (MBI_CUR (text_pointer)), out);
+ }
+ }
+ }
+
+ return out_position;
+ }
*** src/side.c 19 Jul 2007 17:19:39 -0000 1.16
--- src/side.c 23 Jan 2008 15:58:05 -0000
***************
*** 1,6 ****
/* sdiff-format output routines for GNU DIFF.
! Copyright (C) 1991, 1992, 1993, 1998, 2001, 2002, 2004 Free
Software Foundation, Inc.
This file is part of GNU DIFF.
--- 1,6 ----
/* sdiff-format output routines for GNU DIFF.
! Copyright (C) 1991, 1992, 1993, 1998, 2001, 2002, 2004, 2008 Free
Software Foundation, Inc.
This file is part of GNU DIFF.
***************
*** 65,157 ****
width observing tabs, and trim a trailing newline. Return the
last column written (not the number of chars). */
static size_t
print_half_line (char const *const *line, size_t indent, size_t out_bound)
{
! FILE *out = outfile;
! register size_t in_position = 0;
! register size_t out_position = 0;
! register char const *text_pointer = line[0];
! register char const *text_limit = line[1];
!
! while (text_pointer < text_limit)
! {
! register unsigned char c = *text_pointer++;
!
! switch (c)
! {
! case '\t':
! {
! size_t spaces = tabsize - in_position % tabsize;
! if (in_position == out_position)
! {
! size_t tabstop = out_position + spaces;
! if (expand_tabs)
! {
! if (out_bound < tabstop)
! tabstop = out_bound;
! for (; out_position < tabstop; out_position++)
! putc (' ', out);
! }
! else
! if (tabstop < out_bound)
! {
! out_position = tabstop;
! putc (c, out);
! }
! }
! in_position += spaces;
! }
! break;
!
! case '\r':
! {
! putc (c, out);
! tab_from_to (0, indent);
! in_position = out_position = 0;
! }
! break;
!
! case '\b':
! if (in_position != 0 && --in_position < out_bound)
! {
! if (out_position <= in_position)
! /* Add spaces to make up for suppressed tab past out_bound. */
! for (; out_position < in_position; out_position++)
! putc (' ', out);
! else
! {
! out_position = in_position;
! putc (c, out);
! }
! }
! break;
!
! case '\f':
! case '\v':
! control_char:
! if (in_position < out_bound)
! putc (c, out);
! break;
!
! default:
! if (! isprint (c))
! goto control_char;
! /* falls through */
! case ' ':
! if (in_position++ < out_bound)
! {
! out_position = in_position;
! putc (c, out);
! }
! break;
!
! case '\n':
! return out_position;
! }
! }
!
! return out_position;
}
/* Print side by side lines with a separator in the middle.
--- 65,126 ----
width observing tabs, and trim a trailing newline. Return the
last column written (not the number of chars). */
+ #define PRINT_HALF_LINE print_half_line_unibyte
+ #define MBI_ITERATOR_T const char *
+ #define MBI_INIT(iter,startptr,length) (void)(iter = (startptr))
+ #define MBI_AVAIL(iter,endptr) (iter) < (endptr)
+ #define MBI_ADVANCE(iter) (void)(iter)++
+ #define MBI_CUR(iter) (iter)
+ #define MB_ISEQ(mbc,sc) (*(mbc) == (sc))
+ #define MB_PTR(mbc) (mbc)
+ #define MB_LEN(mbc) 1
+ #define MB_WIDTH(mbc) (iscntrl ((unsigned char) *(mbc)) ? 0 : 1)
+ #include "side-half.h"
+ #undef MB_WIDTH
+ #undef MB_LEN
+ #undef MB_PTR
+ #undef MB_ISEQ
+ #undef MBI_CUR
+ #undef MBI_ADVANCE
+ #undef MBI_AVAIL
+ #undef MBI_INIT
+ #undef MBI_ITERATOR_T
+ #undef PRINT_HALF_LINE
+
+ #if HAVE_MBRTOWC
+ # include "mbchar.h"
+ # include "mbiter.h"
+ # define PRINT_HALF_LINE print_half_line_multibyte
+ # define MBI_ITERATOR_T mbi_iterator_t
+ # define MBI_INIT(iter,startptr,length) mbi_init (iter, startptr, length)
+ # define MBI_AVAIL(iter,endptr) mbi_avail (iter)
+ # define MBI_ADVANCE(iter) mbi_advance (iter)
+ # define MBI_CUR(iter) mbi_cur (iter)
+ # define MB_ISEQ(mbc,sc) mb_iseq (mbc, sc)
+ # define MB_PTR(mbc) mb_ptr (mbc)
+ # define MB_LEN(mbc) mb_len (mbc)
+ # define MB_WIDTH(mbc) mb_width (mbc)
+ # include "side-half.h"
+ # undef MB_WIDTH
+ # undef MB_LEN
+ # undef MB_PTR
+ # undef MB_ISEQ
+ # undef MBI_CUR
+ # undef MBI_ADVANCE
+ # undef MBI_AVAIL
+ # undef MBI_INIT
+ # undef MBI_ITERATOR_T
+ # undef PRINT_HALF_LINE
+ #endif
+
static size_t
print_half_line (char const *const *line, size_t indent, size_t out_bound)
{
! #if HAVE_MBRTOWC
! if (MB_CUR_MAX > 1)
! return print_half_line_multibyte (line, indent, out_bound);
! #endif
! return print_half_line_unibyte (line, indent, out_bound);
}
/* Print side by side lines with a separator in the middle.
*** src/Makefile.am 19 Jul 2007 17:45:28 -0000 1.13
--- src/Makefile.am 23 Jan 2008 15:58:05 -0000
***************
*** 31,37 ****
sdiff_SOURCES = sdiff.c
diff_SOURCES = \
analyze.c context.c diff.c diff.h dir.c ed.c ifdef.c io.c \
! normal.c side.c util.c
MOSTLYCLEANFILES = paths.h paths.ht
--- 31,37 ----
sdiff_SOURCES = sdiff.c
diff_SOURCES = \
analyze.c context.c diff.c diff.h dir.c ed.c ifdef.c io.c \
! normal.c side.c side-half.h util.c
MOSTLYCLEANFILES = paths.h paths.ht