bug-gnu-utils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: diff -y + UTF-8 = irregular columns


From: Bruno Haible
Subject: Re: diff -y + UTF-8 = irregular columns
Date: Wed, 23 Jan 2008 17:06:56 +0100
User-agent: KMail/1.9.1

Paul Eggert wrote:
> It is indeed a bug, one that it would be nice to fix.

Thanks for acknowledging this bug, already reported in
  http://lists.gnu.org/archive/html/bug-gnu-utils/2002-01/msg00448.html

Here is a proposed fix that
  - works in all locales, not only UTF-8 locales,
  - also considers the case of input that is not valid in the current
    locale (e.g. ISO-8859-1 input in a UTF-8 locale),
  - does not introduce code duplication.


2008-01-23  Bruno Haible  <address@hidden>

        * bootstrap.conf (gnulib_modules): Add mbchar and mbiter.
        * src/side-half.h: New file, extracted from src/side.c, generalized
        to use multibyte aware macros.
        * src/side.c (print_half_line): Include side-half.h twice. Dispatch
        between unibyte case and multibyte case.
        * src/Makefile.am (diff_SOURCES): Add side-half.h.

*** bootstrap.conf      17 Aug 2007 23:35:47 -0000      1.4
--- bootstrap.conf      23 Jan 2008 15:58:04 -0000
***************
*** 1,6 ****
  # Bootstrap configuration.
  
! # Copyright (C) 2006, 2007 Free Software Foundation, Inc.
  
  # This program is free software: you can redistribute it and/or modify
  # it under the terms of the GNU General Public License as published by
--- 1,6 ----
  # Bootstrap configuration.
  
! # Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
  
  # This program is free software: you can redistribute it and/or modify
  # it under the terms of the GNU General Public License as published by
***************
*** 20,27 ****
  gnulib_modules='
        c-stack config-h diffseq dirname dup2 error exclude exit exitfail
        extensions fcntl fdl file-type fnmatch-gnu getopt gettext
!       gettime hard-locale inttostr inttypes mkstemp regex sh-quote
!       stat-macros stat-time strcase strftime strtoumax unistd
        unlocked-io verify version-etc version-etc-fsf xalloc
        xstrtoumax
  '
--- 20,27 ----
  gnulib_modules='
        c-stack config-h diffseq dirname dup2 error exclude exit exitfail
        extensions fcntl fdl file-type fnmatch-gnu getopt gettext
!       gettime hard-locale inttostr inttypes mbchar mbiter mkstemp regex
!       sh-quote stat-macros stat-time strcase strftime strtoumax unistd
        unlocked-io verify version-etc version-etc-fsf xalloc
        xstrtoumax
  '
*** /dev/null   2006-05-02 08:46:16.000000000 +0200
--- src/side-half.h     2008-01-23 16:46:50.000000000 +0100
***************
*** 0 ****
--- 1,100 ----
+ /* sdiff-format output routines for GNU DIFF.
+ 
+    Copyright (C) 1991, 1992, 1993, 1998, 2001, 2002, 2004, 2008 Free
+    Software Foundation, Inc.
+ 
+    This file is part of GNU DIFF.
+ 
+    GNU DIFF is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY.  No author or distributor
+    accepts responsibility to anyone for the consequences of using it
+    or for whether it serves any particular purpose or works at all,
+    unless he says so in writing.  Refer to the GNU General Public
+    License for full details.
+ 
+    Everyone is granted permission to copy, modify and redistribute
+    GNU DIFF, but only under the conditions described in the
+    GNU General Public License.   A copy of this license is
+    supposed to have been given to you along with GNU DIFF so you
+    can know your rights and responsibilities.  It should be in a
+    file named COPYING.  Among other things, the copyright notice
+    and this notice must be preserved on all copies.  */
+ 
+ static size_t
+ PRINT_HALF_LINE (char const *const *line, size_t indent, size_t out_bound)
+ {
+   FILE *out = outfile;
+   register size_t in_position = 0;
+   register size_t out_position = 0;
+   char const *text_start = line[0];
+   char const *text_limit = line[1];
+   MBI_ITERATOR_T text_pointer;
+ 
+   for (MBI_INIT (text_pointer, text_start, text_limit - text_start);
+        MBI_AVAIL (text_pointer, text_limit);
+        MBI_ADVANCE (text_pointer))
+     {
+       if (MB_ISEQ (MBI_CUR (text_pointer), '\t'))
+       {
+         size_t spaces = tabsize - in_position % tabsize;
+         if (in_position == out_position)
+           {
+             size_t tabstop = out_position + spaces;
+             if (expand_tabs)
+               {
+                 if (out_bound < tabstop)
+                   tabstop = out_bound;
+                 for (;  out_position < tabstop;  out_position++)
+                   putc (' ', out);
+               }
+             else
+               if (tabstop < out_bound)
+                 {
+                   out_position = tabstop;
+                   fwrite (MB_PTR (MBI_CUR (text_pointer)), 1,
+                           MB_LEN (MBI_CUR (text_pointer)), out);
+                 }
+           }
+         in_position += spaces;
+       }
+       else if (MB_ISEQ (MBI_CUR (text_pointer), '\r'))
+       {
+         fwrite (MB_PTR (MBI_CUR (text_pointer)), 1,
+                 MB_LEN (MBI_CUR (text_pointer)), out);
+         tab_from_to (0, indent);
+         in_position = out_position = 0;
+       }
+       else if (MB_ISEQ (MBI_CUR (text_pointer), '\b'))
+       {
+         if (in_position != 0 && --in_position < out_bound)
+           {
+             if (out_position <= in_position)
+               /* Add spaces to make up for suppressed tab past out_bound.  */
+               for (;  out_position < in_position;  out_position++)
+                 putc (' ', out);
+             else
+               {
+                 out_position = in_position;
+                 fwrite (MB_PTR (MBI_CUR (text_pointer)), 1,
+                         MB_LEN (MBI_CUR (text_pointer)), out);
+               }
+           }
+       }
+       else if (MB_ISEQ (MBI_CUR (text_pointer), '\n'))
+       break;
+       else
+       {
+         int width = MB_WIDTH (MBI_CUR (text_pointer));
+ 
+         in_position += width;
+         if (in_position <= out_bound)
+           {
+             out_position = in_position;
+             fwrite (MB_PTR (MBI_CUR (text_pointer)), 1,
+                     MB_LEN (MBI_CUR (text_pointer)), out);
+           }
+       }
+     }
+ 
+   return out_position;
+ }
*** src/side.c  19 Jul 2007 17:19:39 -0000      1.16
--- src/side.c  23 Jan 2008 15:58:05 -0000
***************
*** 1,6 ****
  /* sdiff-format output routines for GNU DIFF.
  
!    Copyright (C) 1991, 1992, 1993, 1998, 2001, 2002, 2004 Free
     Software Foundation, Inc.
  
     This file is part of GNU DIFF.
--- 1,6 ----
  /* sdiff-format output routines for GNU DIFF.
  
!    Copyright (C) 1991, 1992, 1993, 1998, 2001, 2002, 2004, 2008 Free
     Software Foundation, Inc.
  
     This file is part of GNU DIFF.
***************
*** 65,157 ****
     width observing tabs, and trim a trailing newline.  Return the
     last column written (not the number of chars).  */
  
  static size_t
  print_half_line (char const *const *line, size_t indent, size_t out_bound)
  {
!   FILE *out = outfile;
!   register size_t in_position = 0;
!   register size_t out_position = 0;
!   register char const *text_pointer = line[0];
!   register char const *text_limit = line[1];
! 
!   while (text_pointer < text_limit)
!     {
!       register unsigned char c = *text_pointer++;
! 
!       switch (c)
!       {
!       case '\t':
!         {
!           size_t spaces = tabsize - in_position % tabsize;
!           if (in_position == out_position)
!             {
!               size_t tabstop = out_position + spaces;
!               if (expand_tabs)
!                 {
!                   if (out_bound < tabstop)
!                     tabstop = out_bound;
!                   for (;  out_position < tabstop;  out_position++)
!                     putc (' ', out);
!                 }
!               else
!                 if (tabstop < out_bound)
!                   {
!                     out_position = tabstop;
!                     putc (c, out);
!                   }
!             }
!           in_position += spaces;
!         }
!         break;
! 
!       case '\r':
!         {
!           putc (c, out);
!           tab_from_to (0, indent);
!           in_position = out_position = 0;
!         }
!         break;
! 
!       case '\b':
!         if (in_position != 0 && --in_position < out_bound)
!           {
!             if (out_position <= in_position)
!               /* Add spaces to make up for suppressed tab past out_bound.  */
!               for (;  out_position < in_position;  out_position++)
!                 putc (' ', out);
!             else
!               {
!                 out_position = in_position;
!                 putc (c, out);
!               }
!           }
!         break;
! 
!       case '\f':
!       case '\v':
!       control_char:
!         if (in_position < out_bound)
!           putc (c, out);
!         break;
! 
!       default:
!         if (! isprint (c))
!           goto control_char;
!         /* falls through */
!       case ' ':
!         if (in_position++ < out_bound)
!           {
!             out_position = in_position;
!             putc (c, out);
!           }
!         break;
! 
!       case '\n':
!         return out_position;
!       }
!     }
! 
!   return out_position;
  }
  
  /* Print side by side lines with a separator in the middle.
--- 65,126 ----
     width observing tabs, and trim a trailing newline.  Return the
     last column written (not the number of chars).  */
  
+ #define PRINT_HALF_LINE print_half_line_unibyte
+ #define MBI_ITERATOR_T const char *
+ #define MBI_INIT(iter,startptr,length) (void)(iter = (startptr))
+ #define MBI_AVAIL(iter,endptr) (iter) < (endptr)
+ #define MBI_ADVANCE(iter) (void)(iter)++
+ #define MBI_CUR(iter) (iter)
+ #define MB_ISEQ(mbc,sc) (*(mbc) == (sc))
+ #define MB_PTR(mbc) (mbc)
+ #define MB_LEN(mbc) 1
+ #define MB_WIDTH(mbc) (iscntrl ((unsigned char) *(mbc)) ? 0 : 1)
+ #include "side-half.h"
+ #undef MB_WIDTH
+ #undef MB_LEN
+ #undef MB_PTR
+ #undef MB_ISEQ
+ #undef MBI_CUR
+ #undef MBI_ADVANCE
+ #undef MBI_AVAIL
+ #undef MBI_INIT
+ #undef MBI_ITERATOR_T
+ #undef PRINT_HALF_LINE
+ 
+ #if HAVE_MBRTOWC
+ # include "mbchar.h"
+ # include "mbiter.h"
+ # define PRINT_HALF_LINE print_half_line_multibyte
+ # define MBI_ITERATOR_T mbi_iterator_t
+ # define MBI_INIT(iter,startptr,length) mbi_init (iter, startptr, length)
+ # define MBI_AVAIL(iter,endptr) mbi_avail (iter)
+ # define MBI_ADVANCE(iter) mbi_advance (iter)
+ # define MBI_CUR(iter) mbi_cur (iter)
+ # define MB_ISEQ(mbc,sc) mb_iseq (mbc, sc)
+ # define MB_PTR(mbc) mb_ptr (mbc)
+ # define MB_LEN(mbc) mb_len (mbc)
+ # define MB_WIDTH(mbc) mb_width (mbc)
+ # include "side-half.h"
+ # undef MB_WIDTH
+ # undef MB_LEN
+ # undef MB_PTR
+ # undef MB_ISEQ
+ # undef MBI_CUR
+ # undef MBI_ADVANCE
+ # undef MBI_AVAIL
+ # undef MBI_INIT
+ # undef MBI_ITERATOR_T
+ # undef PRINT_HALF_LINE
+ #endif
+ 
  static size_t
  print_half_line (char const *const *line, size_t indent, size_t out_bound)
  {
! #if HAVE_MBRTOWC
!   if (MB_CUR_MAX > 1)
!     return print_half_line_multibyte (line, indent, out_bound);
! #endif
!   return print_half_line_unibyte (line, indent, out_bound);
  }
  
  /* Print side by side lines with a separator in the middle.
*** src/Makefile.am     19 Jul 2007 17:45:28 -0000      1.13
--- src/Makefile.am     23 Jan 2008 15:58:05 -0000
***************
*** 31,37 ****
  sdiff_SOURCES = sdiff.c
  diff_SOURCES = \
    analyze.c context.c diff.c diff.h dir.c ed.c ifdef.c io.c \
!   normal.c side.c util.c
  
  MOSTLYCLEANFILES = paths.h paths.ht
  
--- 31,37 ----
  sdiff_SOURCES = sdiff.c
  diff_SOURCES = \
    analyze.c context.c diff.c diff.h dir.c ed.c ifdef.c io.c \
!   normal.c side.c side-half.h util.c
  
  MOSTLYCLEANFILES = paths.h paths.ht
  




reply via email to

[Prev in Thread] Current Thread [Next in Thread]