help-gnu-emacs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: character encoding


From: Michael Slass
Subject: Re: character encoding
Date: Sat, 26 Oct 2002 19:13:04 GMT
User-agent: Gnus/5.09 (Gnus v5.9.0) Emacs/21.2

Ivan Kanis <ivank@juliva.com> writes:

>
>I know it's in C. If someone cares to turn this into lisp that'll be neat :)
>
>Ivan
>
>
>#include "stdio.h"
>
>char *table [] =  {
>    "euro",  /* 0x80 0x20AC  #EURO SIGN */
>    "",      /* 0x81          #UNDEFINED */
>    "\"",    /* 0x82  0x201A  #SINGLE LOW-9 QUOTATION MARK */
>    "f",     /* 0x83  0x0192  #LATIN SMALL LETTER F WITH HOOK */
>    "\"",    /* 0x84  0x201E  #DOUBLE LOW-9 QUOTATION MARK */
>    "...",   /* 0x85  0x2026  #HORIZONTAL ELLIPSIS */
>    "*",     /* 0x86  0x2020  #DAGGER */
>    "*",     /* 0x87  0x2021  #DOUBLE DAGGER */
>    "^",     /* 0x88  0x02C6  #MODIFIER LETTER CIRCUMFLEX ACCENT */
>    " 0/00", /* 0x89  0x2030  #PER MILLE SIGN */
>    "S",     /* 0x8A  0x0160  #LATIN CAPITAL LETTER S WITH CARON */
>    "<",     /* 0x8B  0x2039  #SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
>    "OE",    /* 0x8C  0x0152  #LATIN CAPITAL LIGATURE OE */
>    "",      /* 0x8D          #UNDEFINED */
>    "Z",     /* 0x8E  0x017D  #LATIN CAPITAL LETTER Z WITH CARON */
>    "",      /* 0x8F          #UNDEFINED */
>    "",      /* 0x90          #UNDEFINED */
>    "'",     /* 0x91  0x2018  #LEFT SINGLE QUOTATION MARK */
>    "'",     /* 0x92  0x2019  #RIGHT SINGLE QUOTATION MARK */
>    "\"",    /* 0x93  0x201C  #LEFT DOUBLE QUOTATION MARK */
>    "\"",    /* 0x94  0x201D  #RIGHT DOUBLE QUOTATION MARK */
>    "*",     /* 0x95  0x2022  #BULLET */
>    "-",     /* 0x96  0x2013  #EN DASH */
>    "-",     /* 0x97  0x2014  #EM DASH */
>    "~",     /* 0x98  0x02DC  #SMALL TILDE */
>    "(TM)",  /* 0x99  0x2122  #TRADE MARK SIGN */
>    "s",     /* 0x9A  0x0161  #LATIN SMALL LETTER S WITH CARON */
>    "\"",    /* 0x9B  0x203A  #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
>    "oe",    /* 0x9C  0x0153  #LATIN SMALL LIGATURE OE */
>    "",      /* 0x9D          #UNDEFINED */
>    "z",     /* 0x9E  0x017E  #LATIN SMALL LETTER Z WITH CARON */
>    "y"     /* 0x9F  0x0178  #LATIN CAPITAL LETTER Y WITH DIAERESIS */
>};
>
>
>int main (int argc, char **argv) {
>    FILE *fd;
>    unsigned char in;
>    
>    if (argc == 2) {
>        if ((fd = fopen(argv[1], "r"))) {
>            while (fread(&in, 1, sizeof(char), fd)) {
>                if (in >= 0x80 && in < 0xa0) {
>                    printf ("%s", table[in-0x80]);
>                } else {
>                    printf("%c", in);
>                }
>            }
>            fclose (fd);
>        }
>    }
>    return 0;
>}

Ivan:

I can't resist that challenge.  Here's a first cut, almost completely
untested, because I don't have any Outlook-born mail to test it on.

(defvar de-microsquish-translation-alist
   '(( ?\x80 . "euro" )
     ( ?\x81 . "")
     ( ?\x82 . "\"" )
     ( ?\x83 . "f" )
     ( ?\x84 . "\"" )
     ( ?\x85 . "..." )
     ( ?\x86 . "*" )
     ( ?\x87 . "*" )
     ( ?\x88 . "^" )
     ( ?\x89 . " 0/00" )
     ( ?\x8A . "S" )
     ( ?\x8B . "<" )
     ( ?\x8C . "OE" )
     ( ?\x8E . "Z" )
     ( ?\x8F . "" )
     ( ?\x90 . "" )
     ( ?\x91 . "'" )
     ( ?\x92 . "'" )
     ( ?\x93 . "" )
     ( ?\x94 . "" )
     ( ?\x95 . "*" )
     ( ?\x96 . "-" )
     ( ?\x97 . "-" )
     ( ?\x98 . "~" )
     ( ?\x99 . "(TM)" )
     ( ?\x9A . "s" )
     ( ?\x9B . "\"" )
     ( ?\x9C . "oe" )
     ( ?\x9D . "" )
     ( ?\x9E . "z" )
     ( ?\x9F . "y" ))
"Table of hex values and replacement strings for unprintable Micro$oft chars.
See also `de-microsquish-region'.")


(defun de-microsquish-region (beg end)
  "Translate Micro$oft characters according to 
`de-microsquish-translation-alist'"
  (interactive "r")
  (save-restriction
    (narrow-to-region beg end)
    (goto-char (point-min))
    (while (not (eobp))
      (let* ((char (char-after))
             (replacement-cell (assoc char de-microsquish-translation-alist))
             (replacement (and replacement-cell (cdr replacement-cell))))
        (if (not replacement)
            (forward-char 1)
          (delete-char 1)
          (insert replacement))))))


-- 
Mike Slass


reply via email to

[Prev in Thread] Current Thread [Next in Thread]