lynx-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

LYNX-DEV charsets


From: Drazen Kacar
Subject: LYNX-DEV charsets
Date: Sun, 2 Feb 1997 22:21:04 +0100 (MET)

I thought that Klaus' Unicode support would make it into 2.7, but if it
doesn't, I think the present charset handling needs a little patching.
Lynx 2-6FM (last mod. 01/29/97) is the first browser that announces
transparent negotiation capability via

   Negotiate: trans

header, but it doesn't always send meaningful q values. When the user
selects charset(s) to send in the Accept-Charset header, Lynx will append
", iso-8859-1;q=0.001, us-ascii;q=0.001", which is the lowest possible
quality value for these code pages.

If the server takes that into calculation, ISO 8859-1 or US-ASCII pages
don't have a chance. It's a bug and needs to be corrected. I've written a
quick and dirty hack, untested, of course; use at your own risk. Variable
names are ugly, feel free to change them. :)

Since I don't know how well ASCII and Latin 1 can be represented in all
the code pages that Lynx supports, I couldn't fill in the table in the patch
completely. I left question marks in many places and I hope that people
who use those code pages will post sensible values.

Below is the context diff. There's a comment with an explanation of the q
values and the table with question marks. Somebody fill it in, please. :)

There is one more issue I'd like to address. Lynx 2-4FM or Lynx 2.5 (I
don't remember any more) was automatically changing the preferred document
charset when the user changed the display character set. Lynx 2.6 is not
doing this. I don't know what the reason is, but I think that the old
behaviour is much more user friendly. Asking users to remember charset
names is too much. I'd like to see that piece of code back.

The patch:


*** src/LYCharSets.h.orig       Sun Feb  2 20:31:15 1997
--- src/LYCharSets.h    Sun Feb  2 21:28:31 1997
***************
*** 6,12 ****
--- 6,18 ----
  extern int current_char_set;
  extern char *LYchar_set_names[];
  
+ struct charsetqvals {
+     char *ascii;
+     char *latin1;
+ };
  
+ extern struct charsetqvals LYdefault_charset_q_values[];
+ 
  extern char ** LYCharSets[];
  extern char ** p_entity_values;
  #ifdef USE_SLANG
*** src/LYCharSets.c.orig       Sun Feb  2 20:21:19 1997
--- src/LYCharSets.c    Sun Feb  2 21:24:09 1997
***************
*** 2070,2075 ****
--- 2070,2106 ----
        (char *) 0
  };
  
+ /*
+  * Quality values for US-ASCII and ISO 8859-1 when the terminal
+  * displays something else. Draft-holtman-http-negotiation-04.txt recommends:
+  *
+  *      1.000       perfect representation
+  *      0.999-0.900 near threshold of noticeable loss of quality
+  *      0.899-0.700 noticeable, but acceptable quality reduction
+  *      0.699-0.500 barely acceptable quality
+  *      0.499-0.000 severely degraded quality
+  *
+  * The order must match the order in LYCharSets and LYchar_set_names.
+  * NULL means q = 1.0
+  */
+ PUBLIC struct charsetqvals LYdefault_charset_q_values[] = {
+     { NULL,   NULL },         /* ISO Latin 1          */
+     { NULL,   "0.8" },        /* ISO Latin 2          */
+     { NULL,   "0.8" },        /* Other ISO Latin      */
+     { NULL,   "0.95" },       /* DEC Multinational    */
+     { NULL,   "?" },          /* IBM PC character set */
+     { NULL,   "?" },          /* IBM PC codepage 850  */
+     { NULL,   "?" },          /* Macintosh (8 bit)    */
+     { NULL,   "?" },          /* NeXT character set   */
+     { "?",    "?" },          /* KOI8-R character set */
+     { "?",    "?" },          /* Chinese              */
+     { "?",    "?" },          /* Japanese (EUC)       */
+     { "?",    "?" },          /* Japanese (SJIS)      */
+     { "?",    "?" },          /* Korean               */
+     { "?",    "?" },          /* Taipei (Big5)        */
+     { NULL,   "0.7" }         /* 7 bit approximations */
+ };
+ 
  #ifdef USE_SLANG
  /*
   *  Add the code of the the lowest character with the high bit set
*** WWW/Library/Implementation/HTTP.c.orig      Sun Feb  2 20:19:37 1997
--- WWW/Library/Implementation/HTTP.c   Sun Feb  2 21:42:20 1997
***************
*** 30,35 ****
--- 30,36 ----
  #include "HTML.h"
  #include "HTInit.h"
  #include "HTAABrow.h"
+ #include "../../../src/LYCharSets.h"
  
  #include "LYLeaks.h"
  
***************
*** 305,323 ****
        }
  
        if (pref_charset && *pref_charset) {
          StrAllocCat(command, "Accept-Charset: ");
!         strcpy(line, pref_charset);
!         if (line[strlen(line)-1] == ',')
!             line[strlen(line)-1] = '\0';
!         for (i = 0; line[i]; i++)
!             line[i] = TOLOWER(line[i]);
!         if (strstr(line, "iso-8859-1") == NULL)
!             strcat(line, ", iso-8859-1;q=0.001");
!         if (strstr(line, "us-ascii") == NULL)
!             strcat(line, ", us-ascii;q=0.001");
          StrAllocCat(command, line);
-         sprintf(line, "%c%c", CR, LF);
-         StrAllocCat(command, line);
        }
  
        /*
--- 306,339 ----
        }
  
        if (pref_charset && *pref_charset) {
+         char *p, *p1;
+         int n;
+ 
          StrAllocCat(command, "Accept-Charset: ");
!         for(p = pref_charset, p1 = line; *p; ++p, ++p1)
!             *p1 = TOLOWER(*p);
!         if(*(p-1) == ',')
!             *--p1 = '\0';
!         else
!             *p1 = '\0';
!         if(strstr(line, "iso-8859-1") == NULL) {
!             p = LYdefault_charset_q_values[current_char_set].latin1;
!             sprintf(p1, ", %s%s%s%n", "iso-8859-1",
!                                       p != NULL ? "; q=" : "",
!                                       p != NULL ? p : "",
!                                       &n);
!             p1 += n;
!         }
!         if(strstr(line, "us-ascii") == NULL) {
!             p = LYdefault_charset_q_values[current_char_set].ascii;
!             sprintf(p1, ", %s%s%s%n", "us-ascii",
!                                       p != NULL ? "; q=" : "",
!                                       p != NULL ? p : "",
!                                       &n);
!             p1 += n;
!         }
!         sprintf(p1, "%c%c", CR, LF);
          StrAllocCat(command, line);
        }
  
        /*



-- 
They work 24 hours a day and 256 days a year  --  address@hidden

address@hidden
address@hidden
;
; To UNSUBSCRIBE:  Send a mail message to address@hidden
;                  with "unsubscribe lynx-dev" (without the
;                  quotation marks) on a line by itself.
;

reply via email to

[Prev in Thread] Current Thread [Next in Thread]