[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: AW: treatment of U+002E that is produced by NFKC
From: |
Simon Josefsson |
Subject: |
Re: AW: treatment of U+002E that is produced by NFKC |
Date: |
Mon, 14 Jan 2008 11:10:44 +0100 |
User-agent: |
Gnus/5.110007 (No Gnus v0.7) Emacs/23.0.50 (gnu/linux) |
"Alexander Gnauck" <address@hidden> writes:
>> Sure, that is one way to deal with this. Libidn users may not be
>> clamoring for a resolution. Other implementations may be in more of a
>> rush to resolve the conflict. (I work for Google.)
>
> What about adding a define to deal with this, and make a note in the
> documentation about this "issue".
Yes, we should definitely document the problem in the manual. Erik, do
you know of any good links that discuss this issue?
Fortunately, all the idna_* APIs in libidn take a 'flags' parameter.
It would be possible to add a new flag IDNA_TREAT_U2024_AS_DOT and have
the code treat U+2024 as a dot character as per RFC 3490 section 3.1 if
the flag is given. I've confirmed that this makes libidn produce the
same output as MSIE/Firefox. See initial skeleton patch below.
/Simon
diff --git a/lib/idna.c b/lib/idna.c
index b815a3f..09ef929 100644
--- a/lib/idna.c
+++ b/lib/idna.c
@@ -1,5 +1,5 @@
/* idna.c --- Convert to or from IDN strings.
- * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson
+ * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Simon Josefsson
*
* This file is part of GNU Libidn.
*
@@ -30,8 +30,9 @@
#include "idna.h"
-#define DOTP(c) ((c) == 0x002E || (c) == 0x3002 || \
- (c) == 0xFF0E || (c) == 0xFF61)
+#define DOTP(c, flags) ((c) == 0x002E || (c) == 0x3002 || \
+ (c) == 0xFF0E || (c) == 0xFF61 || \
+ ((flags & IDNA_TREAT_U2024_AS_DOT) && (c) == 0x2024))
/* Core functions */
@@ -475,7 +476,7 @@ idna_to_ascii_4z (const uint32_t * input, char **output,
int flags)
return IDNA_SUCCESS;
}
- if (DOTP (input[0]) && input[1] == 0)
+ if (DOTP (input[0], flags) && input[1] == 0)
{
/* Handle explicit zero-length root label. */
*output = malloc (2);
@@ -490,7 +491,7 @@ idna_to_ascii_4z (const uint32_t * input, char **output,
int flags)
{
end = start;
- for (; *end && !DOTP (*end); end++)
+ for (; *end && !DOTP (*end, flags); end++)
;
if (*end == '\0' && start == end)
@@ -628,7 +629,7 @@ idna_to_unicode_4z4z (const uint32_t * input, uint32_t **
output, int flags)
{
end = start;
- for (; *end && !DOTP (*end); end++)
+ for (; *end && !DOTP (*end, flags); end++)
;
buflen = end - start;
diff --git a/lib/idna.h b/lib/idna.h
index f6b24ac..e968d33 100644
--- a/lib/idna.h
+++ b/lib/idna.h
@@ -1,5 +1,5 @@
/* idna.h --- Declarations for Internationalized Domain Name in Applications.
- * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson
+ * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Simon Josefsson
*
* This file is part of GNU Libidn.
*
@@ -54,7 +54,8 @@ extern "C"
typedef enum
{
IDNA_ALLOW_UNASSIGNED = 0x0001,
- IDNA_USE_STD3_ASCII_RULES = 0x0002
+ IDNA_USE_STD3_ASCII_RULES = 0x0002,
+ IDNA_TREAT_U2024_AS_DOT = 0x0004
} Idna_flags;
# ifndef IDNA_ACE_PREFIX
diff --git a/src/idn.c b/src/idn.c
index abb545e..cf4009b 100644
--- a/src/idn.c
+++ b/src/idn.c
@@ -370,8 +370,10 @@ main (int argc, char *argv[])
(args_info.allow_unassigned_given ?
IDNA_ALLOW_UNASSIGNED : 0) |
(args_info.usestd3asciirules_given ?
- IDNA_USE_STD3_ASCII_RULES : 0));
- free (q);
+ IDNA_USE_STD3_ASCII_RULES : 0) |
+ (args_info.treatu2024asdot_given ?
+ IDNA_TREAT_U2024_AS_DOT : 0));
+ free (q);
if (rc != IDNA_SUCCESS)
error (EXIT_FAILURE, 0, _("idna_to_ascii_4z: %s"),
idna_strerror (rc));
@@ -385,7 +387,9 @@ main (int argc, char *argv[])
(args_info.allow_unassigned_given ?
IDNA_ALLOW_UNASSIGNED : 0) |
(args_info.usestd3asciirules_given ?
- IDNA_USE_STD3_ASCII_RULES : 0));
+ IDNA_USE_STD3_ASCII_RULES : 0) |
+ (args_info.treatu2024asdot_given ?
+ IDNA_TREAT_U2024_AS_DOT : 0));
if (rc != IDNA_SUCCESS)
error (EXIT_FAILURE, 0, _("idna_to_unicode_8z4z (TLD): %s"),
idna_strerror (rc));
@@ -450,7 +454,9 @@ main (int argc, char *argv[])
(args_info.allow_unassigned_given ?
IDNA_ALLOW_UNASSIGNED : 0) |
(args_info.usestd3asciirules_given ?
- IDNA_USE_STD3_ASCII_RULES : 0));
+ IDNA_USE_STD3_ASCII_RULES : 0) |
+ (args_info.treatu2024asdot_given ?
+ IDNA_TREAT_U2024_AS_DOT : 0));
free (p);
if (rc != IDNA_SUCCESS)
error (EXIT_FAILURE, 0, _("idna_to_unicode_8z4z: %s"),
diff --git a/src/idn.ggo b/src/idn.ggo
index 620f9f6..680686f 100644
--- a/src/idn.ggo
+++ b/src/idn.ggo
@@ -1,4 +1,4 @@
-# Copyright (C) 2003, 2004, 2005, 2006, 2007 Simon Josefsson.
+# Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Simon Josefsson.
#
# This file is part of GNU Libidn.
#
@@ -31,6 +31,7 @@ option "idna-to-ascii" a "Convert to ACE according to IDNA
(default)" no
option "idna-to-unicode" u "Convert from ACE according to IDNA" no
option "allow-unassigned" - "Toggle IDNA AllowUnassigned flag" flag off
option "usestd3asciirules" - "Toggle IDNA UseSTD3ASCIIRules flag" flag off
+option "treatu2024asdot" - "Toggle IDNA TreatU2024AsDot flag" flag off
option "tld" t "Check string for TLD specific rules\nOnly for --idna-to-ascii
and --idna-to-unicode" flag on
option "profile" p "Use specified stringprep profile instead\nValid stringprep
profiles are `Nameprep', `iSCSI', `Nodeprep', `Resourceprep', `trace', and
`SASLprep'." string no
option "debug" - "Print debugging information" flag off
- treatment of U+002E that is produced by NFKC, Erik van der Poel, 2008/01/12
- Re: treatment of U+002E that is produced by NFKC, Simon Josefsson, 2008/01/13
- Re: treatment of U+002E that is produced by NFKC, Erik van der Poel, 2008/01/13
- Re: treatment of U+002E that is produced by NFKC, Simon Josefsson, 2008/01/13
- Re: treatment of U+002E that is produced by NFKC, Erik van der Poel, 2008/01/13
- AW: treatment of U+002E that is produced by NFKC, Alexander Gnauck, 2008/01/13
- Re: AW: treatment of U+002E that is produced by NFKC,
Simon Josefsson <=
- Re: AW: treatment of U+002E that is produced by NFKC, Erik van der Poel, 2008/01/14
- Re: AW: treatment of U+002E that is produced by NFKC, Simon Josefsson, 2008/01/14
- Re: AW: treatment of U+002E that is produced by NFKC, Erik van der Poel, 2008/01/14
- Re: AW: treatment of U+002E that is produced by NFKC, Simon Josefsson, 2008/01/14
- Re: AW: treatment of U+002E that is produced by NFKC, Simon Josefsson, 2008/01/15
- Re: AW: treatment of U+002E that is produced by NFKC, Erik van der Poel, 2008/01/15
- Re: AW: treatment of U+002E that is produced by NFKC, Simon Josefsson, 2008/01/15
- Re: AW: treatment of U+002E that is produced by NFKC, Erik van der Poel, 2008/01/15
- Re: AW: treatment of U+002E that is produced by NFKC, Simon Josefsson, 2008/01/15
- Re: AW: treatment of U+002E that is produced by NFKC, Erik van der Poel, 2008/01/15