# # # delete "idna/idn-int.h" # # rename "idna/stringprep.h" # to "idna/stringprep.h.in" # # rename "monotone/po" # to "po" # # add_file "idna/c-ctype.c" # content [9c54de87761273c5694eee719b39a32866946b66] # # add_file "idna/c-ctype.h" # content [9e9e7d0d7593fab5d94ca11bb86eebfcf13813ea] # # add_file "idna/c-strcase.h" # content [aef44b4b06b113d10da632c8cf7719964aa89380] # # add_file "idna/c-strcasecmp.c" # content [2c7dc587cff8a1c616e698f5752f090ab8d075e3] # # add_file "idna/striconv.c" # content [081ef3625c10f6911144a70dc3f2cf4bf59d0371] # # add_file "idna/striconv.h" # content [02bdbe1723a53b74bfa255faacd9f3d36e045784] # # add_file "idna/strverscmp.c" # content [bfa51bf1576ca9b44db9b476dd613230629ff448] # # add_file "idna/strverscmp.h" # content [c6682866e23ac96241358bce7e88e8ef64ef63db] # # add_file "idna/tst_idna.c" # content [7d4486bee1159e5c4b2e72a05f98df9e7ef7b8c9] # # add_file "idna/tst_idna2.c" # content [d8f00e5771ad810ddbea785b11806bd4a25f618c] # # add_file "idna/tst_nfkc.c" # content [b86e2c9e6662a3134eab70c77c1ab4632372aaba] # # add_file "idna/tst_punycode.c" # content [345956cee9cb31f850f347fbafdcba825bd3165e] # # add_file "idna/tst_stringprep.c" # content [649476a16fcda276903bd72a6d97101b0b6ad5fb] # # add_file "idna/utils.c" # content [e417a5413b26ed1fc36e321db250ea010b571270] # # add_file "idna/utils.h" # content [1af81a5334b6ec8ddb624fad1304b494817aeb8e] # # patch ".mtn-ignore" # from [e97ced7acc9976e11bf1986d1613fa9be9704fad] # to [b05398492019242d50965fcbffbef791afdb157c] # # patch "Makefile.in" # from [f8194cdd2dd5414bb18916aab2589fce1b8e9b27] # to [ae54acc8e7695a62239ff945848163d6a1d9cd84] # # patch "configure.ac" # from [a7a508ca8f53f63f6985ca3545bfae070f8270d1] # to [b3ab0f0feb7b12f9e1b9846a05cdea4583d4c31a] # # patch "idna/Makefile.am" # from [24965db46035cf019f0e553dd5e75e1b5341310f] # to [387be0754ed112345b2ebf410243f306bb452d66] # # patch "idna/configure.ac" # from [2a97033aff497ff3e8178b396974517b49d33bfe] # to 
[b7dc4011614352b420eae67736131f05cd005447] # # patch "idna/gunicomp.h" # from [d15e88c0a8d456b85f0ac4013211e4b10bcb4bd1] # to [b1f1e58f6caf864a628c5b31540ca45b2cfb7f69] # # patch "idna/gunidecomp.h" # from [7fd72b1ba528a9db3a662664104cdf3b452189d3] # to [c600d98e705a928c7e805099514ebc66a9d0c054] # # patch "idna/idna.c" # from [2a57a22e3ea331d29cc940a39e2e2784b1249316] # to [da31a73e63391a4fbcf069c66dbbab8340009c67] # # patch "idna/idna.h" # from [dc4bfad8f440c60a7b219fec34647f9d04e1052c] # to [7cbbdbc10bfd016a6ca501ac4a8574ab9572f0d3] # # patch "idna/nfkc.c" # from [2e2528f92fab6314dbd0e0b03697de038f6fc2ad] # to [a4ab7deec83effb74b369847eeb112687aa98a29] # # patch "idna/profiles.c" # from [401a78afbc6f10768a43520c3ad60e55cf8179db] # to [df12e897155e0ffb64e85af0f8bf0a67d6e5eb76] # # patch "idna/punycode.c" # from [bfae13cf23da87463b6a3eae86ecbb30306557d4] # to [19039b793423e3c68982fbc5279887be0fe1e642] # # patch "idna/punycode.h" # from [f26023ae3560fd2b44153e8740aa6bf132274cc2] # to [36331c9ce21c86f8b1cc860c67a5e5c359721746] # # patch "idna/rfc3454.c" # from [bbb9eb2443a7d1f240345e7b153e59212bd4319c] # to [1a30904ada04fea7b6cf1afd24b29cd842f165ea] # # patch "idna/stringprep.c" # from [c6749fc1021971e084a16c0a9523fcc52cbb8476] # to [60d6e36db7d7f4f77efd39cdaa8a7e44e768f96f] # # patch "idna/stringprep.h.in" # from [dec94e47c4c9ed31135f8437ae5eb3d146332328] # to [49cee66eabccdf023a2116c5b6240bb39b3475d3] # # patch "idna/toutf8.c" # from [564af0eb9f1d23250e63fedb66c8c778a45cc989] # to [897c8520fe7be0c07cd384051a616ce41b5af619] # # patch "idna/version.c" # from [f0fd69ee06dcad4e22b1f30d1a4220cb5bd5f518] # to [30a0a5f71c59986d792fac7c1e0760d2ef0e8bcc] # ============================================================ --- idna/c-ctype.c 9c54de87761273c5694eee719b39a32866946b66 +++ idna/c-ctype.c 9c54de87761273c5694eee719b39a32866946b66 @@ -0,0 +1,396 @@ +/* Character handling in C locale. + + Copyright 2000-2003, 2006 Free Software Foundation, Inc. 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program; if not, write to the Free Software Foundation, +Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include + +/* Specification. */ +#define NO_C_CTYPE_MACROS +#include "c-ctype.h" + +/* The function isascii is not locale dependent. Its use in EBCDIC is + questionable. */ +bool +c_isascii (int c) +{ + return (c >= 0x00 && c <= 0x7f); +} + +bool +c_isalnum (int c) +{ +#if C_CTYPE_CONSECUTIVE_DIGITS \ + && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII + return ((c >= '0' && c <= '9') + || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z')); +#else + return ((c >= '0' && c <= '9') + || (c >= 'A' && c <= 'Z') + || (c >= 'a' && c <= 'z')); +#endif +#else + switch (c) + { + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + return 1; + default: + 
return 0; + } +#endif +} + +bool +c_isalpha (int c) +{ +#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII + return ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z'); +#else + return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')); +#endif +#else + switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + return 1; + default: + return 0; + } +#endif +} + +bool +c_isblank (int c) +{ + return (c == ' ' || c == '\t'); +} + +bool +c_iscntrl (int c) +{ +#if C_CTYPE_ASCII + return ((c & ~0x1f) == 0 || c == 0x7f); +#else + switch (c) + { + case ' ': case '!': case '"': case '#': case '$': case '%': + case '&': case '\'': case '(': case ')': case '*': case '+': + case ',': case '-': case '.': case '/': + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case ':': case ';': case '<': case '=': case '>': case '?': + case '@': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '[': case '\\': case ']': case '^': case '_': case '`': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 
'y': case 'z': + case '{': case '|': case '}': case '~': + return 0; + default: + return 1; + } +#endif +} + +bool +c_isdigit (int c) +{ +#if C_CTYPE_CONSECUTIVE_DIGITS + return (c >= '0' && c <= '9'); +#else + switch (c) + { + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + return 1; + default: + return 0; + } +#endif +} + +bool +c_islower (int c) +{ +#if C_CTYPE_CONSECUTIVE_LOWERCASE + return (c >= 'a' && c <= 'z'); +#else + switch (c) + { + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + return 1; + default: + return 0; + } +#endif +} + +bool +c_isgraph (int c) +{ +#if C_CTYPE_ASCII + return (c >= '!' && c <= '~'); +#else + switch (c) + { + case '!': case '"': case '#': case '$': case '%': case '&': + case '\'': case '(': case ')': case '*': case '+': case ',': + case '-': case '.': case '/': + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case ':': case ';': case '<': case '=': case '>': case '?': + case '@': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '[': case '\\': case ']': case '^': case '_': case '`': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '{': case '|': case '}': case '~': + return 1; + default: + return 0; + } +#endif +} + +bool +c_isprint (int c) +{ +#if 
C_CTYPE_ASCII + return (c >= ' ' && c <= '~'); +#else + switch (c) + { + case ' ': case '!': case '"': case '#': case '$': case '%': + case '&': case '\'': case '(': case ')': case '*': case '+': + case ',': case '-': case '.': case '/': + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case ':': case ';': case '<': case '=': case '>': case '?': + case '@': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '[': case '\\': case ']': case '^': case '_': case '`': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '{': case '|': case '}': case '~': + return 1; + default: + return 0; + } +#endif +} + +bool +c_ispunct (int c) +{ +#if C_CTYPE_ASCII + return ((c >= '!' 
&& c <= '~') + && !((c >= '0' && c <= '9') + || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z'))); +#else + switch (c) + { + case '!': case '"': case '#': case '$': case '%': case '&': + case '\'': case '(': case ')': case '*': case '+': case ',': + case '-': case '.': case '/': + case ':': case ';': case '<': case '=': case '>': case '?': + case '@': + case '[': case '\\': case ']': case '^': case '_': case '`': + case '{': case '|': case '}': case '~': + return 1; + default: + return 0; + } +#endif +} + +bool +c_isspace (int c) +{ + return (c == ' ' || c == '\t' + || c == '\n' || c == '\v' || c == '\f' || c == '\r'); +} + +bool +c_isupper (int c) +{ +#if C_CTYPE_CONSECUTIVE_UPPERCASE + return (c >= 'A' && c <= 'Z'); +#else + switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + return 1; + default: + return 0; + } +#endif +} + +bool +c_isxdigit (int c) +{ +#if C_CTYPE_CONSECUTIVE_DIGITS \ + && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII + return ((c >= '0' && c <= '9') + || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'F')); +#else + return ((c >= '0' && c <= '9') + || (c >= 'A' && c <= 'F') + || (c >= 'a' && c <= 'f')); +#endif +#else + switch (c) + { + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + return 1; + default: + return 0; + } +#endif +} + +int +c_tolower (int c) +{ +#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE + return (c >= 'A' && c <= 'Z' ? 
c - 'A' + 'a' : c); +#else + switch (c) + { + case 'A': return 'a'; + case 'B': return 'b'; + case 'C': return 'c'; + case 'D': return 'd'; + case 'E': return 'e'; + case 'F': return 'f'; + case 'G': return 'g'; + case 'H': return 'h'; + case 'I': return 'i'; + case 'J': return 'j'; + case 'K': return 'k'; + case 'L': return 'l'; + case 'M': return 'm'; + case 'N': return 'n'; + case 'O': return 'o'; + case 'P': return 'p'; + case 'Q': return 'q'; + case 'R': return 'r'; + case 'S': return 's'; + case 'T': return 't'; + case 'U': return 'u'; + case 'V': return 'v'; + case 'W': return 'w'; + case 'X': return 'x'; + case 'Y': return 'y'; + case 'Z': return 'z'; + default: return c; + } +#endif +} + +int +c_toupper (int c) +{ +#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE + return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c); +#else + switch (c) + { + case 'a': return 'A'; + case 'b': return 'B'; + case 'c': return 'C'; + case 'd': return 'D'; + case 'e': return 'E'; + case 'f': return 'F'; + case 'g': return 'G'; + case 'h': return 'H'; + case 'i': return 'I'; + case 'j': return 'J'; + case 'k': return 'K'; + case 'l': return 'L'; + case 'm': return 'M'; + case 'n': return 'N'; + case 'o': return 'O'; + case 'p': return 'P'; + case 'q': return 'Q'; + case 'r': return 'R'; + case 's': return 'S'; + case 't': return 'T'; + case 'u': return 'U'; + case 'v': return 'V'; + case 'w': return 'W'; + case 'x': return 'X'; + case 'y': return 'Y'; + case 'z': return 'Z'; + default: return c; + } +#endif +} ============================================================ --- idna/c-ctype.h 9e9e7d0d7593fab5d94ca11bb86eebfcf13813ea +++ idna/c-ctype.h 9e9e7d0d7593fab5d94ca11bb86eebfcf13813ea @@ -0,0 +1,280 @@ +/* Character handling in C locale. + + These functions work like the corresponding functions in , + except that they have the C (POSIX) locale hardwired, whereas the + functions' behaviour depends on the current locale set via + setlocale. 
+ + Copyright (C) 2000-2003, 2006 Free Software Foundation, Inc. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program; if not, write to the Free Software Foundation, +Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifndef C_CTYPE_H +#define C_CTYPE_H + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* The functions defined in this file assume the "C" locale and a character + set without diacritics (ASCII-US or EBCDIC-US or something like that). + Even if the "C" locale on a particular system is an extension of the ASCII + character set (like on BeOS, where it is UTF-8, or on AmigaOS, where it + is ISO-8859-1), the functions in this file recognize only the ASCII + characters. */ + + +/* Check whether the ASCII optimizations apply. */ + +/* ANSI C89 (and ISO C99 5.2.1.3 too) already guarantees that + '0', '1', ..., '9' have consecutive integer values. 
*/ +#define C_CTYPE_CONSECUTIVE_DIGITS 1 + +#if ('A' <= 'Z') \ + && ('A' + 1 == 'B') && ('B' + 1 == 'C') && ('C' + 1 == 'D') \ + && ('D' + 1 == 'E') && ('E' + 1 == 'F') && ('F' + 1 == 'G') \ + && ('G' + 1 == 'H') && ('H' + 1 == 'I') && ('I' + 1 == 'J') \ + && ('J' + 1 == 'K') && ('K' + 1 == 'L') && ('L' + 1 == 'M') \ + && ('M' + 1 == 'N') && ('N' + 1 == 'O') && ('O' + 1 == 'P') \ + && ('P' + 1 == 'Q') && ('Q' + 1 == 'R') && ('R' + 1 == 'S') \ + && ('S' + 1 == 'T') && ('T' + 1 == 'U') && ('U' + 1 == 'V') \ + && ('V' + 1 == 'W') && ('W' + 1 == 'X') && ('X' + 1 == 'Y') \ + && ('Y' + 1 == 'Z') +#define C_CTYPE_CONSECUTIVE_UPPERCASE 1 +#endif + +#if ('a' <= 'z') \ + && ('a' + 1 == 'b') && ('b' + 1 == 'c') && ('c' + 1 == 'd') \ + && ('d' + 1 == 'e') && ('e' + 1 == 'f') && ('f' + 1 == 'g') \ + && ('g' + 1 == 'h') && ('h' + 1 == 'i') && ('i' + 1 == 'j') \ + && ('j' + 1 == 'k') && ('k' + 1 == 'l') && ('l' + 1 == 'm') \ + && ('m' + 1 == 'n') && ('n' + 1 == 'o') && ('o' + 1 == 'p') \ + && ('p' + 1 == 'q') && ('q' + 1 == 'r') && ('r' + 1 == 's') \ + && ('s' + 1 == 't') && ('t' + 1 == 'u') && ('u' + 1 == 'v') \ + && ('v' + 1 == 'w') && ('w' + 1 == 'x') && ('x' + 1 == 'y') \ + && ('y' + 1 == 'z') +#define C_CTYPE_CONSECUTIVE_LOWERCASE 1 +#endif + +#if (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' 
== 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126) +/* The character set is ASCII or one of its variants or extensions, not EBCDIC. + Testing the value of '\n' and '\r' is not relevant. */ +#define C_CTYPE_ASCII 1 +#endif + + +/* Function declarations. */ + +extern bool c_isascii (int c); /* not locale dependent */ + +extern bool c_isalnum (int c); +extern bool c_isalpha (int c); +extern bool c_isblank (int c); +extern bool c_iscntrl (int c); +extern bool c_isdigit (int c); +extern bool c_islower (int c); +extern bool c_isgraph (int c); +extern bool c_isprint (int c); +extern bool c_ispunct (int c); +extern bool c_isspace (int c); +extern bool c_isupper (int c); +extern bool c_isxdigit (int c); + +extern int c_tolower (int c); +extern int c_toupper (int c); + + +#if defined __GNUC__ && defined __OPTIMIZE__ && !defined __OPTIMIZE_SIZE__ && !defined NO_C_CTYPE_MACROS + +/* ASCII optimizations. 
*/ + +#undef c_isascii +#define c_isascii(c) \ + ({ int __c = (c); \ + (__c >= 0x00 && __c <= 0x7f); \ + }) + +#if C_CTYPE_CONSECUTIVE_DIGITS \ + && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII +#undef c_isalnum +#define c_isalnum(c) \ + ({ int __c = (c); \ + ((__c >= '0' && __c <= '9') \ + || ((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'Z')); \ + }) +#else +#undef c_isalnum +#define c_isalnum(c) \ + ({ int __c = (c); \ + ((__c >= '0' && __c <= '9') \ + || (__c >= 'A' && __c <= 'Z') \ + || (__c >= 'a' && __c <= 'z')); \ + }) +#endif +#endif + +#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII +#undef c_isalpha +#define c_isalpha(c) \ + ({ int __c = (c); \ + ((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'Z'); \ + }) +#else +#undef c_isalpha +#define c_isalpha(c) \ + ({ int __c = (c); \ + ((__c >= 'A' && __c <= 'Z') || (__c >= 'a' && __c <= 'z')); \ + }) +#endif +#endif + +#undef c_isblank +#define c_isblank(c) \ + ({ int __c = (c); \ + (__c == ' ' || __c == '\t'); \ + }) + +#if C_CTYPE_ASCII +#undef c_iscntrl +#define c_iscntrl(c) \ + ({ int __c = (c); \ + ((__c & ~0x1f) == 0 || __c == 0x7f); \ + }) +#endif + +#if C_CTYPE_CONSECUTIVE_DIGITS +#undef c_isdigit +#define c_isdigit(c) \ + ({ int __c = (c); \ + (__c >= '0' && __c <= '9'); \ + }) +#endif + +#if C_CTYPE_CONSECUTIVE_LOWERCASE +#undef c_islower +#define c_islower(c) \ + ({ int __c = (c); \ + (__c >= 'a' && __c <= 'z'); \ + }) +#endif + +#if C_CTYPE_ASCII +#undef c_isgraph +#define c_isgraph(c) \ + ({ int __c = (c); \ + (__c >= '!' && __c <= '~'); \ + }) +#endif + +#if C_CTYPE_ASCII +#undef c_isprint +#define c_isprint(c) \ + ({ int __c = (c); \ + (__c >= ' ' && __c <= '~'); \ + }) +#endif + +#if C_CTYPE_ASCII +#undef c_ispunct +#define c_ispunct(c) \ + ({ int _c = (c); \ + (c_isgraph (_c) && ! 
c_isalnum (_c)); \ + }) +#endif + +#undef c_isspace +#define c_isspace(c) \ + ({ int __c = (c); \ + (__c == ' ' || __c == '\t' \ + || __c == '\n' || __c == '\v' || __c == '\f' || __c == '\r'); \ + }) + +#if C_CTYPE_CONSECUTIVE_UPPERCASE +#undef c_isupper +#define c_isupper(c) \ + ({ int __c = (c); \ + (__c >= 'A' && __c <= 'Z'); \ + }) +#endif + +#if C_CTYPE_CONSECUTIVE_DIGITS \ + && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#if C_CTYPE_ASCII +#undef c_isxdigit +#define c_isxdigit(c) \ + ({ int __c = (c); \ + ((__c >= '0' && __c <= '9') \ + || ((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'F')); \ + }) +#else +#undef c_isxdigit +#define c_isxdigit(c) \ + ({ int __c = (c); \ + ((__c >= '0' && __c <= '9') \ + || (__c >= 'A' && __c <= 'F') \ + || (__c >= 'a' && __c <= 'f')); \ + }) +#endif +#endif + +#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE +#undef c_tolower +#define c_tolower(c) \ + ({ int __c = (c); \ + (__c >= 'A' && __c <= 'Z' ? __c - 'A' + 'a' : __c); \ + }) +#undef c_toupper +#define c_toupper(c) \ + ({ int __c = (c); \ + (__c >= 'a' && __c <= 'z' ? __c - 'a' + 'A' : __c); \ + }) +#endif + +#endif /* optimizing for speed */ + + +#ifdef __cplusplus +} +#endif + +#endif /* C_CTYPE_H */ ============================================================ --- idna/c-strcase.h aef44b4b06b113d10da632c8cf7719964aa89380 +++ idna/c-strcase.h aef44b4b06b113d10da632c8cf7719964aa89380 @@ -0,0 +1,55 @@ +/* Case-insensitive string comparison functions in C locale. + Copyright (C) 1995-1996, 2001, 2003, 2005 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifndef C_STRCASE_H +#define C_STRCASE_H + +#include + + +/* The functions defined in this file assume the "C" locale and a character + set without diacritics (ASCII-US or EBCDIC-US or something like that). + Even if the "C" locale on a particular system is an extension of the ASCII + character set (like on BeOS, where it is UTF-8, or on AmigaOS, where it + is ISO-8859-1), the functions in this file recognize only the ASCII + characters. More precisely, one of the string arguments must be an ASCII + string; the other one can also contain non-ASCII characters (but then + the comparison result will be nonzero). */ + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Compare strings S1 and S2, ignoring case, returning less than, equal to or + greater than zero if S1 is lexicographically less than, equal to or greater + than S2. */ +extern int c_strcasecmp (const char *s1, const char *s2); + +/* Compare no more than N characters of strings S1 and S2, ignoring case, + returning less than, equal to or greater than zero if S1 is + lexicographically less than, equal to or greater than S2. 
*/ +extern int c_strncasecmp (const char *s1, const char *s2, size_t n); + + +#ifdef __cplusplus +} +#endif + + +#endif /* C_STRCASE_H */ ============================================================ --- idna/c-strcasecmp.c 2c7dc587cff8a1c616e698f5752f090ab8d075e3 +++ idna/c-strcasecmp.c 2c7dc587cff8a1c616e698f5752f090ab8d075e3 @@ -0,0 +1,57 @@ +/* c-strcasecmp.c -- case insensitive string comparator in C locale + Copyright (C) 1998-1999, 2005-2006 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include + +/* Specification. */ +#include "c-strcase.h" + +#include + +#include "c-ctype.h" + +int +c_strcasecmp (const char *s1, const char *s2) +{ + register const unsigned char *p1 = (const unsigned char *) s1; + register const unsigned char *p2 = (const unsigned char *) s2; + unsigned char c1, c2; + + if (p1 == p2) + return 0; + + do + { + c1 = c_tolower (*p1); + c2 = c_tolower (*p2); + + if (c1 == '\0') + break; + + ++p1; + ++p2; + } + while (c1 == c2); + + if (UCHAR_MAX <= INT_MAX) + return c1 - c2; + else + /* On machines where 'char' and 'int' are types of the same size, the + difference of two 'unsigned char' values - including the sign bit - + doesn't fit in an 'int'. */ + return (c1 > c2 ? 1 : c1 < c2 ? 
-1 : 0); +} ============================================================ --- idna/striconv.c 081ef3625c10f6911144a70dc3f2cf4bf59d0371 +++ idna/striconv.c 081ef3625c10f6911144a70dc3f2cf4bf59d0371 @@ -0,0 +1,458 @@ +/* Charset conversion. + Copyright (C) 2001-2007 Free Software Foundation, Inc. + Written by Bruno Haible and Simon Josefsson. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include + +/* Specification. */ +#include "striconv.h" + +#include +#include +#include + +#if HAVE_ICONV +# include +/* Get MB_LEN_MAX, CHAR_BIT. */ +# include +#endif + +#include "c-strcase.h" + +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t) -1) +#endif + + +#if HAVE_ICONV + +int +mem_cd_iconv (const char *src, size_t srclen, iconv_t cd, + char **resultp, size_t *lengthp) +{ +# define tmpbufsize 4096 + size_t length; + char *result; + + /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + /* Set to the initial state. */ + iconv (cd, NULL, NULL, NULL, NULL); +# endif + + /* Determine the length we need. */ + { + size_t count = 0; + /* The alignment is needed when converting e.g. to glibc's WCHAR_T or + libiconv's UCS-4-INTERNAL encoding. 
*/ + union { unsigned int align; char buf[tmpbufsize]; } tmp; +# define tmpbuf tmp.buf + const char *inptr = src; + size_t insize = srclen; + + while (insize > 0) + { + char *outptr = tmpbuf; + size_t outsize = tmpbufsize; + size_t res = iconv (cd, + (ICONV_CONST char **) &inptr, &insize, + &outptr, &outsize); + + if (res == (size_t)(-1)) + { + if (errno == E2BIG) + ; + else if (errno == EINVAL) + break; + else + return -1; + } +# if !defined _LIBICONV_VERSION && !defined __GLIBC__ + /* Irix iconv() inserts a NUL byte if it cannot convert. + NetBSD iconv() inserts a question mark if it cannot convert. + Only GNU libiconv and GNU libc are known to prefer to fail rather + than doing a lossy conversion. */ + else if (res > 0) + { + errno = EILSEQ; + return -1; + } +# endif + count += outptr - tmpbuf; + } + /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + { + char *outptr = tmpbuf; + size_t outsize = tmpbufsize; + size_t res = iconv (cd, NULL, NULL, &outptr, &outsize); + + if (res == (size_t)(-1)) + return -1; + count += outptr - tmpbuf; + } +# endif + length = count; +# undef tmpbuf + } + + if (length == 0) + { + *lengthp = 0; + return 0; + } + if (*resultp != NULL && *lengthp >= length) + result = *resultp; + else + { + result = (char *) malloc (length); + if (result == NULL) + { + errno = ENOMEM; + return -1; + } + } + + /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + /* Return to the initial state. */ + iconv (cd, NULL, NULL, NULL, NULL); +# endif + + /* Do the conversion for real. 
*/ + { + const char *inptr = src; + size_t insize = srclen; + char *outptr = result; + size_t outsize = length; + + while (insize > 0) + { + size_t res = iconv (cd, + (ICONV_CONST char **) &inptr, &insize, + &outptr, &outsize); + + if (res == (size_t)(-1)) + { + if (errno == EINVAL) + break; + else + goto fail; + } +# if !defined _LIBICONV_VERSION && !defined __GLIBC__ + /* Irix iconv() inserts a NUL byte if it cannot convert. + NetBSD iconv() inserts a question mark if it cannot convert. + Only GNU libiconv and GNU libc are known to prefer to fail rather + than doing a lossy conversion. */ + else if (res > 0) + { + errno = EILSEQ; + goto fail; + } +# endif + } + /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + { + size_t res = iconv (cd, NULL, NULL, &outptr, &outsize); + + if (res == (size_t)(-1)) + goto fail; + } +# endif + if (outsize != 0) + abort (); + } + + *resultp = result; + *lengthp = length; + + return 0; + + fail: + { + if (result != *resultp) + { + int saved_errno = errno; + free (result); + errno = saved_errno; + } + return -1; + } +# undef tmpbufsize +} + +char * +str_cd_iconv (const char *src, iconv_t cd) +{ + /* For most encodings, a trailing NUL byte in the input will be converted + to a trailing NUL byte in the output. But not for UTF-7. So that this + function is usable for UTF-7, we have to exclude the NUL byte from the + conversion and add it by hand afterwards. */ +# if !defined _LIBICONV_VERSION && !defined __GLIBC__ + /* Irix iconv() inserts a NUL byte if it cannot convert. + NetBSD iconv() inserts a question mark if it cannot convert. + Only GNU libiconv and GNU libc are known to prefer to fail rather + than doing a lossy conversion. For other iconv() implementations, + we have to look at the number of irreversible conversions returned; + but this information is lost when iconv() returns for an E2BIG reason. 
+ Therefore we cannot use the second, faster algorithm. */ + + char *result = NULL; + size_t length = 0; + int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length); + char *final_result; + + if (retval < 0) + { + if (result != NULL) + abort (); + return NULL; + } + + /* Add the terminating NUL byte. */ + final_result = + (result != NULL ? realloc (result, length + 1) : malloc (length + 1)); + if (final_result == NULL) + { + if (result != NULL) + free (result); + errno = ENOMEM; + return NULL; + } + final_result[length] = '\0'; + + return final_result; + +# else + /* This algorithm is likely faster than the one above. But it may produce + iconv() returns for an E2BIG reason, when the output size guess is too + small. Therefore it can only be used when we don't need the number of + irreversible conversions performed. */ + char *result; + size_t result_size; + size_t length; + const char *inptr = src; + size_t inbytes_remaining = strlen (src); + + /* Make a guess for the worst-case output size, in order to avoid a + realloc. It's OK if the guess is wrong as long as it is not zero and + doesn't lead to an integer overflow. */ + result_size = inbytes_remaining; + { + size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2); + if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX) + result_size *= MB_LEN_MAX; + } + result_size += 1; /* for the terminating NUL */ + + result = (char *) malloc (result_size); + if (result == NULL) + { + errno = ENOMEM; + return NULL; + } + + /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + /* Set to the initial state. */ + iconv (cd, NULL, NULL, NULL, NULL); +# endif + + /* Do the conversion. */ + { + char *outptr = result; + size_t outbytes_remaining = result_size - 1; + + for (;;) + { + /* Here inptr + inbytes_remaining = src + strlen (src), + outptr + outbytes_remaining = result + result_size - 1. 
*/ + size_t res = iconv (cd, + (ICONV_CONST char **) &inptr, &inbytes_remaining, + &outptr, &outbytes_remaining); + + if (res == (size_t)(-1)) + { + if (errno == EINVAL) + break; + else if (errno == E2BIG) + { + size_t used = outptr - result; + size_t newsize = result_size * 2; + char *newresult; + + if (!(newsize > result_size)) + { + errno = ENOMEM; + goto failed; + } + newresult = (char *) realloc (result, newsize); + if (newresult == NULL) + { + errno = ENOMEM; + goto failed; + } + result = newresult; + result_size = newsize; + outptr = result + used; + outbytes_remaining = result_size - 1 - used; + } + else + goto failed; + } + else + break; + } + /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + for (;;) + { + /* Here outptr + outbytes_remaining = result + result_size - 1. */ + size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining); + + if (res == (size_t)(-1)) + { + if (errno == E2BIG) + { + size_t used = outptr - result; + size_t newsize = result_size * 2; + char *newresult; + + if (!(newsize > result_size)) + { + errno = ENOMEM; + goto failed; + } + newresult = (char *) realloc (result, newsize); + if (newresult == NULL) + { + errno = ENOMEM; + goto failed; + } + result = newresult; + result_size = newsize; + outptr = result + used; + outbytes_remaining = result_size - 1 - used; + } + else + goto failed; + } + else + break; + } +# endif + + /* Add the terminating NUL byte. */ + *outptr++ = '\0'; + + length = outptr - result; + } + + /* Give away unused memory. 
*/ + if (length < result_size) + { + char *smaller_result = (char *) realloc (result, length); + + if (smaller_result != NULL) + result = smaller_result; + } + + return result; + + failed: + { + int saved_errno = errno; + free (result); + errno = saved_errno; + return NULL; + } + +# endif +} + +#endif +/* Convert the NUL-terminated string SRC from FROM_CODESET to TO_CODESET. Return a freshly allocated NUL-terminated string, or NULL with errno set. An empty SRC, or codeset names that c_strcasecmp reports as equal, is answered with a plain strdup and iconv is never opened. When HAVE_ICONV is not set, every other request fails with ENOSYS. */ +char * +str_iconv (const char *src, const char *from_codeset, const char *to_codeset) +{ + if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0) + { + char *result = strdup (src); + /* Make sure a failed strdup is reported as ENOMEM. */ + if (result == NULL) + errno = ENOMEM; + return result; + } + else + { +#if HAVE_ICONV + iconv_t cd; + char *result; + + /* Avoid glibc-2.1 bug with EUC-KR. */ +# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION + if (c_strcasecmp (from_codeset, "EUC-KR") == 0 + || c_strcasecmp (to_codeset, "EUC-KR") == 0) + { + errno = EINVAL; + return NULL; + } +# endif + cd = iconv_open (to_codeset, from_codeset); + if (cd == (iconv_t) -1) + return NULL; + /* The descriptor is open from here on; every path below closes it. */ + result = str_cd_iconv (src, cd); + + if (result == NULL) + { + /* Close cd, but preserve the errno from str_cd_iconv. */ + int saved_errno = errno; + iconv_close (cd); + errno = saved_errno; + } + else + { + if (iconv_close (cd) < 0) + { + /* Return NULL, but free the allocated memory, and while doing + that, preserve the errno from iconv_close. */ + int saved_errno = errno; + free (result); + errno = saved_errno; + return NULL; + } + } + return result; +#else + /* This is a different error code than if iconv_open existed but didn't + support from_codeset and to_codeset, so that the caller can emit + an error message such as + "iconv() is not supported. Installing GNU libiconv and + then reinstalling this package would fix this." */ + errno = ENOSYS; + return NULL; +#endif + } +} ============================================================ --- idna/striconv.h 02bdbe1723a53b74bfa255faacd9f3d36e045784 +++ idna/striconv.h 02bdbe1723a53b74bfa255faacd9f3d36e045784 @@ -0,0 +1,76 @@ +/* Charset conversion. 
+ Copyright (C) 2001-2004, 2006-2007 Free Software Foundation, Inc. + Written by Bruno Haible and Simon Josefsson. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifndef _STRICONV_H +#define _STRICONV_H + +#include +#if HAVE_ICONV +#include +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + + +#if HAVE_ICONV + +/* Convert an entire string from one encoding to another, using iconv. + The original string is at [SRC,...,SRC+SRCLEN-1]. + The conversion descriptor is passed as CD. + *RESULTP and *LENGTHP should initially be a scratch buffer and its size, + or *RESULTP can initially be NULL. + May erase the contents of the memory at *RESULTP. + Return value: 0 if successful, otherwise -1 and errno set. + If successful: The resulting string is stored in *RESULTP and its length + in *LENGTHP. *RESULTP is set to a freshly allocated memory block, or is + unchanged if no dynamic memory allocation was necessary. */ +extern int mem_cd_iconv (const char *src, size_t srclen, iconv_t cd, + char **resultp, size_t *lengthp); + +/* Convert an entire string from one encoding to another, using iconv. + The original string is the NUL-terminated string starting at SRC. + The conversion descriptor is passed as CD. Both the "from" and the "to" + encoding must use a single NUL byte at the end of the string (i.e. 
not + UCS-2, UCS-4, UTF-16, UTF-32). + Allocate a malloced memory block for the result. + Return value: the freshly allocated resulting NUL-terminated string if + successful, otherwise NULL and errno set. */ +extern char * str_cd_iconv (const char *src, iconv_t cd); + +#endif + +/* Convert an entire string from one encoding to another, using iconv. + The original string is the NUL-terminated string starting at SRC. + Both the "from" and the "to" encoding must use a single NUL byte at the + end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32). + Allocate a malloced memory block for the result. + Return value: the freshly allocated resulting NUL-terminated string if + successful, otherwise NULL and errno set. */ +extern char * str_iconv (const char *src, + const char *from_codeset, const char *to_codeset); + + +#ifdef __cplusplus +} +#endif + + +#endif /* _STRICONV_H */ ============================================================ --- idna/strverscmp.c bfa51bf1576ca9b44db9b476dd613230629ff448 +++ idna/strverscmp.c bfa51bf1576ca9b44db9b476dd613230629ff448 @@ -0,0 +1,131 @@ +/* Compare strings while treating digits characters numerically. + Copyright (C) 1997, 2000, 2002, 2004, 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jean-François Bignolles , 1997. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#if !_LIBC +# include +#endif + +#include +#include + +/* states: S_N: normal, S_I: comparing integral part, S_F: comparing + fractional parts, S_Z: idem but with leading Zeroes only */ +#define S_N 0x0 +#define S_I 0x4 +#define S_F 0x8 +#define S_Z 0xC + +/* result_type: CMP: return diff; LEN: compare using len_diff/diff */ +#define CMP 2 +#define LEN 3 + + +/* ISDIGIT differs from isdigit, as follows: + - Its arg may be any int or unsigned int; it need not be an unsigned char + or EOF. + - It's typically faster. + POSIX says that only '0' through '9' are digits. Prefer ISDIGIT to + isdigit unless it's important to use the locale's definition + of `digit' even when the host does not conform to POSIX. */ +#define ISDIGIT(c) ((unsigned int) (c) - '0' <= 9) + +#undef __strverscmp +#undef strverscmp + +#ifndef weak_alias +# define __strverscmp strverscmp +#endif + +/* Compare S1 and S2 as strings holding indices/version numbers, + returning less than, equal to or greater than zero if S1 is less than, + equal to or greater than S2 (for more info, see the texinfo doc). 
+*/ + +int +__strverscmp (const char *s1, const char *s2) +{ + const unsigned char *p1 = (const unsigned char *) s1; + const unsigned char *p2 = (const unsigned char *) s2; + unsigned char c1, c2; + int state; + int diff; + /* The low two bits of STATE hold a character class: 0 = non-digit, 1 = digit '1'..'9', 2 = '0'. */ + /* Symbol(s) 0 [1-9] others (padding) + Transition (10) 0 (01) d (00) x (11) - */ + static const unsigned int next_state[] = + { + /* state x d 0 - */ + /* S_N */ S_N, S_I, S_Z, S_N, + /* S_I */ S_N, S_I, S_I, S_I, + /* S_F */ S_N, S_F, S_F, S_F, + /* S_Z */ S_N, S_F, S_Z, S_Z + }; + /* Indexed by (state << 2) | class of the S2 character; an entry is CMP, LEN, or a literal result of 1 or -1. */ + static const int result_type[] = + { + /* state x/x x/d x/0 x/- d/x d/d d/0 d/- + 0/x 0/d 0/0 0/- -/x -/d -/0 -/- */ + + /* S_N */ CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP, + CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, + /* S_I */ CMP, -1, -1, CMP, 1, LEN, LEN, CMP, + 1, LEN, LEN, CMP, CMP, CMP, CMP, CMP, + /* S_F */ CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP, + CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, + /* S_Z */ CMP, 1, 1, CMP, -1, CMP, CMP, CMP, + -1, CMP, CMP, CMP + }; + /* The very same pointer compares equal without reading the string. */ + if (p1 == p2) + return 0; + + c1 = *p1++; + c2 = *p2++; + /* Hint: '0' is a digit too. */ + state = S_N | ((c1 == '0') + (ISDIGIT (c1) != 0)); + /* Skip the common prefix, feeding each character of S1 through next_state. */ + while ((diff = c1 - c2) == 0 && c1 != '\0') + { + state = next_state[state]; + c1 = *p1++; + c2 = *p2++; + state |= (c1 == '0') + (ISDIGIT (c1) != 0); + } + /* From here on STATE holds the result rule looked up in result_type, not a machine state. */ + state = result_type[state << 2 | ((c2 == '0') + (ISDIGIT (c2) != 0))]; + + switch (state) + { + case CMP: + return diff; + + case LEN: /* the longer run of digits is the larger number */ + while (ISDIGIT (*p1++)) + if (!ISDIGIT (*p2++)) + return 1; + /* S1's digit run is over: S2 is larger if its run goes on, otherwise the runs have equal length and the first differing character decides. */ + return ISDIGIT (*p2) ? -1 : diff; + /* Any other table entry is itself the result (1 or -1). */ + default: + return state; + } +} +#ifdef weak_alias +weak_alias (__strverscmp, strverscmp) +#endif ============================================================ --- idna/strverscmp.h c6682866e23ac96241358bce7e88e8ef64ef63db +++ idna/strverscmp.h c6682866e23ac96241358bce7e88e8ef64ef63db @@ -0,0 +1,24 @@ +/* Compare strings while treating digits characters numerically. + + Copyright (C) 1997, 2003 Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#ifndef STRVERSCMP_H_ +# define STRVERSCMP_H_ + +int strverscmp (const char *, const char *); + +#endif /* not STRVERSCMP_H_ */ ============================================================ --- idna/tst_idna.c 7d4486bee1159e5c4b2e72a05f98df9e7ef7b8c9 +++ idna/tst_idna.c 7d4486bee1159e5c4b2e72a05f98df9e7ef7b8c9 @@ -0,0 +1,326 @@ +/* tst_idna.c --- Self tests for idna_to_ascii(). + * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson + * + * This file is part of GNU Libidn. + * + * GNU Libidn is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GNU Libidn is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Libidn; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include + +#include +#include + +#include "utils.h" + +struct idna +{ + const char *name; + size_t inlen; + uint32_t in[100]; + const char *out; + int flags; + int toasciirc; + int tounicoderc; +}; + +static const struct idna idna[] = { + { + "Arabic (Egyptian)", 17, + { + 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643, + 0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A, + 0x061F}, + IDNA_ACE_PREFIX "egbpdaj6bu4bxfgehfvwxn", 0, IDNA_SUCCESS, IDNA_SUCCESS}, + { + "Chinese (simplified)", 9, + { + 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587}, + IDNA_ACE_PREFIX "ihqwcrb4cv8a8dqg056pqjye", 0, IDNA_SUCCESS, IDNA_SUCCESS}, + { + "Chinese (traditional)", 9, + { + 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587}, + IDNA_ACE_PREFIX "ihqwctvzc91f659drss3x8bo0yb", 0, IDNA_SUCCESS, + IDNA_SUCCESS}, + { + "Czech: Pro[CCARON]prost[ECARON]nemluv[IACUTE][CCARON]esky", 22, + { + 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073, + 0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076, + 0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079}, + IDNA_ACE_PREFIX "Proprostnemluvesky-uyb24dma41a", 0, IDNA_SUCCESS, + IDNA_SUCCESS}, + { + "Hebrew", 22, + { + 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5, + 0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9, + 0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA}, + IDNA_ACE_PREFIX "4dbcagdahymbxekheh6e0a7fei0b", 0, IDNA_SUCCESS, + IDNA_SUCCESS}, + { + "Hindi (Devanagari)", 30, + { + 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928, + 0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902, + 
0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938, + 0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902}, + IDNA_ACE_PREFIX "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", 0, + IDNA_SUCCESS}, + { + "Japanese (kanji and hiragana)", 18, + { + 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E, + 0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044, + 0x306E, 0x304B}, + IDNA_ACE_PREFIX "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", 0, IDNA_SUCCESS}, + { + "Korean (Hangul syllables)", 24, + { + 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, + 0xC774, 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, + 0xB2E4, 0xBA74, 0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C}, + IDNA_ACE_PREFIX "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt" + "30a5jpsd879ccm6fea98c", 0, IDNA_PUNYCODE_ERROR, IDNA_PUNYCODE_ERROR}, + /* too long output */ + { + "Russian (Cyrillic)", 28, + { + 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435, + 0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432, + 0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443, + 0x0441, 0x0441, 0x043A, 0x0438}, + IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, + IDNA_SUCCESS, IDNA_SUCCESS}, + { + "Spanish: Porqu[EACUTE]nopuedensimplementehablarenEspa[NTILDE]ol", 40, + { + 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F, + 0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069, + 0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074, + 0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065, + 0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C}, + IDNA_ACE_PREFIX "PorqunopuedensimplementehablarenEspaol-fmd56a", 0, + IDNA_SUCCESS}, + { + "Vietnamese", 31, + { + 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD, + 0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3, + 0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069, + 0x1EBF, 0x006E, 0x0067, 
0x0056, 0x0069, 0x1EC7, 0x0074}, + IDNA_ACE_PREFIX "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 0, + IDNA_SUCCESS}, + { + "Japanese 3[NEN]B[GUMI][KINPACHI][SENSEI]", 8, + { + 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F}, + IDNA_ACE_PREFIX "3B-ww4c5e180e575a65lsy2b", 0, IDNA_SUCCESS, + IDNA_SUCCESS}, + { + "Japanese [AMURO][NAMIE]-with-SUPER-MONKEYS", 24, + { + 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069, + 0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052, + 0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053}, + IDNA_ACE_PREFIX "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", 0, + IDNA_SUCCESS}, + { + "Japanese Hello-Another-Way-[SOREZORE][NO][BASHO]", 25, + { + 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E, + 0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061, + 0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834, + 0x6240}, + IDNA_ACE_PREFIX "Hello-Another-Way--fc4qua05auwb3674vfr0b", 0, + IDNA_SUCCESS}, + { + "Japanese [HITOTSU][YANE][NO][SHITA]2", 8, + { + 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032}, + IDNA_ACE_PREFIX "2-u9tlzr9756bt3uc0v", 0, IDNA_SUCCESS, IDNA_SUCCESS}, + { + "Japanese Maji[DE]Koi[SURU]5[BYOU][MAE]", 13, + { + 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069, + 0x3059, 0x308B, 0x0035, 0x79D2, 0x524D}, + IDNA_ACE_PREFIX "MajiKoi5-783gue6qz075azm5e", 0, IDNA_SUCCESS, + IDNA_SUCCESS}, + { + "Japanese [PAFII]de[RUNBA]", 9, + { + 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0}, + IDNA_ACE_PREFIX "de-jg4avhby1noc0d", 0, IDNA_SUCCESS, IDNA_SUCCESS}, + { + "Japanese [SONO][SUPIIDO][DE]", 7, + { + 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067}, + IDNA_ACE_PREFIX "d9juau41awczczp", 0, IDNA_SUCCESS, IDNA_SUCCESS}, + { + "Greek", 8, + { + 0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac}, + IDNA_ACE_PREFIX "hxargifdar", 0, IDNA_SUCCESS, IDNA_SUCCESS}, + { + "Maltese (Malti)", 
10, + { + 0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127, + 0x0127, 0x0061}, + IDNA_ACE_PREFIX "bonusaa-5bb1da", 0, IDNA_SUCCESS, IDNA_SUCCESS}, + { + "Russian (Cyrillic)", 28, + { + 0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435, + 0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432, + 0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443, + 0x0441, 0x0441, 0x043a, 0x0438}, + IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l", 0, + IDNA_SUCCESS, IDNA_SUCCESS}, +#if 0 + { + "(S) -> $1.00 <-", 11, + { + 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030, + 0x0020, 0x003C, 0x002D}, + IDNA_ACE_PREFIX "-> $1.00 <--", 0, IDNA_SUCCESS, IDNA_SUCCESS}, +#endif + { /* XXX depends on IDNA_ACE_PREFIX */ + "ToASCII() with ACE prefix", 4 + 3, + { + 'x', 'n', '-', '-', 'f', 'o', 0x3067}, + IDNA_ACE_PREFIX "too long too long too long too long too long too " + "long too long too long too long too long ", 0, + IDNA_CONTAINS_ACE_PREFIX, IDNA_PUNYCODE_ERROR} +}; + +void +doit (void) +{ + char label[100]; + uint32_t *ucs4label = NULL; + uint32_t tmp[100]; + size_t len, len2, i; + int rc; + + for (i = 0; i < sizeof (idna) / sizeof (idna[0]); i++) + { + if (debug) + printf ("IDNA entry %d: %s\n", i, idna[i].name); + + if (debug) + { + printf ("in:\n"); + ucs4print (idna[i].in, idna[i].inlen); + } + + rc = idna_to_ascii_4i (idna[i].in, idna[i].inlen, label, idna[i].flags); + if (rc != idna[i].toasciirc) + { + fail ("IDNA entry %d failed: %d\n", i, rc); + if (debug) + printf ("FATAL\n"); + continue; + } + + if (debug && rc == IDNA_SUCCESS) + { + printf ("computed out: %s\n", label); + printf ("expected out: %s\n", idna[i].out); + } + else if (debug) + printf ("returned %d expected %d\n", rc, idna[i].toasciirc); + + if (rc == IDNA_SUCCESS) + { + if (strlen (idna[i].out) != strlen (label) || + strcasecmp (idna[i].out, label) != 0) + { + fail ("IDNA entry %d failed\n", i); + if (debug) + printf ("ERROR\n"); + } + else if (debug) 
+ printf ("OK\n"); + } + else if (debug) + printf ("OK\n"); + + if (ucs4label) + free (ucs4label); + + ucs4label = stringprep_utf8_to_ucs4 (idna[i].out, -1, &len); + + if (debug) + { + printf ("in: %s (%d==%d)\n", idna[i].out, strlen (idna[i].out), + len); + ucs4print (ucs4label, len); + } + + len2 = sizeof (tmp) / sizeof (tmp[0]); + rc = idna_to_unicode_44i (ucs4label, len, tmp, &len2, idna[i].flags); + if (debug) + { + printf ("expected out (%d):\n", + rc == IDNA_SUCCESS ? idna[i].inlen : len); + if (rc == IDNA_SUCCESS) + ucs4print (idna[i].in, idna[i].inlen); + else + ucs4print (ucs4label, len); + + printf ("computed out (%d):\n", len2); + ucs4print (tmp, len2); + } + + if (rc != idna[i].tounicoderc) + { + fail ("IDNA entry %d failed: %d\n", i, rc); + if (debug) + printf ("FATAL\n"); + continue; + } + + if ((rc == IDNA_SUCCESS && (len2 != idna[i].inlen || + memcmp (idna[i].in, tmp, len2) != 0)) || + (rc != IDNA_SUCCESS && (len2 != len || + memcmp (ucs4label, tmp, len) != 0))) + { + if (debug) + { + if (rc == IDNA_SUCCESS) + printf ("len=%d len2=%d\n", len2, idna[i].inlen); + else + printf ("len=%d len2=%d\n", len, len2); + } + fail ("IDNA entry %d failed\n", i); + if (debug) + printf ("ERROR\n"); + } + else if (debug) + printf ("OK\n\n"); + } + + if (ucs4label) + free (ucs4label); +} ============================================================ --- idna/tst_idna2.c d8f00e5771ad810ddbea785b11806bd4a25f618c +++ idna/tst_idna2.c d8f00e5771ad810ddbea785b11806bd4a25f618c @@ -0,0 +1,523 @@ +/* tst_idna2.c --- Self tests for idna_to_ascii_8z(). + * Copyright (C) 2002, 2003, 2004, 2006, 2007 Simon Josefsson + * + * This file is part of GNU Libidn. + * + * GNU Libidn is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * GNU Libidn is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Libidn; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include + +#include + +#include "utils.h" + +struct idna +{ + const char *in; + const char *out; +}; + +static const struct idna idna[] = { + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xc3\xad\x64\x6e", "example.xn--dn-mja" + /* 1-1-1 Has an IDN in just the TLD */ + }, + {"\xc3\xab\x78\x2e\xc3\xad\x64\x6e", "xn--x-ega.xn--dn-mja" + /* 1-1-2 Has an IDN in the TLD and SLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xc3\xa5\xc3\xbe\xc3\xa7", + "example.xn--5cae2e" + /* 1-2-1 Latin-1 TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xc4\x83\x62\xc4\x89", + "example.xn--b-rhat" + /* 1-2-2 Latin Extended A TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xc8\xa7\xc6\x80\xc6\x88", + "example.xn--lhaq98b" + /* 1-2-3 Latin Extended B TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe1\xb8\x81\xe1\xb8\x83\xe1\xb8\x89", + "example.xn--2fges" + /* 1-2-4 Latin Extended Additional TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe4\xb8\xbf\xe4\xba\xba\xe5\xb0\xb8", + "example.xn--xiqplj17a" + /* 1-3-1 Han TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe3\x81\x8b\xe3\x81\x8c\xe3\x81\x8d", + "example.xn--u8jcd" + /* 1-3-2 Hiragana TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe3\x82\xab\xe3\x82\xac\xe3\x82\xad", + "example.xn--lckcd" + /* 1-3-3 Katakana TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe1\x84\x80\xe1\x85\xa1\xe1\x86\xa8", + "example.xn--p39a" + /* 1-3-4 Hangul Jamo TLD */ + /* Don't resolve as 
example.xn--ypd8qrh */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xea\xb1\xa9\xeb\x93\x86\xec\x80\xba", + "example.xn--o69aq2nl0j" + /* 1-3-5 Hangul TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xea\x80\x8a\xea\x80\xa0\xea\x8a\xb8", + "example.xn--6l7arby7j" + /* 1-3-6 Yi TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xce\xb1\xce\xb2\xce\xb3", + "example.xn--mxacd" + /* 1-3-7 Greek TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe1\xbc\x82\xe1\xbc\xa6\xe1\xbd\x95", + "example.xn--fng7dpg" + /* 1-3-8 Greek Extended TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xd0\xb0\xd0\xb1\xd0\xb2", + "example.xn--80acd" + /* 1-3-9 Cyrillic TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xd5\xa1\xd5\xa2\xd5\xa3", + "example.xn--y9acd" + /* 1-3-10 Armenian TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe1\x83\x90\xe1\x83\x91\xe1\x83\x92", + "example.xn--lodcd" + /* 1-3-11 Georgian TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe2\x88\xa1\xe2\x86\xba\xe2\x8a\x82", + "example.xn--b7gxomk" + /* 1-4-1 Symbols TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe0\xa4\x95\xe0\xa4\x96\xe0\xa4\x97", + "example.xn--11bcd" + /* 1-5-1 Devanagari TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe0\xa6\x95\xe0\xa6\x96\xe0\xa6\x97", + "example.xn--p5bcd" + /* 1-5-2 Bengali TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe0\xa8\x95\xe0\xa8\x96\xe0\xa8\x97", + "example.xn--d9bcd" + /* 1-5-3 Gurmukhi TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe0\xaa\x95\xe0\xaa\x96\xe0\xaa\x97", + "example.xn--0dccd" + /* 1-5-4 Gujarati TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe0\xac\x95\xe0\xac\x96\xe0\xac\x97", + "example.xn--ohccd" + /* 1-5-5 Oriya TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe0\xae\x95\xe0\xae\x99\xe0\xae\x9a", + "example.xn--clcid" + /* 1-5-6 Tamil TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe0\xb0\x95\xe0\xb0\x96\xe0\xb0\x97", + "example.xn--zoccd" + /* 1-5-7 Telugu TLD */ + }, + 
{"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe0\xb2\x95\xe0\xb2\x96\xe0\xb2\x97", + "example.xn--nsccd" + /* 1-5-8 Kannada TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe0\xb4\x95\xe0\xb4\x96\xe0\xb4\x97", + "example.xn--bwccd" + /* 1-5-9 Malayalam TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe0\xb6\x9a\xe0\xb6\x9b\xe0\xb6\x9c", + "example.xn--3zccd" + /* 1-5-10 Sinhala TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe0\xb8\x81\xe0\xb8\x82\xe0\xb8\x83", + "example.xn--12ccd" + /* 1-5-11 Thai TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe0\xba\x81\xe0\xba\x82\xe0\xba\x84", + "example.xn--p6ccg" + /* 1-5-12 Lao TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe0\xbd\x80\xe0\xbd\x81\xe0\xbd\x82", + "example.xn--5cdcd" + /* 1-5-13 Tibetan TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe1\x80\x80\xe1\x80\x81\xe1\x80\x82", + "example.xn--nidcd" + /* 1-5-14 Myanmar TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe1\x9e\x80\xe1\x9e\x81\xe1\x9e\x82", + "example.xn--i2ecd" + /* 1-5-15 Khmer TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xe1\xa0\xa0\xe1\xa0\xa1\xe1\xa0\xa2", + "example.xn--26ecd" + /* 1-5-16 Mongolian TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xd8\xa7\xd8\xa8\xd8\xa9", + "example.xn--mgbcd" + /* 1-6-1 Arabic TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xd7\x90\xd7\x91\xd7\x92", + "example.xn--4dbcd" + /* 1-6-2 Hebrew TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xdc\x90\xdc\x91\xdc\x92", + "example.xn--9mbcd" + /* 1-6-3 Syriac TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\x61\x62\x63\xe3\x82\xab\xe3\x82\xac\xe3\x82\xad", + "example.xn--abc-mj4bfg" + /* 1-7-1 ASCII and non-Latin TLD */ + }, + {"\x65\x78\x61\x6d\x70\x6c\x65\x2e\xc3\xa5\xc3\xbe\xc3\xa7\xe3\x82\xab\xe3\x82\xac\xe3\x82\xad", + "example.xn--5cae2e328wfag" + /* 1-7-2 Latin (non-ASCII) and non-Latin TLD */ + }, + {"\xc3\xad\x21\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 2-3-1-1 Includes ! 
before Nameprep */ + /* Don't resolve as xn--!dn-qma.example */ + }, + {"\xc3\xad\x24\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 2-3-1-2 Includes $ before Nameprep */ + /* Don't resolve as xn--$dn-qma.example */ + }, + {"\xc3\xad\x2b\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 2-3-1-3 Includes + before Nameprep */ + /* Don't resolve as xn--+dn-qma.example */ + }, + {"\x2d\xc3\xad\x31\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 2-3-2-1 Leading hyphen before Nameprep */ + /* Don't resolve as xn---1dn-vpa.example */ + }, + {"\xc3\xad\x31\x64\x6e\x2d\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 2-3-2-2 Trailing hyphen before Nameprep */ + /* Don't resolve as xn--1dn--upa.example */ + }, + {"\xc3\xad\xef\xbc\x8b\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 2-3-3-1 Gets a + after Nameprep */ + /* Don't resolve as xn--dn-mja0331x.example */ + }, + {"\xc3\xad\xe2\x81\xbc\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 2-3-3-2 Gets a = after Nameprep */ + /* Don't resolve as xn--dn-mja0343a.example */ + }, + {"\xef\xb9\xa3\xc3\xad\x32\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 2-3-4-1 Leading hyphen after Nameprep */ + /* Don't resolve as xn--2dn-qma32863a.example */ + /* Don't resolve as xn---2dn-vpa.example */ + }, + {"\xc3\xad\x32\x64\x6e\xef\xbc\x8d\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 2-3-4-2 Trailing hyphen after Nameprep */ + /* Don't resolve as xn--2dn-qma79363a.example */ + /* Don't resolve as xn--2dn--upa.example */ + }, + {"\xc2\xb9\x31\x2e\x65\x78\x61\x6d\x70\x6c\x65", "11.example" + /* 2-4-1 All-ASCII check, Latin */ + }, + {"\xe2\x85\xa5\x76\x69\x2e\x65\x78\x61\x6d\x70\x6c\x65", "vivi.example" + /* 2-4-2 All-ASCII check, symbol */ + }, + {"\xc3\x9f\x73\x73\x2e\x65\x78\x61\x6d\x70\x6c\x65", "ssss.example" + /* 2-4-3 All-ASCII check, sharp S */ + }, + {"\x78\x6e\x2d\x2d\xc3\xaf\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 2-5-1 ACE prefix before Nameprep, body */ + /* Don't resolve as xn--xn--dn-sja.example */ + /* Don't 
resolve as xn--dn-sja.example */ + }, + {"\xe2\x85\xb9\x6e\x2d\x2d\xc3\xa4\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "" + /* 2-5-2 ACE prefix before Nameprep, prefix */ + /* Don't resolve as xn--xn--dn-uia.example */ + /* Don't resolve as xn--dn-uia.example */ + }, + {"", "" + /* 2-8-1 Zero-length label after Nameprep */ + /* Don't resolve as xn--kba.example */ + /* Don't resolve as xn--.example */ + }, + {"\x33\x30\x30\x32\x2d\x74\x65\x73\x74\xe3\x80\x82\xc3\xad\x64\x6e", + "3002-test.xn--dn-mja" + /* 2-9-1 U+3002 acts as a label separator */ + /* Don't resolve as xn--3002-testdn-wcb2087m.example */ + }, + {"\x66\x66\x30\x65\x2d\x74\x65\x73\x74\xef\xbc\x8e\xc3\xad\x64\x6e", + "ff0e-test.xn--dn-mja" + /* 2-9-2 U+FF0E acts as a label separator */ + /* Don't resolve as xn--ff0e-testdn-wcb45865f.example */ + }, + {"\x66\x66\x36\x31\x2d\x74\x65\x73\x74\xef\xbd\xa1\xc3\xad\x64\x6e", + "ff61-test.xn--dn-mja" + /* 2-9-3 U+FF61 acts as a label separator */ + /* Don't resolve as xn--ff61-testdn-wcb33975f.example */ + }, + {"\x30\x30\x61\x64\x6f\x75\x74\xc2\xad\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--00adoutdn-m5a.example" + /* 4-1-1-1 00adout<00AD><00ED>dn.example -> 00adout<00ED>dn.example */ + /* Don't resolve as xn--00adoutdn-cna81e.example */ + }, + {"\x32\x30\x30\x64\x6f\x75\x74\xe2\x80\x8d\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--200doutdn-m5a.example" + /* 4-1-1-2 200dout<200D><00ED>dn.example -> 200dout<00ED>dn.example */ + /* Don't resolve as xn--200doutdn-m5a1678f.example */ + }, + {"\x73\x69\x6d\x70\x6c\x65\x63\x61\x70\x44\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--simplecapddn-1fb.example" + /* 4-1-2-1 simplecap<0044><00ED>dn.example -> simplecap<0064><00ED>dn.example */ + }, + {"\x6c\x61\x74\x69\x6e\x74\x6f\x67\x72\x65\x65\x6b\xc2\xb5\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--latintogreekdn-cmb716i.example" + /* 4-1-2-2 latintogreek<00B5><00ED>dn.example -> latintogreek<03BC><00ED>dn.example */ + 
/* Don't resolve as xn--latintogreekdn-cxa01g.example */ + }, + {"\x6c\x61\x74\x69\x6e\x65\x78\x74\xc3\x87\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--latinextdn-v6a6e.example" + /* 4-1-2-3 latinext<00C7><00ED>dn.example -> latinext<00E7><00ED>dn.example */ + /* Don't resolve as xn--latinextdn-twa07b.example */ + }, + {"\x73\x68\x61\x72\x70\x73\xc3\x9f\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--sharpsssdn-r8a.example" + /* 4-1-2-4 sharps<00DF><00ED>dn.example -> sharpsss<00ED>dn.example */ + /* Don't resolve as xn--sharpsdn-vya4l.example */ + }, + {"\x74\x75\x72\x6b\x69\x73\x68\x69\xc4\xb0\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--turkishiidn-wcb701e.example" + /* 4-1-2-5 turkishi<0130><00ED>dn.example -> turkishi<0069><0307><00ED>dn.example */ + /* Don't resolve as xn--turkishidn-r8a71f.example */ + }, + {"\x65\x78\x70\x74\x77\x6f\xc5\x89\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--exptwondn-m5a502c.example" + /* 4-1-2-6 exptwo<0149><00ED>dn.example -> exptwo<02BC><006E><00ED>dn.example */ + /* Don't resolve as xn--exptwodn-h2a33g.example */ + }, + {"\x61\x64\x64\x66\x6f\x6c\x64\xcf\x92\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--addfolddn-m5a121f.example" + /* 4-1-2-7 addfold<03D2><00ED>dn.example -> addfold<03C5><00ED>dn.example */ + /* Don't resolve as xn--addfolddn-m5a462f.example */ + }, + {"\x65\x78\x70\x74\x68\x72\x65\x65\xe1\xbd\x92\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--expthreedn-r8a5844g.example" + /* 4-1-2-8 expthree<1F52><00ED>dn.example -> expthree<03C5><0313><0300><00ED>dn.example */ + }, + {"\x6e\x6f\x6e\x62\x6d\x70\xf0\x90\x90\x80\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--nonbmpdn-h2a34747d.example" + /* 4-1-2-9 nonbmp<10400><00ED>dn.example -> nonbmp<10428><00ED>dn.example */ + /* Don't resolve as xn--nonbmpdn-h2a37046d.example */ + }, + 
{"\x6e\x6f\x6e\x62\x6d\x70\x74\x6f\x61\x73\x63\x69\x69\xf0\x9d\x90\x80\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--nonbmptoasciiadn-msb.example" + /* 4-1-2-10 nonbmptoascii<1D400><00ED>dn.example -> nonbmptoasciia<00ED>dn.example */ + /* Don't resolve as xn--nonbmptoasciidn-hpb54112i.example */ + }, + {"\x72\x65\x67\x63\x6f\x6d\x62\x65\xcc\x81\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--regcombdn-h4a8b.example" + /* 4-2-1-1 regcomb<0065><0301><00ED>dn.example -> regcomb<00E9><00ED>dn.example */ + /* Don't resolve as xn--regcombedn-r8a794d.example */ + }, + {"\x63\x6f\x6d\x62\x61\x6e\x64\x63\x61\x73\x65\x45\xcc\x81\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--combandcasedn-lhb4d.example" + /* 4-2-1-2 combandcase<0045><0301><00ED>dn.example -> combandcase<00E9><00ED>dn.example */ + /* Don't resolve as xn--combandcaseedn-cmb526f.example */ + }, + {"\x61\x64\x6a\x63\x6f\x6d\x62\xc2\xba\xcc\x81\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--adjcombdn-m5a9d.example" + /* 4-2-1-3 adjcomb<00BA><0301><00ED>dn.example -> adjcomb<00F3><00ED>dn.example */ + /* Don't resolve as xn--adjcombdn-1qa57cp3r.example */ + }, + {"\x65\x78\x74\x63\x6f\x6d\x62\x6f\x63\xcc\x81\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--extcombodn-r8a52a.example" + /* 4-2-1-4 extcombo<0063><0301><00ED>dn.example -> extcombo<0107><00ED>dn.example */ + /* Don't resolve as xn--extcombocdn-wcb920e.example */ + }, + {"\x64\x6f\x75\x62\x6c\x65\x64\x69\x61\x63\x31\x75\xcc\x88\xcc\x81\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--doublediac1dn-6ib836a.example" + /* 4-2-1-5 doublediac1<0075><0308><0301><00ED>dn.example -> doublediac2<01D8><00ED>dn.example */ + /* Don't resolve as xn--doublediac1udn-cmb526fnd.example */ + }, + {"\x64\x6f\x75\x62\x6c\x65\x64\x69\x61\x63\x32\x75\xcc\x81\xcc\x88\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--doublediac2dn-6ib8qs73a.example" + /* 4-2-1-6 doublediac2<0075><0301><0308><00ED>dn.example -> 
doublediac2<01D8><00ED>dn.example */ + /* Don't resolve as xn--doublediac2udn-cmb526fod.example */ + }, + {"\x6e\x65\x77\x6e\x6f\x72\x6d\xf0\xaf\xa1\xb4\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--newnormdn-m5a7856x.example" + /* 4-2-2-1 newnorm<2F874><00ED>dn.example -> newnorm<5F33><00ED>dn.example should not become <5F53> */ + /* Don't resolve as xn--newnormdn-m5a9396x.example */ + /* Don't resolve as xn--newnormdn-m5a9968x.example */ + }, + {"\xe2\x80\x80\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 4-3-1 Spacing */ + /* Don't resolve as xn--dn-mja3392a.example */ + }, + {"\xdb\x9d\xc3\xad\x64\x6e\x2d\x32\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 4-3-2 Control */ + /* Don't resolve as xn--dn-2-upa332g.example */ + }, + {"\xee\x80\x85\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 4-3-3 Private use */ + /* Don't resolve as xn--dn-mja1659t.example */ + }, + {"\xf3\xb0\x80\x85\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 4-3-4 Private use, non-BMP */ + /* Don't resolve as xn--dn-mja7922x.example */ + }, + {"\xef\xb7\x9d\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 4-3-5 Non-character */ + /* Don't resolve as xn--dn-mja1210x.example */ + }, + {"\xf0\x9f\xbf\xbe\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 4-3-6 Non-character, non-BMP */ + /* Don't resolve as xn--dn-mja7922x.example */ + }, + {"\xef\xbf\xbd\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 4-3-7 Surrogate points */ + /* Don't resolve as xn--dn-mja7922x.example */ + }, + {"\xef\xbf\xba\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 4-3-8 Inappropriate for plain */ + /* Don't resolve as xn--dn-mja5822x.example */ + }, + {"\xe2\xbf\xb5\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 4-3-9 Inappropriate for canonical */ + /* Don't resolve as xn--dn-mja3729b.example */ + }, + {"\xe2\x81\xaa\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 4-3-10 Change display simple */ + /* Don't resolve as 
xn--dn-mja7533a.example */ + }, + {"\xe2\x80\x8f\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 4-3-11 Change display RTL */ + /* Don't resolve as xn--dn-mja3992a.example */ + }, + {"\xf3\xa0\x80\x81\xf3\xa0\x81\x85\xf3\xa0\x81\x8e\x68\x69\x69\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "" + /* 4-3-12 Language tags */ + /* Don't resolve as xn--hiidn-km43aaa.example */ + }, + {"\xd8\xa8\x6f\xd8\xb8\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 4-4-1 Arabic RandALCat-LCat-RandALCat */ + /* Don't resolve as xn--o-0mc3c.example */ + }, + {"\xd8\xa8\xd8\xb8\x6f\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 4-4-2 Arabic RandALCat-RandALCat-other */ + /* Don't resolve as xn--o-0mc2c.example */ + }, + {"\x6f\xd8\xa8\xd8\xb8\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 4-4-3 Arabic other-RandALCat-RandALCat */ + /* Don't resolve as xn--o-1mc2c.example */ + }, + {"\xd7\x91\x6f\xd7\xa1\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 4-4-4 Hebrew RandALCat-LCat-RandALCat */ + /* Don't resolve as xn--o-1hc3c.example */ + }, + {"\xd7\x91\xd7\xa1\x6f\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 4-4-5 Hebrew RandALCat-RandALCat-other */ + /* Don't resolve as xn--o-1hc2c.example */ + }, + {"\x6f\xd7\x91\xd7\xa1\x2e\x65\x78\x61\x6d\x70\x6c\x65", "" + /* 4-4-6 Hebrew other-RandALCat-RandALCat */ + /* Don't resolve as xn--o-2hc2c.example */ + }, + {"\xc8\xb7\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--dn-mja33k.example" + /* 5-1-1 Unassigned in BMP; zone editors should reject */ + }, + {"\xf0\x90\x88\x85\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--dn-mja7734x.example" + /* 5-1-2 Unassinged outside BMP; zone editors should reject */ + /* Don't resolve as xn--dn-mja7922x.example */ + }, + {"\xc8\xb4\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--dn-mja12k.example" + /* 5-2-1 Newly assigned in BMP; zone editors should reject */ + }, + {"\xf0\x90\x80\x85\xc3\xad\x64\x6e\x2e\x65\x78\x61\x6d\x70\x6c\x65", + "xn--dn-mja9232x.example" + /* 5-2-2 Newly assigned 
outside of BMP; zone editors should reject */ + /* Don't resolve as xn--dn-mja7922x.example */ + } +}; + +void +doit (void) +{ + size_t i; + char *out; + int rc; + + for (i = 0; i < sizeof (idna) / sizeof (idna[0]); i++) + { + if (debug) + printf ("IDNA2 entry %d\n", i); + + if (debug) + { + uint32_t *p; + size_t len; + printf ("in: %s\n", idna[i].in); + hexprint (idna[i].in, strlen (idna[i].in)); + escapeprint (idna[i].in, strlen (idna[i].in)); + p = stringprep_utf8_to_ucs4 (idna[i].in, -1, &len); + ucs4print (p, len); + free (p); + } + + rc = idna_to_ascii_8z (idna[i].in, &out, + IDNA_ALLOW_UNASSIGNED | + IDNA_USE_STD3_ASCII_RULES); + if (rc != IDNA_SUCCESS && strlen (idna[i].out) > 0) + { + fail ("IDNA2 entry %d failed: %d\n", i, rc); + continue; + } + + if (debug && rc == IDNA_SUCCESS) + { + printf ("computed out: %s\n", out); + printf ("expected out: %s\n", idna[i].out); + } + else if (debug) + printf ("returned %d\n", rc); + + if (rc == IDNA_SUCCESS) + { + if (strlen (idna[i].out) != strlen (out) || + strcasecmp (idna[i].out, out) != 0) + { + fail ("IDNA2 entry %d failed\n", i); + if (debug) + printf ("ERROR\n"); + } + else if (debug) + printf ("OK\n"); + + free (out); + out = NULL; + } + else if (rc != IDNA_SUCCESS && strlen (idna[i].out) == 0 && debug) + printf ("OK (fail)\n"); + else if (debug) + printf ("OK\n"); + } +} ============================================================ --- idna/tst_nfkc.c b86e2c9e6662a3134eab70c77c1ab4632372aaba +++ idna/tst_nfkc.c b86e2c9e6662a3134eab70c77c1ab4632372aaba @@ -0,0 +1,126 @@ +/* tst_nfkc.c --- Self tests for stringprep_utf8_nfkc_normalize(). + * Copyright (C) 2002, 2003, 2004, 2006, 2006, 2007 Simon Josefsson + * + * This file is part of GNU Libidn. + * + * GNU Libidn is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * GNU Libidn is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Libidn; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include + +#include + +#include "utils.h" + +struct nfkc +{ + const char *in; + const char *out; +}; + +static struct nfkc nfkc[] = { + {"\xC2\xB5", "\xCE\xBC"}, + {"\xC2\xAA", "\x61"}, + /* From . Note that we + * compute the output according to Unicode 3.2 without the proposed + * update. + * + * 1. + * + * U+1100 (ᄀ) HANGUL CHOSEONG KIYEOK + + * U+0300 (◌̀) COMBINING GRAVE ACCENT + + * U+1161 (ᅡ) HANGUL JUNGSEONG A + * + * According to the old language, the NFC form of this would be B: + * + * 2. + * + * U+AC00 (가) HANGUL SYLLABLE GA + + * U+0300 (◌̀) COMBINING GRAVE ACCENT + */ + {"\xE1\x84\x80\xCC\x80\xE1\x85\xA1", "\xEA\xB0\x80\xCC\x80"}, + /* Second test case from page. Again, we do not implement the + * updated proposal. 
-> U+0B4B U+0300 + */ + {"\xE0\xAD\x87\xCC\x80\xE0\xAC\xBE", "\xE0\xAD\x8b\xCC\x80"} +}; + +void +doit (void) +{ + char *out; + size_t i; + + for (i = 0; i < sizeof (nfkc) / sizeof (nfkc[0]); i++) + { + if (debug) + printf ("NFKC entry %d\n", i); + + out = stringprep_utf8_nfkc_normalize (nfkc[i].in, strlen (nfkc[i].in)); + if (out == NULL) + { + fail ("NFKC entry %d failed fatally\n", i); + continue; + } + + if (debug) + { + uint32_t *t; + size_t len; + + printf ("in:\n"); + escapeprint (nfkc[i].in, strlen (nfkc[i].in)); + hexprint (nfkc[i].in, strlen (nfkc[i].in)); + binprint (nfkc[i].in, strlen (nfkc[i].in)); + + + printf ("out:\n"); + escapeprint (out, strlen (out)); + hexprint (out, strlen (out)); + binprint (out, strlen (out)); + t = stringprep_utf8_to_ucs4 (out, -1, &len); + if (t) + { + ucs4print (t, len); + free (t); + } + + printf ("expected out:\n"); + escapeprint (nfkc[i].out, strlen (nfkc[i].out)); + hexprint (nfkc[i].out, strlen (nfkc[i].out)); + binprint (nfkc[i].out, strlen (nfkc[i].out)); + } + + if (strlen (nfkc[i].out) != strlen (out) || + memcmp (nfkc[i].out, out, strlen (out)) != 0) + { + fail ("NFKC entry %d failed\n", i); + if (debug) + printf ("ERROR\n"); + } + else if (debug) + printf ("OK\n"); + + free (out); + } +} ============================================================ --- idna/tst_punycode.c 345956cee9cb31f850f347fbafdcba825bd3165e +++ idna/tst_punycode.c 345956cee9cb31f850f347fbafdcba825bd3165e @@ -0,0 +1,279 @@ +/* tst_punycode.c --- Self tests for punycode. + * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson + * + * This file is part of GNU Libidn. + * + * GNU Libidn is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * GNU Libidn is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Libidn; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include + +#include + +#include "utils.h" + +struct punycode +{ + const char *name; + size_t inlen; + uint32_t in[100]; + const char *out; + int rc; +}; + +const struct punycode punycode[] = { + { + "(A) Arabic (Egyptian)", 17, + { + 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643, + 0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A, + 0x061F}, "egbpdaj6bu4bxfgehfvwxn", PUNYCODE_SUCCESS}, + { + "(B) Chinese (simplified)", 9, + { + 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, + 0x6587}, "ihqwcrb4cv8a8dqg056pqjye", PUNYCODE_SUCCESS}, + { + "(C) Chinese (traditional)", 9, + { + 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, + 0x6587}, "ihqwctvzc91f659drss3x8bo0yb", PUNYCODE_SUCCESS}, + { + "(D) Czech: Proprostnemluvesky", 22, + { + 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073, + 0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076, + 0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079}, + "Proprostnemluvesky-uyb24dma41a", PUNYCODE_SUCCESS}, + { + "(E) Hebrew:", 22, + { + 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5, + 0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9, + 0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA}, + "4dbcagdahymbxekheh6e0a7fei0b", PUNYCODE_SUCCESS}, + { + "(F) Hindi (Devanagari):", 30, + { + 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928, + 0x094D, 0x0926, 
0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902, + 0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938, + 0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902}, + "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd", PUNYCODE_SUCCESS}, + { + "(G) Japanese (kanji and hiragana):", 18, + { + 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E, + 0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044, + 0x306E, 0x304B}, + "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa", PUNYCODE_SUCCESS}, + { + "(H) Korean (Hangul syllables):", 24, + { + 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, + 0xC774, 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, + 0xB2E4, 0xBA74, 0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C}, + "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c", + PUNYCODE_SUCCESS}, + { + "(I) Russian (Cyrillic):", 28, + { + 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435, + 0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432, + 0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443, + 0x0441, 0x0441, 0x043A, 0x0438}, + "b1abfaaepdrnnbgefbadotcwatmq2g4l", PUNYCODE_SUCCESS}, + { + "(J) Spanish: PorqunopuedensimplementehablarenEspaol", 40, + { + 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F, + 0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069, + 0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074, + 0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065, + 0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C}, + "PorqunopuedensimplementehablarenEspaol-fmd56a", PUNYCODE_SUCCESS}, + { + "(K) Vietnamese:", 31, + { + 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD, + 0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3, + 0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069, + 0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074}, + "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g", 
PUNYCODE_SUCCESS}, + { + "(L) 3B", 8, + { + 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F}, + "3B-ww4c5e180e575a65lsy2b", PUNYCODE_SUCCESS}, + { + "(M) -with-SUPER-MONKEYS", 24, + { + 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069, + 0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052, + 0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053}, + "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n", PUNYCODE_SUCCESS}, + { + "(N) Hello-Another-Way-", 25, + { + 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E, + 0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061, + 0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834, + 0x6240}, + "Hello-Another-Way--fc4qua05auwb3674vfr0b", PUNYCODE_SUCCESS}, + { + "(O) 2", 8, + { + 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032}, + "2-u9tlzr9756bt3uc0v", PUNYCODE_SUCCESS}, + { + "(P) MajiKoi5", 13, + { + 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069, + 0x3059, 0x308B, 0x0035, 0x79D2, 0x524D}, + "MajiKoi5-783gue6qz075azm5e", PUNYCODE_SUCCESS}, + { + "(Q) de", 9, + { + 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0}, + "de-jg4avhby1noc0d", PUNYCODE_SUCCESS}, + { + "(R) ", 7, + { + 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067}, + "d9juau41awczczp", PUNYCODE_SUCCESS}, + { + "(S) -> $1.00 <-", 11, + { + 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030, + 0x0020, 0x003C, 0x002D}, "-> $1.00 <--", PUNYCODE_SUCCESS} +}; + +void +doit (void) +{ + char *p; + uint32_t *q; + int rc; + size_t i, outlen; + + p = malloc (sizeof (*p) * BUFSIZ); + if (p == NULL) + fail ("malloc() returned NULL\n"); + + q = malloc (sizeof (*q) * BUFSIZ); + if (q == NULL) + fail ("malloc() returned NULL\n"); + + for (i = 0; i < sizeof (punycode) / sizeof (punycode[0]); i++) + { + if (debug) + printf ("PUNYCODE entry %d: %s\n", i, punycode[i].name); + + if (debug) + { + printf ("in:\n"); + ucs4print 
(punycode[i].in, punycode[i].inlen); + } + + outlen = BUFSIZ; + rc = punycode_encode (punycode[i].inlen, punycode[i].in, + NULL, &outlen, p); + if (rc != punycode[i].rc) + { + fail ("punycode_encode() entry %d failed: %d\n", i, rc); + if (debug) + printf ("FATAL\n"); + continue; + } + + if (rc == PUNYCODE_SUCCESS) + p[outlen] = '\0'; + + if (debug && rc == PUNYCODE_SUCCESS) + { + printf ("computed out: %s\n", p); + printf ("expected out: %s\n", punycode[i].out); + } + else if (debug) + printf ("returned %d expected %d\n", rc, punycode[i].rc); + + if (rc == PUNYCODE_SUCCESS) + { + if (strlen (punycode[i].out) != strlen (p) || + memcmp (punycode[i].out, p, strlen (p)) != 0) + { + fail ("punycode() entry %d failed\n", i); + if (debug) + printf ("ERROR\n"); + } + else if (debug) + printf ("OK\n\n"); + } + else if (debug) + printf ("OK\n\n"); + + if (debug) + { + printf ("in: %s\n", punycode[i].out); + } + + outlen = BUFSIZ; + rc = punycode_decode (strlen (punycode[i].out), punycode[i].out, + &outlen, q, NULL); + if (rc != punycode[i].rc) + { + fail ("punycode() entry %d failed: %d\n", i, rc); + if (debug) + printf ("FATAL\n"); + continue; + } + + if (debug && rc == PUNYCODE_SUCCESS) + { + printf ("computed out:\n"); + ucs4print (q, outlen); + printf ("expected out:\n"); + ucs4print (punycode[i].in, punycode[i].inlen); + } + else if (debug) + printf ("returned %d expected %d\n", rc, punycode[i].rc); + + if (rc == PUNYCODE_SUCCESS) + { + if (punycode[i].inlen != outlen || + memcmp (punycode[i].in, q, outlen) != 0) + { + fail ("punycode_decode() entry %d failed\n", i); + if (debug) + printf ("ERROR\n"); + } + else if (debug) + printf ("OK\n\n"); + } + else if (debug) + printf ("OK\n\n"); + } + + free (q); + free (p); +} ============================================================ --- idna/tst_stringprep.c 649476a16fcda276903bd72a6d97101b0b6ad5fb +++ idna/tst_stringprep.c 649476a16fcda276903bd72a6d97101b0b6ad5fb @@ -0,0 +1,307 @@ +/* tst_stringprep.c --- Self tests for 
stringprep(). + * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson + * + * This file is part of GNU Libidn. + * + * GNU Libidn is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GNU Libidn is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Libidn; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include + +#include + +#include "utils.h" + +struct stringprep +{ + const char *comment; + const char *in; + const char *out; + const char *profile; + int flags; + int rc; +}; + +const struct stringprep strprep[] = { + {"Map to nothing", + "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B" + "bar" "\xE2\x80\x8B\xE2\x81\xA0" "baz\xEF\xB8\x80\xEF\xB8\x88" + "\xEF\xB8\x8F\xEF\xBB\xBF", "foobarbaz"}, + {"Case folding ASCII U+0043 U+0041 U+0046 U+0045", "CAFE", "cafe"}, + {"Case folding 8bit U+00DF (german sharp s)", "\xC3\x9F", "ss"}, + {"Case folding U+0130 (turkish capital I with dot)", + "\xC4\xB0", "i\xcc\x87"}, + {"Case folding multibyte U+0143 U+037A", + "\xC5\x83\xCD\xBA", "\xC5\x84 \xCE\xB9"}, + {"Case folding U+2121 U+33C6 U+1D7BB", + "\xE2\x84\xA1\xE3\x8F\x86\xF0\x9D\x9E\xBB", + "telc\xE2\x88\x95" "kg\xCF\x83"}, + {"Normalization of U+006a U+030c U+00A0 U+00AA", + "\x6A\xCC\x8C\xC2\xA0\xC2\xAA", "\xC7\xB0 a"}, + {"Case folding U+1FB7 and normalization", + "\xE1\xBE\xB7", "\xE1\xBE\xB6\xCE\xB9"}, + {"Self-reverting case folding U+01F0 
and normalization", + "\xC7\xB0", "\xC7\xB0"}, + {"Self-reverting case folding U+0390 and normalization", + "\xCE\x90", "\xCE\x90"}, + {"Self-reverting case folding U+03B0 and normalization", + "\xCE\xB0", "\xCE\xB0"}, + {"Self-reverting case folding U+1E96 and normalization", + "\xE1\xBA\x96", "\xE1\xBA\x96"}, + {"Self-reverting case folding U+1F56 and normalization", + "\xE1\xBD\x96", "\xE1\xBD\x96"}, + {"ASCII space character U+0020", "\x20", "\x20"}, + {"Non-ASCII 8bit space character U+00A0", "\xC2\xA0", "\x20"}, + {"Non-ASCII multibyte space character U+1680", + "\xE1\x9A\x80", NULL, "Nameprep", 0, STRINGPREP_CONTAINS_PROHIBITED}, + {"Non-ASCII multibyte space character U+2000", "\xE2\x80\x80", "\x20"}, + {"Zero Width Space U+200b", "\xE2\x80\x8b", ""}, + {"Non-ASCII multibyte space character U+3000", "\xE3\x80\x80", "\x20"}, + {"ASCII control characters U+0010 U+007F", "\x10\x7F", "\x10\x7F"}, + {"Non-ASCII 8bit control character U+0085", + "\xC2\x85", NULL, "Nameprep", 0, STRINGPREP_CONTAINS_PROHIBITED}, + {"Non-ASCII multibyte control character U+180E", + "\xE1\xA0\x8E", NULL, "Nameprep", 0, STRINGPREP_CONTAINS_PROHIBITED}, + {"Zero Width No-Break Space U+FEFF", "\xEF\xBB\xBF", ""}, + {"Non-ASCII control character U+1D175", + "\xF0\x9D\x85\xB5", NULL, "Nameprep", 0, + STRINGPREP_CONTAINS_PROHIBITED}, + {"Plane 0 private use character U+F123", + "\xEF\x84\xA3", NULL, "Nameprep", 0, STRINGPREP_CONTAINS_PROHIBITED}, + {"Plane 15 private use character U+F1234", + "\xF3\xB1\x88\xB4", NULL, "Nameprep", 0, + STRINGPREP_CONTAINS_PROHIBITED}, + {"Plane 16 private use character U+10F234", + "\xF4\x8F\x88\xB4", NULL, "Nameprep", 0, + STRINGPREP_CONTAINS_PROHIBITED}, + {"Non-character code point U+8FFFE", + "\xF2\x8F\xBF\xBE", NULL, "Nameprep", 0, + STRINGPREP_CONTAINS_PROHIBITED}, + {"Non-character code point U+10FFFF", + "\xF4\x8F\xBF\xBF", NULL, "Nameprep", 0, + STRINGPREP_CONTAINS_PROHIBITED}, + {"Surrogate code U+DF42", + "\xED\xBD\x82", NULL, "Nameprep", 0, 
STRINGPREP_CONTAINS_PROHIBITED}, + {"Non-plain text character U+FFFD", + "\xEF\xBF\xBD", NULL, "Nameprep", 0, STRINGPREP_CONTAINS_PROHIBITED}, + {"Ideographic description character U+2FF5", + "\xE2\xBF\xB5", NULL, "Nameprep", 0, STRINGPREP_CONTAINS_PROHIBITED}, + {"Display property character U+0341", "\xCD\x81", "\xCC\x81"}, + {"Left-to-right mark U+200E", + "\xE2\x80\x8E", "\xCC\x81", "Nameprep", 0, + STRINGPREP_CONTAINS_PROHIBITED}, + {"Deprecated U+202A", "\xE2\x80\xAA", "\xCC\x81", "Nameprep", 0, + STRINGPREP_CONTAINS_PROHIBITED}, + {"Language tagging character U+E0001", + "\xF3\xA0\x80\x81", "\xCC\x81", "Nameprep", 0, + STRINGPREP_CONTAINS_PROHIBITED}, + {"Language tagging character U+E0042", + "\xF3\xA0\x81\x82", NULL, "Nameprep", 0, + STRINGPREP_CONTAINS_PROHIBITED}, + {"Bidi: RandALCat character U+05BE and LCat characters", + "foo\xD6\xBE" "bar", NULL, "Nameprep", 0, + STRINGPREP_BIDI_BOTH_L_AND_RAL}, + {"Bidi: RandALCat character U+FD50 and LCat characters", + "foo\xEF\xB5\x90" "bar", NULL, "Nameprep", 0, + STRINGPREP_BIDI_BOTH_L_AND_RAL}, + {"Bidi: RandALCat character U+FB38 and LCat characters", + "foo\xEF\xB9\xB6" "bar", "foo \xd9\x8e" "bar"}, + {"Bidi: RandALCat without trailing RandALCat U+0627 U+0031", + "\xD8\xA7\x31", NULL, "Nameprep", 0, STRINGPREP_BIDI_LEADTRAIL_NOT_RAL}, + {"Bidi: RandALCat character U+0627 U+0031 U+0628", + "\xD8\xA7\x31\xD8\xA8", "\xD8\xA7\x31\xD8\xA8"}, + {"Unassigned code point U+E0002", + "\xF3\xA0\x80\x82", NULL, "Nameprep", STRINGPREP_NO_UNASSIGNED, + STRINGPREP_CONTAINS_UNASSIGNED}, + {"Larger test (shrinking)", + "X\xC2\xAD\xC3\x9F\xC4\xB0\xE2\x84\xA1\x6a\xcc\x8c\xc2\xa0\xc2" + "\xaa\xce\xb0\xe2\x80\x80", "xssi\xcc\x87" "tel\xc7\xb0 a\xce\xb0 ", + "Nameprep"}, + {"Larger test (expanding)", + "X\xC3\x9F\xe3\x8c\x96\xC4\xB0\xE2\x84\xA1\xE2\x92\x9F\xE3\x8c\x80", + "xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\x88" + "\xe3\x83\xab" "i\xcc\x87" "tel\x28" "d\x29\xe3\x82\xa2\xe3\x83\x91" + 
"\xe3\x83\xbc\xe3\x83\x88"}, + {"Test of prohibited ASCII character U+0020", + "\x20", NULL, "Nodeprep", 0, STRINGPREP_CONTAINS_PROHIBITED}, + {"Test of NFKC U+00A0 and prohibited character U+0020", + "\xC2\xA0", NULL, "Nodeprep", 0, STRINGPREP_CONTAINS_PROHIBITED}, + {"Case map + normalization", "\xC2\xB5", "\xCE\xBC", "Nameprep"}, + /* The rest are rather non-interesting, but no point in removing + working test cases... */ + {"case_nonfkc", "\xC2\xB5", "\xCE\xBC", "Nameprep", STRINGPREP_NO_NFKC, + STRINGPREP_FLAG_ERROR}, + {"NFKC test", "\xC2\xAA", "\x61", "Nameprep"}, + {"nameprep, exposed a bug in libstringprep 0.0.5", + "\xC2\xAA\x0A", "\x61\x0A"}, + {"unassigned code point U+0221", "\xC8\xA1", "\xC8\xA1", "Nameprep"}, + {"Unassigned code point U+0221", + "\xC8\xA1", NULL, "Nameprep", STRINGPREP_NO_UNASSIGNED, + STRINGPREP_CONTAINS_UNASSIGNED}, + {"Unassigned code point U+0236", "\xC8\xB6", "\xC8\xB6", "Nameprep"}, + {"unassigned code point U+0236", + "\xC8\xB6", NULL, "Nameprep", STRINGPREP_NO_UNASSIGNED, + STRINGPREP_CONTAINS_UNASSIGNED}, + {"bidi both RandALCat and LCat U+0627 U+00AA U+0628", + "\xD8\xA7\xC2\xAA\xD8\xA8", NULL, "Nameprep", 0, + STRINGPREP_BIDI_BOTH_L_AND_RAL}, + /* XMPP */ + {"XMPP node profile prohibited output", + "address@hidden", NULL, "Nodeprep", 0, STRINGPREP_CONTAINS_PROHIBITED}, + {"XMPP resource profile on same string should work though", + "address@hidden", "address@hidden", "Resourceprep"}, + /* iSCSI */ + {"iSCSI 1", "Example-Name", "example-name", "iSCSI"}, + {"iSCSI 2", "O+o", NULL, "iSCSI", 0, STRINGPREP_CONTAINS_PROHIBITED}, + {"iSCSI 3", "\x01", NULL, "iSCSI", 0, STRINGPREP_CONTAINS_PROHIBITED}, + {"iSCSI 4", "\xE3\x80\x82", NULL, "iSCSI", 0, + STRINGPREP_CONTAINS_PROHIBITED}, + {"iSCSI 5", "\xE2\xBF\xB5", NULL, "iSCSI", 0, + STRINGPREP_CONTAINS_PROHIBITED}, + {"SASL profile", "Example\xC2\xA0" "Name", "Example Name", "SASLprep"}, + /* SASL trace */ + {"SASL ANONYMOUS plain mechanism", "address@hidden", + "address@hidden", 
"plain"}, + {"SASLprep 1 old", "x\xC2\xADy", "xy", "SASLprep"}, + {"SASLprep 4 old", "\xE2\x85\xA3", "IV", "SASLprep"}, + /* SASLprep test vectors. */ + {"SASLprep 1 SOFT HYPHEN mapped to nothing", "I\xC2\xADX", "IX", + "SASLprep"}, + {"SASLprep 2 no transformation", "user", "user", "SASLprep"}, + {"SASLprep 3 case preserved, will not match #2", "USER", "USER", + "SASLprep"}, + {"SASLprep 4 output is NFKC, input in ISO 8859-1", "\xC2\xAA", "a", + "SASLprep"}, + {"SASLprep 5 output is NFKC, will match #1", "\xE2\x85\xA8", "IX", + "SASLprep"}, + {"SASLprep 6 Error - prohibited character", "\x07", NULL, "SASLprep", + 0, STRINGPREP_CONTAINS_PROHIBITED}, + {"SASLprep 7 Error - bidirectional check", "\xD8\xA7" "1", NULL, "SASLprep", + 0, STRINGPREP_BIDI_LEADTRAIL_NOT_RAL} +}; + +void +doit (void) +{ + char *p; + int rc; + size_t i; + + if (!stringprep_check_version (STRINGPREP_VERSION)) + fail ("stringprep_check_version() failed\n"); + + for (i = 0; i < sizeof (strprep) / sizeof (strprep[0]); i++) + { + if (debug) + printf ("STRINGPREP entry %d\n", i); + + if (debug) + { + printf ("flags: %d\n", strprep[i].flags); + + printf ("in: "); + escapeprint (strprep[i].in, strlen (strprep[i].in)); + hexprint (strprep[i].in, strlen (strprep[i].in)); + binprint (strprep[i].in, strlen (strprep[i].in)); + } + + { + uint32_t *l; + char *x; + l = stringprep_utf8_to_ucs4 (strprep[i].in, -1, NULL); + x = stringprep_ucs4_to_utf8 (l, -1, NULL, NULL); + free (l); + + if (strcmp (strprep[i].in, x) != 0) + { + fail ("bad UTF-8 in entry %d\n", i); + if (debug) + { + puts ("expected:"); + escapeprint (strprep[i].in, strlen (strprep[i].in)); + hexprint (strprep[i].in, strlen (strprep[i].in)); + puts ("computed:"); + escapeprint (x, strlen (x)); + hexprint (x, strlen (x)); + } + } + + free (x); + } + rc = stringprep_profile (strprep[i].in, &p, + strprep[i].profile ? 
+ strprep[i].profile : + "Nameprep", strprep[i].flags); + if (rc != strprep[i].rc) + { + fail ("stringprep() entry %d failed: %d\n", i, rc); + if (debug) + printf ("FATAL\n"); + if (rc == STRINGPREP_OK) + free (p); + continue; + } + + if (debug && rc == STRINGPREP_OK) + { + printf ("out: "); + escapeprint (p, strlen (p)); + hexprint (p, strlen (p)); + binprint (p, strlen (p)); + + printf ("expected out: "); + escapeprint (strprep[i].out, strlen (strprep[i].out)); + hexprint (strprep[i].out, strlen (strprep[i].out)); + binprint (strprep[i].out, strlen (strprep[i].out)); + } + else if (debug) + printf ("returned %d expected %d\n", rc, strprep[i].rc); + + if (rc == STRINGPREP_OK) + { + if (strlen (strprep[i].out) != strlen (p) || + memcmp (strprep[i].out, p, strlen (p)) != 0) + { + fail ("stringprep() entry %d failed\n", i); + if (debug) + printf ("ERROR\n"); + } + else if (debug) + printf ("OK\n\n"); + + free (p); + } + else if (debug) + printf ("OK\n\n"); + } + +#if 0 + { + char p[20]; + memset (p, 0, 10); + stringprep_unichar_to_utf8 (0x00DF, p); + hexprint (p, strlen (p)); + puts (""); + } +#endif +} ============================================================ --- idna/utils.c e417a5413b26ed1fc36e321db250ea010b571270 +++ idna/utils.c e417a5413b26ed1fc36e321db250ea010b571270 @@ -0,0 +1,153 @@ +/* utils.c --- Self test utilities. + * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson + * + * This file is part of GNU Libidn. + * + * GNU Libidn is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GNU Libidn is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Libidn; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include + +#include "utils.h" + +int debug = 0; +int error_count = 0; +int break_on_error = 0; + +void +fail (const char *format, ...) +{ + va_list arg_ptr; + + va_start (arg_ptr, format); + vfprintf (stderr, format, arg_ptr); + va_end (arg_ptr); + error_count++; + if (break_on_error) + exit (1); +} + +void +escapeprint (const char *str, size_t len) +{ + size_t i; + + printf (" (length %d bytes):\n\t", len); + for (i = 0; i < len; i++) + { + if (((str[i] & 0xFF) >= 'A' && (str[i] & 0xFF) <= 'Z') || + ((str[i] & 0xFF) >= 'a' && (str[i] & 0xFF) <= 'z') || + ((str[i] & 0xFF) >= '0' && (str[i] & 0xFF) <= '9') + || (str[i] & 0xFF) == ' ' || (str[i] & 0xFF) == '.') + printf ("%c", (str[i] & 0xFF)); + else + printf ("\\x%02X", (str[i] & 0xFF)); + if ((i + 1) % 16 == 0 && (i + 1) < len) + printf ("'\n\t'"); + } + printf ("\n"); +} + +void +hexprint (const char *str, size_t len) +{ + size_t i; + + printf ("\t;; "); + for (i = 0; i < len; i++) + { + printf ("%02x ", (str[i] & 0xFF)); + if ((i + 1) % 8 == 0) + printf (" "); + if ((i + 1) % 16 == 0 && i + 1 < len) + printf ("\n\t;; "); + } + printf ("\n"); +} + +void +binprint (const char *str, size_t len) +{ + size_t i; + + printf ("\t;; "); + for (i = 0; i < len; i++) + { + printf ("%d%d%d%d%d%d%d%d ", + (str[i] & 0xFF) & 0x80 ? 1 : 0, + (str[i] & 0xFF) & 0x40 ? 1 : 0, + (str[i] & 0xFF) & 0x20 ? 1 : 0, + (str[i] & 0xFF) & 0x10 ? 1 : 0, + (str[i] & 0xFF) & 0x08 ? 1 : 0, + (str[i] & 0xFF) & 0x04 ? 1 : 0, + (str[i] & 0xFF) & 0x02 ? 1 : 0, (str[i] & 0xFF) & 0x01 ? 
1 : 0); + if ((i + 1) % 3 == 0) + printf (" "); + if ((i + 1) % 6 == 0 && i + 1 < len) + printf ("\n\t;; "); + } + printf ("\n"); +} + +void +ucs4print (const uint32_t * str, size_t len) +{ + size_t i; + + printf ("\t;; "); + for (i = 0; i < len; i++) + { + printf ("U+%04x ", str[i]); + if ((i + 1) % 4 == 0) + printf (" "); + if ((i + 1) % 8 == 0 && i + 1 < len) + printf ("\n\t;; "); + } + puts (""); +} + +int +main (int argc, char *argv[]) +{ + do + if (strcmp (argv[argc - 1], "-v") == 0 || + strcmp (argv[argc - 1], "--verbose") == 0) + debug = 1; + else if (strcmp (argv[argc - 1], "-b") == 0 || + strcmp (argv[argc - 1], "--break-on-error") == 0) + break_on_error = 1; + else if (strcmp (argv[argc - 1], "-h") == 0 || + strcmp (argv[argc - 1], "-?") == 0 || + strcmp (argv[argc - 1], "--help") == 0) + { + printf ("Usage: %s [-vbh?] [--verbose] [--break-on-error] [--help]\n", + argv[0]); + return 1; + } + while (argc-- > 1); + + doit (); + + if (debug) + printf ("Self tests done with %d errors\n", error_count); + + return error_count ? 1 : 0; +} ============================================================ --- idna/utils.h 1af81a5334b6ec8ddb624fad1304b494817aeb8e +++ idna/utils.h 1af81a5334b6ec8ddb624fad1304b494817aeb8e @@ -0,0 +1,42 @@ +/* utils.h --- Prototypes for self test utilities. + * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson + * + * This file is part of GNU Libidn. + * + * GNU Libidn is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GNU Libidn is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Libidn; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + */ + +#ifndef UTILS_H +# define UTILS_H + +# include +# include +# include + +extern int debug; +extern int error_count; +extern int break_on_error; + +extern void fail (const char *format, ...); +extern void escapeprint (const char *str, size_t len); +extern void hexprint (const char *str, size_t len); +extern void binprint (const char *str, size_t len); +extern void ucs4print (const uint32_t * str, size_t len); + +/* This must be implemented elsewhere. */ +extern void doit (void); + +#endif /* UTILS_H */ ============================================================ --- .mtn-ignore e97ced7acc9976e11bf1986d1613fa9be9704fad +++ .mtn-ignore b05398492019242d50965fcbffbef791afdb157c @@ -35,21 +35,22 @@ ^m4/stdint_h\.m4$ ^m4/uintmax_t\.m4$ ^m4/ulonglong\.m4$ -^monotone/ABOUT-NLS$ +^ABOUT-NLS$ +^po/boldquot\.sed$ +^po/address@hidden +^po/address@hidden +^po/insert-header\.sin$ +^po/Makefile(\.in)*$ +^po/monotone\.pot$ +^po/POTFILES$ +^po/quot\.sed$ +^po/Rules-quot$ +^idna/idn-int.h ^monotone/html ^monotone/lua_tests\.status$ ^monotone/monotone\.(cps?|fn|ky|pg|tp|vr|toc|pdf|ps|dvi|log|info(-[0-9]+)?)$ ^monotone/mtn$ ^monotone/package_(full_)?revision(\.c|\.txt|_(dist|raw)\.txt)$ -^monotone/po/boldquot\.sed$ -^monotone/po/address@hidden -^monotone/po/address@hidden -^monotone/po/insert-header\.sin$ -^monotone/po/Makefile(\.in)*$ -^monotone/po/monotone\.pot$ -^monotone/po/POTFILES$ -^monotone/po/quot\.sed$ -^monotone/po/Rules-quot$ ^monotone/run_lua_tests$ ^monotone/run_tester_tests$ ^monotone/run_unit_tests$ ============================================================ --- Makefile.in f8194cdd2dd5414bb18916aab2589fce1b8e9b27 +++ Makefile.in ae54acc8e7695a62239ff945848163d6a1d9cd84 @@ -34,13 +34,22 @@ default: all # Autoconf substitution variables - we 
don't need to know most of the # standard ones. -CONFIGARGS := @CONFIGARGS@ -LIBRARIES := @LIBRARIES@ -srcdir := @srcdir@ -subsrcdir := @subsrcdir@ -blddir := $(shell pwd) -subdirs := $(LIBRARIES) monotone +CONFIGARGS := @CONFIGARGS@ +LIBRARIES := @LIBRARIES@ +srcdir := @srcdir@ +subsrcdir := @subsrcdir@ +blddir := $(shell pwd) +subdirs := $(LIBRARIES) monotone + +# We want to pass different options to the subdirectory configures +# depending on what they are. LIBSWITCHES lists the patterns +# acceptable to pass to the library directories. Everything in +# CONFIGARGS is single-quoted by the top level configure. + +LIBSWITCHES := '--enable%' '--disable%' '--with%' '-q' '-C' '--config-cache%' +LIBCONFIGARGS := $(filter $(LIBSWITCHES),$(CONFIGARGS)) + # Most of the "standard targets for users" only recurse into the # monotone subdirectory. cmdtgts := all install install-exec install-strip uninstall ============================================================ --- configure.ac a7a508ca8f53f63f6985ca3545bfae070f8270d1 +++ configure.ac b3ab0f0feb7b12f9e1b9846a05cdea4583d4c31a @@ -26,6 +26,11 @@ AC_SUBST(CONFIGARGS) CONFIGARGS="$ac_configure_args" AC_SUBST(CONFIGARGS) +# Gettext is invoked here, and the po directory lives at top level, +# primarily because autopoint is a horrid monster. 
+AM_GNU_GETTEXT([external]) +AM_GNU_GETTEXT_VERSION([0.11.5]) + # --with-system-foo support goes here LIBRARIES="botan idna lua netxx pcre sqlite" AC_SUBST(LIBRARIES) ============================================================ --- idna/Makefile.am 24965db46035cf019f0e553dd5e75e1b5341310f +++ idna/Makefile.am 387be0754ed112345b2ebf410243f306bb452d66 @@ -1,6 +1,87 @@ -lib_LIBRARIES = libidna.a -include_HEADERS = idna.h punycode.h stringprep.h -libidna_a_SOURCES = \ - gunibreak.h gunicomp.h gunidecomp.h idna.c idn-int.h nfkc.c \ - profiles.c punycode.c rfc3454.c stringprep.c toutf8.c \ - version.c +## Process this file with automake to produce Makefile.in +# Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson. +# Copyright (C) 2004 Free Software Foundation, Inc. +# +# This file is part of GNU Libidn. +# +# GNU Libidn is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2.1 of +# the License, or (at your option) any later version. +# +# GNU Libidn is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with GNU Libidn; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + +# MONOTONE: A bunch of undesirable stuff has been pruned from this +# MONOTONE: Makefile. Also, the required pieces of gnulib have been +# MONOTONE: incorporated into this makefile rather than having a gl +# MONOTONE: subdirectory, and the method for creating idn-int.h is +# MONOTONE: very different. 
+ +DEFS = -DLIBIDN_BUILDING @DEFS@ + +lib_LIBRARIES = libidn.a +idn_int = idn-int.h + +BUILT_SOURCES = $(idn_int) +DISTCLEANFILES = $(idn_int) +include_HEADERS = stringprep.h idna.h punycode.h +nodist_include_HEADERS = $(idn_int) + +idn-int.h: config.h + inttypes_h=$$(grep -q '^#define HAVE_INTTYPES_H' config.h; echo $$?);\ + stdint_h=$$(grep -q '^#define HAVE_STDINT_H' config.h; echo $$?);\ + uint32_t=$$(grep -q '^#define uint32_t' config.h; echo $$?);\ + uint16_t=$$(grep -q '^#define uint16_t' config.h; echo $$?);\ + if [ $$uint32_t -ne 0 ] || [ $$uint16_t -ne 0 ];\ + then if [ $$stdint_h -eq 0 ];\ + then echo '#include ' > idn-int.h.T;\ + elif [ $$inttypes_h -eq 0 ];\ + then echo '#include ' > idn-int.h.T;\ + else echo 'Missing inttypes.h and stdint.h' >&2; exit 1;\ + fi;\ + fi;\ + if [ $$uint32_t -eq 0 ] || [ $$uint16_t -eq 0 ];\ + then grep '^#define uint[0-9]*_t' config.h >> idn-int.h.T;\ + fi;\ + mv -f idn-int.h.T idn-int.h + +libidn_a_SOURCES = gunibreak.h gunicomp.h gunidecomp.h \ + nfkc.c toutf8.c version.c \ + stringprep.h.in stringprep.c rfc3454.c profiles.c \ + punycode.h punycode.c \ + idna.h idna.c \ + striconv.h striconv.c strverscmp.h strverscmp.c \ + c-strcase.h c-strcasecmp.c c-ctype.c c-ctype.h + +# This is a subset of the tests originally in a separate directory +# of the libidn distribution. tst_pr29, tst_strerror, and tst_tld +# have been excluded, since the corresponding code has been excluded +# from the library proper. + +TESTS = tst_stringprep$(EXEEXT) tst_punycode$(EXEEXT) \ + tst_idna$(EXEEXT) tst_idna2$(EXEEXT) tst_nfkc$(EXEEXT) +check_PROGRAMS = $(TESTS) + +# You might think there was a more elegant way to do this, but no. 
+tst_stringprep_SOURCES = tst_stringprep.c utils.c utils.h +tst_punycode_SOURCES = tst_punycode.c utils.c utils.h +tst_idna_SOURCES = tst_idna.c utils.c utils.h +tst_idna2_SOURCES = tst_idna2.c utils.c utils.h +tst_nfkc_SOURCES = tst_nfkc.c utils.c utils.h + +tst_stringprep_LDADD = libidn.a +tst_punycode_LDADD = libidn.a +tst_idna_LDADD = libidn.a +tst_idna2_LDADD = libidn.a +tst_nfkc_LDADD = libidn.a + +# This is necessary because autoreconf doesn't pay attention to +# AC_CONFIG_MACRO_DIR. +ACLOCAL_AMFLAGS = -I ../m4 ============================================================ --- idna/configure.ac 2a97033aff497ff3e8178b396974517b49d33bfe +++ idna/configure.ac b7dc4011614352b420eae67736131f05cd005447 @@ -9,23 +9,26 @@ # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR # PURPOSE. -# This file will be replaced by the actual third-party library -# configuration script for all directories where that is practical. -# (It may not be practical for dead-upstream or heavily modified -# third-party libraries, e.g. netxx and idna.) It makes no attempt to -# do anything beyond the bare minimum, just like the associated -# Makefile.am. +# This is a drastically trimmed down version of the configure.ac from +# libidn 1.4, with additional tweakage for monotone's purposes. AC_PREREQ(2.58) -AC_INIT(idna, XX, address@hidden) +AC_INIT([libidn (monotone)], 1.4, address@hidden) AC_CONFIG_AUX_DIR([..]) AC_CONFIG_MACRO_DIR([../m4]) AM_INIT_AUTOMAKE([1.7.1 foreign no-dist no-define]) AC_CONFIG_SRCDIR([idna.h]) -AC_CONFIG_FILES([Makefile]) +AC_CONFIG_FILES([Makefile stringprep.h]) +AC_CONFIG_HEADERS([config.h]) AC_PROG_CC AC_PROG_RANLIB +AM_ICONV +AM_LANGINFO_CODESET + +AC_TYPE_UINT16_T +AC_TYPE_UINT32_T + AC_OUTPUT ============================================================ --- idna/gunicomp.h d15e88c0a8d456b85f0ac4013211e4b10bcb4bd1 +++ idna/gunicomp.h b1f1e58f6caf864a628c5b31540ca45b2cfb7f69 @@ -1,3 +1,6 @@ +/* This file is automatically generated. DO NOT EDIT! 
+ Instead, edit gen-unicode-tables.pl and re-run. */ + #define COMPOSE_FIRST_START 1 #define COMPOSE_FIRST_SINGLE_START 147 #define COMPOSE_SECOND_START 357 ============================================================ --- idna/gunidecomp.h 7fd72b1ba528a9db3a662664104cdf3b452189d3 +++ idna/gunidecomp.h c600d98e705a928c7e805099514ebc66a9d0c054 @@ -1,4 +1,5 @@ -/* This file is automatically generated. DO NOT EDIT! */ +/* This file is automatically generated. DO NOT EDIT! + Instead, edit gen-unicode-tables.pl and re-run. */ #ifndef DECOMP_H #define DECOMP_H ============================================================ --- idna/idna.c 2a57a22e3ea331d29cc940a39e2e2784b1249316 +++ idna/idna.c da31a73e63391a4fbcf069c66dbbab8340009c67 @@ -1,5 +1,5 @@ -/* idna.c Convert to or from IDN strings. - * Copyright (C) 2002, 2003 Simon Josefsson +/* idna.c --- Convert to or from IDN strings. + * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson * * This file is part of GNU Libidn. * @@ -15,37 +15,34 @@ * * You should have received a copy of the GNU Lesser General Public * License along with GNU Libidn; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA * */ -#if HAVE_CONFIG_H +#ifdef HAVE_CONFIG_H # include "config.h" #endif #include #include -#include "idna/stringprep.h" -#include "idna/punycode.h" +#include +#include -#include "idna/idna.h" +#include "idna.h" -#ifdef _MSC_VER -#define strcasecmp(a,b) _stricmp(a,b) -#endif - #define DOTP(c) ((c) == 0x002E || (c) == 0x3002 || \ (c) == 0xFF0E || (c) == 0xFF61) /* Core functions */ /** - * idna_to_ascii_4i + * idna_to_ascii_4i - convert Unicode domain name label to text * @in: input array with unicode code points. * @inlen: length of input array with unicode code points. 
* @out: output zero terminated string that must have room for at * least 63 characters plus the terminating zero. - * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. + * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or + * %IDNA_USE_STD3_ASCII_RULES. * * The ToASCII operation takes a sequence of Unicode code points that make * up one label and transforms it into a sequence of code points in the @@ -67,7 +64,7 @@ * operation multiple times has exactly the same effect as applying it just * once. * - * Return value: Returns 0 on success, or an error code. + * Return value: Returns 0 on success, or an #Idna_rc error code. */ int idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags) @@ -119,10 +116,16 @@ idna_to_ascii_4i (const uint32_t * in, s len = strlen (p); do { + char *newp; + len = 2 * len + 10; /* XXX better guess? */ - p = realloc (p, len); - if (p == NULL) - return IDNA_MALLOC_ERROR; + newp = realloc (p, len); + if (newp == NULL) + { + free (p); + return IDNA_MALLOC_ERROR; + } + p = newp; if (flags & IDNA_ALLOW_UNASSIGNED) rc = stringprep_nameprep (p, len); @@ -244,7 +247,7 @@ step3: /* * 8. Verify that the number of code points is in the range 1 to 63 - * inclusive. + * inclusive (0 is excluded). */ step8: @@ -255,7 +258,7 @@ step8: return IDNA_SUCCESS; } -/* ToUnicode(). May realloc() utf8in. */ +/* ToUnicode(). May realloc() utf8in. Will free utf8in unconditionally. */ static int idna_to_unicode_internal (char *utf8in, uint32_t * out, size_t * outlen, int flags) @@ -266,8 +269,10 @@ idna_to_unicode_internal (char *utf8in, size_t addlen = 0; /* - * 1. If all code points in the sequence are in the ASCII range (0..7F) - * then skip to step 3. + * ToUnicode consists of the following steps: + * + * 1. If the sequence contains any code points outside the ASCII range + * (0..7F) then proceed to step 2, otherwise skip to step 3. 
*/ { @@ -290,9 +295,13 @@ idna_to_unicode_internal (char *utf8in, */ do { - utf8in = realloc (utf8in, utf8len + addlen); - if (!utf8in) - return IDNA_MALLOC_ERROR; + char *newp = realloc (utf8in, utf8len + addlen); + if (newp == NULL) + { + free (utf8in); + return IDNA_MALLOC_ERROR; + } + utf8in = newp; if (flags & IDNA_ALLOW_UNASSIGNED) rc = stringprep_nameprep (utf8in, utf8len + addlen); else @@ -302,7 +311,10 @@ idna_to_unicode_internal (char *utf8in, while (rc == STRINGPREP_TOO_SMALL_BUFFER); if (rc != STRINGPREP_OK) - return IDNA_STRINGPREP_ERROR; + { + free (utf8in); + return IDNA_STRINGPREP_ERROR; + } /* 3. Verify that the sequence begins with the ACE prefix, and save a * copy of the sequence. @@ -310,7 +322,10 @@ step3: step3: if (memcmp (IDNA_ACE_PREFIX, utf8in, strlen (IDNA_ACE_PREFIX)) != 0) - return IDNA_NO_ACE_PREFIX; + { + free (utf8in); + return IDNA_NO_ACE_PREFIX; + } /* 4. Remove the ACE prefix. */ @@ -327,7 +342,10 @@ step3: rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL); if (rc != PUNYCODE_SUCCESS) - return IDNA_PUNYCODE_ERROR; + { + free (utf8in); + return IDNA_PUNYCODE_ERROR; + } out[*outlen] = 0; /* add zero */ @@ -336,29 +354,37 @@ step3: rc = idna_to_ascii_4i (out, *outlen, tmpout, flags); if (rc != IDNA_SUCCESS) - return rc; + { + free (utf8in); + return rc; + } /* 7. Verify that the result of step 6 matches the saved copy from * step 3, using a case-insensitive ASCII comparison. */ if (strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0) - return IDNA_ROUNDTRIP_VERIFY_ERROR; + { + free (utf8in); + return IDNA_ROUNDTRIP_VERIFY_ERROR; + } /* 8. Return the saved copy from step 5. */ + free (utf8in); return IDNA_SUCCESS; } /** - * idna_to_unicode_44i + * idna_to_unicode_44i - convert domain name label to Unicode * @in: input array with unicode code points. * @inlen: length of input array with unicode code points. * @out: output array with unicode code points. 
* @outlen: on input, maximum size of output array with unicode code points, * on exit, actual size of output array with unicode code points. - * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. + * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or + * %IDNA_USE_STD3_ASCII_RULES. * * The ToUnicode operation takes a sequence of Unicode code points * that make up one label and returns a sequence of Unicode code @@ -369,22 +395,21 @@ step3: * ToUnicode never fails. If any step fails, then the original input * sequence is returned immediately in that step. * - * The ToUnicode output never contains more code points than its - * input. Note that the number of octets needed to represent a - * sequence of code points depends on the particular character - * encoding used. + * The Punycode decoder can never output more code points than it + * inputs, but Nameprep can, and therefore ToUnicode can. Note that + * the number of octets needed to represent a sequence of code points + * depends on the particular character encoding used. * * The inputs to ToUnicode are a sequence of code points, the * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of * ToUnicode is always a sequence of Unicode code points. * - * Return value: Returns error condition, but it must only be used for - * debugging purposes. The output buffer is always - * guaranteed to contain the correct data according to - * the specification (sans malloc induced errors). NB! - * This means that you normally ignore the return code - * from this function, as checking it means breaking the - * standard. + * Return value: Returns #Idna_rc error condition, but it must only be + * used for debugging purposes. The output buffer is always + * guaranteed to contain the correct data according to the + * specification (sans malloc induced errors). NB! This means that + * you normally ignore the return code from this function, as + * checking it means breaking the standard. 
*/ int idna_to_unicode_44i (const uint32_t * in, size_t inlen, @@ -406,7 +431,7 @@ idna_to_unicode_44i (const uint32_t * in *outlen = inlen; } - free (p); + /* p is freed in idna_to_unicode_internal. */ return rc; } @@ -414,16 +439,17 @@ idna_to_unicode_44i (const uint32_t * in /* Wrappers that handle several labels */ /** - * idna_to_ascii_4z: + * idna_to_ascii_4z - convert Unicode domain name label to text * @input: zero terminated input Unicode string. * @output: pointer to newly allocated output string. - * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. + * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or + * %IDNA_USE_STD3_ASCII_RULES. * * Convert UCS-4 domain name to ASCII string. The domain name may * contain several labels, separated by dots. The output buffer must * be deallocated by the caller. * - * Return value: Returns IDNA_SUCCESS on success, or error code. + * Return value: Returns %IDNA_SUCCESS on success, or error code. **/ int idna_to_ascii_4z (const uint32_t * input, char **output, int flags) @@ -481,9 +507,13 @@ idna_to_ascii_4z (const uint32_t * input if (out) { - out = realloc (out, strlen (out) + 1 + strlen (buf) + 1); - if (!out) - return IDNA_MALLOC_ERROR; + char *newp = realloc (out, strlen (out) + 1 + strlen (buf) + 1); + if (!newp) + { + free (out); + return IDNA_MALLOC_ERROR; + } + out = newp; strcat (out, "."); strcat (out, buf); } @@ -505,16 +535,17 @@ idna_to_ascii_4z (const uint32_t * input } /** - * idna_to_ascii_8z: + * idna_to_ascii_8z - convert Unicode domain name label to text * @input: zero terminated input UTF-8 string. * @output: pointer to newly allocated output string. - * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. + * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or + * %IDNA_USE_STD3_ASCII_RULES. * * Convert UTF-8 domain name to ASCII string. The domain name may * contain several labels, separated by dots. 
The output buffer must * be deallocated by the caller. * - * Return value: Returns IDNA_SUCCESS on success, or error code. + * Return value: Returns %IDNA_SUCCESS on success, or error code. **/ int idna_to_ascii_8z (const char *input, char **output, int flags) @@ -536,16 +567,18 @@ idna_to_ascii_8z (const char *input, cha } /** - * idna_to_ascii_lz: - * @input: zero terminated input UTF-8 string. + * idna_to_ascii_lz - convert Unicode domain name label to text + * @input: zero terminated input string encoded in the current locale's + * character set. * @output: pointer to newly allocated output string. - * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. + * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or + * %IDNA_USE_STD3_ASCII_RULES. * * Convert domain name in the locale's encoding to ASCII string. The * domain name may contain several labels, separated by dots. The * output buffer must be deallocated by the caller. * - * Return value: Returns IDNA_SUCCESS on success, or error code. + * Return value: Returns %IDNA_SUCCESS on success, or error code. **/ int idna_to_ascii_lz (const char *input, char **output, int flags) @@ -565,17 +598,18 @@ idna_to_ascii_lz (const char *input, cha } /** - * idna_to_unicode_4z4z: + * idna_to_unicode_4z4z - convert domain name label to Unicode * @input: zero-terminated Unicode string. * @output: pointer to newly allocated output Unicode string. - * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. + * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or + * %IDNA_USE_STD3_ASCII_RULES. * * Convert possibly ACE encoded domain name in UCS-4 format into a * UCS-4 string. The domain name may contain several labels, * separated by dots. The output buffer must be deallocated by the * caller. * - * Return value: Returns IDNA_SUCCESS on success, or error code. + * Return value: Returns %IDNA_SUCCESS on success, or error code. 
**/ int idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags) @@ -607,9 +641,16 @@ idna_to_unicode_4z4z (const uint32_t * i if (out) { - out = realloc (out, sizeof (out[0]) * (outlen + 1 + buflen + 1)); - if (!out) - return IDNA_MALLOC_ERROR; + uint32_t *newp = realloc (out, + sizeof (out[0]) + * (outlen + 1 + buflen + 1)); + if (!newp) + { + free (buf); + free (out); + return IDNA_MALLOC_ERROR; + } + out = newp; out[outlen++] = 0x002E; /* '.' (full stop) */ memcpy (out + outlen, buf, sizeof (buf[0]) * buflen); outlen += buflen; @@ -633,17 +674,18 @@ idna_to_unicode_4z4z (const uint32_t * i } /** - * idna_to_unicode_8z4z: + * idna_to_unicode_8z4z - convert domain name label to Unicode * @input: zero-terminated UTF-8 string. * @output: pointer to newly allocated output Unicode string. - * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. + * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or + * %IDNA_USE_STD3_ASCII_RULES. * * Convert possibly ACE encoded domain name in UTF-8 format into a * UCS-4 string. The domain name may contain several labels, * separated by dots. The output buffer must be deallocated by the * caller. * - * Return value: Returns IDNA_SUCCESS on success, or error code. + * Return value: Returns %IDNA_SUCCESS on success, or error code. **/ int idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags) @@ -663,17 +705,18 @@ idna_to_unicode_8z4z (const char *input, } /** - * idna_to_unicode_8z8z: + * idna_to_unicode_8z8z - convert domain name label to Unicode * @input: zero-terminated UTF-8 string. * @output: pointer to newly allocated output UTF-8 string. - * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. + * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or + * %IDNA_USE_STD3_ASCII_RULES. * * Convert possibly ACE encoded domain name in UTF-8 format into a * UTF-8 string. 
The domain name may contain several labels, * separated by dots. The output buffer must be deallocated by the * caller. * - * Return value: Returns IDNA_SUCCESS on success, or error code. + * Return value: Returns %IDNA_SUCCESS on success, or error code. **/ int idna_to_unicode_8z8z (const char *input, char **output, int flags) @@ -692,18 +735,19 @@ idna_to_unicode_8z8z (const char *input, } /** - * idna_to_unicode_8zlz: + * idna_to_unicode_8zlz - convert domain name label to Unicode * @input: zero-terminated UTF-8 string. * @output: pointer to newly allocated output string encoded in the * current locale's character set. - * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. + * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or + * %IDNA_USE_STD3_ASCII_RULES. * * Convert possibly ACE encoded domain name in UTF-8 format into a * string encoded in the current locale's character set. The domain * name may contain several labels, separated by dots. The output * buffer must be deallocated by the caller. * - * Return value: Returns IDNA_SUCCESS on success, or error code. + * Return value: Returns %IDNA_SUCCESS on success, or error code. **/ int idna_to_unicode_8zlz (const char *input, char **output, int flags) @@ -722,19 +766,20 @@ idna_to_unicode_8zlz (const char *input, } /** - * idna_to_unicode_lzlz: + * idna_to_unicode_lzlz - convert domain name label to Unicode * @input: zero-terminated string encoded in the current locale's * character set. * @output: pointer to newly allocated output string encoded in the * current locale's character set. - * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. + * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or + * %IDNA_USE_STD3_ASCII_RULES. * * Convert possibly ACE encoded domain name in the locale's character * set into a string encoded in the current locale's character set. * The domain name may contain several labels, separated by dots. 
The * output buffer must be deallocated by the caller. * - * Return value: Returns IDNA_SUCCESS on success, or error code. + * Return value: Returns %IDNA_SUCCESS on success, or error code. **/ int idna_to_unicode_lzlz (const char *input, char **output, int flags) @@ -780,6 +825,8 @@ idna_to_unicode_lzlz (const char *input, * @IDNA_ICONV_ERROR: Could not convert string in locale encoding. * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a * fatal error). + * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used + * internally in libc). * * Enumerated return codes of idna_to_ascii_4i(), * idna_to_unicode_44i() functions (and functions derived from those ============================================================ --- idna/idna.h dc4bfad8f440c60a7b219fec34647f9d04e1052c +++ idna/idna.h 7cbbdbc10bfd016a6ca501ac4a8574ab9572f0d3 @@ -1,5 +1,5 @@ -/* idna.h Declarations for IDNA. - * Copyright (C) 2002, 2003 Simon Josefsson +/* idna.h --- Declarations for Internationalized Domain Name in Applications. + * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson * * This file is part of GNU Libidn. * @@ -15,20 +15,20 @@ * * You should have received a copy of the GNU Lesser General Public * License along with GNU Libidn; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA * */ #ifndef _IDNA_H -#define _IDNA_H +# define _IDNA_H -#ifdef __cplusplus +# ifdef __cplusplus extern "C" { -#endif +# endif -#include /* size_t */ -#include "idna/idn-int.h" /* uint32_t */ +# include /* size_t */ +# include /* uint32_t */ /* Error codes. */ typedef enum @@ -37,6 +37,8 @@ extern "C" IDNA_STRINGPREP_ERROR = 1, IDNA_PUNYCODE_ERROR = 2, IDNA_CONTAINS_NON_LDH = 3, + /* Workaround typo in earlier versions. 
*/ + IDNA_CONTAINS_LDH = IDNA_CONTAINS_NON_LDH, IDNA_CONTAINS_MINUS = 4, IDNA_INVALID_LENGTH = 5, IDNA_NO_ACE_PREFIX = 6, @@ -44,7 +46,8 @@ extern "C" IDNA_CONTAINS_ACE_PREFIX = 8, IDNA_ICONV_ERROR = 9, /* Internal errors. */ - IDNA_MALLOC_ERROR = 201 + IDNA_MALLOC_ERROR = 201, + IDNA_DLOPEN_ERROR = 202 } Idna_rc; /* IDNA flags */ @@ -54,10 +57,12 @@ extern "C" IDNA_USE_STD3_ASCII_RULES = 0x0002 } Idna_flags; -#ifndef IDNA_ACE_PREFIX -#define IDNA_ACE_PREFIX "xn--" -#endif +# ifndef IDNA_ACE_PREFIX +# define IDNA_ACE_PREFIX "xn--" +# endif + extern const char *idna_strerror (Idna_rc rc); + /* Core functions */ extern int idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags); @@ -89,7 +94,7 @@ extern "C" extern int idna_to_unicode_lzlz (const char *input, char **output, int flags); -#ifdef __cplusplus +# ifdef __cplusplus } +# endif +#endif /* _IDNA_H */ -#endif -#endif /* _PUNYCODE_H */ ============================================================ --- idna/nfkc.c 2e2528f92fab6314dbd0e0b03697de038f6fc2ad +++ idna/nfkc.c a4ab7deec83effb74b369847eeb112687aa98a29 @@ -1,5 +1,5 @@ -/* nfkc.c Unicode normalization utilities. - * Copyright (C) 2002, 2003 Simon Josefsson +/* nfkc.c --- Unicode normalization utilities. + * Copyright (C) 2002, 2003, 2004, 2006, 2007 Simon Josefsson * * This file is part of GNU Libidn. 
* @@ -15,18 +15,18 @@ * * You should have received a copy of the GNU Lesser General Public * License along with GNU Libidn; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA * */ -#if HAVE_CONFIG_H +#ifdef HAVE_CONFIG_H # include "config.h" #endif #include #include -#include "idna/stringprep.h" +#include "stringprep.h" /* This file contains functions from GLIB, including gutf8.c and * gunidecomp.c, all licensed under LGPL and copyright hold by: @@ -51,23 +51,23 @@ #define g_malloc malloc #define g_free free #define GError void -#define g_set_error(a,b,c,d) /* */ -#define g_new(struct_type, n_structs) \ +#define g_set_error(a,b,c,d) ((void) 0) +#define g_new(struct_type, n_structs) \ ((struct_type *) g_malloc (((gsize) sizeof (struct_type)) * ((gsize) (n_structs)))) # if defined (__GNUC__) && !defined (__STRICT_ANSI__) && !defined (__cplusplus) -# define G_STMT_START (void)( -# define G_STMT_END ) +# define G_STMT_START (void)( +# define G_STMT_END ) # else # if (defined (sun) || defined (__sun__)) -# define G_STMT_START if (1) -# define G_STMT_END else (void)0 +# define G_STMT_START if (1) +# define G_STMT_END else (void)0 # else -# define G_STMT_START do -# define G_STMT_END while (0) +# define G_STMT_START do +# define G_STMT_END while (0) # endif # endif -#define g_return_val_if_fail(expr,val) G_STMT_START{ (void)0; }G_STMT_END -#define G_N_ELEMENTS(arr) (sizeof (arr) / sizeof ((arr)[0])) +#define g_return_val_if_fail(expr,val) G_STMT_START{ (void)0; }G_STMT_END +#define G_N_ELEMENTS(arr) (sizeof (arr) / sizeof ((arr)[0])) #define TRUE 1 #define FALSE 0 @@ -88,65 +88,65 @@ GNormalizeMode; /* Code from GLIB gutf8.c starts here. 
*/ -#define UTF8_COMPUTE(Char, Mask, Len) \ - if (Char < 128) \ - { \ - Len = 1; \ - Mask = 0x7f; \ - } \ - else if ((Char & 0xe0) == 0xc0) \ - { \ - Len = 2; \ - Mask = 0x1f; \ - } \ - else if ((Char & 0xf0) == 0xe0) \ - { \ - Len = 3; \ - Mask = 0x0f; \ - } \ - else if ((Char & 0xf8) == 0xf0) \ - { \ - Len = 4; \ - Mask = 0x07; \ - } \ - else if ((Char & 0xfc) == 0xf8) \ - { \ - Len = 5; \ - Mask = 0x03; \ - } \ - else if ((Char & 0xfe) == 0xfc) \ - { \ - Len = 6; \ - Mask = 0x01; \ - } \ - else \ +#define UTF8_COMPUTE(Char, Mask, Len) \ + if (Char < 128) \ + { \ + Len = 1; \ + Mask = 0x7f; \ + } \ + else if ((Char & 0xe0) == 0xc0) \ + { \ + Len = 2; \ + Mask = 0x1f; \ + } \ + else if ((Char & 0xf0) == 0xe0) \ + { \ + Len = 3; \ + Mask = 0x0f; \ + } \ + else if ((Char & 0xf8) == 0xf0) \ + { \ + Len = 4; \ + Mask = 0x07; \ + } \ + else if ((Char & 0xfc) == 0xf8) \ + { \ + Len = 5; \ + Mask = 0x03; \ + } \ + else if ((Char & 0xfe) == 0xfc) \ + { \ + Len = 6; \ + Mask = 0x01; \ + } \ + else \ Len = -1; -#define UTF8_LENGTH(Char) \ - ((Char) < 0x80 ? 1 : \ - ((Char) < 0x800 ? 2 : \ - ((Char) < 0x10000 ? 3 : \ - ((Char) < 0x200000 ? 4 : \ +#define UTF8_LENGTH(Char) \ + ((Char) < 0x80 ? 1 : \ + ((Char) < 0x800 ? 2 : \ + ((Char) < 0x10000 ? 3 : \ + ((Char) < 0x200000 ? 4 : \ ((Char) < 0x4000000 ? 
5 : 6))))) -#define UTF8_GET(Result, Chars, Count, Mask, Len) \ - (Result) = (Chars)[0] & (Mask); \ - for ((Count) = 1; (Count) < (Len); ++(Count)) \ - { \ - if (((Chars)[(Count)] & 0xc0) != 0x80) \ - { \ - (Result) = -1; \ - break; \ - } \ - (Result) <<= 6; \ - (Result) |= ((Chars)[(Count)] & 0x3f); \ +#define UTF8_GET(Result, Chars, Count, Mask, Len) \ + (Result) = (Chars)[0] & (Mask); \ + for ((Count) = 1; (Count) < (Len); ++(Count)) \ + { \ + if (((Chars)[(Count)] & 0xc0) != 0x80) \ + { \ + (Result) = -1; \ + break; \ + } \ + (Result) <<= 6; \ + (Result) |= ((Chars)[(Count)] & 0x3f); \ } -#define UNICODE_VALID(Char) \ - ((Char) < 0x110000 && \ - (((Char) & 0xFFFFF800) != 0xD800) && \ - ((Char) < 0xFDD0 || (Char) > 0xFDEF) && \ +#define UNICODE_VALID(Char) \ + ((Char) < 0x110000 && \ + (((Char) & 0xFFFFF800) != 0xD800) && \ + ((Char) < 0xFDD0 || (Char) > 0xFDEF) && \ ((Char) & 0xFFFE) != 0xFFFE) @@ -169,7 +169,7 @@ static const gchar utf8_skip_data[256] = 5, 5, 5, 6, 6, 1, 1 }; -const gchar *const g_utf8_skip = utf8_skip_data; +static const gchar *const g_utf8_skip = utf8_skip_data; #define g_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(guchar *)(p)]) @@ -185,7 +185,7 @@ const gchar *const g_utf8_skip = utf8_sk * * Return value: the length of the string in characters **/ -glong +static glong g_utf8_strlen (const gchar * p, gssize max) { glong len = 0; @@ -195,29 +195,29 @@ g_utf8_strlen (const gchar * p, gssize m if (max < 0) { while (*p) - { - p = g_utf8_next_char (p); - ++len; - } + { + p = g_utf8_next_char (p); + ++len; + } } else { if (max == 0 || !*p) - return 0; + return 0; p = g_utf8_next_char (p); while (p - start < max && *p) - { - ++len; - p = g_utf8_next_char (p); - } + { + ++len; + p = g_utf8_next_char (p); + } /* only do the last len increment if we got a complete * char (don't count partial chars) */ if (p - start == max) - ++len; + ++len; } return len; @@ -302,10 +302,10 @@ g_unichar_to_utf8 (gunichar c, gchar * o if (outbuf) { for (i = len - 1; 
i > 0; --i) - { - outbuf[i] = (c & 0x3f) | 0x80; - c >>= 6; - } + { + outbuf[i] = (c & 0x3f) | 0x80; + c >>= 6; + } outbuf[0] = c | first; } @@ -343,18 +343,18 @@ g_utf8_to_ucs4_fast (const gchar * str, if (len < 0) { while (*p) - { - p = g_utf8_next_char (p); - ++n_chars; - } + { + p = g_utf8_next_char (p); + ++n_chars; + } } else { while (p < str + len && *p) - { - p = g_utf8_next_char (p); - ++n_chars; - } + { + p = g_utf8_next_char (p); + ++n_chars; + } } result = g_new (gunichar, n_chars + 1); @@ -367,47 +367,47 @@ g_utf8_to_ucs4_fast (const gchar * str, gunichar wc = ((unsigned char *) p)[0]; if (wc < 0x80) - { - result[i] = wc; - p++; - } + { + result[i] = wc; + p++; + } else - { - if (wc < 0xe0) - { - charlen = 2; - wc &= 0x1f; - } - else if (wc < 0xf0) - { - charlen = 3; - wc &= 0x0f; - } - else if (wc < 0xf8) - { - charlen = 4; - wc &= 0x07; - } - else if (wc < 0xfc) - { - charlen = 5; - wc &= 0x03; - } - else - { - charlen = 6; - wc &= 0x01; - } + { + if (wc < 0xe0) + { + charlen = 2; + wc &= 0x1f; + } + else if (wc < 0xf0) + { + charlen = 3; + wc &= 0x0f; + } + else if (wc < 0xf8) + { + charlen = 4; + wc &= 0x07; + } + else if (wc < 0xfc) + { + charlen = 5; + wc &= 0x03; + } + else + { + charlen = 6; + wc &= 0x01; + } - for (j = 1; j < charlen; j++) - { - wc <<= 6; - wc |= ((unsigned char *) p)[j] & 0x3f; - } + for (j = 1; j < charlen; j++) + { + wc <<= 6; + wc |= ((unsigned char *) p)[j] & 0x3f; + } - result[i] = wc; - p += charlen; - } + result[i] = wc; + p += charlen; + } } result[i] = 0; @@ -440,8 +440,8 @@ g_ucs4_to_utf8 (const gunichar * str, **/ static gchar * g_ucs4_to_utf8 (const gunichar * str, - glong len, - glong * items_read, glong * items_written, GError ** error) + glong len, + glong * items_read, glong * items_written, GError ** error) { gint result_length; gchar *result = NULL; @@ -452,18 +452,18 @@ g_ucs4_to_utf8 (const gunichar * str, for (i = 0; len < 0 || i < len; i++) { if (!str[i]) - break; + break; if (str[i] >= 0x80000000) - { - 
if (items_read) - *items_read = i; + { + if (items_read) + *items_read = i; - g_set_error (error, G_CONVERT_ERROR, - G_CONVERT_ERROR_ILLEGAL_SEQUENCE, - _("Character out of range for UTF-8")); - goto err_out; - } + g_set_error (error, G_CONVERT_ERROR, + G_CONVERT_ERROR_ILLEGAL_SEQUENCE, + _("Character out of range for UTF-8")); + goto err_out; + } result_length += UTF8_LENGTH (str[i]); } @@ -491,8 +491,8 @@ err_out: /* Code from GLIB gunidecomp.c starts here. */ -#include "idna/gunidecomp.h" -#include "idna/gunicomp.h" +#include "gunidecomp.h" +#include "gunicomp.h" #define CC_PART1(Page, Char) \ ((combining_class_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \ @@ -527,7 +527,7 @@ err_out: * @string: a UCS-4 encoded string. * @len: the maximum length of @string to use. * - * Computes the canonical ordering of a string in-place. + * Computes the canonical ordering of a string in-place. * This rearranges decomposed characters in the string * according to their combining classes. See the Unicode * manual for more information. @@ -544,28 +544,28 @@ g_unicode_canonical_ordering (gunichar * swap = 0; last = COMBINING_CLASS (string[0]); for (i = 0; i < len - 1; ++i) - { - int next = COMBINING_CLASS (string[i + 1]); - if (next != 0 && last > next) - { - gsize j; - /* Percolate item leftward through string. */ - for (j = i + 1; j > 0; --j) - { - gunichar t; - if (COMBINING_CLASS (string[j - 1]) <= next) - break; - t = string[j]; - string[j] = string[j - 1]; - string[j - 1] = t; - swap = 1; - } - /* We're re-entering the loop looking at the old - character again. */ - next = last; - } - last = next; - } + { + int next = COMBINING_CLASS (string[i + 1]); + if (next != 0 && last > next) + { + gsize j; + /* Percolate item leftward through string. 
*/ + for (j = i + 1; j > 0; --j) + { + gunichar t; + if (COMBINING_CLASS (string[j - 1]) <= next) + break; + t = string[j]; + string[j] = string[j - 1]; + string[j - 1] = t; + swap = 1; + } + /* We're re-entering the loop looking at the old + character again. */ + next = last; + } + last = next; + } } } @@ -582,7 +582,7 @@ decompose_hangul (gunichar s, gunichar * if (SIndex < 0 || SIndex >= SCount) { if (r) - r[0] = s; + r[0] = s; *result_len = 1; } else @@ -592,19 +592,19 @@ decompose_hangul (gunichar s, gunichar * gunichar T = TBase + SIndex % TCount; if (r) - { - r[0] = L; - r[1] = V; - } + { + r[0] = L; + r[1] = V; + } if (T != TBase) - { - if (r) - r[2] = T; - *result_len = 3; - } + { + if (r) + r[2] = T; + *result_len = 3; + } else - *result_len = 2; + *result_len = 2; } } @@ -618,34 +618,34 @@ find_decomposition (gunichar ch, gboolea if (ch >= decomp_table[start].ch && ch <= decomp_table[end - 1].ch) { while (TRUE) - { - int half = (start + end) / 2; - if (ch == decomp_table[half].ch) - { - int offset; + { + int half = (start + end) / 2; + if (ch == decomp_table[half].ch) + { + int offset; - if (compat) - { - offset = decomp_table[half].compat_offset; - if (offset == G_UNICODE_NOT_PRESENT_OFFSET) - offset = decomp_table[half].canon_offset; - } - else - { - offset = decomp_table[half].canon_offset; - if (offset == G_UNICODE_NOT_PRESENT_OFFSET) - return NULL; - } + if (compat) + { + offset = decomp_table[half].compat_offset; + if (offset == G_UNICODE_NOT_PRESENT_OFFSET) + offset = decomp_table[half].canon_offset; + } + else + { + offset = decomp_table[half].canon_offset; + if (offset == G_UNICODE_NOT_PRESENT_OFFSET) + return NULL; + } - return &(decomp_expansion_string[offset]); - } - else if (half == start) - break; - else if (ch > decomp_table[half].ch) - start = half; - else - end = half; - } + return &(decomp_expansion_string[offset]); + } + else if (half == start) + break; + else if (ch > decomp_table[half].ch) + start = half; + else + end = half; + } } 
return NULL; @@ -667,7 +667,7 @@ combine_hangul (gunichar a, gunichar b, return TRUE; } else if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0 - && 0 <= TIndex && TIndex <= TCount) + && 0 <= TIndex && TIndex <= TCount) { *result = a + TIndex; return TRUE; @@ -697,13 +697,13 @@ combine (gunichar a, gunichar b, gunicha if (index_a >= COMPOSE_FIRST_SINGLE_START && index_a < COMPOSE_SECOND_START) { if (b == compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][0]) - { - *result = - compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][1]; - return TRUE; - } + { + *result = + compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][1]; + return TRUE; + } else - return FALSE; + return FALSE; } index_b = COMPOSE_INDEX (b); @@ -711,14 +711,14 @@ combine (gunichar a, gunichar b, gunicha if (index_b >= COMPOSE_SECOND_SINGLE_START) { if (a == - compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][0]) - { - *result = - compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][1]; - return TRUE; - } + compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][0]) + { + *result = + compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][1]; + return TRUE; + } else - return FALSE; + return FALSE; } if (index_a >= COMPOSE_FIRST_START && index_a < COMPOSE_FIRST_SINGLE_START @@ -726,14 +726,14 @@ combine (gunichar a, gunichar b, gunicha && index_b < COMPOSE_SECOND_SINGLE_START) { gunichar res = - compose_array[index_a - COMPOSE_FIRST_START][index_b - - COMPOSE_SECOND_START]; + compose_array[index_a - COMPOSE_FIRST_START][index_b - + COMPOSE_SECOND_START]; if (res) - { - *result = res; - return TRUE; - } + { + *result = res; + return TRUE; + } } return FALSE; @@ -757,20 +757,20 @@ _g_utf8_normalize_wc (const gchar * str, gunichar wc = g_utf8_get_char (p); if (wc >= 0xac00 && wc <= 0xd7af) - { - gsize result_len; - decompose_hangul (wc, NULL, &result_len); - n_wc += result_len; - } + { + gsize result_len; + decompose_hangul (wc, NULL, 
&result_len); + n_wc += result_len; + } else - { - decomp = find_decomposition (wc, do_compat); + { + decomp = find_decomposition (wc, do_compat); - if (decomp) - n_wc += g_utf8_strlen (decomp, -1); - else - n_wc++; - } + if (decomp) + n_wc += g_utf8_strlen (decomp, -1); + else + n_wc++; + } p = g_utf8_next_char (p); } @@ -790,36 +790,36 @@ _g_utf8_normalize_wc (const gchar * str, gsize old_n_wc = n_wc; if (wc >= 0xac00 && wc <= 0xd7af) - { - gsize result_len; - decompose_hangul (wc, wc_buffer + n_wc, &result_len); - n_wc += result_len; - } + { + gsize result_len; + decompose_hangul (wc, wc_buffer + n_wc, &result_len); + n_wc += result_len; + } else - { - decomp = find_decomposition (wc, do_compat); + { + decomp = find_decomposition (wc, do_compat); - if (decomp) - { - const char *pd; - for (pd = decomp; *pd != '\0'; pd = g_utf8_next_char (pd)) - wc_buffer[n_wc++] = g_utf8_get_char (pd); - } - else - wc_buffer[n_wc++] = wc; - } + if (decomp) + { + const char *pd; + for (pd = decomp; *pd != '\0'; pd = g_utf8_next_char (pd)) + wc_buffer[n_wc++] = g_utf8_get_char (pd); + } + else + wc_buffer[n_wc++] = wc; + } if (n_wc > 0) - { - cc = COMBINING_CLASS (wc_buffer[old_n_wc]); + { + cc = COMBINING_CLASS (wc_buffer[old_n_wc]); - if (cc == 0) - { - g_unicode_canonical_ordering (wc_buffer + last_start, - n_wc - last_start); - last_start = old_n_wc; - } - } + if (cc == 0) + { + g_unicode_canonical_ordering (wc_buffer + last_start, + n_wc - last_start); + last_start = old_n_wc; + } + } p = g_utf8_next_char (p); } @@ -827,7 +827,7 @@ _g_utf8_normalize_wc (const gchar * str, if (n_wc > 0) { g_unicode_canonical_ordering (wc_buffer + last_start, - n_wc - last_start); + n_wc - last_start); last_start = n_wc; } @@ -842,32 +842,32 @@ _g_utf8_normalize_wc (const gchar * str, last_start = 0; for (i = 0; i < n_wc; i++) - { - int cc = COMBINING_CLASS (wc_buffer[i]); + { + int cc = COMBINING_CLASS (wc_buffer[i]); - if (i > 0 && - (last_cc == 0 || last_cc != cc) && - combine 
(wc_buffer[last_start], wc_buffer[i], - &wc_buffer[last_start])) - { - for (j = i + 1; j < n_wc; j++) - wc_buffer[j - 1] = wc_buffer[j]; - n_wc--; - i--; + if (i > 0 && + (last_cc == 0 || last_cc != cc) && + combine (wc_buffer[last_start], wc_buffer[i], + &wc_buffer[last_start])) + { + for (j = i + 1; j < n_wc; j++) + wc_buffer[j - 1] = wc_buffer[j]; + n_wc--; + i--; - if (i == last_start) - last_cc = 0; - else - last_cc = COMBINING_CLASS (wc_buffer[i - 1]); + if (i == last_start) + last_cc = 0; + else + last_cc = COMBINING_CLASS (wc_buffer[i - 1]); - continue; - } + continue; + } - if (cc == 0) - last_start = i; + if (cc == 0) + last_start = i; - last_cc = cc; - } + last_cc = cc; + } } wc_buffer[n_wc] = 0; @@ -926,7 +926,7 @@ g_utf8_normalize (const gchar * str, gss /* Public Libidn API starts here. */ /** - * stringprep_utf8_to_unichar: + * stringprep_utf8_to_unichar - convert UTF-8 to Unicode code point * @p: a pointer to Unicode character encoded as UTF-8 * * Converts a sequence of bytes encoded as UTF-8 to a Unicode character. @@ -942,7 +942,7 @@ stringprep_utf8_to_unichar (const char * } /** - * stringprep_unichar_to_utf8: + * stringprep_unichar_to_utf8 - convert Unicode code point to UTF-8 * @c: a ISO10646 character code * @outbuf: output buffer, must have at least 6 bytes of space. * If %NULL, the length will be computed and returned @@ -959,7 +959,7 @@ stringprep_unichar_to_utf8 (uint32_t c, } /** - * stringprep_utf8_to_ucs4: + * stringprep_utf8_to_ucs4 - convert UTF-8 string to UCS-4 * @str: a UTF-8 encoded string * @len: the maximum length of @str to use. If @len < 0, then * the string is nul-terminated. @@ -980,7 +980,7 @@ stringprep_utf8_to_ucs4 (const char *str } /** - * stringprep_ucs4_to_utf8: + * stringprep_ucs4_to_utf8 - convert UCS-4 string to UTF-8 * @str: a UCS-4 encoded string * @len: the maximum length of @str to use. If @len < 0, then * the string is terminated with a 0 character. 
@@ -999,14 +999,14 @@ stringprep_ucs4_to_utf8 (const uint32_t **/ char * stringprep_ucs4_to_utf8 (const uint32_t * str, ssize_t len, - size_t * items_read, size_t * items_written) + size_t * items_read, size_t * items_written) { return g_ucs4_to_utf8 (str, len, (glong *) items_read, - (glong *) items_written, NULL); + (glong *) items_written, NULL); } /** - * stringprep_utf8_nfkc_normalize: + * stringprep_utf8_nfkc_normalize - normalize Unicode string * @str: a UTF-8 encoded string. * @len: length of @str, in bytes, or -1 if @str is nul-terminated. * @@ -1034,7 +1034,7 @@ stringprep_utf8_nfkc_normalize (const ch } /** - * stringprep_ucs4_nfkc_normalize: + * stringprep_ucs4_nfkc_normalize - normalize Unicode string * @str: a Unicode string. * @len: length of @str array, or -1 if @str is nul-terminated. * ============================================================ --- idna/profiles.c 401a78afbc6f10768a43520c3ad60e55cf8179db +++ idna/profiles.c df12e897155e0ffb64e85af0f8bf0a67d6e5eb76 @@ -1,5 +1,5 @@ -/* profiles.c Definitions of stringprep profiles. - * Copyright (C) 2002, 2003 Simon Josefsson +/* profiles.c --- Definitions of stringprep profiles. + * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson * * This file is part of GNU Libidn. * @@ -15,21 +15,22 @@ * * You should have received a copy of the GNU Lesser General Public * License along with GNU Libidn; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA * */ -#include "idna/stringprep.h" +#include "stringprep.h" const Stringprep_profiles stringprep_profiles[] = { {"Nameprep", stringprep_nameprep}, - {"KRBprep", stringprep_kerberos5}, + {"KRBprep", stringprep_kerberos5}, /* Deprecate? 
*/ {"Nodeprep", stringprep_xmpp_nodeprep}, {"Resourceprep", stringprep_xmpp_resourceprep}, - {"plain", stringprep_plain}, /* sasl-anon-00 */ - {"trace", stringprep_trace}, /* sasl-anon-01,02 */ + {"plain", stringprep_plain}, /* sasl-anon-00. */ + {"trace", stringprep_trace}, /* sasl-anon-01,02,03. */ {"SASLprep", stringprep_saslprep}, - {"ISCSIprep", stringprep_iscsi}, + {"ISCSIprep", stringprep_iscsi}, /* Obsolete. */ + {"iSCSI", stringprep_iscsi}, /* IANA. */ {NULL, NULL} }; @@ -173,7 +174,9 @@ const Stringprep_table_element stringpre }; const Stringprep_table_element stringprep_iscsi_prohibit[] = { - {0x0000}, /* [ASCII CONTROL CHARACTERS and SPACE through ,] */ + /* NB, since start == 0, we must have that end != 0 for the + end-of-table logic to work. */ + {0x0000, 1}, /* [ASCII CONTROL CHARACTERS and SPACE through ,] */ {0x0001}, {0x0002}, {0x0003}, @@ -244,14 +247,15 @@ const Stringprep_profile stringprep_iscs {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1}, {STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_2}, {STRINGPREP_NFKC, 0, 0}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_1}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2}, {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1}, {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2}, - {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1}, - {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2}, {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3}, {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_4}, {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_5}, {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_6}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_7}, {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_8}, {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_9}, {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_iscsi_prohibit}, @@ -289,7 +293,7 @@ const Stringprep_profile stringprep_sasl {STRINGPREP_MAP_TABLE, 0, stringprep_saslprep_space_map}, 
{STRINGPREP_MAP_TABLE, 0, stringprep_rfc3454_B_1}, {STRINGPREP_NFKC, 0, 0}, - {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2}, + {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_1_2}, {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_1}, {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_2_2}, {STRINGPREP_PROHIBIT_TABLE, 0, stringprep_rfc3454_C_3}, ============================================================ --- idna/punycode.c bfae13cf23da87463b6a3eae86ecbb30306557d4 +++ idna/punycode.c 19039b793423e3c68982fbc5279887be0fe1e642 @@ -1,5 +1,5 @@ -/* punycode.c Implementation of punycode used to ASCII encode IDN's. - * Copyright (C) 2002, 2003 Simon Josefsson +/* punycode.c --- Implementation of punycode used to ASCII encode IDN's. + * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson * * This file is part of GNU Libidn. * @@ -15,7 +15,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with GNU Libidn; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA * */ @@ -61,7 +61,7 @@ #include -#include "idna/punycode.h" +#include "punycode.h" /*** Bootstring parameters for Punycode ***/ @@ -148,7 +148,7 @@ adapt (punycode_uint delta, punycode_uin /*** Main encode function ***/ /** - * punycode_encode: + * punycode_encode - encode Unicode to Punycode * @input_length: The number of code points in the @input array and * the number of flags in the @case_flags array. * @input: An array of code points. They are presumed to be Unicode @@ -179,16 +179,16 @@ adapt (punycode_uint delta, punycode_uin * Converts a sequence of code points (presumed to be Unicode code * points) to Punycode. * - * Return value: The return value can be any of the punycode_status - * values defined above except %punycode_bad_input. 
If not - * %punycode_success, then @output_size and @output might contain + * Return value: The return value can be any of the #Punycode_status + * values defined above except %PUNYCODE_BAD_INPUT. If not + * %PUNYCODE_SUCCESS, then @output_size and @output might contain * garbage. **/ int punycode_encode (size_t input_length, const punycode_uint input[], const unsigned char case_flags[], - size_t *output_length, char output[]) + size_t * output_length, char output[]) { punycode_uint input_len, n, delta, h, b, bias, j, m, q, k, t; size_t out, max_out; @@ -299,7 +299,7 @@ punycode_encode (size_t input_length, /*** Main decode function ***/ /** - * punycode_decode: + * punycode_decode - decode Punycode to Unicode * @input_length: The number of ASCII code points in the @input array. * @input: An array of ASCII code points (0..7F). * @output_length: The caller passes in the maximum number of code @@ -328,8 +328,8 @@ punycode_encode (size_t input_length, * Converts Punycode to a sequence of code points (presumed to be * Unicode code points). * - * Return value: The return value can be any of the punycode_status - * values defined above. If not %punycode_success, then + * Return value: The return value can be any of the #Punycode_status + * values defined above. If not %PUNYCODE_SUCCESS, then * @output_length, @output, and @case_flags might contain garbage. * **/ ============================================================ --- idna/punycode.h f26023ae3560fd2b44153e8740aa6bf132274cc2 +++ idna/punycode.h 36331c9ce21c86f8b1cc860c67a5e5c359721746 @@ -1,5 +1,5 @@ -/* punycode.h Declarations for punycode functions. - * Copyright (C) 2002, 2003 Simon Josefsson +/* punycode.h --- Declarations for punycode functions. + * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson * * This file is part of GNU Libidn. 
* @@ -15,7 +15,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with GNU Libidn; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA * */ @@ -68,7 +68,7 @@ extern "C" #endif #include /* size_t */ -#include "idna/idn-int.h" /* uint32_t */ +#include /* uint32_t */ enum punycode_status { @@ -86,6 +86,8 @@ extern "C" PUNYCODE_OVERFLOW = punycode_overflow } Punycode_status; + extern const char *punycode_strerror (Punycode_status rc); + /* punycode_uint needs to be unsigned and needs to be */ /* at least 26 bits wide. */ ============================================================ --- idna/rfc3454.c bbb9eb2443a7d1f240345e7b153e59212bd4319c +++ idna/rfc3454.c 1a30904ada04fea7b6cf1afd24b29cd842f165ea @@ -1,7 +1,7 @@ /* This file is automatically generated. DO NOT EDIT! Instead, edit gen-stringprep-tables.pl and re-run. */ -#include "idna/stringprep.h" +#include "stringprep.h" /* * A.1 Unassigned code points in Unicode 3.2 ============================================================ --- idna/stringprep.c c6749fc1021971e084a16c0a9523fcc52cbb8476 +++ idna/stringprep.c 60d6e36db7d7f4f77efd39cdaa8a7e44e768f96f @@ -1,5 +1,5 @@ -/* stringprep.c Core stringprep implementation. - * Copyright (C) 2002, 2003 Simon Josefsson +/* stringprep.c --- Core stringprep implementation. + * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson * * This file is part of GNU Libidn. 
* @@ -15,18 +15,18 @@ * * You should have received a copy of the GNU Lesser General Public * License along with GNU Libidn; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA * */ -#if HAVE_CONFIG_H +#ifdef HAVE_CONFIG_H # include "config.h" #endif #include #include -#include "idna/stringprep.h" +#include "stringprep.h" static ssize_t stringprep_find_character_in_table (uint32_t ucs4, @@ -34,10 +34,15 @@ stringprep_find_character_in_table (uint { ssize_t i; - /* During self tests, this is where it spends its CPU time and - causes most cache misses. Do a binary search? */ + /* This is where typical uses of Libidn spends very close to all CPU + time and causes most cache misses. One could easily do a binary + search instead. Before rewriting this, I want hard evidence this + slowness is at all relevant in typical applications. (I don't + dispute optimization may improve matters significantly, I'm + mostly interested in having someone give real-world benchmark on + the impact of libidn.) */ - for (i = 0; table[i].start; i++) + for (i = 0; table[i].start || table[i].end; i++) if (ucs4 >= table[i].start && ucs4 <= (table[i].end ? table[i].end : table[i].start)) return i; @@ -99,13 +104,13 @@ stringprep_apply_table_to_string (uint32 ( INVERTED(profileflags) && (profileflags & flags))) /** - * stringprep_4i: + * stringprep_4i - prepare internationalized string * @ucs4: input/output array with string to prepare. * @len: on input, length of input array with Unicode code points, - * on exit, length of output array with Unicode code points. + * on exit, length of output array with Unicode code points. * @maxucs4len: maximum length of input/output array. - * @flags: stringprep profile flags, or 0. - * @profile: pointer to stringprep profile to use. + * @flags: a #Stringprep_profile_flags value, or 0. 
+ * @profile: pointer to #Stringprep_profile to use. * * Prepare the input UCS-4 string according to the stringprep profile, * and write back the result to the input string. @@ -119,14 +124,15 @@ stringprep_apply_table_to_string (uint32 * indicate how large the buffer holding the string is. This function * will not read or write to code points outside that size. * - * The @flags are one of Stringprep_profile_flags, or 0. + * The @flags are one of #Stringprep_profile_flags values, or 0. * - * The @profile contain the instructions to perform. Your application - * can define new profiles, possibly re-using the generic stringprep - * tables that always will be part of the library, or use one of the - * currently supported profiles. + * The @profile contain the #Stringprep_profile instructions to + * perform. Your application can define new profiles, possibly + * re-using the generic stringprep tables that always will be part of + * the library, or use one of the currently supported profiles. * - * Return value: Returns %STRINGPREP_OK iff successful, or an error code. + * Return value: Returns %STRINGPREP_OK iff successful, or an + * #Stringprep_rc error code. **/ int stringprep_4i (uint32_t * ucs4, size_t * len, size_t maxucs4len, @@ -286,11 +292,11 @@ stringprep_4zi_1 (uint32_t * ucs4, size_ } /** - * stringprep_4zi: + * stringprep_4zi - prepare internationalized string * @ucs4: input/output array with zero terminated string to prepare. * @maxucs4len: maximum length of input/output array. - * @flags: stringprep profile flags, or 0. - * @profile: pointer to stringprep profile to use. + * @flags: a #Stringprep_profile_flags value, or 0. + * @profile: pointer to #Stringprep_profile to use. * * Prepare the input zero terminated UCS-4 string according to the * stringprep profile, and write back the result to the input string. @@ -299,14 +305,15 @@ stringprep_4zi_1 (uint32_t * ucs4, size_ * indicate how large the buffer holding the string is. 
This function * will not read or write to code points outside that size. * - * The @flags are one of Stringprep_profile_flags, or 0. + * The @flags are one of #Stringprep_profile_flags values, or 0. * - * The @profile contain the instructions to perform. Your application - * can define new profiles, possibly re-using the generic stringprep - * tables that always will be part of the library, or use one of the - * currently supported profiles. + * The @profile contain the #Stringprep_profile instructions to + * perform. Your application can define new profiles, possibly + * re-using the generic stringprep tables that always will be part of + * the library, or use one of the currently supported profiles. * - * Return value: Returns %STRINGPREP_OK iff successful, or an error code. + * Return value: Returns %STRINGPREP_OK iff successful, or an + * #Stringprep_rc error code. **/ int stringprep_4zi (uint32_t * ucs4, size_t maxucs4len, @@ -322,11 +329,11 @@ stringprep_4zi (uint32_t * ucs4, size_t } /** - * stringprep: + * stringprep - prepare internationalized string * @in: input/ouput array with string to prepare. * @maxlen: maximum length of input/output array. - * @flags: stringprep profile flags, or 0. - * @profile: pointer to stringprep profile to use. + * @flags: a #Stringprep_profile_flags value, or 0. + * @profile: pointer to #Stringprep_profile to use. * * Prepare the input zero terminated UTF-8 string according to the * stringprep profile, and write back the result to the input string. @@ -339,12 +346,12 @@ stringprep_4zi (uint32_t * ucs4, size_t * indicate how large the buffer holding the string is. This function * will not read or write to characters outside that size. * - * The @flags are one of Stringprep_profile_flags, or 0. + * The @flags are one of #Stringprep_profile_flags values, or 0. * - * The @profile contain the instructions to perform. 
Your application - * can define new profiles, possibly re-using the generic stringprep - * tables that always will be part of the library, or use one of the - * currently supported profiles. + * The @profile contain the #Stringprep_profile instructions to + * perform. Your application can define new profiles, possibly + * re-using the generic stringprep tables that always will be part of + * the library, or use one of the currently supported profiles. * * Return value: Returns %STRINGPREP_OK iff successful, or an error code. **/ @@ -361,13 +368,19 @@ stringprep (char *in, do { + uint32_t *newp; + if (ucs4) free (ucs4); ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len); maxucs4len = ucs4len + adducs4len; - ucs4 = realloc (ucs4, maxucs4len * sizeof (uint32_t)); - if (!ucs4) - return STRINGPREP_MALLOC_ERROR; + newp = realloc (ucs4, maxucs4len * sizeof (uint32_t)); + if (!newp) + { + free (ucs4); + return STRINGPREP_MALLOC_ERROR; + } + ucs4 = newp; rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile); adducs4len += 50; @@ -398,11 +411,11 @@ stringprep (char *in, } /** - * stringprep_profile: + * stringprep_profile - prepare internationalized string * @in: input array with UTF-8 string to prepare. * @out: output variable with pointer to newly allocate string. * @profile: name of stringprep profile to use. - * @flags: stringprep profile flags, or 0. + * @flags: a #Stringprep_profile_flags value, or 0. * * Prepare the input zero terminated UTF-8 string according to the * stringprep profile, and return the result in a newly allocated @@ -414,7 +427,7 @@ stringprep (char *in, * * The output @out variable must be deallocated by the caller. * - * The @flags are one of Stringprep_profile_flags, or 0. + * The @flags are one of #Stringprep_profile_flags values, or 0. * * The @profile specifies the name of the stringprep profile to use. * It must be one of the internally supported stringprep profiles. 
@@ -424,8 +437,7 @@ stringprep_profile (const char *in, int stringprep_profile (const char *in, char **out, - const char *profile, - Stringprep_profile_flags flags) + const char *profile, Stringprep_profile_flags flags) { const Stringprep_profiles *p; char *str = NULL; @@ -473,18 +485,26 @@ stringprep_profile (const char *in, * * The library contains a generic Stringprep implementation that does * Unicode 3.2 NFKC normalization, mapping and prohibitation of - * characters, and bidirectional character handling. Profiles for iSCSI, - * Kerberos 5, Nameprep, SASL and XMPP are included. Punycode and ASCII - * Compatible Encoding (ACE) via IDNA are supported. + * characters, and bidirectional character handling. Profiles for + * Nameprep, iSCSI, SASL and XMPP are included. Punycode and ASCII + * Compatible Encoding (ACE) via IDNA are supported. A mechanism to + * define Top-Level Domain (TLD) specific validation tables, and to + * compare strings against those tables, is included. Default tables + * for some TLDs are also included. * - * The Stringprep API consists of two main functions, one for converting - * data from the system's native representation into UTF-8, and one - * function to perform the Stringprep processing. Adding a new - * Stringprep profile for your application within the API is - * straightforward. The Punycode API consists of one encoding function - * and one decoding function. The IDNA API consists of the ToASCII and - * ToUnicode functions, as well as an high-level interface for converting - * entire domain names to and from the ACE encoded form. + * The Stringprep API consists of two main functions, one for + * converting data from the system's native representation into UTF-8, + * and one function to perform the Stringprep processing. Adding a + * new Stringprep profile for your application within the API is + * straightforward. The Punycode API consists of one encoding + * function and one decoding function. 
The IDNA API consists of the + * ToASCII and ToUnicode functions, as well as an high-level interface + * for converting entire domain names to and from the ACE encoded + * form. The TLD API consists of one set of functions to extract the + * TLD name from a domain string, one set of functions to locate the + * proper TLD table to use based on the TLD name, and core functions + * to validate a string against a TLD table, and some utility wrappers + * to perform all the steps in one call. * * The library is used by, e.g., GNU SASL and Shishi to process user * names and passwords. Libidn can be built into GNU Libc to enable a @@ -522,6 +542,7 @@ stringprep_profile (const char *in, * \include example.c * \include example3.c * \include example4.c + * \include example5.c */ /** @@ -627,15 +648,6 @@ stringprep_profile (const char *in, **/ /** - * stringprep_kerberos5: - * @in: input/ouput array with string to prepare. - * @maxlen: maximum length of input/output array. - * - * Prepare the input UTF-8 string according to the draft Kerberos5 - * stringprep profile. Returns 0 iff successful, or an error code. - **/ - -/** * stringprep_plain: * @in: input/ouput array with string to prepare. * @maxlen: maximum length of input/output array. ============================================================ --- idna/stringprep.h dec94e47c4c9ed31135f8437ae5eb3d146332328 +++ idna/stringprep.h.in 49cee66eabccdf023a2116c5b6240bb39b3475d3 @@ -1,5 +1,5 @@ -/* stringprep.h Header file for stringprep functions. -*- c -*- - * Copyright (C) 2002, 2003 Simon Josefsson +/* stringprep.h --- Header file for stringprep functions. -*- c -*- + * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson * * This file is part of GNU Libidn. 
* @@ -15,7 +15,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with GNU Libidn; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA * */ @@ -29,10 +29,20 @@ extern "C" #include /* size_t */ #include /* ssize_t */ -#include "idna/idn-int.h" /* uint32_t */ +#include /* uint32_t */ -#define STRINGPREP_VERSION "0.3.4" + /* On Windows, variables that may be in a DLL must be marked + * specially. This is only active when not building libidn itself + * (!LIBIDN_BUILDING). It is only used for MinGW which declare + * __DECLSPEC_SUPPORTED or MSVC (_MSC_VER && _DLL). */ +#if !defined (LIBIDN_BUILDING) && (defined(__DECLSPEC_SUPPORTED) || (defined(_MSC_VER) && defined(_DLL))) +# define IDN_DLL_VAR __declspec (dllimport) +#else +# define IDN_DLL_VAR +#endif +#define STRINGPREP_VERSION "@PACKAGE_VERSION@" + /* Error codes. 
*/ typedef enum { @@ -94,35 +104,35 @@ extern "C" struct Stringprep_profiles { - char *name; + const char *name; const Stringprep_profile *tables; }; typedef struct Stringprep_profiles Stringprep_profiles; - extern const Stringprep_profiles stringprep_profiles[]; + extern IDN_DLL_VAR const Stringprep_profiles stringprep_profiles[]; /* Profiles */ - extern const Stringprep_table_element stringprep_rfc3454_A_1[]; - extern const Stringprep_table_element stringprep_rfc3454_B_1[]; - extern const Stringprep_table_element stringprep_rfc3454_B_2[]; - extern const Stringprep_table_element stringprep_rfc3454_B_3[]; - extern const Stringprep_table_element stringprep_rfc3454_C_1_1[]; - extern const Stringprep_table_element stringprep_rfc3454_C_1_2[]; - extern const Stringprep_table_element stringprep_rfc3454_C_2_1[]; - extern const Stringprep_table_element stringprep_rfc3454_C_2_2[]; - extern const Stringprep_table_element stringprep_rfc3454_C_3[]; - extern const Stringprep_table_element stringprep_rfc3454_C_4[]; - extern const Stringprep_table_element stringprep_rfc3454_C_5[]; - extern const Stringprep_table_element stringprep_rfc3454_C_6[]; - extern const Stringprep_table_element stringprep_rfc3454_C_7[]; - extern const Stringprep_table_element stringprep_rfc3454_C_8[]; - extern const Stringprep_table_element stringprep_rfc3454_C_9[]; - extern const Stringprep_table_element stringprep_rfc3454_D_1[]; - extern const Stringprep_table_element stringprep_rfc3454_D_2[]; + extern IDN_DLL_VAR const Stringprep_table_element stringprep_rfc3454_A_1[]; + extern IDN_DLL_VAR const Stringprep_table_element stringprep_rfc3454_B_1[]; + extern IDN_DLL_VAR const Stringprep_table_element stringprep_rfc3454_B_2[]; + extern IDN_DLL_VAR const Stringprep_table_element stringprep_rfc3454_B_3[]; + extern IDN_DLL_VAR const Stringprep_table_element stringprep_rfc3454_C_1_1[]; + extern IDN_DLL_VAR const Stringprep_table_element stringprep_rfc3454_C_1_2[]; + extern IDN_DLL_VAR const 
Stringprep_table_element stringprep_rfc3454_C_2_1[]; + extern IDN_DLL_VAR const Stringprep_table_element stringprep_rfc3454_C_2_2[]; + extern IDN_DLL_VAR const Stringprep_table_element stringprep_rfc3454_C_3[]; + extern IDN_DLL_VAR const Stringprep_table_element stringprep_rfc3454_C_4[]; + extern IDN_DLL_VAR const Stringprep_table_element stringprep_rfc3454_C_5[]; + extern IDN_DLL_VAR const Stringprep_table_element stringprep_rfc3454_C_6[]; + extern IDN_DLL_VAR const Stringprep_table_element stringprep_rfc3454_C_7[]; + extern IDN_DLL_VAR const Stringprep_table_element stringprep_rfc3454_C_8[]; + extern IDN_DLL_VAR const Stringprep_table_element stringprep_rfc3454_C_9[]; + extern IDN_DLL_VAR const Stringprep_table_element stringprep_rfc3454_D_1[]; + extern IDN_DLL_VAR const Stringprep_table_element stringprep_rfc3454_D_2[]; /* Nameprep */ - extern const Stringprep_profile stringprep_nameprep[]; + extern IDN_DLL_VAR const Stringprep_profile stringprep_nameprep[]; #define stringprep_nameprep(in, maxlen) \ stringprep(in, maxlen, 0, stringprep_nameprep) @@ -132,25 +142,25 @@ extern "C" /* SASL */ - extern const Stringprep_profile stringprep_saslprep[]; - extern const Stringprep_profile stringprep_plain[]; - extern const Stringprep_profile stringprep_trace[]; + extern IDN_DLL_VAR const Stringprep_profile stringprep_saslprep[]; + extern IDN_DLL_VAR const Stringprep_profile stringprep_plain[]; + extern IDN_DLL_VAR const Stringprep_profile stringprep_trace[]; #define stringprep_plain(in, maxlen) \ stringprep(in, maxlen, 0, stringprep_plain) /* Kerberos */ - extern const Stringprep_profile stringprep_kerberos5[]; + extern IDN_DLL_VAR const Stringprep_profile stringprep_kerberos5[]; #define stringprep_kerberos5(in, maxlen) \ stringprep(in, maxlen, 0, stringprep_kerberos5) /* XMPP */ - extern const Stringprep_profile stringprep_xmpp_nodeprep[]; - extern const Stringprep_profile stringprep_xmpp_resourceprep[]; - extern const Stringprep_table_element 
stringprep_xmpp_nodeprep_prohibit[]; + extern IDN_DLL_VAR const Stringprep_profile stringprep_xmpp_nodeprep[]; + extern IDN_DLL_VAR const Stringprep_profile stringprep_xmpp_resourceprep[]; + extern IDN_DLL_VAR const Stringprep_table_element stringprep_xmpp_nodeprep_prohibit[]; #define stringprep_xmpp_nodeprep(in, maxlen) \ stringprep(in, maxlen, 0, stringprep_xmpp_nodeprep) @@ -159,7 +169,7 @@ extern "C" /* iSCSI */ - extern const Stringprep_profile stringprep_iscsi[]; + extern IDN_DLL_VAR const Stringprep_profile stringprep_iscsi[]; #define stringprep_iscsi(in, maxlen) \ stringprep(in, maxlen, 0, stringprep_iscsi) @@ -181,6 +191,8 @@ extern "C" const char *profile, Stringprep_profile_flags flags); + extern const char *stringprep_strerror (Stringprep_rc rc); + extern const char *stringprep_check_version (const char *req_version); /* Utility */ @@ -201,8 +213,7 @@ extern "C" extern const char *stringprep_locale_charset (void); extern char *stringprep_convert (const char *str, const char *to_codeset, - const char *from_codeset, - int best_effort); + const char *from_codeset); extern char *stringprep_locale_to_utf8 (const char *str); extern char *stringprep_utf8_to_locale (const char *str); ============================================================ --- idna/toutf8.c 564af0eb9f1d23250e63fedb66c8c778a45cc989 +++ idna/toutf8.c 897c8520fe7be0c07cd384051a616ce41b5af619 @@ -1,5 +1,5 @@ -/* toutf8.c Convert strings from system locale into UTF-8. - * Copyright (C) 2002, 2003 Simon Josefsson +/* toutf8.c --- Convert strings from system locale into UTF-8. + * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson * * This file is part of GNU Libidn. 
* @@ -15,99 +15,87 @@ * * You should have received a copy of the GNU Lesser General Public * License along with GNU Libidn; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA * */ -#if HAVE_CONFIG_H +#ifdef HAVE_CONFIG_H # include "config.h" #endif +/* Get prototypes. */ +#include "stringprep.h" + +/* Get fprintf. */ #include + +/* Get getenv. */ #include + +/* Get strlen. */ #include -#include "idna/stringprep.h" +/* Get iconv_string. */ +#include "striconv.h" #ifdef _LIBC # define HAVE_ICONV 1 -# define LOCALE_WORKS 1 -# define ICONV_CONST +# define HAVE_LOCALE_H 1 +# define HAVE_LANGINFO_CODESET 1 #endif -#if defined(HAVE_ERRNO_H) || defined(_LIBC) -# include +#if HAVE_LOCALE_H +# include #endif -#ifdef HAVE_ICONV -# include +#if HAVE_LANGINFO_CODESET +# include +#endif -# if LOCALE_WORKS -# include -# include -# endif - -static const char * -stringprep_locale_charset_slow (void) -{ - const char *charset = getenv ("CHARSET"); /* flawfinder: ignore */ - - if (charset && *charset) - return charset; - -# ifdef LOCALE_WORKS - { - char *p; - - p = setlocale (LC_CTYPE, NULL); - setlocale (LC_CTYPE, ""); - - charset = nl_langinfo (CODESET); - - setlocale (LC_CTYPE, p); - - if (charset && *charset) - return charset; - } -# endif - - return "ASCII"; -} - -static const char *stringprep_locale_charset_cache = NULL; - +#ifdef _LIBC +# define stringprep_locale_charset() nl_langinfo (CODESET) +#else /** - * stringprep_locale_charset: + * stringprep_locale_charset - return charset used in current locale * - * Find out system locale charset. + * Find out current locale charset. The function respect the CHARSET + * environment variable, but typically uses nl_langinfo(CODESET) when + * it is supported. It fall back on "ASCII" if CHARSET isn't set and + * nl_langinfo isn't supported or return anything. 
* - * Note that this function return what it believe the SYSTEM is using - * as a locale, not what locale the program is currently in (modified, - * e.g., by a setlocale(LC_CTYPE, "ISO-8859-1")). The reason is that - * data read from argv[], stdin etc comes from the system, and is more - * likely to be encoded using the system locale than the program - * locale. + * Note that this function return the application's locale's preferred + * charset (or thread's locale's preffered charset, if your system + * support thread-specific locales). It does not return what the + * system may be using. Thus, if you receive data from external + * sources you cannot in general use this function to guess what + * charset it is encoded in. Use stringprep_convert from the external + * representation into the charset returned by this function, to have + * data in the locale encoding. * - * You can set the environment variable CHARSET to override the value - * returned. Note that this function caches the result, so you will - * have to modify CHARSET before calling (even indirectly) any - * stringprep functions, e.g., by setting it when invoking the - * application. - * - * Return value: Return the character set used by the system locale. + * Return value: Return the character set used by the current locale. * It will never return NULL, but use "ASCII" as a fallback. **/ const char * stringprep_locale_charset (void) { - if (!stringprep_locale_charset_cache) - stringprep_locale_charset_cache = stringprep_locale_charset_slow (); + const char *charset = getenv ("CHARSET"); /* flawfinder: ignore */ - return stringprep_locale_charset_cache; + if (charset && *charset) + return charset; + +# ifdef HAVE_LANGINFO_CODESET + charset = nl_langinfo (CODESET); + + if (charset && *charset) + return charset; +# endif + + return "ASCII"; } +#endif /** - * stringprep_convert: + * stringprep_convert - encode string using new character set * @str: input zero-terminated string. 
* @to_codeset: name of destination character set. * @from_codeset: name of origin character set, as used by @str. @@ -120,182 +108,23 @@ stringprep_convert (const char *str, **/ char * stringprep_convert (const char *str, - const char *to_codeset, const char *from_codeset, - int best_effort) + const char *to_codeset, const char *from_codeset) { - iconv_t cd; - char *dest; - char *outp; - char *p, *startp; - size_t inbytes_remaining; - size_t outbytes_remaining; - size_t err; - size_t outbuf_size; - int have_error = 0; - int from_utf8; - int len; - - if (strcmp (to_codeset, from_codeset) == 0) - { - char *p; - p = malloc (strlen (str) + 1); - if (!p) - return NULL; - strcpy (p, str); - return p; - } - - from_utf8 = (strcmp (from_codeset, "UTF-8") == 0); - -#ifdef ICONV_TRANSLIT - if (best_effort) - { - char to_c[strlen (to_codeset) + 10]; - strcpy (to_c, to_codeset); - strcat (to_c, "//TRANSLIT"); - cd = iconv_open (to_c, from_codeset); - } - else - cd = iconv_open (to_codeset, from_codeset); +#if HAVE_ICONV + return str_iconv (str, from_codeset, to_codeset); #else - cd = iconv_open (to_codeset, from_codeset); -#endif - - if (cd == (iconv_t) - 1) - return NULL; - - p = (char *) malloc (strlen (str) + 1); - if (p == NULL) - return NULL; - strcpy (p, str); - len = strlen (p); - startp = p; - inbytes_remaining = len; - outbuf_size = len + 1; /* + 1 for nul in case len == 1 */ - - outbytes_remaining = outbuf_size - 1; /* -1 for nul */ - outp = dest = malloc (outbuf_size); - -again: - - err = iconv (cd, (ICONV_CONST char **) &p, &inbytes_remaining, - &outp, &outbytes_remaining); - - if (err == (size_t) - 1) - { - switch (errno) - { - case EINVAL: - /* Incomplete text, do not report an error */ - break; - - case E2BIG: - { - size_t used = outp - dest; - - outbuf_size *= 2; - dest = realloc (dest, outbuf_size); - - outp = dest + used; - outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */ - - goto again; - } - break; - - case EILSEQ: - if (!best_effort || 
outbytes_remaining == 0) - { - have_error = 1; - break; - } - else - { - int char_len; - if (!from_utf8) - char_len = 1; // not from UTF-8, one '?' will do - else - { - if ((*p & 0x80) == 0) - char_len = 1; - else if ((*p & 0x40) == 0) - char_len = 1; // error: not allowed to begin a sequence - else if ((*p & 0x20) == 0) - char_len = 2; - else if ((*p & 0x10) == 0) - char_len = 3; - else if ((*p & 0x08) == 0) - char_len = 4; - else if ((*p & 0x04) == 0) - char_len = 5; - else if ((*p & 0x02) == 0) - char_len = 6; - else - char_len = 1; // error: 0xFE/0xFF not used by UTF-8 - } - if (char_len > inbytes_remaining) - char_len = inbytes_remaining; - p += char_len; - inbytes_remaining -= char_len; - *outp++ = '?'; - --outbytes_remaining; - if (inbytes_remaining > 0) - goto again; - } - break; - - default: - have_error = 1; - break; - } - } - - *outp = '\0'; - - if ((p - startp) != len) - have_error = 1; - - - free (startp); - - iconv_close (cd); - - if (have_error) - { - free (dest); - dest = NULL; - } - - return dest; -} - -#else /* HAVE_ICONV */ - -const char * -stringprep_locale_charset () -{ - return "ASCII"; -} - -char * -stringprep_convert (const char *str, - const char *to_codeset, const char *from_codeset, - int best_effort) -{ char *p; fprintf (stderr, "libidn: warning: libiconv not installed, cannot " - "convert data from %s to %s\n", from_codeset, to_codeset); + "convert data to UTF-8\n"); p = malloc (strlen (str) + 1); if (!p) return NULL; - strcpy (p, str); - return p; + return strcpy (p, str); +#endif } -#endif /* HAVE_ICONV */ - /** - * stringprep_locale_to_utf8: + * stringprep_locale_to_utf8 - convert locale encoded string to UTF-8 * @str: input zero terminated string. 
* * Convert string encoded in the locale's character set into UTF-8 by @@ -307,11 +136,11 @@ stringprep_locale_to_utf8 (const char *s char * stringprep_locale_to_utf8 (const char *str) { - return stringprep_convert (str, "UTF-8", stringprep_locale_charset (), 0); + return stringprep_convert (str, "UTF-8", stringprep_locale_charset ()); } /** - * stringprep_utf8_to_locale: + * stringprep_utf8_to_locale - encode UTF-8 string to locale encoding * @str: input zero terminated string. * * Convert string encoded in UTF-8 into the locale's character set by @@ -323,5 +152,5 @@ stringprep_utf8_to_locale (const char *s char * stringprep_utf8_to_locale (const char *str) { - return stringprep_convert (str, stringprep_locale_charset (), "UTF-8", 0); + return stringprep_convert (str, stringprep_locale_charset (), "UTF-8"); } ============================================================ --- idna/version.c f0fd69ee06dcad4e22b1f30d1a4220cb5bd5f518 +++ idna/version.c 30a0a5f71c59986d792fac7c1e0760d2ef0e8bcc @@ -1,6 +1,5 @@ -/* version.c Version handling. - * Copyright (C) 2002, 2003 Simon Josefsson - * Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc. +/* version.c --- Version handling. 
+ * Copyright (C) 2002, 2003, 2004, 2006, 2007 Simon Josefsson * * This file is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -14,57 +13,20 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this file; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA * */ -/* This file is based on src/global.c from Werner Koch's libgcrypt */ - -#if HAVE_CONFIG_H +#ifdef HAVE_CONFIG_H # include "config.h" #endif -#include -#include +#include "stringprep.h" -#include "idna/stringprep.h" +#include -static const char * -parse_version_number (const char *s, int *number) -{ - int val = 0; - - if (*s == '0' && isdigit (s[1])) - return NULL; /* leading zeros are not allowed */ - for (; isdigit (*s); s++) - { - val *= 10; - val += *s - '0'; - } - *number = val; - return val < 0 ? NULL : s; -} - - -static const char * -parse_version_string (const char *s, int *major, int *minor, int *micro) -{ - s = parse_version_number (s, major); - if (!s || *s != '.') - return NULL; - s++; - s = parse_version_number (s, minor); - if (!s || *s != '.') - return NULL; - s++; - s = parse_version_number (s, micro); - if (!s) - return NULL; - return s; /* patchlevel */ -} - /** - * stringprep_check_version + * stringprep_check_version - check for library version * @req_version: Required version number, or NULL. 
* * Check that the the version of the library is at minimum the requested one @@ -80,30 +42,8 @@ stringprep_check_version (const char *re const char * stringprep_check_version (const char *req_version) { - const char *ver = STRINGPREP_VERSION; - int my_major, my_minor, my_micro; - int rq_major, rq_minor, rq_micro; - const char *my_plvl, *rq_plvl; + if (!req_version || strverscmp (req_version, PACKAGE_VERSION) <= 0) + return PACKAGE_VERSION; - if (!req_version) - return ver; - - my_plvl = parse_version_string (ver, &my_major, &my_minor, &my_micro); - if (!my_plvl) - return NULL; /* very strange our own version is bogus */ - rq_plvl = parse_version_string (req_version, &rq_major, &rq_minor, - &rq_micro); - if (!rq_plvl) - return NULL; /* req version string is invalid */ - - if (my_major > rq_major - || (my_major == rq_major && my_minor > rq_minor) - || (my_major == rq_major && my_minor == rq_minor - && my_micro > rq_micro) - || (my_major == rq_major && my_minor == rq_minor - && my_micro == rq_micro && strcmp (my_plvl, rq_plvl) >= 0)) - { - return ver; - } return NULL; }