/* Illustration of mishandling of UNDEFINED codes by collation. */

#define _GNU_SOURCE

/* NOTE(review): the header names were missing from the #include lines in
   the reviewed copy of this file; the headers below are the ones the
   visible code requires -- confirm against the original list. */
#include <locale.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>

enum {
    XFRM_BUF_LEN = 1024,    /* capacity (in wchar_t) of each wcsxfrm buffer */
    LOCALE_NAME_LEN = 256   /* capacity of the saved LC_CTYPE locale name */
};

/* The two strings being collated: U+FFFD followed by U+000A, and a lone
   U+000A. */
static const wchar_t line1[] = L"\xFFFD\x000A";
static const wchar_t line2[] = L"\x000A";

/* Locales to test, NULL-terminated.  expected_str[] and expected_val[]
   below are indexed in parallel with this table. */
static const char *locales[] = {
    "en_US.UTF-8",
    "th_TH.UTF-8",
    "cs_CZ.UTF-8",
    "ja_JP.UTF-8",
    "ko_KR.UTF-8",
    NULL
};

/* Message printed when the comparison result for the matching locale is
   not the expected one. */
static const char *expected_str[] = {
    "Expected \"greater than\" since UFFFD is UNDEFINED and should sort after U000A!",
    "Expected \"equal to\" since UFFFD is UNDEFINED and should be ignored!",
    "Expected \"equal to\" since UFFFD is UNDEFINED and should be ignored!",
    "Expected \"greater than\" since UFFFD is UNDEFINED and should sort after U000A!",
    "Expected \"greater than\" since UFFFD is UNDEFINED and should sort after U000A!"
};

/* Expected sign (-1, 0 or 1) of wcscoll(line1, line2) per locale. */
static const int expected_val[] = { 1, 0, 0, 1, 1 };

/* Reduce a comparison result to -1, 0 or 1. */
static int sign_of(int value)
{
    return (value > 0) - (value < 0);
}

/* Print "<number>: <text> " followed by each wide character of TEXT as a
   hexadecimal code, starting a new output line whenever the running
   column count has reached 68. */
static void print_code_points(int number, const wchar_t *text)
{
    int column = 6;

    wprintf(L"%5d: %ls ", number, text);
    for (const wchar_t *p = text; *p; p++) {
        unsigned long code = (unsigned long) *p;

        if (column >= 68) {
            wprintf(L"\n ");
            column = 6;
        }
        /* The argument is unsigned long, so the conversion needs the
           'l' length modifier. */
        if (code <= 0xffffUL) {
            wprintf(L" U%04lX", code);
            column += 6;
        } else {
            wprintf(L" U%08lX ", code);
            column += 12;
        }
    }
    wprintf(L"\n");
}

/* Transform SRC with wcsxfrm() into DST (which must hold XFRM_BUF_LEN
   wide characters) and print the resulting elements in hex, eight per
   output line, prefixed by NUMBER.

   Returns 0 on success.  Returns -1, after reporting on stderr, when
   wcsxfrm() returns a value that is not smaller than XFRM_BUF_LEN. */
static int print_transformed(int number, const wchar_t *src, wchar_t *dst)
{
    size_t len = wcsxfrm(dst, src, XFRM_BUF_LEN);

    if (len >= (size_t) XFRM_BUF_LEN) {
        /* size_t must be printed with %zu. */
        fprintf(stderr, "wcsxfrm returned %zu >= %d\n", len, XFRM_BUF_LEN);
        return -1;
    }

    wprintf(L"%5d:", number);
    for (size_t k = 0; k < len; k++) {
        if (k != 0 && (k & 7) == 0) {
            wprintf(L"\n ");
        }
        /* %x expects unsigned int; wchar_t is not guaranteed to be one. */
        wprintf(L"%9x", (unsigned int) dst[k]);
    }
    wprintf(L"\n");
    return 0;
}

/* For every locale in locales[], compare line1 against line2 both with
   wcscoll() and with wcscmp() on their wcsxfrm() images, print the
   results, and check the outcome against expected_val[].

   Returns EXIT_SUCCESS when every locale gives the expected ordering.
   Returns EXIT_FAILURE when a locale cannot be set, wcsxfrm() overflows
   its buffer, the two comparison methods disagree in sign, or any locale
   gives an unexpected ordering. */
int main(int argc, char **argv)
{
    wchar_t xfrm1[XFRM_BUF_LEN];
    wchar_t xfrm2[XFRM_BUF_LEN];
    int retval = EXIT_SUCCESS;

    (void) argc;
    (void) argv;

    /* Set a UTF-8 LC_CTYPE before the first wide-character output. */
    if (!setlocale(LC_CTYPE, "en_US.UTF-8")) {
        fprintf(stderr, "setlocale failed\n");
        return EXIT_FAILURE;
    }

    print_code_points(1, line1);
    print_code_points(2, line2);
    wprintf(L"\n");

    for (int i = 0; locales[i]; i++) {
        char ctype_name[LOCALE_NAME_LEN];
        const char *name;
        const char *collate_name;
        int coll;
        int xfrm;

        name = setlocale(LC_CTYPE, locales[i]);
        if (!name) {
            fprintf(stderr, "setlocale failed\n");
            return EXIT_FAILURE;
        }
        /* The string returned by setlocale() may be overwritten by the
           next setlocale() call, so keep a private copy (silently
           truncated if longer than the buffer). */
        snprintf(ctype_name, sizeof ctype_name, "%s", name);

        collate_name = setlocale(LC_COLLATE, locales[i]);
        if (!collate_name) {
            fprintf(stderr, "setlocale failed\n");
            return EXIT_FAILURE;
        }

        wprintf(L"locale: LC_CTYPE=%s LC_COLLATE=%s\n",
                ctype_name, collate_name);
        wprintf(L" wcsxfrm --\n");

        if (print_transformed(1, line1, xfrm1) != 0
            || print_transformed(2, line2, xfrm2) != 0) {
            return EXIT_FAILURE;
        }

        /* Only the sign of each result is meaningful, so compare signs. */
        coll = sign_of(wcscoll(line1, line2));
        xfrm = sign_of(wcscmp(xfrm1, xfrm2));
        if (coll != xfrm) {
            wprintf(L"wcscoll and wcscmp(wcsxfrm,wcsxfrm) disagree!\n");
            return EXIT_FAILURE;
        }

        if (coll == 0) {
            wprintf(L" strings compare equal\n");
        } else if (coll < 0) {
            wprintf(L" string1 compares less than string2\n");
        } else {
            wprintf(L" string1 compares greater than string2\n");
        }

        if (coll != expected_val[i]) {
            wprintf(L" %s\n", expected_str[i]);
            retval = EXIT_FAILURE;
        }
        wprintf(L"\n");
    }

    return retval;
}