From 8defa2ad87d9a58488a894f91fbddfebefe3cc5f Mon Sep 17 00:00:00 2001 From: Marko Kreen Date: Sun, 6 Jan 2013 12:48:38 +0200 Subject: [PATCH] wchar: compat mbsnrtowcs(), tests --- test/Makefile | 1 + test/force_compat.sed | 1 + test/test_common.c | 1 + test/test_common.h | 1 + test/test_wchar.c | 139 ++++++++++++++++++++++++++++++++++++++++++ usual/wchar.c | 67 ++++++++++++++++++-- usual/wchar.h | 5 ++ 7 files changed, 209 insertions(+), 6 deletions(-) create mode 100644 test/test_wchar.c diff --git a/test/Makefile b/test/Makefile index dc8d339..af63339 100644 --- a/test/Makefile +++ b/test/Makefile @@ -14,6 +14,7 @@ regtest_system_SOURCES = \ test_cfparser.c test_endian.c test_hashtab.c test_mdict.c \ test_shlist.c test_time.c test_hashing.c test_fileutil.c \ test_socket.c test_getopt.c test_ctype.c test_fnmatch.c \ + test_wchar.c \ test_common.h tinytest.h tinytest_macros.h # build regtest_system against actual library diff --git a/test/force_compat.sed b/test/force_compat.sed index 33430da..dc3033e 100644 --- a/test/force_compat.sed +++ b/test/force_compat.sed @@ -11,3 +11,4 @@ /^#define.*GETOPT/s,.*,/* & */, /^#define.*CTYPE_ON_CHAR/s,.*,/* & */, /^#define.*FNMATCH/s,.*,/* & */, +/^#define.*MBSNRTOWCS/s,.*,/* & */, diff --git a/test/test_common.c b/test/test_common.c index 5c407cc..dca1520 100644 --- a/test/test_common.c +++ b/test/test_common.c @@ -12,6 +12,7 @@ struct testgroup_t groups[] = { { "hashing/", hashing_tests }, { "endian/", endian_tests }, { "string/", string_tests }, + { "wchar/", wchar_tests }, { "fnmatch/", fnmatch_tests }, { "ctype/", ctype_tests }, { "heap/", heap_tests }, diff --git a/test/test_common.h b/test/test_common.h index 375fa71..a9074b8 100644 --- a/test/test_common.h +++ b/test/test_common.h @@ -34,3 +34,4 @@ extern struct testcase_t socket_tests[]; extern struct testcase_t getopt_tests[]; extern struct testcase_t ctype_tests[]; extern struct testcase_t fnmatch_tests[]; +extern struct testcase_t wchar_tests[]; diff --git a/test/test_wchar.c b/test/test_wchar.c new file mode 100644 index 0000000..df87b45 --- /dev/null +++ b/test/test_wchar.c @@ -0,0 +1,139 @@ + +#include +#include + +#include "test_common.h" + + +/* + * mbstr_decode() + */ + +static const char *decode(const char *s, int inbuf) +{ + static char out[128]; + wchar_t tmp[128]; + wchar_t *res; + int reslen = 4; + unsigned i; + + for (i = 0; i < 128; i++) + tmp[i] = '~'; + + res = mbstr_decode(s, inbuf, &reslen, tmp, sizeof(tmp), true); + if (res == NULL) { + if (errno == EILSEQ) return "EILSEQ"; + if (errno == ENOMEM) return "ENOMEM"; + return "NULL??"; + } + if (res != tmp) + return "EBUF"; + if (res[reslen] == 0) + res[reslen] = 'Z'; + else + return "reslen fail?"; + + for (i = 0; i < 128; i++) { + out[i] = tmp[i]; + if (out[i] == '~') { + out[i+1] = 0; + break; + } else if (out[i] == 0) { + out[i] = '#'; + } else if (tmp[i] > 127) { + out[i] = 'A' + tmp[i] % 26; + } + } + return out; +} + +static void test_mbstr_decode(void *p) +{ + str_check(decode("", 0), "Z~"); + str_check(decode("", 1), "Z~"); + str_check(decode("a", 0), "Z~"); + + str_check(decode("abc", 0), "Z~"); + str_check(decode("abc", 1), "aZ~"); + str_check(decode("abc", 2), "abZ~"); + str_check(decode("abc", 3), "abcZ~"); + str_check(decode("abc", 4), "abcZ~"); + str_check(decode("abc", 5), "abcZ~"); + + if (MB_CUR_MAX > 1) { + str_check(decode("aa\200cc", 5), "aaYccZ~"); + str_check(decode("a\200cc", 5), "aYccZ~"); + str_check(decode("aa\200c", 5), "aaYcZ~"); + } +end:; +} + +/* + * mbsnrtowcs() + */ + + +static const char *mbsnr(const char *str, int inbuf, int outbuf) +{ + static char out[128]; + wchar_t tmp[128]; + int res; + unsigned i; + const char *s = str; + mbstate_t ps; + + for (i = 0; i < 128; i++) + tmp[i] = '~'; + + memset(&ps, 0, sizeof(ps)); + res = mbsnrtowcs(tmp, &s, inbuf, outbuf, &ps); + if (res < 0) { + if (errno == EILSEQ) { + snprintf(out, sizeof(out), "EILSEQ(%d)", (int)(s - str)); + return out; + } + return "unknown error"; + } + if (tmp[res] == 0) + tmp[res] = s ? 'z' : 'Z'; + + for (i = 0; i < 128; i++) { + out[i] = tmp[i]; + if (out[i] == '~') { + out[i+1] = 0; + break; + } + } + return out; +} + +static void test_mbsnrtowcs(void *p) +{ + str_check(mbsnr("", 1, 1), "Z~"); + str_check(mbsnr("", 0, 0), "~"); + str_check(mbsnr("", 0, 1), "~"); /* XXX */ + str_check(mbsnr("", 1, 0), "~"); + + str_check(mbsnr("x", 1, 1), "x~"); + str_check(mbsnr("x", 0, 0), "~"); + str_check(mbsnr("x", 0, 1), "~"); /* XXX */ + str_check(mbsnr("x", 1, 0), "~"); + + str_check(mbsnr("abc", 3, 3), "abc~"); + str_check(mbsnr("abc", 3, 4), "abc~"); /* XXX */ + + str_check(mbsnr("abc", 4, 3), "abc~"); + str_check(mbsnr("abc", 4, 4), "abcZ~"); +end:; +} + +/* + * Describe + */ + +struct testcase_t wchar_tests[] = { + { "mbsnrtowcs", test_mbsnrtowcs }, + { "mbstr_decode", test_mbstr_decode }, + END_OF_TESTCASES +}; + diff --git a/usual/wchar.c b/usual/wchar.c index fbed966..640ce76 100644 --- a/usual/wchar.c +++ b/usual/wchar.c @@ -44,17 +44,18 @@ wchar_t *mbstr_decode(const char *str, int str_len, int *wlen_p, return NULL; } -#ifdef HAVE_MBSNRTOWCS /* try full decode at once */ s = str; memset(&ps, 0, sizeof(ps)); clen = mbsnrtowcs(dst, &s, str_len, wmax, &ps); - if (clen > 0 && s == NULL) { + if (clen >= 0) { if (wlen_p) *wlen_p = clen; + dst[clen] = 0; return dst; } -#endif + if (!allow_invalid) + goto fail; /* full decode failed, decode chars one-by-one */ s = str; @@ -64,29 +65,34 @@ wchar_t *mbstr_decode(const char *str, int str_len, int *wlen_p, while (s < str_end && w < wend) { clen = mbrtowc(w, s, str_end - s, &ps); if (clen > 0) { + /* single char */ w++; s += clen; + } else if (clen == 0) { + /* string end */ + break; } else if (allow_invalid) { /* allow invalid encoding */ memset(&ps, 0, sizeof(ps)); *w++ = (unsigned char)*s++; } else { + /* invalid encoding */ goto fail; } } - if (s != str_end) + /* make sure we got string end */ + if (s < str_end && *s != '\0') goto fail; *w = 0; - if (wlen_p != NULL) + if (wlen_p) *wlen_p = w - dst; return dst; fail: if (dst != wbuf) free(dst); - errno = EILSEQ; return NULL; } @@ -107,3 +113,52 @@ wctype_t wctype_wcsn(const wchar_t *name, unsigned int namelen) return wctype(buf); } +#ifndef HAVE_MBSNRTOWCS + +size_t mbsnrtowcs(wchar_t *dst, const char **src_p, size_t srclen, size_t dstlen, mbstate_t *ps) +{ + int clen; + const char *s, *s_end; + wchar_t *w; + mbstate_t pstmp; + size_t count = 0; + + if (!ps) { + memset(&pstmp, 0, sizeof(pstmp)); + ps = &pstmp; + } + + s = *src_p; + s_end = s + srclen; + w = dst; + while (s < s_end) { + if (w && count >= dstlen) { + /* dst is full */ + break; + } + clen = mbrtowc(w, s, s_end - s, ps); + if (clen > 0) { + /* proper character */ + if (w) + w++; + count++; + s += clen; + } else if (clen < 0) { + /* invalid encoding */ + *src_p = s; + return (size_t)(-1); + } else { + /* end of string */ + if (w) + *w = 0; + *src_p = NULL; + return count; + } + } + /* end due to srclen */ + *src_p = s; + return count; +} + +#endif + diff --git a/usual/wchar.h b/usual/wchar.h index 1d06a84..38c98f5 100644 --- a/usual/wchar.h +++ b/usual/wchar.h @@ -28,4 +28,9 @@ wchar_t *mbstr_decode(const char *str, int str_len, int *wlen_p, wchar_t *wbuf, wctype_t wctype_wcsn(const wchar_t *name, unsigned int namelen); +#ifndef HAVE_MBSNRTOWCS +#define mbsnrtowcs(a,b,c,d,e) usual_mbsnrtowcs(a,b,c,d,e) +size_t mbsnrtowcs(wchar_t *dst, const char **src_p, size_t srclen, size_t dstlen, mbstate_t *ps); +#endif + #endif -- 2.39.5