Back to index

glibc  2.9
tst-mbrtowc.c
Go to the documentation of this file.
00001 /* Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
00002    This file is part of the GNU C Library.
00003    Contributed by Ulrich Drepper <drepper@redhat.com>, 2000.
00004 
00005    The GNU C Library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Lesser General Public
00007    License as published by the Free Software Foundation; either
00008    version 2.1 of the License, or (at your option) any later version.
00009 
00010    The GNU C Library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Lesser General Public License for more details.
00014 
00015    You should have received a copy of the GNU Lesser General Public
00016    License along with the GNU C Library; if not, write to the Free
00017    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00018    02111-1307 USA.  */
00019 
00020 /* We always want assert to be fully defined.  */
00021 #undef NDEBUG
00022 #include <assert.h>
00023 #include <locale.h>
00024 #include <stdio.h>
00025 #include <stdlib.h>
00026 #include <string.h>
00027 #include <wchar.h>
00028 
00029 
00030 static int check_ascii (const char *locname);
00031 
/* UTF-8 single byte feeding test for mbrtowc(),
   contributed by Markus Kuhn <mkuhn@acm.org>.

   The caller is expected to have selected a UTF-8 LC_CTYPE locale
   before calling.  Every expectation is an assert, so a mismatch
   aborts the program; when all of them hold the function returns 0.  */
static int
utf8_test_1 (void)
{
  wchar_t wc;
  mbstate_t s;

  wc = 42;                      /* arbitrary number */
  memset (&s, 0, sizeof (s));   /* get s into initial state */
  assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
  assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) -2); /* 2nd byte processed */
  assert (wc == 42);            /* nothing has been stored into wc yet */
  assert (mbrtowc (&wc, "\xA0", 1, &s) == 1);   /* 3rd byte processed */
  assert (wc == 0x2260);        /* E2 89 A0 = U+2260 (not equal) decoded correctly */
  assert (mbrtowc (&wc, "", 1, &s) == 0);       /* terminating NUL byte processed */
  assert (wc == 0);             /* terminating NUL decoded as L'\0' */

  /* The following test is by Al Viro <aviro@redhat.com>.  */
  const char str[] = "\xe0\xa0\x80";

  wc = 42;                      /* arbitrary number */
  memset (&s, 0, sizeof (s));   /* get s into initial state */
  /* mbrtowc returns size_t, so compare against (size_t) -2 like the
     checks above instead of relying on the implicit conversion of a
     signed constant.  */
  assert (mbrtowc (&wc, str, 1, &s) == (size_t) -2);    /* lead byte only */
  assert (mbrtowc (&wc, str + 1, 2, &s) == 2);  /* remaining two bytes */
  assert (wc == 0x800);         /* E0 A0 80 = U+0800 */

  wc = 42;                      /* arbitrary number */
  memset (&s, 0, sizeof (s));   /* get s into initial state */
  assert (mbrtowc (&wc, str, 3, &s) == 3);      /* whole sequence at once */
  assert (wc == 0x800);

  return 0;
}
00066 
/* Check how mbrtowc() treats a terminating NUL, supplied as the empty
   string "", at different points of the three-byte UTF-8 sequence
   E2 89 A0.  The NUL must be accepted (result 0, state initial) when
   no character is pending and rejected with (size_t) -1 when a
   sequence is only partly consumed.  All checks are asserts; the
   function returns 0 when none of them fires.  */
static int
utf8_test_2 (void)
{
  const size_t incomplete = (size_t) -2;        /* more bytes required */
  const size_t invalid = (size_t) -1;           /* encoding error */
  wchar_t wide;
  mbstate_t state;

  /* Scenario 1: NUL in the initial state is a valid terminator.  */
  wide = 42;
  memset (&state, 0, sizeof state);
  assert (mbrtowc (NULL, "", 1, &state) == 0);
  assert (mbsinit (&state) != 0);

  /* Scenario 2: NUL after only the lead byte is invalid.  */
  wide = 42;
  memset (&state, 0, sizeof state);
  assert (mbrtowc (&wide, "\xE2", 1, &state) == incomplete);
  assert (mbrtowc (NULL, "", 1, &state) == invalid);

  /* Scenario 3: NUL after two of the three bytes is invalid.  */
  wide = 42;
  memset (&state, 0, sizeof state);
  assert (mbrtowc (&wide, "\xE2", 1, &state) == incomplete);
  assert (mbrtowc (&wide, "\x89", 1, &state) == incomplete);
  assert (mbrtowc (NULL, "", 1, &state) == invalid);

  /* Scenario 4: NUL after the complete character is valid again and
     leaves the conversion state initial.  */
  wide = 42;
  memset (&state, 0, sizeof state);
  assert (mbrtowc (&wide, "\xE2", 1, &state) == incomplete);
  assert (mbrtowc (&wide, "\x89", 1, &state) == incomplete);
  assert (mbrtowc (&wide, "\xA0", 1, &state) == 1);
  assert (mbrtowc (NULL, "", 1, &state) == 0);
  assert (mbsinit (&state) != 0);

  return 0;
}
00100 
/* Same scenarios as the empty-string NUL test, but the terminator is
   requested by passing a NULL string pointer (with length 0) to
   mbrtowc().  The call must yield 0 and leave the state initial when
   no character is pending, and (size_t) -1 when the sequence
   E2 89 A0 is only partly consumed.  All checks are asserts; the
   function returns 0 when none of them fires.  */
static int
utf8_test_3 (void)
{
  const size_t incomplete = (size_t) -2;        /* more bytes required */
  const size_t invalid = (size_t) -1;           /* encoding error */
  wchar_t wide;
  mbstate_t state;

  /* Scenario 1: NULL string in the initial state is accepted.  */
  wide = 42;
  memset (&state, 0, sizeof state);
  assert (mbrtowc (NULL, NULL, 0, &state) == 0);
  assert (mbsinit (&state) != 0);

  /* Scenario 2: NULL string after only the lead byte is invalid.  */
  wide = 42;
  memset (&state, 0, sizeof state);
  assert (mbrtowc (&wide, "\xE2", 1, &state) == incomplete);
  assert (mbrtowc (NULL, NULL, 0, &state) == invalid);

  /* Scenario 3: NULL string after two of the three bytes is invalid.  */
  wide = 42;
  memset (&state, 0, sizeof state);
  assert (mbrtowc (&wide, "\xE2", 1, &state) == incomplete);
  assert (mbrtowc (&wide, "\x89", 1, &state) == incomplete);
  assert (mbrtowc (NULL, NULL, 0, &state) == invalid);

  /* Scenario 4: NULL string after the complete character is accepted
     and leaves the conversion state initial.  */
  wide = 42;
  memset (&state, 0, sizeof state);
  assert (mbrtowc (&wide, "\xE2", 1, &state) == incomplete);
  assert (mbrtowc (&wide, "\x89", 1, &state) == incomplete);
  assert (mbrtowc (&wide, "\xA0", 1, &state) == 1);
  assert (mbrtowc (NULL, NULL, 0, &state) == 0);
  assert (mbsinit (&state) != 0);

  return 0;
}
00134 
/* Select the de_DE.UTF-8 locale for LC_CTYPE and run the three UTF-8
   mbrtowc() test groups.  If the locale cannot be selected, a message
   is written to stderr and the process exits with status 1.
   Otherwise the bitwise OR of the three results is returned.  */
static int
utf8_test (void)
{
  static const char utf8_locale[] = "de_DE.UTF-8";

  if (setlocale (LC_CTYPE, utf8_locale) == NULL)
    {
      fprintf (stderr, "locale '%s' not available!\n", utf8_locale);
      exit (1);
    }

  int failed = utf8_test_1 ();
  failed |= utf8_test_2 ();
  failed |= utf8_test_3 ();

  return failed;
}
00153 
00154 
/* Switch all locale categories to NAME and run the ASCII check there.
   If the locale cannot be selected, the previous locale would stay in
   effect and the check would silently re-test it, so that case is
   reported on stderr and counted as a failure instead.
   Returns 0 on success and nonzero on any failure.  */
static int
check_ascii_in_locale (const char *name)
{
  if (setlocale (LC_ALL, name) == NULL)
    {
      fprintf (stderr, "locale '%s' not available!\n", name);
      return 1;
    }

  /* Label the output with the locale name the C library reports as
     being in effect.  */
  return check_ascii (setlocale (LC_ALL, NULL));
}

/* Test driver.  Returns 0 when every check passed, nonzero otherwise.  */
int
main (void)
{
  int result = 0;

  /* Check mapping of ASCII range for some character sets which have
     ASCII as a subset.  For those the wide char generated must have
     the same value.  */
  result |= check_ascii_in_locale ("C");

  result |= check_ascii_in_locale ("de_DE.UTF-8");
  result |= utf8_test ();

  result |= check_ascii_in_locale ("ja_JP.EUC-JP");

  return result;
}
00175 
00176 
/* Convert each byte value 0..127 with mbrtowc() and verify that the
   resulting wide character has the same value and that exactly one
   byte is consumed (zero for the NUL byte).  LOCNAME is used only to
   label the output.  One line is printed for every failing byte,
   followed by a summary with the error count.
   Returns 1 if any byte failed and 0 otherwise.  */
static int
check_ascii (const char *locname)
{
  /* Read MB_CUR_MAX once; nothing in this function changes the locale.  */
  const size_t width = MB_CUR_MAX;
  int failures = 0;

  printf ("Testing locale \"%s\":\n", locname);

  for (int ch = 0; ch < 128; ++ch)
    {
      char bytes[width];
      wchar_t wide = 0xffffffff;        /* sentinel, must be overwritten */
      mbstate_t state;
      size_t len;

      /* The byte under test comes first; the remaining slots are
         filled with the successive values.  */
      for (size_t k = 0; k < width; ++k)
        bytes[k] = ch + k;

      memset (&state, '\0', sizeof state);
      len = mbrtowc (&wide, bytes, width, &state);

      /* Report the first problem found; a byte that passes every
         check skips the failure counter below.  */
      if (len == (size_t) -1)
        printf ("%s: '\\x%x': encoding error\n", locname, ch);
      else if (len == (size_t) -2)
        printf ("%s: '\\x%x': incomplete character\n", locname, ch);
      else if (len == 0 && ch != 0)
        printf ("%s: '\\x%x': 0 returned\n", locname, ch);
      else if (len != 0 && ch == 0)
        printf ("%s: '\\x%x': not 0 returned\n", locname, ch);
      else if (ch != 0 && len != 1)
        printf ("%s: '\\x%x': not 1 returned\n", locname, ch);
      else if (wide != (wchar_t) ch)
        printf ("%s: '\\x%x': wc != L'\\x%x'\n", locname, ch, ch);
      else
        continue;

      ++failures;
    }

  if (failures == 1)
    printf ("%d error\n", failures);
  else
    printf ("%d errors\n", failures);

  return failures != 0;
}