Back to index

glibc  2.9
iofwide.c
Go to the documentation of this file.
00001 /* Copyright (C) 1999-2003, 2005 Free Software Foundation, Inc.
00002    This file is part of the GNU C Library.
00003 
00004    The GNU C Library is free software; you can redistribute it and/or
00005    modify it under the terms of the GNU Lesser General Public
00006    License as published by the Free Software Foundation; either
00007    version 2.1 of the License, or (at your option) any later version.
00008 
00009    The GNU C Library is distributed in the hope that it will be useful,
00010    but WITHOUT ANY WARRANTY; without even the implied warranty of
00011    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012    Lesser General Public License for more details.
00013 
00014    You should have received a copy of the GNU Lesser General Public
00015    License along with the GNU C Library; if not, write to the Free
00016    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00017    02111-1307 USA.
00018 
00019    As a special exception, if you link the code in this file with
00020    files compiled with a GNU compiler to produce an executable,
00021    that does not cause the resulting executable to be covered by
00022    the GNU Lesser General Public License.  This exception does not
00023    however invalidate any other reasons why the executable file
00024    might be covered by the GNU Lesser General Public License.
00025    This exception applies to code released by its copyright holders
00026    in files containing the exception.  */
00027 
00028 #include <libioP.h>
00029 #ifdef _LIBC
00030 # include <dlfcn.h>
00031 # include <wchar.h>
00032 #endif
00033 #include <assert.h>
00034 #include <stdlib.h>
00035 #include <string.h>
00036 
00037 #ifdef _LIBC
00038 # include <langinfo.h>
00039 # include <locale/localeinfo.h>
00040 # include <wcsmbs/wcsmbsload.h>
00041 # include <iconv/gconv_int.h>
00042 # include <shlib-compat.h>
00043 # include <sysdep.h>
00044 #endif
00045 
00046 
00047 /* Prototypes of libio's codecvt functions.  */
00048 static enum __codecvt_result do_out (struct _IO_codecvt *codecvt,
00049                                  __mbstate_t *statep,
00050                                  const wchar_t *from_start,
00051                                  const wchar_t *from_end,
00052                                  const wchar_t **from_stop, char *to_start,
00053                                  char *to_end, char **to_stop);
00054 static enum __codecvt_result do_unshift (struct _IO_codecvt *codecvt,
00055                                     __mbstate_t *statep, char *to_start,
00056                                     char *to_end, char **to_stop);
00057 static enum __codecvt_result do_in (struct _IO_codecvt *codecvt,
00058                                 __mbstate_t *statep,
00059                                 const char *from_start,
00060                                 const char *from_end,
00061                                 const char **from_stop, wchar_t *to_start,
00062                                 wchar_t *to_end, wchar_t **to_stop);
00063 static int do_encoding (struct _IO_codecvt *codecvt);
00064 static int do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
00065                     const char *from_start,
00066                     const char *from_end, _IO_size_t max);
00067 static int do_max_length (struct _IO_codecvt *codecvt);
00068 static int do_always_noconv (struct _IO_codecvt *codecvt);
00069 
00070 
00071 /* The functions used in `codecvt' for libio are always the same.  */
00072 const struct _IO_codecvt __libio_codecvt =
00073 {
00074   .__codecvt_destr = NULL,         /* Destructor, never used.  */
00075   .__codecvt_do_out = do_out,
00076   .__codecvt_do_unshift = do_unshift,
00077   .__codecvt_do_in = do_in,
00078   .__codecvt_do_encoding = do_encoding,
00079   .__codecvt_do_always_noconv = do_always_noconv,
00080   .__codecvt_do_length = do_length,
00081   .__codecvt_do_max_length = do_max_length
00082 };
00083 
00084 
#ifdef _LIBC
/* Transliteration descriptor for libio; only the transliteration
   function is set explicitly, every other member is zero-initialized.  */
const struct __gconv_trans_data __libio_translit attribute_hidden =
  { .__trans_fct = __gconv_transliterate };
#endif
00091 
00092 
00093 /* Return orientation of stream.  If mode is nonzero try to change
00094    the orientation first.  */
00095 #undef _IO_fwide
00096 int
00097 _IO_fwide (fp, mode)
00098      _IO_FILE *fp;
00099      int mode;
00100 {
00101   /* Normalize the value.  */
00102   mode = mode < 0 ? -1 : (mode == 0 ? 0 : 1);
00103 
00104 #if defined SHARED && defined _LIBC \
00105     && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
00106   if (__builtin_expect (&_IO_stdin_used == NULL, 0)
00107       && (fp == _IO_stdin || fp == _IO_stdout || fp == _IO_stderr))
00108     /* This is for a stream in the glibc 2.0 format.  */
00109     return -1;
00110 #endif
00111 
00112   /* The orientation already has been determined.  */
00113   if (fp->_mode != 0
00114       /* Or the caller simply wants to know about the current orientation.  */
00115       || mode == 0)
00116     return fp->_mode;
00117 
00118   /* Set the orientation appropriately.  */
00119   if (mode > 0)
00120     {
00121       struct _IO_codecvt *cc = fp->_codecvt = &fp->_wide_data->_codecvt;
00122 
00123       fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end;
00124       fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;
00125 
00126       /* Get the character conversion functions based on the currently
00127         selected locale for LC_CTYPE.  */
00128 #ifdef _LIBC
00129       {
00130        /* Clear the state.  We start all over again.  */
00131        memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t));
00132        memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t));
00133 
00134        struct gconv_fcts fcts;
00135        __wcsmbs_clone_conv (&fcts);
00136        assert (fcts.towc_nsteps == 1);
00137        assert (fcts.tomb_nsteps == 1);
00138 
00139        /* The functions are always the same.  */
00140        *cc = __libio_codecvt;
00141 
00142        cc->__cd_in.__cd.__nsteps = fcts.towc_nsteps;
00143        cc->__cd_in.__cd.__steps = fcts.towc;
00144 
00145        cc->__cd_in.__cd.__data[0].__invocation_counter = 0;
00146        cc->__cd_in.__cd.__data[0].__internal_use = 1;
00147        cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST;
00148        cc->__cd_in.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
00149 
00150        /* XXX For now no transliteration.  */
00151        cc->__cd_in.__cd.__data[0].__trans = NULL;
00152 
00153        cc->__cd_out.__cd.__nsteps = fcts.tomb_nsteps;
00154        cc->__cd_out.__cd.__steps = fcts.tomb;
00155 
00156        cc->__cd_out.__cd.__data[0].__invocation_counter = 0;
00157        cc->__cd_out.__cd.__data[0].__internal_use = 1;
00158        cc->__cd_out.__cd.__data[0].__flags = __GCONV_IS_LAST;
00159        cc->__cd_out.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
00160 
00161        /* And now the transliteration.  */
00162        cc->__cd_out.__cd.__data[0].__trans
00163          = (struct __gconv_trans_data  *) &__libio_translit;
00164       }
00165 #else
00166 # ifdef _GLIBCPP_USE_WCHAR_T
00167       {
00168        /* Determine internal and external character sets.
00169 
00170           XXX For now we make our life easy: we assume a fixed internal
00171           encoding (as most sane systems have; hi HP/UX!).  If somebody
00172           cares about systems which changing internal charsets they
00173           should come up with a solution for the determination of the
00174           currently used internal character set.  */
00175        const char *internal_ccs = _G_INTERNAL_CCS;
00176        const char *external_ccs = NULL;
00177 
00178 #  ifdef HAVE_NL_LANGINFO
00179        external_ccs = nl_langinfo (CODESET);
00180 #  endif
00181        if (external_ccs == NULL)
00182          external_ccs = "ISO-8859-1";
00183 
00184        cc->__cd_in = iconv_open (internal_ccs, external_ccs);
00185        if (cc->__cd_in != (iconv_t) -1)
00186          cc->__cd_out = iconv_open (external_ccs, internal_ccs);
00187 
00188        if (cc->__cd_in == (iconv_t) -1 || cc->__cd_out == (iconv_t) -1)
00189          {
00190            if (cc->__cd_in != (iconv_t) -1)
00191              iconv_close (cc->__cd_in);
00192            /* XXX */
00193            abort ();
00194          }
00195       }
00196 # else
00197 #  error "somehow determine this from LC_CTYPE"
00198 # endif
00199 #endif
00200 
00201       /* From now on use the wide character callback functions.  */
00202       ((struct _IO_FILE_plus *) fp)->vtable = fp->_wide_data->_wide_vtable;
00203 
00204       /* One last twist: we get the current stream position.  The wide
00205         char streams have much more problems with not knowing the
00206         current position and so we should disable the optimization
00207         which allows the functions without knowing the position.  */
00208       fp->_offset = _IO_SYSSEEK (fp, 0, _IO_seek_cur);
00209     }
00210 
00211   /* Set the mode now.  */
00212   fp->_mode = mode;
00213 
00214   return mode;
00215 }
00216 
00217 
00218 static enum __codecvt_result
00219 do_out (struct _IO_codecvt *codecvt, __mbstate_t *statep,
00220        const wchar_t *from_start, const wchar_t *from_end,
00221        const wchar_t **from_stop, char *to_start, char *to_end,
00222        char **to_stop)
00223 {
00224   enum __codecvt_result result;
00225 
00226 #ifdef _LIBC
00227   struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
00228   int status;
00229   size_t dummy;
00230   const unsigned char *from_start_copy = (unsigned char *) from_start;
00231 
00232   codecvt->__cd_out.__cd.__data[0].__outbuf = (unsigned char *) to_start;
00233   codecvt->__cd_out.__cd.__data[0].__outbufend = (unsigned char *) to_end;
00234   codecvt->__cd_out.__cd.__data[0].__statep = statep;
00235 
00236   __gconv_fct fct = gs->__fct;
00237 #ifdef PTR_DEMANGLE
00238   if (gs->__shlib_handle != NULL)
00239     PTR_DEMANGLE (fct);
00240 #endif
00241 
00242   status = DL_CALL_FCT (fct,
00243                      (gs, codecvt->__cd_out.__cd.__data, &from_start_copy,
00244                       (const unsigned char *) from_end, NULL,
00245                       &dummy, 0, 0));
00246 
00247   *from_stop = (wchar_t *) from_start_copy;
00248   *to_stop = (char *) codecvt->__cd_out.__cd.__data[0].__outbuf;
00249 
00250   switch (status)
00251     {
00252     case __GCONV_OK:
00253     case __GCONV_EMPTY_INPUT:
00254       result = __codecvt_ok;
00255       break;
00256 
00257     case __GCONV_FULL_OUTPUT:
00258     case __GCONV_INCOMPLETE_INPUT:
00259       result = __codecvt_partial;
00260       break;
00261 
00262     default:
00263       result = __codecvt_error;
00264       break;
00265     }
00266 #else
00267 # ifdef _GLIBCPP_USE_WCHAR_T
00268   size_t res;
00269   const char *from_start_copy = (const char *) from_start;
00270   size_t from_len = from_end - from_start;
00271   char *to_start_copy = to_start;
00272   size_t to_len = to_end - to_start;
00273   res = iconv (codecvt->__cd_out, &from_start_copy, &from_len,
00274               &to_start_copy, &to_len);
00275 
00276   if (res == 0 || from_len == 0)
00277     result = __codecvt_ok;
00278   else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
00279     result = __codecvt_partial;
00280   else
00281     result = __codecvt_error;
00282 
00283 # else
00284   /* Decide what to do.  */
00285   result = __codecvt_error;
00286 # endif
00287 #endif
00288 
00289   return result;
00290 }
00291 
00292 
00293 static enum __codecvt_result
00294 do_unshift (struct _IO_codecvt *codecvt, __mbstate_t *statep,
00295            char *to_start, char *to_end, char **to_stop)
00296 {
00297   enum __codecvt_result result;
00298 
00299 #ifdef _LIBC
00300   struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
00301   int status;
00302   size_t dummy;
00303 
00304   codecvt->__cd_out.__cd.__data[0].__outbuf = (unsigned char *) to_start;
00305   codecvt->__cd_out.__cd.__data[0].__outbufend = (unsigned char *) to_end;
00306   codecvt->__cd_out.__cd.__data[0].__statep = statep;
00307 
00308   __gconv_fct fct = gs->__fct;
00309 #ifdef PTR_DEMANGLE
00310   if (gs->__shlib_handle != NULL)
00311     PTR_DEMANGLE (fct);
00312 #endif
00313 
00314   status = DL_CALL_FCT (fct,
00315                      (gs, codecvt->__cd_out.__cd.__data, NULL, NULL,
00316                       NULL, &dummy, 1, 0));
00317 
00318   *to_stop = (char *) codecvt->__cd_out.__cd.__data[0].__outbuf;
00319 
00320   switch (status)
00321     {
00322     case __GCONV_OK:
00323     case __GCONV_EMPTY_INPUT:
00324       result = __codecvt_ok;
00325       break;
00326 
00327     case __GCONV_FULL_OUTPUT:
00328     case __GCONV_INCOMPLETE_INPUT:
00329       result = __codecvt_partial;
00330       break;
00331 
00332     default:
00333       result = __codecvt_error;
00334       break;
00335     }
00336 #else
00337 # ifdef _GLIBCPP_USE_WCHAR_T
00338   size_t res;
00339   char *to_start_copy = (char *) to_start;
00340   size_t to_len = to_end - to_start;
00341 
00342   res = iconv (codecvt->__cd_out, NULL, NULL, &to_start_copy, &to_len);
00343 
00344   if (res == 0)
00345     result = __codecvt_ok;
00346   else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
00347     result = __codecvt_partial;
00348   else
00349     result = __codecvt_error;
00350 # else
00351   /* Decide what to do.  */
00352   result = __codecvt_error;
00353 # endif
00354 #endif
00355 
00356   return result;
00357 }
00358 
00359 
00360 static enum __codecvt_result
00361 do_in (struct _IO_codecvt *codecvt, __mbstate_t *statep,
00362        const char *from_start, const char *from_end, const char **from_stop,
00363        wchar_t *to_start, wchar_t *to_end, wchar_t **to_stop)
00364 {
00365   enum __codecvt_result result;
00366 
00367 #ifdef _LIBC
00368   struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
00369   int status;
00370   size_t dummy;
00371   const unsigned char *from_start_copy = (unsigned char *) from_start;
00372 
00373   codecvt->__cd_in.__cd.__data[0].__outbuf = (unsigned char *) to_start;
00374   codecvt->__cd_in.__cd.__data[0].__outbufend = (unsigned char *) to_end;
00375   codecvt->__cd_in.__cd.__data[0].__statep = statep;
00376 
00377   __gconv_fct fct = gs->__fct;
00378 #ifdef PTR_DEMANGLE
00379   if (gs->__shlib_handle != NULL)
00380     PTR_DEMANGLE (fct);
00381 #endif
00382 
00383   status = DL_CALL_FCT (fct,
00384                      (gs, codecvt->__cd_in.__cd.__data, &from_start_copy,
00385                       (const unsigned char *) from_end, NULL,
00386                       &dummy, 0, 0));
00387 
00388   *from_stop = (const char *) from_start_copy;
00389   *to_stop = (wchar_t *) codecvt->__cd_in.__cd.__data[0].__outbuf;
00390 
00391   switch (status)
00392     {
00393     case __GCONV_OK:
00394     case __GCONV_EMPTY_INPUT:
00395       result = __codecvt_ok;
00396       break;
00397 
00398     case __GCONV_FULL_OUTPUT:
00399     case __GCONV_INCOMPLETE_INPUT:
00400       result = __codecvt_partial;
00401       break;
00402 
00403     default:
00404       result = __codecvt_error;
00405       break;
00406     }
00407 #else
00408 # ifdef _GLIBCPP_USE_WCHAR_T
00409   size_t res;
00410   const char *from_start_copy = (const char *) from_start;
00411   size_t from_len = from_end - from_start;
00412   char *to_start_copy = (char *) from_start;
00413   size_t to_len = to_end - to_start;
00414 
00415   res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
00416               &to_start_copy, &to_len);
00417 
00418   if (res == 0)
00419     result = __codecvt_ok;
00420   else if (to_len == 0)
00421     result = __codecvt_partial;
00422   else if (from_len < codecvt->__codecvt_do_max_length (codecvt))
00423     result = __codecvt_partial;
00424   else
00425     result = __codecvt_error;
00426 # else
00427   /* Decide what to do.  */
00428   result = __codecvt_error;
00429 # endif
00430 #endif
00431 
00432   return result;
00433 }
00434 
00435 
/* Describe the external encoding: return -1 if it is stateful (or if
   nothing is known about it), 0 if the number of bytes needed per wide
   character varies, and otherwise that constant number of bytes.  */
static int
do_encoding (struct _IO_codecvt *codecvt)
{
#ifndef _LIBC
  /* Worst case scenario.  */
  return -1;
#else
  const struct __gconv_step *step = &codecvt->__cd_in.__cd.__steps[0];

  /* A stateful encoding has no fixed width.  */
  if (step->__stateful)
    return -1;

  /* Stateless: report the width only when minimum and maximum input
     sizes agree, otherwise report a variable width.  */
  return (step->__min_needed_from == step->__max_needed_from
          ? step->__min_needed_from
          : 0);
#endif
}
00456 
00457 
/* Report whether the conversion is an identity mapping.  The answer is
   always "no" (zero); CODECVT is not examined.  */
static int
do_always_noconv (struct _IO_codecvt *codecvt)
{
  (void) codecvt;

  return 0;
}
00463 
00464 
00465 static int
00466 do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
00467           const char *from_start, const char *from_end, _IO_size_t max)
00468 {
00469   int result;
00470 #ifdef _LIBC
00471   const unsigned char *cp = (const unsigned char *) from_start;
00472   wchar_t to_buf[max];
00473   struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
00474   int status;
00475   size_t dummy;
00476 
00477   codecvt->__cd_in.__cd.__data[0].__outbuf = (unsigned char *) to_buf;
00478   codecvt->__cd_in.__cd.__data[0].__outbufend = (unsigned char *) &to_buf[max];
00479   codecvt->__cd_in.__cd.__data[0].__statep = statep;
00480 
00481   __gconv_fct fct = gs->__fct;
00482 #ifdef PTR_DEMANGLE
00483   if (gs->__shlib_handle != NULL)
00484     PTR_DEMANGLE (fct);
00485 #endif
00486 
00487   status = DL_CALL_FCT (fct,
00488                      (gs, codecvt->__cd_in.__cd.__data, &cp,
00489                       (const unsigned char *) from_end, NULL,
00490                       &dummy, 0, 0));
00491 
00492   result = cp - (const unsigned char *) from_start;
00493 #else
00494 # ifdef _GLIBCPP_USE_WCHAR_T
00495   const char *from_start_copy = (const char *) from_start;
00496   size_t from_len = from_end - from_start;
00497   wchar_t to_buf[max];
00498   size_t res;
00499   char *to_start = (char *) to_buf;
00500 
00501   res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
00502               &to_start, &max);
00503 
00504   result = from_start_copy - (char *) from_start;
00505 # else
00506   /* Decide what to do.  */
00507   result = 0;
00508 # endif
00509 #endif
00510 
00511   return result;
00512 }
00513 
00514 
/* Return the maximum number of input bytes needed for one wide
   character: the value recorded in the first input conversion step when
   built as part of libc, MB_CUR_MAX otherwise.  */
static int
do_max_length (struct _IO_codecvt *codecvt)
{
#ifndef _LIBC
  return MB_CUR_MAX;
#else
  return codecvt->__cd_in.__cd.__steps[0].__max_needed_from;
#endif
}
00523 }