Back to index

tetex-bin  3.0
Defines | Functions | Variables
localcharset.c File Reference
#include "localcharset.h"
#include <stdio.h>
#include <strings.h>

Go to the source code of this file.

Defines

#define relocate(pathname)   (pathname)
#define DIRECTORY_SEPARATOR   '/'
#define ISSLASH(C)   ((C) == DIRECTORY_SEPARATOR)
#define volatile   /* empty */

Functions

static const char * get_charset_aliases ()
const char * locale_charset ()

Variables

static const char *volatile charset_aliases

Define Documentation

#define DIRECTORY_SEPARATOR   '/'

Definition at line 82 of file localcharset.c.

#define ISSLASH (   C)    ((C) == DIRECTORY_SEPARATOR)

Definition at line 86 of file localcharset.c.

#define relocate (   pathname)    (pathname)

Definition at line 73 of file localcharset.c.

#define volatile   /* empty */

Definition at line 101 of file localcharset.c.


Function Documentation

static const char* get_charset_aliases ( ) [static]

Definition at line 110 of file localcharset.c.

{
  const char *cp;

  cp = charset_aliases;
  if (cp == NULL)
    {
#if !(defined VMS || defined WIN32)
      FILE *fp;
      const char *dir = relocate (LIBDIR);
      const char *base = "charset.alias";
      char *file_name;

      /* Concatenate dir and base into freshly allocated file_name.  */
      {
       size_t dir_len = strlen (dir);
       size_t base_len = strlen (base);
       int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
       file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
       if (file_name != NULL)
         {
           memcpy (file_name, dir, dir_len);
           if (add_slash)
             file_name[dir_len] = DIRECTORY_SEPARATOR;
           memcpy (file_name + dir_len + add_slash, base, base_len + 1);
         }
      }

      if (file_name == NULL || (fp = fopen (file_name, "r")) == NULL)
       /* Out of memory or file not found, treat it as empty.  */
       cp = "";
      else
       {
         /* Parse the file's contents.  */
         int c;
         char buf1[50+1];
         char buf2[50+1];
         char *res_ptr = NULL;
         size_t res_size = 0;
         size_t l1, l2;

         for (;;)
           {
             c = getc (fp);
             if (c == EOF)
              break;
             if (c == '\n' || c == ' ' || c == '\t')
              continue;
             if (c == '#')
              {
                /* Skip comment, to end of line.  */
                do
                  c = getc (fp);
                while (!(c == EOF || c == '\n'));
                if (c == EOF)
                  break;
                continue;
              }
             ungetc (c, fp);
             if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)
              break;
             l1 = strlen (buf1);
             l2 = strlen (buf2);
             if (res_size == 0)
              {
                res_size = l1 + 1 + l2 + 1;
                res_ptr = (char *) malloc (res_size + 1);
              }
             else
              {
                res_size += l1 + 1 + l2 + 1;
                res_ptr = (char *) realloc (res_ptr, res_size + 1);
              }
             if (res_ptr == NULL)
              {
                /* Out of memory. */
                res_size = 0;
                break;
              }
             strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
             strcpy (res_ptr + res_size - (l2 + 1), buf2);
           }
         fclose (fp);
         if (res_size == 0)
           cp = "";
         else
           {
             *(res_ptr + res_size) = '\0';
             cp = res_ptr;
           }
       }

      if (file_name != NULL)
       free (file_name);

#else

# if defined VMS
      /* To avoid the troubles of an extra file charset.alias_vms in the
        sources of many GNU packages, simply inline the aliases here.  */
      /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
        "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
        section 10.7 "Handling Different Character Sets".  */
      cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
          "ISO8859-2" "\0" "ISO-8859-2" "\0"
          "ISO8859-5" "\0" "ISO-8859-5" "\0"
          "ISO8859-7" "\0" "ISO-8859-7" "\0"
          "ISO8859-8" "\0" "ISO-8859-8" "\0"
          "ISO8859-9" "\0" "ISO-8859-9" "\0"
          /* Japanese */
          "eucJP" "\0" "EUC-JP" "\0"
          "SJIS" "\0" "SHIFT_JIS" "\0"
          "DECKANJI" "\0" "DEC-KANJI" "\0"
          "SDECKANJI" "\0" "EUC-JP" "\0"
          /* Chinese */
          "eucTW" "\0" "EUC-TW" "\0"
          "DECHANYU" "\0" "DEC-HANYU" "\0"
          "DECHANZI" "\0" "GB2312" "\0"
          /* Korean */
          "DECKOREAN" "\0" "EUC-KR" "\0";
# endif

# if defined WIN32
      /* To avoid the troubles of installing a separate file in the same
        directory as the DLL and of retrieving the DLL's directory at
        runtime, simply inline the aliases here.  */

      cp = "CP936" "\0" "GBK" "\0"
          "CP1361" "\0" "JOHAB" "\0"
          "CP20127" "\0" "ASCII" "\0"
          "CP20866" "\0" "KOI8-R" "\0"
          "CP21866" "\0" "KOI8-RU" "\0"
          "CP28591" "\0" "ISO-8859-1" "\0"
          "CP28592" "\0" "ISO-8859-2" "\0"
          "CP28593" "\0" "ISO-8859-3" "\0"
          "CP28594" "\0" "ISO-8859-4" "\0"
          "CP28595" "\0" "ISO-8859-5" "\0"
          "CP28596" "\0" "ISO-8859-6" "\0"
          "CP28597" "\0" "ISO-8859-7" "\0"
          "CP28598" "\0" "ISO-8859-8" "\0"
          "CP28599" "\0" "ISO-8859-9" "\0"
          "CP28605" "\0" "ISO-8859-15" "\0";
# endif
#endif

      charset_aliases = cp;
    }

  return cp;
}

Here is the call graph for this function:

Here is the caller graph for this function:

const char* locale_charset ( void  )

Definition at line 271 of file localcharset.c.

{
  const char *codeset;
  const char *aliases;

#if !(defined WIN32 || defined OS2)

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

# else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like SunOS 4 or DJGPP) have only the C locale.  Therefore we don't
     use setlocale here; it would return "C" when it doesn't support the
     locale name the user has set.  */
#  if HAVE_SETLOCALE && 0
  locale = setlocale (LC_CTYPE, NULL);
#  endif
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
       {
         locale = getenv ("LC_CTYPE");
         if (locale == NULL || locale[0] == '\0')
           locale = getenv ("LANG");
       }
    }

  /* On some old systems, one used to set locale = "iso8859_1". On others,
     you set it to "language_COUNTRY.charset". In any case, we resolve it
     through the charset.alias file.  */
  codeset = locale;

# endif

#elif defined WIN32

  static char buf[2 + 10 + 1];

  /* Woe32 has a function returning the locale's codepage as a number.  */
  sprintf (buf, "CP%u", GetACP ());
  codeset = buf;

#elif defined OS2

  const char *locale;
  static char buf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
       locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
       {
         const char *modifier;

         dot++;
         /* Look for the possible @... trailer and remove it, if any.  */
         modifier = strchr (dot, '@');
         if (modifier == NULL)
           return dot;
         if (modifier - dot < sizeof (buf))
           {
             memcpy (buf, dot, modifier - dot);
             buf [modifier - dot] = '\0';
             return buf;
           }
       }

      /* Resolve through the charset.alias file.  */
      codeset = locale;
    }
  else
    {
      /* OS/2 has a function returning the locale's codepage as a number.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
       codeset = "";
      else
       {
         sprintf (buf, "CP%u", cp[0]);
         codeset = buf;
       }
    }

#endif

  if (codeset == NULL)
    /* The canonical name cannot be determined.  */
    codeset = "";

  /* Resolve alias. */
  for (aliases = get_charset_aliases ();
       *aliases != '\0';
       aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
    if (strcmp (codeset, aliases) == 0
       || (aliases[0] == '*' && aliases[1] == '\0'))
      {
       codeset = aliases + strlen (aliases) + 1;
       break;
      }

  /* Don't return an empty string.  GNU libc and GNU libiconv interpret
     the empty string as denoting "the locale's character encoding",
     thus GNU libiconv would call this function a second time.  */
  if (codeset[0] == '\0')
    codeset = "ASCII";

  return codeset;
}

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

const char* volatile charset_aliases [static]

Definition at line 106 of file localcharset.c.