Back to index

php5  5.3.10
encoding.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (c) Ian F. Darwin 1986-1995.
00003  * Software written by Ian F. Darwin and others;
00004  * maintained 1995-present by Christos Zoulas and others.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  * 1. Redistributions of source code must retain the above copyright
00010  *    notice immediately at the beginning of the file, without modification,
00011  *    this list of conditions, and the following disclaimer.
00012  * 2. Redistributions in binary form must reproduce the above copyright
00013  *    notice, this list of conditions and the following disclaimer in the
00014  *    documentation and/or other materials provided with the distribution.
00015  *
00016  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00017  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00018  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00019  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
00020  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00021  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00022  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00023  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00024  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00025  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00026  * SUCH DAMAGE.
00027  */
00028 /*
00029  * Encoding -- determine the character encoding of a text file.
00030  *
00031  * Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit
00032  * international characters.
00033  */
00034 
00035 #include "file.h"
00036 
00037 #ifndef       lint
00038 FILE_RCSID("@(#)$File: encoding.c,v 1.3 2009/02/03 20:27:51 christos Exp $")
00039 #endif /* lint */
00040 
00041 #include "magic.h"
00042 #include <string.h>
00043 #include <memory.h>
00044 #include <stdlib.h>
00045 
00046 
00047 private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
00048 private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,
00049     size_t *);
00050 private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *);
00051 private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
00052 private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
00053 private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
00054 
00055 /*
00056  * Try to determine whether text is in some character code we can
00057  * identify.  Each of these tests, if it succeeds, will leave
00058  * the text converted into one-unichar-per-character Unicode in
00059  * ubuf, and the number of characters converted in ulen.
00060  */
00061 protected int
00062 file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, unichar **ubuf, size_t *ulen, const char **code, const char **code_mime, const char **type)
00063 {
00064        size_t mlen;
00065        int rv = 1, ucs_type;
00066        unsigned char *nbuf = NULL;
00067 
00068        mlen = (nbytes + 1) * sizeof(nbuf[0]);
00069        if ((nbuf = CAST(unsigned char *, calloc((size_t)1, mlen))) == NULL) {
00070               file_oomem(ms, mlen);
00071               goto done;
00072        }
00073        mlen = (nbytes + 1) * sizeof((*ubuf)[0]);
00074        if ((*ubuf = CAST(unichar *, calloc((size_t)1, mlen))) == NULL) {
00075               file_oomem(ms, mlen);
00076               goto done;
00077        }
00078 
00079        *type = "text";
00080        if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
00081               *code = "ASCII";
00082               *code_mime = "us-ascii";
00083        } else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
00084               *code = "UTF-8 Unicode (with BOM)";
00085               *code_mime = "utf-8";
00086        } else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) {
00087               *code = "UTF-8 Unicode";
00088               *code_mime = "utf-8";
00089        } else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) {
00090               if (ucs_type == 1) {
00091                      *code = "Little-endian UTF-16 Unicode";
00092                      *code_mime = "utf-16le";
00093               } else {
00094                      *code = "Big-endian UTF-16 Unicode";
00095                      *code_mime = "utf-16be";
00096               }
00097        } else if (looks_latin1(buf, nbytes, *ubuf, ulen)) {
00098               *code = "ISO-8859";
00099               *code_mime = "iso-8859-1";
00100        } else if (looks_extended(buf, nbytes, *ubuf, ulen)) {
00101               *code = "Non-ISO extended-ASCII";
00102               *code_mime = "unknown-8bit";
00103        } else {
00104               from_ebcdic(buf, nbytes, nbuf);
00105 
00106               if (looks_ascii(nbuf, nbytes, *ubuf, ulen)) {
00107                      *code = "EBCDIC";
00108                      *code_mime = "ebcdic";
00109               } else if (looks_latin1(nbuf, nbytes, *ubuf, ulen)) {
00110                      *code = "International EBCDIC";
00111                      *code_mime = "ebcdic";
00112               } else { /* Doesn't look like text at all */
00113                      rv = 0;
00114                      *type = "binary";
00115               }
00116        }
00117 
00118  done:
00119        if (nbuf)
00120               free(nbuf);
00121 
00122        return rv;
00123 }
00124 
00125 /*
00126  * This table reflects a particular philosophy about what constitutes
00127  * "text," and there is room for disagreement about it.
00128  *
00129  * Version 3.31 of the file command considered a file to be ASCII if
00130  * each of its characters was approved by either the isascii() or
00131  * isalpha() function.  On most systems, this would mean that any
00132  * file consisting only of characters in the range 0x00 ... 0x7F
00133  * would be called ASCII text, but many systems might reasonably
00134  * consider some characters outside this range to be alphabetic,
00135  * so the file command would call such characters ASCII.  It might
00136  * have been more accurate to call this "considered textual on the
00137  * local system" than "ASCII."
00138  *
00139  * It considered a file to be "International language text" if each
00140  * of its characters was either an ASCII printing character (according
00141  * to the real ASCII standard, not the above test), a character in
00142  * the range 0x80 ... 0xFF, or one of the following control characters:
00143  * backspace, tab, line feed, vertical tab, form feed, carriage return,
00144  * escape.  No attempt was made to determine the language in which files
00145  * of this type were written.
00146  *
00147  *
00148  * The table below considers a file to be ASCII if all of its characters
00149  * are either ASCII printing characters (again, according to the X3.4
00150  * standard, not isascii()) or any of the following controls: bell,
00151  * backspace, tab, line feed, form feed, carriage return, esc, nextline.
00152  *
00153  * I include bell because some programs (particularly shell scripts)
00154  * use it literally, even though it is rare in normal text.  I exclude
00155  * vertical tab because it never seems to be used in real text.  I also
00156  * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
00157  * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
00158  * character to.  It might be more appropriate to include it in the 8859
00159  * set instead of the ASCII set, but it's got to be included in *something*
00160  * we recognize or EBCDIC files aren't going to be considered textual.
00161  * Some old Unix source files use SO/SI (^N/^O) to shift between Greek
00162  * and Latin characters, so these should possibly be allowed.  But they
00163  * make a real mess on VT100-style displays if they're not paired properly,
00164  * so we are probably better off not calling them text.
00165  *
00166  * A file is considered to be ISO-8859 text if its characters are all
00167  * either ASCII, according to the above definition, or printing characters
00168  * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
00169  *
00170  * Finally, a file is considered to be international text from some other
00171  * character code if its characters are all either ISO-8859 (according to
00172  * the above definition) or characters in the range 0x80 ... 0x9F, which
00173  * ISO-8859 considers to be control characters but the IBM PC and Macintosh
00174  * consider to be printing characters.
00175  */
00176 
00177 #define F 0   /* character never appears in text */
00178 #define T 1   /* character appears in plain ASCII text */
00179 #define I 2   /* character appears in ISO-8859 text */
00180 #define X 3   /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
00181 
00182 private char text_chars[256] = {
00183        /*                  BEL BS HT LF    FF CR    */
00184        F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
00185        /*                              ESC          */
00186        F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
00187        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
00188        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
00189        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
00190        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
00191        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
00192        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
00193        /*            NEL                            */
00194        X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
00195        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
00196        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
00197        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
00198        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
00199        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
00200        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
00201        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
00202 };
00203 
00204 private int
00205 looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf,
00206     size_t *ulen)
00207 {
00208        size_t i;
00209 
00210        *ulen = 0;
00211 
00212        for (i = 0; i < nbytes; i++) {
00213               int t = text_chars[buf[i]];
00214 
00215               if (t != T)
00216                      return 0;
00217 
00218               ubuf[(*ulen)++] = buf[i];
00219        }
00220 
00221        return 1;
00222 }
00223 
00224 private int
00225 looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
00226 {
00227        size_t i;
00228 
00229        *ulen = 0;
00230 
00231        for (i = 0; i < nbytes; i++) {
00232               int t = text_chars[buf[i]];
00233 
00234               if (t != T && t != I)
00235                      return 0;
00236 
00237               ubuf[(*ulen)++] = buf[i];
00238        }
00239 
00240        return 1;
00241 }
00242 
00243 private int
00244 looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf,
00245     size_t *ulen)
00246 {
00247        size_t i;
00248 
00249        *ulen = 0;
00250 
00251        for (i = 0; i < nbytes; i++) {
00252               int t = text_chars[buf[i]];
00253 
00254               if (t != T && t != I && t != X)
00255                      return 0;
00256 
00257               ubuf[(*ulen)++] = buf[i];
00258        }
00259 
00260        return 1;
00261 }
00262 
00263 /*
00264  * Decide whether some text looks like UTF-8. Returns:
00265  *
00266  *     -1: invalid UTF-8
00267  *      0: uses odd control characters, so doesn't look like text
00268  *      1: 7-bit text
00269  *      2: definitely UTF-8 text (valid high-bit set bytes)
00270  *
00271  * If ubuf is non-NULL on entry, text is decoded into ubuf, *ulen;
00272  * ubuf must be big enough!
00273  */
00274 protected int
00275 file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
00276 {
00277        size_t i;
00278        int n;
00279        unichar c;
00280        int gotone = 0, ctrl = 0;
00281 
00282        if (ubuf)
00283               *ulen = 0;
00284 
00285        for (i = 0; i < nbytes; i++) {
00286               if ((buf[i] & 0x80) == 0) {    /* 0xxxxxxx is plain ASCII */
00287                      /*
00288                       * Even if the whole file is valid UTF-8 sequences,
00289                       * still reject it if it uses weird control characters.
00290                       */
00291 
00292                      if (text_chars[buf[i]] != T)
00293                             ctrl = 1;
00294 
00295                      if (ubuf)
00296                             ubuf[(*ulen)++] = buf[i];
00297               } else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */
00298                      return -1;
00299               } else {                       /* 11xxxxxx begins UTF-8 */
00300                      int following;
00301 
00302                      if ((buf[i] & 0x20) == 0) {        /* 110xxxxx */
00303                             c = buf[i] & 0x1f;
00304                             following = 1;
00305                      } else if ((buf[i] & 0x10) == 0) { /* 1110xxxx */
00306                             c = buf[i] & 0x0f;
00307                             following = 2;
00308                      } else if ((buf[i] & 0x08) == 0) { /* 11110xxx */
00309                             c = buf[i] & 0x07;
00310                             following = 3;
00311                      } else if ((buf[i] & 0x04) == 0) { /* 111110xx */
00312                             c = buf[i] & 0x03;
00313                             following = 4;
00314                      } else if ((buf[i] & 0x02) == 0) { /* 1111110x */
00315                             c = buf[i] & 0x01;
00316                             following = 5;
00317                      } else
00318                             return -1;
00319 
00320                      for (n = 0; n < following; n++) {
00321                             i++;
00322                             if (i >= nbytes)
00323                                    goto done;
00324 
00325                             if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40))
00326                                    return -1;
00327 
00328                             c = (c << 6) + (buf[i] & 0x3f);
00329                      }
00330 
00331                      if (ubuf)
00332                             ubuf[(*ulen)++] = c;
00333                      gotone = 1;
00334               }
00335        }
00336 done:
00337        return ctrl ? 0 : (gotone ? 2 : 1);
00338 }
00339 
00340 /*
00341  * Decide whether some text looks like UTF-8 with BOM. If there is no
00342  * BOM, return -1; otherwise return the result of looks_utf8 on the
00343  * rest of the text.
00344  */
00345 private int
00346 looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf,
00347     size_t *ulen)
00348 {
00349        if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf)
00350               return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen);
00351        else
00352               return -1;
00353 }
00354 
00355 private int
00356 looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf,
00357     size_t *ulen)
00358 {
00359        int bigend;
00360        size_t i;
00361 
00362        if (nbytes < 2)
00363               return 0;
00364 
00365        if (buf[0] == 0xff && buf[1] == 0xfe)
00366               bigend = 0;
00367        else if (buf[0] == 0xfe && buf[1] == 0xff)
00368               bigend = 1;
00369        else
00370               return 0;
00371 
00372        *ulen = 0;
00373 
00374        for (i = 2; i + 1 < nbytes; i += 2) {
00375               /* XXX fix to properly handle chars > 65536 */
00376 
00377               if (bigend)
00378                      ubuf[(*ulen)++] = buf[i + 1] + 256 * buf[i];
00379               else
00380                      ubuf[(*ulen)++] = buf[i] + 256 * buf[i + 1];
00381 
00382               if (ubuf[*ulen - 1] == 0xfffe)
00383                      return 0;
00384               if (ubuf[*ulen - 1] < 128 &&
00385                   text_chars[(size_t)ubuf[*ulen - 1]] != T)
00386                      return 0;
00387        }
00388 
00389        return 1 + bigend;
00390 }
00391 
00392 #undef F
00393 #undef T
00394 #undef I
00395 #undef X
00396 
00397 /*
00398  * This table maps each EBCDIC character to an (8-bit extended) ASCII
00399  * character, as specified in the rationale for the dd(1) command in
00400  * draft 11.2 (September, 1991) of the POSIX P1003.2 standard.
00401  *
00402  * Unfortunately it does not seem to correspond exactly to any of the
00403  * five variants of EBCDIC documented in IBM's _Enterprise Systems
00404  * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh
00405  * Edition, July, 1999, pp. I-1 - I-4.
00406  *
00407  * Fortunately, though, all versions of EBCDIC, including this one, agree
00408  * on most of the printing characters that also appear in (7-bit) ASCII.
00409  * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all.
00410  *
00411  * Fortunately too, there is general agreement that codes 0x00 through
00412  * 0x3F represent control characters, 0x41 a nonbreaking space, and the
00413  * remainder printing characters.
00414  *
00415  * This is sufficient to allow us to identify EBCDIC text and to distinguish
00416  * between old-style and internationalized examples of text.
00417  */
00418 
00419 private unsigned char ebcdic_to_ascii[] = {
00420   0,   1,   2,   3, 156,   9, 134, 127, 151, 141, 142,  11,  12,  13,  14,  15,
00421  16,  17,  18,  19, 157, 133,   8, 135,  24,  25, 146, 143,  28,  29,  30,  31,
00422 128, 129, 130, 131, 132,  10,  23,  27, 136, 137, 138, 139, 140,   5,   6,   7,
00423 144, 145,  22, 147, 148, 149, 150,   4, 152, 153, 154, 155,  20,  21, 158,  26,
00424 ' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|',
00425 '&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~',
00426 '-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?',
00427 186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"',
00428 195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201,
00429 202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208,
00430 209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215,
00431 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231,
00432 '{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237,
00433 '}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243,
00434 '\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249,
00435 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255
00436 };
00437 
00438 #ifdef notdef
00439 /*
00440  * The following EBCDIC-to-ASCII table may relate more closely to reality,
00441  * or at least to modern reality.  It comes from
00442  *
00443  *   http://ftp.s390.ibm.com/products/oe/bpxqp9.html
00444  *
00445  * and maps the characters of EBCDIC code page 1047 (the code used for
00446  * Unix-derived software on IBM's 390 systems) to the corresponding
00447  * characters from ISO 8859-1.
00448  *
00449  * If this table is used instead of the above one, some of the special
00450  * cases for the NEL character can be taken out of the code.
00451  */
00452 
00453 private unsigned char ebcdic_1047_to_8859[] = {
00454 0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
00455 0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
00456 0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07,
00457 0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A,
00458 0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C,
00459 0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E,
00460 0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F,
00461 0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22,
00462 0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1,
00463 0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4,
00464 0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE,
00465 0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7,
00466 0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5,
00467 0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF,
00468 0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5,
00469 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F
00470 };
00471 #endif
00472 
00473 /*
00474  * Copy buf[0 ... nbytes-1] into out[], translating EBCDIC to ASCII.
00475  */
00476 private void
00477 from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out)
00478 {
00479        size_t i;
00480 
00481        for (i = 0; i < nbytes; i++) {
00482               out[i] = ebcdic_to_ascii[buf[i]];
00483        }
00484 }