Back to index

lightning-sunbird  0.9+nobinonly
Defines | Functions
utf8.c File Reference
#include "seccomon.h"
#include "secport.h"

Go to the source code of this file.

Defines

#define L_0   0
#define L_1   1
#define L_2   2
#define L_3   3
#define H_0   0
#define H_1   1
#define BAD_UTF8   ((PRUint32)-1)

Functions

static PRUint32 sec_port_read_utf8 (unsigned int *index, unsigned char *inBuf, unsigned int inBufLen)
 sec_port_ucs4_utf8_conversion_function (PRBool toUnicode, unsigned char *inBuf, unsigned int inBufLen, unsigned char *outBuf, unsigned int maxOutBufLen, unsigned int *outBufLen)
 sec_port_ucs2_utf8_conversion_function (PRBool toUnicode, unsigned char *inBuf, unsigned int inBufLen, unsigned char *outBuf, unsigned int maxOutBufLen, unsigned int *outBufLen)
PRBool sec_port_iso88591_utf8_conversion_function (const unsigned char *inBuf, unsigned int inBufLen, unsigned char *outBuf, unsigned int maxOutBufLen, unsigned int *outBufLen)

Define Documentation

#define BAD_UTF8   ((PRUint32)-1)

Definition at line 106 of file utf8.c.

#define H_0   0

Definition at line 103 of file utf8.c.

#define H_1   1

Definition at line 104 of file utf8.c.

#define L_0   0

Definition at line 99 of file utf8.c.

#define L_1   1

Definition at line 100 of file utf8.c.

#define L_2   2

Definition at line 101 of file utf8.c.

#define L_3   3

Definition at line 102 of file utf8.c.


Function Documentation

PRBool sec_port_iso88591_utf8_conversion_function ( const unsigned char *  inBuf,
unsigned int  inBufLen,
unsigned char *  outBuf,
unsigned int  maxOutBufLen,
unsigned int *  outBufLen 
)

Definition at line 439 of file utf8.c.

{
  /*
   * Re-encodes ISO-8859-1 bytes from inBuf as UTF-8 into outBuf.
   *
   * On success stores the number of bytes written in *outBufLen and
   * returns PR_TRUE.  If maxOutBufLen is too small, stores the required
   * size in *outBufLen, writes nothing, and returns PR_FALSE.
   */
  unsigned int src;
  unsigned int needed = 0;
  unsigned int written = 0;

  PORT_Assert((unsigned int *)NULL != outBufLen);

  /* Sizing pass: 00-7F take one output byte, 80-FF take two. */
  for( src = 0; src < inBufLen; src++ ) {
    needed += (inBuf[src] & 0x80) ? 2 : 1;
  }

  if( needed > maxOutBufLen ) {
    *outBufLen = needed;
    return PR_FALSE;
  }

  /* Encoding pass. */
  for( src = 0; src < inBufLen; src++ ) {
    const unsigned char ch = inBuf[src];

    if( ch & 0x80 ) {
      /* 80-FF -> 110xxxxx 10xxxxxx */
      /* abcdefgh -> 110000ab 10cdefgh */
      outBuf[written++] = 0xC0 | (ch >> 6);
      outBuf[written++] = 0x80 | (ch & 0x3F);
    } else {
      /* 00-7F -> 0xxxxxxx (copied unchanged) */
      outBuf[written++] = ch;
    }
  }

  *outBufLen = written;
  return PR_TRUE;
}

Here is the caller graph for this function:

static PRUint32 sec_port_read_utf8 ( unsigned int *  index,
unsigned char *  inBuf,
unsigned int  inBufLen 
) [static]

Definition at line 123 of file utf8.c.

{
  /*
   * Decodes one UTF-8 sequence starting at inBuf[*index].
   *
   * On success returns the decoded code point and advances *index past the
   * sequence.  On any malformed input returns BAD_UTF8 and leaves *index
   * unchanged.
   */
  PRUint32 codepoint;
  PRUint32 smallest;      /* lowest value allowed for this sequence length */
  unsigned int pos = *index;
  int continuation;       /* continuation bytes still expected */
  unsigned char lead;

  PORT_Assert(pos < inBufLen);

  /* Classify the lead byte: it fixes the length and the payload bits. */
  lead = inBuf[pos];
  if ( (lead & 0x80) == 0x00 ) {
    codepoint = lead;
    continuation = 0;
    smallest = 0;
  } else if ( (lead & 0xE0) == 0xC0 ) {
    codepoint = lead & 0x1F;
    continuation = 1;
    smallest = 0x80;
  } else if ( (lead & 0xF0) == 0xE0 ) {
    codepoint = lead & 0x0F;
    continuation = 2;
    smallest = 0x800;
  } else if ( (lead & 0xF8) == 0xF0 ) {
    codepoint = lead & 0x07;
    continuation = 3;
    smallest = 0x10000;
  } else {
    return BAD_UTF8;
  }
  pos++;

  /* Each continuation byte must be 10xxxxxx and contributes six bits. */
  for ( ; continuation > 0; continuation-- ) {
    if (pos >= inBufLen) return BAD_UTF8;
    if ((inBuf[pos] & 0xC0) != 0x80) return BAD_UTF8;
    codepoint = (codepoint << 6) | (inBuf[pos] & 0x3F);
    pos++;
  }

  /* Reject overlong encodings, surrogates, and values past U+10FFFF. */
  if (codepoint < smallest) return BAD_UTF8;
  if ((codepoint & 0xFFFFF800) == 0xD800) return BAD_UTF8;
  if (codepoint > 0x10FFFF) return BAD_UTF8;

  *index = pos;
  return codepoint;
}

Here is the caller graph for this function:

sec_port_ucs2_utf8_conversion_function ( PRBool  toUnicode,
unsigned char *  inBuf,
unsigned int  inBufLen,
unsigned char *  outBuf,
unsigned int  maxOutBufLen,
unsigned int *  outBufLen 
)

Definition at line 292 of file utf8.c.

{
  /*
   * Converts between UTF-8 and 16-bit code units (UCS-2 with surrogate
   * pairs).  Each 16-bit unit occupies two bytes: the high-order byte at
   * offset H_0 and the low-order byte at offset H_1.
   *
   *  toUnicode            nonzero: inBuf is UTF-8, outBuf receives 16-bit
   *                       units; zero: inBuf holds 16-bit units, outBuf
   *                       receives UTF-8.
   *  inBuf, inBufLen      input bytes and their count.
   *  outBuf, maxOutBufLen output buffer and its capacity in bytes.
   *  outBufLen            receives the number of bytes written on success,
   *                       or the required size when outBuf is too small.
   *                       It is set to 0 when inBufLen is odd in the
   *                       to-UTF-8 direction and is left untouched on other
   *                       malformed-input failures.
   *
   * Returns PR_TRUE on success, PR_FALSE on malformed input or when
   * maxOutBufLen is too small.
   *
   * Both directions run a sizing pass followed by a write pass; the two
   * passes must classify every unit identically or the write pass can
   * overrun outBuf.
   */
  PORT_Assert((unsigned int *)NULL != outBufLen);

  if( toUnicode ) {
    unsigned int i, len = 0;

    /* Sizing pass: the lead byte alone determines the output size.
       Full validation of each sequence happens in the write pass. */
    for( i = 0; i < inBufLen; ) {
      if( (inBuf[i] & 0x80) == 0x00 ) {
        i += 1;
        len += 2;
      } else if( (inBuf[i] & 0xE0) == 0xC0 ) {
        i += 2;
        len += 2;
      } else if( (inBuf[i] & 0xF0) == 0xE0 ) {
        i += 3;
        len += 2;
      } else if( (inBuf[i] & 0xF8) == 0xF0 ) {
        /* Supplementary plane: needs a surrogate pair. */
        i += 4;
        len += 4;
      } else return PR_FALSE;
    }

    if( len > maxOutBufLen ) {
      *outBufLen = len;
      return PR_FALSE;
    }

    len = 0;

    for( i = 0; i < inBufLen; ) {
      PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);

      if (ucs4 == BAD_UTF8) return PR_FALSE;

      if( ucs4 < 0x10000) {
        outBuf[len+H_0] = (unsigned char)(ucs4 >> 8);
        outBuf[len+H_1] = (unsigned char)ucs4;
        len += 2;
      } else {
        /* Split the 20-bit offset into a high (D800-DBFF) and a low
           (DC00-DFFF) surrogate carrying ten bits each. */
        ucs4 -= 0x10000;
        outBuf[len+0+H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3));
        outBuf[len+0+H_1] = (unsigned char)(ucs4 >> 10);
        outBuf[len+2+H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3));
        outBuf[len+2+H_1] = (unsigned char)ucs4;
        len += 4;
      }
    }

    *outBufLen = len;
    return PR_TRUE;
  } else {
    unsigned int i, len = 0;
    PORT_Assert((inBufLen % 2) == 0);
    if ((inBufLen % 2) != 0) {
      *outBufLen = 0;
      return PR_FALSE;
    }

    for( i = 0; i < inBufLen; i += 2 ) {
      /* The ASCII test must inspect the LOW byte (H_1), exactly as the
         write pass does; testing H_0 here under-counts U+0080..U+00FF and
         lets the write pass overrun outBuf. */
      if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) len += 1;
      else if( inBuf[i+H_0] < 0x08 ) len += 2;
      else if( (inBuf[i+H_0] & 0xFC) == 0xD8 ) {
        /* High surrogate: confirm a following unit exists BEFORE reading
           it, then require it to be a low surrogate (DC00-DFFF). */
        if( ((inBufLen - i) > 2) && ((inBuf[i+2+H_0] & 0xFC) == 0xDC) ) {
          i += 2;
          len += 4;
        } else {
          return PR_FALSE;
        }
      }
      else if( (inBuf[i+H_0] & 0xFC) == 0xDC ) {
        /* Low surrogate with no preceding high surrogate: encoding it
           would produce UTF-8 that sec_port_read_utf8 rejects. */
        return PR_FALSE;
      }
      else len += 3;
    }

    if( len > maxOutBufLen ) {
      *outBufLen = len;
      return PR_FALSE;
    }

    len = 0;

    for( i = 0; i < inBufLen; i += 2 ) {
      if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) {
        /* 0000-007F -> 0xxxxxx */
        /* 00000000 0abcdefg -> 0abcdefg */

        outBuf[len] = inBuf[i+H_1] & 0x7F;

        len += 1;
      } else if( inBuf[i+H_0] < 0x08 ) {
        /* 0080-07FF -> 110xxxxx 10xxxxxx */
        /* 00000abc defghijk -> 110abcde 10fghijk */

        outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2) 
                             | ((inBuf[i+H_1] & 0xC0) >> 6);
        outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);

        len += 2;
      } else if( (inBuf[i+H_0] & 0xFC) == 0xD8 ) {
        int abcde, BCDE;

        /* Already guaranteed by the sizing pass. */
        PORT_Assert(((inBufLen - i) > 2) && ((inBuf[i+2+H_0] & 0xFC) == 0xDC));

        /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        /* 110110BC DEfghijk 110111lm nopqrstu ->
           { Let abcde = BCDE + 1 }
           11110abc 10defghi 10jklmno 10pqrstu */

        BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6);
        abcde = BCDE + 1;

        outBuf[len+0] = 0xF0 | ((abcde & 0x1C) >> 2);
        outBuf[len+1] = 0x80 | ((abcde & 0x03) << 4) 
                             | ((inBuf[i+0+H_1] & 0x3C) >> 2);
        outBuf[len+2] = 0x80 | ((inBuf[i+0+H_1] & 0x03) << 4)
                             | ((inBuf[i+2+H_0] & 0x03) << 2)
                             | ((inBuf[i+2+H_1] & 0xC0) >> 6);
        outBuf[len+3] = 0x80 | ((inBuf[i+2+H_1] & 0x3F) >> 0);

        /* The pair consumed two units; the loop adds the other 2. */
        i += 2;
        len += 4;
      } else {
        /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
        /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */

        outBuf[len+0] = 0xE0 | ((inBuf[i+H_0] & 0xF0) >> 4);
        outBuf[len+1] = 0x80 | ((inBuf[i+H_0] & 0x0F) << 2) 
                             | ((inBuf[i+H_1] & 0xC0) >> 6);
        outBuf[len+2] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);

        len += 3;
      }
    }

    *outBufLen = len;
    return PR_TRUE;
  }
}

Here is the call graph for this function:

Here is the caller graph for this function:

sec_port_ucs4_utf8_conversion_function ( PRBool  toUnicode,
unsigned char *  inBuf,
unsigned int  inBufLen,
unsigned char *  outBuf,
unsigned int  maxOutBufLen,
unsigned int *  outBufLen 
)

Definition at line 168 of file utf8.c.

{
  /*
   * Converts between UTF-8 and UCS-4 stored four bytes per character at
   * offsets L_0 (most significant) through L_3 (least significant).
   *
   * toUnicode nonzero: UTF-8 in inBuf -> UCS-4 in outBuf.
   * toUnicode zero:    UCS-4 in inBuf -> UTF-8 in outBuf.
   *
   * On success stores the byte count written in *outBufLen and returns
   * PR_TRUE.  If maxOutBufLen is too small, stores the required size in
   * *outBufLen and returns PR_FALSE.  Malformed input also returns PR_FALSE.
   */
  unsigned int pos;
  unsigned int total = 0;

  PORT_Assert((unsigned int *)NULL != outBufLen);

  if( !toUnicode ) {
    /* ---- UCS-4 -> UTF-8 ---- */
    PORT_Assert((inBufLen % 4) == 0);
    if ((inBufLen % 4) != 0) {
      *outBufLen = 0;
      return PR_FALSE;
    }

    /* Sizing pass; also rejects anything above 0x10FFFF. */
    for( pos = 0; pos < inBufLen; pos += 4 ) {
      const unsigned char *u = &inBuf[pos];

      if( (u[L_0] != 0x00) || (u[L_1] > 0x10) ) {
        *outBufLen = 0;
        return PR_FALSE;
      }

      if( u[L_1] != 0x00 ) total += 4;
      else if( u[L_2] >= 0x08 ) total += 3;
      else if( (u[L_2] != 0x00) || (u[L_3] >= 0x80) ) total += 2;
      else total += 1;
    }

    if( total > maxOutBufLen ) {
      *outBufLen = total;
      return PR_FALSE;
    }

    /* Write pass: same classification as the sizing pass. */
    total = 0;
    for( pos = 0; pos < inBufLen; pos += 4 ) {
      const unsigned char *u = &inBuf[pos];
      unsigned char *out = &outBuf[total];

      if( u[L_1] != 0x00 ) {
        /* 0001 0000-0010 FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        /* 00000000 000abcde fghijklm nopqrstu ->
           11110abc 10defghi 10jklmno 10pqrstu */
        out[0] = 0xF0 | ((u[L_1] & 0x1C) >> 2);
        out[1] = 0x80 | ((u[L_1] & 0x03) << 4) | ((u[L_2] & 0xF0) >> 4);
        out[2] = 0x80 | ((u[L_2] & 0x0F) << 2) | ((u[L_3] & 0xC0) >> 6);
        out[3] = 0x80 | (u[L_3] & 0x3F);
        total += 4;
      } else if( u[L_2] >= 0x08 ) {
        /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
        /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */
        out[0] = 0xE0 | ((u[L_2] & 0xF0) >> 4);
        out[1] = 0x80 | ((u[L_2] & 0x0F) << 2) | ((u[L_3] & 0xC0) >> 6);
        out[2] = 0x80 | (u[L_3] & 0x3F);
        total += 3;
      } else if( (u[L_2] != 0x00) || (u[L_3] >= 0x80) ) {
        /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */
        /* 00000abc defghijk -> 110abcde 10fghijk */
        out[0] = 0xC0 | ((u[L_2] & 0x07) << 2) | ((u[L_3] & 0xC0) >> 6);
        out[1] = 0x80 | (u[L_3] & 0x3F);
        total += 2;
      } else {
        /* 0000 0000-0000 007F -> 0xxxxxxx */
        out[0] = u[L_3] & 0x7F;
        total += 1;
      }
    }

    *outBufLen = total;
    return PR_TRUE;
  }

  /* ---- UTF-8 -> UCS-4 ---- */

  /* Sizing pass: every sequence, whatever its length, yields 4 bytes. */
  pos = 0;
  while( pos < inBufLen ) {
    const unsigned char lead = inBuf[pos];

    if( (lead & 0x80) == 0x00 ) pos += 1;
    else if( (lead & 0xE0) == 0xC0 ) pos += 2;
    else if( (lead & 0xF0) == 0xE0 ) pos += 3;
    else if( (lead & 0xF8) == 0xF0 ) pos += 4;
    else return PR_FALSE;

    total += 4;
  }

  if( total > maxOutBufLen ) {
    *outBufLen = total;
    return PR_FALSE;
  }

  /* Write pass: decode and validate each sequence, then store it with
     the most significant byte at L_0. */
  total = 0;
  pos = 0;
  while( pos < inBufLen ) {
    PRUint32 ucs4 = sec_port_read_utf8(&pos, inBuf, inBufLen);

    if (ucs4 == BAD_UTF8) return PR_FALSE;

    outBuf[total+L_0] = 0x00;
    outBuf[total+L_1] = (unsigned char)(ucs4 >> 16);
    outBuf[total+L_2] = (unsigned char)(ucs4 >> 8);
    outBuf[total+L_3] = (unsigned char)ucs4;

    total += 4;
  }

  *outBufLen = total;
  return PR_TRUE;
}

Here is the call graph for this function:

Here is the caller graph for this function: