Back to index

texmacs  1.0.7.15
Functions
converter.cpp File Reference
#include "converter.hpp"
#include "convert.hpp"
#include <errno.h>

Go to the source code of this file.

Functions

 RESOURCE_CODE (converter)
void operator<< (converter c, string str)
string apply (converter c, string str)
string flush (converter c)
converter load_converter (string from, string to)
bool check_encoding (string input, string encoding)
string convert (string input, string from, string to)
string convert_to_cork (string input, string from)
string convert_from_cork (string input, string to)
string utf8_to_cork (string input)
string cork_to_utf8 (string input)
string t2a_to_utf8 (string input)
string utf8_to_html (string input)
bool check_using_iconv (string input, string encoding)
string convert_using_iconv (string input, string from, string to)
void put_prefix_code (string key, string value, hashtree< char, string > tree)
hashtree< char, string > find_node (string key, hashtree< char, string > ht)
void hashtree_from_dictionary (hashtree< char, string > dic, string file_name, escape_type key_escape, escape_type val_escape, bool reverse)
bool is_hex_digit (char c)
int hex_digit_to_int (unsigned char c)
string convert_escapes (string in, bool utf8)
string convert_char_entities (string s)
static unsigned int as_unsigned_int (string s)
string convert_char_entity (string s, int &start, bool &success)
string encode_as_utf8 (unsigned int code)
unsigned int decode_from_utf8 (string s, int &i)
string utf8_to_hex_entities (string s)

Function Documentation

string apply ( converter  c,
string  str 
)

Definition at line 33 of file converter.cpp.

                                {
  // Convert str with c in one shot: empty the converter's output buffer,
  // feed the input through it, then collect (and clear) what was produced.
  c->output = string ();
  c << str;
  string converted = flush (c);
  return converted;
}

Here is the call graph for this function:

static unsigned int as_unsigned_int ( string  s) [static]

Definition at line 481 of file converter.cpp.

                           {
  // Parse the leading run of decimal digits of s.  Scanning stops at the
  // first character outside '0'..'9'; an empty string or a string without
  // a leading digit yields 0.
  unsigned int result = 0;
  const int len = N(s);
  for (int pos = 0; pos < len; pos++) {
    const char ch = s[pos];
    if (ch < '0' || ch > '9') break;
    result = 10 * result + (int) (ch - '0');
  }
  return result;
}

Here is the call graph for this function:

Here is the caller graph for this function:

bool check_encoding ( string  input,
string  encoding 
)

Definition at line 146 of file converter.cpp.

                                               {
  // Input declared as "Cork" is accepted without inspection; any other
  // encoding is checked by check_using_iconv.
  if (encoding != "Cork")
    return check_using_iconv (input, encoding);
  return true;
}

Here is the call graph for this function:

Here is the caller graph for this function:

bool check_using_iconv ( string  input,
string  encoding 
)

Definition at line 323 of file converter.cpp.

                                                       {
#ifndef USE_ICONV
  // Built without iconv: report the failure and answer "not valid".
  (void) input;
  (void) encoding;
  FAILED ("iconv not enabled");
  return false;
#else
  // Run the input through a converter from the encoding to itself and
  // report whether the converter considers the run successful.
  iconv_converter conv (encoding, encoding, false);
  apply (conv, input);
  return conv.is_successful();
#endif
}

Here is the call graph for this function:

Here is the caller graph for this function:

string convert ( string  input,
string  from,
string  to 
)

Definition at line 152 of file converter.cpp.

                                               {
  // Conversions starting from or ending in "Cork" have dedicated routines
  // (a Cork source takes precedence); everything else is delegated to iconv.
  if (from == "Cork") return convert_from_cork (input, to);
  if (to == "Cork") return convert_to_cork (input, from);
  return convert_using_iconv (input, from, to);
}

Here is the call graph for this function:

Here is the caller graph for this function:

string convert_char_entities ( string  s)

Definition at line 464 of file converter.cpp.

                                 {
  // Copy s, replacing every "&#..." sequence that convert_char_entity
  // accepts by the string it returns.  A "&#" that does not introduce a
  // valid entity is copied verbatim and scanning resumes right after it.
  string result;
  const int len = N(s);
  int pos = 0;
  while (pos < len) {
    const bool marker = s[pos] == '&' && pos+1 < len && s[pos+1] == '#';
    if (!marker) {
      result << s[pos++];
      continue;
    }
    pos += 2;
    bool parsed = false;
    // On success convert_char_entity moves pos past the entity.
    string decoded = convert_char_entity (s, pos, parsed);
    if (parsed) result << decoded;
    else result << "&#";
  }
  return result;
}

Here is the call graph for this function:

Here is the caller graph for this function:

string convert_char_entity ( string  s,
int &  start,
bool &  success 
)

Definition at line 496 of file converter.cpp.

                                                          {
  // start: position in s just after the character entity marker "&#".
  // Reads either 'x'/'X' followed by hexadecimal digits, or a run of decimal
  // digits.  When at least one digit was read, success is set, start is
  // moved past the entity (including an optional ';') and the UTF-8
  // encoding of the number is returned.  Otherwise success stays false,
  // start is left untouched and "" is returned.
  success = false;
  const int len = N(s);
  int pos = start;
  if (pos >= len) return "";

  unsigned int code = 0;
  const bool hex = (s[pos] == 'x' || s[pos] == 'X');
  if (hex) {
    for (pos++; pos < len && is_hex_digit (s[pos]); pos++) {
      code = 0x10 * code + hex_digit_to_int (s[pos]);
      success = true;
    }
  }
  else {
    const int first = pos;
    for (; pos < len && is_digit (s[pos]); pos++)
      success = true;
    code = as_unsigned_int (s (first, pos));
  }

  if (!success) return "";
  // The terminating ';' is optional; consume it when present.
  if (pos < len && s[pos] == ';') pos++;
  start = pos;
  return encode_as_utf8 (code);
}

Here is the call graph for this function:

Here is the caller graph for this function:

string convert_escapes ( string  in,
bool  utf8 
)

Definition at line 442 of file converter.cpp.

                                       {
  // Expand '#'-escapes: a '#' followed by a (possibly empty) run of
  // hexadecimal digits is replaced by the character with that number,
  // written as UTF-8 when utf8 is true and as a single char otherwise.
  // All other characters are copied unchanged.
  string result;
  const int len = N(in);
  int pos = 0;
  while (pos < len) {
    if (in[pos] != '#') {
      result << in[pos];
      pos++;
      continue;
    }
    pos++;  // skip the '#'
    unsigned int value = 0;
    for (; pos < len && is_hex_digit (in[pos]); pos++)
      value = 0x10 * value + hex_digit_to_int ((unsigned char) in[pos]);
    if (utf8) result << encode_as_utf8 (value);
    else result << string ((char) value);
  }
  return result;
}

Here is the call graph for this function:

Here is the caller graph for this function:

string convert_from_cork ( string  input,
string  to 
)

Definition at line 170 of file converter.cpp.

                                            {
  // Go through UTF-8: first Cork -> UTF-8, then (unless UTF-8 is already
  // the requested target) UTF-8 -> to via iconv.
  string utf8 = cork_to_utf8 (input);
  if (to == "UTF-8") return utf8;
  return convert_using_iconv (utf8, "UTF-8", to);
}

Here is the call graph for this function:

Here is the caller graph for this function:

string convert_to_cork ( string  input,
string  from 
)

Definition at line 162 of file converter.cpp.

                                            {
  // Go through UTF-8: bring the input to UTF-8 via iconv unless it already
  // is UTF-8, then convert UTF-8 -> Cork.
  // str must start out as the input itself: with an empty initial value,
  // input that is already UTF-8 would be discarded and "" converted instead.
  string str = input;
  if (from != "UTF-8")
    str = convert_using_iconv (input, from, "UTF-8");
  return utf8_to_cork (str);
}

Here is the call graph for this function:

Here is the caller graph for this function:

string convert_using_iconv ( string  input,
string  from,
string  to 
)

Definition at line 337 of file converter.cpp.

                                                           {
#ifndef USE_ICONV
  // Built without iconv: report the failure and return an empty string.
  (void) input;
  (void) from;
  (void) to;
  FAILED ("iconv not enabled");
  return "";
#else
  // Convert input from encoding `from` to encoding `to` with an iconv
  // converter.
  iconv_converter conv (from, to, true);
  return apply (conv, input);
#endif
}

Here is the call graph for this function:

Here is the caller graph for this function:

string cork_to_utf8 ( string  input)

Definition at line 195 of file converter.cpp.

                            {
  // Ordinary text is passed through the "Cork" -> "UTF-8" converter.
  // Escapes of the form <#HEX> bypass the converter: HEX is read as a
  // hexadecimal number and written out as the UTF-8 encoding of that code.
  converter conv= load_converter ("Cork", "UTF-8");
  int start= 0, i, n= N(input);
  string r;
  for (i=0; i<n; i++)
    if (input[i] == '<' && i+1<n && input[i+1] == '#') {
      // Flush the plain text accumulated before the escape.
      r << apply (conv, input (start, i));
      start= i= i+2;
      while (i<n && input[i] != '>') i++;
      r << encode_as_utf8 (from_hexadecimal (input (start, i)));
      // Resume after the closing '>'.  For an unterminated escape i == n
      // here; clamp so that the trailing slice below is never requested
      // with a start index past the end of the input.
      start= (i<n) ? i+1 : n;
    }
  r << apply (conv, input (start, n));
  return r;
}

Here is the call graph for this function:

Here is the caller graph for this function:

unsigned int decode_from_utf8 ( string  s,
int &  i 
)

Definition at line 565 of file converter.cpp.

                                    {
  // Decode the UTF-8 sequence of s that starts at index i and return its
  // code; i is advanced past the bytes that were consumed.
  // The caller must ensure i is a valid index: s[i] is read unconditionally.
  // Trail bytes are not validated (only their low six bits are used), and a
  // byte that cannot start a sequence is returned as is.
  unsigned char c = s[i];
  if ((0x80 & c) == 0) {
    // one byte, bit pattern 0ddddddd
    i++;
    return (unsigned int) c;
  }
  unsigned int code;
  int trail;
  if ((0xE0 & c) == 0xC0) {
    // two bytes, bit pattern 110ddddd 10dddddd
    trail = 1;
    code = c & 0x1F;
  }
  else if ((0xF0 & c) == 0xE0) {
    // three bytes, bit pattern 1110dddd 10dddddd 10dddddd
    trail = 2;
    code = c & 0x0F;
  }
  else if ((0xF8 & c) == 0xF0) {
    // four bytes, bit pattern 11110ddd 10dddddd 10dddddd 10dddddd
    trail = 3;
    code = c & 0x07;
  }
  else {
    // failsafe: not a valid lead byte (a stray trail byte or 0xF8..0xFF);
    // consume it alone and return its value.
    //cout << "failsafe: " << c << " (" << (unsigned int)(c) << ")\n";
    i++;
    return (unsigned int) c;
  }
  for (; trail > 0; trail--) {
    i++;
    // Truncated sequence: stay on the last byte of s instead of reading
    // past the end (that byte is then folded in again).
    if (i >= N(s)) i= N(s)-1;
    c = s[i];
    code = (code << 6) | (c & 0x3F);
  }
  i++;
  return code;
}

Here is the call graph for this function:

Here is the caller graph for this function:

string encode_as_utf8 ( unsigned int  code)

Definition at line 532 of file converter.cpp.

                                   {
  // Return the UTF-8 encoding of code (1 to 4 bytes), or "" when code is
  // larger than 0x1FFFFF.
  // 1 byte: 0ddddddd
  if (code <= 0x7F)
    return string ((char) code);
  // 2 bytes: 110ddddd 10dddddd
  if (code <= 0x7FF) {
    string two (2);
    two[0] = ((code >> 6) & 0x1F) | 0xC0;
    two[1] = (code & 0x3F) | 0x80;
    return two;
  }
  // 3 bytes: 1110dddd 10dddddd 10dddddd
  if (code <= 0xFFFF) {
    string three (3);
    three[0] = ((code >> 12) & 0x0F) | 0xE0;
    three[1] = ((code >> 6) & 0x3F) | 0x80;
    three[2] = (code & 0x3F) | 0x80;
    return three;
  }
  // 4 bytes: 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx
  if (code <= 0x1FFFFF) {
    string four (4);
    four[0] = ((code >> 18) & 0x07) | 0xF0;
    four[1] = ((code >> 12) & 0x3F) | 0x80;
    four[2] = ((code >> 6) & 0x3F) | 0x80;
    four[3] = (code & 0x3F) | 0x80;
    return four;
  }
  return "";
}

Here is the caller graph for this function:

hashtree<char,string> find_node ( string  key,
hashtree< char, string > ht 
)

Definition at line 365 of file converter.cpp.

                                                 {
  // Walk down from ht, taking one child per character of key, and return
  // the node reached.  An empty key returns ht itself.
  // NOTE(review): ht(c) presumably creates the child when it is missing --
  // confirm against hashtree.
  const int len = N(key);
  for (int depth = 0; depth < len; depth++)
    ht = ht (key[depth]);
  return ht;
}

Here is the call graph for this function:

Here is the caller graph for this function:

string flush ( converter  c)

Definition at line 40 of file converter.cpp.

                    {
  // Hand back the converter's accumulated output and leave its output
  // buffer empty.
  string pending = c->output;
  c->output = string ();
  return pending;
}
void hashtree_from_dictionary ( hashtree< char, string > dic,
string  file_name,
escape_type  key_escape,
escape_type  val_escape,
bool  reverse 
)

Definition at line 373 of file converter.cpp.

{
  // Fill dic from the dictionary "$TEXMACS_PATH/langs/encoding/<file_name>.scm".
  // The file must parse to a tuple; every element that is a 2-tuple of atoms
  // gives one (key, value) pair (swapped when reverse is true).  Quoted
  // strings are unquoted, then key and value are unescaped according to
  // key_escape / val_escape and stored with put_prefix_code.  Elements of
  // any other shape are silently skipped.
  system_info ("Loading",file_name);
  file_name = file_name * ".scm";
  string file;
  if (load_string (url ("$TEXMACS_PATH/langs/encoding", file_name), file, false)) {
    system_error ("Couldn't open encoding dictionary", file_name);
    return;
  }
  tree t = block_to_scheme_tree (file);
  if (!is_tuple (t)) {
    system_error ("Malformed encoding dictionary", file_name);
    return;
  }

  // Which component of each pair is the key and which is the value.
  const int key_pos = reverse ? 1 : 0;
  const int val_pos = reverse ? 0 : 1;

  for (int i=0; i<N(t); i++) {
    if (!is_func (t[i], TUPLE, 2)) continue;
    if (!is_atomic (t[i][0]) || !is_atomic (t[i][1])) continue;

    string key_string = t[i][key_pos]->label;
    string val_string = t[i][val_pos]->label;
    if (is_quoted (key_string)) key_string = scm_unquote (key_string);
    if (is_quoted (val_string)) val_string = scm_unquote (val_string);

    // Keys: '#'-escapes (raw byte or UTF-8) or "&#..." character entities.
    if (key_escape == BIT2BIT)
      key_string = convert_escapes (key_string, false);
    else if (key_escape == UTF8)
      key_string = convert_escapes (key_string, true);
    else if (key_escape == CHAR_ENTITY)
      key_string = convert_char_entities (key_string);

    // Values: '#'-escapes (raw byte or UTF-8), or wrap as "&name;".
    if (val_escape == BIT2BIT)
      val_string = convert_escapes (val_string, false);
    else if (val_escape == UTF8)
      val_string = convert_escapes (val_string, true);
    else if (val_escape == ENTITY_NAME)
      val_string = "&" * val_string * ";";

    put_prefix_code(key_string,val_string,dic);
  }
}

Here is the call graph for this function:

Here is the caller graph for this function:

int hex_digit_to_int ( unsigned char  c)

Definition at line 430 of file converter.cpp.

                                      {
  // Value of the hexadecimal digit c (ASCII codes); 0 for any other byte.
  // 0x30..0x39 are '0'..'9'
  if (c >= 0x30 && c <= 0x39) return c - 0x30;
  // 0x41..0x46 are 'A'..'F'
  if (c >= 0x41 && c <= 0x46) return c - 0x41 + 0x0A;
  // 0x61..0x66 are 'a'..'f'
  if (c >= 0x61 && c <= 0x66) return c - 0x61 + 0x0A;
  return 0;
}

Here is the caller graph for this function:

bool is_hex_digit ( char  c)

Definition at line 423 of file converter.cpp.

                           {
  // True when c is an ASCII hexadecimal digit: '0'..'9', 'A'..'F', 'a'..'f'.
  const bool decimal = (0x30 <= c && c <= 0x39);
  const bool upper = (0x41 <= c && c <= 0x46);
  const bool lower = (0x61 <= c && c <= 0x66);
  return decimal || upper || lower;
}

Here is the caller graph for this function:

converter load_converter ( string  from,
string  to 
)

Definition at line 51 of file converter.cpp.

                                        {
  // Converters are identified by the name "<from>-<to>".  When an instance
  // with that name is already registered in converter::instances, a handle
  // built from the name is returned; otherwise a new converter_rep is made.
  string key = from * "-" * to;
  if (!(converter::instances -> contains (key))) {
    converter fresh = tm_new<converter_rep> (from, to);
    return fresh;
  }
  return converter (key);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void operator<< ( converter  c,
string  str 
)

Definition at line 26 of file converter.cpp.

                                      {
  // Feed str to the converter, one match at a time, until the whole string
  // has been consumed.
  // NOTE(review): relies on c->match advancing the position it is given --
  // confirm against converter_rep::match.
  for (int pos = 0; pos < N(str); )
    c->match (str, pos);
}

Here is the call graph for this function:

void put_prefix_code ( string  key,
string  value,
hashtree< char, string > tree 
)

Definition at line 355 of file converter.cpp.

                                                                       {
  // Store value as the label of the node that key leads to in tree.
  // With DEBUG_STD set, first report when a non-empty label is replaced.
  if (DEBUG_STD) {
    hashtree<char,string> existing= find_node (key,tree);
    if (!(existing->label == ""))
      cout << "overwriting: " << existing->label << " with " << value << '\n';
  }
  hashtree<char,string> target= find_node (key,tree);
  target->set_label(value);
}

Here is the call graph for this function:

Here is the caller graph for this function:

RESOURCE_CODE ( converter  )

string t2a_to_utf8 ( string  input)

Definition at line 212 of file converter.cpp.

                           {
  // Ordinary text is passed through the "T2A" -> "UTF-8" converter.
  // Escapes of the form <#HEX> bypass the converter: HEX is read as a
  // hexadecimal number and written out as the UTF-8 encoding of that code.
  converter conv= load_converter ("T2A", "UTF-8");
  int start= 0, i, n= N(input);
  string r;
  for (i=0; i<n; i++)
    if (input[i] == '<' && i+1<n && input[i+1] == '#') {
      // Flush the plain text accumulated before the escape.
      r << apply (conv, input (start, i));
      start= i= i+2;
      while (i<n && input[i] != '>') i++;
      r << encode_as_utf8 (from_hexadecimal (input (start, i)));
      // Resume after the closing '>'.  For an unterminated escape i == n
      // here; clamp so that the trailing slice below is never requested
      // with a start index past the end of the input.
      start= (i<n) ? i+1 : n;
    }
  r << apply (conv, input (start, n));
  return r;
}

Here is the call graph for this function:

Here is the caller graph for this function:

string utf8_to_cork ( string  input)

Definition at line 178 of file converter.cpp.

                            {
  // Convert input one UTF-8 sequence at a time with the "UTF-8" -> "Cork"
  // converter.  A sequence that the converter returns unchanged and whose
  // code is 256 or above is written as the escape <#HEX> instead.
  converter conv= load_converter ("UTF-8", "Cork");
  const int len = N(input);
  string result;
  int pos = 0;
  while (pos < len) {
    const int first = pos;
    unsigned int code = decode_from_utf8 (input, pos);
    string raw = input (first, pos);
    string mapped = apply (conv, raw);
    if (mapped == raw && code >= 256)
      result << ("<#" * as_hexadecimal (code) * ">");
    else
      result << mapped;
  }
  return result;
}

Here is the call graph for this function:

Here is the caller graph for this function:

string utf8_to_hex_entities ( string  s)

Definition at line 606 of file converter.cpp.

                                {
  // Replace every non-ASCII UTF-8 sequence of s by a hexadecimal character
  // entity "&#xHHHH;" (at least four hex digits, zero-padded).  ASCII bytes
  // and the bytes 0xF8..0xFF are copied through unchanged.
  string result;
  const int len = N(s);
  int pos = 0;
  while (pos < len) {
    unsigned char lead = s[pos];
    const bool ascii = (lead & 0x80) == 0;
    const bool beyond_utf8 = (lead & 0xF8) == 0xF8;
    if (ascii || beyond_utf8) {
      result << lead;
      pos++;
      continue;
    }
    // decode_from_utf8 advances pos past the sequence it consumed.
    unsigned int code = decode_from_utf8 (s, pos);
    string hex = as_hexadecimal (code);
    while (N(hex) < 4) hex = "0" * hex;
    result << "&#x" << hex << ";";
  }
  return result;
}

Here is the call graph for this function:

Here is the caller graph for this function:

string utf8_to_html ( string  input)

Definition at line 229 of file converter.cpp.

                            {
  // Two passes: run the input through the "UTF-8" -> "HTML" converter, then
  // turn the remaining non-ASCII sequences into hexadecimal entities.
  converter conv = load_converter ("UTF-8", "HTML");
  return utf8_to_hex_entities (apply (conv, input));
}

Here is the call graph for this function:

Here is the caller graph for this function: