Back to index

im-sdk  12.3.91
EIMILTextUtil.c
Go to the documentation of this file.
00001 /*
00002   EIMILTextUtil.c
00003     EIMIL mtext Manager.
00004 */
00005 
00006 #include <stdio.h>
00007 #include <string.h>
00008 #include <stdlib.h>
00009 #include <EIMIL.h>
00010 #include "EIMILint.h"
00011 
00012 /* 
00013    NOTICE!!!
00014    IMTextUtil.c sets native_chars in UTF-32 format, and sets encoding to
00015    UTF32_CODESET.  All of the functions below except Normalize_IMText()
00016    treat IMText as a UTF-32 based string.  Therefore, you cannot use it
00017    as an external representation before calling Normalize_IMText().
00018 
00019    EIMIL_convert_mtext_to_IMText converts text.native_chars, which is encoded in UTF-32,
00020    into text.utf_chars in UTF-16; recalculates start_pos and end_pos of IMAnnotationValue;
00021    and properly fills IMFeedBack from the original value and the 'feedback attribute.
00022 */
00023 
/* Smaller / larger of two values.  Arguments are evaluated twice. */
#ifndef MIN
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif
#ifndef MAX
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#endif

/* Non-zero when the half-open ranges [s1, e1) and [s2, e2) share
   at least one position. */
#define EIMIL_INTERVAL_OVERLAP_P(s1, e1, s2, e2) (MIN(e1, e2) > MAX(s1, s2))

/* Classification of a 16-bit UTF-16 code unit:
     UTF16_S_P   any surrogate       (0xD800-0xDFFF)
     UTF16_HS_P  high/lead surrogate (0xD800-0xDBFF)
     UTF16_LS_P  low/trail surrogate (0xDC00-0xDFFF)
   Only the low 16 bits are examined, so these must not be used
   to classify a full UTF-32 value.  The argument is parenthesized
   so that expressions such as (a | b) are classified as a whole. */
#define UTF16_S_P(v) (((v) & 0xF800) == 0xD800)
#define UTF16_HS_P(v) (((v) & 0xFC00) == 0xD800)
#define UTF16_LS_P(v) (((v) & 0xFC00) == 0xDC00)
00036 
00037 int
00038 EIMIL_UTF32_string_len(
00039     const UTF32 *pstr
00040 )
00041 {
00042     int i;
00043     for (i = 0;*pstr;pstr++, i++);
00044     return i;
00045 }
00046 
00047 int
00048 EIMIL_UTF32_strcmp(
00049     const UTF32 *pstr1,
00050     const UTF32 *pstr2
00051 )
00052 {
00053     for (;; pstr1++, pstr2++) {
00054        if (*pstr1 > *pstr2) return 1;
00055        if (*pstr1 < *pstr2) return -1;
00056        if (*pstr1 == 0) return 0;
00057     }
00058     return 0;
00059 }
00060 
00061 int
00062 EIMIL_convert_UTF32char_to_UTF8(
00063     UTF32 ch,
00064     UTF8 *p
00065 )
00066 {
00067     if (ch < 0x80) {
00068        *p++ = ch;
00069        return 1;
00070     } else if (ch < 0x800) {
00071        *p++ = (ch >> 6) | 0xC0;
00072        *p++ = (ch & 0x3F) | 0x80;
00073        return 2;
00074     } else if (ch < 0x10000) {
00075        *p++ = (ch >> 12) | 0xE0;
00076        *p++ = ((ch >> 6) & 0x3F) | 0x80;
00077        *p++ = (ch & 0x3F) | 0x80;
00078        return 3;
00079     } else if (ch < 0x200000) {
00080        *p++ = (ch >> 18) | 0xF0;
00081        *p++ = ((ch >> 12) & 0x3F) | 0x80;
00082        *p++ = ((ch >> 6) & 0x3F) | 0x80;
00083        *p++ = (ch & 0x3F) | 0x80;
00084        return 4;
00085     } else if (ch < 0x4000000) {
00086        /* actually, UTF-32 forbids the area over 0x10FFFF. */
00087        *p++ = (ch >> 24) | 0xF8;
00088        *p++ = ((ch >> 18) & 0x3F) | 0x80;
00089        *p++ = ((ch >> 12) & 0x3F) | 0x80;
00090        *p++ = ((ch >> 6) & 0x3F) | 0x80;
00091        *p++ = (ch & 0x3F) | 0x80;
00092        return 5;
00093     } else {
00094        /* actually, UTF-32 forbids the area over 0x10FFFF. */
00095        *p++ = ((ch >> 30) & 0x1) | 0xFC;
00096        *p++ = ((ch >> 24) & 0x3F) | 0x80;
00097        *p++ = ((ch >> 18) & 0x3F) | 0x80;
00098        *p++ = ((ch >> 12) & 0x3F) | 0x80;
00099        *p++ = ((ch >> 6) & 0x3F) | 0x80;
00100        *p++ = (ch & 0x3F) | 0x80;
00101        return 6;
00102     }
00103     return 0;
00104 }
00105 
00106 UTF8*
00107 EIMIL_convert_UTF32_to_UTF8(
00108     const UTF32 *putf32
00109 )
00110 {
00111     int n;
00112     const UTF32 *pu32;
00113     UTF8 *ps, *p;
00114 
00115     for (n = 0, pu32 = putf32; *pu32; pu32++) {
00116        if (*pu32 < 0x80) n++;
00117        else if (*pu32 < 0x800) n += 2;
00118        else if (*pu32 < 0x10000) n += 3;
00119        else if (*pu32 < 0x200000) n += 4;
00120        else if (*pu32 < 0x4000000) n += 5;
00121        else n += 6;
00122     }
00123 
00124     ps = p = (UTF8*) malloc(sizeof(UTF8) * (n + 1));
00125     if (!ps) return NULL;
00126 
00127     for (; *putf32; putf32++) {
00128        if (*putf32 < 0x80) {
00129            *p++ = *putf32;
00130        } else if (*putf32 < 0x800) {
00131            *p++ = (*putf32 >> 6) | 0xC0;
00132            *p++ = (*putf32 & 0x3F) | 0x80;
00133        } else if (*putf32 < 0x10000) {
00134            *p++ = (*putf32 >> 12) | 0xE0;
00135            *p++ = ((*putf32 >> 6) & 0x3F) | 0x80;
00136            *p++ = (*putf32 & 0x3F) | 0x80;
00137        } else if (*putf32 < 0x200000) {
00138            *p++ = (*putf32 >> 18) | 0xF0;
00139            *p++ = ((*putf32 >> 12) & 0x3F) | 0x80;
00140            *p++ = ((*putf32 >> 6) & 0x3F) | 0x80;
00141            *p++ = (*putf32 & 0x3F) | 0x80;
00142        } else if (*putf32 < 0x4000000) {
00143            /* actually, UTF-32 forbids the area over 0x10FFFF. */
00144            *p++ = (*putf32 >> 24) | 0xF8;
00145            *p++ = ((*putf32 >> 18) & 0x3F) | 0x80;
00146            *p++ = ((*putf32 >> 12) & 0x3F) | 0x80;
00147            *p++ = ((*putf32 >> 6) & 0x3F) | 0x80;
00148            *p++ = (*putf32 & 0x3F) | 0x80;
00149        } else {
00150            /* actually, UTF-32 forbids the area over 0x10FFFF. */
00151            *p++ = ((*putf32 >> 30) & 0x1) | 0xFC;
00152            *p++ = ((*putf32 >> 24) & 0x3F) | 0x80;
00153            *p++ = ((*putf32 >> 18) & 0x3F) | 0x80;
00154            *p++ = ((*putf32 >> 12) & 0x3F) | 0x80;
00155            *p++ = ((*putf32 >> 6) & 0x3F) | 0x80;
00156            *p++ = (*putf32 & 0x3F) | 0x80;
00157        }
00158     }
00159     *p = 0;
00160     return ps;
00161 }
00162 
00163 int
00164 EIMIL_convert_UTF8_to_UTF32char(
00165     const UTF8 *p,
00166     UTF32 *pch
00167 )
00168 {
00169     if (!p) return 0;
00170 
00171     if (*p < 0x80) {
00172        if (pch) *pch = *p;
00173        return 1;
00174     } else if (*p < 0xE0) {
00175        if (pch) *pch = (((p[0] & 0x1F) << 6)
00176               | (p[1] & 0x3F));
00177        return 2;
00178     } else if (*p < 0xF0) {
00179        if (pch) *pch = (((p[0] & 0x0F) << 12)
00180               | ((p[1] & 0x3F) << 6)
00181               | (p[2] & 0x3F));
00182        return 3;
00183     } else if (*p < 0xF8) {
00184        if (pch) *pch = (((p[0] & 0x07) << 18)
00185               | ((p[1] & 0x3F) << 12)
00186               | ((p[2] & 0x3F) << 6)
00187               | (p[3] & 0x3F));
00188        return 4;
00189     } else if (*p < 0xFC) {
00190        if (pch) *pch = (((p[0] & 0x03) << 24)
00191               | ((p[1] & 0x3F) << 18)
00192               | ((p[2] & 0x3F) << 12)
00193               | ((p[3] & 0x3F) << 6)
00194               | (p[4] & 0x3F));
00195        return 5;
00196     } else {
00197        if (pch) *pch = (((p[0] & 0x01) << 30)
00198               | ((p[1] & 0x3F) << 24)
00199               | ((p[2] & 0x3F) << 18)
00200               | ((p[3] & 0x3F) << 12)
00201               | ((p[4] & 0x3F) << 6)
00202               | (p[5] & 0x3F));
00203        return 6;
00204     }
00205     return 0;
00206 }
00207 
00208 UTF32*
00209 EIMIL_convert_UTF8_to_UTF32(
00210     const UTF8 *putf8
00211 )
00212 {
00213     int n;
00214     const UTF8 *pu8;
00215     UTF32 *ps, *p;
00216 
00217     for (n = 0, pu8 = putf8; *pu8; n++) {
00218        if (*pu8 < 0x80) pu8++;
00219        else if (*pu8 < 0xE0) pu8 += 2;
00220        else if (*pu8 < 0xF0) pu8 += 3;
00221        else if (*pu8 < 0xF8) pu8 += 4;
00222        else if (*pu8 < 0xFC) pu8 += 5;
00223        else pu8 += 6;
00224     }
00225 
00226     ps = p = (UTF32*) malloc(sizeof(UTF32) * (n + 1));
00227     if (!ps) return NULL;
00228 
00229     while (*putf8) {
00230        if (*putf8 < 0x80) {
00231            *p++ = *putf8++;
00232        } else if (*putf8 < 0xE0) {
00233            *p++ = (((putf8[0] & 0x1F) << 6)
00234                   | (putf8[1] & 0x3F));
00235            putf8 += 2;
00236        } else if (*putf8 < 0xF0) {
00237            *p++ = (((putf8[0] & 0x0F) << 12)
00238                   | ((putf8[1] & 0x3F) << 6)
00239                   | (putf8[2] & 0x3F));
00240            putf8 += 3;
00241        } else if (*putf8 < 0xF8) {
00242            *p++ = (((putf8[0] & 0x07) << 18)
00243                   | ((putf8[1] & 0x3F) << 12)
00244                   | ((putf8[2] & 0x3F) << 6)
00245                   | (putf8[3] & 0x3F));
00246            putf8 += 4;
00247        } else if (*putf8 < 0xFC) {
00248            *p++ = (((putf8[0] & 0x03) << 24)
00249                   | ((putf8[1] & 0x3F) << 18)
00250                   | ((putf8[2] & 0x3F) << 12)
00251                   | ((putf8[3] & 0x3F) << 6)
00252                   | (putf8[4] & 0x3F));
00253            putf8 += 5;
00254        } else {
00255            *p++ = (((putf8[0] & 0x01) << 30)
00256                   | ((putf8[1] & 0x3F) << 24)
00257                   | ((putf8[2] & 0x3F) << 18)
00258                   | ((putf8[3] & 0x3F) << 12)
00259                   | ((putf8[4] & 0x3F) << 6)
00260                   | (putf8[5] & 0x3F));
00261            putf8 += 6;
00262        }
00263     }
00264     *p = 0;
00265     return ps;
00266 }
00267 
00268 int
00269 EIMIL_adjust_UTF16_pos_to_UTF32(
00270     int pos,
00271     const UTF32 *pbase,
00272     const UTF32 *pbaseend
00273 )
00274 {
00275     int i, npos;
00276 
00277     for (i = 0, npos = 0;i < pos;npos++, pbase++) {
00278        if (pbase >= pbaseend) return -1;
00279        if (*pbase < 0x10000) {
00280            i++;
00281        }else{
00282            i += 2;
00283        }
00284     }
00285     return npos;
00286 }
00287 
00288 int
00289 EIMIL_adjust_UTF32_pos_to_UTF16(
00290     int pos,
00291     const UTF32 *pbase,
00292     const UTF32 *pbaseend
00293 )
00294 {
00295     const UTF32 *pe;
00296     int npos;
00297 
00298     pe = pbase + pos;
00299     for (npos = 0;pbase < pe;pbase++) {
00300        if (pbase >= pbaseend) return -1;
00301        if (*pbase < 0x10000) {
00302            npos++;
00303        }else{
00304            npos += 2;
00305        }
00306     }
00307     return npos;
00308 }
00309 
00310 int
00311 EIMIL_convert_UTF32_to_UTF16(
00312     const UTF32 *pu32,
00313     int u32len,
00314     UTF16 **ppu16,
00315     int *pu16len
00316 )
00317 {
00318     int i, rlen;
00319     UTF32 u32;
00320     UTF16 *pr, *prh;
00321 
00322     prh = (UTF16*) malloc(sizeof(UTF16) * (u32len * 2 + 1));
00323     if (!prh) return 0;
00324     pr = prh;
00325 
00326     for (i = 0; i < u32len; i++) {
00327        u32 = *pu32++;
00328        if (UTF16_S_P(u32)) {
00329            /* invalid code.
00330               TODO:We should output error.  */
00331        }else if (u32 < 0x10000) {
00332            *pr++ = (UTF16) u32;
00333        }else if (u32 < 0x110000) {
00334            u32 -= 0x10000;
00335            *pr++ = ((u32 >> 10) | 0xD800);
00336            *pr++ = ((u32 & 0x3FF) | 0xDC00);
00337        }else{
00338            /* invalid code.
00339               TODO:We should output error.  */
00340        }
00341     }
00342     *pr = 0;
00343     rlen = pr - prh;
00344     prh = (UTF16*) realloc(prh, sizeof(UTF16) * (rlen + 1));
00345     *pu16len = rlen;
00346     *ppu16 = prh;
00347 
00348     return 1;
00349 }
00350 
00351 int
00352 EIMIL_convert_UTF16_to_UTF32(
00353     const UTF16 *pu16,
00354     int u16len,
00355     UTF32 **ppu32,
00356     int *pu32len
00357 )
00358 {
00359     int i, rlen;
00360     UTF16 hs1, hs2;
00361     UTF32 *pr, *prh;
00362 
00363     prh = (UTF32*) malloc(sizeof(UTF32) * (u16len + 1));
00364     if (!prh) return 0;
00365     pr = prh;
00366 
00367     for (i = 0; i < u16len;) {
00368        hs1 = *pu16++;
00369        i++;
00370        if (UTF16_HS_P(hs1)) {
00371            hs2 = *pu16++;
00372            i++;
00373            if (i > u16len) {
00374               /* Invalid code.
00375                  TODO:We should output error.  */
00376               break;
00377            }
00378            if (UTF16_LS_P(hs2)) {
00379               *pr++ = (((hs1 & 0x3FF) << 10) | (hs2 & 0x3FF)) + 0x10000;
00380            }else{
00381               /* Invalid code.
00382                  TODO:We should output error.  */
00383               *pr++ = hs2;
00384            }
00385        }else{
00386            *pr++ = hs1;
00387        }
00388     }
00389     rlen = pr - prh;
00390     if (rlen != u16len)
00391        prh = (UTF32*) realloc(prh, sizeof(UTF32) * (rlen + 1));
00392     *pr = 0;
00393     *pu32len = rlen;
00394     *ppu32 = prh;
00395        
00396     return 1;
00397 }
00398 
00399 void
00400 EIMIL_destruct_mtext(
00401     EIMIL_mtext *pmt
00402 )
00403 {
00404     int i, j;
00405     EIMIL_mtext_props *pmp;
00406     EIMIL_value **ppv;
00407     EIMIL_prop *pprop;
00408 
00409 
00410     if (pmt->pslots) {
00411        for (pmp = pmt->pslots, i = 0;
00412             i < pmt->slotsnum;
00413             i++, pmp++) {
00414            if (pmp->pprops) {
00415               for (ppv = pmp->pprops, j = 0;
00416                    j < pmp->num;
00417                    j++, ppv++) {
00418                   ASSERT((*ppv)->type == EIMIL_TYPE_PROP);
00419                   pprop = &(*ppv)->v.prop;
00420                   ASSERT(pprop->target == pmt);
00421                   pprop->st = pprop->end = -1;
00422                   pprop->target = NULL;
00423                   EIMIL_RMREF(**ppv);
00424               }
00425               free(pmp->pprops);
00426            }
00427        }
00428        free(pmt->pslots);
00429     }
00430     if (pmt->ustr) free(pmt->ustr);
00431 
00432     return;
00433 }
00434 
00435 EIMIL_value*
00436 EIMIL_construct_mtext_from_UTF8(
00437     const UTF8 *in
00438 )
00439 {
00440     UTF32 *pu;
00441     EIMIL_value *pv;
00442     EIMIL_mtext *pm;
00443 
00444     pv = (EIMIL_value*) malloc(sizeof(EIMIL_value));
00445     if (!pv) return NULL;
00446     memset(pv, 0, sizeof(EIMIL_value));
00447     pv->type = EIMIL_TYPE_MTEXT;
00448     pm = &pv->v.mtext;
00449     if (!(pu = EIMIL_convert_UTF8_to_UTF32(in))) {
00450        free(pm);
00451        return NULL;
00452     }
00453     pm->len = EIMIL_UTF32_string_len(pu);
00454     pm->slotsnum = 0;
00455     pm->pslots = NULL;
00456     pm->UIdatap = 0;
00457     pm->ustr = pu;
00458 
00459     return pv;
00460 }
00461 
00462 EIMIL_value*
00463 EIMIL_construct_mtext_from_UTF16(
00464     int len,
00465     const UTF16 *in
00466 )
00467 {
00468     EIMIL_value *pv;
00469     EIMIL_mtext *pm;
00470 
00471     pv = (EIMIL_value*) malloc(sizeof(EIMIL_value));
00472     if (!pv) return NULL;
00473     memset(pv, 0, sizeof(EIMIL_value));
00474     pv->type = EIMIL_TYPE_MTEXT;
00475     pm = &pv->v.mtext;
00476     if (!EIMIL_convert_UTF16_to_UTF32(in, len, &pm->ustr, &pm->len))
00477        return NULL;
00478     pm->slotsnum = 0;
00479     pm->pslots = NULL;
00480     pm->UIdatap = 0;
00481 
00482     return pv;
00483 }
00484 
00485 EIMIL_value*
00486 EIMIL_construct_mtext_from_UTF32(
00487     int len,
00488     const UTF32 *in
00489 )
00490 {
00491     int i;
00492     UTF32 *pu;
00493     EIMIL_value *pv;
00494     EIMIL_mtext *pm;
00495 
00496     pv = (EIMIL_value*) malloc(sizeof(EIMIL_value));
00497     if (!pv) return NULL;
00498     memset(pv, 0, sizeof(EIMIL_value));
00499     pv->type = EIMIL_TYPE_MTEXT;
00500     pm = &pv->v.mtext;
00501     pu = (UTF32*) malloc(sizeof(UTF32) * (len + 1));
00502     if (!pu) {
00503        free(pm);
00504        return NULL;
00505     }
00506     pm->len = len;
00507     pm->slotsnum = 0;
00508     pm->pslots = NULL;
00509     pm->UIdatap = 0;
00510     pm->ustr = pu;
00511 
00512     for (i = 0; i < len; i++) *pu++ = *in++;
00513     *pu = 0;
00514 
00515     return pv;
00516 }
00517 
00518 EIMIL_value*
00519 EIMIL_construct_mtext_from_UTF32_char(
00520     UTF32 in
00521 )
00522 {
00523     UTF32 *pu;
00524     EIMIL_value *pv;
00525     EIMIL_mtext *pm;
00526 
00527     pv = (EIMIL_value*) malloc(sizeof(EIMIL_value));
00528     if (!pv) return NULL;
00529     memset(pv, 0, sizeof(EIMIL_value));
00530     pv->type = EIMIL_TYPE_MTEXT;
00531     pm = &pv->v.mtext;
00532     pu = (UTF32*) malloc(sizeof(UTF32) * 2);
00533     if (!pu) {
00534        free(pm);
00535        return NULL;
00536     }
00537     pm->len = 1;
00538     pm->slotsnum = 0;
00539     pm->pslots = NULL;
00540     pm->UIdatap = 0;
00541     pm->ustr = pu;
00542     *pu = in;
00543     pu[1] = 0;
00544 
00545     return pv;
00546 }
00547 
00548 int
00549 EIMIL_mtext_equal(
00550     EIMIL_mtext *pm1,
00551     EIMIL_mtext *pm2
00552 )
00553 {
00554     /* TODO!! */
00555     return 0;
00556 }
00557 
00558 static EIMIL_mtext_props*
00559 EIMIL_find_mtext_props(
00560     EIMIL_mtext *pm,
00561     EIMIL_symbol *property_sym
00562 )
00563 {
00564     int i, n;
00565     EIMIL_mtext_props *pmp;
00566 
00567     n = pm->slotsnum;
00568     for (pmp = pm->pslots, i = 0; i < n; i++, pmp++) {
00569        if (pmp->property_sym == property_sym) break;
00570     }
00571     if (i == n) return NULL;
00572 
00573     return pmp;
00574 }
00575 
00576 static EIMIL_mtext_props*
00577 EIMIL_prepare_mtext_props_slot(
00578     EIMIL_mtext *pm,
00579     EIMIL_symbol *property_sym
00580 )
00581 {
00582     int n;
00583     EIMIL_mtext_props *pmp;
00584 
00585     pmp = EIMIL_find_mtext_props(pm, property_sym);
00586     if (pmp) return pmp;
00587     n = pm->slotsnum;
00588     pm->slotsnum++;
00589     pm->pslots = realloc(pm->pslots, sizeof(EIMIL_mtext_props) * pm->slotsnum);
00590     if (!pm->pslots) return NULL;
00591     pmp = pm->pslots + n;
00592     pmp->num = 0;
00593     pmp->property_sym = property_sym;
00594     pmp->pprops = NULL;
00595 
00596     return pmp;
00597 }
00598 
00599 EIMIL_value*
00600 EIMIL_find_prop_from_mtext(
00601     EIMIL_mtext *pm,
00602     EIMIL_symbol *property_sym, int pos
00603 )
00604 {
00605     int i, n;
00606     int minpos, minpos_idx;
00607     EIMIL_mtext_props *pmp;
00608     EIMIL_value **ppv, *pv;
00609     EIMIL_prop *pprop;
00610 
00611     minpos_idx = -1;
00612 
00613     pmp = EIMIL_find_mtext_props(pm, property_sym);
00614     if (!pmp) return NULL;
00615 
00616     n = pmp->num;
00617     for (ppv = pmp->pprops, i = 0; i < n; ppv++, i++) {
00618        pv = *ppv;
00619        ASSERT(pv->type == EIMIL_TYPE_PROP);
00620        pprop = &pv->v.prop;
00621        ASSERT(pprop->property_sym == property_sym);
00622        if ((pprop->st <= pos)
00623            && (pprop->end > pos))
00624            return pv;
00625        if ((pprop->st > pos)
00626            && ((minpos_idx < 0) || (minpos > pprop->st))) {
00627            minpos_idx = i;
00628            minpos = pprop->st;
00629        }
00630     }
00631     if (minpos_idx > 0)
00632        return pmp->pprops[minpos_idx];
00633 
00634     return NULL;
00635 }
00636 
00637 EIMIL_value*
00638 EIMIL_get_prop_from_mtext(
00639     EIMIL_mtext *pm,
00640     EIMIL_symbol *property_sym,
00641     int pos
00642 )
00643 {
00644     int i, n;
00645     EIMIL_mtext_props *pmp;
00646     EIMIL_value **ppv, *pv;
00647     EIMIL_prop *pprop;
00648 
00649     pmp = EIMIL_find_mtext_props(pm, property_sym);
00650     if (!pmp) return NULL;
00651 
00652     n = pmp->num;
00653     for (ppv = pmp->pprops, i = 0; i < n; ppv++, i++) {
00654        pv = *ppv;
00655        ASSERT(pv->type == EIMIL_TYPE_PROP);
00656        pprop = &pv->v.prop;
00657        ASSERT(pprop->property_sym == property_sym);
00658        if ((pprop->st <= pos)
00659            && (pprop->end > pos))
00660            return pv;
00661     }
00662 
00663     return NULL;
00664 }
00665 
00666 void
00667 EIMIL_detach_prop_from_mtext(
00668     EIMIL_value *pv
00669 )
00670 {
00671     int i, n;
00672     EIMIL_mtext *pmt;
00673     EIMIL_mtext_props *pmp;
00674     EIMIL_value **ppv;
00675     EIMIL_prop *pprop;
00676 
00677     ASSERT(pv->type == EIMIL_TYPE_PROP);
00678 
00679     pprop = &pv->v.prop;
00680     pmt = pprop->target;
00681        
00682     if (!pmt) return;
00683 
00684     pmp = EIMIL_find_mtext_props(pmt, pprop->property_sym);
00685     ASSERT(pmp);
00686     n = pmp->num;
00687 
00688     for (ppv = pmp->pprops, i = 0; i < pmp->num; ppv++, i++) {
00689        if (*ppv == pv) {
00690            pmp->num--;
00691            if ((n - i - 1) > 0) {
00692               memmove(ppv, ppv + 1, sizeof(EIMIL_value*) * (n - i - 1));
00693            }
00694            pprop->st = pprop->end = -1;
00695            pprop->target = NULL;
00696            EIMIL_RMREF(*pv);
00697            return;
00698        }
00699     }
00700     /* not reached */
00701     abort();
00702 
00703     return;
00704 }
00705 
00706 /*
00707   mtext and property
00708 
00709   
00710   Notation: |----| mtext
00711             <----> property
00712 
00713   |-------------------------------------------|
00714             <--P1-->                 <--P4-->
00715                           <--P2->
00716                 <---P3----->
00717    pprops : [P3 P1 P2 P4]
00718 */
00719 
00720 EIMIL_mtext*
00721 EIMIL_add_prop_on_mtext(
00722     EIMIL_mtext *pm,
00723     EIMIL_value *pv,
00724     int st,
00725     int end
00726 )
00727 {
00728     int i, n;
00729     EIMIL_mtext_props *pmp;
00730     EIMIL_value **ppv, *pv2;
00731     EIMIL_prop *pprop;
00732 
00733     ASSERT(pv->type == EIMIL_TYPE_PROP);
00734 
00735     pmp = EIMIL_prepare_mtext_props_slot(pm, pv->v.prop.property_sym);
00736     if (!pmp) return NULL;
00737     n = pmp->num;
00738 
00739     for (ppv = pmp->pprops, i = 0; i < n; ppv++, i++) {
00740        pv2 = *ppv;
00741        ASSERT(pv2->type == EIMIL_TYPE_PROP);
00742        pprop = &pv2->v.prop;
00743        ASSERT(pprop->property_sym == pmp->property_sym);
00744        if (EIMIL_INTERVAL_OVERLAP_P(st, end, pprop->st, pprop->end)) break;
00745     }
00746     pmp->pprops = (EIMIL_value**) realloc(pmp->pprops,
00747                                      sizeof(EIMIL_value*) * n + 1);
00748     if (!pmp->pprops) return NULL;
00749     ppv = pmp->pprops + i;
00750     if (n > i) {
00751        memmove(ppv + 1, ppv, sizeof(EIMIL_value*) * (n - i));
00752     }
00753     *ppv = pv;
00754     pmp->num++;
00755     pv->v.prop.st = st;
00756     pv->v.prop.end = end;
00757     pv->v.prop.target = pm;
00758     EIMIL_ADDREF(*pv);
00759 
00760     return pm;
00761 }
00762 
00763 /*
00764   |-------------------------------------------|
00765             <--P1-->                 <--P4-->
00766                           <--P2->              
00767                 <---P3----->
00768                     <==setmprop:P5=====>
00769 
00770           [P3 P2 P1 P4]
00771                 then...
00772 
00773 
00774   |-------------------------------------------|
00775             <--P1-->                     <P4>
00776                           XXP2XXX              
00777                 <P3->
00778                      <==setmprop:P5=====>
00779          [P3 P1 P5 P4]
00780 */
00781 
00782 EIMIL_mtext*
00783 EIMIL_set_prop_on_mtext(
00784     EIMIL_mtext *pm,
00785     EIMIL_value *pv,
00786     int st,
00787     int end
00788 )
00789 {
00790     int i, n, idx;
00791     int mst, mend;
00792     EIMIL_mtext_props *pmp;
00793     EIMIL_value **ppv, *pv2, *pv3;
00794     EIMIL_prop *pprop;
00795 
00796     ASSERT(pv->type == EIMIL_TYPE_PROP);
00797        
00798     pmp = EIMIL_prepare_mtext_props_slot(pm, pv->v.prop.property_sym);
00799     if (!pmp) return NULL;
00800     n = pmp->num;
00801 
00802     idx = -1;
00803     ppv = pmp->pprops;
00804     for (i = 0; i < n;) {
00805        pv2 = ppv[i];
00806        ASSERT(pv2->type == EIMIL_TYPE_PROP);
00807        pprop = &pv2->v.prop;
00808        ASSERT(pprop->target == pm);
00809        ASSERT(pprop->property_sym == pmp->property_sym);
00810        mst = pprop->st;
00811        mend = pprop->end;
00812        if ((mend >= st) && (mst < st)) {
00813            /* <--->         */
00814            /*   <===>       */
00815            pprop->end = st;
00816            i++;
00817        }else if ((mend <= end) && (mst >= st)) {
00818            /*   <--->       */
00819            /*  <======>     */
00820            pprop->st = -1;
00821            pprop->end = -1;
00822            pprop->target = NULL;
00823            n--;
00824            if (n > i) {
00825               memmove(ppv + i, ppv + i + 1,
00826                      sizeof(EIMIL_value*) * (n - i));
00827            }
00828            EIMIL_RMREF(*pv2);
00829        }else if ((mst <= end) && (mend > end)) {
00830            /*       <--->   */
00831            /*  <======>     */
00832            pprop->st = end;
00833            i++;
00834        }else if ((mst < st) && (mend > end)) {
00835            /* <-------->    */
00836            /*    <===>      */
00837            /*     |         */
00838            /*     V         */
00839            /* <-><===><>    */
00840            /*  i      i+1   */
00841            pv3 = EIMIL_copy_value(pv2);
00842            if (!pv3) return NULL;
00843            pv3->v.prop.st = end;
00844            EIMIL_ADDREF(*pv3);
00845            pprop->end = st;
00846            ppv = (EIMIL_value**) realloc(ppv, sizeof(EIMIL_value*) * (n + 1));
00847            pmp->pprops = ppv;
00848            if (n > (i + 1)) {
00849               memmove(ppv + i + 2, ppv + i + 1,
00850                      sizeof(EIMIL_value*) * (n - i));
00851            }
00852            ppv[i + 1] = pv3;
00853            n++;
00854            if (idx < 0) idx = i + 1;
00855            i += 2;
00856        }else if (mst < st) {
00857            /* <---->        */
00858            /*       <====>  */
00859            if (idx < 0) idx = i;
00860            i++;
00861        }else{
00862            /*        <----> */
00863            /* <====>        */
00864            i++;
00865        }
00866     }
00867     pmp->pprops = (EIMIL_value**) realloc(pmp->pprops,
00868                                      sizeof(EIMIL_value*) * (n + 1));
00869     if (!pmp->pprops) return NULL;
00870     ppv = pmp->pprops + i;
00871     if (n > i) {
00872        memmove(ppv + 1, ppv, sizeof(EIMIL_value*) * (n - i));
00873     }
00874     *ppv = pv;
00875     pmp->num = n + 1;
00876     pv->v.prop.st = st;
00877     pv->v.prop.end = end;
00878     pv->v.prop.target = pm;
00879     EIMIL_ADDREF(*pv);
00880 
00881     return pm;
00882 }
00883 
00884 static EIMIL_value*
00885 EIMIL_move_prop(
00886     EIMIL_mtext *pm_target,
00887     EIMIL_value *pv,
00888     int dif,
00889     int last
00890 )
00891 {
00892     int st, end;
00893     EIMIL_value *pv2;
00894     ASSERT(pv->type == EIMIL_TYPE_PROP);
00895 
00896     pv2 = EIMIL_copy_value(pv);
00897     if (!pv2) return NULL;
00898     st = pv->v.prop.st;
00899     end = pv->v.prop.end;
00900     st += dif;
00901     end += dif;
00902     if (st < 0) st = 0;
00903     if (end > last) end = last;
00904 
00905     ASSERT((st < end) && (st >= 0) && (end <= last));
00906 
00907     pv2->v.prop.st = st;
00908     pv2->v.prop.end = end;
00909 
00910     pv2->v.prop.target = pm_target;
00911     EIMIL_ADDREF(*pv2);
00912 
00913     return pv2;
00914 }
00915 
00916 EIMIL_value*
00917 EIMIL_mtext_concat(
00918     int num,
00919     EIMIL_value **pvs
00920 )
00921 {
00922     int i, j, k;
00923     int clen, pos;
00924     EIMIL_mtext *pm, *pmr;
00925     EIMIL_value **pvs2, *pvr;
00926 
00927     EIMIL_value **ppv;
00928     EIMIL_mtext_props *pmp, *pmp2;
00929     UTF32 *pu;
00930 
00931     pvr = (EIMIL_value*) malloc(sizeof(EIMIL_value));
00932     if (!pvr) return NULL;
00933     memset(pvr, 0, sizeof(EIMIL_value));
00934     pvr->type = EIMIL_TYPE_MTEXT;
00935     pmr = &pvr->v.mtext;
00936 
00937     pmp2 = NULL;
00938     pos = 0;
00939     for (pvs2 = pvs, i = 0; i < num; pvs2++, i++) {
00940        if ((*pvs2)->type == EIMIL_TYPE_MTEXT) {
00941            pm = &((*pvs2)->v.mtext);
00942            clen = pm->len;
00943            for (pmp = pm->pslots, j = 0; j < pm->slotsnum; pmp++, j++) {
00944               pmp2 = EIMIL_prepare_mtext_props_slot(pmr, pmp->property_sym);
00945               if (!pmp2) goto error;
00946               pmp2->pprops = realloc(pmp2->pprops,
00947                                    sizeof(EIMIL_value*)
00948                                    * (pmp2->num + pmp->num));
00949               if (!pmp2->pprops) goto error;
00950               ppv = pmp2->pprops + pmp2->num;
00951               for (k = 0; k < pmp->num; k++) {
00952                   *ppv = EIMIL_move_prop(pmr, pmp->pprops[k],
00953                                       pos, pos + clen);
00954                   if (!*ppv) goto error;
00955                   ppv++;
00956               }
00957               pmp2->num += pmp->num;
00958            }
00959            pos += pm->len;
00960        } else if ((*pvs2)->type == EIMIL_TYPE_CHAR) {
00961            pos++;
00962        } else {
00963            ERROR_INTERNAL("Invalid type(must be mtext or char).");
00964        }
00965     }
00966     pmr->len = pos;
00967     pu = (UTF32*) malloc(sizeof(UTF32) * (pos + 1));
00968     if (!pu) goto error;
00969     pmr->ustr = pu;
00970 
00971     for (pvs2 = pvs, i = 0; i < num; pvs2++, i++) {
00972        if ((*pvs2)->type == EIMIL_TYPE_MTEXT) {
00973            pm = &((*pvs2)->v.mtext);
00974            memcpy(pu, pm->ustr, sizeof(UTF32) * pm->len);
00975            pu += pm->len;
00976        } else {
00977            /* EIMIL_TYPE_CHAR */
00978            *pu++ = (*pvs2)->v.ch;
00979        }
00980     }
00981     *pu = 0;
00982 
00983     return pvr;
00984 
00985 error:
00986     EIMIL_destruct_value(pvr);
00987     return NULL;
00988 }
00989 
00990 EIMIL_value*
00991 EIMIL_mtext_substr(
00992     EIMIL_value *pv_mtext,
00993     int st,
00994     int end
00995 )
00996 {
00997     int i, j, len, num_props;
00998     int mst, mend;
00999     EIMIL_mtext *pm, *pmr;
01000     EIMIL_mtext_props *pmp, *pmp2;
01001     EIMIL_value **ppv, **ppv2, *pv, *pvr;
01002     UTF32 *pu;
01003 
01004 
01005     ASSERT(end > st);
01006 
01007     pm = &pv_mtext->v.mtext;
01008 
01009     if (st >= pm->len) return NULL;
01010     if (end > pm->len) end = pm->len;
01011 
01012     pvr = (EIMIL_value*) malloc(sizeof(EIMIL_value));
01013     if (!pvr) return NULL;
01014     memset(pvr, 0, sizeof(EIMIL_value));
01015     pvr->type = EIMIL_TYPE_MTEXT;
01016     pmr = &pvr->v.mtext;
01017 
01018     len = end - st;
01019     pmr->len = len;
01020 
01021     pu = (UTF32*) malloc(sizeof(UTF32) * (len + 1));
01022     if (!pu) {
01023        free(pmr);
01024        return NULL;
01025     }
01026     pmr->ustr = pu;
01027     memcpy(pu, pm->ustr + st, sizeof(UTF32) * len);
01028     pu[len] = 0;
01029 
01030     for (pmp = pm->pslots, i = 0; i < pm->slotsnum; pmp++, i++) {
01031        pmp2 = EIMIL_prepare_mtext_props_slot(pmr, pmp->property_sym);
01032        if (!pmp2) goto error;
01033        num_props = 0;
01034        ppv2 = (EIMIL_value**) malloc(sizeof(EIMIL_value*) * pmp->num);
01035        pmp2->pprops = ppv2;
01036        for (ppv = pmp->pprops, j = 0; j < pmp->num; ppv++, j++) {
01037            pv = *ppv;
01038            mst = pv->v.prop.st;
01039            mend = pv->v.prop.end;
01040            if (EIMIL_INTERVAL_OVERLAP_P(st, end, mst, mend)) {
01041               *ppv2 = EIMIL_move_prop(pmr, pv, -st, len);
01042               if (!*ppv2) goto error;
01043               ppv2++;
01044               num_props++;
01045            }
01046        }
01047        if (num_props > 0) {
01048            pmp2->pprops = (EIMIL_value**) realloc(pmp2->pprops,
01049                                              sizeof(EIMIL_value*) * num_props);
01050            if (!pmp2->pprops) goto error;
01051        } else {
01052            free(pmp2->pprops);
01053            pmp2->pprops = NULL;
01054        }
01055        pmp2->num = num_props;
01056     }
01057 
01058     return pvr;
01059 
01060 error:
01061     EIMIL_destruct_mtext(pmr);
01062     free(pmr);
01063     return NULL;
01064 }
01065 
01066 /*******************************************************************************
01067    Interfacial functions for IM structure <--> EIMIL structure
01068  ******************************************************************************/
01069 
01070 static IMProp*
01071 EIMIL_prop_convert_to_IMProp(
01072     EIMIL_prop *pprop
01073 )
01074 {
01075     int i;
01076     IMProp *pim;
01077     EIMIL_value **ppv, *pv;
01078 
01079     pim = (IMProp*) malloc(sizeof(IMProp));
01080        
01081     pim->count = pprop->size;
01082     switch(pprop->type) {
01083       case EIMIL_TYPE_NUMBER:
01084       {
01085          int *pnums;
01086 
01087          pim->type = IM_SYMBOL_PROPERTY_NUMBER;
01088          pnums = (int*) malloc(sizeof(int) * pim->count);
01089          pim->vals.numbers = pnums;
01090          if (!pnums) return NULL;
01091          for (ppv = pprop->pvals, i = 0; i < pim->count; ppv++, i++) {
01092              pv = *ppv;
01093              ASSERT(pv->type == EIMIL_TYPE_NUMBER);
01094              pnums[i] = pv->v.number;
01095          }
01096          break;
01097       }
01098 
01099       case EIMIL_TYPE_BOOL:
01100       {
01101          int *pbools;
01102 
01103          pim->type = IM_SYMBOL_PROPERTY_BOOL;
01104          pbools = (int*) malloc(sizeof(int) * pim->count);
01105          pim->vals.bools = pbools;
01106          if (!pbools) return NULL;
01107          for (ppv = pprop->pvals, i = 0; i < pim->count; ppv++, i++) {
01108              pv = *ppv;
01109              ASSERT(pv->type == EIMIL_TYPE_BOOL);
01110              pbools[i] = pv->v.bool_val;
01111          }
01112          break;
01113       }
01114       case EIMIL_TYPE_CHAR:
01115       {
01116          CARD32BIT *pchars;
01117 
01118          pim->type = IM_SYMBOL_PROPERTY_CHAR;
01119          pchars = (CARD32BIT*) malloc(sizeof(CARD32BIT) * pim->count);
01120          pim->vals.chars = pchars;
01121          if (!pchars) return NULL;
01122          for (ppv = pprop->pvals, i = 0; i < pim->count; ppv++, i++) {
01123              pv = *ppv;
01124              ASSERT(pv->type == EIMIL_TYPE_CHAR);
01125              pchars[i] = pv->v.ch;
01126          }
01127          break;
01128       }
01129       case EIMIL_TYPE_MTEXT:
01130       {
01131          IMText *ptexts;
01132 
01133          pim->type = IM_SYMBOL_PROPERTY_MTEXT;
01134          ptexts = (IMText*) malloc(sizeof(IMText) * pim->count);
01135          pim->vals.mtexts = ptexts;
01136          if (!ptexts) return NULL;
01137          for (ppv = pprop->pvals, i = 0; i < pim->count; ppv++, i++) {
01138              pv = *ppv;
01139              ASSERT(pv->type == EIMIL_TYPE_MTEXT);
01140              if (!EIMIL_convert_mtext_to_IMText(&ptexts[i], &pv->v.mtext)) {
01141                 return NULL;
01142              }
01143          }
01144          break;
01145       }
01146               
01147       default:
01148        abort();
01149     }
01150 
01151     return pim;
01152 }
01153 
01154 static EIMIL_value*
01155 EIMIL_prop_convert_IMProp(
01156     IMProp *pim
01157 )
01158 {
01159     int i;
01160     EIMIL_value *pv, *pv2;
01161     EIMIL_prop *pprop;
01162 
01163     switch(pim->type) {
01164       case IM_SYMBOL_PROPERTY_NUMBER:
01165        pv = EIMIL_construct_prop2(EIMIL_TYPE_NUMBER);
01166        if (!pv) return NULL;
01167        pprop = &pv->v.prop;
01168        for (i = 0; i < pim->count; i++) {
01169           pv2 = EIMIL_construct_number(pim->vals.numbers[i]);
01170           if (!pv2) return NULL;
01171           if (!EIMIL_add_prop(pprop, pv2)) return NULL;
01172        }
01173        break;
01174       case IM_SYMBOL_PROPERTY_BOOL:
01175        pv = EIMIL_construct_prop2(EIMIL_TYPE_BOOL);
01176        if (!pv) return NULL;
01177        pprop = &pv->v.prop;
01178        for (i = 0; i < pim->count; i++) {
01179           pv2 = EIMIL_construct_bool(pim->vals.bools[i]);
01180           if (!pv2) return NULL;
01181           if (!EIMIL_add_prop(pprop, pv2)) return NULL;
01182        }
01183        break;
01184       case IM_SYMBOL_PROPERTY_CHAR:
01185        pv = EIMIL_construct_prop2(EIMIL_TYPE_CHAR);
01186        if (!pv) return NULL;
01187        pprop = &pv->v.prop;
01188        for (i = 0; i < pim->count; i++) {
01189           pv2 = EIMIL_construct_char(pim->vals.chars[i]);
01190           if (!pv2) return NULL;
01191           if (!EIMIL_add_prop(pprop, pv2)) return NULL;
01192        }
01193        break;
01194       case IM_SYMBOL_PROPERTY_MTEXT:
01195        pv = EIMIL_construct_prop2(EIMIL_TYPE_MTEXT);
01196        if (!pv) return NULL;
01197        pprop = &pv->v.prop;
01198        for (i = 0; i < pim->count; i++) {
01199           pv2 = EIMIL_construct_mtext_from_IMText(&pim->vals.mtexts[i]);
01200           if (!pv2) return NULL;
01201           if (!EIMIL_add_prop(pprop, pv2)) return NULL;
01202        }
01203       default:
01204        abort();
01205     }
01206 
01207     return pv;
01208 }
01209 
01210 static IMFeedbackList*
01211 create_feedback(
01212     int size
01213 )
01214 {
01215     int i;
01216     IMFeedbackList *feedback;
01217     IMFeedback *fb;
01218     
01219     feedback = (IMFeedbackList *) malloc(sizeof(IMFeedbackList) * size);
01220     for (i = 0; i < size; i++) {
01221         IMFeedbackList *fbl = &feedback[i];
01222         fbl->count_feedbacks = 1;
01223        fb = (IMFeedback *) malloc(sizeof(IMFeedback) * 4);
01224        fbl->feedbacks = fb;
01225         memset(fbl->feedbacks, 0, sizeof(IMFeedback) * 4);
01226     }
01227     return feedback;
01228 }
01229 
01230 static void
01231 set_feedback(
01232     UTF32 *basestr,
01233     UTF32 *strend,
01234     EIMIL_prop *pprop,
01235     IMFeedbackList* pfbl
01236 )
01237 {
01238     int st, end;
01239     IMFeedback *pfb;
01240     EIMIL_value *pv;
01241 
01242     if (pprop->type != EIMIL_TYPE_NUMBER) return;
01243 
01244     st = EIMIL_adjust_UTF32_pos_to_UTF16(pprop->st, basestr, strend); 
01245     ASSERT(st >= 0);
01246     end = EIMIL_adjust_UTF32_pos_to_UTF16(pprop->end, basestr, strend);
01247     ASSERT(end >= 0);
01248 
01249     pv = pprop->pvals[0];
01250     ASSERT(pv->type == EIMIL_TYPE_NUMBER);
01251     for (pfb = pfbl->feedbacks + st; end > st; st++, pfb++) {
01252        if (IM_FEEDBACK_TYPE(pfb)) continue;
01253        IM_FEEDBACK_TYPE(pfb) = IM_DECORATION_FEEDBACK;
01254        IM_FEEDBACK_VALUE(pfb) = pv->v.number;
01255     }
01256 }
01257 
01258 /* TODO: check recursive loop.  */
01259 int
01260 EIMIL_convert_mtext_to_IMText(
01261     IMText *pim,
01262     EIMIL_mtext *psrc
01263 )
01264 {
01265     int i, j;
01266     EIMIL_value **ppv, *pv;
01267     EIMIL_mtext_props *pmp;
01268     EIMIL_prop *pprop;
01269 
01270     IMAnnotation *pima;
01271     IMAnnotationValue *pimav;
01272     IMProp *pimp;
01273 
01274     memset(pim, 0, sizeof(IMText));
01275     pim->encoding = UTF16_CODESET;
01276     pim->count_annotations = psrc->slotsnum;
01277     pima = (IMAnnotation*) malloc(sizeof(IMAnnotation)
01278                               * pim->count_annotations);
01279     pim->annotations = pima;
01280     if (!pima) {
01281        free(pim);
01282        return 0;
01283     }
01284     /* UTF16 string */
01285     if (!EIMIL_convert_UTF32_to_UTF16(psrc->ustr, psrc->len,
01286                                   &pim->text.utf_chars,
01287                                   &pim->char_length)) {
01288        free(pim->annotations);
01289        free(pim);
01290        return 0;
01291     }
01292 
01293     pim->feedback = create_feedback(pim->char_length);
01294     if (!pim->feedback) {
01295        free(pim->text.utf_chars);
01296        free(pim->annotations);
01297        free(pim);
01298        return 0;
01299     }
01300     /* feedback & annotation */
01301     for (pmp = psrc->pslots, i = 0;
01302         i < psrc->slotsnum;
01303         pmp++, pima++, i++) {
01304        pima->type = pmp->property_sym->symbolid;
01305 
01306        if (pima->type == EIMIL_SYMBOL_ID_FEEDBACK) {
01307            for (ppv = pmp->pprops, j = 0;
01308                j < pmp->num;
01309                ppv++, pimav++, j++) {
01310               pv = *ppv;
01311               ASSERT(pv->type == EIMIL_TYPE_PROP);
01312               pprop = &pv->v.prop;
01313               set_feedback(psrc->ustr, psrc->ustr + psrc->len,
01314                           pprop, pim->feedback);
01315            }
01316        }
01317 
01318        pima->num_values = pmp->num;
01319        pimav = (IMAnnotationValue*) malloc(sizeof(IMAnnotationValue) * pmp->num);
01320        if (!pimav) {
01321            free(pim->annotations);
01322            free(pim);
01323            return 0;
01324        }
01325        pima->values = pimav;
01326        for (ppv = pmp->pprops, j = 0;
01327             j < pmp->num;
01328             ppv++, pimav++, j++) {
01329            pv = *ppv;
01330            ASSERT(pv->type == EIMIL_TYPE_PROP);
01331            pprop = &pv->v.prop;
01332            pimav->start_pos = EIMIL_adjust_UTF32_pos_to_UTF16(pprop->st,
01333                                                         psrc->ustr,
01334                                                         psrc->ustr
01335                                                         + psrc->len);
01336            ASSERT(pimav->start_pos >= 0);
01337            pimav->end_pos = EIMIL_adjust_UTF32_pos_to_UTF16(pprop->end,
01338                                                       psrc->ustr,
01339                                                       psrc->ustr
01340                                                       + psrc->len);
01341            ASSERT(pimav->end_pos >= 0);
01342            pimav->len = -1;
01343            pimp = EIMIL_prop_convert_to_IMProp(pprop);
01344            if (!pimp) {
01345               free(pim->annotations);
01346               free(pim);
01347               return 0;
01348            }
01349        }
01350     }
01351 
01352     return 1;
01353 }
01354 
01355 static int
01356 EIMIL_add_props_of_IMText(
01357     EIMIL_mtext *pm,
01358     IMText *pim
01359 )
01360 {
01361     int i, j;
01362     int mst, mend;
01363     EIMIL_value *pv;
01364     IMAnnotation *pima;
01365     IMAnnotationValue *pimav;
01366 
01367     for (pima = pim->annotations, i = 0;
01368         i < pim->count_annotations;
01369         pima++, i++) {
01370        for (pimav = pima->values, j = 0;
01371             j < pima->num_values;
01372             pimav++, j++) {
01373            pv = EIMIL_prop_convert_IMProp((IMProp*)pimav->value);
01374            mst = EIMIL_adjust_UTF16_pos_to_UTF32(pimav->start_pos,
01375                                             pm->ustr,
01376                                             pm->ustr + pm->len);
01377            if (mst < 0) return 0;
01378            mend = EIMIL_adjust_UTF16_pos_to_UTF32(pimav->end_pos,
01379                                              pm->ustr,
01380                                              pm->ustr + pm->len);
01381            if (mend < 0) return 0;
01382            if (!EIMIL_add_prop_on_mtext(pm, pv, mst, mend)) return 0;
01383        }
01384     }
01385 
01386     return 1;
01387        
01388 }
01389 
01390 EIMIL_value*
01391 EIMIL_construct_mtext_from_IMText(
01392     IMText *pim
01393 )
01394 {
01395     EIMIL_value *pv;
01396 
01397     pv = EIMIL_construct_mtext_from_UTF16(pim->char_length, pim->text.utf_chars);
01398     if (!pv) return NULL;
01399     if (!EIMIL_add_props_of_IMText(&pv->v.mtext, pim)) return NULL;
01400 
01401     return pv;
01402 }
01403 
01404 /*
01405   orig mtext
01406   |------------------------------|
01407          <=====>
01408          st   end
01409          <*********>
01410             ulen
01411   new mtext
01412   |------<*********>-----------------|
01413          st       nend
01414                 |<->|
01415                  dlen
01416   |<----------nlen------------------>|
01417  */
01418 int
01419 EIMIL_sync_mtext(
01420     EIMIL_mtext *pm,
01421     IMDifferential *pdiff
01422 )
01423 {
01424     int i, j, n;
01425     int st, end, nend, mst, mend, nlen, dlen;
01426     UTF32 *pustr, ulen;
01427     EIMIL_mtext_props *pmp;
01428     EIMIL_value **ppv, *pv, *pv2;
01429     EIMIL_prop *pprop;
01430     IMText *ptx;
01431 
01432     ptx = &pdiff->text;
01433 
01434     /* STEP1: replace the interval with the text.  */
01435     st = EIMIL_adjust_UTF16_pos_to_UTF32(pdiff->chg_first, pm->ustr,
01436                                     pm->ustr + pm->len);
01437     if (st < 0) return 0;
01438     end = EIMIL_adjust_UTF16_pos_to_UTF32(pdiff->chg_len, pm->ustr + st,
01439                                      pm->ustr + pm->len);
01440     if (end < 0) return 0;
01441     end += st;
01442     if (!EIMIL_convert_UTF16_to_UTF32(ptx->text.utf_chars, ptx->char_length,
01443                                   &pustr, &ulen))
01444        return 0;
01445     dlen = ulen - (end - st);
01446     nlen = pm->len + dlen;
01447     if ((end - st) > ulen) {
01448        memmove(pm->ustr + st + ulen, pm->ustr + end,
01449               sizeof(UTF32) * (pm->len - end));
01450        pm->ustr = (UTF32*) realloc(pm->ustr, sizeof(UTF32) * nlen);
01451     }else if ((end - st) < ulen) {
01452        pm->ustr = (UTF32*) realloc(pm->ustr, sizeof(UTF32) * nlen);
01453        memmove(pm->ustr + st + ulen, pm->ustr + end,
01454               sizeof(UTF32) * (pm->len - end));
01455     }
01456     memcpy(pm->ustr + st, pustr, sizeof(UTF32) * ulen);
01457     free(pustr);
01458 
01459     /* STEP2: strip or move props in the interval.  */
01460     for (pmp = pm->pslots, i = 0; i < pm->slotsnum; pmp++, i++) {
01461        n = pmp->num;
01462        ppv = pmp->pprops;
01463        for (j = 0; j < n;) {
01464            pv = ppv[j];
01465            ASSERT(pv->type == EIMIL_TYPE_PROP);
01466            pprop = &pv->v.prop;
01467            mst = pprop->st;
01468            mend = pprop->end;
01469            if ((mend >= st) && (mst < st)) {
01470               /* <--->         */
01471               /*   <===>       */
01472               pprop->end = st;
01473               j++;
01474            }else if ((mend <= end) && (mst >= st)) {
01475               /*   <--->       */
01476               /*  <======>     */
01477               pprop->st = -1;
01478               pprop->end = -1;
01479               pprop->target = NULL;
01480               n--;
01481               if (n > j) {
01482                   memmove(ppv + j, ppv + j + 1,
01483                          sizeof(EIMIL_value*) * (n - i));
01484               }
01485               EIMIL_RMREF(*pv);
01486            }else if ((mst <= end) && (mend > end)) {
01487               /*       <--->   */
01488               /*  <======>     */
01489               pprop->st = nend;
01490               j++;
01491            }else if ((mst < st) && (mend > end)) {
01492               /* <-------->    */
01493               /*    <===>      */
01494               /*     |         */
01495               /*     V         */
01496               /* <-><===><>    */
01497               /*  i      i+1   */
01498               pv2 = EIMIL_copy_value(pv);
01499               if (!pv2) return 0;
01500               pv2->v.prop.st = nend;
01501               EIMIL_ADDREF(*pv2);
01502               pprop->end = st;
01503               ppv = (EIMIL_value**) realloc(ppv, sizeof(EIMIL_value*) * (n + 1));
01504               pmp->pprops = ppv;
01505               if (n > (j + 1)) {
01506                   memmove(ppv + j + 2, ppv + j + 1,
01507                          sizeof(EIMIL_value*) * (n - j));
01508               }
01509               ppv[j + 1] = pv2;
01510               n++;
01511               j += 2;
01512            }else if (mst < st) {
01513               /* <---->        */
01514               /*       <====>  */
01515               j++;
01516            }else{
01517               /*        <----> */
01518               /* <====>        */
01519               pprop->st += dlen;
01520               pprop->end += dlen;
01521               j++;
01522            }
01523        }
01524     }
01525 
01526     /* STEP3: add props of the text. */
01527     if (!EIMIL_add_props_of_IMText(pm, ptx)) return 0;
01528 
01529     return 1;
01530 }
01531 
01532 int
01533 EIMIL_mtext_diff(
01534     EIMIL_mtext *porig,
01535     EIMIL_mtext *pnew,
01536     IMDifferential *pdiff
01537 )
01538 {
01539     /* TODO: make it more efficient!!! */
01540     if (!EIMIL_convert_mtext_to_IMText(&pdiff->text, pnew)) return 0;
01541     pdiff->chg_first = 0;
01542     pdiff->chg_len = 0;
01543 
01544     return 1;
01545 }
01546 
01547 /* Local Variables: */
01548 /* c-file-style: "iiim-project" */
01549 /* End: */