Back to index

tetex-bin  3.0
sortid.c
Go to the documentation of this file.
00001 /*
00002  *
00003  *  This file is part of
00004  *     MakeIndex - A formatter and format independent index processor
00005  *
00006  *  Copyright (C) 1989 by Chen & Harrison International Systems, Inc.
00007  *  Copyright (C) 1988 by Olivetti Research Center
00008  *  Copyright (C) 1987 by Regents of the University of California
00009  *
00010  *  Author:
00011  *     Pehong Chen
00012  *     Chen & Harrison International Systems, Inc.
00013  *     Palo Alto, California
00014  *     USA
00015  *     (phc@renoir.berkeley.edu or chen@orc.olivetti.com)
00016  *
00017  *  Contributors:
00018  *     Please refer to the CONTRIB file that comes with this release
00019  *     for a list of people who have contributed to this and/or previous
00020  *     release(s) of MakeIndex.
00021  *
00022  *  All rights reserved by the copyright holders.  See the copyright
00023  *  notice distributed with this software for a complete description of
00024  *  the conditions under which it is made available.
00025  *
00026  */
00027 
00028 #include    "mkind.h"
00029 
00030 #ifdef HAVE_LOCALE_H
00031 #include <locale.h>
00032 #endif
00033 
00034 static long   idx_gc;
00035 
00036 static int    check_mixsym ARGS((char *x,char *y));
00037 static int    compare ARGS((struct KFIELD * *a,struct KFIELD * *b));
00038 static int    compare_one ARGS((char *x,char *y));
00039 static int    compare_page ARGS((struct KFIELD * *a,struct KFIELD * *b));
00040 static int    compare_string ARGS((unsigned char *a,unsigned char *b));
00041 static int    new_strcmp ARGS((unsigned char *a, unsigned char *b,
00042                              int option));
00043 
00044 void
00045 sort_idx(VOID_ARG)
00046 {
00047 #ifdef HAVE_SETLOCALE
00048     char *prev_locale;
00049 #endif
00050 
00051     MESSAGE("Sorting entries...", "");
00052 #ifdef HAVE_SETLOCALE
00053     prev_locale = setlocale(LC_CTYPE, NULL);
00054     setlocale(LC_CTYPE, "");
00055 #endif
00056     idx_dc = 0;
00057     idx_gc = 0L;
00058     qqsort((char *) idx_key, (int) idx_gt, (int) sizeof(FIELD_PTR), 
00059        (int (*) ARGS((char*,char*)))compare);
00060 #ifdef HAVE_SETLOCALE
00061     setlocale(LC_COLLATE, prev_locale);
00062 #endif
00063     MESSAGE("done (%ld comparisons).\n", idx_gc);
00064 }
00065 
00066 static int
00067 #if STDC
00068 compare(FIELD_PTR *a, FIELD_PTR *b)
00069 #else
00070 compare(a, b)
00071 FIELD_PTR *a;
00072 FIELD_PTR *b;
00073 #endif
00074 {
00075     int     i;
00076     int     dif;
00077 
00078     idx_gc++;
00079     IDX_DOT(CMP_MAX);
00080 
00081     for (i = 0; i < FIELD_MAX; i++) {
00082        /* compare the sort fields */
00083        if ((dif = compare_one((*a)->sf[i], (*b)->sf[i])) != 0)
00084            break;
00085 
00086        /* compare the actual fields */
00087        if ((dif = compare_one((*a)->af[i], (*b)->af[i])) != 0)
00088            break;
00089     }
00090 
00091     /* both key aggregates are identical, compare page numbers */
00092     if (i == FIELD_MAX)
00093        dif = compare_page(a, b);
00094     return (dif);
00095 }
00096 
00097 static int
00098 #if STDC
00099 compare_one(char *x,char *y)
00100 #else
00101 compare_one(x, y)
00102 char   *x;
00103 char   *y;
00104 #endif
00105 {
00106     int     m;
00107     int     n;
00108 
00109     if ((x[0] == NUL) && (y[0] == NUL))
00110        return (0);
00111 
00112     if (x[0] == NUL)
00113        return (-1);
00114 
00115     if (y[0] == NUL)
00116        return (1);
00117 
00118     m = group_type(x);
00119     n = group_type(y);
00120 
00121     /* both pure digits */
00122     if ((m >= 0) && (n >= 0))
00123        return (m - n);
00124 
00125     /* x digit, y non-digit */
00126     if (m >= 0) {
00127        if (german_sort)
00128            return (1);
00129        else
00130            return ((n == -1) ? 1 : -1);
00131     }
00132     /* x non-digit, y digit */
00133     if (n >= 0) {
00134        if (german_sort)
00135            return (-1);
00136        else
00137            return ((m == -1) ? -1 : 1);
00138     }
00139     /* strings started with a symbol (including digit) */
00140     if ((m == SYMBOL) && (n == SYMBOL))
00141        return (check_mixsym(x, y));
00142 
00143     /* x symbol, y non-symbol */
00144     if (m == SYMBOL)
00145        return (-1);
00146 
00147     /* x non-symbol, y symbol */
00148     if (n == SYMBOL)
00149        return (1);
00150 
00151     /* strings with a leading letter, the ALPHA type */
00152     return (compare_string((unsigned char*)x, (unsigned char*)y));
00153 }
00154 
00155 static int
00156 #if STDC
00157 check_mixsym(char *x, char *y)
00158 #else
00159 check_mixsym(x, y)
00160 char   *x;
00161 char   *y;
00162 #endif
00163 {
00164     int     m;
00165     int     n;
00166 
00167     m = ISDIGIT(x[0]);
00168     n = ISDIGIT(y[0]);
00169 
00170     if (m && !n)
00171        return (1);
00172 
00173     if (!m && n)
00174        return (-1);
00175 
00176     return (locale_sort ? strcoll(x, y) : strcmp(x, y));
00177 }
00178 
00179 
00180 static int
00181 #if STDC
00182 compare_string(unsigned char *a, unsigned char *b)
00183 #else
00184 compare_string(a, b)
00185 unsigned char   *a;
00186 unsigned char   *b;
00187 #endif
00188 {
00189     int     i = 0;
00190     int     j = 0;
00191     int     al;
00192     int     bl;
00193 
00194     if (locale_sort) return strcoll(a, b);
00195 
00196     while ((a[i] != NUL) || (b[j] != NUL)) {
00197        if (a[i] == NUL)
00198            return (-1);
00199        if (b[j] == NUL)
00200            return (1);
00201        if (letter_ordering) {
00202            if (a[i] == SPC)
00203               i++;
00204            if (b[j] == SPC)
00205               j++;
00206        }
00207        al = TOLOWER(a[i]);
00208        bl = TOLOWER(b[j]);
00209 
00210        if (al != bl)
00211            return (al - bl);
00212        i++;
00213        j++;
00214     }
00215     if (german_sort)
00216        return (new_strcmp(a, b, GERMAN));
00217     else
00218        return (strcmp((char*)a, (char*)b));
00219 }
00220 
00221 static int
00222 #if STDC
00223 compare_page(FIELD_PTR *a, FIELD_PTR *b)
00224 #else
00225 compare_page(a, b)
00226 FIELD_PTR *a;
00227 FIELD_PTR *b;
00228 #endif
00229 {
00230     int     m = 0;
00231     short   i = 0;
00232 
00233     while ((i < (*a)->count) && (i < (*b)->count) &&
00234           ((m = (*a)->npg[i] - (*b)->npg[i]) == 0))
00235     {
00236        i++;
00237     }
00238     if (m == 0)
00239     {                       /* common leading page numbers match */
00240        if ((i == (*a)->count) && (i == (*b)->count))
00241        {                    /* all page numbers match */
00242            /***********************************************************
00243            We have identical entries, except possibly in encap fields.
00244            The ordering is tricky here.  Consider the following input
00245            sequence of index names, encaps, and page numbers:
00246 
00247               foo|(  2
00248               foo|)  6
00249               foo|(  6
00250               foo|)  10
00251 
00252            This might legimately occur when a page range ends, and
00253            subsequently, a new range starts, on the same page.  If we
00254            just order by range_open and range_close (here, parens),
00255            then we will produce
00256 
00257               foo|(  2
00258               foo|(  6
00259               foo|)  6
00260               foo|)  10
00261 
00262            This will later generate the index entry
00263 
00264               foo, 2--6, \({6}, 10
00265 
00266            which is not only wrong, but has also introduced an illegal
00267            LaTeX macro, \({6}, because the merging step treated this
00268            like a \see{6} entry.
00269 
00270            The solution is to preserve the original input order, which
00271            we can do by treating range_open and range_close as equal,
00272            and then ordering by input line number.  This will then
00273            generate the correct index entry
00274 
00275               foo, 2--10
00276 
00277            Ordering inconsistencies from missing range open or close
00278            entries, or mixing roman and arabic page numbers, will be
00279            detected later.
00280            ***********************************************************/
00281 
00282 #define isrange(c) ( ((c) == idx_ropen) || ((c) == idx_rclose) )
00283 
00284            /* Order two range values by input line number */
00285 
00286            if (isrange(*(*a)->encap) && isrange(*(*b)->encap))
00287               m = (*a)->lc - (*b)->lc;
00288 
00289            /* Handle identical encap fields; neither is a range delimiter */
00290 
00291            else if (STREQ((*a)->encap, (*b)->encap))
00292            {
00293               /* If neither are yet marked duplicate, mark the second
00294               of them to be ignored. */
00295               if (((*a)->type != DUPLICATE) &&
00296                   ((*b)->type != DUPLICATE))
00297                   (*b)->type = DUPLICATE;
00298               /* leave m == 0 to show equality */
00299            }
00300 
00301            /* Encap fields differ: only one may be a range delimiter, */
00302            /* or else neither of them is.   If either of them is a range */
00303            /* delimiter, order by input line number; otherwise, order */
00304            /* by name. */
00305 
00306            else
00307            {
00308               if ( isrange(*(*a)->encap) || isrange(*(*b)->encap) )
00309                   m = (*a)->lc - (*b)->lc; /* order by input line number */
00310               else                 /* order non-range items by */
00311                                    /* their encap strings */
00312                   m = compare_string((unsigned char*)((*a)->encap),
00313                                    (unsigned char*)((*b)->encap));
00314            }
00315        }
00316        else if ((i == (*a)->count) && (i < (*b)->count))
00317            m = -1;
00318        else if ((i < (*a)->count) && (i == (*b)->count))
00319            m = 1;
00320     }
00321     return (m);
00322 }
00323 
00324 
00325 static int
00326 #if STDC
00327 new_strcmp(unsigned char *s1, unsigned char *s2, int option)
00328 #else
00329 new_strcmp(s1, s2, option)
00330 unsigned char   *s1;
00331 unsigned char   *s2;
00332 int     option;
00333 #endif
00334 {
00335     int     i;
00336 
00337     i = 0;
00338     while (s1[i] == s2[i])
00339        if (s1[i++] == NUL)
00340            return (0);
00341     if (option)                           /* ASCII */
00342        return (isupper(s1[i]) ? -1 : 1);
00343     else                           /* GERMAN */
00344        return (isupper(s1[i]) ? 1 : -1);
00345 }