Back to index

tetex-bin  3.0
texindex.c
Go to the documentation of this file.
00001 /* texindex -- sort TeX index dribble output into an actual index.
00002    $Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp $
00003 
00004    Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001,
00005    2002, 2003, 2004 Free Software Foundation, Inc.
00006 
00007    This program is free software; you can redistribute it and/or modify
00008    it under the terms of the GNU General Public License as published by
00009    the Free Software Foundation; either version 2, or (at your option)
00010    any later version.
00011 
00012    This program is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015    GNU General Public License for more details.
00016 
00017    You should have received a copy of the GNU General Public License
00018    along with this program; if not, write to the Free Software
00019    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
00020 
00021 #include "system.h"
00022 #include <getopt.h>
00023 
00024 static char *program_name = "texindex";
00025 
00026 #if defined (emacs)
00027 #  include "../src/config.h"
00028 /* Some s/os.h files redefine these. */
00029 #  undef read
00030 #  undef close
00031 #  undef write
00032 #  undef open
00033 #endif
00034 
00035 #if !defined (HAVE_MEMSET)
00036 #undef memset
00037 #define memset(ptr, ignore, count) bzero (ptr, count)
00038 #endif
00039 
00040 char *mktemp (char *);
00041 
00042 #if !defined (SEEK_SET)
00043 #  define SEEK_SET 0
00044 #  define SEEK_CUR 1
00045 #  define SEEK_END 2
00046 #endif /* !SEEK_SET */
00047 
00048 struct linebuffer;
00049 
00050 /* When sorting in core, this structure describes one line
00051    and the position and length of its first keyfield.  */
00052 struct lineinfo
00053 {
00054   char *text;           /* The actual text of the line. */
00055   union {
00056     char *text;         /* The start of the key (for textual comparison). */
00057     long number;        /* The numeric value (for numeric comparison). */
00058   } key;
00059   long keylen;          /* Length of KEY field. */
00060 };
00061 
00062 /* This structure describes a field to use as a sort key. */
00063 struct keyfield
00064 {
00065   int startwords;       /* Number of words to skip. */
00066   int startchars;       /* Number of additional chars to skip. */
00067   int endwords;         /* Number of words to ignore at end. */
00068   int endchars;         /* Ditto for characters of last word. */
00069   char ignore_blanks;   /* Non-zero means ignore spaces and tabs. */
00070   char fold_case;       /* Non-zero means case doesn't matter. */
00071   char reverse;         /* Non-zero means compare in reverse order. */
00072   char numeric;         /* Non-zeros means field is ASCII numeric. */
00073   char positional;      /* Sort according to file position. */
00074   char braced;          /* Count balanced-braced groupings as fields. */
00075 };
00076 
00077 /* Vector of keyfields to use. */
00078 struct keyfield keyfields[3];
00079 
00080 /* Number of keyfields stored in that vector.  */
00081 int num_keyfields = 3;
00082 
00083 /* Vector of input file names, terminated with a null pointer. */
00084 char **infiles;
00085 
00086 /* Vector of corresponding output file names, or NULL, meaning default it
00087    (add an `s' to the end). */
00088 char **outfiles;
00089 
00090 /* Length of `infiles'. */
00091 int num_infiles;
00092 
00093 /* Pointer to the array of pointers to lines being sorted. */
00094 char **linearray;
00095 
00096 /* The allocated length of `linearray'. */
00097 long nlines;
00098 
00099 /* Directory to use for temporary files.  On Unix, it ends with a slash.  */
00100 char *tempdir;
00101 
00102 /* Number of last temporary file.  */
00103 int tempcount;
00104 
00105 /* Number of last temporary file already deleted.
00106    Temporary files are deleted by `flush_tempfiles' in order of creation.  */
00107 int last_deleted_tempcount;
00108 
00109 /* During in-core sort, this points to the base of the data block
00110    which contains all the lines of data.  */
00111 char *text_base;
00112 
00113 /* Initially 0; changed to 1 if we want initials in this index.  */
00114 int need_initials;
00115 
00116 /* Remembers the first initial letter seen in this index, so we can
00117    determine whether we need initials in the sorted form.  */
00118 char first_initial;
00119 
00120 /* Additional command switches .*/
00121 
00122 /* Nonzero means do not delete tempfiles -- for debugging. */
00123 int keep_tempfiles;
00124 
00125 /* Forward declarations of functions in this file. */
00126 void decode_command (int argc, char **argv);
00127 void sort_in_core (char *infile, int total, char *outfile);
00128 void sort_offline (char *infile, off_t total, char *outfile);
00129 char **parsefile (char *filename, char **nextline, char *data, long int size);
00130 char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr);
00131 char *find_pos (char *str, int words, int chars, int ignore_blanks);
00132 long find_value (char *start, long int length);
00133 char *find_braced_pos (char *str, int words, int chars, int ignore_blanks);
00134 char *find_braced_end (char *str);
00135 void writelines (char **linearray, int nlines, FILE *ostream);
00136 int compare_field (struct keyfield *keyfield, char *start1,
00137                    long int length1, long int pos1, char *start2,
00138                    long int length2, long int pos2);
00139 int compare_full (const void *, const void *);
00140 long readline (struct linebuffer *linebuffer, FILE *stream);
00141 int merge_files (char **infiles, int nfiles, char *outfile);
00142 int merge_direct (char **infiles, int nfiles, char *outfile);
00143 void pfatal_with_name (const char *name);
00144 void fatal (const char *format, const char *arg);
00145 void error (const char *format, const char *arg);
00146 void *xmalloc (), *xrealloc ();
00147 char *concat (char *s1, char *s2);
00148 void flush_tempfiles (int to_count);
00149 
00150 #define MAX_IN_CORE_SORT 500000
00151 
00152 int
00153 main (int argc, char **argv)
00154 {
00155   int i;
00156 
00157   tempcount = 0;
00158   last_deleted_tempcount = 0;
00159 
00160 #ifdef HAVE_SETLOCALE
00161   /* Set locale via LC_ALL.  */
00162   setlocale (LC_ALL, "");
00163 #endif
00164 
00165   /* Set the text message domain.  */
00166   bindtextdomain (PACKAGE, LOCALEDIR);
00167   textdomain (PACKAGE);
00168 
00169   /* In case we write to a redirected stdout that fails.  */
00170   /* not ready atexit (close_stdout); */
00171 
00172   /* Describe the kind of sorting to do. */
00173   /* The first keyfield uses the first braced field and folds case. */
00174   keyfields[0].braced = 1;
00175   keyfields[0].fold_case = 1;
00176   keyfields[0].endwords = -1;
00177   keyfields[0].endchars = -1;
00178 
00179   /* The second keyfield uses the second braced field, numerically. */
00180   keyfields[1].braced = 1;
00181   keyfields[1].numeric = 1;
00182   keyfields[1].startwords = 1;
00183   keyfields[1].endwords = -1;
00184   keyfields[1].endchars = -1;
00185 
00186   /* The third keyfield (which is ignored while discarding duplicates)
00187      compares the whole line. */
00188   keyfields[2].endwords = -1;
00189   keyfields[2].endchars = -1;
00190 
00191   decode_command (argc, argv);
00192 
00193   /* Process input files completely, one by one.  */
00194 
00195   for (i = 0; i < num_infiles; i++)
00196     {
00197       int desc;
00198       off_t ptr;
00199       char *outfile;
00200       struct stat instat;
00201 
00202       desc = open (infiles[i], O_RDONLY, 0);
00203       if (desc < 0)
00204         pfatal_with_name (infiles[i]);
00205 
00206       if (stat (infiles[i], &instat))
00207         pfatal_with_name (infiles[i]);
00208       if (S_ISDIR (instat.st_mode))
00209         {
00210 #ifdef EISDIR
00211           errno = EISDIR;
00212 #endif
00213           pfatal_with_name (infiles[i]);
00214         }
00215 
00216       lseek (desc, (off_t) 0, SEEK_END);
00217       ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR);
00218 
00219       close (desc);
00220 
00221       outfile = outfiles[i];
00222       if (!outfile)
00223         outfile = concat (infiles[i], "s");
00224 
00225       need_initials = 0;
00226       first_initial = '\0';
00227 
00228       if (ptr < MAX_IN_CORE_SORT)
00229         /* Sort a small amount of data. */
00230         sort_in_core (infiles[i], (int)ptr, outfile);
00231       else
00232         sort_offline (infiles[i], ptr, outfile);
00233     }
00234 
00235   flush_tempfiles (tempcount);
00236   xexit (0);
00237   return 0; /* Avoid bogus warnings.  */
00238 }
00239 
00240 typedef struct
00241 {
00242   char *long_name;
00243   char *short_name;
00244   int *variable_ref;
00245   int variable_value;
00246   char *arg_name;
00247   char *doc_string;
00248 } TEXINDEX_OPTION;
00249 
00250 TEXINDEX_OPTION texindex_options[] = {
00251   { "--help", "-h", (int *)NULL, 0, (char *)NULL,
00252       N_("display this help and exit") },
00253   { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL,
00254       N_("keep temporary files around after processing") },
00255   { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL,
00256       N_("do not keep temporary files around after processing (default)") },
00257   { "--output", "-o", (int *)NULL, 0, "FILE",
00258       N_("send output to FILE") },
00259   { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
00260       N_("display version information and exit") },
00261   { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
00262 };
00263 
00264 void
00265 usage (int result_value)
00266 {
00267   register int i;
00268   FILE *f = result_value ? stderr : stdout;
00269 
00270   fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
00271   fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
00272   /* Avoid trigraph nonsense.  */
00273   fprintf (f,
00274 _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"),
00275            '?', '?'); /* avoid trigraph in cat-id-tbl.c */
00276   fprintf (f, _("\nOptions:\n"));
00277 
00278   for (i = 0; texindex_options[i].long_name; i++)
00279     {
00280       putc (' ', f);
00281 
00282       if (texindex_options[i].short_name)
00283         fprintf (f, "%s, ", texindex_options[i].short_name);
00284 
00285       fprintf (f, "%s %s",
00286                texindex_options[i].long_name,
00287                texindex_options[i].arg_name
00288                ? texindex_options[i].arg_name : "");
00289 
00290       fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
00291     }
00292   fputs (_("\n\
00293 Email bug reports to bug-texinfo@gnu.org,\n\
00294 general questions and discussion to help-texinfo@gnu.org.\n\
00295 Texinfo home page: http://www.gnu.org/software/texinfo/"), f);
00296   fputs ("\n", f);
00297 
00298   xexit (result_value);
00299 }
00300 
00301 /* Decode the command line arguments to set the parameter variables
00302    and set up the vector of keyfields and the vector of input files. */
00303 
00304 void
00305 decode_command (int argc, char **argv)
00306 {
00307   int arg_index = 1;
00308   char **ip;
00309   char **op;
00310 
00311   /* Store default values into parameter variables. */
00312 
00313   tempdir = getenv ("TMPDIR");
00314   if (tempdir == NULL)
00315     tempdir = getenv ("TEMP");
00316   if (tempdir == NULL)
00317     tempdir = getenv ("TMP");
00318   if (tempdir == NULL)
00319     tempdir = DEFAULT_TMPDIR;
00320   else
00321     tempdir = concat (tempdir, "/");
00322 
00323   keep_tempfiles = 0;
00324 
00325   /* Allocate ARGC input files, which must be enough.  */
00326 
00327   infiles = (char **) xmalloc (argc * sizeof (char *));
00328   outfiles = (char **) xmalloc (argc * sizeof (char *));
00329   ip = infiles;
00330   op = outfiles;
00331 
00332   while (arg_index < argc)
00333     {
00334       char *arg = argv[arg_index++];
00335 
00336       if (*arg == '-')
00337         {
00338           if (strcmp (arg, "--version") == 0)
00339             {
00340               printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
00341               puts ("");
00342               puts ("Copyright (C) 2004 Free Software Foundation, Inc.");
00343               printf (_("There is NO warranty.  You may redistribute this software\n\
00344 under the terms of the GNU General Public License.\n\
00345 For more information about these matters, see the files named COPYING.\n"));
00346               xexit (0);
00347             }
00348           else if ((strcmp (arg, "--keep") == 0) ||
00349                    (strcmp (arg, "-k") == 0))
00350             {
00351               keep_tempfiles = 1;
00352             }
00353           else if ((strcmp (arg, "--help") == 0) ||
00354                    (strcmp (arg, "-h") == 0))
00355             {
00356               usage (0);
00357             }
00358           else if ((strcmp (arg, "--output") == 0) ||
00359                    (strcmp (arg, "-o") == 0))
00360             {
00361               if (argv[arg_index] != (char *)NULL)
00362                 {
00363                   arg_index++;
00364                   if (op > outfiles)
00365                     *(op - 1) = argv[arg_index];
00366                 }
00367               else
00368                 usage (1);
00369             }
00370           else
00371             usage (1);
00372         }
00373       else
00374         {
00375           *ip++ = arg;
00376           *op++ = (char *)NULL;
00377         }
00378     }
00379 
00380   /* Record number of keyfields and terminate list of filenames. */
00381   num_infiles = ip - infiles;
00382   *ip = (char *)NULL;
00383   if (num_infiles == 0)
00384     usage (1);
00385 }
00386 
00387 /* Return a name for temporary file COUNT. */
00388 
00389 static char *
00390 maketempname (int count)
00391 {
00392   static char *tempbase = NULL;
00393   char tempsuffix[10];
00394 
00395   if (!tempbase)
00396     {
00397       int fd;
00398       tempbase = concat (tempdir, "txidxXXXXXX");
00399 
00400       fd = mkstemp (tempbase);
00401       if (fd == -1)
00402         pfatal_with_name (tempbase);
00403     }
00404 
00405   sprintf (tempsuffix, ".%d", count);
00406   return concat (tempbase, tempsuffix);
00407 }
00408 
00409 
00410 /* Delete all temporary files up to TO_COUNT. */
00411 
00412 void
00413 flush_tempfiles (int to_count)
00414 {
00415   if (keep_tempfiles)
00416     return;
00417   while (last_deleted_tempcount < to_count)
00418     unlink (maketempname (++last_deleted_tempcount));
00419 }
00420 
00421 
00422 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
00423 
00424 int
00425 compare_full (const void *p1, const void *p2)
00426 {
00427   char **line1 = (char **) p1;
00428   char **line2 = (char **) p2;
00429   int i;
00430 
00431   /* Compare using the first keyfield;
00432      if that does not distinguish the lines, try the second keyfield;
00433      and so on. */
00434 
00435   for (i = 0; i < num_keyfields; i++)
00436     {
00437       long length1, length2;
00438       char *start1 = find_field (&keyfields[i], *line1, &length1);
00439       char *start2 = find_field (&keyfields[i], *line2, &length2);
00440       int tem = compare_field (&keyfields[i], start1, length1,
00441                                *line1 - text_base,
00442                                start2, length2, *line2 - text_base);
00443       if (tem)
00444         {
00445           if (keyfields[i].reverse)
00446             return -tem;
00447           return tem;
00448         }
00449     }
00450 
00451   return 0;                     /* Lines match exactly. */
00452 }
00453 
00454 /* Compare LINE1 and LINE2, described by structures
00455    in which the first keyfield is identified in advance.
00456    For positional sorting, assumes that the order of the lines in core
00457    reflects their nominal order.  */
00458 int
00459 compare_prepared (const void *p1, const void *p2)
00460 {
00461   struct lineinfo *line1 = (struct lineinfo *) p1;
00462   struct lineinfo *line2 = (struct lineinfo *) p2;
00463   int i;
00464   int tem;
00465   char *text1, *text2;
00466 
00467   /* Compare using the first keyfield, which has been found for us already. */
00468   if (keyfields->positional)
00469     {
00470       if (line1->text - text_base > line2->text - text_base)
00471         tem = 1;
00472       else
00473         tem = -1;
00474     }
00475   else if (keyfields->numeric)
00476     tem = line1->key.number - line2->key.number;
00477   else
00478     tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
00479                          line2->key.text, line2->keylen, 0);
00480   if (tem)
00481     {
00482       if (keyfields->reverse)
00483         return -tem;
00484       return tem;
00485     }
00486 
00487   text1 = line1->text;
00488   text2 = line2->text;
00489 
00490   /* Compare using the second keyfield;
00491      if that does not distinguish the lines, try the third keyfield;
00492      and so on. */
00493 
00494   for (i = 1; i < num_keyfields; i++)
00495     {
00496       long length1, length2;
00497       char *start1 = find_field (&keyfields[i], text1, &length1);
00498       char *start2 = find_field (&keyfields[i], text2, &length2);
00499       int tem = compare_field (&keyfields[i], start1, length1,
00500                                text1 - text_base,
00501                                start2, length2, text2 - text_base);
00502       if (tem)
00503         {
00504           if (keyfields[i].reverse)
00505             return -tem;
00506           return tem;
00507         }
00508     }
00509 
00510   return 0;                     /* Lines match exactly. */
00511 }
00512 
00513 /* Like compare_full but more general.
00514    You can pass any strings, and you can say how many keyfields to use.
00515    POS1 and POS2 should indicate the nominal positional ordering of
00516    the two lines in the input.  */
00517 
00518 int
00519 compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields)
00520 {
00521   int i;
00522 
00523   /* Compare using the first keyfield;
00524      if that does not distinguish the lines, try the second keyfield;
00525      and so on. */
00526 
00527   for (i = 0; i < use_keyfields; i++)
00528     {
00529       long length1, length2;
00530       char *start1 = find_field (&keyfields[i], str1, &length1);
00531       char *start2 = find_field (&keyfields[i], str2, &length2);
00532       int tem = compare_field (&keyfields[i], start1, length1, pos1,
00533                                start2, length2, pos2);
00534       if (tem)
00535         {
00536           if (keyfields[i].reverse)
00537             return -tem;
00538           return tem;
00539         }
00540     }
00541 
00542   return 0;                     /* Lines match exactly. */
00543 }
00544 
00545 /* Find the start and length of a field in STR according to KEYFIELD.
00546    A pointer to the starting character is returned, and the length
00547    is stored into the int that LENGTHPTR points to.  */
00548 
00549 char *
00550 find_field (struct keyfield *keyfield, char *str, long int *lengthptr)
00551 {
00552   char *start;
00553   char *end;
00554   char *(*fun) ();
00555 
00556   if (keyfield->braced)
00557     fun = find_braced_pos;
00558   else
00559     fun = find_pos;
00560 
00561   start = (*fun) (str, keyfield->startwords, keyfield->startchars,
00562                   keyfield->ignore_blanks);
00563   if (keyfield->endwords < 0)
00564     {
00565       if (keyfield->braced)
00566         end = find_braced_end (start);
00567       else
00568         {
00569           end = start;
00570           while (*end && *end != '\n')
00571             end++;
00572         }
00573     }
00574   else
00575     {
00576       end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
00577       if (end - str < start - str)
00578         end = start;
00579     }
00580   *lengthptr = end - start;
00581   return start;
00582 }
00583 
00584 /* Return a pointer to a specified place within STR,
00585    skipping (from the beginning) WORDS words and then CHARS chars.
00586    If IGNORE_BLANKS is nonzero, we skip all blanks
00587    after finding the specified word.  */
00588 
00589 char *
00590 find_pos (char *str, int words, int chars, int ignore_blanks)
00591 {
00592   int i;
00593   char *p = str;
00594 
00595   for (i = 0; i < words; i++)
00596     {
00597       char c;
00598       /* Find next bunch of nonblanks and skip them. */
00599       while ((c = *p) == ' ' || c == '\t')
00600         p++;
00601       while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
00602         p++;
00603       if (!*p || *p == '\n')
00604         return p;
00605     }
00606 
00607   while (*p == ' ' || *p == '\t')
00608     p++;
00609 
00610   for (i = 0; i < chars; i++)
00611     {
00612       if (!*p || *p == '\n')
00613         break;
00614       p++;
00615     }
00616   return p;
00617 }
00618 
00619 /* Like find_pos but assumes that each field is surrounded by braces
00620    and that braces within fields are balanced. */
00621 
00622 char *
00623 find_braced_pos (char *str, int words, int chars, int ignore_blanks)
00624 {
00625   int i;
00626   int bracelevel;
00627   char *p = str;
00628   char c;
00629 
00630   for (i = 0; i < words; i++)
00631     {
00632       bracelevel = 1;
00633       while ((c = *p++) != '{' && c != '\n' && c)
00634         /* Do nothing. */ ;
00635       if (c != '{')
00636         return p - 1;
00637       while (bracelevel)
00638         {
00639           c = *p++;
00640           if (c == '{')
00641             bracelevel++;
00642           if (c == '}')
00643             bracelevel--;
00644           if (c == 0 || c == '\n')
00645             return p - 1;
00646         }
00647     }
00648 
00649   while ((c = *p++) != '{' && c != '\n' && c)
00650     /* Do nothing. */ ;
00651 
00652   if (c != '{')
00653     return p - 1;
00654 
00655   if (ignore_blanks)
00656     while ((c = *p) == ' ' || c == '\t')
00657       p++;
00658 
00659   for (i = 0; i < chars; i++)
00660     {
00661       if (!*p || *p == '\n')
00662         break;
00663       p++;
00664     }
00665   return p;
00666 }
00667 
00668 /* Find the end of the balanced-brace field which starts at STR.
00669    The position returned is just before the closing brace. */
00670 
00671 char *
00672 find_braced_end (char *str)
00673 {
00674   int bracelevel;
00675   char *p = str;
00676   char c;
00677 
00678   bracelevel = 1;
00679   while (bracelevel)
00680     {
00681       c = *p++;
00682       if (c == '{')
00683         bracelevel++;
00684       if (c == '}')
00685         bracelevel--;
00686       if (c == 0 || c == '\n')
00687         return p - 1;
00688     }
00689   return p - 1;
00690 }
00691 
00692 long
00693 find_value (char *start, long int length)
00694 {
00695   while (length != 0L)
00696     {
00697       if (isdigit (*start))
00698         return atol (start);
00699       length--;
00700       start++;
00701     }
00702   return 0l;
00703 }
00704 
00705 /* Vector used to translate characters for comparison.
00706    This is how we make all alphanumerics follow all else,
00707    and ignore case in the first sorting.  */
00708 int char_order[256];
00709 
00710 void
00711 init_char_order (void)
00712 {
00713   int i;
00714   for (i = 1; i < 256; i++)
00715     char_order[i] = i;
00716 
00717   for (i = '0'; i <= '9'; i++)
00718     char_order[i] += 512;
00719 
00720   for (i = 'a'; i <= 'z'; i++)
00721     {
00722       char_order[i] = 512 + i;
00723       char_order[i + 'A' - 'a'] = 512 + i;
00724     }
00725 }
00726 
00727 /* Compare two fields (each specified as a start pointer and a character count)
00728    according to KEYFIELD.
00729    The sign of the value reports the relation between the fields. */
00730 
00731 int
00732 compare_field (struct keyfield *keyfield, char *start1, long int length1,
00733                long int pos1, char *start2, long int length2, long int pos2)
00734 {
00735   if (keyfields->positional)
00736     {
00737       if (pos1 > pos2)
00738         return 1;
00739       else
00740         return -1;
00741     }
00742   if (keyfield->numeric)
00743     {
00744       long value = find_value (start1, length1) - find_value (start2, length2);
00745       if (value > 0)
00746         return 1;
00747       if (value < 0)
00748         return -1;
00749       return 0;
00750     }
00751   else
00752     {
00753       char *p1 = start1;
00754       char *p2 = start2;
00755       char *e1 = start1 + length1;
00756       char *e2 = start2 + length2;
00757 
00758       while (1)
00759         {
00760           int c1, c2;
00761 
00762           if (p1 == e1)
00763             c1 = 0;
00764           else
00765             c1 = *p1++;
00766           if (p2 == e2)
00767             c2 = 0;
00768           else
00769             c2 = *p2++;
00770 
00771           if (char_order[c1] != char_order[c2])
00772             return char_order[c1] - char_order[c2];
00773           if (!c1)
00774             break;
00775         }
00776 
00777       /* Strings are equal except possibly for case.  */
00778       p1 = start1;
00779       p2 = start2;
00780       while (1)
00781         {
00782           int c1, c2;
00783 
00784           if (p1 == e1)
00785             c1 = 0;
00786           else
00787             c1 = *p1++;
00788           if (p2 == e2)
00789             c2 = 0;
00790           else
00791             c2 = *p2++;
00792 
00793           if (c1 != c2)
00794             /* Reverse sign here so upper case comes out last.  */
00795             return c2 - c1;
00796           if (!c1)
00797             break;
00798         }
00799 
00800       return 0;
00801     }
00802 }
00803 
00804 /* A `struct linebuffer' is a structure which holds a line of text.
00805    `readline' reads a line from a stream into a linebuffer
00806    and works regardless of the length of the line.  */
00807 
00808 struct linebuffer
00809 {
00810   long size;
00811   char *buffer;
00812 };
00813 
00814 /* Initialize LINEBUFFER for use. */
00815 
00816 void
00817 initbuffer (struct linebuffer *linebuffer)
00818 {
00819   linebuffer->size = 200;
00820   linebuffer->buffer = (char *) xmalloc (200);
00821 }
00822 
00823 /* Read a line of text from STREAM into LINEBUFFER.
00824    Return the length of the line.  */
00825 
00826 long
00827 readline (struct linebuffer *linebuffer, FILE *stream)
00828 {
00829   char *buffer = linebuffer->buffer;
00830   char *p = linebuffer->buffer;
00831   char *end = p + linebuffer->size;
00832 
00833   while (1)
00834     {
00835       int c = getc (stream);
00836       if (p == end)
00837         {
00838           buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
00839           p += buffer - linebuffer->buffer;
00840           end += buffer - linebuffer->buffer;
00841           linebuffer->buffer = buffer;
00842         }
00843       if (c < 0 || c == '\n')
00844         {
00845           *p = 0;
00846           break;
00847         }
00848       *p++ = c;
00849     }
00850 
00851   return p - buffer;
00852 }
00853 
00854 /* Sort an input file too big to sort in core.  */
00855 
00856 void
00857 sort_offline (char *infile, off_t total, char *outfile)
00858 {
00859   /* More than enough. */
00860   int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
00861   char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
00862   FILE *istream = fopen (infile, "r");
00863   int i;
00864   struct linebuffer lb;
00865   long linelength;
00866   int failure = 0;
00867 
00868   initbuffer (&lb);
00869 
00870   /* Read in one line of input data.  */
00871 
00872   linelength = readline (&lb, istream);
00873 
00874   if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
00875     {
00876       error (_("%s: not a texinfo index file"), infile);
00877       return;
00878     }
00879 
00880   /* Split up the input into `ntemps' temporary files, or maybe fewer,
00881      and put the new files' names into `tempfiles' */
00882 
00883   for (i = 0; i < ntemps; i++)
00884     {
00885       char *outname = maketempname (++tempcount);
00886       FILE *ostream = fopen (outname, "w");
00887       long tempsize = 0;
00888 
00889       if (!ostream)
00890         pfatal_with_name (outname);
00891       tempfiles[i] = outname;
00892 
00893       /* Copy lines into this temp file as long as it does not make file
00894          "too big" or until there are no more lines.  */
00895 
00896       while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
00897         {
00898           tempsize += linelength + 1;
00899           fputs (lb.buffer, ostream);
00900           putc ('\n', ostream);
00901 
00902           /* Read another line of input data.  */
00903 
00904           linelength = readline (&lb, istream);
00905           if (!linelength && feof (istream))
00906             break;
00907 
00908           if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
00909             {
00910               error (_("%s: not a texinfo index file"), infile);
00911               failure = 1;
00912               goto fail;
00913             }
00914         }
00915       fclose (ostream);
00916       if (feof (istream))
00917         break;
00918     }
00919 
00920   free (lb.buffer);
00921 
00922 fail:
00923   /* Record number of temp files we actually needed.  */
00924 
00925   ntemps = i;
00926 
00927   /* Sort each tempfile into another tempfile.
00928     Delete the first set of tempfiles and put the names of the second
00929     into `tempfiles'. */
00930 
00931   for (i = 0; i < ntemps; i++)
00932     {
00933       char *newtemp = maketempname (++tempcount);
00934       sort_in_core (tempfiles[i], MAX_IN_CORE_SORT, newtemp);
00935       if (!keep_tempfiles)
00936         unlink (tempfiles[i]);
00937       tempfiles[i] = newtemp;
00938     }
00939 
00940   if (failure)
00941     return;
00942 
00943   /* Merge the tempfiles together and indexify. */
00944 
00945   merge_files (tempfiles, ntemps, outfile);
00946 }
00947 
00948 /* Sort INFILE, whose size is TOTAL,
00949    assuming that is small enough to be done in-core,
00950    then indexify it and send the output to OUTFILE (or to stdout).  */
00951 
00952 void
00953 sort_in_core (char *infile, int total, char *outfile)
00954 {
00955   char **nextline;
00956   char *data = (char *) xmalloc (total + 1);
00957   char *file_data;
00958   long file_size;
00959   int i;
00960   FILE *ostream = stdout;
00961   struct lineinfo *lineinfo;
00962 
00963   /* Read the contents of the file into the moby array `data'. */
00964 
00965   int desc = open (infile, O_RDONLY, 0);
00966 
00967   if (desc < 0)
00968     fatal (_("failure reopening %s"), infile);
00969   for (file_size = 0;;)
00970     {
00971       i = read (desc, data + file_size, total - file_size);
00972       if (i <= 0)
00973         break;
00974       file_size += i;
00975     }
00976   file_data = data;
00977   data[file_size] = 0;
00978 
00979   close (desc);
00980 
00981   if (file_size > 0 && data[0] != '\\' && data[0] != '@')
00982     {
00983       error (_("%s: not a texinfo index file"), infile);
00984       return;
00985     }
00986 
00987   init_char_order ();
00988 
00989   /* Sort routines want to know this address. */
00990 
00991   text_base = data;
00992 
00993   /* Create the array of pointers to lines, with a default size
00994      frequently enough.  */
00995 
00996   nlines = total / 50;
00997   if (!nlines)
00998     nlines = 2;
00999   linearray = (char **) xmalloc (nlines * sizeof (char *));
01000 
01001   /* `nextline' points to the next free slot in this array.
01002      `nlines' is the allocated size.  */
01003 
01004   nextline = linearray;
01005 
01006   /* Parse the input file's data, and make entries for the lines.  */
01007 
01008   nextline = parsefile (infile, nextline, file_data, file_size);
01009   if (nextline == 0)
01010     {
01011       error (_("%s: not a texinfo index file"), infile);
01012       return;
01013     }
01014 
01015   /* Sort the lines. */
01016 
01017   /* If we have enough space, find the first keyfield of each line in advance.
01018      Make a `struct lineinfo' for each line, which records the keyfield
01019      as well as the line, and sort them.  */
01020 
01021   lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo));
01022 
01023   if (lineinfo)
01024     {
01025       struct lineinfo *lp;
01026       char **p;
01027 
01028       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
01029         {
01030           lp->text = *p;
01031           lp->key.text = find_field (keyfields, *p, &lp->keylen);
01032           if (keyfields->numeric)
01033             lp->key.number = find_value (lp->key.text, lp->keylen);
01034         }
01035 
01036       qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
01037              compare_prepared);
01038 
01039       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
01040         *p = lp->text;
01041 
01042       free (lineinfo);
01043     }
01044   else
01045     qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
01046 
01047   /* Open the output file. */
01048 
01049   if (outfile)
01050     {
01051       ostream = fopen (outfile, "w");
01052       if (!ostream)
01053         pfatal_with_name (outfile);
01054     }
01055 
01056   writelines (linearray, nextline - linearray, ostream);
01057   if (outfile)
01058     fclose (ostream);
01059 
01060   free (linearray);
01061   free (data);
01062 }
01063 
01064 /* Parse an input string in core into lines.
01065    DATA is the input string, and SIZE is its length.
01066    Data goes in LINEARRAY starting at NEXTLINE.
01067    The value returned is the first entry in LINEARRAY still unused.
01068    Value 0 means input file contents are invalid.  */
01069 
01070 char **
01071 parsefile (char *filename, char **nextline, char *data, long int size)
01072 {
01073   char *p, *end;
01074   char **line = nextline;
01075 
01076   p = data;
01077   end = p + size;
01078   *end = 0;
01079 
01080   while (p != end)
01081     {
01082       if (p[0] != '\\' && p[0] != '@')
01083         return 0;
01084 
01085       *line = p;
01086 
01087       /* Find the first letter of the first field of this line.  If it
01088          is different from the first letter of the first field of the
01089          first line, we need initial headers in the output index.  */
01090       while (*p && *p != '{')
01091         p++;
01092       if (p == end)
01093         return 0;
01094       p++;
01095       if (first_initial)
01096         {
01097           if (first_initial != toupper (*p))
01098             need_initials = 1;
01099         }
01100       else
01101         first_initial = toupper (*p);
01102 
01103       while (*p && *p != '\n')
01104         p++;
01105       if (p != end)
01106         p++;
01107 
01108       line++;
01109       if (line == linearray + nlines)
01110         {
01111           char **old = linearray;
01112           linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4));
01113           line += linearray - old;
01114         }
01115     }
01116 
01117   return line;
01118 }
01119 
01120 /* Indexification is a filter applied to the sorted lines
01121    as they are being written to the output file.
01122    Multiple entries for the same name, with different page numbers,
01123    get combined into a single entry with multiple page numbers.
01124    The first braced field, which is used for sorting, is discarded.
01125    However, its first character is examined, folded to lower case,
01126    and if it is different from that in the previous line fed to us
01127    a \initial line is written with one argument, the new initial.
01128 
01129    If an entry has four braced fields, then the second and third
01130    constitute primary and secondary names.
01131    In this case, each change of primary name
01132    generates a \primary line which contains only the primary name,
01133    and in between these are \secondary lines which contain
01134    just a secondary name and page numbers. */
01135 
01136 /* The last primary name we wrote a \primary entry for.
01137    If only one level of indexing is being done, this is the last name seen. */
01138 char *lastprimary;
01139 /* Length of storage allocated for lastprimary. */
01140 int lastprimarylength;
01141 
01142 /* Similar, for the secondary name. */
01143 char *lastsecondary;
01144 int lastsecondarylength;
01145 
01146 /* Zero if we are not in the middle of writing an entry.
01147    One if we have written the beginning of an entry but have not
01148    yet written any page numbers into it.
01149    Greater than one if we have written the beginning of an entry
01150    plus at least one page number. */
01151 int pending;
01152 
01153 /* The initial (for sorting purposes) of the last primary entry written.
01154    When this changes, a \initial {c} line is written */
01155 
01156 char *lastinitial;
01157 
01158 int lastinitiallength;
01159 
01160 /* When we need a string of length 1 for the value of lastinitial,
01161    store it here.  */
01162 
01163 char lastinitial1[2];
01164 
01165 /* Initialize static storage for writing an index. */
01166 
01167 void
01168 init_index (void)
01169 {
01170   pending = 0;
01171   lastinitial = lastinitial1;
01172   lastinitial1[0] = 0;
01173   lastinitial1[1] = 0;
01174   lastinitiallength = 0;
01175   lastprimarylength = 100;
01176   lastprimary = (char *) xmalloc (lastprimarylength + 1);
01177   memset (lastprimary, '\0', lastprimarylength + 1);
01178   lastsecondarylength = 100;
01179   lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
01180   memset (lastsecondary, '\0', lastsecondarylength + 1);
01181 }
01182 
01183 /* Indexify.  Merge entries for the same name,
01184    insert headers for each initial character, etc.  */
01185 
01186 void
01187 indexify (char *line, FILE *ostream)
01188 {
01189   char *primary, *secondary, *pagenumber;
01190   int primarylength, secondarylength = 0, pagelength;
01191   int nosecondary;
01192   int initiallength;
01193   char *initial;
01194   char initial1[2];
01195   register char *p;
01196 
01197   /* First, analyze the parts of the entry fed to us this time. */
01198 
01199   p = find_braced_pos (line, 0, 0, 0);
01200   if (*p == '{')
01201     {
01202       initial = p;
01203       /* Get length of inner pair of braces starting at `p',
01204          including that inner pair of braces.  */
01205       initiallength = find_braced_end (p + 1) + 1 - p;
01206     }
01207   else
01208     {
01209       initial = initial1;
01210       initial1[0] = toupper (*p);
01211       initial1[1] = 0;
01212       initiallength = 1;
01213     }
01214 
01215   pagenumber = find_braced_pos (line, 1, 0, 0);
01216   pagelength = find_braced_end (pagenumber) - pagenumber;
01217   if (pagelength == 0)
01218     fatal (_("No page number in %s"), line);
01219 
01220   primary = find_braced_pos (line, 2, 0, 0);
01221   primarylength = find_braced_end (primary) - primary;
01222 
01223   secondary = find_braced_pos (line, 3, 0, 0);
01224   nosecondary = !*secondary;
01225   if (!nosecondary)
01226     secondarylength = find_braced_end (secondary) - secondary;
01227 
01228   /* If the primary is different from before, make a new primary entry. */
01229   if (strncmp (primary, lastprimary, primarylength))
01230     {
01231       /* Close off current secondary entry first, if one is open. */
01232       if (pending)
01233         {
01234           fputs ("}\n", ostream);
01235           pending = 0;
01236         }
01237 
01238       /* If this primary has a different initial, include an entry for
01239          the initial. */
01240       if (need_initials &&
01241           (initiallength != lastinitiallength ||
01242            strncmp (initial, lastinitial, initiallength)))
01243         {
01244           fprintf (ostream, "\\initial {");
01245           fwrite (initial, 1, initiallength, ostream);
01246           fputs ("}\n", ostream);
01247           if (initial == initial1)
01248             {
01249               lastinitial = lastinitial1;
01250               *lastinitial1 = *initial1;
01251             }
01252           else
01253             {
01254               lastinitial = initial;
01255             }
01256           lastinitiallength = initiallength;
01257         }
01258 
01259       /* Make the entry for the primary.  */
01260       if (nosecondary)
01261         fputs ("\\entry {", ostream);
01262       else
01263         fputs ("\\primary {", ostream);
01264       fwrite (primary, primarylength, 1, ostream);
01265       if (nosecondary)
01266         {
01267           fputs ("}{", ostream);
01268           pending = 1;
01269         }
01270       else
01271         fputs ("}\n", ostream);
01272 
01273       /* Record name of most recent primary. */
01274       if (lastprimarylength < primarylength)
01275         {
01276           lastprimarylength = primarylength + 100;
01277           lastprimary = (char *) xrealloc (lastprimary,
01278                                            1 + lastprimarylength);
01279         }
01280       strncpy (lastprimary, primary, primarylength);
01281       lastprimary[primarylength] = 0;
01282 
01283       /* There is no current secondary within this primary, now. */
01284       lastsecondary[0] = 0;
01285     }
01286 
01287   /* Should not have an entry with no subtopic following one with a
01288      subtopic. */
01289 
01290   if (nosecondary && *lastsecondary)
01291     error (_("entry %s follows an entry with a secondary name"), line);
01292 
01293   /* Start a new secondary entry if necessary. */
01294   if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
01295     {
01296       if (pending)
01297         {
01298           fputs ("}\n", ostream);
01299           pending = 0;
01300         }
01301 
01302       /* Write the entry for the secondary.  */
01303       fputs ("\\secondary {", ostream);
01304       fwrite (secondary, secondarylength, 1, ostream);
01305       fputs ("}{", ostream);
01306       pending = 1;
01307 
01308       /* Record name of most recent secondary. */
01309       if (lastsecondarylength < secondarylength)
01310         {
01311           lastsecondarylength = secondarylength + 100;
01312           lastsecondary = (char *) xrealloc (lastsecondary,
01313                                              1 + lastsecondarylength);
01314         }
01315       strncpy (lastsecondary, secondary, secondarylength);
01316       lastsecondary[secondarylength] = 0;
01317     }
01318 
01319   /* Here to add one more page number to the current entry. */
01320   if (pending++ != 1)
01321     fputs (", ", ostream);  /* Punctuate first, if this is not the first. */
01322   fwrite (pagenumber, pagelength, 1, ostream);
01323 }
01324 
01325 /* Close out any unfinished output entry. */
01326 
01327 void
01328 finish_index (FILE *ostream)
01329 {
01330   if (pending)
01331     fputs ("}\n", ostream);
01332   free (lastprimary);
01333   free (lastsecondary);
01334 }
01335 
01336 /* Copy the lines in the sorted order.
01337    Each line is copied out of the input file it was found in. */
01338 
01339 void
01340 writelines (char **linearray, int nlines, FILE *ostream)
01341 {
01342   char **stop_line = linearray + nlines;
01343   char **next_line;
01344 
01345   init_index ();
01346 
01347   /* Output the text of the lines, and free the buffer space. */
01348 
01349   for (next_line = linearray; next_line != stop_line; next_line++)
01350     {
01351       /* If -u was specified, output the line only if distinct from
01352          previous one.  */
01353       if (next_line == linearray
01354       /* Compare previous line with this one, using only the
01355          explicitly specd keyfields. */
01356           || compare_general (*(next_line - 1), *next_line, 0L, 0L,
01357                               num_keyfields - 1))
01358         {
01359           char *p = *next_line;
01360           char c;
01361 
01362           while ((c = *p++) && c != '\n')
01363             /* Do nothing. */ ;
01364           *(p - 1) = 0;
01365           indexify (*next_line, ostream);
01366         }
01367     }
01368 
01369   finish_index (ostream);
01370 }
01371 
01372 /* Assume (and optionally verify) that each input file is sorted;
01373    merge them and output the result.
01374    Returns nonzero if any input file fails to be sorted.
01375 
01376    This is the high-level interface that can handle an unlimited
01377    number of files.  */
01378 
01379 #define MAX_DIRECT_MERGE 10
01380 
01381 int
01382 merge_files (char **infiles, int nfiles, char *outfile)
01383 {
01384   char **tempfiles;
01385   int ntemps;
01386   int i;
01387   int value = 0;
01388   int start_tempcount = tempcount;
01389 
01390   if (nfiles <= MAX_DIRECT_MERGE)
01391     return merge_direct (infiles, nfiles, outfile);
01392 
01393   /* Merge groups of MAX_DIRECT_MERGE input files at a time,
01394      making a temporary file to hold each group's result.  */
01395 
01396   ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
01397   tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
01398   for (i = 0; i < ntemps; i++)
01399     {
01400       int nf = MAX_DIRECT_MERGE;
01401       if (i + 1 == ntemps)
01402         nf = nfiles - i * MAX_DIRECT_MERGE;
01403       tempfiles[i] = maketempname (++tempcount);
01404       value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
01405     }
01406 
01407   /* All temporary files that existed before are no longer needed
01408      since their contents have been merged into our new tempfiles.
01409      So delete them.  */
01410   flush_tempfiles (start_tempcount);
01411 
01412   /* Now merge the temporary files we created.  */
01413 
01414   merge_files (tempfiles, ntemps, outfile);
01415 
01416   free (tempfiles);
01417 
01418   return value;
01419 }
01420 
01421 /* Assume (and optionally verify) that each input file is sorted;
01422    merge them and output the result.
01423    Returns nonzero if any input file fails to be sorted.
01424 
01425    This version of merging will not work if the number of
01426    input files gets too high.  Higher level functions
01427    use it only with a bounded number of input files.  */
01428 
01429 int
01430 merge_direct (char **infiles, int nfiles, char *outfile)
01431 {
01432   struct linebuffer *lb1, *lb2;
01433   struct linebuffer **thisline, **prevline;
01434   FILE **streams;
01435   int i;
01436   int nleft;
01437   int lossage = 0;
01438   int *file_lossage;
01439   struct linebuffer *prev_out = 0;
01440   FILE *ostream = stdout;
01441 
01442   if (outfile)
01443     {
01444       ostream = fopen (outfile, "w");
01445     }
01446   if (!ostream)
01447     pfatal_with_name (outfile);
01448 
01449   init_index ();
01450 
01451   if (nfiles == 0)
01452     {
01453       if (outfile)
01454         fclose (ostream);
01455       return 0;
01456     }
01457 
01458   /* For each file, make two line buffers.  Also, for each file, there
01459      is an element of `thisline' which points at any time to one of the
01460      file's two buffers, and an element of `prevline' which points to
01461      the other buffer.  `thisline' is supposed to point to the next
01462      available line from the file, while `prevline' holds the last file
01463      line used, which is remembered so that we can verify that the file
01464      is properly sorted. */
01465 
01466   /* lb1 and lb2 contain one buffer each per file. */
01467   lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
01468   lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
01469 
01470   /* thisline[i] points to the linebuffer holding the next available
01471      line in file i, or is zero if there are no lines left in that file.  */
01472   thisline = (struct linebuffer **)
01473     xmalloc (nfiles * sizeof (struct linebuffer *));
01474   /* prevline[i] points to the linebuffer holding the last used line
01475      from file i.  This is just for verifying that file i is properly
01476      sorted.  */
01477   prevline = (struct linebuffer **)
01478     xmalloc (nfiles * sizeof (struct linebuffer *));
01479   /* streams[i] holds the input stream for file i.  */
01480   streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
01481   /* file_lossage[i] is nonzero if we already know file i is not
01482      properly sorted.  */
01483   file_lossage = (int *) xmalloc (nfiles * sizeof (int));
01484 
01485   /* Allocate and initialize all that storage. */
01486 
01487   for (i = 0; i < nfiles; i++)
01488     {
01489       initbuffer (&lb1[i]);
01490       initbuffer (&lb2[i]);
01491       thisline[i] = &lb1[i];
01492       prevline[i] = &lb2[i];
01493       file_lossage[i] = 0;
01494       streams[i] = fopen (infiles[i], "r");
01495       if (!streams[i])
01496         pfatal_with_name (infiles[i]);
01497 
01498       readline (thisline[i], streams[i]);
01499     }
01500 
01501   /* Keep count of number of files not at eof. */
01502   nleft = nfiles;
01503 
01504   while (nleft)
01505     {
01506       struct linebuffer *best = 0;
01507       struct linebuffer *exch;
01508       int bestfile = -1;
01509       int i;
01510 
01511       /* Look at the next avail line of each file; choose the least one.  */
01512 
01513       for (i = 0; i < nfiles; i++)
01514         {
01515           if (thisline[i] &&
01516               (!best ||
01517                0 < compare_general (best->buffer, thisline[i]->buffer,
01518                                  (long) bestfile, (long) i, num_keyfields)))
01519             {
01520               best = thisline[i];
01521               bestfile = i;
01522             }
01523         }
01524 
01525       /* Output that line, unless it matches the previous one and we
01526          don't want duplicates. */
01527 
01528       if (!(prev_out &&
01529             !compare_general (prev_out->buffer,
01530                               best->buffer, 0L, 1L, num_keyfields - 1)))
01531         indexify (best->buffer, ostream);
01532       prev_out = best;
01533 
01534       /* Now make the line the previous of its file, and fetch a new
01535          line from that file.  */
01536 
01537       exch = prevline[bestfile];
01538       prevline[bestfile] = thisline[bestfile];
01539       thisline[bestfile] = exch;
01540 
01541       while (1)
01542         {
01543           /* If the file has no more, mark it empty. */
01544 
01545           if (feof (streams[bestfile]))
01546             {
01547               thisline[bestfile] = 0;
01548               /* Update the number of files still not empty. */
01549               nleft--;
01550               break;
01551             }
01552           readline (thisline[bestfile], streams[bestfile]);
01553           if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
01554             break;
01555         }
01556     }
01557 
01558   finish_index (ostream);
01559 
01560   /* Free all storage and close all input streams. */
01561 
01562   for (i = 0; i < nfiles; i++)
01563     {
01564       fclose (streams[i]);
01565       free (lb1[i].buffer);
01566       free (lb2[i].buffer);
01567     }
01568   free (file_lossage);
01569   free (lb1);
01570   free (lb2);
01571   free (thisline);
01572   free (prevline);
01573   free (streams);
01574 
01575   if (outfile)
01576     fclose (ostream);
01577 
01578   return lossage;
01579 }
01580 
01581 /* Print error message and exit.  */
01582 
01583 void
01584 fatal (const char *format, const char *arg)
01585 {
01586   error (format, arg);
01587   xexit (1);
01588 }
01589 
01590 /* Print error message.  FORMAT is printf control string, ARG is arg for it. */
01591 void
01592 error (const char *format, const char *arg)
01593 {
01594   printf ("%s: ", program_name);
01595   printf (format, arg);
01596   if (format[strlen (format) -1] != '\n')
01597     printf ("\n");
01598 }
01599 
01600 void
01601 perror_with_name (const char *name)
01602 {
01603   fprintf (stderr, "%s: ", program_name);
01604   perror (name);
01605 }
01606 
01607 void
01608 pfatal_with_name (const char *name)
01609 {
01610   perror_with_name (name);
01611   xexit (1);
01612 }
01613 
01614 
01615 /* Return a newly-allocated string concatenating S1 and S2.  */
01616 
01617 char *
01618 concat (char *s1, char *s2)
01619 {
01620   int len1 = strlen (s1), len2 = strlen (s2);
01621   char *result = (char *) xmalloc (len1 + len2 + 1);
01622 
01623   strcpy (result, s1);
01624   strcpy (result + len1, s2);
01625   *(result + len1 + len2) = 0;
01626 
01627   return result;
01628 }