Back to index

cell-binutils  2.17cvs20070401
strverscmp.c
Go to the documentation of this file.
00001 /* Compare strings while treating digits characters numerically.
00002    Copyright (C) 1997, 2002, 2005 Free Software Foundation, Inc.
00003    This file is part of the libiberty library.
00004    Contributed by Jean-François Bignolles <bignolle@ecoledoc.ibp.fr>, 1997.
00005 
00006    Libiberty is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Lesser General Public
00008    License as published by the Free Software Foundation; either
00009    version 2.1 of the License, or (at your option) any later version.
00010 
00011    Libiberty is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Lesser General Public License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public
00017    License along with the GNU C Library; if not, write to the Free
00018    Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00019    02110-1301 USA.  */
00020 
00021 #include "libiberty.h"
00022 #include "safe-ctype.h"
00023 
00024 /* 
00025 @deftypefun int strverscmp (const char *@var{s1}, const char *@var{s2})
00026 The @code{strverscmp} function compares the string @var{s1} against
00027 @var{s2}, considering them as holding indices/version numbers.  Return
00028 value follows the same conventions as found in the @code{strverscmp}
00029 function.  In fact, if @var{s1} and @var{s2} contain no digits,
00030 @code{strverscmp} behaves like @code{strcmp}.
00031 
00032 Basically, we compare strings normally (character by character), until
00033 we find a digit in each string - then we enter a special comparison
00034 mode, where each sequence of digits is taken as a whole.  If we reach the
00035 end of these two parts without noticing a difference, we return to the
00036 standard comparison mode.  There are two types of numeric parts:
00037 "integral" and "fractional" (those  begin with a '0'). The types
00038 of the numeric parts affect the way we sort them:
00039 
00040 @itemize @bullet
00041 @item
00042 integral/integral: we compare values as you would expect.
00043 
00044 @item
00045 fractional/integral: the fractional part is less than the integral one.
00046 Again, no surprise.
00047 
00048 @item
00049 fractional/fractional: the things become a bit more complex.
00050 If the common prefix contains only leading zeroes, the longest part is less
00051 than the other one; else the comparison behaves normally.
00052 @end itemize
00053 
00054 @smallexample
00055 strverscmp ("no digit", "no digit")
00056     @result{} 0    // @r{same behavior as strcmp.}
00057 strverscmp ("item#99", "item#100")
00058     @result{} <0   // @r{same prefix, but 99 < 100.}
00059 strverscmp ("alpha1", "alpha001")
00060     @result{} >0   // @r{fractional part inferior to integral one.}
00061 strverscmp ("part1_f012", "part1_f01")
00062     @result{} >0   // @r{two fractional parts.}
00063 strverscmp ("foo.009", "foo.0")
00064     @result{} <0   // @r{idem, but with leading zeroes only.}
00065 @end smallexample
00066 
00067 This function is especially useful when dealing with filename sorting,
00068 because filenames frequently hold indices/version numbers.
00069 @end deftypefun
00070 
00071 */
00072 
00073 /* states: S_N: normal, S_I: comparing integral part, S_F: comparing
00074            fractional parts, S_Z: idem but with leading Zeroes only */
00075 #define  S_N    0x0
00076 #define  S_I    0x4
00077 #define  S_F    0x8
00078 #define  S_Z    0xC
00079 
00080 /* result_type: CMP: return diff; LEN: compare using len_diff/diff */
00081 #define  CMP    2
00082 #define  LEN    3
00083 
00084 
00085 /* Compare S1 and S2 as strings holding indices/version numbers,
00086    returning less than, equal to or greater than zero if S1 is less than,
00087    equal to or greater than S2 (for more info, see the Glibc texinfo doc).  */
00088 
00089 int
00090 strverscmp (const char *s1, const char *s2)
00091 {
00092   const unsigned char *p1 = (const unsigned char *) s1;
00093   const unsigned char *p2 = (const unsigned char *) s2;
00094   unsigned char c1, c2;
00095   int state;
00096   int diff;
00097 
00098   /* Symbol(s)    0       [1-9]   others  (padding)
00099      Transition   (10) 0  (01) d  (00) x  (11) -   */
00100   static const unsigned int next_state[] =
00101     {
00102       /* state    x    d    0    - */
00103       /* S_N */  S_N, S_I, S_Z, S_N,
00104       /* S_I */  S_N, S_I, S_I, S_I,
00105       /* S_F */  S_N, S_F, S_F, S_F,
00106       /* S_Z */  S_N, S_F, S_Z, S_Z
00107     };
00108 
00109   static const int result_type[] =
00110     {
00111       /* state   x/x  x/d  x/0  x/-  d/x  d/d  d/0  d/-
00112                  0/x  0/d  0/0  0/-  -/x  -/d  -/0  -/- */
00113 
00114       /* S_N */  CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP,
00115                  CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP,
00116       /* S_I */  CMP, -1,  -1,  CMP, +1,  LEN, LEN, CMP,
00117                  +1,  LEN, LEN, CMP, CMP, CMP, CMP, CMP,
00118       /* S_F */  CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP,
00119                  CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP,
00120       /* S_Z */  CMP, +1,  +1,  CMP, -1,  CMP, CMP, CMP,
00121                  -1,  CMP, CMP, CMP
00122     };
00123 
00124   if (p1 == p2)
00125     return 0;
00126 
00127   c1 = *p1++;
00128   c2 = *p2++;
00129   /* Hint: '0' is a digit too.  */
00130   state = S_N | ((c1 == '0') + (ISDIGIT (c1) != 0));
00131 
00132   while ((diff = c1 - c2) == 0 && c1 != '\0')
00133     {
00134       state = next_state[state];
00135       c1 = *p1++;
00136       c2 = *p2++;
00137       state |= (c1 == '0') + (ISDIGIT (c1) != 0);
00138     }
00139 
00140   state = result_type[state << 2 | (((c2 == '0') + (ISDIGIT (c2) != 0)))];
00141 
00142   switch (state)
00143     {
00144     case CMP:
00145       return diff;
00146       
00147     case LEN:
00148       while (ISDIGIT (*p1++))
00149        if (!ISDIGIT (*p2++))
00150          return 1;
00151       
00152       return ISDIGIT (*p2) ? -1 : diff;
00153       
00154     default:
00155       return state;
00156     }
00157 }