Back to index

glibc  2.9
fnmatch_loop.c
Go to the documentation of this file.
00001 /* Copyright (C) 1991-1993,1996-2001,2003-2005,2007
00002    Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004 
00005    The GNU C Library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Lesser General Public
00007    License as published by the Free Software Foundation; either
00008    version 2.1 of the License, or (at your option) any later version.
00009 
00010    The GNU C Library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Lesser General Public License for more details.
00014 
00015    You should have received a copy of the GNU Lesser General Public
00016    License along with the GNU C Library; if not, write to the Free
00017    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00018    02111-1307 USA.  */
00019 
00020 struct STRUCT  /* Resume state that FCT stores through its ENDS argument when it reaches a `*' with ENDS non-NULL.  */
00021 {
00022   const CHAR *pattern;  /* Pattern position of that `*' (FCT stores p - 1).  */
00023   const CHAR *string;  /* String position FCT had reached at that point.  */
00024   int no_leading_period;  /* Value of FCT's no_leading_period at that point.  */
00025 };
00026 
00027 /* Match STRING against the filename pattern PATTERN, returning zero if
00028    it matches, nonzero if not.  */
00029 static int FCT (const CHAR *pattern, const CHAR *string,
00030               const CHAR *string_end, int no_leading_period, int flags,
00031               struct STRUCT *ends)
00032      internal_function;  /* When ENDS is non-NULL, FCT may return 0 early at a `*' after filling *ENDS with the resume state.  */
00033 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
00034               const CHAR *string_end, int no_leading_period, int flags)
00035      internal_function;  /* FCT calls this for `?(' and `*(' under FNM_EXTMATCH; a result of -1 makes FCT fall back to ordinary matching.  */
00036 static const CHAR *END (const CHAR *patternp) internal_function;  /* FCT treats a result equal to PATTERNP as "not an extended pattern"; any other result is where it resumes scanning.  */
00037 
00038 static int
00039 internal_function
00040 FCT (pattern, string, string_end, no_leading_period, flags, ends)
00041      const CHAR *pattern;
00042      const CHAR *string;
00043      const CHAR *string_end;
00044      int no_leading_period;
00045      int flags;
00046      struct STRUCT *ends;
00047 {
00048   register const CHAR *p = pattern, *n = string;
00049   register UCHAR c;
00050 #ifdef _LIBC
00051 # if WIDE_CHAR_VERSION
00052   const char *collseq = (const char *)
00053     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
00054 # else
00055   const UCHAR *collseq = (const UCHAR *)
00056     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
00057 # endif
00058 #endif
00059 
00060   while ((c = *p++) != L('\0'))
00061     {
00062       int new_no_leading_period = 0;
00063       c = FOLD (c);
00064 
00065       switch (c)
00066        {
00067        case L('?'):
00068          if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
00069            {
00070              int res;
00071 
00072              res = EXT (c, p, n, string_end, no_leading_period,
00073                       flags);
00074              if (res != -1)
00075               return res;
00076            }
00077 
00078          if (n == string_end)
00079            return FNM_NOMATCH;
00080          else if (*n == L('/') && (flags & FNM_FILE_NAME))
00081            return FNM_NOMATCH;
00082          else if (*n == L('.') && no_leading_period)
00083            return FNM_NOMATCH;
00084          break;
00085 
00086        case L('\\'):
00087          if (!(flags & FNM_NOESCAPE))
00088            {
00089              c = *p++;
00090              if (c == L('\0'))
00091               /* Trailing \ loses.  */
00092               return FNM_NOMATCH;
00093              c = FOLD (c);
00094            }
00095          if (n == string_end || FOLD ((UCHAR) *n) != c)
00096            return FNM_NOMATCH;
00097          break;
00098 
00099        case L('*'):
00100          if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
00101            {
00102              int res;
00103 
00104              res = EXT (c, p, n, string_end, no_leading_period,
00105                       flags);
00106              if (res != -1)
00107               return res;
00108            }
00109          else if (ends != NULL)
00110            {
00111              ends->pattern = p - 1;
00112              ends->string = n;
00113              ends->no_leading_period = no_leading_period;
00114              return 0;
00115            }
00116 
00117          if (n != string_end && *n == L('.') && no_leading_period)
00118            return FNM_NOMATCH;
00119 
00120          for (c = *p++; c == L('?') || c == L('*'); c = *p++)
00121            {
00122              if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
00123               {
00124                 const CHAR *endp = END (p);
00125                 if (endp != p)
00126                   {
00127                     /* This is a pattern.  Skip over it.  */
00128                     p = endp;
00129                     continue;
00130                   }
00131               }
00132 
00133              if (c == L('?'))
00134               {
00135                 /* A ? needs to match one character.  */
00136                 if (n == string_end)
00137                   /* There isn't another character; no match.  */
00138                   return FNM_NOMATCH;
00139                 else if (*n == L('/')
00140                         && __builtin_expect (flags & FNM_FILE_NAME, 0))
00141                   /* A slash does not match a wildcard under
00142                      FNM_FILE_NAME.  */
00143                   return FNM_NOMATCH;
00144                 else
00145                   /* One character of the string is consumed in matching
00146                      this ? wildcard, so *??? won't match if there are
00147                      less than three characters.  */
00148                   ++n;
00149               }
00150            }
00151 
00152          if (c == L('\0'))
00153            /* The wildcard(s) is/are the last element of the pattern.
00154               If the name is a file name and contains another slash
00155               this means it cannot match, unless the FNM_LEADING_DIR
00156               flag is set.  */
00157            {
00158              int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
00159 
00160              if (flags & FNM_FILE_NAME)
00161               {
00162                 if (flags & FNM_LEADING_DIR)
00163                   result = 0;
00164                 else
00165                   {
00166                     if (MEMCHR (n, L('/'), string_end - n) == NULL)
00167                      result = 0;
00168                   }
00169               }
00170 
00171              return result;
00172            }
00173          else
00174            {
00175              const CHAR *endp;
00176              struct STRUCT end;
00177 
00178              end.pattern = NULL;
00179              endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
00180                           string_end - n);
00181              if (endp == NULL)
00182               endp = string_end;
00183 
00184              if (c == L('[')
00185                 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
00186                     && (c == L('@') || c == L('+') || c == L('!'))
00187                     && *p == L('(')))
00188               {
00189                 int flags2 = ((flags & FNM_FILE_NAME)
00190                             ? flags : (flags & ~FNM_PERIOD));
00191 
00192                 for (--p; n < endp; ++n, no_leading_period = 0)
00193                   if (FCT (p, n, string_end, no_leading_period, flags2,
00194                           &end) == 0)
00195                     goto found;
00196               }
00197              else if (c == L('/') && (flags & FNM_FILE_NAME))
00198               {
00199                 while (n < string_end && *n != L('/'))
00200                   ++n;
00201                 if (n < string_end && *n == L('/')
00202                     && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
00203                             NULL) == 0))
00204                   return 0;
00205               }
00206              else
00207               {
00208                 int flags2 = ((flags & FNM_FILE_NAME)
00209                             ? flags : (flags & ~FNM_PERIOD));
00210 
00211                 if (c == L('\\') && !(flags & FNM_NOESCAPE))
00212                   c = *p;
00213                 c = FOLD (c);
00214                 for (--p; n < endp; ++n, no_leading_period = 0)
00215                   if (FOLD ((UCHAR) *n) == c
00216                      && (FCT (p, n, string_end, no_leading_period, flags2,
00217                              &end) == 0))
00218                     {
00219                     found:
00220                      if (end.pattern == NULL)
00221                        return 0;
00222                      break;
00223                     }
00224                 if (end.pattern != NULL)
00225                   {
00226                     p = end.pattern;
00227                     n = end.string;
00228                     no_leading_period = end.no_leading_period;
00229                     continue;
00230                   }
00231               }
00232            }
00233 
00234          /* If we come here no match is possible with the wildcard.  */
00235          return FNM_NOMATCH;
00236 
00237        case L('['):
00238          {
00239            /* Nonzero if the sense of the character class is inverted.  */
00240            register int not;
00241            CHAR cold;
00242            UCHAR fn;
00243 
00244            if (posixly_correct == 0)
00245              posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
00246 
00247            if (n == string_end)
00248              return FNM_NOMATCH;
00249 
00250            if (*n == L('.') && no_leading_period)
00251              return FNM_NOMATCH;
00252 
00253            if (*n == L('/') && (flags & FNM_FILE_NAME))
00254              /* `/' cannot be matched.  */
00255              return FNM_NOMATCH;
00256 
00257            not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
00258            if (not)
00259              ++p;
00260 
00261            fn = FOLD ((UCHAR) *n);
00262 
00263            c = *p++;
00264            for (;;)
00265              {
00266               if (!(flags & FNM_NOESCAPE) && c == L('\\'))
00267                 {
00268                   if (*p == L('\0'))
00269                     return FNM_NOMATCH;
00270                   c = FOLD ((UCHAR) *p);
00271                   ++p;
00272 
00273                   goto normal_bracket;
00274                 }
00275               else if (c == L('[') && *p == L(':'))
00276                 {
00277                   /* Leave room for the null.  */
00278                   CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
00279                   size_t c1 = 0;
00280 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
00281                   wctype_t wt;
00282 #endif
00283                   const CHAR *startp = p;
00284 
00285                   for (;;)
00286                     {
00287                      if (c1 == CHAR_CLASS_MAX_LENGTH)
00288                        /* The name is too long and therefore the pattern
00289                           is ill-formed.  */
00290                        return FNM_NOMATCH;
00291 
00292                      c = *++p;
00293                      if (c == L(':') && p[1] == L(']'))
00294                        {
00295                          p += 2;
00296                          break;
00297                        }
00298                      if (c < L('a') || c >= L('z'))
00299                        {
00300                          /* This cannot possibly be a character class name.
00301                             Match it as a normal range.  */
00302                          p = startp;
00303                          c = L('[');
00304                          goto normal_bracket;
00305                        }
00306                      str[c1++] = c;
00307                     }
00308                   str[c1] = L('\0');
00309 
00310 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
00311                   wt = IS_CHAR_CLASS (str);
00312                   if (wt == 0)
00313                     /* Invalid character class name.  */
00314                     return FNM_NOMATCH;
00315 
00316 # if defined _LIBC && ! WIDE_CHAR_VERSION
00317                   /* The following code is glibc specific but does
00318                      there a good job in speeding up the code since
00319                      we can avoid the btowc() call.  */
00320                   if (_ISCTYPE ((UCHAR) *n, wt))
00321                     goto matched;
00322 # else
00323                   if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
00324                     goto matched;
00325 # endif
00326 #else
00327                   if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
00328                      || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
00329                      || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
00330                      || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
00331                      || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
00332                      || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
00333                      || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
00334                      || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
00335                      || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
00336                      || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
00337                      || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
00338                      || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
00339                     goto matched;
00340 #endif
00341                   c = *p++;
00342                 }
00343 #ifdef _LIBC
00344               else if (c == L('[') && *p == L('='))
00345                 {
00346                   UCHAR str[1];
00347                   uint32_t nrules =
00348                     _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
00349                   const CHAR *startp = p;
00350 
00351                   c = *++p;
00352                   if (c == L('\0'))
00353                     {
00354                      p = startp;
00355                      c = L('[');
00356                      goto normal_bracket;
00357                     }
00358                   str[0] = c;
00359 
00360                   c = *++p;
00361                   if (c != L('=') || p[1] != L(']'))
00362                     {
00363                      p = startp;
00364                      c = L('[');
00365                      goto normal_bracket;
00366                     }
00367                   p += 2;
00368 
00369                   if (nrules == 0)
00370                     {
00371                      if ((UCHAR) *n == str[0])
00372                        goto matched;
00373                     }
00374                   else
00375                     {
00376                      const int32_t *table;
00377 # if WIDE_CHAR_VERSION
00378                      const int32_t *weights;
00379                      const int32_t *extra;
00380 # else
00381                      const unsigned char *weights;
00382                      const unsigned char *extra;
00383 # endif
00384                      const int32_t *indirect;
00385                      int32_t idx;
00386                      const UCHAR *cp = (const UCHAR *) str;
00387 
00388                      /* This #include defines a local function!  */
00389 # if WIDE_CHAR_VERSION
00390 #  include <locale/weightwc.h>
00391 # else
00392 #  include <locale/weight.h>
00393 # endif
00394 
00395 # if WIDE_CHAR_VERSION
00396                      table = (const int32_t *)
00397                        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
00398                      weights = (const int32_t *)
00399                        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
00400                      extra = (const int32_t *)
00401                        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
00402                      indirect = (const int32_t *)
00403                        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
00404 # else
00405                      table = (const int32_t *)
00406                        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
00407                      weights = (const unsigned char *)
00408                        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
00409                      extra = (const unsigned char *)
00410                        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
00411                      indirect = (const int32_t *)
00412                        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
00413 # endif
00414 
00415                      idx = findidx (&cp);
00416                      if (idx != 0)
00417                        {
00418                          /* We found a table entry.  Now see whether the
00419                             character we are currently at has the same
00420                             equivalence class value.  */
00421                          int len = weights[idx & 0xffffff];
00422                          int32_t idx2;
00423                          const UCHAR *np = (const UCHAR *) n;
00424 
00425                          idx2 = findidx (&np);
00426                          if (idx2 != 0
00427                             && (idx >> 24) == (idx2 >> 24)
00428                             && len == weights[idx2 & 0xffffff])
00429                            {
00430                             int cnt = 0;
00431 
00432                             idx &= 0xffffff;
00433                             idx2 &= 0xffffff;
00434 
00435                             while (cnt < len
00436                                    && (weights[idx + 1 + cnt]
00437                                       == weights[idx2 + 1 + cnt]))
00438                               ++cnt;
00439 
00440                             if (cnt == len)
00441                               goto matched;
00442                            }
00443                        }
00444                     }
00445 
00446                   c = *p++;
00447                 }
00448 #endif
00449               else if (c == L('\0'))
00450                 /* [ (unterminated) loses.  */
00451                 return FNM_NOMATCH;
00452               else
00453                 {
00454                   int is_range = 0;
00455 
00456 #ifdef _LIBC
00457                   int is_seqval = 0;
00458 
00459                   if (c == L('[') && *p == L('.'))
00460                     {
00461                      uint32_t nrules =
00462                        _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
00463                      const CHAR *startp = p;
00464                      size_t c1 = 0;
00465 
00466                      while (1)
00467                        {
00468                          c = *++p;
00469                          if (c == L('.') && p[1] == L(']'))
00470                            {
00471                             p += 2;
00472                             break;
00473                            }
00474                          if (c == '\0')
00475                            return FNM_NOMATCH;
00476                          ++c1;
00477                        }
00478 
00479                      /* We have to handle the symbols differently in
00480                         ranges since then the collation sequence is
00481                         important.  */
00482                      is_range = *p == L('-') && p[1] != L('\0');
00483 
00484                      if (nrules == 0)
00485                        {
00486                          /* There are no names defined in the collation
00487                             data.  Therefore we only accept the trivial
00488                             names consisting of the character itself.  */
00489                          if (c1 != 1)
00490                            return FNM_NOMATCH;
00491 
00492                          if (!is_range && *n == startp[1])
00493                            goto matched;
00494 
00495                          cold = startp[1];
00496                          c = *p++;
00497                        }
00498                      else
00499                        {
00500                          int32_t table_size;
00501                          const int32_t *symb_table;
00502 # ifdef WIDE_CHAR_VERSION
00503                          char str[c1];
00504                          unsigned int strcnt;
00505 # else
00506 #  define str (startp + 1)
00507 # endif
00508                          const unsigned char *extra;
00509                          int32_t idx;
00510                          int32_t elem;
00511                          int32_t second;
00512                          int32_t hash;
00513 
00514 # ifdef WIDE_CHAR_VERSION
00515                          /* We have to convert the name to a single-byte
00516                             string.  This is possible since the names
00517                             consist of ASCII characters and the internal
00518                             representation is UCS4.  */
00519                          for (strcnt = 0; strcnt < c1; ++strcnt)
00520                            str[strcnt] = startp[1 + strcnt];
00521 #endif
00522 
00523                          table_size =
00524                            _NL_CURRENT_WORD (LC_COLLATE,
00525                                           _NL_COLLATE_SYMB_HASH_SIZEMB);
00526                          symb_table = (const int32_t *)
00527                            _NL_CURRENT (LC_COLLATE,
00528                                       _NL_COLLATE_SYMB_TABLEMB);
00529                          extra = (const unsigned char *)
00530                            _NL_CURRENT (LC_COLLATE,
00531                                       _NL_COLLATE_SYMB_EXTRAMB);
00532 
00533                          /* Locate the character in the hashing table.  */
00534                          hash = elem_hash (str, c1);
00535 
00536                          idx = 0;
00537                          elem = hash % table_size;
00538                          if (symb_table[2 * elem] != 0)
00539                            {
00540                             second = hash % (table_size - 2) + 1;
00541 
00542                             do
00543                               {
00544                                 /* First compare the hashing value.  */
00545                                 if (symb_table[2 * elem] == hash
00546                                    && (c1
00547                                        == extra[symb_table[2 * elem + 1]])
00548                                    && memcmp (str,
00549                                              &extra[symb_table[2 * elem
00550                                                              + 1]
00551                                                    + 1], c1) == 0)
00552                                   {
00553                                    /* Yep, this is the entry.  */
00554                                    idx = symb_table[2 * elem + 1];
00555                                    idx += 1 + extra[idx];
00556                                    break;
00557                                   }
00558 
00559                                 /* Next entry.  */
00560                                 elem += second;
00561                               }
00562                             while (symb_table[2 * elem] != 0);
00563                            }
00564 
00565                          if (symb_table[2 * elem] != 0)
00566                            {
00567                             /* Compare the byte sequence but only if
00568                                this is not part of a range.  */
00569 # ifdef WIDE_CHAR_VERSION
00570                             int32_t *wextra;
00571 
00572                             idx += 1 + extra[idx];
00573                             /* Adjust for the alignment.  */
00574                             idx = (idx + 3) & ~3;
00575 
00576                             wextra = (int32_t *) &extra[idx + 4];
00577 # endif
00578 
00579                             if (! is_range)
00580                               {
00581 # ifdef WIDE_CHAR_VERSION
00582                                 for (c1 = 0;
00583                                     (int32_t) c1 < wextra[idx];
00584                                     ++c1)
00585                                   if (n[c1] != wextra[1 + c1])
00586                                    break;
00587 
00588                                 if ((int32_t) c1 == wextra[idx])
00589                                   goto matched;
00590 # else
00591                                 for (c1 = 0; c1 < extra[idx]; ++c1)
00592                                   if (n[c1] != extra[1 + c1])
00593                                    break;
00594 
00595                                 if (c1 == extra[idx])
00596                                   goto matched;
00597 # endif
00598                               }
00599 
00600                             /* Get the collation sequence value.  */
00601                             is_seqval = 1;
00602 # ifdef WIDE_CHAR_VERSION
00603                             cold = wextra[1 + wextra[idx]];
00604 # else
00605                             /* Adjust for the alignment.  */
00606                             idx += 1 + extra[idx];
00607                             idx = (idx + 3) & ~4;
00608                             cold = *((int32_t *) &extra[idx]);
00609 # endif
00610 
00611                             c = *p++;
00612                            }
00613                          else if (c1 == 1)
00614                            {
00615                             /* No valid character.  Match it as a
00616                                single byte.  */
00617                             if (!is_range && *n == str[0])
00618                               goto matched;
00619 
00620                             cold = str[0];
00621                             c = *p++;
00622                            }
00623                          else
00624                            return FNM_NOMATCH;
00625                        }
00626                     }
00627                   else
00628 # undef str
00629 #endif
00630                     {
00631                      c = FOLD (c);
00632                     normal_bracket:
00633 
00634                      /* We have to handle the symbols differently in
00635                         ranges since then the collation sequence is
00636                         important.  */
00637                      is_range = (*p == L('-') && p[1] != L('\0')
00638                                 && p[1] != L(']'));
00639 
00640                      if (!is_range && c == fn)
00641                        goto matched;
00642 
00643                      /* This is needed if we goto normal_bracket; from
00644                         outside of is_seqval's scope.  */
00645                      is_seqval = 0;
00646                      cold = c;
00647                      c = *p++;
00648                     }
00649 
00650                   if (c == L('-') && *p != L(']'))
00651                     {
00652 #if _LIBC
00653                      /* We have to find the collation sequence
00654                         value for C.  Collation sequence is nothing
00655                         we can regularly access.  The sequence
00656                         value is defined by the order in which the
00657                         definitions of the collation values for the
00658                         various characters appear in the source
00659                         file.  A strange concept, nowhere
00660                         documented.  */
00661                      uint32_t fcollseq;
00662                      uint32_t lcollseq;
00663                      UCHAR cend = *p++;
00664 
00665 # ifdef WIDE_CHAR_VERSION
00666                      /* Search in the `names' array for the characters.  */
00667                      fcollseq = __collseq_table_lookup (collseq, fn);
00668                      if (fcollseq == ~((uint32_t) 0))
00669                        /* XXX We don't know anything about the character
00670                           we are supposed to match.  This means we are
00671                           failing.  */
00672                        goto range_not_matched;
00673 
00674                      if (is_seqval)
00675                        lcollseq = cold;
00676                      else
00677                        lcollseq = __collseq_table_lookup (collseq, cold);
00678 # else
00679                      fcollseq = collseq[fn];
00680                      lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
00681 # endif
00682 
00683                      is_seqval = 0;
00684                      if (cend == L('[') && *p == L('.'))
00685                        {
00686                          uint32_t nrules =
00687                            _NL_CURRENT_WORD (LC_COLLATE,
00688                                           _NL_COLLATE_NRULES);
00689                          const CHAR *startp = p;
00690                          size_t c1 = 0;
00691 
00692                          while (1)
00693                            {
00694                             c = *++p;
00695                             if (c == L('.') && p[1] == L(']'))
00696                               {
00697                                 p += 2;
00698                                 break;
00699                               }
00700                             if (c == '\0')
00701                               return FNM_NOMATCH;
00702                             ++c1;
00703                            }
00704 
00705                          if (nrules == 0)
00706                            {
00707                             /* There are no names defined in the
00708                                collation data.  Therefore we only
00709                                accept the trivial names consisting
00710                                of the character itself.  */
00711                             if (c1 != 1)
00712                               return FNM_NOMATCH;
00713 
00714                             cend = startp[1];
00715                            }
00716                          else
00717                            {
00718                             int32_t table_size;
00719                             const int32_t *symb_table;
00720 # ifdef WIDE_CHAR_VERSION
00721                             char str[c1];
00722                             unsigned int strcnt;
00723 # else
00724 #  define str (startp + 1)
00725 # endif
00726                             const unsigned char *extra;
00727                             int32_t idx;
00728                             int32_t elem;
00729                             int32_t second;
00730                             int32_t hash;
00731 
00732 # ifdef WIDE_CHAR_VERSION
00733                             /* We have to convert the name to a single-byte
00734                                string.  This is possible since the names
00735                                consist of ASCII characters and the internal
00736                                representation is UCS4.  */
00737                             for (strcnt = 0; strcnt < c1; ++strcnt)
00738                               str[strcnt] = startp[1 + strcnt];
00739 # endif
00740 
00741                             table_size =
00742                               _NL_CURRENT_WORD (LC_COLLATE,
00743                                               _NL_COLLATE_SYMB_HASH_SIZEMB);
00744                             symb_table = (const int32_t *)
00745                               _NL_CURRENT (LC_COLLATE,
00746                                           _NL_COLLATE_SYMB_TABLEMB);
00747                             extra = (const unsigned char *)
00748                               _NL_CURRENT (LC_COLLATE,
00749                                           _NL_COLLATE_SYMB_EXTRAMB);
00750 
00751                             /* Locate the character in the hashing
00752                                    table.  */
00753                             hash = elem_hash (str, c1);
00754 
00755                             idx = 0;
00756                             elem = hash % table_size;
00757                             if (symb_table[2 * elem] != 0)
00758                               {
00759                                 second = hash % (table_size - 2) + 1;
00760 
00761                                 do
00762                                   {
00763                                    /* First compare the hashing value.  */
00764                                    if (symb_table[2 * elem] == hash
00765                                        && (c1
00766                                           == extra[symb_table[2 * elem + 1]])
00767                                        && memcmp (str,
00768                                                  &extra[symb_table[2 * elem + 1]
00769                                                        + 1], c1) == 0)
00770                                      {
00771                                        /* Yep, this is the entry.  */
00772                                        idx = symb_table[2 * elem + 1];
00773                                        idx += 1 + extra[idx];
00774                                        break;
00775                                      }
00776 
00777                                    /* Next entry.  */
00778                                    elem += second;
00779                                   }
00780                                 while (symb_table[2 * elem] != 0);
00781                               }
00782 
00783                             if (symb_table[2 * elem] != 0)
00784                               {
00785                                 /* Compare the byte sequence but only if
00786                                    this is not part of a range.  */
00787 # ifdef WIDE_CHAR_VERSION
00788                                 int32_t *wextra;
00789 
00790                                 idx += 1 + extra[idx];
00791                                 /* Adjust for the alignment.  */
00792                                 idx = (idx + 3) & ~4;
00793 
00794                                 wextra = (int32_t *) &extra[idx + 4];
00795 # endif
00796                                 /* Get the collation sequence value.  */
00797                                 is_seqval = 1;
00798 # ifdef WIDE_CHAR_VERSION
00799                                 cend = wextra[1 + wextra[idx]];
00800 # else
00801                                 /* Adjust for the alignment.  */
00802                                 idx += 1 + extra[idx];
00803                                 idx = (idx + 3) & ~4;
00804                                 cend = *((int32_t *) &extra[idx]);
00805 # endif
00806                               }
00807                             else if (symb_table[2 * elem] != 0 && c1 == 1)
00808                               {
00809                                 cend = str[0];
00810                                 c = *p++;
00811                               }
00812                             else
00813                               return FNM_NOMATCH;
00814                            }
00815 # undef str
00816                        }
00817                      else
00818                        {
00819                          if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
00820                            cend = *p++;
00821                          if (cend == L('\0'))
00822                            return FNM_NOMATCH;
00823                          cend = FOLD (cend);
00824                        }
00825 
00826                      /* XXX It is not entirely clear to me how to handle
00827                         characters which are not mentioned in the
00828                         collation specification.  */
00829                      if (
00830 # ifdef WIDE_CHAR_VERSION
00831                          lcollseq == 0xffffffff ||
00832 # endif
00833                          lcollseq <= fcollseq)
00834                        {
00835                          /* We have to look at the upper bound.  */
00836                          uint32_t hcollseq;
00837 
00838                          if (is_seqval)
00839                            hcollseq = cend;
00840                          else
00841                            {
00842 # ifdef WIDE_CHAR_VERSION
00843                             hcollseq =
00844                               __collseq_table_lookup (collseq, cend);
00845                             if (hcollseq == ~((uint32_t) 0))
00846                               {
00847                                 /* Hum, no information about the upper
00848                                    bound.  The matching succeeds if the
00849                                    lower bound is matched exactly.  */
00850                                 if (lcollseq != fcollseq)
00851                                   goto range_not_matched;
00852 
00853                                 goto matched;
00854                               }
00855 # else
00856                             hcollseq = collseq[cend];
00857 # endif
00858                            }
00859 
00860                          if (lcollseq <= hcollseq && fcollseq <= hcollseq)
00861                            goto matched;
00862                        }
00863 # ifdef WIDE_CHAR_VERSION
00864                     range_not_matched:
00865 # endif
00866 #else
00867                      /* We use a boring value comparison of the character
00868                         values.  This is better than comparing using
00869                         `strcoll' since the latter would have surprising
00870                         and sometimes fatal consequences.  */
00871                      UCHAR cend = *p++;
00872 
00873                      if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
00874                        cend = *p++;
00875                      if (cend == L('\0'))
00876                        return FNM_NOMATCH;
00877 
00878                      /* It is a range.  */
00879                      if (cold <= fn && fn <= cend)
00880                        goto matched;
00881 #endif
00882 
00883                      c = *p++;
00884                     }
00885                 }
00886 
00887               if (c == L(']'))
00888                 break;
00889              }
00890 
00891            if (!not)
00892              return FNM_NOMATCH;
00893            break;
00894 
00895          matched:
00896            /* Skip the rest of the [...] that already matched.  */
00897            do
00898              {
00899              ignore_next:
00900               c = *p++;
00901 
00902               if (c == L('\0'))
00903                 /* [... (unterminated) loses.  */
00904                 return FNM_NOMATCH;
00905 
00906               if (!(flags & FNM_NOESCAPE) && c == L('\\'))
00907                 {
00908                   if (*p == L('\0'))
00909                     return FNM_NOMATCH;
00910                   /* XXX 1003.2d11 is unclear if this is right.  */
00911                   ++p;
00912                 }
00913               else if (c == L('[') && *p == L(':'))
00914                 {
00915                   int c1 = 0;
00916                   const CHAR *startp = p;
00917 
00918                   while (1)
00919                     {
00920                      c = *++p;
00921                      if (++c1 == CHAR_CLASS_MAX_LENGTH)
00922                        return FNM_NOMATCH;
00923 
00924                      if (*p == L(':') && p[1] == L(']'))
00925                        break;
00926 
00927                      if (c < L('a') || c >= L('z'))
00928                        {
00929                          p = startp;
00930                          goto ignore_next;
00931                        }
00932                     }
00933                   p += 2;
00934                   c = *p++;
00935                 }
00936               else if (c == L('[') && *p == L('='))
00937                 {
00938                   c = *++p;
00939                   if (c == L('\0'))
00940                     return FNM_NOMATCH;
00941                   c = *++p;
00942                   if (c != L('=') || p[1] != L(']'))
00943                     return FNM_NOMATCH;
00944                   p += 2;
00945                   c = *p++;
00946                 }
00947               else if (c == L('[') && *p == L('.'))
00948                 {
00949                   ++p;
00950                   while (1)
00951                     {
00952                      c = *++p;
00953                      if (c == '\0')
00954                        return FNM_NOMATCH;
00955 
00956                      if (*p == L('.') && p[1] == L(']'))
00957                        break;
00958                     }
00959                   p += 2;
00960                   c = *p++;
00961                 }
00962              }
00963            while (c != L(']'));
00964            if (not)
00965              return FNM_NOMATCH;
00966          }
00967          break;
00968 
00969        case L('+'):
00970        case L('@'):
00971        case L('!'):
00972          if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
00973            {
00974              int res;
00975 
00976              res = EXT (c, p, n, string_end, no_leading_period, flags);
00977              if (res != -1)
00978               return res;
00979            }
00980          goto normal_match;
00981 
00982        case L('/'):
00983          if (NO_LEADING_PERIOD (flags))
00984            {
00985              if (n == string_end || c != (UCHAR) *n)
00986               return FNM_NOMATCH;
00987 
00988              new_no_leading_period = 1;
00989              break;
00990            }
00991          /* FALLTHROUGH */
00992        default:
00993        normal_match:
00994          if (n == string_end || c != FOLD ((UCHAR) *n))
00995            return FNM_NOMATCH;
00996        }
00997 
00998       no_leading_period = new_no_leading_period;
00999       ++n;
01000     }
01001 
01002   if (n == string_end)
01003     return 0;
01004 
01005   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
01006     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
01007     return 0;
01008 
01009   return FNM_NOMATCH;
01010 }
01011 
01012 
01013 static const CHAR *
01014 internal_function
01015 END (const CHAR *pattern)
01016 {
01017   const CHAR *p = pattern;
01018 
01019   while (1)
01020     if (*++p == L('\0'))
01021       /* This is an invalid pattern.  */
01022       return pattern;
01023     else if (*p == L('['))
01024       {
01025        /* Handle brackets special.  */
01026        if (posixly_correct == 0)
01027          posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
01028 
01029        /* Skip the not sign.  We have to recognize it because of a possibly
01030           following ']'.  */
01031        if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
01032          ++p;
01033        /* A leading ']' is recognized as such.  */
01034        if (*p == L(']'))
01035          ++p;
01036        /* Skip over all characters of the list.  */
01037        while (*p != L(']'))
01038          if (*p++ == L('\0'))
01039            /* This is no valid pattern.  */
01040            return pattern;
01041       }
01042     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
01043              || *p == L('!')) && p[1] == L('('))
01044       p = END (p + 1);
01045     else if (*p == L(')'))
01046       break;
01047 
01048   return p + 1;
01049 }
01050 
01051 
01052 static int
01053 internal_function
01054 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
01055      int no_leading_period, int flags)
01056 {
01057   const CHAR *startp;
01058   int level;
01059   struct patternlist
01060   {
01061     struct patternlist *next;
01062     CHAR str[0];
01063   } *list = NULL;
01064   struct patternlist **lastp = &list;
01065   size_t pattern_len = STRLEN (pattern);
01066   const CHAR *p;
01067   const CHAR *rs;
01068 
01069   /* Parse the pattern.  Store the individual parts in the list.  */
01070   level = 0;
01071   for (startp = p = pattern + 1; level >= 0; ++p)
01072     if (*p == L('\0'))
01073       /* This is an invalid pattern.  */
01074       return -1;
01075     else if (*p == L('['))
01076       {
01077        /* Handle brackets special.  */
01078        if (posixly_correct == 0)
01079          posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
01080 
01081        /* Skip the not sign.  We have to recognize it because of a possibly
01082           following ']'.  */
01083        if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
01084          ++p;
01085        /* A leading ']' is recognized as such.  */
01086        if (*p == L(']'))
01087          ++p;
01088        /* Skip over all characters of the list.  */
01089        while (*p != L(']'))
01090          if (*p++ == L('\0'))
01091            /* This is no valid pattern.  */
01092            return -1;
01093       }
01094     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
01095              || *p == L('!')) && p[1] == L('('))
01096       /* Remember the nesting level.  */
01097       ++level;
01098     else if (*p == L(')'))
01099       {
01100        if (level-- == 0)
01101          {
01102            /* This means we found the end of the pattern.  */
01103 #define NEW_PATTERN \
01104            struct patternlist *newp;                                        \
01105                                                                      \
01106            if (opt == L('?') || opt == L('@'))                              \
01107              newp = alloca (sizeof (struct patternlist)              \
01108                           + (pattern_len * sizeof (CHAR)));                 \
01109            else                                                      \
01110              newp = alloca (sizeof (struct patternlist)              \
01111                           + ((p - startp + 1) * sizeof (CHAR)));            \
01112            *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
01113            newp->next = NULL;                                               \
01114            *lastp = newp;                                            \
01115            lastp = &newp->next
01116            NEW_PATTERN;
01117          }
01118       }
01119     else if (*p == L('|'))
01120       {
01121        if (level == 0)
01122          {
01123            NEW_PATTERN;
01124            startp = p + 1;
01125          }
01126       }
01127   assert (list != NULL);
01128   assert (p[-1] == L(')'));
01129 #undef NEW_PATTERN
01130 
01131   switch (opt)
01132     {
01133     case L('*'):
01134       if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
01135        return 0;
01136       /* FALLTHROUGH */
01137 
01138     case L('+'):
01139       do
01140        {
01141          for (rs = string; rs <= string_end; ++rs)
01142            /* First match the prefix with the current pattern with the
01143               current pattern.  */
01144            if (FCT (list->str, string, rs, no_leading_period,
01145                    flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
01146                    NULL) == 0
01147               /* This was successful.  Now match the rest with the rest
01148                  of the pattern.  */
01149               && (FCT (p, rs, string_end,
01150                       rs == string
01151                       ? no_leading_period
01152                       : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
01153                       flags & FNM_FILE_NAME
01154                       ? flags : flags & ~FNM_PERIOD, NULL) == 0
01155                   /* This didn't work.  Try the whole pattern.  */
01156                   || (rs != string
01157                      && FCT (pattern - 1, rs, string_end,
01158                             rs == string
01159                             ? no_leading_period
01160                             : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
01161                                ? 1 : 0),
01162                             flags & FNM_FILE_NAME
01163                             ? flags : flags & ~FNM_PERIOD, NULL) == 0)))
01164              /* It worked.  Signal success.  */
01165              return 0;
01166        }
01167       while ((list = list->next) != NULL);
01168 
01169       /* None of the patterns lead to a match.  */
01170       return FNM_NOMATCH;
01171 
01172     case L('?'):
01173       if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
01174        return 0;
01175       /* FALLTHROUGH */
01176 
01177     case L('@'):
01178       do
01179        /* I cannot believe it but `strcat' is actually acceptable
01180           here.  Match the entire string with the prefix from the
01181           pattern list and the rest of the pattern following the
01182           pattern list.  */
01183        if (FCT (STRCAT (list->str, p), string, string_end,
01184                no_leading_period,
01185                flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
01186                NULL) == 0)
01187          /* It worked.  Signal success.  */
01188          return 0;
01189       while ((list = list->next) != NULL);
01190 
01191       /* None of the patterns lead to a match.  */
01192       return FNM_NOMATCH;
01193 
01194     case L('!'):
01195       for (rs = string; rs <= string_end; ++rs)
01196        {
01197          struct patternlist *runp;
01198 
01199          for (runp = list; runp != NULL; runp = runp->next)
01200            if (FCT (runp->str, string, rs,  no_leading_period,
01201                    flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
01202                    NULL) == 0)
01203              break;
01204 
01205          /* If none of the patterns matched see whether the rest does.  */
01206          if (runp == NULL
01207              && (FCT (p, rs, string_end,
01208                      rs == string
01209                      ? no_leading_period
01210                      : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
01211                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
01212                      NULL) == 0))
01213            /* This is successful.  */
01214            return 0;
01215        }
01216 
01217       /* None of the patterns together with the rest of the pattern
01218         lead to a match.  */
01219       return FNM_NOMATCH;
01220 
01221     default:
01222       assert (! "Invalid extended matching operator");
01223       break;
01224     }
01225 
01226   return -1;
01227 }
01228 
01229 
/* Remove the parameter macros that the code in this file is written in
   terms of (character types, function and struct names, string helpers,
   literal wrapper).  Presumably this lets the file be included again
   with a different set of definitions -- confirm against the includer.  */
01230 #undef FOLD
01231 #undef CHAR
01232 #undef UCHAR
01233 #undef INT
01234 #undef FCT
01235 #undef EXT
01236 #undef END
01237 #undef STRUCT
01238 #undef MEMPCPY
01239 #undef MEMCHR
01240 #undef STRCOLL
01241 #undef STRLEN
01242 #undef STRCAT
01243 #undef L
01244 #undef BTOWC