Back to index

glibc  2.9
strchrnul.c
Go to the documentation of this file.
00001 /* Copyright (C) 1991,1993-1997,99,2000,2005 Free Software Foundation, Inc.
00002    This file is part of the GNU C Library.
00003    Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
00004    with help from Dan Sahlin (dan@sics.se) and
00005    bug fix and commentary by Jim Blandy (jimb@ai.mit.edu);
00006    adaptation to strchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
00007    and implemented by Roland McGrath (roland@ai.mit.edu).
00008 
00009    The GNU C Library is free software; you can redistribute it and/or
00010    modify it under the terms of the GNU Lesser General Public
00011    License as published by the Free Software Foundation; either
00012    version 2.1 of the License, or (at your option) any later version.
00013 
00014    The GNU C Library is distributed in the hope that it will be useful,
00015    but WITHOUT ANY WARRANTY; without even the implied warranty of
00016    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017    Lesser General Public License for more details.
00018 
00019    You should have received a copy of the GNU Lesser General Public
00020    License along with the GNU C Library; if not, write to the Free
00021    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00022    02111-1307 USA.  */
00023 
00024 #include <string.h>
00025 #include <memcopy.h>
00026 #include <stdlib.h>
00027 
00028 #undef __strchrnul
00029 #undef strchrnul
00030 
00031 /* Find the first occurrence of C in S or the final NUL byte.  */
00032 char *
00033 __strchrnul (s, c_in)
00034      const char *s;
00035      int c_in;
00036 {
00037   const unsigned char *char_ptr;
00038   const unsigned long int *longword_ptr;
00039   unsigned long int longword, magic_bits, charmask;
00040   unsigned reg_char c;
00041 
00042   c = (unsigned char) c_in;
00043 
00044   /* Handle the first few characters by reading one character at a time.
00045      Do this until CHAR_PTR is aligned on a longword boundary.  */
00046   for (char_ptr = (const unsigned char *) s;
00047        ((unsigned long int) char_ptr & (sizeof (longword) - 1)) != 0;
00048        ++char_ptr)
00049     if (*char_ptr == c || *char_ptr == '\0')
00050       return (void *) char_ptr;
00051 
00052   /* All these elucidatory comments refer to 4-byte longwords,
00053      but the theory applies equally well to 8-byte longwords.  */
00054 
00055   longword_ptr = (unsigned long int *) char_ptr;
00056 
00057   /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
00058      the "holes."  Note that there is a hole just to the left of
00059      each byte, with an extra at the end:
00060 
00061      bits:  01111110 11111110 11111110 11111111
00062      bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
00063 
00064      The 1-bits make sure that carries propagate to the next 0-bit.
00065      The 0-bits provide holes for carries to fall into.  */
00066   switch (sizeof (longword))
00067     {
00068     case 4: magic_bits = 0x7efefeffL; break;
00069     case 8: magic_bits = ((0x7efefefeL << 16) << 16) | 0xfefefeffL; break;
00070     default:
00071       abort ();
00072     }
00073 
00074   /* Set up a longword, each of whose bytes is C.  */
00075   charmask = c | (c << 8);
00076   charmask |= charmask << 16;
00077   if (sizeof (longword) > 4)
00078     /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
00079     charmask |= (charmask << 16) << 16;
00080   if (sizeof (longword) > 8)
00081     abort ();
00082 
00083   /* Instead of the traditional loop which tests each character,
00084      we will test a longword at a time.  The tricky part is testing
00085      if *any of the four* bytes in the longword in question are zero.  */
00086   for (;;)
00087     {
00088       /* We tentatively exit the loop if adding MAGIC_BITS to
00089         LONGWORD fails to change any of the hole bits of LONGWORD.
00090 
00091         1) Is this safe?  Will it catch all the zero bytes?
00092         Suppose there is a byte with all zeros.  Any carry bits
00093         propagating from its left will fall into the hole at its
00094         least significant bit and stop.  Since there will be no
00095         carry from its most significant bit, the LSB of the
00096         byte to the left will be unchanged, and the zero will be
00097         detected.
00098 
00099         2) Is this worthwhile?  Will it ignore everything except
00100         zero bytes?  Suppose every byte of LONGWORD has a bit set
00101         somewhere.  There will be a carry into bit 8.  If bit 8
00102         is set, this will carry into bit 16.  If bit 8 is clear,
00103         one of bits 9-15 must be set, so there will be a carry
00104         into bit 16.  Similarly, there will be a carry into bit
00105         24.  If one of bits 24-30 is set, there will be a carry
00106         into bit 31, so all of the hole bits will be changed.
00107 
00108         The one misfire occurs when bits 24-30 are clear and bit
00109         31 is set; in this case, the hole at bit 31 is not
00110         changed.  If we had access to the processor carry flag,
00111         we could close this loophole by putting the fourth hole
00112         at bit 32!
00113 
00114         So it ignores everything except 128's, when they're aligned
00115         properly.
00116 
00117         3) But wait!  Aren't we looking for C as well as zero?
00118         Good point.  So what we do is XOR LONGWORD with a longword,
00119         each of whose bytes is C.  This turns each byte that is C
00120         into a zero.  */
00121 
00122       longword = *longword_ptr++;
00123 
00124       /* Add MAGIC_BITS to LONGWORD.  */
00125       if ((((longword + magic_bits)
00126 
00127            /* Set those bits that were unchanged by the addition.  */
00128            ^ ~longword)
00129 
00130           /* Look at only the hole bits.  If any of the hole bits
00131              are unchanged, most likely one of the bytes was a
00132              zero.  */
00133           & ~magic_bits) != 0 ||
00134 
00135          /* That caught zeroes.  Now test for C.  */
00136          ((((longword ^ charmask) + magic_bits) ^ ~(longword ^ charmask))
00137           & ~magic_bits) != 0)
00138        {
00139          /* Which of the bytes was C or zero?
00140             If none of them were, it was a misfire; continue the search.  */
00141 
00142          const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);
00143 
00144          if (*cp == c || *cp == '\0')
00145            return (char *) cp;
00146          if (*++cp == c || *cp == '\0')
00147            return (char *) cp;
00148          if (*++cp == c || *cp == '\0')
00149            return (char *) cp;
00150          if (*++cp == c || *cp == '\0')
00151            return (char *) cp;
00152          if (sizeof (longword) > 4)
00153            {
00154              if (*++cp == c || *cp == '\0')
00155               return (char *) cp;
00156              if (*++cp == c || *cp == '\0')
00157               return (char *) cp;
00158              if (*++cp == c || *cp == '\0')
00159               return (char *) cp;
00160              if (*++cp == c || *cp == '\0')
00161               return (char *) cp;
00162            }
00163        }
00164     }
00165 
00166   /* This should never happen.  */
00167   return NULL;
00168 }
00169 
00170 weak_alias (__strchrnul, strchrnul)