Back to index

glibc  2.9
strchr.c
Go to the documentation of this file.
00001 /* Copyright (C) 1991,1993-1997,1999,2000,2003,2006
00002    Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
00005    with help from Dan Sahlin (dan@sics.se) and
00006    bug fix and commentary by Jim Blandy (jimb@ai.mit.edu);
00007    adaptation to strchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
00008    and implemented by Roland McGrath (roland@ai.mit.edu).
00009 
00010    The GNU C Library is free software; you can redistribute it and/or
00011    modify it under the terms of the GNU Lesser General Public
00012    License as published by the Free Software Foundation; either
00013    version 2.1 of the License, or (at your option) any later version.
00014 
00015    The GNU C Library is distributed in the hope that it will be useful,
00016    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018    Lesser General Public License for more details.
00019 
00020    You should have received a copy of the GNU Lesser General Public
00021    License along with the GNU C Library; if not, write to the Free
00022    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00023    02111-1307 USA.  */
00024 
00025 #include <string.h>
00026 #include <memcopy.h>
00027 #include <stdlib.h>
00028 
00029 #undef strchr
00030 
00031 /* Find the first occurrence of C in S.  */
00032 char *
00033 strchr (s, c_in)
00034      const char *s;
00035      int c_in;
00036 {
00037   const unsigned char *char_ptr;
00038   const unsigned long int *longword_ptr;
00039   unsigned long int longword, magic_bits, charmask;
00040   unsigned reg_char c;
00041 
00042   c = (unsigned char) c_in;
00043 
00044   /* Handle the first few characters by reading one character at a time.
00045      Do this until CHAR_PTR is aligned on a longword boundary.  */
00046   for (char_ptr = (const unsigned char *) s;
00047        ((unsigned long int) char_ptr & (sizeof (longword) - 1)) != 0;
00048        ++char_ptr)
00049     if (*char_ptr == c)
00050       return (void *) char_ptr;
00051     else if (*char_ptr == '\0')
00052       return NULL;
00053 
00054   /* All these elucidatory comments refer to 4-byte longwords,
00055      but the theory applies equally well to 8-byte longwords.  */
00056 
00057   longword_ptr = (unsigned long int *) char_ptr;
00058 
00059   /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
00060      the "holes."  Note that there is a hole just to the left of
00061      each byte, with an extra at the end:
00062 
00063      bits:  01111110 11111110 11111110 11111111
00064      bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
00065 
00066      The 1-bits make sure that carries propagate to the next 0-bit.
00067      The 0-bits provide holes for carries to fall into.  */
00068   switch (sizeof (longword))
00069     {
00070     case 4: magic_bits = 0x7efefeffL; break;
00071     case 8: magic_bits = ((0x7efefefeL << 16) << 16) | 0xfefefeffL; break;
00072     default:
00073       abort ();
00074     }
00075 
00076   /* Set up a longword, each of whose bytes is C.  */
00077   charmask = c | (c << 8);
00078   charmask |= charmask << 16;
00079   if (sizeof (longword) > 4)
00080     /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
00081     charmask |= (charmask << 16) << 16;
00082   if (sizeof (longword) > 8)
00083     abort ();
00084 
00085   /* Instead of the traditional loop which tests each character,
00086      we will test a longword at a time.  The tricky part is testing
00087      if *any of the four* bytes in the longword in question are zero.  */
00088   for (;;)
00089     {
00090       /* We tentatively exit the loop if adding MAGIC_BITS to
00091         LONGWORD fails to change any of the hole bits of LONGWORD.
00092 
00093         1) Is this safe?  Will it catch all the zero bytes?
00094         Suppose there is a byte with all zeros.  Any carry bits
00095         propagating from its left will fall into the hole at its
00096         least significant bit and stop.  Since there will be no
00097         carry from its most significant bit, the LSB of the
00098         byte to the left will be unchanged, and the zero will be
00099         detected.
00100 
00101         2) Is this worthwhile?  Will it ignore everything except
00102         zero bytes?  Suppose every byte of LONGWORD has a bit set
00103         somewhere.  There will be a carry into bit 8.  If bit 8
00104         is set, this will carry into bit 16.  If bit 8 is clear,
00105         one of bits 9-15 must be set, so there will be a carry
00106         into bit 16.  Similarly, there will be a carry into bit
00107         24.  If one of bits 24-30 is set, there will be a carry
00108         into bit 31, so all of the hole bits will be changed.
00109 
00110         The one misfire occurs when bits 24-30 are clear and bit
00111         31 is set; in this case, the hole at bit 31 is not
00112         changed.  If we had access to the processor carry flag,
00113         we could close this loophole by putting the fourth hole
00114         at bit 32!
00115 
00116         So it ignores everything except 128's, when they're aligned
00117         properly.
00118 
00119         3) But wait!  Aren't we looking for C as well as zero?
00120         Good point.  So what we do is XOR LONGWORD with a longword,
00121         each of whose bytes is C.  This turns each byte that is C
00122         into a zero.  */
00123 
00124       longword = *longword_ptr++;
00125 
00126       /* Add MAGIC_BITS to LONGWORD.  */
00127       if ((((longword + magic_bits)
00128 
00129            /* Set those bits that were unchanged by the addition.  */
00130            ^ ~longword)
00131 
00132           /* Look at only the hole bits.  If any of the hole bits
00133              are unchanged, most likely one of the bytes was a
00134              zero.  */
00135           & ~magic_bits) != 0 ||
00136 
00137          /* That caught zeroes.  Now test for C.  */
00138          ((((longword ^ charmask) + magic_bits) ^ ~(longword ^ charmask))
00139           & ~magic_bits) != 0)
00140        {
00141          /* Which of the bytes was C or zero?
00142             If none of them were, it was a misfire; continue the search.  */
00143 
00144          const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);
00145 
00146          if (*cp == c)
00147            return (char *) cp;
00148          else if (*cp == '\0')
00149            return NULL;
00150          if (*++cp == c)
00151            return (char *) cp;
00152          else if (*cp == '\0')
00153            return NULL;
00154          if (*++cp == c)
00155            return (char *) cp;
00156          else if (*cp == '\0')
00157            return NULL;
00158          if (*++cp == c)
00159            return (char *) cp;
00160          else if (*cp == '\0')
00161            return NULL;
00162          if (sizeof (longword) > 4)
00163            {
00164              if (*++cp == c)
00165               return (char *) cp;
00166              else if (*cp == '\0')
00167               return NULL;
00168              if (*++cp == c)
00169               return (char *) cp;
00170              else if (*cp == '\0')
00171               return NULL;
00172              if (*++cp == c)
00173               return (char *) cp;
00174              else if (*cp == '\0')
00175               return NULL;
00176              if (*++cp == c)
00177               return (char *) cp;
00178              else if (*cp == '\0')
00179               return NULL;
00180            }
00181        }
00182     }
00183 
00184   return NULL;
00185 }
00186 
00187 #ifdef weak_alias
00188 #undef index
00189 weak_alias (strchr, index)
00190 #endif
00191 libc_hidden_builtin_def (strchr)