Back to index

glibc  2.9
rawmemchr.c
Go to the documentation of this file.
00001 /* Copyright (C) 1991,93,96,97,99,2000,2002 Free Software Foundation, Inc.
00002    This file is part of the GNU C Library.
00003    Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
00004    with help from Dan Sahlin (dan@sics.se) and
00005    commentary by Jim Blandy (jimb@ai.mit.edu);
00006    adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
00007    and implemented by Roland McGrath (roland@ai.mit.edu).
00008 
00009    The GNU C Library is free software; you can redistribute it and/or
00010    modify it under the terms of the GNU Lesser General Public
00011    License as published by the Free Software Foundation; either
00012    version 2.1 of the License, or (at your option) any later version.
00013 
00014    The GNU C Library is distributed in the hope that it will be useful,
00015    but WITHOUT ANY WARRANTY; without even the implied warranty of
00016    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017    Lesser General Public License for more details.
00018 
00019    You should have received a copy of the GNU Lesser General Public
00020    License along with the GNU C Library; if not, write to the Free
00021    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00022    02111-1307 USA.  */
00023 
00024 #ifdef HAVE_CONFIG_H
00025 #include <config.h>
00026 #endif
00027 
00028 #undef __ptr_t
00029 #if defined (__cplusplus) || (defined (__STDC__) && __STDC__)
00030 # define __ptr_t void *
00031 #else /* Not C++ or ANSI C.  */
00032 # define __ptr_t char *
00033 #endif /* C++ or ANSI C.  */
00034 
00035 #if defined (_LIBC)
00036 # include <string.h>
00037 # include <memcopy.h>
00038 # include <stdlib.h>
00039 #else
00040 # define reg_char char
00041 #endif
00042 
00043 #if defined (HAVE_LIMITS_H) || defined (_LIBC)
00044 # include <limits.h>
00045 #endif
00046 
00047 #define LONG_MAX_32_BITS 2147483647
00048 
00049 #ifndef LONG_MAX
00050 #define LONG_MAX LONG_MAX_32_BITS
00051 #endif
00052 
00053 #include <sys/types.h>
00054 
00055 #undef memchr
00056 
00057 
00058 /* Find the first occurrence of C in S.  */
00059 __ptr_t
00060 __rawmemchr (s, c_in)
00061      const __ptr_t s;
00062      int c_in;
00063 {
00064   const unsigned char *char_ptr;
00065   const unsigned long int *longword_ptr;
00066   unsigned long int longword, magic_bits, charmask;
00067   unsigned reg_char c;
00068 
00069   c = (unsigned char) c_in;
00070 
00071   /* Handle the first few characters by reading one character at a time.
00072      Do this until CHAR_PTR is aligned on a longword boundary.  */
00073   for (char_ptr = (const unsigned char *) s;
00074        ((unsigned long int) char_ptr & (sizeof (longword) - 1)) != 0;
00075        ++char_ptr)
00076     if (*char_ptr == c)
00077       return (__ptr_t) char_ptr;
00078 
00079   /* All these elucidatory comments refer to 4-byte longwords,
00080      but the theory applies equally well to 8-byte longwords.  */
00081 
00082   longword_ptr = (unsigned long int *) char_ptr;
00083 
00084   /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
00085      the "holes."  Note that there is a hole just to the left of
00086      each byte, with an extra at the end:
00087 
00088      bits:  01111110 11111110 11111110 11111111
00089      bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
00090 
00091      The 1-bits make sure that carries propagate to the next 0-bit.
00092      The 0-bits provide holes for carries to fall into.  */
00093 
00094   if (sizeof (longword) != 4 && sizeof (longword) != 8)
00095     abort ();
00096 
00097 #if LONG_MAX <= LONG_MAX_32_BITS
00098   magic_bits = 0x7efefeff;
00099 #else
00100   magic_bits = ((unsigned long int) 0x7efefefe << 32) | 0xfefefeff;
00101 #endif
00102 
00103   /* Set up a longword, each of whose bytes is C.  */
00104   charmask = c | (c << 8);
00105   charmask |= charmask << 16;
00106 #if LONG_MAX > LONG_MAX_32_BITS
00107   charmask |= charmask << 32;
00108 #endif
00109 
00110   /* Instead of the traditional loop which tests each character,
00111      we will test a longword at a time.  The tricky part is testing
00112      if *any of the four* bytes in the longword in question are zero.  */
00113   while (1)
00114     {
00115       /* We tentatively exit the loop if adding MAGIC_BITS to
00116         LONGWORD fails to change any of the hole bits of LONGWORD.
00117 
00118         1) Is this safe?  Will it catch all the zero bytes?
00119         Suppose there is a byte with all zeros.  Any carry bits
00120         propagating from its left will fall into the hole at its
00121         least significant bit and stop.  Since there will be no
00122         carry from its most significant bit, the LSB of the
00123         byte to the left will be unchanged, and the zero will be
00124         detected.
00125 
00126         2) Is this worthwhile?  Will it ignore everything except
00127         zero bytes?  Suppose every byte of LONGWORD has a bit set
00128         somewhere.  There will be a carry into bit 8.  If bit 8
00129         is set, this will carry into bit 16.  If bit 8 is clear,
00130         one of bits 9-15 must be set, so there will be a carry
00131         into bit 16.  Similarly, there will be a carry into bit
00132         24.  If one of bits 24-30 is set, there will be a carry
00133         into bit 31, so all of the hole bits will be changed.
00134 
00135         The one misfire occurs when bits 24-30 are clear and bit
00136         31 is set; in this case, the hole at bit 31 is not
00137         changed.  If we had access to the processor carry flag,
00138         we could close this loophole by putting the fourth hole
00139         at bit 32!
00140 
00141         So it ignores everything except 128's, when they're aligned
00142         properly.
00143 
00144         3) But wait!  Aren't we looking for C, not zero?
00145         Good point.  So what we do is XOR LONGWORD with a longword,
00146         each of whose bytes is C.  This turns each byte that is C
00147         into a zero.  */
00148 
00149       longword = *longword_ptr++ ^ charmask;
00150 
00151       /* Add MAGIC_BITS to LONGWORD.  */
00152       if ((((longword + magic_bits)
00153 
00154            /* Set those bits that were unchanged by the addition.  */
00155            ^ ~longword)
00156 
00157           /* Look at only the hole bits.  If any of the hole bits
00158              are unchanged, most likely one of the bytes was a
00159              zero.  */
00160           & ~magic_bits) != 0)
00161        {
00162          /* Which of the bytes was C?  If none of them were, it was
00163             a misfire; continue the search.  */
00164 
00165          const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);
00166 
00167          if (cp[0] == c)
00168            return (__ptr_t) cp;
00169          if (cp[1] == c)
00170            return (__ptr_t) &cp[1];
00171          if (cp[2] == c)
00172            return (__ptr_t) &cp[2];
00173          if (cp[3] == c)
00174            return (__ptr_t) &cp[3];
00175 #if LONG_MAX > 2147483647
00176          if (cp[4] == c)
00177            return (__ptr_t) &cp[4];
00178          if (cp[5] == c)
00179            return (__ptr_t) &cp[5];
00180          if (cp[6] == c)
00181            return (__ptr_t) &cp[6];
00182          if (cp[7] == c)
00183            return (__ptr_t) &cp[7];
00184 #endif
00185        }
00186     }
00187 }
00188 libc_hidden_def (__rawmemchr)
00189 weak_alias (__rawmemchr, rawmemchr)