Back to index

glibc  2.9
memchr.c
Go to the documentation of this file.
00001 /* Copyright (C) 1991,93,96,97,99,2000,2003 Free Software Foundation, Inc.
00002    This file is part of the GNU C Library.
00003    Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
00004    with help from Dan Sahlin (dan@sics.se) and
00005    commentary by Jim Blandy (jimb@ai.mit.edu);
00006    adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
00007    and implemented by Roland McGrath (roland@ai.mit.edu).
00008 
00009    The GNU C Library is free software; you can redistribute it and/or
00010    modify it under the terms of the GNU Lesser General Public
00011    License as published by the Free Software Foundation; either
00012    version 2.1 of the License, or (at your option) any later version.
00013 
00014    The GNU C Library is distributed in the hope that it will be useful,
00015    but WITHOUT ANY WARRANTY; without even the implied warranty of
00016    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017    Lesser General Public License for more details.
00018 
00019    You should have received a copy of the GNU Lesser General Public
00020    License along with the GNU C Library; if not, write to the Free
00021    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00022    02111-1307 USA.  */
00023 
00024 #ifdef HAVE_CONFIG_H
00025 #include <config.h>
00026 #endif
00027 
00028 #undef __ptr_t
00029 #if defined (__cplusplus) || (defined (__STDC__) && __STDC__)
00030 # define __ptr_t void *
00031 #else /* Not C++ or ANSI C.  */
00032 # define __ptr_t char *
00033 #endif /* C++ or ANSI C.  */
00034 
00035 #if defined _LIBC
00036 # include <string.h>
00037 # include <memcopy.h>
00038 #else
00039 # define reg_char char
00040 #endif
00041 
00042 #if HAVE_STDLIB_H || defined _LIBC
00043 # include <stdlib.h>
00044 #endif
00045 
00046 #if HAVE_LIMITS_H || defined _LIBC
00047 # include <limits.h>
00048 #endif
00049 
00050 #define LONG_MAX_32_BITS 2147483647
00051 
00052 #ifndef LONG_MAX
00053 #define LONG_MAX LONG_MAX_32_BITS
00054 #endif
00055 
00056 #include <sys/types.h>
00057 #if HAVE_BP_SYM_H || defined _LIBC
00058 #include <bp-sym.h>
00059 #else
00060 # define BP_SYM(sym) sym
00061 #endif
00062 
00063 #undef memchr
00064 #undef __memchr
00065 
00066 /* Search no more than N bytes of S for C.  */
00067 __ptr_t
00068 __memchr (s, c_in, n)
00069      const __ptr_t s;
00070      int c_in;
00071      size_t n;
00072 {
00073   const unsigned char *char_ptr;
00074   const unsigned long int *longword_ptr;
00075   unsigned long int longword, magic_bits, charmask;
00076   unsigned reg_char c;
00077 
00078   c = (unsigned char) c_in;
00079 
00080   /* Handle the first few characters by reading one character at a time.
00081      Do this until CHAR_PTR is aligned on a longword boundary.  */
00082   for (char_ptr = (const unsigned char *) s;
00083        n > 0 && ((unsigned long int) char_ptr
00084                & (sizeof (longword) - 1)) != 0;
00085        --n, ++char_ptr)
00086     if (*char_ptr == c)
00087       return (__ptr_t) char_ptr;
00088 
00089   /* All these elucidatory comments refer to 4-byte longwords,
00090      but the theory applies equally well to 8-byte longwords.  */
00091 
00092   longword_ptr = (unsigned long int *) char_ptr;
00093 
00094   /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
00095      the "holes."  Note that there is a hole just to the left of
00096      each byte, with an extra at the end:
00097 
00098      bits:  01111110 11111110 11111110 11111111
00099      bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
00100 
00101      The 1-bits make sure that carries propagate to the next 0-bit.
00102      The 0-bits provide holes for carries to fall into.  */
00103 
00104   if (sizeof (longword) != 4 && sizeof (longword) != 8)
00105     abort ();
00106 
00107 #if LONG_MAX <= LONG_MAX_32_BITS
00108   magic_bits = 0x7efefeff;
00109 #else
00110   magic_bits = ((unsigned long int) 0x7efefefe << 32) | 0xfefefeff;
00111 #endif
00112 
00113   /* Set up a longword, each of whose bytes is C.  */
00114   charmask = c | (c << 8);
00115   charmask |= charmask << 16;
00116 #if LONG_MAX > LONG_MAX_32_BITS
00117   charmask |= charmask << 32;
00118 #endif
00119 
00120   /* Instead of the traditional loop which tests each character,
00121      we will test a longword at a time.  The tricky part is testing
00122      if *any of the four* bytes in the longword in question are zero.  */
00123   while (n >= sizeof (longword))
00124     {
00125       /* We tentatively exit the loop if adding MAGIC_BITS to
00126         LONGWORD fails to change any of the hole bits of LONGWORD.
00127 
00128         1) Is this safe?  Will it catch all the zero bytes?
00129         Suppose there is a byte with all zeros.  Any carry bits
00130         propagating from its left will fall into the hole at its
00131         least significant bit and stop.  Since there will be no
00132         carry from its most significant bit, the LSB of the
00133         byte to the left will be unchanged, and the zero will be
00134         detected.
00135 
00136         2) Is this worthwhile?  Will it ignore everything except
00137         zero bytes?  Suppose every byte of LONGWORD has a bit set
00138         somewhere.  There will be a carry into bit 8.  If bit 8
00139         is set, this will carry into bit 16.  If bit 8 is clear,
00140         one of bits 9-15 must be set, so there will be a carry
00141         into bit 16.  Similarly, there will be a carry into bit
00142         24.  If one of bits 24-30 is set, there will be a carry
00143         into bit 31, so all of the hole bits will be changed.
00144 
00145         The one misfire occurs when bits 24-30 are clear and bit
00146         31 is set; in this case, the hole at bit 31 is not
00147         changed.  If we had access to the processor carry flag,
00148         we could close this loophole by putting the fourth hole
00149         at bit 32!
00150 
00151         So it ignores everything except 128's, when they're aligned
00152         properly.
00153 
00154         3) But wait!  Aren't we looking for C, not zero?
00155         Good point.  So what we do is XOR LONGWORD with a longword,
00156         each of whose bytes is C.  This turns each byte that is C
00157         into a zero.  */
00158 
00159       longword = *longword_ptr++ ^ charmask;
00160 
00161       /* Add MAGIC_BITS to LONGWORD.  */
00162       if ((((longword + magic_bits)
00163 
00164            /* Set those bits that were unchanged by the addition.  */
00165            ^ ~longword)
00166 
00167           /* Look at only the hole bits.  If any of the hole bits
00168              are unchanged, most likely one of the bytes was a
00169              zero.  */
00170           & ~magic_bits) != 0)
00171        {
00172          /* Which of the bytes was C?  If none of them were, it was
00173             a misfire; continue the search.  */
00174 
00175          const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);
00176 
00177          if (cp[0] == c)
00178            return (__ptr_t) cp;
00179          if (cp[1] == c)
00180            return (__ptr_t) &cp[1];
00181          if (cp[2] == c)
00182            return (__ptr_t) &cp[2];
00183          if (cp[3] == c)
00184            return (__ptr_t) &cp[3];
00185 #if LONG_MAX > 2147483647
00186          if (cp[4] == c)
00187            return (__ptr_t) &cp[4];
00188          if (cp[5] == c)
00189            return (__ptr_t) &cp[5];
00190          if (cp[6] == c)
00191            return (__ptr_t) &cp[6];
00192          if (cp[7] == c)
00193            return (__ptr_t) &cp[7];
00194 #endif
00195        }
00196 
00197       n -= sizeof (longword);
00198     }
00199 
00200   char_ptr = (const unsigned char *) longword_ptr;
00201 
00202   while (n-- > 0)
00203     {
00204       if (*char_ptr == c)
00205        return (__ptr_t) char_ptr;
00206       else
00207        ++char_ptr;
00208     }
00209 
00210   return 0;
00211 }
00212 #ifdef weak_alias
00213 weak_alias (__memchr, BP_SYM (memchr))
00214 #endif
00215 libc_hidden_builtin_def (memchr)