Back to index

glibc  2.9
memrchr.c
Go to the documentation of this file.
00001 /* memrchr -- find the last occurrence of a byte in a memory block
00002    Copyright (C) 1991, 93, 96, 97, 99, 2000 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
00005    with help from Dan Sahlin (dan@sics.se) and
00006    commentary by Jim Blandy (jimb@ai.mit.edu);
00007    adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
00008    and implemented by Roland McGrath (roland@ai.mit.edu).
00009 
00010    The GNU C Library is free software; you can redistribute it and/or
00011    modify it under the terms of the GNU Lesser General Public
00012    License as published by the Free Software Foundation; either
00013    version 2.1 of the License, or (at your option) any later version.
00014 
00015    The GNU C Library is distributed in the hope that it will be useful,
00016    but WITHOUT ANY WARRANTY; without even the implied warranty of
00017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018    Lesser General Public License for more details.
00019 
00020    You should have received a copy of the GNU Lesser General Public
00021    License along with the GNU C Library; if not, write to the Free
00022    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00023    02111-1307 USA.  */
00024 
00025 #include <stdlib.h>
00026 
00027 #ifdef HAVE_CONFIG_H
00028 # include <config.h>
00029 #endif
00030 
00031 #undef __ptr_t
00032 #if defined __cplusplus || (defined __STDC__ && __STDC__)
00033 # define __ptr_t void *
00034 #else /* Not C++ or ANSI C.  */
00035 # define __ptr_t char *
00036 #endif /* C++ or ANSI C.  */
00037 
00038 #if defined _LIBC
00039 # include <string.h>
00040 # include <memcopy.h>
00041 #else
00042 # define reg_char char
00043 #endif
00044 
00045 #if defined HAVE_LIMITS_H || defined _LIBC
00046 # include <limits.h>
00047 #endif
00048 
00049 #define LONG_MAX_32_BITS 2147483647
00050 
00051 #ifndef LONG_MAX
00052 # define LONG_MAX LONG_MAX_32_BITS
00053 #endif
00054 
00055 #include <sys/types.h>
00056 
00057 #undef __memrchr
00058 #undef memrchr
00059 
00060 #ifndef weak_alias
00061 # define __memrchr memrchr
00062 #endif
00063 
00064 /* Search no more than N bytes of S for C.  */
00065 __ptr_t
00066 __memrchr (s, c_in, n)
00067      const __ptr_t s;
00068      int c_in;
00069      size_t n;
00070 {
00071   const unsigned char *char_ptr;
00072   const unsigned long int *longword_ptr;
00073   unsigned long int longword, magic_bits, charmask;
00074   unsigned reg_char c;
00075 
00076   c = (unsigned char) c_in;
00077 
00078   /* Handle the last few characters by reading one character at a time.
00079      Do this until CHAR_PTR is aligned on a longword boundary.  */
00080   for (char_ptr = (const unsigned char *) s + n;
00081        n > 0 && ((unsigned long int) char_ptr
00082                & (sizeof (longword) - 1)) != 0;
00083        --n)
00084     if (*--char_ptr == c)
00085       return (__ptr_t) char_ptr;
00086 
00087   /* All these elucidatory comments refer to 4-byte longwords,
00088      but the theory applies equally well to 8-byte longwords.  */
00089 
00090   longword_ptr = (const unsigned long int *) char_ptr;
00091 
00092   /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
00093      the "holes."  Note that there is a hole just to the left of
00094      each byte, with an extra at the end:
00095 
00096      bits:  01111110 11111110 11111110 11111111
00097      bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
00098 
00099      The 1-bits make sure that carries propagate to the next 0-bit.
00100      The 0-bits provide holes for carries to fall into.  */
00101 
00102   if (sizeof (longword) != 4 && sizeof (longword) != 8)
00103     abort ();
00104 
00105 #if LONG_MAX <= LONG_MAX_32_BITS
00106   magic_bits = 0x7efefeff;
00107 #else
00108   magic_bits = ((unsigned long int) 0x7efefefe << 32) | 0xfefefeff;
00109 #endif
00110 
00111   /* Set up a longword, each of whose bytes is C.  */
00112   charmask = c | (c << 8);
00113   charmask |= charmask << 16;
00114 #if LONG_MAX > LONG_MAX_32_BITS
00115   charmask |= charmask << 32;
00116 #endif
00117 
00118   /* Instead of the traditional loop which tests each character,
00119      we will test a longword at a time.  The tricky part is testing
00120      if *any of the four* bytes in the longword in question are zero.  */
00121   while (n >= sizeof (longword))
00122     {
00123       /* We tentatively exit the loop if adding MAGIC_BITS to
00124         LONGWORD fails to change any of the hole bits of LONGWORD.
00125 
00126         1) Is this safe?  Will it catch all the zero bytes?
00127         Suppose there is a byte with all zeros.  Any carry bits
00128         propagating from its left will fall into the hole at its
00129         least significant bit and stop.  Since there will be no
00130         carry from its most significant bit, the LSB of the
00131         byte to the left will be unchanged, and the zero will be
00132         detected.
00133 
00134         2) Is this worthwhile?  Will it ignore everything except
00135         zero bytes?  Suppose every byte of LONGWORD has a bit set
00136         somewhere.  There will be a carry into bit 8.  If bit 8
00137         is set, this will carry into bit 16.  If bit 8 is clear,
00138         one of bits 9-15 must be set, so there will be a carry
00139         into bit 16.  Similarly, there will be a carry into bit
00140         24.  If one of bits 24-30 is set, there will be a carry
00141         into bit 31, so all of the hole bits will be changed.
00142 
00143         The one misfire occurs when bits 24-30 are clear and bit
00144         31 is set; in this case, the hole at bit 31 is not
00145         changed.  If we had access to the processor carry flag,
00146         we could close this loophole by putting the fourth hole
00147         at bit 32!
00148 
00149         So it ignores everything except 128's, when they're aligned
00150         properly.
00151 
00152         3) But wait!  Aren't we looking for C, not zero?
00153         Good point.  So what we do is XOR LONGWORD with a longword,
00154         each of whose bytes is C.  This turns each byte that is C
00155         into a zero.  */
00156 
00157       longword = *--longword_ptr ^ charmask;
00158 
00159       /* Add MAGIC_BITS to LONGWORD.  */
00160       if ((((longword + magic_bits)
00161 
00162            /* Set those bits that were unchanged by the addition.  */
00163            ^ ~longword)
00164 
00165           /* Look at only the hole bits.  If any of the hole bits
00166              are unchanged, most likely one of the bytes was a
00167              zero.  */
00168           & ~magic_bits) != 0)
00169        {
00170          /* Which of the bytes was C?  If none of them were, it was
00171             a misfire; continue the search.  */
00172 
00173          const unsigned char *cp = (const unsigned char *) longword_ptr;
00174 
00175 #if LONG_MAX > 2147483647
00176          if (cp[7] == c)
00177            return (__ptr_t) &cp[7];
00178          if (cp[6] == c)
00179            return (__ptr_t) &cp[6];
00180          if (cp[5] == c)
00181            return (__ptr_t) &cp[5];
00182          if (cp[4] == c)
00183            return (__ptr_t) &cp[4];
00184 #endif
00185          if (cp[3] == c)
00186            return (__ptr_t) &cp[3];
00187          if (cp[2] == c)
00188            return (__ptr_t) &cp[2];
00189          if (cp[1] == c)
00190            return (__ptr_t) &cp[1];
00191          if (cp[0] == c)
00192            return (__ptr_t) cp;
00193        }
00194 
00195       n -= sizeof (longword);
00196     }
00197 
00198   char_ptr = (const unsigned char *) longword_ptr;
00199 
00200   while (n-- > 0)
00201     {
00202       if (*--char_ptr == c)
00203        return (__ptr_t) char_ptr;
00204     }
00205 
00206   return 0;
00207 }
00208 #ifdef weak_alias
00209 weak_alias (__memrchr, memrchr)
00210 #endif