Back to index

glibc  2.9
strlen.c
Go to the documentation of this file.
00001 /* Copyright (C) 1991, 1993, 1997, 2000, 2003 Free Software Foundation, Inc.
00002    This file is part of the GNU C Library.
00003    Written by Torbjorn Granlund (tege@sics.se),
00004    with help from Dan Sahlin (dan@sics.se);
00005    commentary by Jim Blandy (jimb@ai.mit.edu).
00006 
00007    The GNU C Library is free software; you can redistribute it and/or
00008    modify it under the terms of the GNU Lesser General Public
00009    License as published by the Free Software Foundation; either
00010    version 2.1 of the License, or (at your option) any later version.
00011 
00012    The GNU C Library is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015    Lesser General Public License for more details.
00016 
00017    You should have received a copy of the GNU Lesser General Public
00018    License along with the GNU C Library; if not, write to the Free
00019    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00020    02111-1307 USA.  */
00021 
00022 #include <string.h>
00023 #include <stdlib.h>
00024 
00025 #undef strlen
00026 
00027 /* Return the length of the null-terminated string STR.  Scan for
00028    the null terminator quickly by testing four bytes at a time.  */
00029 size_t
00030 strlen (str)
00031      const char *str;
00032 {
00033   const char *char_ptr;
00034   const unsigned long int *longword_ptr;
00035   unsigned long int longword, magic_bits, himagic, lomagic;
00036 
00037   /* Handle the first few characters by reading one character at a time.
00038      Do this until CHAR_PTR is aligned on a longword boundary.  */
00039   for (char_ptr = str; ((unsigned long int) char_ptr
00040                      & (sizeof (longword) - 1)) != 0;
00041        ++char_ptr)
00042     if (*char_ptr == '\0')
00043       return char_ptr - str;
00044 
00045   /* All these elucidatory comments refer to 4-byte longwords,
00046      but the theory applies equally well to 8-byte longwords.  */
00047 
00048   longword_ptr = (unsigned long int *) char_ptr;
00049 
00050   /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
00051      the "holes."  Note that there is a hole just to the left of
00052      each byte, with an extra at the end:
00053 
00054      bits:  01111110 11111110 11111110 11111111
00055      bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
00056 
00057      The 1-bits make sure that carries propagate to the next 0-bit.
00058      The 0-bits provide holes for carries to fall into.  */
00059   magic_bits = 0x7efefeffL;
00060   himagic = 0x80808080L;
00061   lomagic = 0x01010101L;
00062   if (sizeof (longword) > 4)
00063     {
00064       /* 64-bit version of the magic.  */
00065       /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
00066       magic_bits = ((0x7efefefeL << 16) << 16) | 0xfefefeffL;
00067       himagic = ((himagic << 16) << 16) | himagic;
00068       lomagic = ((lomagic << 16) << 16) | lomagic;
00069     }
00070   if (sizeof (longword) > 8)
00071     abort ();
00072 
00073   /* Instead of the traditional loop which tests each character,
00074      we will test a longword at a time.  The tricky part is testing
00075      if *any of the four* bytes in the longword in question are zero.  */
00076   for (;;)
00077     {
00078       /* We tentatively exit the loop if adding MAGIC_BITS to
00079         LONGWORD fails to change any of the hole bits of LONGWORD.
00080 
00081         1) Is this safe?  Will it catch all the zero bytes?
00082         Suppose there is a byte with all zeros.  Any carry bits
00083         propagating from its left will fall into the hole at its
00084         least significant bit and stop.  Since there will be no
00085         carry from its most significant bit, the LSB of the
00086         byte to the left will be unchanged, and the zero will be
00087         detected.
00088 
00089         2) Is this worthwhile?  Will it ignore everything except
00090         zero bytes?  Suppose every byte of LONGWORD has a bit set
00091         somewhere.  There will be a carry into bit 8.  If bit 8
00092         is set, this will carry into bit 16.  If bit 8 is clear,
00093         one of bits 9-15 must be set, so there will be a carry
00094         into bit 16.  Similarly, there will be a carry into bit
00095         24.  If one of bits 24-30 is set, there will be a carry
00096         into bit 31, so all of the hole bits will be changed.
00097 
00098         The one misfire occurs when bits 24-30 are clear and bit
00099         31 is set; in this case, the hole at bit 31 is not
00100         changed.  If we had access to the processor carry flag,
00101         we could close this loophole by putting the fourth hole
00102         at bit 32!
00103 
00104         So it ignores everything except 128's, when they're aligned
00105         properly.  */
00106 
00107       longword = *longword_ptr++;
00108 
00109       if (
00110 #if 0
00111          /* Add MAGIC_BITS to LONGWORD.  */
00112          (((longword + magic_bits)
00113 
00114            /* Set those bits that were unchanged by the addition.  */
00115            ^ ~longword)
00116 
00117           /* Look at only the hole bits.  If any of the hole bits
00118              are unchanged, most likely one of the bytes was a
00119              zero.  */
00120           & ~magic_bits)
00121 #else
00122          ((longword - lomagic) & himagic)
00123 #endif
00124          != 0)
00125        {
00126          /* Which of the bytes was the zero?  If none of them were, it was
00127             a misfire; continue the search.  */
00128 
00129          const char *cp = (const char *) (longword_ptr - 1);
00130 
00131          if (cp[0] == 0)
00132            return cp - str;
00133          if (cp[1] == 0)
00134            return cp - str + 1;
00135          if (cp[2] == 0)
00136            return cp - str + 2;
00137          if (cp[3] == 0)
00138            return cp - str + 3;
00139          if (sizeof (longword) > 4)
00140            {
00141              if (cp[4] == 0)
00142               return cp - str + 4;
00143              if (cp[5] == 0)
00144               return cp - str + 5;
00145              if (cp[6] == 0)
00146               return cp - str + 6;
00147              if (cp[7] == 0)
00148               return cp - str + 7;
00149            }
00150        }
00151     }
00152 }
00153 libc_hidden_builtin_def (strlen)