Back to index

glibc  2.9
strnlen.c
Go to the documentation of this file.
00001 /* Find the length of STRING, but scan at most MAXLEN characters.
00002    Copyright (C) 1991,1993,1997,2000,2001,2005 Free Software Foundation, Inc.
00003    Contributed by Jakub Jelinek <jakub@redhat.com>.
00004 
00005    Based on strlen written by Torbjorn Granlund (tege@sics.se),
00006    with help from Dan Sahlin (dan@sics.se);
00007    commentary by Jim Blandy (jimb@ai.mit.edu).
00008 
00009    The GNU C Library is free software; you can redistribute it and/or
00010    modify it under the terms of the GNU Lesser General Public License as
00011    published by the Free Software Foundation; either version 2.1 of the
00012    License, or (at your option) any later version.
00013 
00014    The GNU C Library is distributed in the hope that it will be useful,
00015    but WITHOUT ANY WARRANTY; without even the implied warranty of
00016    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017    Lesser General Public License for more details.
00018 
00019    You should have received a copy of the GNU Lesser General Public
00020    License along with the GNU C Library; see the file COPYING.LIB.  If not,
00021    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00022    Boston, MA 02111-1307, USA.  */
00023 
00024 #include <string.h>
00025 #include <stdlib.h>
00026 
00027 /* Find the length of S, but scan at most MAXLEN characters.  If no
00028    '\0' terminator is found in that many characters, return MAXLEN.  */
00029 size_t
00030 __strnlen (const char *str, size_t maxlen)
00031 {
00032   const char *char_ptr, *end_ptr = str + maxlen;
00033   const unsigned long int *longword_ptr;
00034   unsigned long int longword, magic_bits, himagic, lomagic;
00035 
00036   if (maxlen == 0)
00037     return 0;
00038 
00039   if (__builtin_expect (end_ptr < str, 0))
00040     end_ptr = (const char *) ~0UL;
00041 
00042   /* Handle the first few characters by reading one character at a time.
00043      Do this until CHAR_PTR is aligned on a longword boundary.  */
00044   for (char_ptr = str; ((unsigned long int) char_ptr
00045                      & (sizeof (longword) - 1)) != 0;
00046        ++char_ptr)
00047     if (*char_ptr == '\0')
00048       {
00049        if (char_ptr > end_ptr)
00050          char_ptr = end_ptr;
00051        return char_ptr - str;
00052       }
00053 
00054   /* All these elucidatory comments refer to 4-byte longwords,
00055      but the theory applies equally well to 8-byte longwords.  */
00056 
00057   longword_ptr = (unsigned long int *) char_ptr;
00058 
00059   /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
00060      the "holes."  Note that there is a hole just to the left of
00061      each byte, with an extra at the end:
00062 
00063      bits:  01111110 11111110 11111110 11111111
00064      bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
00065 
00066      The 1-bits make sure that carries propagate to the next 0-bit.
00067      The 0-bits provide holes for carries to fall into.  */
00068   magic_bits = 0x7efefeffL;
00069   himagic = 0x80808080L;
00070   lomagic = 0x01010101L;
00071   if (sizeof (longword) > 4)
00072     {
00073       /* 64-bit version of the magic.  */
00074       /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
00075       magic_bits = ((0x7efefefeL << 16) << 16) | 0xfefefeffL;
00076       himagic = ((himagic << 16) << 16) | himagic;
00077       lomagic = ((lomagic << 16) << 16) | lomagic;
00078     }
00079   if (sizeof (longword) > 8)
00080     abort ();
00081 
00082   /* Instead of the traditional loop which tests each character,
00083      we will test a longword at a time.  The tricky part is testing
00084      if *any of the four* bytes in the longword in question are zero.  */
00085   while (longword_ptr < (unsigned long int *) end_ptr)
00086     {
00087       /* We tentatively exit the loop if adding MAGIC_BITS to
00088         LONGWORD fails to change any of the hole bits of LONGWORD.
00089 
00090         1) Is this safe?  Will it catch all the zero bytes?
00091         Suppose there is a byte with all zeros.  Any carry bits
00092         propagating from its left will fall into the hole at its
00093         least significant bit and stop.  Since there will be no
00094         carry from its most significant bit, the LSB of the
00095         byte to the left will be unchanged, and the zero will be
00096         detected.
00097 
00098         2) Is this worthwhile?  Will it ignore everything except
00099         zero bytes?  Suppose every byte of LONGWORD has a bit set
00100         somewhere.  There will be a carry into bit 8.  If bit 8
00101         is set, this will carry into bit 16.  If bit 8 is clear,
00102         one of bits 9-15 must be set, so there will be a carry
00103         into bit 16.  Similarly, there will be a carry into bit
00104         24.  If one of bits 24-30 is set, there will be a carry
00105         into bit 31, so all of the hole bits will be changed.
00106 
00107         The one misfire occurs when bits 24-30 are clear and bit
00108         31 is set; in this case, the hole at bit 31 is not
00109         changed.  If we had access to the processor carry flag,
00110         we could close this loophole by putting the fourth hole
00111         at bit 32!
00112 
00113         So it ignores everything except 128's, when they're aligned
00114         properly.  */
00115 
00116       longword = *longword_ptr++;
00117 
00118       if ((longword - lomagic) & himagic)
00119        {
00120          /* Which of the bytes was the zero?  If none of them were, it was
00121             a misfire; continue the search.  */
00122 
00123          const char *cp = (const char *) (longword_ptr - 1);
00124 
00125          char_ptr = cp;
00126          if (cp[0] == 0)
00127            break;
00128          char_ptr = cp + 1;
00129          if (cp[1] == 0)
00130            break;
00131          char_ptr = cp + 2;
00132          if (cp[2] == 0)
00133            break;
00134          char_ptr = cp + 3;
00135          if (cp[3] == 0)
00136            break;
00137          if (sizeof (longword) > 4)
00138            {
00139              char_ptr = cp + 4;
00140              if (cp[4] == 0)
00141               break;
00142              char_ptr = cp + 5;
00143              if (cp[5] == 0)
00144               break;
00145              char_ptr = cp + 6;
00146              if (cp[6] == 0)
00147               break;
00148              char_ptr = cp + 7;
00149              if (cp[7] == 0)
00150               break;
00151            }
00152        }
00153       char_ptr = end_ptr;
00154     }
00155 
00156   if (char_ptr > end_ptr)
00157     char_ptr = end_ptr;
00158   return char_ptr - str;
00159 }
00160 weak_alias (__strnlen, strnlen)
00161 libc_hidden_def (strnlen)