memchr.c

00001 /* Copyright (C) 1991, 1993, 1996, 1997, 1999, 2000, 2003, 2004 Free
00002    Software Foundation, Inc.
00003 
00004    Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
00005    with help from Dan Sahlin (dan@sics.se) and
00006    commentary by Jim Blandy (jimb@ai.mit.edu);
00007    adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
00008    and implemented by Roland McGrath (roland@ai.mit.edu).
00009 
00010 NOTE: The canonical source of this file is maintained with the GNU C Library.
00011 Bugs can be reported to bug-glibc@prep.ai.mit.edu.
00012 
00013 This program is free software; you can redistribute it and/or modify it
00014 under the terms of the GNU Lesser General Public License as published by the
00015 Free Software Foundation; either version 2.1, or (at your option) any
00016 later version.
00017 
00018 This program is distributed in the hope that it will be useful,
00019 but WITHOUT ANY WARRANTY; without even the implied warranty of
00020 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00021 GNU Lesser General Public License for more details.
00022 
00023 You should have received a copy of the GNU Lesser General Public License
00024 along with this program; if not, write to the Free Software Foundation,
00025 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
00026 
00027 #ifdef HAVE_CONFIG_H
00028 # include <config.h>
00029 #endif
00030 
00031 #include <string.h>
00032 
00033 #include <stddef.h>
00034 
00035 #if defined _LIBC
00036 # include <memcopy.h>
00037 #else
00038 # define reg_char char
00039 #endif
00040 
00041 #include <limits.h>
00042 
00043 #if HAVE_BP_SYM_H || defined _LIBC
00044 # include <bp-sym.h>
00045 #else
00046 # define BP_SYM(sym) sym
00047 #endif
00048 
00049 #undef memchr
00050 #undef __memchr
00051 
00052 /* Search no more than N bytes of S for C.  */
00053 void *
00054 __memchr (void const *s, int c_in, size_t n)
00055 {
00056   const unsigned char *char_ptr;
00057   const unsigned long int *longword_ptr;
00058   unsigned long int longword, magic_bits, charmask;
00059   unsigned reg_char c;
00060   int i;
00061 
00062   c = (unsigned char) c_in;
00063 
00064   /* Handle the first few characters by reading one character at a time.
00065      Do this until CHAR_PTR is aligned on a longword boundary.  */
00066   for (char_ptr = (const unsigned char *) s;
00067        n > 0 && (size_t) char_ptr % sizeof longword != 0;
00068        --n, ++char_ptr)
00069     if (*char_ptr == c)
00070       return (void *) char_ptr;
00071 
00072   /* All these elucidatory comments refer to 4-byte longwords,
00073      but the theory applies equally well to any size longwords.  */
00074 
00075   longword_ptr = (const unsigned long int *) char_ptr;
00076 
00077   /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
00078      the "holes."  Note that there is a hole just to the left of
00079      each byte, with an extra at the end:
00080 
00081      bits:  01111110 11111110 11111110 11111111
00082      bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
00083 
00084      The 1-bits make sure that carries propagate to the next 0-bit.
00085      The 0-bits provide holes for carries to fall into.  */
00086 
00087   /* Set MAGIC_BITS to be this pattern of 1 and 0 bits.
00088      Set CHARMASK to be a longword, each of whose bytes is C.  */
00089 
00090   magic_bits = 0xfefefefe;
00091   charmask = c | (c << 8);
00092   charmask |= charmask << 16;
00093 #if 0xffffffffU < ULONG_MAX
00094   magic_bits |= magic_bits << 32;
00095   charmask |= charmask << 32;
00096   if (8 < sizeof longword)
00097     for (i = 64; i < sizeof longword * 8; i *= 2)
00098       {
00099         magic_bits |= magic_bits << i;
00100         charmask |= charmask << i;
00101       }
00102 #endif
00103   magic_bits = (ULONG_MAX >> 1) & (magic_bits | 1);
00104 
00105   /* Instead of the traditional loop which tests each character,
00106      we will test a longword at a time.  The tricky part is testing
00107      if *any of the four* bytes in the longword in question are zero.  */
00108   while (n >= sizeof longword)
00109     {
00110       /* We tentatively exit the loop if adding MAGIC_BITS to
00111          LONGWORD fails to change any of the hole bits of LONGWORD.
00112 
00113          1) Is this safe?  Will it catch all the zero bytes?
00114          Suppose there is a byte with all zeros.  Any carry bits
00115          propagating from its left will fall into the hole at its
00116          least significant bit and stop.  Since there will be no
00117          carry from its most significant bit, the LSB of the
00118          byte to the left will be unchanged, and the zero will be
00119          detected.
00120 
00121          2) Is this worthwhile?  Will it ignore everything except
00122          zero bytes?  Suppose every byte of LONGWORD has a bit set
00123          somewhere.  There will be a carry into bit 8.  If bit 8
00124          is set, this will carry into bit 16.  If bit 8 is clear,
00125          one of bits 9-15 must be set, so there will be a carry
00126          into bit 16.  Similarly, there will be a carry into bit
00127          24.  If one of bits 24-30 is set, there will be a carry
00128          into bit 31, so all of the hole bits will be changed.
00129 
00130          The one misfire occurs when bits 24-30 are clear and bit
00131          31 is set; in this case, the hole at bit 31 is not
00132          changed.  If we had access to the processor carry flag,
00133          we could close this loophole by putting the fourth hole
00134          at bit 32!
00135 
00136          So it ignores everything except 128's, when they're aligned
00137          properly.
00138 
00139          3) But wait!  Aren't we looking for C, not zero?
00140          Good point.  So what we do is XOR LONGWORD with a longword,
00141          each of whose bytes is C.  This turns each byte that is C
00142          into a zero.  */
00143 
00144       longword = *longword_ptr++ ^ charmask;
00145 
00146       /* Add MAGIC_BITS to LONGWORD.  */
00147       if ((((longword + magic_bits)
00148 
00149             /* Set those bits that were unchanged by the addition.  */
00150             ^ ~longword)
00151 
00152            /* Look at only the hole bits.  If any of the hole bits
00153               are unchanged, most likely one of the bytes was a
00154               zero.  */
00155            & ~magic_bits) != 0)
00156         {
00157           /* Which of the bytes was C?  If none of them were, it was
00158              a misfire; continue the search.  */
00159 
00160           const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);
00161 
00162           if (cp[0] == c)
00163             return (void *) cp;
00164           if (cp[1] == c)
00165             return (void *) &cp[1];
00166           if (cp[2] == c)
00167             return (void *) &cp[2];
00168           if (cp[3] == c)
00169             return (void *) &cp[3];
00170           if (4 < sizeof longword && cp[4] == c)
00171             return (void *) &cp[4];
00172           if (5 < sizeof longword && cp[5] == c)
00173             return (void *) &cp[5];
00174           if (6 < sizeof longword && cp[6] == c)
00175             return (void *) &cp[6];
00176           if (7 < sizeof longword && cp[7] == c)
00177             return (void *) &cp[7];
00178           if (8 < sizeof longword)
00179             for (i = 8; i < sizeof longword; i++)
00180               if (cp[i] == c)
00181                 return (void *) &cp[i];
00182         }
00183 
00184       n -= sizeof longword;
00185     }
00186 
00187   char_ptr = (const unsigned char *) longword_ptr;
00188 
00189   while (n-- > 0)
00190     {
00191       if (*char_ptr == c)
00192         return (void *) char_ptr;
00193       else
00194         ++char_ptr;
00195     }
00196 
00197   return 0;
00198 }
00199 #ifdef weak_alias
00200 weak_alias (__memchr, BP_SYM (memchr))
00201 #endif

Generated on Mon Feb 5 10:54:28 2007 for WvStreams by  doxygen 1.5.1