全面深入介绍C语言字符串函数

                ----刘黎明(liuliming2008@126.com)

1 函数头文件
2 函数实现源代码
3 后记

有人说,C语言就是提供了函数/结构体/指针,可是语言库确实相当重要, 特别是字符串库。


1
函数头文件
怎么用就不用介绍了,帮你回忆一下:


extern char * strcpy(char *,const char *);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->

//第三个参数(__kernel_size_t):最多复制的字节数
extern char * strncpy(char *,const char *, __kernel_size_t);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->

//与strncpy略有不同:结果总是以NUL结尾(缓冲区大小为0时除外),并且不会像strncpy那样用NUL填充剩余空间
size_t strlcpy(char *, const char *, size_t);

extern char * strcat(char *, const char *);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->

//最多从src追加__kernel_size_t个字符到dest的末尾
extern char * strncat(char *, const char *, __kernel_size_t);

extern size_t strlcat(char *, const char *, __kernel_size_t);

extern int strcmp(const char *,const char *);

extern int strncmp(const char *,const char *,__kernel_size_t);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->

//不管大小写的比较
extern int strnicmp(const char *, const char *, __kernel_size_t);

extern char * strchr(const char *,int);

extern char * strnchr(const char *, size_t, int);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->

//最后一次出现int的地址
extern char * strrchr(const char *,int);

extern char * strstr(const char *,const char *);

extern __kernel_size_t strlen(const char *);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->

//获得最长不超过__kernel_size_t长度的字符串的长度
extern __kernel_size_t strnlen(const char *,__kernel_size_t);

//在源字符串(source-string)中找出最先含有搜索字符串(searching-string)中的任一字符的位置并返回,若找不到则返回空指针
extern char * strpbrk(const char *,const char *);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->

//返回*s原来指向的位置(即当前记号的起始地址);字符串中第一个属于const char *分隔符集合的字符被改写为NUL,*s被更新为指向其后的剩余部分,找不到分隔符时*s被置为NULL
extern char * strsep(char **,const char *);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->

//传回str1最初的部份的长度,这个部份是完全地由str2中的字符所构成的
extern __kernel_size_t strspn(const char *,const char *);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->

//传回str1最初的部份的长度,这个部份没有str2中的任何字符

extern __kernel_size_t strcspn(const char *,const char *);

extern void * memset(void *,int,__kernel_size_t);

extern void * memcpy(void *,const void *,__kernel_size_t);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->

//如果源和目的参数有重叠, memmove() 提供有保证的行为。而 memcpy() 则不能提供这样的保证, 因此可以实现得更加有效率。如果有疑问, 最好使用 memmove()。memmove可以把自己的一部分拷贝给自己的另一部分, 其他函数不行。
extern void * memmove(void *,const void *,__kernel_size_t);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->

//在长度为__kernel_size_t的内存区域中查找字节int;找到则返回其地址,找不到则返回该区域末尾之后一个字节的地址
extern void * memscan(void *,int,__kernel_size_t);

extern int memcmp(const void *,const void *,__kernel_size_t);

extern void * memchr(const void *,int,__kernel_size_t);
<!--[if !supportLineBreakNewLine]-->
<!--[endif]-->

//申请内存,复制s到该内存
extern char *kstrdup(const char *s, gfp_t gfp);

 

2 函数实现源代码

 

/*

 *  linux/lib/string.c

 *

 *  Copyright (C) 1991, 1992  Linus Torvalds

 */

 

/*

 * stupid library routines.. The optimized versions should generally be found

 * as inline code in <asm-xx/string.h>

 *

 * These are buggy as well..

 *

 * * Fri Jun 25 1999, Ingo Oeser <ioe@informatik.tu-chemnitz.de>

 * -  Added strsep() which will replace strtok() soon (because strsep() is

 *    reentrant and should be faster). Use only strsep() in new code, please.

 *

 * * Sat Feb 09 2002, Jason Thomas <jason@topic.com.au>,

 *                    Matthew Hawkins <matt@mh.dropbear.id.au>

 * -  Kissed strtok() goodbye

 */

 

#include <linux/types.h>

#include <linux/string.h>

#include <linux/ctype.h>

#include <linux/module.h>

 

#ifndef __HAVE_ARCH_STRNICMP

/**
 * strnicmp - Case insensitive, length-limited string comparison
 * @s1: One string
 * @s2: The other string
 * @len: the maximum number of characters to compare
 *
 * Walks both strings in lock-step for at most @len characters. Characters
 * that differ are folded with tolower() before being compared again.
 *
 * Returns zero when the compared prefixes match (or @len is zero),
 * otherwise the difference between the last pair of characters examined,
 * both taken as unsigned char.
 */
int strnicmp(const char *s1, const char *s2, size_t len)
{
    /* Unsigned on purpose: the difference must not depend on char signedness */
    unsigned char a = 0;
    unsigned char b = 0;

    for (; len != 0; len--) {
        a = *s1++;
        b = *s2++;

        /* End of either string: report the raw difference */
        if (a == 0 || b == 0)
            break;

        if (a != b) {
            /* Only fold case when the raw bytes disagree */
            a = tolower(a);
            b = tolower(b);
            if (a != b)
                break;
        }
    }
    return (int)a - (int)b;
}

EXPORT_SYMBOL(strnicmp);

#endif

 

#ifndef __HAVE_ARCH_STRCPY

/**
 * strcpy - Copy a %NUL terminated string
 * @dest: Where to copy the string to
 * @src: Where to copy the string from
 *
 * Copies every byte of @src, including the terminating NUL, into @dest.
 * No bounds checking is done on @dest.
 *
 * Returns the original value of @dest.
 */
#undef strcpy
char *strcpy(char *dest, const char *src)
{
    char *tmp = dest;

    /* The assignment copies the byte; the loop ends once NUL was copied */
    while ((*dest++ = *src++) != '\0')
        /* nothing */;
    return tmp;
}

EXPORT_SYMBOL(strcpy);

#endif

 

#ifndef __HAVE_ARCH_STRNCPY

/**
 * strncpy - Copy a length-limited, %NUL-terminated string
 * @dest: Where to copy the string to
 * @src: Where to copy the string from
 * @count: The maximum number of bytes to copy
 *
 * Exactly @count bytes of @dest are written. If @src is shorter than
 * @count, the rest of @dest is filled with %NUL. If @src is @count bytes
 * or longer, the result is not %NUL-terminated.
 *
 * Returns @dest.
 */
char *strncpy(char *dest, const char *src, size_t count)
{
    size_t i;

    for (i = 0; i < count; i++) {
        dest[i] = *src;
        /* Once the NUL is reached, stay on it so the tail is NUL-padded */
        if (*src != 0)
            src++;
    }
    return dest;
}

EXPORT_SYMBOL(strncpy);

#endif

 

#ifndef __HAVE_ARCH_STRLCPY

/**
 * strlcpy - Copy a %NUL terminated string into a sized buffer
 * @dest: Where to copy the string to
 * @src: Where to copy the string from
 * @size: size of destination buffer
 *
 * Compatible with *BSD: the result is always a valid
 * NUL-terminated string that fits in the buffer (unless,
 * of course, the buffer size is zero). It does not pad
 * out the result like strncpy() does.
 *
 * Returns strlen(@src); a return value >= @size means the copy was
 * truncated.
 */
size_t strlcpy(char *dest, const char *src, size_t size)
{
    size_t ret = strlen(src);

    if (size) {
        /* Leave room for the terminator when @src does not fit */
        size_t len = (ret >= size) ? size - 1 : ret;

        memcpy(dest, src, len);
        dest[len] = '\0';
    }
    return ret;
}

EXPORT_SYMBOL(strlcpy);

#endif

 

#ifndef __HAVE_ARCH_STRCAT

/**
 * strcat - Append one %NUL-terminated string to another
 * @dest: The string to be appended to
 * @src: The string to append to it
 *
 * Finds the end of @dest and copies @src there, including its
 * terminating NUL. No bounds checking is done on @dest.
 *
 * Returns the original value of @dest.
 */
#undef strcat
char *strcat(char *dest, const char *src)
{
    char *tmp = dest;

    /* Advance to the existing terminator of @dest */
    while (*dest)
        dest++;
    /* Copy @src, stopping after its NUL has been written */
    while ((*dest++ = *src++) != '\0')
        ;
    return tmp;
}

EXPORT_SYMBOL(strcat);

#endif

 

#ifndef __HAVE_ARCH_STRNCAT

/**
 * strncat - Append a length-limited, %NUL-terminated string to another
 * @dest: The string to be appended to
 * @src: The string to append to it
 * @count: The maximum numbers of bytes to copy
 *
 * Appends at most @count bytes of @src to @dest and then terminates the
 * result, so up to @count + 1 bytes are written after the old end of
 * @dest. Nothing is done when @count is zero.
 *
 * Note that in contrast to strncpy, strncat ensures the result is
 * terminated.
 *
 * Returns the original value of @dest.
 */
char *strncat(char *dest, const char *src, size_t count)
{
    char *tmp = dest;

    if (count) {
        while (*dest)
            dest++;
        while ((*dest++ = *src++) != 0) {
            if (--count == 0) {
                /* Limit reached before the NUL of @src: terminate here */
                *dest = '\0';
                break;
            }
        }
    }
    return tmp;
}

EXPORT_SYMBOL(strncat);

#endif

 

#ifndef __HAVE_ARCH_STRLCAT

/**
 * strlcat - Append a length-limited, %NUL-terminated string to another
 * @dest: The string to be appended to
 * @src: The string to append to it
 * @count: The size of the destination buffer.
 *
 * Appends as much of @src as fits in the space left in @dest and always
 * terminates the result. The current length of @dest must be smaller
 * than @count; this is enforced with BUG_ON().
 *
 * Returns strlen(@dest) + strlen(@src) as measured on entry.
 */
size_t strlcat(char *dest, const char *src, size_t count)
{
    const size_t used = strlen(dest);
    const size_t wanted = strlen(src);
    size_t room;
    size_t ncopy;
    char *tail;

    /* This would be a bug */
    BUG_ON(used >= count);

    tail = dest + used;
    room = count - used;

    /* Keep one byte of the remaining room for the terminator */
    ncopy = (wanted >= room) ? room - 1 : wanted;
    memcpy(tail, src, ncopy);
    tail[ncopy] = 0;

    return used + wanted;
}

EXPORT_SYMBOL(strlcat);

#endif

 

#ifndef __HAVE_ARCH_STRCMP

/**
 * strcmp - Compare two strings
 * @cs: One string
 * @ct: Another string
 *
 * Characters are compared as unsigned char, so bytes with the high bit
 * set sort after 7-bit characters and the sign of the result cannot be
 * corrupted by truncation.
 *
 * Returns zero if the strings are equal, a negative value if @cs sorts
 * before @ct and a positive value otherwise. The value is the difference
 * between the first pair of differing characters.
 */
#undef strcmp
int strcmp(const char *cs, const char *ct)
{
    unsigned char c1, c2;

    for (;;) {
        c1 = (unsigned char)*cs++;
        c2 = (unsigned char)*ct++;
        if (c1 != c2)
            return (int)c1 - (int)c2;
        /* Both characters are equal; stop once that character is NUL */
        if (c1 == '\0')
            return 0;
    }
}

EXPORT_SYMBOL(strcmp);

#endif

 

#ifndef __HAVE_ARCH_STRNCMP

/**
 * strncmp - Compare two length-limited strings
 * @cs: One string
 * @ct: Another string
 * @count: The maximum number of bytes to compare
 *
 * Characters are compared as unsigned char, so bytes with the high bit
 * set sort after 7-bit characters and the sign of the result cannot be
 * corrupted by truncation.
 *
 * Returns zero if the first @count characters are equal (or @count is
 * zero), otherwise the difference between the first pair of differing
 * characters.
 */
int strncmp(const char *cs, const char *ct, size_t count)
{
    unsigned char c1, c2;

    for (; count != 0; count--) {
        c1 = (unsigned char)*cs++;
        c2 = (unsigned char)*ct++;
        if (c1 != c2)
            return (int)c1 - (int)c2;
        /* Equal characters: a NUL here ends both strings */
        if (c1 == '\0')
            break;
    }
    return 0;
}

EXPORT_SYMBOL(strncmp);

#endif

 

#ifndef __HAVE_ARCH_STRCHR

/**
 * strchr - Find the first occurrence of a character in a string
 * @s: The string to be searched
 * @c: The character to search for
 *
 * @c is converted to char before comparing, so searching for 0 returns
 * a pointer to the terminating NUL.
 *
 * Returns a pointer to the first match, or %NULL if @c does not occur.
 */
char *strchr(const char *s, int c)
{
    for (; *s != (char)c; ++s)
        /* Reached the terminator without a match */
        if (*s == '\0')
            return NULL;
    return (char *)s;
}

EXPORT_SYMBOL(strchr);

#endif

 

#ifndef __HAVE_ARCH_STRRCHR

/**
 * strrchr - Find the last occurrence of a character in a string
 * @s: The string to be searched
 * @c: The character to search for
 *
 * Scans forward and remembers the most recent match, so no pointer is
 * ever formed before the start of @s. @c is converted to char before
 * comparing; searching for 0 returns a pointer to the terminating NUL.
 *
 * Returns a pointer to the last match, or %NULL if @c does not occur.
 */
char *strrchr(const char *s, int c)
{
    const char *last = NULL;

    do {
        if (*s == (char)c)
            last = s;
    } while (*s++ != '\0');	/* the terminator itself is examined too */

    return (char *)last;
}

EXPORT_SYMBOL(strrchr);

#endif

 

#ifndef __HAVE_ARCH_STRNCHR

/**
 * strnchr - Find a character in a length limited string
 * @s: The string to be searched
 * @count: The number of characters to be searched
 * @c: The character to search for
 *
 * The search stops at the terminating NUL or after @count characters,
 * whichever comes first. The terminator itself is never matched.
 *
 * Returns a pointer to the first match, or %NULL if none was found.
 */
char *strnchr(const char *s, size_t count, int c)
{
    for (; count-- && *s != '\0'; ++s)
        if (*s == (char)c)
            return (char *)s;
    return NULL;
}

EXPORT_SYMBOL(strnchr);

#endif

 

/**
 * strstrip - Removes leading and trailing whitespace from @s.
 * @s: The string to be stripped.
 *
 * Note that the first trailing whitespace is replaced with a %NUL-terminator
 * in the given string @s. Returns a pointer to the first non-whitespace
 * character in @s.
 *
 * An empty string is returned unchanged. For a string consisting only of
 * whitespace the returned pointer refers to an empty string inside @s.
 */
char *strstrip(char *s)
{
    size_t size;
    char *end;

    size = strlen(s);

    if (!size)
        return s;

    /* Walk back from the last character over trailing whitespace */
    end = s + size - 1;
    while (end != s && isspace((unsigned char)*end))
        end--;
    *(end + 1) = '\0';

    /* Skip leading whitespace; the buffer itself is not shifted */
    while (*s && isspace((unsigned char)*s))
        s++;

    return s;
}

EXPORT_SYMBOL(strstrip);

 

#ifndef __HAVE_ARCH_STRLEN

/**
 * strlen - Find the length of a string
 * @s: The string to be sized
 *
 * Returns the number of characters before the terminating NUL.
 */
size_t strlen(const char *s)
{
    const char *sc;

    for (sc = s; *sc != '\0'; ++sc)
        /* nothing */;
    return sc - s;
}

EXPORT_SYMBOL(strlen);

#endif

 

#ifndef __HAVE_ARCH_STRNLEN

/**
 * strnlen - Find the length of a length-limited string
 * @s: The string to be sized
 * @count: The maximum number of bytes to search
 *
 * Returns the number of characters before the terminating NUL, or
 * @count if no NUL is found within the first @count bytes.
 */
size_t strnlen(const char *s, size_t count)
{
    const char *sc;

    /* The count is checked first, so no byte past the limit is read */
    for (sc = s; count-- && *sc != '\0'; ++sc)
        /* nothing */;
    return sc - s;
}

EXPORT_SYMBOL(strnlen);

#endif

 

#ifndef __HAVE_ARCH_STRSPN

/**
 * strspn - Calculate the length of the initial substring of @s which only
 * contain letters in @accept
 * @s: The string to be searched
 * @accept: The string to search for
 *
 * Returns the number of leading characters of @s that all occur in
 * @accept; zero if @accept is empty.
 */
size_t strspn(const char *s, const char *accept)
{
    const char *p;
    const char *a;
    size_t count = 0;

    for (p = s; *p != '\0'; ++p) {
        for (a = accept; *a != '\0'; ++a) {
            if (*p == *a)
                break;
        }
        /* Ran off the end of @accept: *p is not an accepted character */
        if (*a == '\0')
            return count;
        ++count;
    }
    return count;
}

 

EXPORT_SYMBOL(strspn);

#endif

 

#ifndef __HAVE_ARCH_STRCSPN

/**
 * strcspn - Calculate the length of the initial substring of @s which does
 * not contain letters in @reject
 * @s: The string to be searched
 * @reject: The string to avoid
 *
 * Returns the number of leading characters of @s that do not occur in
 * @reject; the full length of @s if none of them does.
 */
size_t strcspn(const char *s, const char *reject)
{
    const char *p;
    const char *r;
    size_t count = 0;

    for (p = s; *p != '\0'; ++p) {
        for (r = reject; *r != '\0'; ++r) {
            /* First rejected character ends the span */
            if (*p == *r)
                return count;
        }
        ++count;
    }
    return count;
}

EXPORT_SYMBOL(strcspn);

#endif

 

#ifndef __HAVE_ARCH_STRPBRK

/**
 * strpbrk - Find the first occurrence of a set of characters
 * @cs: The string to be searched
 * @ct: The characters to search for
 *
 * Returns a pointer to the first character of @cs that also occurs in
 * @ct, or %NULL if there is none.
 */
char *strpbrk(const char *cs, const char *ct)
{
    const char *sc1, *sc2;

    for (sc1 = cs; *sc1 != '\0'; ++sc1) {
        for (sc2 = ct; *sc2 != '\0'; ++sc2) {
            if (*sc1 == *sc2)
                return (char *)sc1;
        }
    }
    return NULL;
}

EXPORT_SYMBOL(strpbrk);

#endif

 

#ifndef __HAVE_ARCH_STRSEP

/**
 * strsep - Split a string into tokens
 * @s: The string to be searched
 * @ct: The characters to search for
 *
 * strsep() updates @s to point after the token, ready for the next call.
 * The delimiter that ended the token is overwritten with NUL. When no
 * delimiter is left, *@s is set to %NULL so the following call returns
 * %NULL.
 *
 * It returns empty tokens, too, behaving exactly like the libc function
 * of that name. In fact, it was stolen from glibc2 and de-fancy-fied.
 * Same semantics, slimmer shape. ;)
 *
 * Returns the start of the token, or %NULL if *@s was %NULL on entry.
 */
char *strsep(char **s, const char *ct)
{
    char *sbegin = *s;
    char *end;

    if (sbegin == NULL)
        return NULL;

    end = strpbrk(sbegin, ct);
    if (end)
        /* Cut the token off and step past the delimiter */
        *end++ = '\0';
    *s = end;
    return sbegin;
}

EXPORT_SYMBOL(strsep);

#endif

 

#ifndef __HAVE_ARCH_MEMSET

/**
 * memset - Fill a region of memory with the given value
 * @s: Pointer to the start of the area.
 * @c: The byte to fill the area with
 * @count: The size of the area.
 *
 * Do not use memset() to access IO space, use memset_io() instead.
 *
 * Returns @s.
 */
void *memset(void *s, int c, size_t count)
{
    char *bytes = s;
    size_t i;

    /* @c is narrowed to char on each store */
    for (i = 0; i < count; i++)
        bytes[i] = c;
    return s;
}

EXPORT_SYMBOL(memset);

#endif

 

#ifndef __HAVE_ARCH_MEMCPY

/**
 * memcpy - Copy one area of memory to another
 * @dest: Where to copy to
 * @src: Where to copy from
 * @count: The size of the area.
 *
 * Bytes are copied one at a time from the lowest address upwards.
 *
 * You should not use this function to access IO space, use memcpy_toio()
 * or memcpy_fromio() instead.
 *
 * Returns @dest.
 */
void *memcpy(void *dest, const void *src, size_t count)
{
    char *out = dest;
    const char *in = src;
    size_t i;

    for (i = 0; i < count; i++)
        out[i] = in[i];
    return dest;
}

EXPORT_SYMBOL(memcpy);

#endif

 

#ifndef __HAVE_ARCH_MEMMOVE

/**
 * memmove - Copy one area of memory to another
 * @dest: Where to copy to
 * @src: Where to copy from
 * @count: The size of the area.
 *
 * Unlike memcpy(), memmove() copes with overlapping areas: when @dest
 * lies above @src the bytes are copied from the end backwards, otherwise
 * from the start forwards.
 *
 * Returns @dest.
 */
void *memmove(void *dest, const void *src, size_t count)
{
    char *out = dest;
    const char *in = src;
    size_t i;

    if (dest > src) {
        /* Copy the highest byte first so the source is read before
         * it can be overwritten */
        for (i = count; i > 0; i--)
            out[i - 1] = in[i - 1];
        return dest;
    }

    for (i = 0; i < count; i++)
        out[i] = in[i];
    return dest;
}

EXPORT_SYMBOL(memmove);

#endif

 

#ifndef __HAVE_ARCH_MEMCMP

/**
 * memcmp - Compare two areas of memory
 * @cs: One area of memory
 * @ct: Another area of memory
 * @count: The size of the area.
 *
 * Bytes are compared as unsigned char.
 *
 * Returns zero if the first @count bytes are equal (or @count is zero),
 * otherwise the difference between the first pair of differing bytes.
 */
#undef memcmp
int memcmp(const void *cs, const void *ct, size_t count)
{
    const unsigned char *lhs = cs;
    const unsigned char *rhs = ct;
    size_t i;
    int diff;

    for (i = 0; i < count; i++) {
        diff = lhs[i] - rhs[i];
        if (diff != 0)
            return diff;
    }
    return 0;
}

EXPORT_SYMBOL(memcmp);

#endif

 

#ifndef __HAVE_ARCH_MEMSCAN

/**
 * memscan - Find a character in an area of memory.
 * @addr: The memory area
 * @c: The byte to search for
 * @size: The size of the area.
 *
 * Each byte is read as unsigned char and compared against @c as an int.
 *
 * returns the address of the first occurrence of @c, or 1 byte past
 * the area if @c is not found
 */
void *memscan(void *addr, int c, size_t size)
{
    unsigned char *cur;

    for (cur = addr; size != 0; size--, cur++) {
        if (*cur == c)
            break;
    }
    /* Either the match, or one past the end when the loop ran out */
    return (void *)cur;
}

EXPORT_SYMBOL(memscan);

#endif

 

#ifndef __HAVE_ARCH_STRSTR

/**
 * strstr - Find the first substring in a %NUL terminated string
 * @s1: The string to be searched
 * @s2: The string to search for
 *
 * Lengths are kept in size_t so that very long strings are not
 * truncated.
 *
 * Returns a pointer to the first occurrence of @s2 in @s1, @s1 itself
 * if @s2 is empty, or %NULL if @s2 does not occur.
 */
char *strstr(const char *s1, const char *s2)
{
    size_t l1, l2;

    l2 = strlen(s2);
    if (!l2)
        return (char *)s1;
    l1 = strlen(s1);
    /* l2 >= 1 here, so l1 cannot wrap below zero inside the loop */
    while (l1 >= l2) {
        l1--;
        if (!memcmp(s1, s2, l2))
            return (char *)s1;
        s1++;
    }
    return NULL;
}

EXPORT_SYMBOL(strstr);

#endif

 

#ifndef __HAVE_ARCH_MEMCHR

/**
 * memchr - Find a character in an area of memory.
 * @s: The memory area
 * @c: The byte to search for
 * @n: The size of the area.
 *
 * @c is converted to unsigned char before the comparison.
 *
 * returns the address of the first occurrence of @c, or %NULL
 * if @c is not found
 */
void *memchr(const void *s, int c, size_t n)
{
    const unsigned char *bytes = s;
    const unsigned char wanted = (unsigned char)c;
    size_t i;

    for (i = 0; i < n; i++) {
        if (bytes[i] == wanted)
            return (void *)&bytes[i];
    }
    return NULL;
}

EXPORT_SYMBOL(memchr);

#endif

 

 

#include <linux/slab.h>

#include <linux/string.h>

#include <linux/module.h>

#include <linux/err.h>

#include <asm/uaccess.h>

 

/**
 * __kzalloc - allocate memory. The memory is set to zero.
 * @size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * Returns the zero-filled allocation, or the failed (null) result of
 * ____kmalloc() unchanged.
 */
void *__kzalloc(size_t size, gfp_t flags)
{
    void *mem = ____kmalloc(size, flags);

    /* Nothing to clear when the allocation failed */
    if (!mem)
        return NULL;

    memset(mem, 0, size);
    return mem;
}

EXPORT_SYMBOL(__kzalloc);

 

/*
 * kstrdup - allocate space for and copy an existing string
 *
 * @s: the string to duplicate
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 *
 * Returns the newly allocated copy, or NULL when @s is NULL or the
 * allocation fails.
 */
char *kstrdup(const char *s, gfp_t gfp)
{
    size_t nbytes;
    char *copy;

    if (s == NULL)
        return NULL;

    /* One extra byte so the terminating NUL is copied as well */
    nbytes = strlen(s) + 1;
    copy = ____kmalloc(nbytes, gfp);
    if (copy != NULL)
        memcpy(copy, s, nbytes);

    return copy;
}

EXPORT_SYMBOL(kstrdup);

 

/*
 * strndup_user - duplicate an existing string from user space
 *
 * @s: The string to duplicate
 * @n: Maximum number of bytes to copy, including the trailing NUL.
 *
 * Returns the kmalloc()ed copy on success. On failure an ERR_PTR() is
 * returned: -EFAULT when strnlen_user() reports a zero length or the
 * copy from user space fails, -EINVAL when the reported length exceeds
 * @n, and -ENOMEM when the allocation fails.
 */
char *strndup_user(const char __user *s, long n)
{
    char *p;
    long length;

    length = strnlen_user(s, n);

    /* A zero length is treated as a fault on the user pointer */
    if (!length)
        return ERR_PTR(-EFAULT);

    if (length > n)
        return ERR_PTR(-EINVAL);

    p = kmalloc(length, GFP_KERNEL);

    if (!p)
        return ERR_PTR(-ENOMEM);

    if (copy_from_user(p, s, length)) {
        kfree(p);
        return ERR_PTR(-EFAULT);
    }

    /* Force termination of the copy regardless of what was read */
    p[length - 1] = '\0';

    return p;
}

EXPORT_SYMBOL(strndup_user);

 

 

3 后记

Windows上的实现与平台相关性稍大,不好理解,虽然也很好用。

Linux下string.c的实现位于/linuxxx.xx.xx/lib目录;

头文件string.h位于/linuxxx.xx.xx/include/linux目录;

汇编实现位于/linuxxx.xx.xx/include/asm目录。

 

是不是觉得string.c写的也不是那么完美,也许你能写出更好的。但是在你看源代码之前,你能写出这样的效率和简洁的实现么?

为什么没有参数检查?也许这是为高手实现的,高手是不会出现参数错误,呵呵,这当然是玩笑。可能是出于效率考虑吧,参数传入前自己检查好了。

【来源】

 
Logo

开放原子开发者工作坊旨在鼓励更多人参与开源活动,与志同道合的开发者们相互交流开发经验、分享开发心得、获取前沿技术趋势。工作坊有多种形式的开发者活动,如meetup、训练营等,主打技术交流,干货满满,真诚地邀请各位开发者共同参与!

更多推荐