Some string library functions in C language and their simulation implementation

Keywords: C string

String function (the following header files are "string.h")

1.strlen (find string length)

prototype

  • Computing the length does not modify the string, so the parameter is qualified with const;

Simulation Implementation

#include<stdio.h>
#include<Windows.h>
#include<assert.h>
#pragma warning(disable:4996)

//Method 1: counter method -- a separate counter variable is incremented for each character
//arr is const-qualified because computing the length never modifies the string.
//Returns the number of characters that precede the terminating '\0'.
size_t myStrlen1(const char *arr)
{
	assert(arr); //A NULL pointer is a caller error

	//The counter has the same type as the return value, so a long string cannot overflow a narrower int
	size_t count = 0;
	while (*arr != '\0')
	{
		count++;
		arr++;
	}
	return count;
}

//Method 2: recursion -- no local counter variable is created
//Returns the number of characters that precede the terminating '\0'.
size_t myStrlen2(const char *arr)
{
	assert(arr);

	//An empty tail has length 0; otherwise count this character and recurse on the rest
	return (*arr != '\0') ? 1 + myStrlen2(arr + 1) : 0;
}

//Method 3: pointer method -- advance a second pointer to the terminating '\0'
//and return its distance from the start of the string.
size_t myStrlen3(const char *arr)
{
	assert(arr);

	//The scanning pointer stays const-qualified: the string is only read, never written
	const char *p = arr;
	while (*p != '\0')
	{
		p++;
	}
	//p never moves before arr, so the difference is non-negative and safe to convert
	return (size_t)(p - arr);
}

//Demo: compute the length of the same string with all three implementations
//and print each implementation's own result.
int main()
{
	const char *arr = "abcd1234";
	int len1 = (int)myStrlen1(arr);
	printf("Method 1 string arr Length:%d\n", len1);
	int len2 = (int)myStrlen2(arr);
	printf("Method 2 string arr Length:%d\n", len2);
	int len3 = (int)myStrlen3(arr);
	printf("Method 3 string arr Length:%d\n", len3);

	system("pause");
	return 0;
}

2.strcpy (string copy)

prototype

  • Copy the following parameters to the previous parameters without modifying the latter, so the latter parameters are modified with const;
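  • Note that the destination (the first parameter) must be large enough to hold the source string including its terminating '\0'; otherwise the copy writes past the end of the destination, which is undefined behavior (the library does not report an error);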
  • The return value of the function is char * type, which is to support the chain call of the class library function;

Simulation Implementation

#include<stdio.h>
#include<Windows.h>
#include<assert.h>
#pragma warning(disable:4996)

//Naming convention - ret -> result; src -> source (the data being copied); dest -> destination

//Simulated strcpy: copies src, including its terminating '\0', into dest.
//dest must be large enough to hold the copy.
//Returns the original dest pointer so that calls can be chained.
char *myStrcpy(char *dest, const char *src)
{
	assert(src);  //assert is used to check that the pointer is not NULL
	assert(dest);

	char *ret = dest;
	//Copy one character per iteration; the loop ends right after '\0' itself has been copied
	while ((*dest++ = *src++) != '\0')
	{
		;
	}
	return ret;
}

//Demo: print dest before and after src is copied over it
int main()
{
	char dest[30] = { "12345" };
	const char *src = "abcd";

	printf("before:%s\n", dest);
	char *copied = myStrcpy(dest, src); //The return value is dest itself
	printf("after:%s\n", copied);

	system("pause");
	return 0;
}

3.strcat (string splicing)

prototype

  • Splice the following parameters to the previous parameters without modifying the latter, so the latter parameters are modified with const;
  • In the former, the string space must be large enough to accommodate the spliced string;
  • Note that a string must not be concatenated onto itself (the terminating '\0' of the source is overwritten as the destination grows, so the copy never reaches an end);
  • The return value of the function is char * type, which is to support the chain call of the class library function;

Simulation Implementation

#include<stdio.h>
#include<Windows.h>
#include<assert.h>
#pragma warning(disable:4996)

//Simulated strcat (this snippet keeps the function name myStrcpy):
//appends src, including its terminating '\0', to the end of the string already in dest.
//Returns the original dest pointer.
char *myStrcpy(char *dest, const char *src)
{
	assert(src);
	assert(dest);

	char *tail = dest;
	//Advance to the '\0' that terminates the existing dest string
	for (; *tail != '\0'; tail++)
	{
	}
	//Write src starting on top of that terminator; stop once src's own '\0' has been written
	do
	{
		*tail = *src++;
	} while (*tail++ != '\0');
	return dest;
}

//Demo: print dest before and after src is appended to it
int main()
{
	char dest[30] = { "12345" };
	const char *src = "abcd";

	printf("before:%s\n", dest);
	char *joined = myStrcpy(dest, src); //The return value is dest itself
	printf("after:%s\n", joined);

	system("pause");
	return 0;
}

4.strcmp (string comparison)

prototype

  • Compare the size of the two strings without modifying the string, so the two parameters are modified with const;
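  • String comparison is decided by the ASCII code values of the first pair of differing characters in the two strings;
  • If str1 > str2, a positive value is returned; if str1 = str2, 0 is returned; if str1 < str2, a negative value is returned (the simulation below returns exactly 1, 0 and -1);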

Simulation Implementation

#include<stdio.h>
#include<Windows.h>
#include<assert.h>
#pragma warning(disable:4996)

//Simulated strcmp: compares the two strings character by character as unsigned char values.
//Returns -1 if str1 < str2, 0 if they are equal, 1 if str1 > str2.
int myStrcmp(const char *str1, const char *str2)
{
	assert(str1);
	assert(str2);

	const unsigned char *lhs = (const unsigned char *)str1;
	const unsigned char *rhs = (const unsigned char *)str2;

	//Skip the common prefix; stop at the first differing character or at the shared '\0'
	while (*lhs == *rhs && *rhs != '\0')
	{
		lhs++;
		rhs++;
	}

	if (*lhs < *rhs)
	{
		return -1;
	}
	if (*lhs > *rhs)
	{
		return 1;
	}
	return 0;
}

//Demo: compare two strings and print which one is larger
int main()
{
	const char *str1 = "abcd4";
	const char *str2 = "abcd5";

	int comp = myStrcmp(str1, str2);
	//Only the sign of the result matters here
	if (comp == 0)
	{
		printf("str1=str2\n");
	}
	else if (comp > 0)
	{
		printf("str1>str2\n");
	}
	else
	{
		printf("str1<str2\n");
	}

	system("pause");
	return 0;
}

5.strncpy (length limited string copy)

prototype

  • "Secure copy" function, that is, compared with strcpy function, the parameter increases the length parameter of the string that needs to be copied;
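  • If the source has num or more characters, no '\0' is appended after copying and you need to add it yourself (if the source is shorter than num, the rest of the destination is filled with '\0');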

6.strncat (length limited string splicing)

prototype

  • Compared with strcat function, the parameter of "safe splicing" function increases the length parameter of the string that needs to be spliced;
  • A '\0' is always appended to the end of the concatenated string;

7.strncmp (length limited string comparison)

prototype

  • The "security comparison" function compares the first num characters of two strings;
  • The operation rules are the same as strcmp;

8.strstr (substring lookup)

prototype

  • This function returns the address of the first character of the first occurrence of str2 in str1, or NULL if str2 does not occur in str1;

application

  • Replace a string of a string
  • Use a while loop to find every position of a substring within a string, which also gives the number of occurrences;
    (the following are two application examples of strstr)
//strstr example_1 replacement
//Overwrite a substring found inside a longer string
int main()
{
	char str[] = "This is a simple string.";

	//Locate "simple" inside str
	char *match = strstr(str, "simple");
	//Overwrite exactly 6 characters in place; no '\0' is written, so the rest of str is kept
	strncpy(match, "sample", 6);
	puts(str);

	system("pause");
	return 0;
}

//strstr example_2 find
//Find every position of a short string inside a long string
int main()
{
	const char *s1 = "abcd1234worldABCD-1world,nkworld.";
	const char *s2 = "world";

	int count = 0;
	//The result points into s1, which is read-only data, so it is kept const-qualified
	const char *sub_str = strstr(s1, s2);
	while (sub_str != NULL)
	{
		count++;
		//A pointer difference has type ptrdiff_t; convert it to match the %d specifier
		printf("The first%d individual%s Location of:%d\n", count, s2, (int)(sub_str - s1));
		//Resume the search one character past the start of the current match
		sub_str = strstr(sub_str + 1, s2);
	}

	system("pause");
	return 0;
}

Simulation Implementation

//Simulated strstr: finds the first occurrence of str2 inside str1.
//Returns a pointer to the start of that occurrence, or NULL if str2 does not occur.
//As with the library strstr, an empty str2 matches at the very start, so str1 is returned.
char *myStrstr(const char*str1, const char *str2)
{
	assert(str1);
	assert(str2);

	if (*str2 == '\0')
	{
		return (char *)str1;
	}

	//Outer loop: try every position in str1 as a candidate start of the match
	for (const char *start = str1; *start != '\0'; start++)
	{
		const char *hay = start;    //Inner loop cursor in str1
		const char *needle = str2;  //Inner loop cursor in str2
		//Reaching the '\0' of str1 first ends the loop too, because it differs from *needle
		while (*needle != '\0' && *hay == *needle)
		{
			hay++;
			needle++;
		}
		if (*needle == '\0') //All of str2 was matched
		{
			return (char *)start;
		}
	}
	return NULL;
}

//Demo: print the whole string, then the part of it that starts at the first match of s2
int main()
{
	const char *s2 = "world";
	const char *s1 = "abcd1234world5678";

	printf("before:%s\n", s1);
	char *found = myStrstr(s1, s2);
	printf("after:%s\n", found);

	system("pause");
	return 0;
}

9.strtok (string segmentation)

prototype

  • Split string function: traverse the string pointed to by parameter 1, and return the first pointer of the corresponding string after encountering the split character contained in parameter 2;
  • The second parameter can store multiple split characters, such as: ",." (all characters in double quotation marks, including spaces);
  • Only one split string can be returned at a time because there is only one return value;
  • That is, if you want to use the function for a string multiple times, you can only set parameter 1 to NULL for the next parameter transfer after the first use, otherwise the record will be cleared and the starting position of the string will be traversed again;
  • There is a static variable inside the strtok library function that saves the current position in the original string. After one token has been split off, it automatically moves to the address just after the separator that ended that token;
  • The essence of splitting the string is to replace the corresponding separator character in the original string with '\0';
  • [note] as stated in the previous point, the library function modifies the string pointed to by parameter 1, so that string must not be a string literal (string literals are stored in the read-only constant area);

application

  • Dividing a long string into multiple small strings by multiple references to the function;
  • Note: as can be seen from points 4 and 5 above, when calling the function repeatedly to split off successive substrings, the first parameter is the string itself only on the first call and NULL on every later call;
//strtok example_1
//Split a long string into smaller strings and print one per line

int main()
{
	char s1[] = "You are so beautiful,but not belong to me.";
	const char *delims = ", .";

	//The first call passes the string itself; every later call passes NULL to continue in it
	for (char *token = strtok(s1, delims); token != NULL; token = strtok(NULL, delims))
	{
		printf("%s\n", token);
	}

	system("pause");
	return 0;
}

10.strerror (error report)

prototype

  • Return error information through error code;
  • Understanding of error code: 0 in Eg:return 0; is an "error code" returned by the main function. Generally, 0 represents success;
  • The existence of error codes is convenient for computers and error messages are convenient for programmers; computers are good at dealing with numbers but not strings, which is opposite to people;
  • The C language system global variable errno will automatically set the error code of the corresponding error message when an error occurs when using the C library function;

application

#include<stdio.h>
#include<Windows.h>
#include<string.h>
#include<errno.h>
#pragma warning(disable:4996)

//strerror example
//Shows an error code and its error message by trying to open a file for reading
//The header file <errno.h> must be included to use the global variable errno
int main()
{
	printf("before errno:%d\n", errno); //Output the original content of errno
	FILE *pf = fopen("test.text", "r");
	if (pf == NULL)
	{
		//Save errno at once: later library calls such as printf may overwrite it
		int err = errno;
		printf("after errno:%d\n", err); //Output the value errno was set to by the failed fopen
		printf("ERROR Opening file test.text:%s\n", strerror(err));
	}
	else
	{
		//The file was opened successfully; close it so the stream is not leaked
		fclose(pf);
	}

	system("pause");
	return 0;
}

Character classification function

Character classification functions (each returns true if its argument meets the condition below); header file: <ctype.h>
iscntrl: any control character
isspace: white space characters: space ' ', form feed '\f', line feed '\n', carriage return '\r', tab '\t' or vertical tab '\v'
isdigit: decimal digits 0~9
isxdigit: hexadecimal digits, including all decimal digits, lowercase a~f and uppercase A~F
islower: lowercase letters a~z
isupper: capital letters A~Z
isalpha: letters a~z or A~Z
isalnum: letters or digits: a~z, A~Z, 0~9
ispunct: punctuation marks, i.e. any graphic (printable) character that is not a digit or a letter
isgraph: any graphic character
isprint: any printable character, i.e. the graphic characters plus the space character

Memory operation function (the following header files are "string.h")

  • The basic unit of operation of mem series library functions is bytes
  • The parameter type of void * indicates that the operation can be performed on any type

1.memcpy (memory copy by byte)

prototype

  • Copying between overlapping regions is not allowed (the behavior is undefined), which is the difference from memmove; (the library shipped with the current compiler happens to handle overlap, so in practice the two functions show no difference here)

Analog implementation + special applications

#include<stdio.h>
#include<Windows.h>
#include<string.h>
#include<assert.h>
#pragma warning(disable:4996)
//Simulated memcpy: copies len bytes from src to dest, one byte at a time, front to back.
//Returns the original dest pointer.
//Because the copy always runs front to back, when dest starts inside the source region
//the later source bytes are overwritten before they are read.
void* myMemcpy(void* dest, const void* src, size_t len)
{
	assert(dest);
	assert(src);

	char *out = (char *)dest;
	const char *in = (const char *)src;
	for (size_t i = 0; i < len; i++)
	{
		out[i] = in[i];
	}
	return dest;
}

//Demo: a plain copy followed by two copies between overlapping parts of dest
int main()
{
	char dest[30] = { "12345" };
	const char* src = "abcd";

	//test1. General test
	printf("before:%s\n", dest);
	char *copied = (char*)myMemcpy(dest, src, strlen(src));
	printf("after:%s\n", copied);

	//test2. Forward self copy: the source starts one byte after the destination
	size_t shift_len = strlen(dest) - 1;
	myMemcpy(dest, dest + 1, shift_len);
	printf("Forward self copy:%s\n", dest);

	//test3. Reverse self copy: the destination starts one byte after the source
	shift_len = strlen(dest) - 1;
	myMemcpy(dest + 1, dest, shift_len);
	printf("Reverse self copy:%s\n", dest);

	system("pause");
	return 0;
}

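  • The wrong result of the reverse copy shows that this implementation cannot copy between overlapping regions
  • Specific reason: the memory regions overlap. Copying front to back overwrites source bytes before they are read, so data that has already been copied is copied again as if it were new data;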

2.memmove (two overlapping memories can be copied by byte)

prototype

Analog implementation + special applications

#include<stdio.h>
#include<Windows.h>
#include<string.h>
#include<assert.h>
#pragma warning(disable:4996)
//Simulated memmove: copies len bytes from src to dest and gives the expected result
//even when the two regions overlap.
//Returns the original dest pointer.
void* myMemmove(void* dest, const void* src, size_t len)
{
	assert(dest);
	assert(src);

	char *to = (char *)dest;
	const char *from = (const char *)src;

	if (to > from && to < from + len)
	{
		//dest starts inside the source region: copy back to front so that
		//every source byte is read before it gets overwritten
		for (size_t i = len; i > 0; i--)
		{
			to[i - 1] = from[i - 1];
		}
	}
	else
	{
		//Otherwise a plain front-to-back copy is used
		for (size_t i = 0; i < len; i++)
		{
			to[i] = from[i];
		}
	}
	return dest;
}

//Demo: a plain copy followed by two copies between overlapping parts of dest
int main()
{
	char dest[30] = { "12345" };
	const char* src = "abcd";

	//test1. General test
	printf("before:%s\n", dest);
	char *moved = (char*)myMemmove(dest, src, strlen(src));
	printf("after:%s\n", moved);

	//test2. Forward self copy: the source starts one byte after the destination
	size_t shift_len = strlen(dest) - 1;
	myMemmove(dest, dest + 1, shift_len);
	printf("Forward self copy:%s\n", dest);

	//test3. Reverse self copy: the destination starts one byte after the source
	shift_len = strlen(dest) - 1;
	myMemmove(dest + 1, dest, shift_len);
	printf("Reverse self copy:%s\n", dest);

	system("pause");
	return 0;
}

  • There is no error in reverse copy, which is different from memcpy function;
  • For library functions, the method is more complex and perfect. Now there is no difference between the two functions;

3.memset (memory assignment by byte)

prototype

  • Function: initialize the address pointed by ptr byte by byte and num bytes to value;
  • size_t is an unsigned integer type (the type of the result of sizeof);

application

#include<stdio.h>
#include<Windows.h>
#include<string.h>
#pragma warning(disable:4996)

//memset can initialize a whole array without writing a loop
int main()
{
	enum { COUNT = 5 };
	int a[COUNT];
	int b[COUNT];

	//The size argument is a number of bytes: sizeof(a), not sizeof(a) / sizeof(a[0])
	memset(a, 0, sizeof(a));
	//Every byte of every int is set to 1, so the elements do not end up holding the value 1
	memset(b, 1, sizeof(b));

	for (int idx = 0; idx < COUNT; idx++)
	{
		printf("%d ", a[idx]);
	}
	printf("\n");

	for (int idx = 0; idx < COUNT; idx++)
	{
		printf("%d ", b[idx]);
	}
	printf("\n");

	system("pause");
	return 0;
}

4.memcmp (compare memory content size)

prototype

  • The comparison is done byte by byte, treating each byte as an unsigned char value, whatever the type of the data;
  • Any bytes can be compared, including '\0', while strcmp stops at the first '\0';
  • If ptr1 > ptr2, a positive value is returned; if ptr1 = ptr2, 0 is returned; if ptr1 < ptr2, a negative value is returned;

Posted by mikosiko on Mon, 01 Nov 2021 10:25:22 -0700