C++ split string segmentation

There is no direct split function in C++. String segmentation can be achieved by the following methods:

1. With the help of strtok function

Function prototype: char *strtok(char *str, const char *delim);

Function description: splits the string str into tokens, treating every character contained in delim as a delimiter

Parameter description: str: the string to be split (pass NULL on later calls to keep splitting the same string); delim: the set of delimiter characters

Return value: A pointer to the next token split off from str. Returns NULL when there are no more tokens

Code 1: Divide char* type strings directly using strtok functions

#include <cstring>   // strtok
#include <iostream>
using namespace std;

// Splits a C string on spaces with strtok and prints one token per line.
int main() {
	// strtok overwrites each delimiter it finds with '\0', so the input must
	// be a modifiable array, not a pointer to a string literal.
	char s[] = "my name is lmm";
	const char *delim = " ";

	// The first call receives the buffer; every later call receives NULL so
	// that strtok continues from where the previous call stopped. The scan
	// ends when strtok returns NULL.
	for (char *p = strtok(s, delim); p != NULL; p = strtok(NULL, delim)) {
		cout << p << endl;
	}

	return 0;
}

Code 2: String type strings are segmented by strtok, and the results are saved in vector < string>.

Idea: First, the whole string is converted to char * type, and then the sub-string of char * type is segmented. Then, the sub-string is converted to string type and stored in the result array.

#include <cstring>
#include <iostream>
#include <string>
#include <vector>
using namespace std;

// Splits `str` into tokens with strtok and returns them in order.
//
// Every character of `delim` acts as a separator, and strtok skips runs of
// consecutive separators, so the result never contains empty strings.
// Returns an empty vector when `str` is empty.
//
// Note: strtok keeps its scan position in hidden static state, so this
// function must not be used while another strtok scan is in progress.
std::vector<std::string> split(const std::string& str, const std::string& delim) {
	std::vector<std::string> res;
	if (str.empty()) return res;

	// strtok overwrites separators with '\0', so it needs a writable copy of
	// the input. Holding that copy in a std::string means it is released
	// automatically on every return path.
	std::string buffer = str;

	// The separator set is only read, so delim.c_str() can be passed as is.
	for (char *p = std::strtok(&buffer[0], delim.c_str());
	     p != NULL;
	     p = std::strtok(NULL, delim.c_str())) {
		res.push_back(p); // converts the C-string token to std::string
	}

	return res;
}

// Case 1: empty input string.
// Prints the banner, then each token returned by split() on its own line.
void test1() {
	std::cout << "******test1****** " << std::endl;

	const std::string input = "";
	const std::vector<std::string> tokens = split(input, " ");

	typedef std::vector<std::string>::const_iterator TokenIt;
	for (TokenIt it = tokens.begin(); it != tokens.end(); ++it) {
		std::cout << *it << std::endl;
	}
}

// Case 2: input that consists of a single word.
// Prints the banner, then each token returned by split() on its own line.
void test2() {
	std::cout << "******test2****** " << std::endl;

	const std::string input = "my";
	const std::vector<std::string> tokens = split(input, " ");

	typedef std::vector<std::string>::const_iterator TokenIt;
	for (TokenIt it = tokens.begin(); it != tokens.end(); ++it) {
		std::cout << *it << std::endl;
	}
}

// Case 3: several words, with a run of consecutive spaces at the end.
// Prints the banner, then each token returned by split() on its own line.
void test3() {
	std::cout << "******test3****** " << std::endl;

	const std::string input = "my name is lmm   ";
	const std::vector<std::string> tokens = split(input, " ");

	typedef std::vector<std::string>::const_iterator TokenIt;
	for (TokenIt it = tokens.begin(); it != tokens.end(); ++it) {
		std::cout << *it << std::endl;
	}
}


// Entry point: runs the three demonstrations in order.
int main() {
	typedef void (*Demo)();
	const Demo demos[] = { test1, test2, test3 };

	for (unsigned k = 0; k < sizeof(demos) / sizeof(demos[0]); ++k) {
		demos[k]();
	}
	return 0;
}


Note: In test 3, multiple spaces appear in succession, and the spaces are filtered out.

2. find and substr functions of string class

1) find function

Function prototype: size_t find (const string & str, size_t pos = 0) const;

Function description: Find the first occurrence of substring str from pos position

Parameter description: str is the substring to be found, pos is the position at which the search starts

Return value: If found, return the location of the first occurrence of the substring, otherwise return string::npos

2) substr function

Function prototype: string substr(size_t pos = 0, size_t n = npos) const;

Function description: Gets the substring that starts at position pos and is at most n characters long

Parameter description: pos is the starting position and n is the maximum number of characters to take (the default, npos, means "up to the end of the string")

Return value: substring

#include <iostream>
#include <string>
#include <cstring>
#include <vector>
using namespace std;

// Returns a copy of `str` with its characters in reverse order.
// The argument is taken by value, so the caller's string is left untouched.
std::string reverse_one_word(std::string str) {
	std::string::size_type left = 0;
	std::string::size_type right = str.length(); // one past the last unswapped character

	// Swap the outermost pair and move both ends inward until they meet.
	while (left + 1 < right) {
		--right;
		const char held = str[left];
		str[left] = str[right];
		str[right] = held;
		++left;
	}
	return str;
}

// Splits `str` on every occurrence of the separator string `delim` and
// returns the pieces in order.
//
// `delim` is matched as a whole string, and empty pieces are kept: adjacent
// separators, or a separator at either end of `str`, produce "" entries in
// the result.
//
// Returns an empty vector when `str` is empty. When `delim` is empty there is
// nothing to search for, so the result holds `str` as its only element.
std::vector<std::string> split(const std::string& str, const std::string& delim) {
	std::vector<std::string> res;
	if (str.empty()) return res;

	// An empty separator matches at every position without consuming any
	// input, so searching for it would never advance.
	if (delim.empty()) {
		res.push_back(str);
		return res;
	}

	std::string::size_type start = 0; // index of the first character of the current piece
	for (;;) {
		const std::string::size_type pos = str.find(delim, start);
		if (pos == std::string::npos) break; // no further separator

		res.push_back(str.substr(start, pos - start));
		start = pos + delim.size(); // resume right after the separator
	}

	// Whatever follows the last separator is the final piece (possibly empty).
	// Searching `str` itself, rather than `str + delim`, guarantees that a
	// match can never straddle the end of the real input.
	res.push_back(str.substr(start));
	return res;
}

// Case 1: empty input string.
// Prints the banner, then each piece returned by split() on its own line.
void test1() {
	std::cout << "******test1****** " << std::endl;

	const std::string input = "";
	const std::vector<std::string> pieces = split(input, " ");

	for (std::vector<std::string>::size_type k = 0; k != pieces.size(); ++k) {
		std::cout << pieces[k] << std::endl;
	}
}

// Case 2: input that consists of a single word.
// Prints the banner, then each piece returned by split() on its own line.
void test2() {
	std::cout << "******test2****** " << std::endl;

	const std::string input = "my";
	const std::vector<std::string> pieces = split(input, " ");

	for (std::vector<std::string>::size_type k = 0; k != pieces.size(); ++k) {
		std::cout << pieces[k] << std::endl;
	}
}

// Case 3: several words, including a double space and trailing spaces.
// Prints the banner, then each piece returned by split() on its own line.
void test3() {
	std::cout << "******test3****** " << std::endl;

	const std::string input = "my name is  lmm   ";
	const std::vector<std::string> pieces = split(input, " ");

	for (std::vector<std::string>::size_type k = 0; k != pieces.size(); ++k) {
		std::cout << pieces[k] << std::endl;
	}
}


// Entry point: runs the three demonstrations in order.
int main() {
	typedef void (*Demo)();
	const Demo demos[] = { test1, test2, test3 };

	for (unsigned k = 0; k < sizeof(demos) / sizeof(demos[0]); ++k) {
		demos[k]();
	}
	return 0;
}

Note: The extra spaces in test3 are not filtered out; that is, the empty substring between two adjacent space separators is also stored in the result array. To avoid this, check each substring s as it is extracted: if it is empty (two separators were adjacent, so the substring between them is empty), skip it instead of adding it to the result array.


Posted by supratwinturbo on Sun, 02 Jun 2019 15:03:53 -0700