【C++】string类的底层-EW帮帮网

1.经典的string类问题

上一篇博客已经对string类进行了简单的介绍，大家只要能够正常使用即可。在面试中，面试官总喜欢让学生自己来模拟实现string类，最主要是实现string类的构造、拷贝构造、赋值运算符重载以及析构函数。大家看下以下string类的实现是否有问题？

// 为了和标准库区分，此处使用String
// Minimal owning C-string wrapper; named String (not string) to avoid clashing
// with the standard library.
//
// NOTE: this version deliberately declares neither a copy constructor nor a
// copy assignment operator, so the compiler-generated member-wise versions are
// used and copies end up sharing one buffer (the shallow-copy problem this
// example exists to demonstrate).
class String
{
public:
	/* Alternative default constructor:
	String()
	:_str(new char[1])
	{*_str = '\0';}
	*/
	//String(const char* str = "\0") bad example
	//String(const char* str = nullptr) bad example

	// Builds a String holding a private copy of the NUL-terminated text `str`.
	// Passing nullptr is a programming error: it trips the assert in debug
	// builds; when asserts are compiled out the object falls back to the empty
	// string so that _str is never left uninitialized for the destructor.
	String(const char* str = "")
	{
		if (nullptr == str)
		{
			assert(false);
			str = "";
		}
		_str = new char[strlen(str) + 1];	// +1 for the terminating '\0'
		strcpy(_str, str);
	}

	// Releases the owned buffer.
	~String()
	{
		if (_str)
		{
			delete[] _str;
			_str = nullptr;
		}
	}
private:
	char* _str;	// owned, NUL-terminated buffer
};
// 测试
// Demonstrates the shallow-copy problem of the String class above.
void TestString()
{
	String s1("hello bit!!!");
	// String declares no copy constructor, so the compiler-generated one runs
	// here and copies the pointer member: s2 and s1 refer to the same buffer.
	String s2(s1);
	// When the function returns, both destructors delete[] that one buffer.
}

说明：上述String类没有显式定义其拷贝构造函数与赋值运算符重载，此时编译器会合成默认
的，当用s1构造s2时，编译器会调用默认的拷贝构造。最终导致的问题是，s1、s2共用同一块内存空间，在释放时同一块空间被释放多次而引起程序崩溃，这种拷贝方式，称为浅拷贝。

2 .浅拷贝

浅拷贝：也称位拷贝，编译器只是将对象中的值拷贝过来。如果对象中管理资源，最后就会导致
多个对象共享同一份资源，当一个对象销毁时就会将该资源释放掉，而此时另一些对象不知道该
资源已经被释放，以为还有效，所以当继续对资源进行操作时，就会发生访问违规。

就像一个家庭中有两个孩子，但父母只买了一份玩具，两个孩子愿意一块玩，则万事大吉，万一
不想分享就你争我夺，玩具损坏。

可以采用深拷贝解决浅拷贝问题，即：每个对象都有一份独立的资源，不要和其他对象共享。父
母给每个孩子都买一份玩具，各自玩各自的就不会有问题了。

3.深拷贝

如果一个类中涉及到资源的管理，其拷贝构造函数、赋值运算符重载以及析构函数必须要显式给出。一般情况都是按照深拷贝方式提供。

3.1 传统版写法的String类

// Owning C-string wrapper, "traditional" deep-copy version: the copy
// constructor and copy assignment each allocate a fresh buffer and copy the
// characters, so no two objects ever share storage.
class String
{
public:
	// Builds a String holding a private copy of the NUL-terminated text `str`.
	// Passing nullptr is a programming error: it trips the assert in debug
	// builds; when asserts are compiled out the object falls back to the empty
	// string so that _str is never left uninitialized.
	String(const char* str = "")
	{
		if (nullptr == str)
		{
			assert(false);
			str = "";
		}
		_str = new char[strlen(str) + 1];	// +1 for the terminating '\0'
		strcpy(_str, str);
	}

	// Deep copy: allocate a buffer of the same length and copy the text.
	String(const String& s)
		: _str(new char[strlen(s._str) + 1])
	{
		strcpy(_str, s._str);
	}

	// Deep-copy assignment. The new buffer is fully built before the old one is
	// released, so a failed allocation leaves *this unchanged.
	String& operator=(const String& s)
	{
		if (this != &s)	// self-assignment: nothing to do
		{
			char* pStr = new char[strlen(s._str) + 1];
			strcpy(pStr, s._str);
			delete[] _str;
			_str = pStr;
		}
		return *this;
	}

	// Releases the owned buffer.
	~String()
	{
		if (_str)
		{
			delete[] _str;
			_str = nullptr;
		}
	}
private:
	char* _str;	// owned, NUL-terminated buffer
};

3.2 现代版写法的String类

// Owning C-string wrapper, "modern" version: copying is implemented by
// building a temporary and swapping buffers with it.
class String
{
public:
	// Builds a String holding a private copy of the NUL-terminated text `str`.
	// Passing nullptr is a programming error: it trips the assert in debug
	// builds; when asserts are compiled out the object falls back to the empty
	// string so that _str is never left uninitialized.
	String(const char* str = "")
	{
		if (nullptr == str)
		{
			assert(false);
			str = "";
		}
		_str = new char[strlen(str) + 1];	// +1 for the terminating '\0'
		strcpy(_str, str);
	}

	// Copy constructor: let a temporary do the allocation and copy, then take
	// its buffer. _str starts as nullptr so the temporary's destructor, which
	// receives it through the swap, has nothing to free.
	String(const String& s)
		: _str(nullptr)
	{
		String strTmp(s._str);
		std::swap(_str, strTmp._str);
	}

	// Which assignment is better: this one, or the commented-out one below?
	// The argument is taken by value, so the caller's object has already been
	// copied into `s`; swapping hands the old buffer to `s`, whose destructor
	// frees it.
	String& operator=(String s)
	{
		std::swap(_str, s._str);
		return *this;
	}
	/*
	String& operator=(const String& s)
	{
	if(this != &s)
	{
	String strTmp(s);
	swap(_str, strTmp._str);
	}
	return *this;
	}
	*/

	// Releases the owned buffer.
	~String()
	{
		if (_str)
		{
			delete[] _str;
			_str = nullptr;
		}
	}
private:
	char* _str;	// owned, NUL-terminated buffer
};

4.string类模拟实现代码参考

string.h

#define _CRT_SECURE_NO_WARNINGS 1
#pragma once
#include<iostream>
#include<cstring>
#include<assert.h>
using namespace std;

// Teaching re-implementation of a string class, kept in its own namespace so
// it does not collide with std::string.
namespace LCL
{
	// Character string stored in a heap buffer (_str) together with its
	// length (_size) and capacity (_capacity).
	class string
	{
	public:
		// Stream helpers declared at the bottom of this header.
		friend 	ostream& operator<<(ostream& os, const string& s);
		friend istream& operator>>(istream& is, string& s);
		friend istream& getline(istream& is, string& s, char delim);

		// Iterators are plain pointers into the character buffer.
		using iterator = char*;
		using const_iterator = const char*;

		string(const char* str = "");// the default must not be a null pointer

		string(const string& s);
		string& operator=(const string& s);

		~string();

		// Element access; the index is checked with assert only.
		char& operator[](size_t i)
		{
			assert(i < _size);

			return _str[i];
		}

		// Read-only element access; the index is checked with assert only.
		const char& operator[](size_t i) const
		{
			assert(i < _size);

			return _str[i];
		}


		// Returns _size.
		size_t size() const
		{
			return _size;
		}

		// Returns the internal buffer pointer.
		const char* c_str() const
		{
			return _str;
		}

		// Pointer to the first character.
		iterator begin()
		{
			return _str;
		}

		// Pointer one past the last character (_str + _size).
		iterator end()
		{
			return _str + _size;
		}

		const_iterator begin() const
		{
			return _str;
		}

		const_iterator end() const
		{
			return _str + _size;
		}

		// Makes the string empty; _capacity and the buffer itself are untouched.
		void clear()
		{
			_str[0] = '\0';
			_size = 0;
		}

		// The members below are only declared here.
		void reserve(size_t n);// grow the storage
		void push_back(char ch);
		void append(const char* str);
		string& operator+=(char ch);
		string& operator+=(const char* str);

		void insert(size_t pos, char ch);
		void insert(size_t pos, const char* str);
		void erase(size_t pos, size_t len=npos);

		size_t find(char ch, size_t pos = 0);
		size_t find(const char* str, size_t pos = 0);

		string substr(size_t pos, size_t len = npos);
		
		void swap(string& s);



	private:
		char* _str;
		size_t _size;
		size_t _capacity;

	public:
		// Special case: only integral static const members may be given a value
		// inside the class, as in the commented-out line below.
	 /*  static const size_t npos = -1;*/

	   // Declared here, defined out of class.
	   static const size_t npos;
	};

	// Relational operators.
	bool operator==(const string& lhs, const string& rhs);
	bool operator!=(const string& lhs, const string& rhs);
	bool operator>(const string& lhs, const string& rhs);
	bool operator<(const string& lhs, const string& rhs);
	bool operator<=(const string& lhs, const string& rhs);
	bool operator>=(const string& lhs, const string& rhs);

	// Stream output and input; getline reads up to `delim` ('\n' by default).
	ostream& operator<<(ostream& os, const string& s);
	istream& operator>>(istream& is, string& s);
	istream& getline(istream& is, string& s, char delim = '\n');
}

string.cpp

#include"string.h"

namespace LCL
{	
	// Out-of-class definition of npos; -1 converted to size_t is the largest
	// value that type can hold.
	const size_t string::npos = -1;
	// Builds a string from the NUL-terminated text `str`; capacity starts out
	// equal to the length.
	string::string(const char* str)
	{
		const size_t len = strlen(str);

		_str = new char[len + 1];	// one extra slot for the '\0'
		strcpy(_str, str);
		_size = len;
		_capacity = len;
	}

	//s2(s1)
	//传统写法
	//string::string(const string& s)
	//{
	//	_str = new char[s._capacity+1];
	//	strcpy(_str, s._str);
	//	_size = s._size;
	//	_capacity = s._capacity;
	//}

	// Copy constructor, copy-and-swap style: a temporary built from s's text
	// does the allocation, then *this takes over its members.
	//
	// The members must be given defined values first: swap() hands them to
	// `tmp`, whose destructor calls delete[] on the pointer it receives.
	// delete[] on nullptr is a no-op; on an uninitialized pointer it is
	// undefined behavior.
	string::string(const string& s)
		: _str(nullptr)
		, _size(0)
		, _capacity(0)
	{
		string tmp(s._str);
		swap(tmp);
	}

	// Copy assignment: makes *this an independent copy of `s` (same text, size
	// and capacity) and returns *this so assignments can be chained.
	string& string::operator=(const string& s)
	{
		if (this != &s)	// self-assignment: nothing to do
		{
			// Build the replacement first. If new throws, *this still owns its
			// old, valid buffer instead of a dangling pointer.
			char* fresh = new char[s._capacity + 1];
			strcpy(fresh, s._str);

			delete[] _str;
			_str = fresh;
			_size = s._size;
			_capacity = s._capacity;
		}

		return *this;
	}

	// Frees the character buffer and resets every member.
	string::~string()
	{
		delete[] _str;
		_capacity = 0;
		_size = 0;
		_str = nullptr;
	}

	// Grows the buffer so it can hold at least `n` characters. Requests that
	// do not exceed the current capacity are ignored (the buffer never shrinks).
	void string::reserve(size_t n)
	{
		if (n <= _capacity)
			return;

		// Always allocate one slot more than requested for the '\0'.
		char* grown = new char[n + 1];
		strcpy(grown, _str);	// carry the existing text over

		delete[] _str;
		_str = grown;
		_capacity = n;
	}

	// Appends one character, growing the buffer first when it is full
	// (4 slots for a zero-capacity buffer, otherwise double the capacity).
	void string::push_back(char ch)
	{
		if (_capacity == _size)
		{
			size_t grown = 4;
			if (_capacity != 0)
				grown = _capacity * 2;
			reserve(grown);
		}

		_str[_size++] = ch;
		_str[_size] = '\0';	// keep the text NUL-terminated
	}
	// Appends the NUL-terminated text `str`.
	void string::append(const char* str)
	{
		const size_t extra = strlen(str);
		const size_t needed = _size + extra;

		if (needed > _capacity)
		{
			// Double the capacity, or take exactly what is needed when
			// doubling is still too small.
			const size_t doubled = 2 * _capacity;
			reserve(doubled < needed ? needed : doubled);
		}

		// strcpy also copies the terminator.
		strcpy(_str + _size, str);
		_size = needed;
	}
	// Appends a single character; forwards to push_back.
	string& string::operator+=(char ch)
	{
		this->push_back(ch);

		return *this;
	}
	// Appends a NUL-terminated text; forwards to append.
	string& string::operator+=(const char* str)
	{
		this->append(str);

		return *this;
	}


	// Inserts `ch` in front of index `pos` (pos == _size appends).
	// Precondition (assert): pos <= _size.
	void string::insert(size_t pos, char ch)
	{
		assert(pos <= _size);
		if (_size == _capacity)
		{
			// Full: grow to 4 slots from zero capacity, otherwise double.
			reserve(_capacity == 0 ? 4 : 2 * _capacity);
		}

		// Shift [pos, _size] one slot to the right; the range includes the
		// terminating '\0'. memmove handles the overlap and avoids the
		// size_t -> int narrowing a signed countdown index would need.
		memmove(_str + pos + 1, _str + pos, _size - pos + 1);

		_str[pos] = ch;
		++_size;
	}
	// Inserts the NUL-terminated text `str` in front of index `pos`.
	// Precondition (assert): pos <= _size.
	void string::insert(size_t pos, const char* str)
	{
		assert(pos <= _size);

		const size_t len = strlen(str);
		const size_t needed = _size + len;
		if (needed > _capacity)
		{
			// Double the capacity, or take exactly what is needed when
			// doubling is still too small.
			const size_t doubled = 2 * _capacity;
			reserve(doubled < needed ? needed : doubled);
		}

		// Open a gap of `len` slots: move [pos, _size], terminator included,
		// to the right.
		memmove(_str + pos + len, _str + pos, _size - pos + 1);

		// Fill the gap with the new characters (without str's terminator).
		for (size_t k = 0; k != len; ++k)
		{
			_str[pos + k] = str[k];
		}
		_size = needed;
	}
	// Removes `len` characters starting at `pos`; when `len` reaches or passes
	// the end, everything from `pos` on is removed.
	// Precondition (assert): pos < _size.
	void string::erase(size_t pos, size_t len)
	{
		assert(pos < _size);

		const size_t tail = _size - pos;	// characters from pos to the end
		if (len < tail)
		{
			// Slide the part after the erased range, terminator included,
			// down onto position pos.
			memmove(_str + pos, _str + pos + len, tail - len + 1);
			_size -= len;
			return;
		}

		// Erasing through the end: just cut the string at pos.
		_str[pos] = '\0';
		_size = pos;
	}

	// Returns the index of the first occurrence of `ch` at or after `pos`, or
	// npos when there is none.
	// Precondition (assert): pos <= _size. pos == _size is allowed so that the
	// default pos = 0 also works on an empty string, where the result is npos.
	size_t string::find(char ch, size_t pos)
	{
		assert(pos <= _size);

		for (size_t i = pos; i < _size; i++)
		{
			if (ch == _str[i])
				return i;
		}

		return npos;
	}


	// Returns the index of the first occurrence of the NUL-terminated text
	// `str` at or after `pos`, or npos when there is none.
	// Precondition (assert): pos <= _size. pos == _size is allowed so that the
	// default pos = 0 also works on an empty string; the search then starts at
	// the terminating '\0'.
	size_t string::find(const char* str, size_t pos)
	{
		assert(pos <= _size);

		const char* ptr = strstr(_str + pos, str);
		if (ptr == nullptr)
		{
			return npos;
		}

		// Offset of the match from the start of the buffer.
		return static_cast<size_t>(ptr - _str);
	}


	// Returns a new string holding up to `len` characters starting at `pos`;
	// `len` is clamped to the characters available.
	// Precondition (assert): pos < _size.
	string string::substr(size_t pos, size_t len)
	{
		assert(pos < _size);
		if (len > (_size - pos))
		{
			len = _size - pos;
		}

		string str;
		// Reserve on the result, not on *this: a bare reserve(len) would
		// reallocate the source string and leave the result growing
		// incrementally in the loop below.
		str.reserve(len);
		for (size_t i = 0; i < len; i++)
		{
			str += _str[pos + i];
		}

		return str;
	}

	// Equal when strcmp on the two C strings reports no difference.
	bool operator==(const string& lhs, const string& rhs)
	{
		const int order = strcmp(lhs.c_str(), rhs.c_str());
		return order == 0;
	}

	// Negation of equality: true when strcmp reports any difference.
	bool operator!=(const string& lhs, const string& rhs)
	{
		const int order = strcmp(lhs.c_str(), rhs.c_str());
		return order != 0;
	}
	// True when lhs is neither less than nor equal to rhs under strcmp.
	bool operator>(const string& lhs, const string& rhs)
	{
		const int order = strcmp(lhs.c_str(), rhs.c_str());
		return order > 0;
	}
	// True when strcmp orders lhs before rhs.
	bool operator<(const string& lhs, const string& rhs)
	{
		const int order = strcmp(lhs.c_str(), rhs.c_str());
		return order < 0;
	}
	// True when lhs is less than or equal to rhs under strcmp.
	bool operator<=(const string& lhs, const string& rhs)
	{
		const int order = strcmp(lhs.c_str(), rhs.c_str());
		return order <= 0;
	}
	// True when lhs is not less than rhs under strcmp.
	bool operator>=(const string& lhs, const string& rhs)
	{
		const int order = strcmp(lhs.c_str(), rhs.c_str());
		return order >= 0;
	}

	// Writes the characters of `s` to `os` one at a time, size() of them.
	ostream& operator<<(ostream& os, const string& s)
	{
		for (string::const_iterator it = s.begin(); it != s.end(); ++it)
		{
			os << *it;
		}

		return os;
	}
	// Reads one token into `s`: characters up to the next ' ' or '\n', which
	// is consumed but not stored. Unlike std::operator>>, leading delimiters
	// are not skipped, so an immediate delimiter yields an empty string.
	//
	// End of input also ends the token. The next character is inspected as an
	// int_type via peek(), so EOF can be told apart from real characters; a
	// plain char cannot represent it, and comparing it only against the
	// delimiters would loop forever at end of input. If nothing at all could
	// be read before end of input, failbit is set so read loops terminate.
	istream& operator>>(istream& is, string& s)
	{
		s.clear();

		typedef istream::traits_type traits;
		const traits::int_type eof = traits::eof();

		// Characters are staged in a local buffer and appended in chunks so
		// the string does not grow once per character.
		char buffer[256];
		size_t used = 0;
		bool extracted = false;

		traits::int_type next = is.peek();
		while (next != eof && next != ' ' && next != '\n')
		{
			buffer[used++] = traits::to_char_type(is.get());
			extracted = true;
			if (used == sizeof(buffer) - 1)
			{
				buffer[used] = '\0';
				s += buffer;
				used = 0;
			}
			next = is.peek();
		}

		if (used > 0)
		{
			buffer[used] = '\0';
			s += buffer;
		}

		if (next != eof)
		{
			is.get();	// consume the delimiter that ended the token
		}
		else if (!extracted)
		{
			is.setstate(ios_base::failbit);
		}

		return is;
	}


	// Reads characters into `s` up to the next `delim`, which is consumed but
	// not stored.
	//
	// End of input also ends the line. The next character is inspected as an
	// int_type via peek(), so EOF can be told apart from real characters; a
	// plain char cannot represent it, and comparing it only against `delim`
	// would loop forever at end of input. If nothing at all could be read
	// before end of input, failbit is set so read loops terminate.
	istream& getline(istream& is, string& s, char delim)
	{
		s.clear();

		typedef istream::traits_type traits;
		const traits::int_type eof = traits::eof();
		const traits::int_type stop = traits::to_int_type(delim);

		// Characters are staged in a local buffer and appended in chunks so
		// the string does not grow once per character.
		char buffer[256];
		size_t used = 0;
		bool extracted = false;

		traits::int_type next = is.peek();
		while (next != eof && next != stop)
		{
			buffer[used++] = traits::to_char_type(is.get());
			extracted = true;
			if (used == sizeof(buffer) - 1)
			{
				buffer[used] = '\0';
				s += buffer;
				used = 0;
			}
			next = is.peek();
		}

		if (used > 0)
		{
			buffer[used] = '\0';
			s += buffer;
		}

		if (next != eof)
		{
			is.get();	// consume the delimiter that ended the line
		}
		else if (!extracted)
		{
			is.setstate(ios_base::failbit);
		}

		return is;
	}

	// Exchanges buffer pointer, size and capacity with `s`; no characters are
	// copied.
	void string::swap(string& s)
	{
		char* const otherStr = s._str;
		const size_t otherSize = s._size;
		const size_t otherCapacity = s._capacity;

		s._str = _str;
		s._size = _size;
		s._capacity = _capacity;

		_str = otherStr;
		_size = otherSize;
		_capacity = otherCapacity;
	}
}

测试：test.cpp

#include"string.h"

void testString01()
{
	LCL::string s1("hello world");
	cout << s1.c_str() << endl;

	s1 += ' ';
	s1 += "hello bit";

	cout << s1.c_str() << endl;


	LCL::string s2("hello world");
	s2.insert(0, 'a');
	cout << s2.c_str() << endl;

	LCL::string s3("hello world");
	cout << s3.c_str() << endl;
	s3.insert(0, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
	cout << s3.c_str() << endl;

	LCL::string s4("hello world");
	s4.erase(5);
	cout << s4.c_str() << endl;
}

// Exercises both find overloads and substr by cutting the host name out of
// a URL.
void testString02()
{
	LCL::string s5("hello world");
	cout << s5.find(' ') << endl;
	cout << s5.find("world") << endl;

	LCL::string s6 = "https://legacy.cplusplus.com/reference/string/string/?kw=string";

	// Check the first search before using its result: adding 3 to npos would
	// wrap around and restart the second search near the front of the string.
	size_t pos1 = s6.find(':');
	if (pos1 == LCL::string::npos)
	{
		return;
	}

	// Skip "://" and look for the '/' that ends the host name.
	size_t pos2 = s6.find('/', pos1 + 3);

	// Compare against LCL::string::npos: with `using namespace std` in effect
	// a bare `string` here names std::string, not the class under test.
	if (pos2 != LCL::string::npos)
	{
		LCL::string domain = s6.substr(pos1 + 3, pos2 - (pos1 + 3));
		cout << domain.c_str() << endl;
	}
}

void testString03()
{
	LCL::string s1("hello world");
	LCL::string s2(s1);
	cout << s2.c_str() << endl;	

	cout << (s1 == s2) << endl;
	cout << (s1 < s2) << endl;
	cout << ("hello world" == s1) << endl;
}

void testString04()
{
	//LCL::string s1="hhhhh";
	//cin >> s1;
	//cout << s1 << endl;

	LCL::string s2;
	getline(cin, s2,'#');
	cout << s2 << endl;
	

}

void testString05()
{
	LCL::string s1 = "llllll";
	LCL::string s2 = "ccccccc";
	
	s1.swap(s2);
	cout << s1 << endl;
	cout << s2 << endl;
}



int main()
{
	//LCL::string s1("1111");
	//cout << s1.c_str() << endl;

	//LCL::string s2;
	//cout << s2.c_str() << endl;


	//LCL::string s3("1234");
	//cout << s3.size() << endl;
	//s3[0] = 'x';
	//cout << s3.c_str() << endl;
	//for (size_t i = 0; i < s3.size(); i++)
	//{
	//	cout << s3[i] << " ";
	//}
	//cout << endl; 

	//LCL::string::iterator it1 = s3.begin();
	//while (it1!=s3.end())
	//{
	//	//*it1 = 'a';
	//	cout << *it1 << " ";
	//	it1++;
	//}
	//cout << endl;
	范围for
	支持迭代器就支持范围for
	//for (auto e : s3)
	//{
	//	cout << e << " ";
	//}
	//cout << endl;


	//const LCL::string s4("lllllll");
	//cout << s4.size() << endl;	
	//cout << s4[1] << endl;
	//LCL::string::const_iterator it2 = s4.begin();
	//while (it2!=s4.end())
	//{
	//	cout << *it2 << " ";
	//	it2++;
	//}
	//cout << endl;
	//for (auto a : s4)
	//{
	//	cout << a << " ";
	//}
	//cout << endl;
	/*LCL::string s2("2");
	cout << s2.c_str() << endl;*/

	//s2 += '1';
	//s2 += "33333";

	//cout << s2.c_str() << endl;

	//testString01();
	//testString02();
	//testString03();
	//testString04();
	testString05();
	return 0;
}

5.写时拷贝

写时拷贝就是一种拖延症，是在浅拷贝的基础之上增加了引用计数的方式来实现的。
引用计数：用来记录资源使用者的个数。在构造时，将资源的计数给成1，每增加一个对象使用该资源，就给计数增加1，当某个对象被销毁时，先给该计数减1，然后再检查是否需要释放资源，如果计数为0，说明该对象是资源的最后一个使用者，将该资源释放；否则就不能释放，因为还有其他对象在使用该资源。

#include <iostream>
#include <cstring>
#include <atomic>

// String whose copies share one character buffer through a reference count.
// The class offers no mutating operations, so the buffer is never written
// after construction.
//
// Invariant: `data` and `ref_count` are either both null (empty string: default
// constructed, built from nullptr, or moved from) or both non-null, in which
// case *ref_count is the number of CowString objects sharing `data`.
class CowString {
public:
	// Empty string: owns neither a buffer nor a counter.
	CowString() : data(nullptr), length(0), ref_count(nullptr) {}

	// Builds a string holding a private copy of the NUL-terminated text `str`.
	// A null `str` produces the empty string.
	CowString(const char* str) : data(nullptr), length(0), ref_count(nullptr) {
		if (str) {
			const size_t len = std::strlen(str);
			std::atomic<int>* counter = new std::atomic<int>(1);
			char* buffer = nullptr;
			try {
				buffer = new char[len + 1];
			}
			catch (...) {
				delete counter; // do not leak the counter if the buffer allocation fails
				throw;
			}
			std::strcpy(buffer, str);

			data = buffer;
			length = len;
			ref_count = counter;
		}
	}

	// Copy constructor: share other's buffer and register one more owner.
	CowString(const CowString& other) : data(other.data), length(other.length), ref_count(other.ref_count) {
		if (ref_count) {
			++(*ref_count);
		}
	}

	// Move constructor: take over other's share; other becomes the empty string.
	// Nothing is allocated, so the noexcept promise holds.
	CowString(CowString&& other) noexcept : data(other.data), length(other.length), ref_count(other.ref_count) {
		other.data = nullptr;
		other.length = 0;
		other.ref_count = nullptr;
	}

	// Destructor: give up this object's share of the buffer.
	~CowString() {
		release();
	}

	// Copy assignment via copy-and-swap: `temp` takes a share of other's
	// buffer, the members are exchanged, and temp's destructor then releases
	// the share *this used to hold.
	CowString& operator=(const CowString& other) {
		if (this != &other) { // self-assignment: nothing to do
			CowString temp(other);
			std::swap(data, temp.data);
			std::swap(length, temp.length);
			std::swap(ref_count, temp.ref_count);
		}
		return *this;
	}

	// Move assignment: release the current share through the reference count
	// (the buffer may still be in use by other objects, so it must not be
	// deleted unconditionally), then take over other's share.
	CowString& operator=(CowString&& other) noexcept {
		if (this != &other) {
			release();
			data = other.data;
			length = other.length;
			ref_count = other.ref_count;

			other.data = nullptr;
			other.length = 0;
			other.ref_count = nullptr;
		}
		return *this;
	}

	// Returns the text as a C string; the empty string yields "" rather than a
	// null pointer, so the result can always be printed.
	const char* c_str() const {
		return data ? data : "";
	}

	// Returns the number of characters (without the terminator).
	size_t size() const {
		return length;
	}

	// ... further members (operator+, substr, ...) can be added as needed ...

private:
	// Drops this object's share: decrements the counter and frees the buffer
	// and the counter when this was the last owner. Leaves *this empty.
	void release() noexcept {
		if (ref_count && --(*ref_count) == 0) {
			delete[] data;
			delete ref_count;
		}
		data = nullptr;
		length = 0;
		ref_count = nullptr;
	}

	char* data; // shared character buffer (null for the empty string)
	size_t length; // number of characters in data
	std::atomic<int>* ref_count; // shared owner count, atomic so updates from several threads do not race
};

// Demo: two CowString objects sharing one buffer, plus a hand-made private
// copy standing in for the "write" half of copy-on-write.
int main() {
	CowString str1("Hello, World!");
	CowString str2 = str1; // copy constructor: str2 shares str1's buffer

	std::cout << "str1: " << str1.c_str() << std::endl;
	std::cout << "str2: " << str2.c_str() << std::endl;

	// CowString has no mutating interface, so a write is simulated by hand:
	// deep-copy the text into a separate buffer and modify only that copy.
	// Writing through the shared buffer instead would change str1 and str2
	// together.
	char* temp = new char[str1.size() + 1];
	std::strcpy(temp, str1.c_str());
	temp[5] = 'C'; // a single character; 'C++' would be a multi-character literal

	// The shared buffer was never touched, so both lines still show the
	// original text.
	std::cout << "After modifying a private copy: " << std::endl;
	std::cout << "str1: " << str1.c_str() << std::endl;
	std::cout << "str2: " << str2.c_str() << std::endl;

	// temp is a raw allocation and must be freed by hand; the CowString
	// objects clean up in their destructors when main returns.
	delete[] temp;

	return 0;
}

// 注意：上面的 main 并没有真正修改共享数据，而是先手动深拷贝一份（temp）再修改这份拷贝，以此模拟“写时拷贝”中的“写”操作。
// 在真正的COW实现中，你应该在修改前检查引用计数，并在需要时进行深拷贝。
// 此外，上面的代码没有实现完整的COW字符串类，比如缺少对operator+等的支持。
// 这个示例仅用于教学目的，展示了COW的基本概念。

【C++】string类的底层

1.经典的string类问题

2 .浅拷贝

3.深拷贝

3.1 传统版写法的String类

3.2 现代版写法的String类

4.string类模拟实现代码参考

5.写时拷贝

网站公告

今日签到

热门文章

最新发布