1. 2-8 线性回归进行二分类
代码
# 2-8 Binary classification with linear regression
#
# Fits the straight line y = w * x + b to 0/1 labels by batch gradient descent
# on the mean squared error, then compares the fitted value with a threshold
# to obtain the predicted class.
import numpy as np

# Parameter settings
iterations = 20  # number of gradient-descent iterations
learning_rate = 0.1  # learning rate
dataset = 1  # which training dataset to use (1 or 2)
threshold = 0.5  # decision threshold applied to the fitted line


def load_dataset(which):
    """Return the selected training set as two row vectors.

    Args:
        which: Dataset number, 1 or 2.

    Returns:
        Tuple ``(x_train, y_train)``. Both arrays are reshaped to ``(1, m)``
        so that they line up with each other during gradient descent.

    Raises:
        ValueError: If ``which`` is neither 1 nor 2.
    """
    if which == 1:  # dataset 1
        x = [50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
             61, 62, 63, 64, 65, 66, 67, 68, 69, 70]
        y = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    elif which == 2:  # dataset 2: dataset 1 plus three small x values
        x = [0, 5, 10, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
             61, 62, 63, 64, 65, 66, 67, 68, 69, 70]
        y = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    else:
        raise ValueError(f'unknown dataset {which!r}; expected 1 or 2')
    # Both vectors must be (1, m). A (m, 1) feature vector would broadcast
    # against the (1, m) targets into an (m, m) error matrix.
    return np.array(x).reshape((1, -1)), np.array(y).reshape((1, -1))


def fit_line(x_train, y_train, iterations, learning_rate):
    """Fit ``y = w * x + b`` by batch gradient descent on the squared error.

    Args:
        x_train: ``(1, m)`` array of (standardized) input features.
        y_train: ``(1, m)`` array of target values.
        iterations: Number of gradient-descent steps.
        learning_rate: Step size of each update.

    Returns:
        Tuple ``(w, b, costs_saved)``. ``w`` and ``b`` are ``(1, 1)`` arrays.
        ``costs_saved`` holds one mean-squared-error value per iteration,
        computed from the error evaluated before that iteration's update.
    """
    m_train = x_train.size  # number of training samples
    # (1, 1) arrays rather than plain ints so that .item(0) also works when
    # no iteration is executed.
    w = np.zeros((1, 1))
    b = np.zeros((1, 1))
    v = np.ones((1, m_train))  # all-ones row vector
    costs_saved = []

    for _ in range(iterations):
        e = w * x_train + b * v - y_train
        # Both updates use the same error vector e (simultaneous update).
        b = b - 2. * learning_rate * np.dot(v, e.T) / m_train
        w = w - 2. * learning_rate * np.dot(x_train, e.T) / m_train
        # Save the value of the cost function
        costs = np.dot(e, e.T) / m_train
        costs_saved.append(costs.item(0))
    return w, b, costs_saved


def main():
    """Train on the selected dataset, print the result and draw both figures."""
    # Imported here so that the numeric helpers above can be imported without
    # a plotting library being installed.
    import matplotlib.pyplot as plt

    x_train, y_train = load_dataset(dataset)

    # Standardize the input feature
    mean = np.mean(x_train)
    std = np.std(x_train, ddof=1)
    x_train = (x_train - mean) / std

    # Gradient descent
    w, b, costs_saved = fit_line(x_train, y_train, iterations, learning_rate)

    # Print the final weight and bias
    print(f'Weight={w.item(0):.3f}')
    print(f'Bias={b.item(0):.3f}')

    # Plot the cost function values
    plt.plot(range(1, np.size(costs_saved) + 1), costs_saved, 'r-o', linewidth=2, markersize=5)
    plt.ylabel('costs')
    plt.xlabel('iterations')
    plt.title('learning rate=' + str(learning_rate))
    plt.show()

    y_train_hat = (w * x_train + b) >= threshold
    print('Trainset class:', np.array2string(np.squeeze(y_train, axis=0)))
    print('Predicted class:', np.array2string(np.squeeze(y_train_hat.astype(int), axis=0)))

    # Plot the training samples (row 0 holds every sample) and the fitted line
    plt.figure()
    plt.plot(x_train[0] * std + mean, y_train[0], 'xc', markersize=12, markeredgewidth=2,
             label='training examples')
    # x coordinates used for drawing, in standardized units
    plot_x = np.arange(np.amin(x_train), np.amax(x_train) + 0.01, 0.01).reshape((-1, 1))
    plot_y = w * plot_x + b  # y coordinates of the fitted line
    plot_x = plot_x * std + mean  # undo the feature scaling
    # Class value after applying the decision threshold
    plt.plot(plot_x, plot_y >= threshold, '--y', linewidth=2, label='predicted grade')
    # Fitted straight line
    plt.plot(plot_x, plot_y, 'r', linewidth=2, label='Fitted straight line')
    plt.xlabel('Points')
    plt.ylabel('Grade')
    plt.legend()
    plt.show()


if __name__ == '__main__':
    main()

# Figure caption carried over from the original write-up: 结果图![]()
2. 2-9 逻辑回归分类预测
代码
# 2-9 Classification with logistic regression
#
# Trains a sigmoid unit by batch gradient descent on the mean squared error
# and reports the number of misclassified samples on the training and test
# splits of the drunk-driving detection dataset.
import numpy as np
import pandas as pd

# Parameter settings
iterations = 1000  # number of gradient-descent iterations
learning_rate = 0.1  # learning rate
m_train = 250  # number of training samples


def split_and_standardize(data, m_train, seed=1):
    """Shuffle the samples, standardize the features and split train/test.

    Args:
        data: 2-D array-like with one sample per row. The last column is the
            target value, every other column is an input feature.
        m_train: Number of rows (after shuffling) that form the training set.
        seed: Seed of the NumPy random generator used for the shuffle.

    Returns:
        Tuple ``(X_train, X_test, Y_train, Y_test)``. Feature matrices have
        shape ``(d, m)`` and target vectors have shape ``(1, m)``.

    Raises:
        ValueError: If ``m_train`` is smaller than 2 or larger than the number
            of rows in ``data``.
    """
    # Work on a float copy: writing standardized values back into an integer
    # array would silently truncate them, and the caller's data stays intact.
    data = np.array(data, dtype=float)
    m_all = np.shape(data)[0]  # total number of samples
    d = np.shape(data)[1] - 1  # dimension of the input features
    if not 2 <= m_train <= m_all:
        raise ValueError(f'm_train must be between 2 and {m_all}, got {m_train}')

    # Random generator with a fixed seed, used to shuffle the samples
    rng = np.random.default_rng(seed)
    rng.shuffle(data)

    # Statistics come from the training rows only and are applied to all rows
    mean = np.mean(data[0:m_train, 0:d], axis=0)
    std = np.std(data[0:m_train, 0:d], axis=0, ddof=1)
    std[std == 0] = 1.0  # a constant feature would otherwise divide by zero
    data[:, 0:d] = (data[:, 0:d] - mean) / std

    X_train = data[0:m_train, 0:d].T  # training input features
    X_test = data[m_train:, 0:d].T  # test input features
    Y_train = data[0:m_train, d].reshape((1, -1))  # training targets
    Y_test = data[m_train:, d].reshape((1, -1))  # test targets
    return X_train, X_test, Y_train, Y_test


def fit_logistic_regression(X_train, Y_train, iterations, learning_rate):
    """Train the weights and bias of a sigmoid unit by gradient descent.

    Args:
        X_train: ``(d, m)`` matrix of input features.
        Y_train: ``(1, m)`` row vector of 0/1 targets.
        iterations: Number of gradient-descent steps.
        learning_rate: Step size of each update.

    Returns:
        Tuple ``(w, b, costs_saved)``. ``w`` is ``(d, 1)`` and ``b`` is
        ``(1, 1)``. ``costs_saved`` holds one mean-squared-error value per
        iteration, computed from the error evaluated before that update.
    """
    d, m = np.shape(X_train)
    w = np.zeros((d, 1))  # weights
    # (1, 1) array rather than a plain int so that .item(0) also works when
    # no iteration is executed.
    b = np.zeros((1, 1))  # bias
    v = np.ones((1, m))  # all-ones row vector
    costs_saved = []

    for _ in range(iterations):
        z = np.dot(w.T, X_train) + b * v  # linear part
        Y_hat = 1. / (1 + np.exp(-z))  # sigmoid output
        e = Y_hat - Y_train  # error
        Y_1_Y_hat = Y_hat * (1 - Y_hat)  # derivative of the sigmoid
        b = b - 2. * learning_rate * np.dot(Y_1_Y_hat, e.T) / m
        w = w - 2. * learning_rate * np.dot(X_train, (Y_1_Y_hat * e).T) / m
        # Save the value of the cost function
        costs = np.dot(e, e.T) / m
        costs_saved.append(costs.item(0))
    return w, b, costs_saved


def count_errors(w, b, X, Y):
    """Return how many columns of ``X`` are classified differently from ``Y``.

    A sample is predicted as class 1 when ``w.T @ x + b >= 0``.
    """
    Y_hat = (np.dot(w.T, X) + b) >= 0
    return int(np.sum(np.abs(Y_hat.astype(float) - Y)))


def main():
    """Load the dataset, train the classifier and report the results."""
    # Imported here so that the numeric helpers above can be imported without
    # a plotting library being installed.
    import matplotlib.pyplot as plt

    # Read the drunk-driving detection dataset
    df = pd.read_csv('alcohol_dataset.csv')
    X_train, X_test, Y_train, Y_test = split_and_standardize(np.array(df), m_train)

    w, b, costs_saved = fit_logistic_regression(X_train, Y_train, iterations, learning_rate)

    # Print the final weights and bias
    print('Weights=', np.array2string(np.squeeze(w, axis=1), precision=3))
    print(f'Bias={b.item(0):.3f}')

    # Plot the cost function values
    plt.plot(range(1, np.size(costs_saved) + 1), costs_saved, 'r-o', linewidth=2, markersize=5)
    plt.ylabel('costs')
    plt.xlabel('iterations')
    plt.title('learning rate=' + str(learning_rate))
    plt.show()

    # Classification errors on the training set
    print('Trainset Predicted errors=', count_errors(w, b, X_train, Y_train))
    # Classification errors on the test set
    print('Testset Predicted errors=', count_errors(w, b, X_test, Y_test))


if __name__ == '__main__':
    main()
结果图