机器学习课堂5二分类与逻辑回归

发布于:2025-03-31 ⋅ 阅读:(28) ⋅ 点赞:(0)

1. 例2-8:线性回归进行二分类

代码

#  2-8线性回归进行二分类
import numpy as np
import matplotlib.pyplot as plt

# Binary classification with a thresholded linear regression.
#
# A one-feature linear model y = w * x + b is fitted with batch gradient
# descent on the mean squared error; a sample is assigned to class 1 when the
# fitted value reaches `threshold`. The script prints the learned parameters
# and the predicted classes, then plots the cost curve and the fitted line.

# Parameter settings
iterations = 20  # number of gradient-descent iterations
learning_rate = 0.1  # gradient-descent step size
dataset = 1  # which built-in training set to use (1 or 2)
threshold = 0.5  # decision threshold applied to the regression output

# Training sets. Inputs and targets are both stored as row vectors of shape
# (1, m) so that every element-wise expression below lines up per sample.
if dataset == 1:  # data set 1
    x_train = np.array(
        [50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70]).reshape((1, -1))
    y_train = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]).reshape((1, -1))
elif dataset == 2:  # data set 2: data set 1 plus three low-valued samples (0, 5, 10)
    # Must be a row vector like data set 1: a column vector here would
    # broadcast against y_train/v into an (m, m) error matrix and break the
    # gradient computation.
    x_train = np.array(
        [0, 5, 10, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70]).reshape((1, -1))
    y_train = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]).reshape((1, -1))
else:
    # Fail early with a clear message instead of a NameError on x_train below.
    raise ValueError(f'Unknown dataset: {dataset!r} (expected 1 or 2)')
m_train = x_train.size  # number of training samples

# Standardize the input feature (zero mean, unit sample standard deviation).
mean = np.mean(x_train)
std = np.std(x_train, ddof=1)
x_train = (x_train - mean) / std

# Initialization
w, b = 0, 0  # weight and bias; they become (1, 1) arrays after the first update
v = np.ones((1, m_train))  # row vector of ones, used to sum over the samples
costs_saved = []  # cost value recorded at every iteration

# Gradient descent on the mean squared error.
for _ in range(iterations):
    e = w * x_train + b * v - y_train  # residuals, shape (1, m)
    # Both updates use the residuals computed before either parameter changes.
    b = b - 2. * learning_rate * np.dot(v, e.T) / m_train
    w = w - 2. * learning_rate * np.dot(x_train, e.T) / m_train
    # Save the cost (mean squared error of the pre-update parameters).
    costs = np.dot(e, e.T) / m_train
    costs_saved.append(costs.item(0))

# Print the final weight and bias. float(np.squeeze(...)) handles both the
# (1, 1) arrays produced by training and the plain ints left when
# iterations == 0.
print(f'Weight={float(np.squeeze(w)):.3f}')
print(f'Bias={float(np.squeeze(b)):.3f}')

# Plot the cost value against the iteration number.
plt.plot(range(1, len(costs_saved) + 1), costs_saved, 'r-o', linewidth=2, markersize=5)
plt.ylabel('costs')
plt.xlabel('itertions')
plt.title('learning rate=' + str(learning_rate))
plt.show()

# Classify the training samples by thresholding the fitted values.
y_train_hat = (w * x_train + b) >= threshold
print('Trainset class:', np.array2string(np.squeeze(y_train, axis=0)))
print('Predicted class:', np.array2string(np.squeeze(y_train_hat.astype(int), axis=0)))

# Plot the training samples, the fitted line and the thresholded prediction.
plt.figure()
# Row 0 holds all samples; undo the standardization so the x axis shows the
# original feature values.
plt.plot(x_train[0] * std + mean, y_train[0], 'xc', markersize=12, markeredgewidth=2,
         label='training examples')
plot_x = np.arange(np.amin(x_train), np.amax(x_train) + 0.01, 0.01).reshape((-1, 1))  # x grid (standardized)
plot_y = w * plot_x + b  # fitted line evaluated on the grid
plot_x = plot_x * std + mean  # inverse of the feature scaling
plt.plot(plot_x, (plot_y >= threshold).astype(int), '--y', linewidth=2,
         label='predicted grade')  # class after thresholding
plt.plot(plot_x, plot_y, 'r', linewidth=2, label='Fittted straight line')  # fitted line
plt.xlabel('Points')
plt.ylabel('Grade')
plt.legend()
plt.show()
结果图

2. 例2-9:逻辑回归分类预测

代码

#  2-9逻辑回归分类预测
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Logistic-regression classifier trained with batch gradient descent.
#
# The model is y_hat = sigmoid(w^T x + b). The cost that is minimized (and
# recorded) is the mean squared error between y_hat and the target, so the
# gradients carry the extra sigmoid-derivative factor y_hat * (1 - y_hat).
# The script prints the learned parameters and the number of misclassified
# samples on the training and test sets, and plots the cost curve.

# Parameter settings
iterations = 1000  # number of gradient-descent iterations
learning_rate = 0.1  # gradient-descent step size
m_train = 250  # number of samples used for training; the rest form the test set

# Load the drunk-driving detection data set. The last column is the target,
# all preceding columns are input features.
# NOTE(review): targets are presumably 0/1 labels - confirm against the CSV.
df = pd.read_csv('alcohol_dataset.csv')
# Force a float array: if every CSV column were integer, the in-place
# standardization below would be silently truncated back to integers.
data = df.to_numpy(dtype=float)
m_all = np.shape(data)[0]  # total number of samples
d = np.shape(data)[1] - 1  # number of input features
if not 0 < m_train <= m_all:
    # Otherwise the ones-vector below would not match the data and training
    # would fail with an obscure broadcasting error.
    raise ValueError(f'm_train={m_train} must be in 1..{m_all} (number of samples in the data set)')
m_test = m_all - m_train  # number of test samples

# Shuffle the samples (rows) in place with a fixed-seed generator so the
# train/test split is reproducible.
rng = np.random.default_rng(1)
rng.shuffle(data)

# Standardize the input features using statistics of the training rows only,
# then apply the same transform to every sample.
mean = np.mean(data[0:m_train, 0:d], axis=0)  # per-feature mean of the training samples
std = np.std(data[0:m_train, 0:d], axis=0, ddof=1)  # per-feature sample standard deviation
data[:, 0:d] = (data[:, 0:d] - mean) / std

# Split the data set. Features are transposed to shape (d, m); targets are
# row vectors of shape (1, m).
X_train = data[0:m_train, 0:d].T  # training inputs
X_test = data[m_train:, 0:d].T  # test inputs
Y_train = data[0:m_train, d].reshape((1, -1))  # training targets
Y_test = data[m_train:, d].reshape((1, -1))  # test targets

# Initialization
w = np.zeros((d, 1))  # weights, column vector
b = 0  # bias; becomes a (1, 1) array after the first update
v = np.ones((1, m_train))  # row vector of ones
costs_saved = []  # cost value recorded at every iteration

# Training loop: update the weights and the bias.
for _ in range(iterations):
    z = np.dot(w.T, X_train) + b * v  # linear part, shape (1, m_train)
    Y_hat = 1. / (1 + np.exp(-z))  # sigmoid output
    e = Y_hat - Y_train  # prediction error
    Y_1_Y_hat = Y_hat * (1 - Y_hat)  # derivative of the sigmoid w.r.t. z
    # Both updates use the error computed before either parameter changes.
    b = b - 2. * learning_rate * np.dot(Y_1_Y_hat, e.T) / m_train  # update bias
    w = w - 2. * learning_rate * np.dot(X_train, (Y_1_Y_hat * e).T) / m_train  # update weights
    # Save the cost (mean squared error of the pre-update parameters).
    costs = np.dot(e, e.T) / m_train
    costs_saved.append(costs.item(0))

# Print the final weights and bias. float(np.squeeze(b)) handles both the
# (1, 1) array produced by training and the plain int left when
# iterations == 0.
print('Weights=', np.array2string(np.squeeze(w, axis=1), precision=3))
print(f'Bias={float(np.squeeze(b)):.3f}')

# Plot the cost value against the iteration number.
plt.plot(range(1, len(costs_saved) + 1), costs_saved, 'r-o', linewidth=2, markersize=5)
plt.ylabel('costs')
plt.xlabel('itertions')
plt.title('learning rate=' + str(learning_rate))
plt.show()

# Classification errors on the training set. Thresholding z at 0 is the same
# as thresholding sigmoid(z) at 0.5.
Y_train_hat = (np.dot(w.T, X_train) + b) >= 0
errors_train = np.sum(np.abs(Y_train_hat - Y_train))
print('Trainset Predicted errors=', errors_train.astype(int))

# Classification errors on the test set.
Y_test_hat = (np.dot(w.T, X_test) + b) >= 0
errors_test = np.sum(np.abs(Y_test_hat - Y_test))
print('Testset Predicted errors=', errors_test.astype(int))

结果图