Machine Learning Final Project Answers

Published: 2025-06-26
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dataset.mnist import load_mnist

# sigmoid
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_grad(x):
    s = sigmoid(x)
    return s * (1 - s)

# relu
def relu(x):
    return np.maximum(0, x)

def relu_grad(x):
    grad = np.zeros_like(x)
    grad[x >= 0] = 1
    return grad

# softmax
def softmax(x):
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis=0)
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T 

    x = x - np.max(x) 
    return np.exp(x) / np.sum(np.exp(x))

def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    if t.size == y.size:
        t = t.argmax(axis=1)
             
    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size

def accuracy(y, t):
    pred = np.argmax(y, axis=1)
    true = np.argmax(t, axis=1)
    return np.mean(pred == true)
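
# --- Quick sanity check (illustrative; not part of the original assignment) ---
# softmax rows should sum to 1, cross-entropy of confident correct predictions
# should be small, and accuracy on this toy batch should be 1.0.
_y = softmax(np.array([[5.0, 1.0, 0.1], [0.5, 6.0, 0.2]]))
_t = np.array([[1, 0, 0], [0, 1, 0]])
print(_y.sum(axis=1))               # -> [1. 1.]
print(cross_entropy_error(_y, _t))  # small positive value
print(accuracy(_y, _t))             # -> 1.0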

class BatchNorm:
    def __init__(self, gamma, beta, momentum=0.9, running_mean=None, running_var=None):
        self.gamma = gamma
        self.beta = beta
        self.momentum = momentum
        self.input_shape = None 


        self.running_mean = running_mean
        self.running_var = running_var  
        
        self.batch_size = None
        self.xc = None
        self.std = None
        self.dgamma = None
        self.dbeta = None

    def forward(self, x, train_flg=True):
        self.input_shape = x.shape
        if x.ndim != 2:
            N, C, H, W = x.shape
            x = x.reshape(N, -1)

        out = self.__forward(x, train_flg)
        
        return out.reshape(*self.input_shape)
            
    def __forward(self, x, train_flg):
        if self.running_mean is None:
            N, D = x.shape
            self.running_mean = np.zeros(D)
            self.running_var = np.zeros(D)
                        
        if train_flg:
            mu = x.mean(axis=0)
            xc = x - mu
            var = np.mean(xc**2, axis=0)
            std = np.sqrt(var + 10e-7)
            xn = xc / std
            
            self.batch_size = x.shape[0]
            self.xc = xc
            self.xn = xn
            self.std = std
            self.running_mean = self.momentum * self.running_mean + (1-self.momentum) * mu
            self.running_var = self.momentum * self.running_var + (1-self.momentum) * var            
        else:
            xc = x - self.running_mean
            xn = xc / ((np.sqrt(self.running_var + 10e-7)))
            
        out = self.gamma * xn + self.beta 
        return out

    def backward(self, dout):
        if dout.ndim != 2:
            N, C, H, W = dout.shape
            dout = dout.reshape(N, -1)

        dx = self.__backward(dout)

        dx = dx.reshape(*self.input_shape)
        return dx

    def __backward(self, dout):
        dbeta = dout.sum(axis=0)
        dgamma = np.sum(self.xn * dout, axis=0)
        dxn = self.gamma * dout
        dxc = dxn / self.std
        dstd = -np.sum((dxn * self.xc) / (self.std * self.std), axis=0)
        dvar = 0.5 * dstd / self.std
        dxc += (2.0 / self.batch_size) * self.xc * dvar
        dmu = np.sum(dxc, axis=0)
        dx = dxc - dmu / self.batch_size
        
        self.dgamma = dgamma
        self.dbeta = dbeta
        
        return dx
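
# --- BatchNorm sanity check (illustrative; not part of the original assignment) ---
# In training mode, each feature of the output should have roughly zero mean and
# unit variance (with gamma=1, beta=0); backward returns a gradient of the same
# shape as the input, and eval mode uses the running statistics instead.
_bn = BatchNorm(gamma=np.ones(3), beta=np.zeros(3))
_x = np.random.randn(8, 3) * 5.0 + 2.0
_out = _bn.forward(_x, train_flg=True)
print(_out.mean(axis=0), _out.var(axis=0))     # ~[0 0 0], ~[1 1 1]
print(_bn.backward(np.ones_like(_out)).shape)  # (8, 3)
_ = _bn.forward(_x, train_flg=False)           # eval path uses running mean/var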

# Dropout layer
class Dropout:
    def __init__(self, dropout_ratio=0.08):
        self.dropout_ratio = dropout_ratio
        self.mask = None
    def forward(self, x, train_flg=True):
        if train_flg:
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        else:
            return x * (1.0 - self.dropout_ratio)
    def backward(self, dout):
        return dout * self.mask
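
# --- Dropout sanity check (illustrative; not part of the original assignment) ---
# This is "plain" (non-inverted) dropout: units are zeroed with probability
# dropout_ratio during training, and activations are scaled by (1 - dropout_ratio)
# at test time, so expected activations match between the two modes.
_do = Dropout(dropout_ratio=0.5)
_x = np.ones((4, 6))
print(_do.forward(_x, train_flg=True).mean())   # ~0.5 on average (random mask)
print(_do.forward(_x, train_flg=False).mean())  # exactly 0.5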

# Five-layer network implementation
class FiveLayerNet:
    def __init__(self, input_size=784, hidden_sizes=[100, 100, 50, 50], output_size=10,
                 weight_init_std='he', use_batchnorm=False, use_dropout=False, dropout_ratio=0.08,
                 weight_decay_lambda=0.0):
        self.use_batchnorm = use_batchnorm
        self.use_dropout = use_dropout
        self.weight_decay_lambda = weight_decay_lambda

        # Initialize weights and biases
        self.params = {}
        all_sizes = [input_size] + hidden_sizes + [output_size]
        
        # Weight initialization: He, Xavier, or a fixed standard deviation
        for i in range(len(all_sizes)-1):
            if weight_init_std == 'he':
                # He initialization, suited to ReLU activations
                scale = np.sqrt(2.0 / all_sizes[i])
                self.params['W'+str(i+1)] = scale * np.random.randn(all_sizes[i], all_sizes[i+1])
            elif weight_init_std == 'xavier':
                # Xavier initialization, suited to sigmoid-like activations
                scale = np.sqrt(1.0 / all_sizes[i])
                self.params['W'+str(i+1)] = scale * np.random.randn(all_sizes[i], all_sizes[i+1])
            else:
                self.params['W'+str(i+1)] = weight_init_std * np.random.randn(all_sizes[i], all_sizes[i+1])
                
            self.params['b'+str(i+1)] = np.zeros(all_sizes[i+1])

        # Initialize BatchNorm layers
        if self.use_batchnorm:
            self.bn_layers = {}
            for i in range(len(hidden_sizes)):
                gamma = np.ones(hidden_sizes[i])
                beta = np.zeros(hidden_sizes[i])
                self.bn_layers['bn' + str(i+1)] = BatchNorm(gamma, beta)

        # Initialize Dropout layers
        if self.use_dropout:
            self.dropout_layers = {}
            for i in range(len(hidden_sizes)):
                self.dropout_layers['dropout' + str(i+1)] = Dropout(dropout_ratio)

    def forward(self, x, train_flg=True):
        self.x = x
        self.layers = {}
        self.activations = {}

        a = x
        L = len(self.params) // 2  

        for i in range(1, L):
            W = self.params['W'+str(i)]
            b = self.params['b'+str(i)]

            a = np.dot(a, W) + b
            if self.use_batchnorm:
                a = self.bn_layers['bn'+str(i)].forward(a, train_flg)
            a = relu(a)
            if self.use_dropout:
                a = self.dropout_layers['dropout'+str(i)].forward(a, train_flg)
            self.activations['layer' + str(i)] = a

        # Final layer: affine transform followed by softmax
        W = self.params['W'+str(L)]
        b = self.params['b'+str(L)]
        score = np.dot(a, W) + b
        y = softmax(score)

        self.score = score
        self.y = y
        return y

    def loss(self, x, t):
        y = self.forward(x, train_flg=True)
        weight_decay = 0
        for i in range(len(self.params)//2):
            W = self.params['W'+str(i+1)]
            weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)
        return cross_entropy_error(y, t) + weight_decay

    def accuracy(self, x, t):
        y = self.forward(x, train_flg=False)
        return accuracy(y, t)

    def gradient(self, x, t):
        # Forward pass
        self.loss(x, t)  
        batch_num = x.shape[0]
        grads = {}

        # Output-layer gradient of softmax + cross-entropy: y - t (one-hot), averaged over the batch
        dout = self.y.copy()
        dout[np.arange(batch_num), np.argmax(t, axis=1)] -= 1
        dout = dout / batch_num

        L = len(self.params)//2

        # Backpropagate through the last layer
        grads['W'+str(L)] = np.dot(self.activations['layer'+str(L-1)].T, dout)
        grads['b'+str(L)] = np.sum(dout, axis=0)
        
        if self.weight_decay_lambda > 0:
            grads['W'+str(L)] += self.weight_decay_lambda * self.params['W'+str(L)]

        dout = np.dot(dout, self.params['W'+str(L)].T)

        # Backpropagate through the hidden layers
        for i in reversed(range(1, L)):
            # Dropout backward
            if self.use_dropout:
                dout = self.dropout_layers['dropout'+str(i)].backward(dout)

            dout = dout * (self.activations['layer'+str(i)] > 0)

            # BatchNorm backward must mirror the forward pass, which applies BN to every hidden layer
            if self.use_batchnorm:
                dout = self.bn_layers['bn'+str(i)].backward(dout)

            if i == 1:
                a_prev = self.x
            else:
                a_prev = self.activations['layer'+str(i-1)]

            grads['W'+str(i)] = np.dot(a_prev.T, dout)
            grads['b'+str(i)] = np.sum(dout, axis=0)
            
            if self.weight_decay_lambda > 0:
                grads['W'+str(i)] += self.weight_decay_lambda * self.params['W'+str(i)]

            dout = np.dot(dout, self.params['W'+str(i)].T)

        return grads
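
    # --- Optional numerical gradient check (illustrative sketch; not part of the
    # original assignment). Compares the analytic gradients above against centered
    # finite differences. Only meaningful with use_dropout=False (the random mask
    # would make the two loss evaluations inconsistent); BatchNorm is fine because
    # its training-mode statistics are recomputed deterministically from the batch.
    def numerical_gradient(self, x, t, eps=1e-4):
        num_grads = {}
        for key, param in self.params.items():
            grad = np.zeros_like(param)
            it = np.nditer(param, flags=['multi_index'])
            while not it.finished:
                idx = it.multi_index
                orig = param[idx]
                param[idx] = orig + eps
                loss_plus = self.loss(x, t)
                param[idx] = orig - eps
                loss_minus = self.loss(x, t)
                param[idx] = orig                      # restore the parameter
                grad[idx] = (loss_plus - loss_minus) / (2 * eps)
                it.iternext()
            num_grads[key] = grad
        return num_grads
    # Usage sketch (hypothetical, on a tiny configuration):
    #   net = FiveLayerNet(hidden_sizes=[5, 5, 5, 5], use_batchnorm=True)
    #   ana = net.gradient(x_train[:3], t_train[:3])
    #   num = net.numerical_gradient(x_train[:3], t_train[:3])
    #   for k in ana: print(k, np.max(np.abs(ana[k] - num[k])))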

# Optimizer
class SGD:
    """Stochastic Gradient Descent (SGD)"""

    def __init__(self, lr=0.01):
        self.lr = lr
        
    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key]

# Load the data
(x_train_full, t_train_full), (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=True)

# Use a subset of the data for training and evaluation
train_size = 500  
test_size = 125   

x_train = x_train_full[:train_size]
t_train = t_train_full[:train_size]
x_test = x_test[:test_size]
t_test = t_test[:test_size]

print(f"Training set: {x_train.shape[0]}, Test set: {x_test.shape[0]}")

# Hyperparameters
iters_num = 400  
batch_size = 25
learning_rate = 0.01
train_size = x_train.shape[0]
iter_per_epoch = max(train_size // batch_size, 1)
epochs = iters_num // iter_per_epoch
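# With train_size = 500 and batch_size = 25: iter_per_epoch = 20, so epochs = 400 // 20 = 20.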

Training set: 500, Test set: 125

# Experiment 1: batch normalization comparison
# Network without batch normalization
network1 = FiveLayerNet(
    input_size=784,
    hidden_sizes=[100, 100, 50, 50],
    output_size=10,
    weight_init_std='he',
    use_batchnorm=False
)
# Network with batch normalization
network2 = FiveLayerNet(
    input_size=784,
    hidden_sizes=[100, 100, 50, 50],
    output_size=10,
    weight_init_std='he',
    use_batchnorm=True
)

optimizer1 = SGD(lr=learning_rate)
optimizer2 = SGD(lr=learning_rate)

# Training records
train_loss1, train_acc1, test_acc1 = [], [], []
train_loss2, train_acc2, test_acc2 = [], [], []

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    # Without batch normalization
    grad1 = network1.gradient(x_batch, t_batch)
    optimizer1.update(network1.params, grad1)
    loss1 = network1.loss(x_batch, t_batch)
    train_loss1.append(loss1)
    
    # With batch normalization
    grad2 = network2.gradient(x_batch, t_batch)
    optimizer2.update(network2.params, grad2)
    loss2 = network2.loss(x_batch, t_batch)
    train_loss2.append(loss2)
   
    if i % iter_per_epoch == 0:
        train_acc1.append(network1.accuracy(x_train, t_train))
        test_acc1.append(network1.accuracy(x_test, t_test))
        train_acc2.append(network2.accuracy(x_train, t_train))
        test_acc2.append(network2.accuracy(x_test, t_test))
       
        print(f"Epoch {i//iter_per_epoch + 1}/{epochs}")
        print(f"Without BatchNorm - Loss: {loss1:.4f}, Train Acc: {train_acc1[-1]:.4f}, Test Acc: {test_acc1[-1]:.4f}")
        print(f"With BatchNorm - Loss: {loss2:.4f}, Train Acc: {train_acc2[-1]:.4f}, Test Acc: {test_acc2[-1]:.4f}")

Epoch 1/20
Without BatchNorm - Loss: 2.3055, Train Acc: 0.0940, Test Acc: 0.0800
With BatchNorm - Loss: 2.5311, Train Acc: 0.0440, Test Acc: 0.0240
Epoch 2/20
Without BatchNorm - Loss: 2.2174, Train Acc: 0.2040, Test Acc: 0.2160
With BatchNorm - Loss: 1.9168, Train Acc: 0.2520, Test Acc: 0.1200
Epoch 3/20
Without BatchNorm - Loss: 1.7899, Train Acc: 0.4020, Test Acc: 0.3440
With BatchNorm - Loss: 1.6802, Train Acc: 0.4760, Test Acc: 0.2880
Epoch 4/20
Without BatchNorm - Loss: 1.7844, Train Acc: 0.5360, Test Acc: 0.4320
With BatchNorm - Loss: 1.4192, Train Acc: 0.5800, Test Acc: 0.3600
Epoch 5/20
Without BatchNorm - Loss: 1.2317, Train Acc: 0.6180, Test Acc: 0.5120
With BatchNorm - Loss: 1.1777, Train Acc: 0.6280, Test Acc: 0.4080
Epoch 6/20
Without BatchNorm - Loss: 1.3574, Train Acc: 0.6980, Test Acc: 0.6160
With BatchNorm - Loss: 1.1342, Train Acc: 0.7180, Test Acc: 0.4640
Epoch 7/20
Without BatchNorm - Loss: 1.1397, Train Acc: 0.7360, Test Acc: 0.6560
With BatchNorm - Loss: 1.0016, Train Acc: 0.7480, Test Acc: 0.5360
Epoch 8/20
Without BatchNorm - Loss: 0.9572, Train Acc: 0.7840, Test Acc: 0.6960
With BatchNorm - Loss: 0.9753, Train Acc: 0.7940, Test Acc: 0.5280
Epoch 9/20
Without BatchNorm - Loss: 0.9413, Train Acc: 0.8160, Test Acc: 0.7040
With BatchNorm - Loss: 1.1090, Train Acc: 0.8260, Test Acc: 0.5760
Epoch 10/20
Without BatchNorm - Loss: 0.7149, Train Acc: 0.8400, Test Acc: 0.7280
With BatchNorm - Loss: 0.7278, Train Acc: 0.8440, Test Acc: 0.5840
Epoch 11/20
Without BatchNorm - Loss: 0.5245, Train Acc: 0.8500, Test Acc: 0.7280
With BatchNorm - Loss: 0.7085, Train Acc: 0.8520, Test Acc: 0.5440
Epoch 12/20
Without BatchNorm - Loss: 0.7069, Train Acc: 0.8700, Test Acc: 0.7280
With BatchNorm - Loss: 0.9384, Train Acc: 0.8840, Test Acc: 0.6080
Epoch 13/20
Without BatchNorm - Loss: 0.5311, Train Acc: 0.8880, Test Acc: 0.7360
With BatchNorm - Loss: 0.8163, Train Acc: 0.9160, Test Acc: 0.6480
Epoch 14/20
Without BatchNorm - Loss: 0.3427, Train Acc: 0.8960, Test Acc: 0.7920
With BatchNorm - Loss: 0.7136, Train Acc: 0.9000, Test Acc: 0.6400
Epoch 15/20
Without BatchNorm - Loss: 0.4568, Train Acc: 0.9240, Test Acc: 0.7840
With BatchNorm - Loss: 0.6349, Train Acc: 0.9280, Test Acc: 0.7040
Epoch 16/20
Without BatchNorm - Loss: 0.3327, Train Acc: 0.9240, Test Acc: 0.7840
With BatchNorm - Loss: 0.5653, Train Acc: 0.9340, Test Acc: 0.6720
Epoch 17/20
Without BatchNorm - Loss: 0.2246, Train Acc: 0.9280, Test Acc: 0.8160
With BatchNorm - Loss: 0.6319, Train Acc: 0.9360, Test Acc: 0.6960
Epoch 18/20
Without BatchNorm - Loss: 0.1577, Train Acc: 0.9340, Test Acc: 0.8320
With BatchNorm - Loss: 0.4392, Train Acc: 0.9340, Test Acc: 0.7120
Epoch 19/20
Without BatchNorm - Loss: 0.2883, Train Acc: 0.9380, Test Acc: 0.8240
With BatchNorm - Loss: 0.5763, Train Acc: 0.9540, Test Acc: 0.7280
Epoch 20/20
Without BatchNorm - Loss: 0.2185, Train Acc: 0.9480, Test Acc: 0.8320
With BatchNorm - Loss: 0.4844, Train Acc: 0.9540, Test Acc: 0.6880


plt.figure(figsize=(16, 6))


plt.subplot(1, 2, 1)
plt.plot(np.arange(len(train_loss1)), train_loss1, label='Without BatchNorm') 
plt.plot(np.arange(len(train_loss2)), train_loss2, label='With BatchNorm') 
plt.xlabel("iterations")
plt.ylabel("loss")
plt.title("Training loss")
plt.legend()


plt.subplot(1, 2, 2)
x = np.arange(len(train_acc1))
plt.plot(x, train_acc1, label='Without BatchNorm (train)')
plt.plot(x, test_acc1, label='Without BatchNorm (test)', linestyle='--')
plt.plot(x, train_acc2, label='With BatchNorm (train)')
plt.plot(x, test_acc2, label='With BatchNorm (test)', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.title("Comparison of training and testing accuracy")
plt.legend()

plt.tight_layout()
plt.show()

 

# Experiment 2: weight decay comparison


network1 = FiveLayerNet(
    input_size=784,
    hidden_sizes=[100, 100, 50, 50],
    output_size=10,
    weight_init_std='he',  
    use_batchnorm=True,    
    weight_decay_lambda=0  
)

network2 = FiveLayerNet(
    input_size=784,
    hidden_sizes=[100, 100, 50, 50],
    output_size=10,
    weight_init_std='he',  
    use_batchnorm=True,    
    weight_decay_lambda=0.001  
)

optimizer1 = SGD(lr=learning_rate)
optimizer2 = SGD(lr=learning_rate)

train_loss1, train_acc1, test_acc1 = [], [], []
train_loss2, train_acc2, test_acc2 = [], [], []

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    # No weight decay
    grad1 = network1.gradient(x_batch, t_batch)
    optimizer1.update(network1.params, grad1)
    loss1 = network1.loss(x_batch, t_batch)
    train_loss1.append(loss1)
    
    # With weight decay
    grad2 = network2.gradient(x_batch, t_batch)
    optimizer2.update(network2.params, grad2)
    loss2 = network2.loss(x_batch, t_batch)
    train_loss2.append(loss2)
   
    if i % iter_per_epoch == 0:
        train_acc1.append(network1.accuracy(x_train, t_train))
        test_acc1.append(network1.accuracy(x_test, t_test))
        train_acc2.append(network2.accuracy(x_train, t_train))
        test_acc2.append(network2.accuracy(x_test, t_test))
        
        print(f"Epoch {i//iter_per_epoch + 1}/{epochs}")
        print(f"No weight decay - Loss: {loss1:.4f}, Train Acc: {train_acc1[-1]:.4f}, Test Acc: {test_acc1[-1]:.4f}")
        print(f"With weight decay- Loss: {loss2:.4f}, Train Acc: {train_acc2[-1]:.4f}, Test Acc: {test_acc2[-1]:.4f}")

Epoch 1/20
No weight decay - Loss: 2.3354, Train Acc: 0.1680, Test Acc: 0.1600
With weight decay- Loss: 2.7179, Train Acc: 0.1100, Test Acc: 0.1520
Epoch 2/20
No weight decay - Loss: 1.9522, Train Acc: 0.2880, Test Acc: 0.1840
With weight decay- Loss: 2.4056, Train Acc: 0.3000, Test Acc: 0.2240
Epoch 3/20
No weight decay - Loss: 1.3362, Train Acc: 0.4400, Test Acc: 0.2880
With weight decay- Loss: 1.8403, Train Acc: 0.4840, Test Acc: 0.3360
Epoch 4/20
No weight decay - Loss: 1.3748, Train Acc: 0.5420, Test Acc: 0.3440
With weight decay- Loss: 1.7578, Train Acc: 0.5840, Test Acc: 0.3840
Epoch 5/20
No weight decay - Loss: 1.2470, Train Acc: 0.6200, Test Acc: 0.4160
With weight decay- Loss: 1.4355, Train Acc: 0.6820, Test Acc: 0.4400
Epoch 6/20
No weight decay - Loss: 1.3875, Train Acc: 0.6620, Test Acc: 0.4400
With weight decay- Loss: 1.7909, Train Acc: 0.7280, Test Acc: 0.4800
Epoch 7/20
No weight decay - Loss: 1.1725, Train Acc: 0.7260, Test Acc: 0.5040
With weight decay- Loss: 1.5532, Train Acc: 0.7860, Test Acc: 0.5200
Epoch 8/20
No weight decay - Loss: 1.1724, Train Acc: 0.7600, Test Acc: 0.5120
With weight decay- Loss: 1.5477, Train Acc: 0.8120, Test Acc: 0.5760
Epoch 9/20
No weight decay - Loss: 0.7348, Train Acc: 0.7980, Test Acc: 0.5440
With weight decay- Loss: 1.0268, Train Acc: 0.8340, Test Acc: 0.5840
Epoch 10/20
No weight decay - Loss: 0.7982, Train Acc: 0.7960, Test Acc: 0.5680
With weight decay- Loss: 1.0995, Train Acc: 0.8580, Test Acc: 0.6080
Epoch 11/20
No weight decay - Loss: 0.9589, Train Acc: 0.8500, Test Acc: 0.5600
With weight decay- Loss: 1.1479, Train Acc: 0.8760, Test Acc: 0.6240
Epoch 12/20
No weight decay - Loss: 0.7069, Train Acc: 0.8660, Test Acc: 0.5920
With weight decay- Loss: 1.0813, Train Acc: 0.8900, Test Acc: 0.6960
Epoch 13/20
No weight decay - Loss: 0.6297, Train Acc: 0.8940, Test Acc: 0.6240
With weight decay- Loss: 0.9254, Train Acc: 0.8900, Test Acc: 0.6800
Epoch 14/20
No weight decay - Loss: 0.8401, Train Acc: 0.8920, Test Acc: 0.6400
With weight decay- Loss: 1.0330, Train Acc: 0.9040, Test Acc: 0.6640
Epoch 15/20
No weight decay - Loss: 0.7174, Train Acc: 0.8980, Test Acc: 0.6240
With weight decay- Loss: 1.0208, Train Acc: 0.9160, Test Acc: 0.6960
Epoch 16/20
No weight decay - Loss: 0.6756, Train Acc: 0.9040, Test Acc: 0.6480
With weight decay- Loss: 1.1875, Train Acc: 0.9140, Test Acc: 0.7040
Epoch 17/20
No weight decay - Loss: 0.5438, Train Acc: 0.9260, Test Acc: 0.7120
With weight decay- Loss: 0.9118, Train Acc: 0.9280, Test Acc: 0.7040
Epoch 18/20
No weight decay - Loss: 0.6022, Train Acc: 0.9300, Test Acc: 0.6800
With weight decay- Loss: 0.7961, Train Acc: 0.9360, Test Acc: 0.7120
Epoch 19/20
No weight decay - Loss: 0.7079, Train Acc: 0.9320, Test Acc: 0.7120
With weight decay- Loss: 0.8546, Train Acc: 0.9260, Test Acc: 0.6720
Epoch 20/20
No weight decay - Loss: 0.6250, Train Acc: 0.9540, Test Acc: 0.7280
With weight decay- Loss: 0.9674, Train Acc: 0.9420, Test Acc: 0.7120


plt.figure(figsize=(15, 6))

plt.subplot(1, 2, 1)
plt.plot(np.arange(len(train_loss1)), train_loss1, label='No weight decay')
plt.plot(np.arange(len(train_loss2)), train_loss2, label='With weight decay')
plt.xlabel("iterations")
plt.ylabel("loss")
plt.title("Training loss")
plt.legend()


plt.subplot(1, 2, 2)
x = np.arange(len(train_acc1))
plt.plot(x, train_acc1, label='No weight decay (train)')
plt.plot(x, test_acc1, label='No weight decay (test)', linestyle='--')
plt.plot(x, train_acc2, label='With weight decay (train)')
plt.plot(x, test_acc2, label='With weight decay (test)', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.title("Training and testing accuracy")
plt.legend()

plt.tight_layout()
plt.show()

# Experiment 3: Dropout comparison

# Without Dropout
network1 = FiveLayerNet(
    input_size=784,
    hidden_sizes=[100, 100, 50, 50],
    output_size=10,
    weight_init_std='he',      
    use_batchnorm=True,         
    use_dropout=False
)

# With Dropout
network2 = FiveLayerNet(
    input_size=784,
    hidden_sizes=[100, 100, 50, 50],
    output_size=10,
    weight_init_std='he',       
    use_batchnorm=True,         
    use_dropout=True,
    dropout_ratio=0.08         
)


optimizer1 = SGD(lr=learning_rate)
optimizer2 = SGD(lr=learning_rate)

train_loss1, train_acc1, test_acc1 = [], [], []
train_loss2, train_acc2, test_acc2 = [], [], []

for i in range(iters_num):

    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    # Without Dropout
    grad1 = network1.gradient(x_batch, t_batch)
    optimizer1.update(network1.params, grad1)
    loss1 = network1.loss(x_batch, t_batch)
    train_loss1.append(loss1)
    
    # With Dropout
    grad2 = network2.gradient(x_batch, t_batch)
    optimizer2.update(network2.params, grad2)
    loss2 = network2.loss(x_batch, t_batch)
    train_loss2.append(loss2)
    

    if i % iter_per_epoch == 0:
        train_acc1.append(network1.accuracy(x_train, t_train))
        test_acc1.append(network1.accuracy(x_test, t_test))
        train_acc2.append(network2.accuracy(x_train, t_train))
        test_acc2.append(network2.accuracy(x_test, t_test))

        print(f"Epoch {i//iter_per_epoch + 1}/{epochs}")
        print(f"Without Dropout - Loss: {loss1:.4f}, Train Acc: {train_acc1[-1]:.4f}, Test Acc: {test_acc1[-1]:.4f}")
        print(f"With Dropout - Loss: {loss2:.4f}, Train Acc: {train_acc2[-1]:.4f}, Test Acc: {test_acc2[-1]:.4f}")

Epoch 1/20
Without Dropout - Loss: 2.8517, Train Acc: 0.1180, Test Acc: 0.0880
With Dropout - Loss: 2.2606, Train Acc: 0.1240, Test Acc: 0.1440
Epoch 2/20
Without Dropout - Loss: 1.9766, Train Acc: 0.2780, Test Acc: 0.2640
With Dropout - Loss: 2.1891, Train Acc: 0.2380, Test Acc: 0.1600
Epoch 3/20
Without Dropout - Loss: 1.6820, Train Acc: 0.4260, Test Acc: 0.3280
With Dropout - Loss: 2.1212, Train Acc: 0.3920, Test Acc: 0.2160
Epoch 4/20
Without Dropout - Loss: 1.6266, Train Acc: 0.5420, Test Acc: 0.3440
With Dropout - Loss: 1.8294, Train Acc: 0.5400, Test Acc: 0.3200
Epoch 5/20
Without Dropout - Loss: 1.4466, Train Acc: 0.6400, Test Acc: 0.4240
With Dropout - Loss: 1.5504, Train Acc: 0.5960, Test Acc: 0.3440
Epoch 6/20
Without Dropout - Loss: 1.1496, Train Acc: 0.6960, Test Acc: 0.4640
With Dropout - Loss: 1.5032, Train Acc: 0.6660, Test Acc: 0.4240
Epoch 7/20
Without Dropout - Loss: 1.2502, Train Acc: 0.7280, Test Acc: 0.4720
With Dropout - Loss: 1.7346, Train Acc: 0.7120, Test Acc: 0.4320
Epoch 8/20
Without Dropout - Loss: 0.8974, Train Acc: 0.7760, Test Acc: 0.5200
With Dropout - Loss: 1.5057, Train Acc: 0.7420, Test Acc: 0.5040
Epoch 9/20
Without Dropout - Loss: 0.9743, Train Acc: 0.8000, Test Acc: 0.5440
With Dropout - Loss: 1.2010, Train Acc: 0.7700, Test Acc: 0.5120
Epoch 10/20
Without Dropout - Loss: 1.0423, Train Acc: 0.8220, Test Acc: 0.5520
With Dropout - Loss: 1.4685, Train Acc: 0.8060, Test Acc: 0.5200
Epoch 11/20
Without Dropout - Loss: 0.7758, Train Acc: 0.8480, Test Acc: 0.5840
With Dropout - Loss: 1.0502, Train Acc: 0.8180, Test Acc: 0.5440
Epoch 12/20
Without Dropout - Loss: 0.7487, Train Acc: 0.8660, Test Acc: 0.6000
With Dropout - Loss: 0.9353, Train Acc: 0.8280, Test Acc: 0.5920
Epoch 13/20
Without Dropout - Loss: 0.9809, Train Acc: 0.8840, Test Acc: 0.6320
With Dropout - Loss: 1.3208, Train Acc: 0.8360, Test Acc: 0.5680
Epoch 14/20
Without Dropout - Loss: 0.6833, Train Acc: 0.9000, Test Acc: 0.6080
With Dropout - Loss: 0.9210, Train Acc: 0.8540, Test Acc: 0.6240
Epoch 15/20
Without Dropout - Loss: 0.7784, Train Acc: 0.9080, Test Acc: 0.6480
With Dropout - Loss: 1.0822, Train Acc: 0.8700, Test Acc: 0.6080
Epoch 16/20
Without Dropout - Loss: 0.6455, Train Acc: 0.9120, Test Acc: 0.6800
With Dropout - Loss: 0.9547, Train Acc: 0.8900, Test Acc: 0.6240
Epoch 17/20
Without Dropout - Loss: 0.6330, Train Acc: 0.9140, Test Acc: 0.6640
With Dropout - Loss: 0.8852, Train Acc: 0.8780, Test Acc: 0.6240
Epoch 18/20
Without Dropout - Loss: 0.7478, Train Acc: 0.9340, Test Acc: 0.7200
With Dropout - Loss: 0.9836, Train Acc: 0.8860, Test Acc: 0.6720
Epoch 19/20
Without Dropout - Loss: 0.6356, Train Acc: 0.9360, Test Acc: 0.6960
With Dropout - Loss: 1.1165, Train Acc: 0.9080, Test Acc: 0.6480
Epoch 20/20
Without Dropout - Loss: 0.5600, Train Acc: 0.9400, Test Acc: 0.7360
With Dropout - Loss: 0.9190, Train Acc: 0.9180, Test Acc: 0.6480


plt.figure(figsize=(15, 6))


plt.subplot(1, 2, 1)
plt.plot(np.arange(len(train_loss1)), train_loss1, label='Without Dropout')
plt.plot(np.arange(len(train_loss2)), train_loss2, label='With Dropout')
plt.xlabel("iterations")
plt.ylabel("loss")
plt.title("Training loss")
plt.legend()


plt.subplot(1, 2, 2)
x = np.arange(len(train_acc1))
plt.plot(x, train_acc1, label='Without Dropout (train)')
plt.plot(x, test_acc1, label='Without Dropout (test)', linestyle='--')
plt.plot(x, train_acc2, label='With Dropout (train)')
plt.plot(x, test_acc2, label='With Dropout (test)', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.title("Training and testing accuracy")
plt.legend()

plt.tight_layout()
plt.show()

 

# Experiment 4: combined regularization


# Combined model: BatchNorm + Dropout + weight decay
network2 = FiveLayerNet(
    input_size=784,
    hidden_sizes=[50, 50, 50, 50],
    output_size=10,
    weight_init_std=0.01,
    use_batchnorm=True,
    use_dropout=True,
    dropout_ratio=0.1,
    weight_decay_lambda=0.01
)

optimizer2 = SGD(lr=learning_rate)


train_loss2, train_acc2, test_acc2 = [], [], []

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    # Combined model
    grad2 = network2.gradient(x_batch, t_batch)
    optimizer2.update(network2.params, grad2)
    loss2 = network2.loss(x_batch, t_batch)
    train_loss2.append(loss2)

    if i % iter_per_epoch == 0:
        train_acc2.append(network2.accuracy(x_train, t_train))
        test_acc2.append(network2.accuracy(x_test, t_test))
        
        print(f"Epoch {i//iter_per_epoch + 1}/{epochs}")
    
        print(f"Combination Model - Loss: {loss2:.4f}, Train Acc: {train_acc2[-1]:.4f}, Test Acc: {test_acc2[-1]:.4f}")

Epoch 1/20
Combination Model - Loss: 2.3020, Train Acc: 0.1520, Test Acc: 0.0960
Epoch 2/20
Combination Model - Loss: 2.2120, Train Acc: 0.5200, Test Acc: 0.4160
Epoch 3/20
Combination Model - Loss: 2.0455, Train Acc: 0.5600, Test Acc: 0.5360
Epoch 4/20
Combination Model - Loss: 1.8359, Train Acc: 0.5960, Test Acc: 0.5840
Epoch 5/20
Combination Model - Loss: 1.7061, Train Acc: 0.6400, Test Acc: 0.6240
Epoch 6/20
Combination Model - Loss: 1.5048, Train Acc: 0.6540, Test Acc: 0.6160
Epoch 7/20
Combination Model - Loss: 1.5717, Train Acc: 0.6260, Test Acc: 0.6240
Epoch 8/20
Combination Model - Loss: 1.5191, Train Acc: 0.6800, Test Acc: 0.6560
Epoch 9/20
Combination Model - Loss: 1.2902, Train Acc: 0.7160, Test Acc: 0.6640
Epoch 10/20
Combination Model - Loss: 1.2222, Train Acc: 0.7140, Test Acc: 0.6480
Epoch 11/20
Combination Model - Loss: 1.0935, Train Acc: 0.7440, Test Acc: 0.7040
Epoch 12/20
Combination Model - Loss: 0.9825, Train Acc: 0.7780, Test Acc: 0.7120
Epoch 13/20
Combination Model - Loss: 0.9710, Train Acc: 0.8080, Test Acc: 0.7600
Epoch 14/20
Combination Model - Loss: 1.1035, Train Acc: 0.8460, Test Acc: 0.7440
Epoch 15/20
Combination Model - Loss: 1.0875, Train Acc: 0.9020, Test Acc: 0.7520
Epoch 16/20
Combination Model - Loss: 0.9283, Train Acc: 0.8960, Test Acc: 0.7760
Epoch 17/20
Combination Model - Loss: 0.7865, Train Acc: 0.9160, Test Acc: 0.7840
Epoch 18/20
Combination Model - Loss: 0.7991, Train Acc: 0.9300, Test Acc: 0.8080
Epoch 19/20
Combination Model - Loss: 0.6010, Train Acc: 0.9300, Test Acc: 0.8240
Epoch 20/20
Combination Model - Loss: 0.6647, Train Acc: 0.9460, Test Acc: 0.8320 

plt.figure(figsize=(15, 6))
plt.subplot(1, 2, 1)
plt.plot(np.arange(len(train_loss2)), train_loss2, label='Combination Model')
plt.xlabel("iterations")
plt.ylabel("loss")
plt.title("Training loss")
plt.legend()
plt.subplot(1, 2, 2)
x = np.arange(len(train_acc2))
plt.plot(x, train_acc2, label='Combination Model (train)')
plt.plot(x, test_acc2, label='Combination Model (test)',  linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.title("Training and testing accuracy")
plt.legend()

plt.tight_layout()
plt.show()