DAY 45 TensorBoard 使用介绍
1. TensorBoard 的发展历史和原理
2. TensorBoard 的常见操作
# --- Log directory: avoid colliding with the directory of an earlier run ---
log_dir = 'runs/cifar10_mlp_experiment'
if os.path.exists(log_dir):
    # Probe "<log_dir>_1", "<log_dir>_2", ... until an unused path is found.
    i = 1
    while os.path.exists(f"{log_dir}_{i}"):
        i += 1
    log_dir = f"{log_dir}_{i}"
writer = SummaryWriter(log_dir=log_dir)

# --- Scalars: batch-level curves are indexed by global_step, epoch-level by epoch ---
writer.add_scalar('Train/Batch_Loss', batch_loss, global_step=global_step)
writer.add_scalar('Train/Batch_Accuracy', batch_acc, global_step=global_step)
writer.add_scalar('Train/Epoch_Loss', epoch_train_loss, global_step=epoch)
writer.add_scalar('Train/Epoch_Accuracy', epoch_train_acc, global_step=epoch)

# --- Model graph: traced with one batch taken from the training loader ---
dataiter = iter(train_loader)
images, labels = next(dataiter)
images = images.to(device)
writer.add_graph(model, input_to_model=images)

# --- Images: a grid of the first eight inputs, and a grid of misclassified samples ---
img_grid = torchvision.utils.make_grid(images[:8].cpu())
writer.add_image('原始训练图像', img_grid)
wrong_img_grid = torchvision.utils.make_grid(wrong_images[:display_count])
writer.add_image('错误预测样本', wrong_img_grid)

# --- Histograms: parameter values and gradients every 500 batches ---
if (batch_idx + 1) % 500 == 0:
    for name, param in model.named_parameters():
        writer.add_histogram(f'weights/{name}', param, global_step=global_step)
        # Parameters that have not received a gradient are skipped.
        if param.grad is not None:
            writer.add_histogram(f'grads/{name}', param.grad, global_step=global_step)
3. TensorBoard 在 CIFAR-10 上的实战：MLP 和 CNN 模型
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import matplotlib.pyplot as plt
import os
# Seed PyTorch's and NumPy's random number generators with the same value.
torch.manual_seed(42)
np.random.seed(42)

# Convert each image to a tensor, then normalise every one of the three
# channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-10 splits stored under ./data; only the training split requests a download.
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, transform=transform)

# Mini-batch loaders: the training set is shuffled, the test set is not.
batch_size = 64
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Class names, indexed by integer label when reporting misclassified samples.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)
class MLP(nn.Module):
    """Fully connected classifier: 3072 -> 512 -> 256 -> 10.

    The input is flattened first, so any tensor with 3072 values per sample
    (for example 3 x 32 x 32) is accepted. Both hidden layers are followed by
    a ReLU and a dropout with probability 0.2. The output layer returns raw
    scores without a softmax.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()

        # First hidden block.
        self.layer1 = nn.Linear(in_features=3072, out_features=512)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(p=0.2)

        # Second hidden block.
        self.layer2 = nn.Linear(in_features=512, out_features=256)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=0.2)

        # Output layer: one score per class.
        self.layer3 = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Return the ten class scores for every sample in ``x``."""
        hidden = self.dropout1(self.relu1(self.layer1(self.flatten(x))))
        hidden = self.dropout2(self.relu2(self.layer2(hidden)))
        return self.layer3(hidden)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network on the chosen device, together with its loss and optimizer.
model = MLP().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Choose a TensorBoard directory that does not collide with an earlier run:
# if the base path exists, use the first free "<base>_<i>" instead.
log_dir = 'runs/cifar10_mlp_experiment'
if os.path.exists(log_dir):
    i = 1
    candidate_dir = f"{log_dir}_{i}"
    while os.path.exists(candidate_dir):
        i += 1
        candidate_dir = f"{log_dir}_{i}"
    log_dir = candidate_dir
writer = SummaryWriter(log_dir=log_dir)
def train(model, train_loader, test_loader, criterion, optimizer, device, epochs, writer):
    """Train ``model``, evaluate it after every epoch and log the run to TensorBoard.

    Logged through ``writer``:
      * once, before training: the model graph and a grid of eight training images;
      * every 100 batches: batch loss, running epoch accuracy and learning rate;
      * every 500 batches: a histogram of every parameter and of its gradient;
      * every epoch: train/test loss and accuracy;
      * after the last epoch: up to eight misclassified test images with their
        true and predicted class names (looked up in the module-level ``classes``).

    Args:
        model: Network to optimise. It is not moved by this function, so it
            should already be on ``device``.
        train_loader: Iterable of ``(data, target)`` training batches.
        test_loader: Iterable of ``(data, target)`` evaluation batches.
        criterion: Loss called as ``criterion(output, target)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        device: Device every batch is moved to.
        epochs: Number of passes over ``train_loader``. Must be at least 1,
            because the returned accuracy is computed inside the epoch loop.
        writer: TensorBoard ``SummaryWriter``; it is closed before returning.

    Returns:
        Test accuracy in percent measured after the final epoch.
    """
    max_wrong_samples = 8  # number of misclassified test images shown at the end

    model.train()
    global_step = 0

    # Log the graph and a few input images once, using one training batch.
    images, _ = next(iter(train_loader))
    images = images.to(device)
    writer.add_graph(model, images)
    # The loader's transform may have shifted pixel values outside [0, 1];
    # normalize=True rescales the grid to [0, 1] so it is displayed correctly.
    img_grid = torchvision.utils.make_grid(images[:8].cpu(), normalize=True)
    writer.add_image('原始训练图像', img_grid)

    for epoch in range(epochs):
        # Evaluation at the end of the previous epoch switched the model to
        # eval mode; switch back so layers such as Dropout are active again.
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = output.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()

            if (batch_idx + 1) % 100 == 0:
                batch_loss = loss.item()
                # Accuracy over all samples seen so far in this epoch.
                batch_acc = 100. * correct / total
                writer.add_scalar('Train/Batch_Loss', batch_loss, global_step)
                writer.add_scalar('Train/Batch_Accuracy', batch_acc, global_step)
                writer.add_scalar('Train/Learning_Rate', optimizer.param_groups[0]['lr'], global_step)

                if (batch_idx + 1) % 500 == 0:
                    for name, param in model.named_parameters():
                        writer.add_histogram(f'weights/{name}', param, global_step)
                        if param.grad is not None:
                            writer.add_histogram(f'grads/{name}', param.grad, global_step)

                print(f'Epoch: {epoch+1}/{epochs} | Batch: {batch_idx+1}/{len(train_loader)} '
                      f'| 单Batch损失: {batch_loss:.4f} | 累计平均损失: {running_loss/(batch_idx+1):.4f}')

            global_step += 1

        epoch_train_loss = running_loss / len(train_loader)
        epoch_train_acc = 100. * correct / total
        writer.add_scalar('Train/Epoch_Loss', epoch_train_loss, epoch)
        writer.add_scalar('Train/Epoch_Accuracy', epoch_train_acc, epoch)

        # ---- Evaluation on the test loader ----
        model.eval()
        test_loss = 0
        correct_test = 0
        total_test = 0
        # Misclassified samples are only reported after the final epoch, so
        # they are only collected then, and only as many as will be shown.
        collect_wrong = epoch == epochs - 1
        wrong_images = []
        wrong_labels = []
        wrong_preds = []

        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                test_loss += criterion(output, target).item()
                _, predicted = output.max(1)
                total_test += target.size(0)
                correct_test += predicted.eq(target).sum().item()

                room = max_wrong_samples - len(wrong_images)
                if collect_wrong and room > 0:
                    wrong_mask = predicted != target
                    wrong_images.extend(data[wrong_mask][:room].cpu())
                    wrong_labels.extend(target[wrong_mask][:room].tolist())
                    wrong_preds.extend(predicted[wrong_mask][:room].tolist())

        epoch_test_loss = test_loss / len(test_loader)
        epoch_test_acc = 100. * correct_test / total_test
        writer.add_scalar('Test/Loss', epoch_test_loss, epoch)
        writer.add_scalar('Test/Accuracy', epoch_test_acc, epoch)

        print(f'Epoch {epoch+1}/{epochs} 完成 | 训练准确率: {epoch_train_acc:.2f}% | 测试准确率: {epoch_test_acc:.2f}%')

        if wrong_images:
            wrong_img_grid = torchvision.utils.make_grid(wrong_images, normalize=True)
            wrong_text = [
                f'True: {classes[true_idx]}, Pred: {classes[pred_idx]}'
                for true_idx, pred_idx in zip(wrong_labels, wrong_preds)
            ]
            writer.add_image('错误预测样本', wrong_img_grid)
            writer.add_text('错误预测标签', '\n'.join(wrong_text), epoch)

    writer.close()
    return epoch_test_acc
# Train the MLP for a fixed number of epochs and report the final test accuracy.
epochs = 20

print("开始训练模型...")
print(f"TensorBoard日志保存在: {log_dir}")
print("训练完成后,使用命令 `tensorboard --logdir=runs` 启动TensorBoard查看可视化结果")

final_accuracy = train(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    epochs=epochs,
    writer=writer,
)
print(f"训练完成!最终测试准确率: {final_accuracy:.2f}%")
Files already downloaded and verified
开始训练模型...
TensorBoard日志保存在: runs/cifar10_mlp_experiment_1
训练完成后,使用命令 `tensorboard --logdir=runs` 启动TensorBoard查看可视化结果
Epoch: 1/20 | Batch: 100/782 | 单Batch损失: 1.8327 | 累计平均损失: 1.9410
Epoch: 1/20 | Batch: 200/782 | 单Batch损失: 1.8588 | 累计平均损失: 1.8519
Epoch: 1/20 | Batch: 300/782 | 单Batch损失: 1.6719 | 累计平均损失: 1.8029
Epoch: 1/20 | Batch: 400/782 | 单Batch损失: 1.7609 | 累计平均损失: 1.7754
Epoch: 1/20 | Batch: 500/782 | 单Batch损失: 1.6642 | 累计平均损失: 1.7508
Epoch: 1/20 | Batch: 600/782 | 单Batch损失: 1.6564 | 累计平均损失: 1.7330
Epoch: 1/20 | Batch: 700/782 | 单Batch损失: 1.5870 | 累计平均损失: 1.7199
Epoch 1/20 完成 | 训练准确率: 39.23% | 测试准确率: 45.11%
Epoch: 2/20 | Batch: 100/782 | 单Batch损失: 1.4987 | 累计平均损失: 1.5227
Epoch: 2/20 | Batch: 200/782 | 单Batch损失: 1.3297 | 累计平均损失: 1.4918
Epoch: 2/20 | Batch: 300/782 | 单Batch损失: 1.3329 | 累计平均损失: 1.4820
Epoch: 2/20 | Batch: 400/782 | 单Batch损失: 1.5894 | 累计平均损失: 1.4701
Epoch: 2/20 | Batch: 500/782 | 单Batch损失: 1.3843 | 累计平均损失: 1.4710
Epoch: 2/20 | Batch: 600/782 | 单Batch损失: 1.3671 | 累计平均损失: 1.4662
Epoch: 2/20 | Batch: 700/782 | 单Batch损失: 1.4408 | 累计平均损失: 1.4614
Epoch 2/20 完成 | 训练准确率: 48.51% | 测试准确率: 49.87%
Epoch: 3/20 | Batch: 100/782 | 单Batch损失: 1.3722 | 累计平均损失: 1.3401
Epoch: 3/20 | Batch: 200/782 | 单Batch损失: 1.8139 | 累计平均损失: 1.3486
Epoch: 3/20 | Batch: 300/782 | 单Batch损失: 1.1994 | 累计平均损失: 1.3457
Epoch: 3/20 | Batch: 400/782 | 单Batch损失: 1.1896 | 累计平均损失: 1.3403
Epoch: 3/20 | Batch: 500/782 | 单Batch损失: 1.4191 | 累计平均损失: 1.3419
Epoch: 3/20 | Batch: 600/782 | 单Batch损失: 1.4218 | 累计平均损失: 1.3475
Epoch: 3/20 | Batch: 700/782 | 单Batch损失: 1.4627 | 累计平均损失: 1.3441
Epoch 3/20 完成 | 训练准确率: 52.43% | 测试准确率: 51.27%
Epoch: 4/20 | Batch: 100/782 | 单Batch损失: 1.3596 | 累计平均损失: 1.2346
Epoch: 4/20 | Batch: 200/782 | 单Batch损失: 1.3270 | 累计平均损失: 1.2381
Epoch: 4/20 | Batch: 300/782 | 单Batch损失: 1.2478 | 累计平均损失: 1.2434
Epoch: 4/20 | Batch: 400/782 | 单Batch损失: 1.3861 | 累计平均损失: 1.2422
Epoch: 4/20 | Batch: 500/782 | 单Batch损失: 1.3478 | 累计平均损失: 1.2422
Epoch: 4/20 | Batch: 600/782 | 单Batch损失: 1.1521 | 累计平均损失: 1.2447
Epoch: 4/20 | Batch: 700/782 | 单Batch损失: 1.2833 | 累计平均损失: 1.2469
Epoch 4/20 完成 | 训练准确率: 55.63% | 测试准确率: 51.32%
Epoch: 5/20 | Batch: 100/782 | 单Batch损失: 0.9809 | 累计平均损失: 1.1235
Epoch: 5/20 | Batch: 200/782 | 单Batch损失: 1.0800 | 累计平均损失: 1.1295
Epoch: 5/20 | Batch: 300/782 | 单Batch损失: 1.0129 | 累计平均损失: 1.1372
Epoch: 5/20 | Batch: 400/782 | 单Batch损失: 1.0918 | 累计平均损失: 1.1459
Epoch: 5/20 | Batch: 500/782 | 单Batch损失: 1.3155 | 累计平均损失: 1.1532
Epoch: 5/20 | Batch: 600/782 | 单Batch损失: 1.1727 | 累计平均损失: 1.1588
Epoch: 5/20 | Batch: 700/782 | 单Batch损失: 1.2888 | 累计平均损失: 1.1649
Epoch 5/20 完成 | 训练准确率: 58.74% | 测试准确率: 52.74%
Epoch: 6/20 | Batch: 100/782 | 单Batch损失: 1.1855 | 累计平均损失: 1.0499
Epoch: 6/20 | Batch: 200/782 | 单Batch损失: 0.8994 | 累计平均损失: 1.0567
Epoch: 6/20 | Batch: 300/782 | 单Batch损失: 1.2460 | 累计平均损失: 1.0602
Epoch: 6/20 | Batch: 400/782 | 单Batch损失: 1.1033 | 累计平均损失: 1.0660
Epoch: 6/20 | Batch: 500/782 | 单Batch损失: 0.9182 | 累计平均损失: 1.0679
Epoch: 6/20 | Batch: 600/782 | 单Batch损失: 1.4116 | 累计平均损失: 1.0745
Epoch: 6/20 | Batch: 700/782 | 单Batch损失: 1.0211 | 累计平均损失: 1.0814
Epoch 6/20 完成 | 训练准确率: 61.37% | 测试准确率: 52.98%
Epoch: 7/20 | Batch: 100/782 | 单Batch损失: 1.0082 | 累计平均损失: 0.9592
Epoch: 7/20 | Batch: 200/782 | 单Batch损失: 1.0255 | 累计平均损失: 0.9742
Epoch: 7/20 | Batch: 300/782 | 单Batch损失: 1.1416 | 累计平均损失: 0.9837
Epoch: 7/20 | Batch: 400/782 | 单Batch损失: 0.9732 | 累计平均损失: 0.9875
Epoch: 7/20 | Batch: 500/782 | 单Batch损失: 1.1387 | 累计平均损失: 0.9947
Epoch: 7/20 | Batch: 600/782 | 单Batch损失: 0.8657 | 累计平均损失: 0.9994
Epoch: 7/20 | Batch: 700/782 | 单Batch损失: 0.9666 | 累计平均损失: 1.0046
Epoch 7/20 完成 | 训练准确率: 64.09% | 测试准确率: 52.69%
Epoch: 8/20 | Batch: 100/782 | 单Batch损失: 0.6081 | 累计平均损失: 0.8927
Epoch: 8/20 | Batch: 200/782 | 单Batch损失: 0.6484 | 累计平均损失: 0.8922
Epoch: 8/20 | Batch: 300/782 | 单Batch损失: 0.8360 | 累计平均损失: 0.9001
Epoch: 8/20 | Batch: 400/782 | 单Batch损失: 1.1883 | 累计平均损失: 0.9150
Epoch: 8/20 | Batch: 500/782 | 单Batch损失: 0.9597 | 累计平均损失: 0.9244
Epoch: 8/20 | Batch: 600/782 | 单Batch损失: 0.8802 | 累计平均损失: 0.9273
Epoch: 8/20 | Batch: 700/782 | 单Batch损失: 0.9168 | 累计平均损失: 0.9295
Epoch 8/20 完成 | 训练准确率: 66.68% | 测试准确率: 52.01%
Epoch: 9/20 | Batch: 100/782 | 单Batch损失: 0.8491 | 累计平均损失: 0.7973
Epoch: 9/20 | Batch: 200/782 | 单Batch损失: 0.8207 | 累计平均损失: 0.8219
Epoch: 9/20 | Batch: 300/782 | 单Batch损失: 0.9952 | 累计平均损失: 0.8260
Epoch: 9/20 | Batch: 400/782 | 单Batch损失: 0.8664 | 累计平均损失: 0.8395
Epoch: 9/20 | Batch: 500/782 | 单Batch损失: 0.8573 | 累计平均损失: 0.8478
Epoch: 9/20 | Batch: 600/782 | 单Batch损失: 1.2844 | 累计平均损失: 0.8503
Epoch: 9/20 | Batch: 700/782 | 单Batch损失: 0.7931 | 累计平均损失: 0.8556
Epoch 9/20 完成 | 训练准确率: 69.11% | 测试准确率: 53.24%
Epoch: 10/20 | Batch: 100/782 | 单Batch损失: 0.6661 | 累计平均损失: 0.7471
Epoch: 10/20 | Batch: 200/782 | 单Batch损失: 0.7758 | 累计平均损失: 0.7521
Epoch: 10/20 | Batch: 300/782 | 单Batch损失: 1.1638 | 累计平均损失: 0.7680
Epoch: 10/20 | Batch: 400/782 | 单Batch损失: 0.7825 | 累计平均损失: 0.7754
Epoch: 10/20 | Batch: 500/782 | 单Batch损失: 0.6984 | 累计平均损失: 0.7834
Epoch: 10/20 | Batch: 600/782 | 单Batch损失: 0.7199 | 累计平均损失: 0.7880
Epoch: 10/20 | Batch: 700/782 | 单Batch损失: 0.9765 | 累计平均损失: 0.7918
Epoch 10/20 完成 | 训练准确率: 71.70% | 测试准确率: 53.59%
Epoch: 11/20 | Batch: 100/782 | 单Batch损失: 0.7485 | 累计平均损失: 0.6873
Epoch: 11/20 | Batch: 200/782 | 单Batch损失: 0.6853 | 累计平均损失: 0.6817
Epoch: 11/20 | Batch: 300/782 | 单Batch损失: 0.7594 | 累计平均损失: 0.6880
Epoch: 11/20 | Batch: 400/782 | 单Batch损失: 0.9249 | 累计平均损失: 0.7001
Epoch: 11/20 | Batch: 500/782 | 单Batch损失: 0.5742 | 累计平均损失: 0.7060
Epoch: 11/20 | Batch: 600/782 | 单Batch损失: 0.7716 | 累计平均损失: 0.7190
Epoch: 11/20 | Batch: 700/782 | 单Batch损失: 0.6123 | 累计平均损失: 0.7273
Epoch 11/20 完成 | 训练准确率: 73.83% | 测试准确率: 52.58%
Epoch: 12/20 | Batch: 100/782 | 单Batch损失: 0.6315 | 累计平均损失: 0.6275
Epoch: 12/20 | Batch: 200/782 | 单Batch损失: 0.5326 | 累计平均损失: 0.6286
Epoch: 12/20 | Batch: 300/782 | 单Batch损失: 0.5623 | 累计平均损失: 0.6369
Epoch: 12/20 | Batch: 400/782 | 单Batch损失: 0.7911 | 累计平均损失: 0.6473
Epoch: 12/20 | Batch: 500/782 | 单Batch损失: 0.6620 | 累计平均损失: 0.6545
Epoch: 12/20 | Batch: 600/782 | 单Batch损失: 0.5583 | 累计平均损失: 0.6637
Epoch: 12/20 | Batch: 700/782 | 单Batch损失: 0.6010 | 累计平均损失: 0.6709
Epoch 12/20 完成 | 训练准确率: 75.82% | 测试准确率: 52.88%
Epoch: 13/20 | Batch: 100/782 | 单Batch损失: 0.7061 | 累计平均损失: 0.5733
Epoch: 13/20 | Batch: 200/782 | 单Batch损失: 0.5555 | 累计平均损失: 0.5713
Epoch: 13/20 | Batch: 300/782 | 单Batch损失: 0.3972 | 累计平均损失: 0.5712
Epoch: 13/20 | Batch: 400/782 | 单Batch损失: 0.8246 | 累计平均损失: 0.5824
Epoch: 13/20 | Batch: 500/782 | 单Batch损失: 0.4577 | 累计平均损失: 0.5935
Epoch: 13/20 | Batch: 600/782 | 单Batch损失: 0.7397 | 累计平均损失: 0.5992
Epoch: 13/20 | Batch: 700/782 | 单Batch损失: 0.6297 | 累计平均损失: 0.6090
Epoch 13/20 完成 | 训练准确率: 78.18% | 测试准确率: 53.19%
Epoch: 14/20 | Batch: 100/782 | 单Batch损失: 0.5944 | 累计平均损失: 0.5333
Epoch: 14/20 | Batch: 200/782 | 单Batch损失: 0.5172 | 累计平均损失: 0.5252
Epoch: 14/20 | Batch: 300/782 | 单Batch损失: 0.5107 | 累计平均损失: 0.5313
Epoch: 14/20 | Batch: 400/782 | 单Batch损失: 0.4882 | 累计平均损失: 0.5414
Epoch: 14/20 | Batch: 500/782 | 单Batch损失: 0.4880 | 累计平均损失: 0.5560
Epoch: 14/20 | Batch: 600/782 | 单Batch损失: 0.6760 | 累计平均损失: 0.5617
Epoch: 14/20 | Batch: 700/782 | 单Batch损失: 0.5190 | 累计平均损失: 0.5651
Epoch 14/20 完成 | 训练准确率: 79.50% | 测试准确率: 53.00%
Epoch: 15/20 | Batch: 100/782 | 单Batch损失: 0.3614 | 累计平均损失: 0.4667
Epoch: 15/20 | Batch: 200/782 | 单Batch损失: 0.5322 | 累计平均损失: 0.4657
Epoch: 15/20 | Batch: 300/782 | 单Batch损失: 0.5792 | 累计平均损失: 0.4838
Epoch: 15/20 | Batch: 400/782 | 单Batch损失: 0.6562 | 累计平均损失: 0.4975
Epoch: 15/20 | Batch: 500/782 | 单Batch损失: 0.5755 | 累计平均损失: 0.5062
Epoch: 15/20 | Batch: 600/782 | 单Batch损失: 0.8258 | 累计平均损失: 0.5142
Epoch: 15/20 | Batch: 700/782 | 单Batch损失: 0.4823 | 累计平均损失: 0.5194
Epoch 15/20 完成 | 训练准确率: 81.21% | 测试准确率: 52.39%
Epoch: 16/20 | Batch: 100/782 | 单Batch损失: 0.3308 | 累计平均损失: 0.4314
Epoch: 16/20 | Batch: 200/782 | 单Batch损失: 0.3376 | 累计平均损失: 0.4463
Epoch: 16/20 | Batch: 300/782 | 单Batch损失: 0.5752 | 累计平均损失: 0.4539
Epoch: 16/20 | Batch: 400/782 | 单Batch损失: 0.4853 | 累计平均损失: 0.4700
Epoch: 16/20 | Batch: 500/782 | 单Batch损失: 0.5356 | 累计平均损失: 0.4794
Epoch: 16/20 | Batch: 600/782 | 单Batch损失: 0.6754 | 累计平均损失: 0.4817
Epoch: 16/20 | Batch: 700/782 | 单Batch损失: 0.4735 | 累计平均损失: 0.4875
Epoch 16/20 完成 | 训练准确率: 82.41% | 测试准确率: 53.40%
Epoch: 17/20 | Batch: 100/782 | 单Batch损失: 0.3944 | 累计平均损失: 0.4055
Epoch: 17/20 | Batch: 200/782 | 单Batch损失: 0.3707 | 累计平均损失: 0.4074
Epoch: 17/20 | Batch: 300/782 | 单Batch损失: 0.5363 | 累计平均损失: 0.4122
Epoch: 17/20 | Batch: 400/782 | 单Batch损失: 0.3647 | 累计平均损失: 0.4147
Epoch: 17/20 | Batch: 500/782 | 单Batch损失: 0.4949 | 累计平均损失: 0.4241
Epoch: 17/20 | Batch: 600/782 | 单Batch损失: 0.2563 | 累计平均损失: 0.4316
Epoch: 17/20 | Batch: 700/782 | 单Batch损失: 0.3814 | 累计平均损失: 0.4394
Epoch 17/20 完成 | 训练准确率: 84.10% | 测试准确率: 51.73%
Epoch: 18/20 | Batch: 100/782 | 单Batch损失: 0.4645 | 累计平均损失: 0.3851
Epoch: 18/20 | Batch: 200/782 | 单Batch损失: 0.2752 | 累计平均损失: 0.3906
Epoch: 18/20 | Batch: 300/782 | 单Batch损失: 0.4404 | 累计平均损失: 0.3927
Epoch: 18/20 | Batch: 400/782 | 单Batch损失: 0.4450 | 累计平均损失: 0.4015
Epoch: 18/20 | Batch: 500/782 | 单Batch损失: 0.4082 | 累计平均损失: 0.4158
Epoch: 18/20 | Batch: 600/782 | 单Batch损失: 0.3982 | 累计平均损失: 0.4203
Epoch: 18/20 | Batch: 700/782 | 单Batch损失: 0.5168 | 累计平均损失: 0.4263
Epoch 18/20 完成 | 训练准确率: 84.83% | 测试准确率: 51.31%
Epoch: 19/20 | Batch: 100/782 | 单Batch损失: 0.2534 | 累计平均损失: 0.3471
Epoch: 19/20 | Batch: 200/782 | 单Batch损失: 0.3286 | 累计平均损失: 0.3488
Epoch: 19/20 | Batch: 300/782 | 单Batch损失: 0.2713 | 累计平均损失: 0.3563
Epoch: 19/20 | Batch: 400/782 | 单Batch损失: 0.4733 | 累计平均损失: 0.3728
Epoch: 19/20 | Batch: 500/782 | 单Batch损失: 0.3166 | 累计平均损失: 0.3756
Epoch: 19/20 | Batch: 600/782 | 单Batch损失: 0.4382 | 累计平均损失: 0.3798
Epoch: 19/20 | Batch: 700/782 | 单Batch损失: 0.3680 | 累计平均损失: 0.3888
Epoch 19/20 完成 | 训练准确率: 86.05% | 测试准确率: 52.17%
Epoch: 20/20 | Batch: 100/782 | 单Batch损失: 0.2334 | 累计平均损失: 0.3316
Epoch: 20/20 | Batch: 200/782 | 单Batch损失: 0.3335 | 累计平均损失: 0.3274
Epoch: 20/20 | Batch: 300/782 | 单Batch损失: 0.4049 | 累计平均损失: 0.3455
Epoch: 20/20 | Batch: 400/782 | 单Batch损失: 0.5196 | 累计平均损失: 0.3514
Epoch: 20/20 | Batch: 500/782 | 单Batch损失: 0.3912 | 累计平均损失: 0.3619
Epoch: 20/20 | Batch: 600/782 | 单Batch损失: 0.2988 | 累计平均损失: 0.3776
Epoch: 20/20 | Batch: 700/782 | 单Batch损失: 0.5925 | 累计平均损失: 0.3867
Epoch 20/20 完成 | 训练准确率: 86.26% | 测试准确率: 51.46%
训练完成!最终测试准确率: 51.46%
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import matplotlib.pyplot as plt
import numpy as np
import os
import torchvision
# Matplotlib: use the SimHei font for the Chinese labels and keep the minus
# sign rendering as a plain character.
plt.rcParams["font.family"] = ["SimHei"]
plt.rcParams['axes.unicode_minus'] = False

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"使用设备: {device}")

# Per-channel statistics shared by the training and test pipelines.
cifar_mean = (0.4914, 0.4822, 0.4465)
cifar_std = (0.2023, 0.1994, 0.2010)

# Training pipeline: random crop, flip, colour jitter and rotation, followed
# by tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])

# Test pipeline: no augmentation, only tensor conversion and normalisation.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])

# CIFAR-10 splits stored under ./data; only the training split requests a download.
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, transform=test_transform)

# Mini-batch loaders: the training set is shuffled, the test set is not.
batch_size = 64
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
class CNN(nn.Module):
    """Three conv blocks (3 -> 32 -> 64 -> 128 channels) and a two-layer head.

    Every block is Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU -> MaxPool2d(2).
    The head flattens to 128 * 4 * 4 features, which corresponds to 32 x 32
    inputs after three 2x poolings, and maps them to 512 and then 10 outputs,
    with a ReLU and dropout (p=0.5) in between.
    """

    def __init__(self):
        super().__init__()

        # Block 1: 3 -> 32 channels.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Block 2: 32 -> 64 channels.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2)

        # Block 3: 64 -> 128 channels.
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2)

        # Classifier head.
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the ten class scores for every sample in ``x``."""
        x = self.pool1(self.relu1(self.bn1(self.conv1(x))))
        x = self.pool2(self.relu2(self.bn2(self.conv2(x))))
        x = self.pool3(self.relu3(self.bn3(self.conv3(x))))

        # Collapse channel and spatial dimensions into one feature vector.
        x = x.view(-1, 128 * 4 * 4)

        # The head reuses relu3 as its activation; there is no separate module.
        x = self.dropout(self.relu3(self.fc1(x)))
        return self.fc2(x)
# Build the network on the chosen device, together with its loss and optimizer.
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Plateau scheduler in 'min' mode: the value passed to scheduler.step() is
# expected to decrease; the learning rate is multiplied by `factor` once it
# has stopped improving for longer than `patience` epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', patience=3, factor=0.5, verbose=True
)

# Choose a TensorBoard directory that does not collide with an earlier run:
# if the base path exists, use the first free "<base>_v<version>" instead.
log_dir = "runs/cifar10_cnn_exp"
if os.path.exists(log_dir):
    version = 1
    versioned_dir = f"{log_dir}_v{version}"
    while os.path.exists(versioned_dir):
        version += 1
        versioned_dir = f"{log_dir}_v{version}"
    log_dir = versioned_dir
writer = SummaryWriter(log_dir=log_dir)
def train(model, train_loader, test_loader, criterion, optimizer, scheduler, device, epochs, writer):
    """Train ``model`` with LR scheduling, evaluate every epoch and log to TensorBoard.

    Logged through ``writer``:
      * once, before training: the model graph and a grid of eight training images;
      * every batch: batch loss, running epoch accuracy and learning rate;
      * every 200 batches: a histogram of every parameter and of its gradient;
      * every epoch: train/test loss and accuracy, plus up to eight
        misclassified test images with their true and predicted class names
        (looked up in the module-level ``classes``).

    After the last epoch the per-iteration losses are plotted with
    ``plot_iter_losses``.

    Args:
        model: Network to optimise. It is not moved by this function, so it
            should already be on ``device``.
        train_loader: Iterable of ``(data, target)`` training batches.
        test_loader: Iterable of ``(data, target)`` evaluation batches.
        criterion: Loss called as ``criterion(output, target)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        scheduler: Learning-rate scheduler stepped once per epoch with the
            mean test loss.
        device: Device every batch is moved to.
        epochs: Number of passes over ``train_loader``. Must be at least 1,
            because the returned accuracy is computed inside the epoch loop.
        writer: TensorBoard ``SummaryWriter``; it is closed before returning.

    Returns:
        Test accuracy in percent measured after the final epoch.
    """
    max_wrong_samples = 8  # misclassified test images logged per epoch

    model.train()
    all_iter_losses = []  # loss of every training iteration, for the final plot
    iter_indices = []     # matching 1-based iteration numbers
    global_step = 0

    # Log the graph and a few input images once, using one training batch.
    images, _ = next(iter(train_loader))
    images = images.to(device)
    writer.add_graph(model, images)
    # NOTE(review): the batch comes from train_loader, so whatever transform
    # the dataset applies has already run, although the tag below says
    # "增强前" (before augmentation) - confirm the intended label.
    # The transform may also have shifted pixel values outside [0, 1];
    # normalize=True rescales the grid to [0, 1] so it is displayed correctly.
    img_grid = torchvision.utils.make_grid(images[:8].cpu(), normalize=True)
    writer.add_image('原始训练图像(增强前)', img_grid, global_step=0)

    for epoch in range(epochs):
        # Evaluation at the end of the previous epoch switched the model to
        # eval mode; switch back so BatchNorm and Dropout train normally.
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            iter_loss = loss.item()
            all_iter_losses.append(iter_loss)
            iter_indices.append(global_step + 1)

            running_loss += iter_loss
            _, predicted = output.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()

            # Accuracy over all samples seen so far in this epoch.
            batch_acc = 100. * correct / total
            writer.add_scalar('Train/Batch Loss', iter_loss, global_step)
            writer.add_scalar('Train/Batch Accuracy', batch_acc, global_step)
            writer.add_scalar('Train/Learning Rate', optimizer.param_groups[0]['lr'], global_step)

            if (batch_idx + 1) % 200 == 0:
                for name, param in model.named_parameters():
                    writer.add_histogram(f'Weights/{name}', param, global_step)
                    if param.grad is not None:
                        writer.add_histogram(f'Gradients/{name}', param.grad, global_step)

            if (batch_idx + 1) % 100 == 0:
                print(f'Epoch: {epoch+1}/{epochs} | Batch: {batch_idx+1}/{len(train_loader)} '
                      f'| 单Batch损失: {iter_loss:.4f} | 累计平均损失: {running_loss/(batch_idx+1):.4f}')

            global_step += 1

        epoch_train_loss = running_loss / len(train_loader)
        epoch_train_acc = 100. * correct / total
        writer.add_scalar('Train/Epoch Loss', epoch_train_loss, epoch)
        writer.add_scalar('Train/Epoch Accuracy', epoch_train_acc, epoch)

        # ---- Evaluation on the test loader ----
        model.eval()
        test_loss = 0
        correct_test = 0
        total_test = 0
        wrong_images = []
        wrong_labels = []
        wrong_preds = []

        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                test_loss += criterion(output, target).item()
                _, predicted = output.max(1)
                total_test += target.size(0)
                correct_test += predicted.eq(target).sum().item()

                # Keep at most max_wrong_samples misclassified samples per
                # epoch in total, not per batch, so the logged grid and text
                # stay small.
                room = max_wrong_samples - len(wrong_images)
                if room > 0:
                    wrong_mask = predicted != target
                    wrong_images.extend(data[wrong_mask][:room].cpu())
                    wrong_labels.extend(target[wrong_mask][:room].tolist())
                    wrong_preds.extend(predicted[wrong_mask][:room].tolist())

        epoch_test_loss = test_loss / len(test_loader)
        epoch_test_acc = 100. * correct_test / total_test
        writer.add_scalar('Test/Epoch Loss', epoch_test_loss, epoch)
        writer.add_scalar('Test/Epoch Accuracy', epoch_test_acc, epoch)

        if wrong_images:
            wrong_img_grid = torchvision.utils.make_grid(wrong_images, normalize=True)
            writer.add_image('错误预测样本', wrong_img_grid, epoch)
            wrong_text = [f"真实: {classes[wl]}, 预测: {classes[wp]}"
                          for wl, wp in zip(wrong_labels, wrong_preds)]
            writer.add_text('错误预测标签', '\n'.join(wrong_text), epoch)

        # The scheduler monitors the mean test loss of this epoch.
        scheduler.step(epoch_test_loss)

        print(f'Epoch {epoch+1}/{epochs} 完成 | 训练准确率: {epoch_train_acc:.2f}% | 测试准确率: {epoch_test_acc:.2f}%')

    writer.close()
    plot_iter_losses(all_iter_losses, iter_indices)
    return epoch_test_acc
def plot_iter_losses(losses, indices):
    """Show a line plot of the training loss of every iteration.

    Args:
        losses: Loss values, one per training iteration.
        indices: Iteration numbers used as x coordinates, aligned with ``losses``.
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(indices, losses, 'b-', alpha=0.7, label='Iteration Loss')
    ax.set_xlabel('Iteration(Batch序号)')
    ax.set_ylabel('损失值')
    ax.set_title('每个 Iteration 的训练损失')
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    plt.show()
# Class names, indexed by integer label when reporting misclassified samples.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

# Train the CNN for a fixed number of epochs and report the final test accuracy.
epochs = 20

print("开始使用CNN训练模型...")
print(f"TensorBoard 日志目录: {log_dir}")
print("训练后执行: tensorboard --logdir=runs 查看可视化")

final_accuracy = train(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    device=device,
    epochs=epochs,
    writer=writer,
)
print(f"训练完成!最终测试准确率: {final_accuracy:.2f}%")
使用设备: cuda
Files already downloaded and verified
开始使用CNN训练模型...
TensorBoard 日志目录: runs/cifar10_cnn_exp
训练后执行: tensorboard --logdir=runs 查看可视化
Epoch: 1/20 | Batch: 100/782 | 单Batch损失: 1.8809 | 累计平均损失: 2.0134
Epoch: 1/20 | Batch: 200/782 | 单Batch损失: 1.7645 | 累计平均损失: 1.8838
Epoch: 1/20 | Batch: 300/782 | 单Batch损失: 1.6334 | 累计平均损失: 1.8246
Epoch: 1/20 | Batch: 400/782 | 单Batch损失: 1.6380 | 累计平均损失: 1.7784
Epoch: 1/20 | Batch: 500/782 | 单Batch损失: 1.5500 | 累计平均损失: 1.7435
Epoch: 1/20 | Batch: 600/782 | 单Batch损失: 1.5527 | 累计平均损失: 1.7107
Epoch: 1/20 | Batch: 700/782 | 单Batch损失: 1.4984 | 累计平均损失: 1.6852
Epoch 1/20 完成 | 训练准确率: 38.11% | 测试准确率: 52.47%
Epoch: 2/20 | Batch: 100/782 | 单Batch损失: 1.3814 | 累计平均损失: 1.4373
Epoch: 2/20 | Batch: 200/782 | 单Batch损失: 1.2911 | 累计平均损失: 1.3985
Epoch: 2/20 | Batch: 300/782 | 单Batch损失: 1.1904 | 累计平均损失: 1.3747
Epoch: 2/20 | Batch: 400/782 | 单Batch损失: 1.4026 | 累计平均损失: 1.3556
Epoch: 2/20 | Batch: 500/782 | 单Batch损失: 1.0859 | 累计平均损失: 1.3323
Epoch: 2/20 | Batch: 600/782 | 单Batch损失: 1.0579 | 累计平均损失: 1.3118
Epoch: 2/20 | Batch: 700/782 | 单Batch损失: 1.0614 | 累计平均损失: 1.2968
Epoch 2/20 完成 | 训练准确率: 53.28% | 测试准确率: 64.18%
Epoch: 3/20 | Batch: 100/782 | 单Batch损失: 1.2669 | 累计平均损失: 1.1595
Epoch: 3/20 | Batch: 200/782 | 单Batch损失: 1.1426 | 累计平均损失: 1.1423
Epoch: 3/20 | Batch: 300/782 | 单Batch损失: 0.9215 | 累计平均损失: 1.1318
Epoch: 3/20 | Batch: 400/782 | 单Batch损失: 0.9795 | 累计平均损失: 1.1233
Epoch: 3/20 | Batch: 500/782 | 单Batch损失: 1.2100 | 累计平均损失: 1.1204
Epoch: 3/20 | Batch: 600/782 | 单Batch损失: 1.1693 | 累计平均损失: 1.1098
Epoch: 3/20 | Batch: 700/782 | 单Batch损失: 1.0973 | 累计平均损失: 1.1007
Epoch 3/20 完成 | 训练准确率: 61.37% | 测试准确率: 67.55%
Epoch: 4/20 | Batch: 100/782 | 单Batch损失: 0.8795 | 累计平均损失: 1.0080
Epoch: 4/20 | Batch: 200/782 | 单Batch损失: 1.0070 | 累计平均损失: 1.0122
Epoch: 4/20 | Batch: 300/782 | 单Batch损失: 1.1206 | 累计平均损失: 1.0071
Epoch: 4/20 | Batch: 400/782 | 单Batch损失: 1.0918 | 累计平均损失: 1.0017
Epoch: 4/20 | Batch: 500/782 | 单Batch损失: 0.8132 | 累计平均损失: 0.9982
Epoch: 4/20 | Batch: 600/782 | 单Batch损失: 1.1464 | 累计平均损失: 0.9895
Epoch: 4/20 | Batch: 700/782 | 单Batch损失: 0.9950 | 累计平均损失: 0.9883
Epoch 4/20 完成 | 训练准确率: 65.12% | 测试准确率: 70.56%
Epoch: 5/20 | Batch: 100/782 | 单Batch损失: 0.9320 | 累计平均损失: 0.9707
Epoch: 5/20 | Batch: 200/782 | 单Batch损失: 0.9041 | 累计平均损失: 0.9490
Epoch: 5/20 | Batch: 300/782 | 单Batch损失: 0.7707 | 累计平均损失: 0.9494
Epoch: 5/20 | Batch: 400/782 | 单Batch损失: 0.8947 | 累计平均损失: 0.9423
Epoch: 5/20 | Batch: 500/782 | 单Batch损失: 0.8728 | 累计平均损失: 0.9352
Epoch: 5/20 | Batch: 600/782 | 单Batch损失: 0.9779 | 累计平均损失: 0.9290
Epoch: 5/20 | Batch: 700/782 | 单Batch损失: 0.9652 | 累计平均损失: 0.9266
Epoch 5/20 完成 | 训练准确率: 67.76% | 测试准确率: 74.09%
Epoch: 6/20 | Batch: 100/782 | 单Batch损失: 0.8804 | 累计平均损失: 0.8748
Epoch: 6/20 | Batch: 200/782 | 单Batch损失: 0.9413 | 累计平均损失: 0.8779
Epoch: 6/20 | Batch: 300/782 | 单Batch损失: 0.9451 | 累计平均损失: 0.8813
Epoch: 6/20 | Batch: 400/782 | 单Batch损失: 0.9844 | 累计平均损失: 0.8811
Epoch: 6/20 | Batch: 500/782 | 单Batch损失: 0.9123 | 累计平均损失: 0.8804
Epoch: 6/20 | Batch: 600/782 | 单Batch损失: 0.7724 | 累计平均损失: 0.8747
Epoch: 6/20 | Batch: 700/782 | 单Batch损失: 0.9191 | 累计平均损失: 0.8738
Epoch 6/20 完成 | 训练准确率: 69.24% | 测试准确率: 74.44%
Epoch: 7/20 | Batch: 100/782 | 单Batch损失: 0.4618 | 累计平均损失: 0.8522
Epoch: 7/20 | Batch: 200/782 | 单Batch损失: 1.0956 | 累计平均损失: 0.8398
Epoch: 7/20 | Batch: 300/782 | 单Batch损失: 0.7080 | 累计平均损失: 0.8442
Epoch: 7/20 | Batch: 400/782 | 单Batch损失: 0.8755 | 累计平均损失: 0.8423
Epoch: 7/20 | Batch: 500/782 | 单Batch损失: 1.0161 | 累计平均损失: 0.8451
Epoch: 7/20 | Batch: 600/782 | 单Batch损失: 0.9611 | 累计平均损失: 0.8436
Epoch: 7/20 | Batch: 700/782 | 单Batch损失: 0.9344 | 累计平均损失: 0.8433
Epoch 7/20 完成 | 训练准确率: 70.60% | 测试准确率: 75.97%
Epoch: 8/20 | Batch: 100/782 | 单Batch损失: 0.5846 | 累计平均损失: 0.7982
Epoch: 8/20 | Batch: 200/782 | 单Batch损失: 1.1336 | 累计平均损失: 0.8046
Epoch: 8/20 | Batch: 300/782 | 单Batch损失: 0.7393 | 累计平均损失: 0.8122
Epoch: 8/20 | Batch: 400/782 | 单Batch损失: 0.8892 | 累计平均损失: 0.8108
Epoch: 8/20 | Batch: 500/782 | 单Batch损失: 0.9932 | 累计平均损失: 0.8128
Epoch: 8/20 | Batch: 600/782 | 单Batch损失: 0.8610 | 累计平均损失: 0.8154
Epoch: 8/20 | Batch: 700/782 | 单Batch损失: 1.0081 | 累计平均损失: 0.8130
Epoch 8/20 完成 | 训练准确率: 71.44% | 测试准确率: 76.04%
Epoch: 9/20 | Batch: 100/782 | 单Batch损失: 0.8448 | 累计平均损失: 0.8206
Epoch: 9/20 | Batch: 200/782 | 单Batch损失: 0.6494 | 累计平均损失: 0.8086
Epoch: 9/20 | Batch: 300/782 | 单Batch损失: 0.8203 | 累计平均损失: 0.8021
Epoch: 9/20 | Batch: 400/782 | 单Batch损失: 0.6053 | 累计平均损失: 0.7929
Epoch: 9/20 | Batch: 500/782 | 单Batch损失: 0.8298 | 累计平均损失: 0.7890
Epoch: 9/20 | Batch: 600/782 | 单Batch损失: 0.9492 | 累计平均损失: 0.7873
Epoch: 9/20 | Batch: 700/782 | 单Batch损失: 0.7991 | 累计平均损失: 0.7889
Epoch 9/20 完成 | 训练准确率: 72.75% | 测试准确率: 77.43%
Epoch: 10/20 | Batch: 100/782 | 单Batch损失: 0.7773 | 累计平均损失: 0.7684
Epoch: 10/20 | Batch: 200/782 | 单Batch损失: 0.7030 | 累计平均损失: 0.7681
Epoch: 10/20 | Batch: 300/782 | 单Batch损失: 0.7726 | 累计平均损失: 0.7708
Epoch: 10/20 | Batch: 400/782 | 单Batch损失: 0.7785 | 累计平均损失: 0.7681
Epoch: 10/20 | Batch: 500/782 | 单Batch损失: 0.8096 | 累计平均损失: 0.7653
Epoch: 10/20 | Batch: 600/782 | 单Batch损失: 0.6069 | 累计平均损失: 0.7635
Epoch: 10/20 | Batch: 700/782 | 单Batch损失: 0.5608 | 累计平均损失: 0.7630
Epoch 10/20 完成 | 训练准确率: 73.20% | 测试准确率: 76.64%
Epoch: 11/20 | Batch: 100/782 | 单Batch损失: 0.7491 | 累计平均损失: 0.7709
Epoch: 11/20 | Batch: 200/782 | 单Batch损失: 0.8199 | 累计平均损失: 0.7523
Epoch: 11/20 | Batch: 300/782 | 单Batch损失: 1.0428 | 累计平均损失: 0.7427
Epoch: 11/20 | Batch: 400/782 | 单Batch损失: 0.7862 | 累计平均损失: 0.7416
Epoch: 11/20 | Batch: 500/782 | 单Batch损失: 0.7416 | 累计平均损失: 0.7450
Epoch: 11/20 | Batch: 600/782 | 单Batch损失: 0.8239 | 累计平均损失: 0.7390
Epoch: 11/20 | Batch: 700/782 | 单Batch损失: 0.5744 | 累计平均损失: 0.7427
Epoch 11/20 完成 | 训练准确率: 74.04% | 测试准确率: 77.92%
Epoch: 12/20 | Batch: 100/782 | 单Batch损失: 0.7772 | 累计平均损失: 0.7281
Epoch: 12/20 | Batch: 200/782 | 单Batch损失: 0.6939 | 累计平均损失: 0.7296
Epoch: 12/20 | Batch: 300/782 | 单Batch损失: 0.6478 | 累计平均损失: 0.7348
Epoch: 12/20 | Batch: 400/782 | 单Batch损失: 0.6809 | 累计平均损失: 0.7306
Epoch: 12/20 | Batch: 500/782 | 单Batch损失: 0.7887 | 累计平均损失: 0.7308
Epoch: 12/20 | Batch: 600/782 | 单Batch损失: 0.9312 | 累计平均损失: 0.7293
Epoch: 12/20 | Batch: 700/782 | 单Batch损失: 0.8912 | 累计平均损失: 0.7249
Epoch 12/20 完成 | 训练准确率: 74.57% | 测试准确率: 78.34%
Epoch: 13/20 | Batch: 100/782 | 单Batch损失: 0.7660 | 累计平均损失: 0.7202
Epoch: 13/20 | Batch: 200/782 | 单Batch损失: 0.8096 | 累计平均损失: 0.7066
Epoch: 13/20 | Batch: 300/782 | 单Batch损失: 0.6760 | 累计平均损失: 0.7041
Epoch: 13/20 | Batch: 400/782 | 单Batch损失: 0.9175 | 累计平均损失: 0.7036
Epoch: 13/20 | Batch: 500/782 | 单Batch损失: 0.7499 | 累计平均损失: 0.7067
Epoch: 13/20 | Batch: 600/782 | 单Batch损失: 0.5950 | 累计平均损失: 0.7061
Epoch: 13/20 | Batch: 700/782 | 单Batch损失: 0.5243 | 累计平均损失: 0.7101
Epoch 13/20 完成 | 训练准确率: 75.14% | 测试准确率: 78.82%
Epoch: 14/20 | Batch: 100/782 | 单Batch损失: 0.4825 | 累计平均损失: 0.6837
Epoch: 14/20 | Batch: 200/782 | 单Batch损失: 0.6175 | 累计平均损失: 0.6888
Epoch: 14/20 | Batch: 300/782 | 单Batch损失: 0.7952 | 累计平均损失: 0.6866
Epoch: 14/20 | Batch: 400/782 | 单Batch损失: 0.5896 | 累计平均损失: 0.6942
Epoch: 14/20 | Batch: 500/782 | 单Batch损失: 0.6090 | 累计平均损失: 0.6938
Epoch: 14/20 | Batch: 600/782 | 单Batch损失: 0.7104 | 累计平均损失: 0.6953
Epoch: 14/20 | Batch: 700/782 | 单Batch损失: 0.4085 | 累计平均损失: 0.6972
Epoch 14/20 完成 | 训练准确率: 75.52% | 测试准确率: 78.96%
Epoch: 15/20 | Batch: 100/782 | 单Batch损失: 0.6176 | 累计平均损失: 0.6590
Epoch: 15/20 | Batch: 200/782 | 单Batch损失: 0.4711 | 累计平均损失: 0.6702
Epoch: 15/20 | Batch: 300/782 | 单Batch损失: 0.6192 | 累计平均损失: 0.6718
Epoch: 15/20 | Batch: 400/782 | 单Batch损失: 0.9684 | 累计平均损失: 0.6750
Epoch: 15/20 | Batch: 500/782 | 单Batch损失: 0.5928 | 累计平均损失: 0.6773
Epoch: 15/20 | Batch: 600/782 | 单Batch损失: 0.7290 | 累计平均损失: 0.6795
Epoch: 15/20 | Batch: 700/782 | 单Batch损失: 0.6996 | 累计平均损失: 0.6792
Epoch 15/20 完成 | 训练准确率: 76.17% | 测试准确率: 79.87%
Epoch: 16/20 | Batch: 100/782 | 单Batch损失: 0.4865 | 累计平均损失: 0.6782
Epoch: 16/20 | Batch: 200/782 | 单Batch损失: 0.5581 | 累计平均损失: 0.6671
Epoch: 16/20 | Batch: 300/782 | 单Batch损失: 0.6508 | 累计平均损失: 0.6648
Epoch: 16/20 | Batch: 400/782 | 单Batch损失: 0.8125 | 累计平均损失: 0.6715
Epoch: 16/20 | Batch: 500/782 | 单Batch损失: 0.5303 | 累计平均损失: 0.6735
Epoch: 16/20 | Batch: 600/782 | 单Batch损失: 0.6881 | 累计平均损失: 0.6737
Epoch: 16/20 | Batch: 700/782 | 单Batch损失: 0.9869 | 累计平均损失: 0.6726
Epoch 16/20 完成 | 训练准确率: 76.63% | 测试准确率: 79.73%
Epoch: 17/20 | Batch: 100/782 | 单Batch损失: 0.5943 | 累计平均损失: 0.6603
Epoch: 17/20 | Batch: 200/782 | 单Batch损失: 0.8486 | 累计平均损失: 0.6590
Epoch: 17/20 | Batch: 300/782 | 单Batch损失: 0.5727 | 累计平均损失: 0.6586
Epoch: 17/20 | Batch: 400/782 | 单Batch损失: 0.6489 | 累计平均损失: 0.6592
Epoch: 17/20 | Batch: 500/782 | 单Batch损失: 0.7211 | 累计平均损失: 0.6612
Epoch: 17/20 | Batch: 600/782 | 单Batch损失: 0.5552 | 累计平均损失: 0.6615
Epoch: 17/20 | Batch: 700/782 | 单Batch损失: 0.5500 | 累计平均损失: 0.6617
Epoch 17/20 完成 | 训练准确率: 76.85% | 测试准确率: 80.07%
Epoch: 18/20 | Batch: 100/782 | 单Batch损失: 0.6643 | 累计平均损失: 0.6195
Epoch: 18/20 | Batch: 200/782 | 单Batch损失: 0.5175 | 累计平均损失: 0.6383
Epoch: 18/20 | Batch: 300/782 | 单Batch损失: 0.8941 | 累计平均损失: 0.6423
Epoch: 18/20 | Batch: 400/782 | 单Batch损失: 0.5957 | 累计平均损失: 0.6494
Epoch: 18/20 | Batch: 500/782 | 单Batch损失: 0.6997 | 累计平均损失: 0.6515
Epoch: 18/20 | Batch: 600/782 | 单Batch损失: 0.8387 | 累计平均损失: 0.6526
Epoch: 18/20 | Batch: 700/782 | 单Batch损失: 0.6168 | 累计平均损失: 0.6502
Epoch 18/20 完成 | 训练准确率: 77.20% | 测试准确率: 80.18%
Epoch: 19/20 | Batch: 100/782 | 单Batch损失: 0.5368 | 累计平均损失: 0.6308
Epoch: 19/20 | Batch: 200/782 | 单Batch损失: 0.4568 | 累计平均损失: 0.6476
Epoch: 19/20 | Batch: 300/782 | 单Batch损失: 0.4435 | 累计平均损失: 0.6396
Epoch: 19/20 | Batch: 400/782 | 单Batch损失: 0.5895 | 累计平均损失: 0.6407
Epoch: 19/20 | Batch: 500/782 | 单Batch损失: 0.9253 | 累计平均损失: 0.6424
Epoch: 19/20 | Batch: 600/782 | 单Batch损失: 0.5735 | 累计平均损失: 0.6417
Epoch: 19/20 | Batch: 700/782 | 单Batch损失: 0.6145 | 累计平均损失: 0.6418
Epoch 19: reducing learning rate of group 0 to 5.0000e-04.
Epoch 19/20 完成 | 训练准确率: 77.65% | 测试准确率: 79.72%
Epoch: 20/20 | Batch: 100/782 | 单Batch损失: 0.5315 | 累计平均损失: 0.5793
Epoch: 20/20 | Batch: 200/782 | 单Batch损失: 0.5769 | 累计平均损失: 0.5837
Epoch: 20/20 | Batch: 300/782 | 单Batch损失: 0.4930 | 累计平均损失: 0.5806
Epoch: 20/20 | Batch: 400/782 | 单Batch损失: 0.5806 | 累计平均损失: 0.5821
Epoch: 20/20 | Batch: 500/782 | 单Batch损失: 0.8352 | 累计平均损失: 0.5847
Epoch: 20/20 | Batch: 600/782 | 单Batch损失: 0.5066 | 累计平均损失: 0.5831
Epoch: 20/20 | Batch: 700/782 | 单Batch损失: 0.5890 | 累计平均损失: 0.5816
Epoch 20/20 完成 | 训练准确率: 79.72% | 测试准确率: 81.85%

训练完成!最终测试准确率: 81.85%
# Choose a TensorBoard directory that does not collide with an earlier run:
# if the base path exists, use the first free "<base>_v<version>" instead.
log_dir = "runs/cifar10_cnn_exp"
if os.path.exists(log_dir):
    version = 1
    versioned_dir = f"{log_dir}_v{version}"
    while os.path.exists(versioned_dir):
        version += 1
        versioned_dir = f"{log_dir}_v{version}"
    log_dir = versioned_dir

writer = SummaryWriter(log_dir=log_dir)
print(f"TensorBoard 日志目录: {log_dir}")
def _log_parameter_histograms(model, writer, global_step):
    """Write a histogram of every named parameter, and of its gradient when one exists."""
    for name, param in model.named_parameters():
        writer.add_histogram(f'Weights/{name}', param, global_step)
        if param.grad is not None:
            writer.add_histogram(f'Gradients/{name}', param.grad, global_step)


def _evaluate(model, test_loader, criterion, device, max_wrong_samples):
    """Run one pass over ``test_loader`` in eval mode.

    Returns:
        A tuple ``(avg_loss, accuracy, wrong_images, wrong_labels, wrong_preds)``
        where ``avg_loss`` is the mean of the per-batch losses, ``accuracy`` is
        a percentage, ``wrong_images`` is a list of CPU image tensors and the
        two label lists hold plain ``int`` class indices.
    """
    per_batch_cap = 8  # at most this many misclassified samples from one batch

    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0
    wrong_images, wrong_labels, wrong_preds = [], [], []

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss_sum += criterion(output, target).item()
            _, predicted = output.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()

            wrong_mask = predicted != target
            if not wrong_mask.any():
                continue

            # Respect both the per-batch cap and the remaining overall budget.
            take = per_batch_cap
            if max_wrong_samples is not None:
                take = min(take, max_wrong_samples - len(wrong_images))
            if take <= 0:
                continue

            wrong_images.extend(data[wrong_mask][:take].cpu())
            # .tolist() yields Python ints, which index ``classes`` directly.
            wrong_labels.extend(target[wrong_mask][:take].tolist())
            wrong_preds.extend(predicted[wrong_mask][:take].tolist())

    avg_loss = loss_sum / len(test_loader)
    accuracy = 100. * correct / total
    return avg_loss, accuracy, wrong_images, wrong_labels, wrong_preds


def train(model, train_loader, test_loader, criterion, optimizer, scheduler, device, epochs, writer,
          max_wrong_samples=8):
    """Train ``model`` for ``epochs`` epochs and log the run to TensorBoard.

    Logged to ``writer``: the model graph and a grid of the first 8 training
    images (once), per-batch loss / running accuracy / learning rate, weight
    and gradient histograms every 200 batches, per-epoch train and test
    loss / accuracy, and a grid plus captions of misclassified test samples.

    Args:
        model: Network to train; must already be on ``device``.
        train_loader: Iterable of ``(data, target)`` training batches.
        test_loader: Iterable of ``(data, target)`` evaluation batches.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: Optimizer updating ``model``'s parameters.
        scheduler: Stepped once per epoch with the epoch test loss
            (presumably ``ReduceLROnPlateau`` -- confirm against the caller).
        device: Device the batches are moved to.
        epochs: Number of epochs to run.
        writer: TensorBoard ``SummaryWriter``; it is closed before returning,
            including when training raises.
        max_wrong_samples: Upper bound on misclassified test images logged
            per epoch. ``None`` removes the bound (up to 8 per test batch).

    Returns:
        Test accuracy (percent) of the last epoch, or ``0.0`` when
        ``epochs`` is 0.
    """
    model.train()
    global_step = 0
    epoch_test_acc = 0.0  # defined up front so epochs == 0 still returns a value

    try:
        # One-off logging: model graph and a preview of the first training batch.
        # NOTE(review): if the dataset transform normalises pixels outside [0, 1],
        # these grids will look clipped in TensorBoard -- confirm the transform.
        images, _ = next(iter(train_loader))
        images = images.to(device)
        writer.add_graph(model, images)
        img_grid = torchvision.utils.make_grid(images[:8].cpu())
        writer.add_image('原始训练图像(增强前)', img_grid, global_step=0)

        for epoch in range(epochs):
            # Evaluation below switches to eval mode, so training mode has to be
            # restored at the start of every epoch, not just once before the loop.
            model.train()
            running_loss = 0.0
            correct = 0
            total = 0

            for batch_idx, (data, target) in enumerate(train_loader):
                data, target = data.to(device), target.to(device)
                optimizer.zero_grad()
                output = model(data)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()

                batch_loss = loss.item()
                running_loss += batch_loss
                _, predicted = output.max(1)
                total += target.size(0)
                correct += predicted.eq(target).sum().item()

                # Despite the tag name this is the running accuracy over the
                # epoch so far, not the accuracy of this single batch.
                batch_acc = 100. * correct / total
                writer.add_scalar('Train/Batch Loss', batch_loss, global_step)
                writer.add_scalar('Train/Batch Accuracy', batch_acc, global_step)
                writer.add_scalar('Train/Learning Rate', optimizer.param_groups[0]['lr'], global_step)

                if (batch_idx + 1) % 200 == 0:
                    _log_parameter_histograms(model, writer, global_step)
                global_step += 1

            epoch_train_loss = running_loss / len(train_loader)
            epoch_train_acc = 100. * correct / total
            writer.add_scalar('Train/Epoch Loss', epoch_train_loss, epoch)
            writer.add_scalar('Train/Epoch Accuracy', epoch_train_acc, epoch)

            (epoch_test_loss, epoch_test_acc,
             wrong_images, wrong_labels, wrong_preds) = _evaluate(
                model, test_loader, criterion, device, max_wrong_samples)
            writer.add_scalar('Test/Epoch Loss', epoch_test_loss, epoch)
            writer.add_scalar('Test/Epoch Accuracy', epoch_test_acc, epoch)

            if wrong_images:
                wrong_img_grid = torchvision.utils.make_grid(wrong_images)
                writer.add_image('错误预测样本', wrong_img_grid, epoch)
                wrong_text = [f"真实: {classes[wl]}, 预测: {classes[wp]}"
                              for wl, wp in zip(wrong_labels, wrong_preds)]
                # The text dashboard renders Markdown, where a bare newline does
                # not break the line; two trailing spaces force a hard break.
                writer.add_text('错误预测标签', '  \n'.join(wrong_text), epoch)

            scheduler.step(epoch_test_loss)
            print(f'Epoch {epoch+1}/{epochs} 完成 | 测试准确率: {epoch_test_acc:.2f}%')
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()

    return epoch_test_acc
# CIFAR-10 class names indexed by integer label; train() looks labels up in
# this tuple when it writes the captions for misclassified samples.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)
epochs = 20

print("开始使用CNN训练模型...")
print("训练后执行: tensorboard --logdir=runs 查看可视化")

# NOTE(review): model, criterion, optimizer, scheduler and device are defined
# outside this cell. If they were already used by an earlier training run,
# this call continues from that state (weights, LR schedule) instead of
# starting fresh -- confirm, and re-create them for an independent experiment.
final_accuracy = train(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    device=device,
    epochs=epochs,
    writer=writer,
)
print(f"训练完成!最终测试准确率: {final_accuracy:.2f}%")
TensorBoard 日志目录: runs/cifar10_cnn_exp_v2
开始使用CNN训练模型...
训练后执行: tensorboard --logdir=runs 查看可视化
Epoch 21: reducing learning rate of group 0 to 5.0000e-04.
Epoch 1/20 完成 | 测试准确率: 75.81%
Epoch 2/20 完成 | 测试准确率: 80.88%
Epoch 3/20 完成 | 测试准确率: 80.36%
Epoch 4/20 完成 | 测试准确率: 82.32%
Epoch 5/20 完成 | 测试准确率: 80.98%
Epoch 6/20 完成 | 测试准确率: 81.43%
Epoch 7/20 完成 | 测试准确率: 81.86%
Epoch 28: reducing learning rate of group 0 to 2.5000e-04.
Epoch 8/20 完成 | 测试准确率: 81.89%
Epoch 9/20 完成 | 测试准确率: 82.69%
Epoch 10/20 完成 | 测试准确率: 83.66%
Epoch 11/20 完成 | 测试准确率: 83.29%
Epoch 12/20 完成 | 测试准确率: 82.99%
Epoch 13/20 完成 | 测试准确率: 83.11%
Epoch 34: reducing learning rate of group 0 to 1.2500e-04.
Epoch 14/20 完成 | 测试准确率: 83.58%
Epoch 15/20 完成 | 测试准确率: 83.79%
Epoch 16/20 完成 | 测试准确率: 83.88%
Epoch 17/20 完成 | 测试准确率: 83.89%
Epoch 18/20 完成 | 测试准确率: 84.08%
Epoch 19/20 完成 | 测试准确率: 84.25%
Epoch 20/20 完成 | 测试准确率: 84.21%
训练完成!最终测试准确率: 84.21%
作业:采用微调策略,在 CIFAR-10 上训练 ResNet18,并用 TensorBoard 监控训练过程。
@浙大疏锦行