Pytorch-ResNet-50 网络表情识别项目(深度学习)

发布于:2024-06-29 ⋅ 阅读:(10) ⋅ 点赞:(0)


本项目采用的是FER-2013数据集加上博主的一些其他数据集整合的
FER-2013数据集链接如下
https://www.kaggle.com/datasets/msambare/fer2013

1. 导入依赖库

代码开始处导入了多个Python库,用于图像处理、数学运算、深度学习模型的构建和训练。

import cv2
import numpy as np
from PIL import ImageFont, ImageDraw
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.optim import Adam
import matplotlib.pyplot as plt
from PIL import Image

2. 加载中文字体文件

加载中文字体文件以便在图像上绘制中文标签。

font_path = "SourceHanSansSC-Bold.otf"
font = ImageFont.truetype(font_path, 30)

3. 设置图像尺寸和训练参数

定义了图像的目标尺寸、训练轮数和每批的样本数量。

img_size = 48
targetx = 48
targety = 48
epochs = 50   
batch_size = 64

4. 数据增强和预处理

定义了一个转换流程,包括调整图像大小、随机水平翻转和转换为张量。

transform = transforms.Compose([
    transforms.Resize((targetx, targety)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

5. 加载数据集

使用ImageFolder加载训练和测试数据集,并通过DataLoader进行批量加载。

train_dataset = datasets.ImageFolder(root="./FER-2013/train" , transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.ImageFolder(root="./FER-2013/test", transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

6. 检查数据维度

定义了一个函数check_data_dimensions来检查数据加载器返回的批次中图像和标签的维度。

def check_data_dimensions(loader):
    for images, labels in loader:
        print("Batch image size:", images.shape)
        print("Batch label size:", labels.shape)
        break

7. 定义ResNet50模型

创建了一个ResNet50模型,使用预训练权重,并替换最后的全连接层以适应表情识别的类别数。

import torchvision.models as models

class ResNet50Model(nn.Module):
    def __init__(self, num_classes=7):
        super(ResNet50Model, self).__init__()
        self.resnet50 = models.resnet50(pretrained=True)
        num_ftrs = self.resnet50.fc.in_features
        self.resnet50.fc = nn.Linear(num_ftrs, num_classes)
        
    def forward(self, x):
        return self.resnet50(x)

8. 初始化模型、损失函数和优化器

初始化了ResNet50模型,定义了损失函数和优化器,并根据GPU的可用性将它们移动到GPU。

model = ResNet50Model(num_classes=7)
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.0001, weight_decay=0.01)

9. 训练和测试函数

定义了训练和测试函数,用于迭代模型的训练和评估。

def train(model, train_loader, criterion, optimizer, device):
    # ...

def test(model, test_loader, criterion, device):
    # ...

10. 训练和测试模型

在指定的轮数内迭代训练和测试模型,并打印每个epoch的损失和准确率(实际代码中使用 num_epochs = 60,而非前面定义的 epochs = 50)。

num_epochs = 60
for epoch in range(num_epochs):
    # 训练和测试过程
    ...

11. 保存模型

训练完成后,保存ResNet50模型到文件。

torch.save(model.state_dict(), 'resnet50_final.pth')

12. 评估数据保存和可视化

将训练与测试的损失及准确率保存到.npy文件中,并使用matplotlib绘制损失和准确率图表。

np.save('train_losses.npy', train_losses)
# ...
plt.show()

原码

import cv2
import numpy as np
from PIL import ImageFont, ImageDraw
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.optim import Adam
import matplotlib.pyplot as plt
from PIL import Image
#%%
# 加载中文字体文件
font_path = "SourceHanSansSC-Bold.otf"
font = ImageFont.truetype(font_path, 30)
#%%
img_size = 48 #original size of the image
targetx = 48
targety = 48
epochs = 50   
batch_size = 64
# 数据增强和预处理
transform = transforms.Compose([
    transforms.Resize((targetx, targety)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

# 加载数据集
train_dataset = datasets.ImageFolder(root="./FER-2013/train" , transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.ImageFolder(root="./FER-2013/test", transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
#%%
# 检查数据维度
def check_data_dimensions(loader):
    for images, labels in loader:
        print("Batch image size:", images.shape)  # 批次中图像的维度
        print("Batch label size:", labels.shape)  # 批次中标签的维度
        break  # 只查看第一个批次即可

# 查看训练数据和测试数据的维度
print("Training data dimensions:")
check_data_dimensions(train_loader)
print("\nTesting data dimensions:")
check_data_dimensions(test_loader)
#%%
import torch.nn as nn
import torchvision.models as models

class ResNet50Model(nn.Module):
    def __init__(self, num_classes=7):
        super(ResNet50Model, self).__init__()
        #加载预训练的ResNet-50模型
        self.resnet50 = models.resnet50(pretrained=True)
        
        #获取ResNet-50模型的最后一层全连接层的输入特征数量num_ftrs = self.resnet50.fc.in_features。
        num_ftrs = self.resnet50.fc.in_features
        #将ResNet-50模型的最后一层全连接层替换为一个新的全连接层,输出特征数量设置为num_classes
        self.resnet50.fc = nn.Linear(num_ftrs, num_classes)
        
    #forward方法定义了前向传播过程。
    # 在这个简单的类中,仅仅是调用self.resnet50(x),将输入x传递给预训练的ResNet-50模型进行前向传播。
    def forward(self, x):
        return self.resnet50(x)

# Example usage
model = ResNet50Model(num_classes=7)
print(model)
#%%
# 初始化模型、损失函数和优化器
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.0001, weight_decay=0.01)

# 如果GPU可用,移动模型和损失函数到GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
loss_fn.to(device)

# 训练模型
num_epochs = 60

train_losses = []
train_accuracies = []

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    total_correct = 0
    total_samples = 0

    for data, targets in train_loader:
        # 将输入和标签移动到GPU(如果可用)
        data, targets = data.to(device), targets.to(device)
        # 前向传播
        outputs = model(data)
        loss = loss_fn(outputs, targets)
        # 零梯度
        optimizer.zero_grad()
        # 反向传播和优化
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # 计算准确率
        _, predicted = outputs.max(1)
        total_correct += predicted.eq(targets).sum().item()
        total_samples += targets.size(0)

    avg_loss = total_loss / len(train_loader)
    train_losses.append(avg_loss)

    # 计算准确率
    avg_accuracy = total_correct / total_samples
    train_accuracies.append(avg_accuracy)

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss}')
    print(f'Epoch {epoch+1}/{num_epochs}, Accuracy: {avg_accuracy}')

# 保存最后训练完的模型
torch.save(model.state_dict(), 'resnet50_final.pth')
print("最后训练完的模型已保存!")

test_losses = []
test_accuracies = []
# 测试模型
for epoch in range(num_epochs):
    model.eval()
    total_loss = 0
    total_correct = 0
    total_samples = 0

    for data, targets in test_loader:
        data, targets = data.to(device), targets.to(device)
        outputs = model(data)
        loss = loss_fn(outputs, targets)
        optimizer.zero_grad()
        loss.backward()
        total_loss += loss.item()

        # 计算准确率
        _, predicted = outputs.max(1)
        total_correct += predicted.eq(targets).sum().item()
        total_samples += targets.size(0)

    avg_loss = total_loss / len(test_loader)
    test_losses.append(avg_loss)

    # 计算准确率
    avg_accuracy = total_correct / total_samples
    test_accuracies.append(avg_accuracy)

    print(f'Epoch {epoch+1}/{num_epochs}, test_Loss: {avg_loss}')
    print(f'Epoch {epoch+1}/{num_epochs}, test_Accuracy: {avg_accuracy}')


# 评估数据保存到.npy文件
np.save('train_losses.npy', train_losses)
np.save('train_accuracies.npy', train_accuracies)
np.save('test_losses.npy', test_losses)
np.save('test_accuracies.npy', test_accuracies)

# 绘制损失图
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label='Training Loss')
plt.title('Training Loss per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# 绘制准确率图
plt.figure(figsize=(10, 5))
plt.plot(train_accuracies, label='Training Accuracy')
plt.title('Training Accuracy per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

网站公告

今日签到

点亮在社区的每一天
去签到