第10周:Pytorch实现车牌识别

发布于:2025-03-23 ⋅ 阅读:(28) ⋅ 点赞:(0)

目录

前言

一、导入数据

1. 获取类别名

 2. 数据可视化

3. 标签数字化 

4. 加载数据文件 

 5. 划分数据

二、自建模型 

三、模型训练

1. 优化器与损失函数

2. 模型的训练 

四、结果分析 

五、总结


前言


一、导入数据

from torchvision.transforms import transforms
from torch.utils.data       import DataLoader
from torchvision            import datasets
import torchvision.models   as models
import torch.nn.functional  as F
import torch.nn             as nn
import torch,torchvision

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

1. 获取类别名

data_paths     = list(data_dir.glob('*'))
data_paths_str = [str(path) for path in data_paths]
data_paths_str
['沪G1CE81', '云G86LR6', '鄂U71R9F'。。。
'辽Z9C5T4', '贵NY7W90', '渝H3V0S4', '蒙A05N5Y']
data_paths     = list(data_dir.glob('*'))
data_paths_str = [str(path) for path in data_paths]
data_paths_str

['/Users/Downloads/015_licence_plate/000008250_沪G1CE81.jpg',
 '/Users/Downloads/015_licence_plate/000015082_云G86LR6.jpg',
 '/Users/Downloads/015_licence_plate/000004721_鄂U71R9F.jpg',。。。
'/Users/haorunkun/Downloads/015_licence_plate/000006515_鄂Q8S08J.jpg',
 ...]

 2. 数据可视化

plt.figure(figsize=(14,5))
plt.suptitle("数据示例",fontsize=15)

for i in range(18):
    plt.subplot(3,6,i+1)
    # plt.xticks([])
    # plt.yticks([])
    # plt.grid(False)
    
    # 显示图片
    images = plt.imread(data_paths_str[i])
    plt.imshow(images)

plt.show()

3. 标签数字化 

import numpy as np

char_enum = ["京","沪","津","渝","冀","晋","蒙","辽","吉","黑","苏","浙","皖","闽","赣","鲁",\
              "豫","鄂","湘","粤","桂","琼","川","贵","云","藏","陕","甘","青","宁","新","军","使"]

number   = [str(i) for i in range(0, 10)]    # 0 到 9 的数字
alphabet = [chr(i) for i in range(65, 91)]   # A 到 Z 的字母

char_set       = char_enum + number + alphabet
char_set_len   = len(char_set)
label_name_len = len(classeNames[0])

# 将字符串数字化
def text2vec(text):
    vector = np.zeros([label_name_len, char_set_len])
    for i, c in enumerate(text):
        idx = char_set.index(c)
        vector[i][idx] = 1.0
    return vector

all_labels = [text2vec(i) for i in classeNames]

4. 加载数据文件 

import os
import pandas as pd
from torchvision.io import read_image
from torch.utils.data import Dataset
import torch.utils.data as data
from PIL import Image

class MyDataset(data.Dataset):
    def __init__(self, all_labels, data_paths_str, transform):
        self.img_labels = all_labels      # 获取标签信息
        self.img_dir    = data_paths_str  # 图像目录路径
        self.transform  = transform       # 目标转换函数

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, index):
        image    = Image.open(self.img_dir[index]).convert('RGB')#plt.imread(self.img_dir[index])  # 使用 torchvision.io.read_image 读取图像
        label    = self.img_labels[index]  # 获取图像对应的标签
        
        if self.transform:
            image = self.transform(image)
            
        return image, label  # 返回图像和标签
total_datadir = './03_traffic_sign/'

# 关于transforms.Compose的更多介绍可以参考:https://blog.csdn.net/qq_38251616/article/details/124878863
train_transforms = transforms.Compose([
    transforms.Resize([224, 224]),  # 将输入图片resize成统一尺寸
    transforms.ToTensor(),          # 将PIL Image或numpy.ndarray转换为tensor,并归一化到[0,1]之间
    transforms.Normalize(           # 标准化处理-->转换为标准正太分布(高斯分布),使模型更容易收敛
        mean=[0.485, 0.456, 0.406], 
        std =[0.229, 0.224, 0.225])  # 其中 mean=[0.485,0.456,0.406]与std=[0.229,0.224,0.225] 从数据集中随机抽样计算得到的。
])

total_data = MyDataset(all_labels, data_paths_str, train_transforms)
total_data

 5. 划分数据

train_size = int(0.8 * len(total_data))
test_size  = len(total_data) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(total_data, [train_size, test_size])
train_size,test_size

(10940, 2735)

train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=16,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=16,
                                          shuffle=True)

print("The number of images in a training set is: ", len(train_loader)*16)
print("The number of images in a test set is: ", len(test_loader)*16)
print("The number of batches per epoch is: ", len(train_loader))

The number of images in a training set is:  10944
The number of images in a test set is:  2736
The number of batches per epoch is:  684 

for X, y in test_loader:
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    break

 Shape of X [N, C, H, W]:  torch.Size([16, 3, 224, 224])
Shape of y:  torch.Size([16, 7, 69]) torch.float64


二、自建模型 

class Network_bn(nn.Module):
    def __init__(self):
        super(Network_bn, self).__init__()
        """
        nn.Conv2d()函数:
        第一个参数(in_channels)是输入的channel数量
        第二个参数(out_channels)是输出的channel数量
        第三个参数(kernel_size)是卷积核大小
        第四个参数(stride)是步长,默认为1
        第五个参数(padding)是填充大小,默认为0
        """
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=5, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(12)
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=5, stride=1, padding=0)
        self.bn2 = nn.BatchNorm2d(12)
        self.pool = nn.MaxPool2d(2,2)
        self.conv4 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=5, stride=1, padding=0)
        self.bn4 = nn.BatchNorm2d(24)
        self.conv5 = nn.Conv2d(in_channels=24, out_channels=24, kernel_size=5, stride=1, padding=0)
        self.bn5 = nn.BatchNorm2d(24)
        self.fc1 = nn.Linear(24*50*50, label_name_len*char_set_len)
        self.reshape = Reshape([label_name_len,char_set_len])

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))      
        x = F.relu(self.bn2(self.conv2(x)))     
        x = self.pool(x)                        
        x = F.relu(self.bn4(self.conv4(x)))     
        x = F.relu(self.bn5(self.conv5(x)))  
        x = self.pool(x)                        
        x = x.view(-1, 24*50*50)
        x = self.fc1(x)
        
        # 最终reshape
        x = self.reshape(x)

        return x
    
# 定义Reshape层
class Reshape(nn.Module):
    def __init__(self, shape):
        super(Reshape, self).__init__()
        self.shape = shape

    def forward(self, x):
        return x.view(x.size(0), *self.shape)

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

model = Network_bn().to(device)
model

Using cuda device

Network_bn(
  (conv1): Conv2d(3, 12, kernel_size=(5, 5), stride=(1, 1))
  (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(12, 12, kernel_size=(5, 5), stride=(1, 1))
  (bn2): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv2d(12, 24, kernel_size=(5, 5), stride=(1, 1))
  (bn4): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1))
  (bn5): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=60000, out_features=483, bias=True)
  (reshape): Reshape()
)

import torchsummary

''' 显示网络结构 '''
torchsummary.summary(model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 12, 220, 220]             912
       BatchNorm2d-2         [-1, 12, 220, 220]              24
            Conv2d-3         [-1, 12, 216, 216]           3,612
       BatchNorm2d-4         [-1, 12, 216, 216]              24
         MaxPool2d-5         [-1, 12, 108, 108]               0
            Conv2d-6         [-1, 24, 104, 104]           7,224
       BatchNorm2d-7         [-1, 24, 104, 104]              48
            Conv2d-8         [-1, 24, 100, 100]          14,424
       BatchNorm2d-9         [-1, 24, 100, 100]              48
        MaxPool2d-10           [-1, 24, 50, 50]               0
           Linear-11                  [-1, 483]      28,980,483
          Reshape-12                [-1, 7, 69]               0
================================================================
Total params: 29,006,799
Trainable params: 29,006,799
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 26.56
Params size (MB): 110.65
Estimated Total Size (MB): 137.79
---------------------------------------------------------------- 


三、模型训练

1. 优化器与损失函数

optimizer  = torch.optim.Adam(model.parameters(), 
                              lr=1e-4, 
                              weight_decay=0.0001)

loss_model = nn.CrossEntropyLoss()
from torch.autograd import Variable

def test(model, test_loader, loss_model):
    size = len(test_loader.dataset)
    num_batches = len(test_loader)
    
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in test_loader:
            X, y = X.to(device), y.to(device)
            pred = model(X)

            test_loss += loss_model(pred, y).item()
            
    test_loss /= num_batches

    print(f"Avg loss: {test_loss:>8f} \n")
    return correct,test_loss

def train(model,train_loader,loss_model,optimizer):
    model=model.to(device)
    model.train()
    
    for i, (images, labels) in enumerate(train_loader, 0): #0是标起始位置的值。

        images = Variable(images.to(device))
        labels = Variable(labels.to(device))

        optimizer.zero_grad()
        outputs = model(images)

        loss = loss_model(outputs, labels)
        loss.backward()
        optimizer.step()

        if i % 1000 == 0:    
            print('[%5d] loss: %.3f' % (i, loss))

2. 模型的训练 

test_acc_list  = []
test_loss_list = []
epochs = 30

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(model,train_loader,loss_model,optimizer)
    test_acc,test_loss = test(model, test_loader, loss_model)
    test_acc_list.append(test_acc)
    test_loss_list.append(test_loss)
print("Done!")

 

。。。

 


四、结果分析 

import numpy as np
import matplotlib.pyplot as plt

from datetime import datetime
current_time = datetime.now() # 获取当前时间

x = [i for i in range(1,31)]

plt.plot(x, test_loss_list, label="Loss", alpha=0.8)

plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title(current_time) # 打卡请带上时间戳,否则代码截图无效

plt.legend()    
plt.show()


五、总结

在之前的案例中,我们多是使用datasets.ImageFolder函数直接导入已经分类好的数据集形成Dataset,然后使用DataLoader加载Dataset,但是这次对无法分类数据集,自定义一个MyDataset加载车牌数据集并完成车牌识别。模型的输出[-1, 7, 69],我们之前的网络结构输出都是[-1, 7][-1, 2][-1, 4]这样的二维数据,如果要求模型输出结果是多维数据,那么本案例将是很好的示例