深度学习-AlexNet-EW帮帮网

文章目录

1. 网络结构

1.1 简介

AlexNet 是 2012 年 ImageNet 竞赛冠军获得者 Hinton 和他的学生 Alex Krizhevsky 设计的，该网络在 ImageNet LSVRC-2010 竞赛中错误率分别为 37.5%(top-1)和 17.0%(top-5)。

论文地址

在 AlexNet 中主要有以下几个特点：

使用 GPU 进行训练；
使用 ReLU 激活函数；
使用 LRN 局部响应归一化(这种归一化方法在以后的 CNN 中使用得越来越少，逐渐被 BatchNorm 替代)；
使用 Dropout，防止过拟合。

1.2 网络结构

	input	kernel size	padding	stride	output
conv	3x224x224	11x11	(1, 2)	4	96x55x55	relu
maxpool	96x55x55	3x3		2	96x27x27
conv	96x27x27	5x5	2	1	256x27x27	relu
maxpool	256x27x27	3x3		2	256x13x13
conv	256x13x13	3x3	1	1	384x13x13	relu
conv	384x13x13	3x3	1	1	384x13x13	relu
conv	384x13x13	3x3	1	1	256x13x13	relu
maxpool	256x13x13	3x3		2	256x6x6	flatten, dropout
fc	9216				4096	relu, dropout
fc	4096				2048	relu
fc	2048				1000

2. 代码实现(PyTorch)

使用数据集CIFAR10进行图像分类

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torchvision
from tqdm import tqdm
import numpy as np
from PIL import Image

import matplotlib.pyplot as plt
%matplotlib inline

class AlexNet(nn.Module):
    """AlexNet-style convolutional network for image classification.

    The feature extractor is five conv layers with three max-pools; the
    classifier is three fully connected layers (9216 -> 4096 -> 2048 ->
    ``num_classes``) with dropout in front of the first two.

    The first linear layer is sized for a 256x6x6 feature map, which is what
    a 3x224x224 input produces. Inputs whose spatial size does not reduce to
    6x6 will fail with a shape mismatch in ``classifier``.

    Args:
        init_weights: If True, re-initialise conv and linear layers with
            ``_initialize_weights`` instead of keeping PyTorch's defaults.
        num_classes: Number of output logits. Defaults to 10 (CIFAR-10).
    """

    def __init__(self, init_weights=False, num_classes=10):
        super().__init__()
        self.features = nn.Sequential(
            # 3x224x224 -> 96x55x55 -> 96x27x27
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            # 96x27x27 -> 256x27x27 -> 256x13x13
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            # Three 3x3 convs keep the 13x13 resolution: 256 -> 384 -> 384 -> 256
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),

            # 256x13x13 -> 256x6x6
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(p=0.5),
            nn.Linear(9216, 4096),  # 9216 = 256 * 6 * 6
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 2048),
            nn.ReLU(inplace=True),
            # Output size is configurable; the default of 10 matches CIFAR-10
            nn.Linear(2048, num_classes)
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a batch of images."""
        x = self.features(x)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Re-initialise every conv and linear layer in place.

        Conv weights use Kaiming-normal (fan_out); linear weights are drawn
        from a normal distribution; all biases are set to zero.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # 'relu' has the same gain as the default ('leaky_relu' with
                # slope 0); naming it states the intent explicitly.
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                # Normal distribution with mean 0 and standard deviation 0.01
                # (the third argument is std, not variance)
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

# Build the network with the custom weight initialisation enabled
net = AlexNet(init_weights=True)
# Bare expression: when run as a notebook cell this displays the module structure
net

out:
AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Dropout(p=0.5, inplace=False)
    (2): Linear(in_features=9216, out_features=4096, bias=True)
    (3): ReLU(inplace=True)
    (4): Dropout(p=0.5, inplace=False)
    (5): Linear(in_features=4096, out_features=2048, bias=True)
    (6): ReLU(inplace=True)
    (7): Linear(in_features=2048, out_features=10, bias=True)
  )
)

# Load the data and set up preprocessing.
# CIFAR-10 images are upscaled to the 224x224 resolution the network expects,
# then normalised per channel with the mean/std below.
resize = (224, 224)
mean = (0.5, 0.5, 0.5)
std = (0.5, 0.5, 0.5)

# Train and test share the same pipeline; each split gets its own Compose.
data_transform = {
    split: transforms.Compose([
        transforms.Resize(resize),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])
    for split in ("train", "test")
}

train_dataset = torchvision.datasets.CIFAR10(
    root="./data",
    train=True,
    transform=data_transform["train"],
    download=True,
)
test_dataset = torchvision.datasets.CIFAR10(
    root="./data",
    train=False,
    transform=data_transform["test"],
    download=True,
)
# Human-readable names, indexed by CIFAR-10 label id
classes = (
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
)

batch_size = 512
# Only the training loader shuffles; evaluation order does not matter.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    num_workers=4,
)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

# Training: each epoch runs one pass over the training set followed by a
# full evaluation pass over the test set.
net.to(device)
loss_fn.to(device)
num_epochs = 10

for epoch in range(num_epochs):
    # 1-based label so progress reads "1/10" .. "10/10" rather than "0/10" .. "9/10"
    epoch_label = f"{epoch + 1}/{num_epochs}"

    net.train()
    running_loss = 0.0
    for inputs, labels in tqdm(train_dataloader, desc=f"Train Epoch: {epoch_label}"):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        # Sum of per-batch mean losses; averaged over batches when reported
        running_loss += loss.item()

    # Performance on the test set (dropout disabled, no gradients tracked)
    net.eval()
    accuracy_num = 0
    with torch.no_grad():
        for inputs, labels in tqdm(test_dataloader, desc=f"Test  Epoch: {epoch_label}"):
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = net(inputs)

            # outputs already live on `device`, so no extra transfer is needed
            predict_y = outputs.argmax(dim=1)
            accuracy_num += (predict_y == labels).sum().item()
    print(f"Epoch: {epoch_label}; loss: {np.round(running_loss / len(train_dataloader), 3)}; Acc: {np.round(accuracy_num / len(test_dataset) * 100, 2)} %")

# The CUDA memory report only accepts a CUDA device; skip it on the CPU
# fallback instead of crashing after training has finished.
if device.type == "cuda":
    print(torch.cuda.memory_summary(device, abbreviated=True))

# Run the trained model on a single image and print the predicted class name.
# The file is opened in a context manager so its handle is released, and the
# image is forced to 3-channel RGB because the normalisation and the first
# conv layer both expect exactly three channels (grayscale/RGBA would fail).
with Image.open("/root/autodl-tmp/DogsVSCats/train/train/cat.100.jpg") as img_file:
    img = img_file.convert("RGB")
plt.imshow(img)
trans = data_transform["test"]
img = trans(img)
# Add the batch dimension: (C, H, W) -> (1, C, H, W)
img = torch.unsqueeze(img, dim=0)
net.eval()
with torch.no_grad():
    img = img.to(device)
    output = net(img)
    # Index of the largest logit is the predicted CIFAR-10 label id
    predict = output.argmax(dim=1).item()
    print(f"label: {classes[predict]}")

深度学习-AlexNet

文章目录

1. 网络结构

1.1 简介

1.2 网络结构

2. 代码实现(Pytorch)

网站公告

今日签到

热门文章

最新发布