动态调整神经网络批次-易微帮

简单示例

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# # 定义更简单的神经网络
# class SimpleNet(nn.Module):
#     def __init__(self):
#         super(SimpleNet, self).__init__()
#         self.fc1 = nn.Linear(28 * 28, 512)
#         self.fc2 = nn.Linear(512, 10)
#
#     def forward(self, x):
#         x = x.view(-1, 28 * 28)
#         x = torch.relu(self.fc1(x))
#         x = self.fc2(x)
#         return x

# 定义更复杂的神经网络
class MoreComplexNet(nn.Module):
    """Five-layer fully connected classifier for flattened 28x28 inputs.

    Layer widths are 784 -> 512 -> 256 -> 128 -> 64 -> 10. ReLU follows every
    layer except the last, whose raw outputs are returned unchanged.
    """

    def __init__(self):
        super().__init__()
        # Build fc1..fc5 in order so the attribute names (and therefore the
        # state-dict keys) and the parameter creation order stay the same.
        widths = (28 * 28, 512, 256, 128, 64, 10)
        for index, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{index}", nn.Linear(fan_in, fan_out))

    def forward(self, x):
        """Flatten ``x`` to rows of 784 values and return a 10-wide output per row."""
        hidden = x.view(-1, 28 * 28)
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            hidden = torch.relu(layer(hidden))
        # The final layer has no activation.
        return self.fc5(hidden)



# 获取当前可用的GPU内存
def get_free_gpu_memory():
    """Return the free memory on the module-level ``device``, in bytes.

    The value comes from ``torch.cuda.mem_get_info``, which reports the free
    memory of the whole device. The previous ``memory_reserved -
    memory_allocated`` difference only measured unused space inside PyTorch's
    own cache, which is close to zero right after ``empty_cache()``.

    Returns:
        Free device memory in bytes, or 0 when ``device`` is not a CUDA
        device (there is no GPU memory to report).
    """
    if torch.device(device).type != "cuda":
        return 0
    # Hand cached-but-unused blocks back to the driver so they count as free.
    torch.cuda.empty_cache()
    free_bytes, _total_bytes = torch.cuda.mem_get_info(device)
    return free_bytes

# 估算单个样本的内存占用
def estimate_sample_memory():
    """Estimate the GPU memory one sample needs for a forward pass, in GB.

    A throw-away ``MoreComplexNet`` processes a single random 1x28x28 input on
    the module-level ``device``. The estimate is the size of that input plus
    the peak growth in allocated memory during the forward pass, so model
    weights and anything already resident on the device are excluded.

    Returns:
        Estimated per-sample memory in GB, or 0.0 when ``device`` is not a
        CUDA device.
    """
    if torch.device(device).type != "cuda":
        return 0.0

    dummy_input = torch.randn(1, 1, 28, 28).to(device)  # includes the channel dimension
    dummy_model = MoreComplexNet().to(device)

    # Baseline taken after the weights and the input are resident, so only
    # memory created by the forward pass shows up in the peak delta.
    torch.cuda.synchronize(device)
    baseline_bytes = torch.cuda.memory_allocated(device)
    torch.cuda.reset_peak_memory_stats(device)

    # Autograd stays enabled so the activations kept for a backward pass are
    # counted, which is closer to what a training step holds per sample.
    dummy_output = dummy_model(dummy_input)
    torch.cuda.synchronize(device)
    peak_bytes = torch.cuda.max_memory_allocated(device)
    del dummy_output

    input_bytes = dummy_input.element_size() * dummy_input.nelement()
    per_sample_bytes = (peak_bytes - baseline_bytes) + input_bytes
    return per_sample_bytes / (1024 ** 3)

# 根据空闲内存调整批次大小
def adjust_batch_size(base_batch_size, free_memory, sample_memory, max_batch_size=2048):
    """Pick a batch size from the free memory and the per-sample memory cost.

    Args:
        base_batch_size: Lower bound; the result is never smaller than this.
        free_memory: Available memory, in the same unit as ``sample_memory``.
        sample_memory: Memory needed by one sample.
        max_batch_size: Upper bound applied to the memory-derived batch size.

    Returns:
        ``free_memory // sample_memory`` capped at ``max_batch_size`` and
        floored at ``base_batch_size``. When ``sample_memory`` is zero or
        negative (no usable estimate), ``base_batch_size`` is returned instead
        of raising ``ZeroDivisionError``.
    """
    if sample_memory <= 0:
        return base_batch_size
    batch_size = int(free_memory / sample_memory)
    # The floor is applied last, so a base size above the cap still wins.
    return max(base_batch_size, min(batch_size, max_batch_size))

# 输出GPU使用情况
def print_gpu_usage():
    """Print allocated versus total memory of the module-level ``device`` in GB.

    When ``device`` is not a CUDA device a placeholder line is printed,
    because ``torch.cuda.get_device_properties`` cannot be queried for it.
    """
    if torch.device(device).type != "cuda":
        print("GPU memory usage: n/a (not running on a CUDA device)")
        return

    bytes_per_gb = 1024 ** 3
    allocated_memory = torch.cuda.memory_allocated(device) / bytes_per_gb
    total_memory = torch.cuda.get_device_properties(device).total_memory / bytes_per_gb
    print(f"GPU memory usage: {allocated_memory:.2f} GB / {total_memory:.2f} GB ({(allocated_memory / total_memory) * 100:.2f}%)")

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# MNIST preprocessing: convert to a tensor, then normalise with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Training and test splits share the same root directory and preprocessing.
_mnist_options = {"root": './data', "transform": transform, "download": True}
train_dataset = datasets.MNIST(train=True, **_mnist_options)
test_dataset = datasets.MNIST(train=False, **_mnist_options)

# Model, loss function and optimizer.
model = MoreComplexNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Lower bound for the batch size; start small.
base_batch_size = 32

# Per-sample memory estimate, computed once before training starts.
sample_memory = estimate_sample_memory()

# Number of passes over the training set.
num_epochs = 10

# Train for num_epochs epochs, re-deriving the batch size from the free GPU
# memory at the start of every epoch.
for epoch in range(num_epochs):
    # get_free_gpu_memory() reports bytes while sample_memory (from
    # estimate_sample_memory()) is in GB. Convert to GB so the ratio computed
    # in adjust_batch_size() is an actual sample count.
    free_memory = get_free_gpu_memory() / (1024 ** 3)
    batch_size = adjust_batch_size(base_batch_size, free_memory, sample_memory)
    # The loader is rebuilt each epoch because the batch size may have changed.
    # num_workers=0 keeps data loading in the main process to reduce load.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)

    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean per-batch loss for the epoch.
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}, Batch size: {batch_size}")
    print_gpu_usage()  # report GPU memory usage

print("Training complete.")

1.
2.
3.
4.
5.
6.
7.
8.
9.
10.
11.
12.
13.
14.
15.
16.
17.
18.
19.
20.
21.
22.
23.
24.
25.
26.
27.
28.
29.
30.
31.
32.
33.
34.
35.
36.
37.
38.
39.
40.
41.
42.
43.
44.
45.
46.
47.
48.
49.
50.
51.
52.
53.
54.
55.
56.
57.
58.
59.
60.
61.
62.
63.
64.
65.
66.
67.
68.
69.
70.
71.
72.
73.
74.
75.
76.
77.
78.
79.
80.
81.
82.
83.
84.
85.
86.
87.
88.
89.
90.
91.
92.
93.
94.
95.
96.
97.
98.
99.
100.
101.
102.
103.
104.
105.
106.
107.
108.
109.
110.
111.
112.

这里是一个简单的示例，用于估算每批次样本所需的内存：

import torch

# Assumed inputs: 32 samples per batch, each taking 1 MB.
batch_size = 32
sample_size_mb = 1.0

# A batch holds batch_size samples, so its footprint (in MB) is the product.
batch_memory_mb = batch_size * sample_size_mb

print(f"每批次样本所需内存: {batch_memory_mb} MB")

以下是一个简单的示例，展示了如何使用Python的 sys.getsizeof() 函数估算一个列表中元素的平均内存占用：

import sys

# Toy dataset: every sample is a dict holding an image path and a label.
sample_dataset = [
    {"image": "data/sample_image.jpg", "label": 1},
    {"image": "data/another_sample.jpg", "label": 0},
]

# Total size of the sample objects as reported by sys.getsizeof (bytes),
# then averaged over the number of samples.
total_sample_bytes = sum(sys.getsizeof(sample) for sample in sample_dataset)
sample_memory = total_sample_bytes / len(sample_dataset)

print(f"每个样本的平均内存占用: {sample_memory} bytes")

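上面代码中 sum(sys.getsizeof(sample) for sample in sample_dataset) 这一部分的作用是计算列表 sample_dataset 中所有元素的内存大小总和；再除以 len(sample_dataset) 就得到每个样本的平均内存占用。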

具体解释如下：

for sample in sample_dataset: 这部分是一个生成器表达式(generator expression)，它会遍历 sample_dataset 中的每一个元素，并把元素依次赋值给 sample。

sys.getsizeof(sample): 对于每个 sample，这个函数会返回 sample 对象本身占用的内存大小(以字节为单位)。需要注意这是“浅层”大小：对字典来说只统计字典容器本身，不包含其中的键和值所引用的对象(例如图片路径字符串)占用的内存，因此得到的只是一个粗略的下限估计。

sum(…): 这个函数会把生成器表达式中每次迭代返回的值(即每个 sample 的内存大小)相加起来，得到所有样本占用的总内存大小。

因此，整体来说，sum(sys.getsizeof(sample) for sample in sample_dataset) 的目的是计算 sample_dataset 中所有样本占用的总内存大小(单位是字节)。

要获取当前进程中 PyTorch 已经分配的GPU内存量，可以使用PyTorch提供的torch.cuda.memory_allocated()函数。这个函数返回当前进程中张量实际占用的GPU内存量(以字节为单位)，不包含缓存分配器已预留但尚未使用的部分，也不包含其他进程占用的显存。如果需要查看整块GPU的空闲显存和总显存，可以使用torch.cuda.mem_get_info()。

例如，可以这样使用：

import torch

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Value reported by torch.cuda.memory_allocated for that device, in bytes.
allocated_memory = torch.cuda.memory_allocated(device)

# Convert bytes to GB for display.
bytes_per_gb = 1024 ** 3
print(f"当前已经使用的GPU内存: {allocated_memory / bytes_per_gb:.2f} GB")
这段代码会打印出当前已经使用的GPU内存量(以GB为单位)。请确保在运行这段代码之前，已经通过torch.cuda.is_available()检查GPU是否可用，并且已经初始化了PyTorch的设备。

动态调整神经网络批次

简单示例

网站公告

今日签到

热门文章

最新发布