Preface
This post studies the code in /home/tang/RL_exa/NCO_code-main/single_objective/LCH-Regret/Regret-POMO/CVRP/POMO/CVRPTrainer.py.
The file implements the trainer that runs the training of the model.
1. __init__(self, env_params, model_params, optimizer_params, trainer_params)
Function purpose
This function initializes the training setup and its main components, getting everything ready before training starts.
Function code
def __init__(self,
             env_params,
             model_params,
             optimizer_params,
             trainer_params):

    # save arguments
    self.env_params = env_params
    self.model_params = model_params
    self.optimizer_params = optimizer_params
    self.trainer_params = trainer_params

    # result folder, logger
    self.logger = getLogger(name='trainer')
    self.result_folder = get_result_folder()
    self.result_log = LogData()

    # cuda: pick the device (CUDA/CPU)
    USE_CUDA = self.trainer_params['use_cuda']
    if USE_CUDA:
        cuda_device_num = self.trainer_params['cuda_device_num']
        torch.cuda.set_device(cuda_device_num)
        device = torch.device('cuda', cuda_device_num)
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        device = torch.device('cpu')
        torch.set_default_tensor_type('torch.FloatTensor')

    # Main Components
    self.model = Model(**self.model_params)  # build the model from model_params
    self.env = Env(**self.env_params)        # build the environment that generates instances and defines the problem
    self.optimizer = Optimizer(self.model.parameters(), **self.optimizer_params['optimizer'])
    # optimizer (Adam) over the model parameters
    self.scheduler = Scheduler(self.optimizer, **self.optimizer_params['scheduler'])
    # learning-rate scheduler (MultiStepLR) that adjusts the optimizer's LR during training

    # Restore: optionally resume from a saved checkpoint
    self.start_epoch = 1
    model_load = trainer_params['model_load']
    if model_load['enable']:
        checkpoint_fullname = '{path}/checkpoint-{epoch}.pt'.format(**model_load)
        checkpoint = torch.load(checkpoint_fullname, map_location=device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.start_epoch = 1 + model_load['epoch']
        self.result_log.set_raw_data(checkpoint['result_log'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.scheduler.last_epoch = model_load['epoch'] - 1
        self.logger.info('Saved Model Loaded !!')

    # utility
    self.time_estimator = TimeEstimator()
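For reference, here is a hypothetical sketch of how a driver script would build and start the trainer. The keys follow what __init__ reads; every value below is an illustrative assumption, not the repository's actual configuration.

# Hypothetical parameter dicts; values are placeholders, not the repo's settings.
env_params = {'problem_size': 100, 'pomo_size': 100}
model_params = {'embedding_dim': 128, 'encoder_layer_num': 6}  # plus the other model hyperparameters
optimizer_params = {
    'optimizer': {'lr': 1e-4, 'weight_decay': 1e-6},
    'scheduler': {'milestones': [8001], 'gamma': 0.1},
}
trainer_params = {
    'use_cuda': True, 'cuda_device_num': 0,
    'epochs': 100, 'train_episodes': 10000, 'train_batch_size': 64,
    'logging': {'model_save_interval': 10, 'img_save_interval': 10,
                'log_image_params_1': {}, 'log_image_params_2': {}},
    'model_load': {'enable': False},
}

trainer = CVRPTrainer(env_params=env_params,
                      model_params=model_params,
                      optimizer_params=optimizer_params,
                      trainer_params=trainer_params)
trainer.run()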
2. run(self)
Function purpose
run is the core method of the CVRPTrainer class: it drives the entire training process.
Acting as the training-loop controller, it:
- runs every training epoch, calling _train_one_epoch and appending the results to the log;
- decays the learning rate once per epoch through the scheduler (see the sketch after this list);
- periodically saves the model checkpoint, the log images, and the training results;
- prints a training summary and the full log once training is finished.
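The Scheduler alias used here is torch.optim.lr_scheduler.MultiStepLR, so "LR decay" means the learning rate is multiplied by gamma each time the epoch counter passes a milestone. Below is a minimal, self-contained sketch of that behaviour; the learning rate, milestones, and gamma are placeholders, not the values from optimizer_params.

import torch

# Toy model/optimizer, only to show how MultiStepLR reacts to scheduler.step() once per epoch.
model = torch.nn.Linear(4, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[3, 5], gamma=0.1)

for epoch in range(1, 7):
    optimizer.step()      # a (dummy) optimization step
    scheduler.step()      # run() calls this once per epoch
    print(epoch, optimizer.param_groups[0]['lr'])
# The LR stays at 1e-4, drops to 1e-5 after epoch 3, and to 1e-6 after epoch 5.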
Function code
def run(self):
    self.time_estimator.reset(self.start_epoch)
    for epoch in range(self.start_epoch, self.trainer_params['epochs']+1):
        self.logger.info('=================================================================')

        # LR Decay
        self.scheduler.step()

        # Train
        train_score, train_loss = self._train_one_epoch(epoch)
        self.result_log.append('train_score', epoch, train_score)
        self.result_log.append('train_loss', epoch, train_loss)

        ############################
        # Logs & Checkpoint
        ############################
        elapsed_time_str, remain_time_str = self.time_estimator.get_est_string(epoch, self.trainer_params['epochs'])
        self.logger.info("Epoch {:3d}/{:3d}: Time Est.: Elapsed[{}], Remain[{}]".format(
            epoch, self.trainer_params['epochs'], elapsed_time_str, remain_time_str))

        all_done = (epoch == self.trainer_params['epochs'])
        model_save_interval = self.trainer_params['logging']['model_save_interval']
        img_save_interval = self.trainer_params['logging']['img_save_interval']

        # Save latest images, every epoch
        if epoch > 1:
            self.logger.info("Saving log_image")
            image_prefix = '{}/latest'.format(self.result_folder)
            util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'],
                                           self.result_log, labels=['train_score'])
            util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_2'],
                                           self.result_log, labels=['train_loss'])

        # Save Model
        if all_done or (epoch % model_save_interval) == 0:
            self.logger.info("Saving trained_model")
            checkpoint_dict = {
                'epoch': epoch,
                'model_state_dict': self.model.state_dict(),
                'optimizer_state_dict': self.optimizer.state_dict(),
                'scheduler_state_dict': self.scheduler.state_dict(),
                'result_log': self.result_log.get_raw_data()
            }
            torch.save(checkpoint_dict, '{}/checkpoint-{}.pt'.format(self.result_folder, epoch))

        # Save Image
        if all_done or (epoch % img_save_interval) == 0:
            image_prefix = '{}/img/checkpoint-{}'.format(self.result_folder, epoch)
            util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'],
                                           self.result_log, labels=['train_score'])
            util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_2'],
                                           self.result_log, labels=['train_loss'])

        # All-done announcement
        if all_done:
            self.logger.info(" *** Training Done *** ")
            self.logger.info("Now, printing log array...")
            util_print_log_array(self.logger, self.result_log)
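Because run() saves checkpoint_dict every model_save_interval epochs, a later run can resume from one of those files by pointing trainer_params['model_load'] at it before constructing the trainer; __init__ then restores the model and optimizer state and continues from the next epoch. A small sketch, where the result path and epoch number are placeholders:

# Resume from (hypothetical) epoch 50: __init__ loads the state dicts and sets start_epoch = 51.
trainer_params['model_load'] = {
    'enable': True,
    'path': './result/saved_cvrp_model',   # placeholder result folder containing checkpoint-50.pt
    'epoch': 50,
}
trainer = CVRPTrainer(env_params, model_params, optimizer_params, trainer_params)
trainer.run()   # continues from epoch 51 up to trainer_params['epochs']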
3. _train_one_epoch(self, epoch)
Function purpose
_train_one_epoch is the core of the training loop: it runs one full training epoch. Its main responsibilities are:
- iterating over the epoch's training episodes (problem instances), one batch at a time;
- computing and accumulating the training score and loss of every batch;
- logging the training progress and statistics, including the running score and loss for the epoch;
- during the first epoch only, logging the first 10 batches so the early phase of training can be monitored.
Function code
def _train_one_epoch(self, epoch):

    score_AM = AverageMeter()
    loss_AM = AverageMeter()

    train_num_episode = self.trainer_params['train_episodes']
    episode = 0
    loop_cnt = 0
    while episode < train_num_episode:

        remaining = train_num_episode - episode
        batch_size = min(self.trainer_params['train_batch_size'], remaining)

        avg_score, avg_loss = self._train_one_batch(batch_size)
        score_AM.update(avg_score, batch_size)
        loss_AM.update(avg_loss, batch_size)

        episode += batch_size

        # Log First 10 Batch, only at the first epoch
        if epoch == self.start_epoch:
            loop_cnt += 1
            if loop_cnt <= 10:
                self.logger.info('Epoch {:3d}: Train {:3d}/{:3d}({:1.1f}%)  Score: {:.4f},  Loss: {:.4f}'
                                 .format(epoch, episode, train_num_episode, 100. * episode / train_num_episode,
                                         score_AM.avg, loss_AM.avg))

    # Log Once, for each epoch
    self.logger.info('Epoch {:3d}: Train ({:3.0f}%)  Score: {:.4f},  Loss: {:.4f}'
                     .format(epoch, 100. * episode / train_num_episode,
                             score_AM.avg, loss_AM.avg))

    return score_AM.avg, loss_AM.avg
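score_AM and loss_AM are AverageMeter objects imported from utils. Their implementation is not shown in this file, but the calls above only need a running average in which each update is weighted by its batch size. A minimal sketch of an equivalent meter, under that assumption:

class AverageMeter:
    """Running average where each update is weighted by its sample count."""
    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.sum += val * n      # val is the per-sample average over n samples
        self.count += n
        self.avg = self.sum / self.count

meter = AverageMeter()
meter.update(10.0, 64)   # first batch: 64 episodes, average score 10.0
meter.update(12.0, 36)   # last (smaller) batch: 36 episodes, average score 12.0
print(meter.avg)         # 10.72, the per-episode average over all 100 episodes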
Note
Batches vs. episodes
- In this trainer, "episodes" counts problem instances: each epoch processes train_episodes instances, split into batches of at most train_batch_size. If an epoch contains N instances and each batch holds B of them, the epoch runs ceil(N / B) batches (see the sketch below).
- Batch updates during training: within an epoch the model is trained once on every batch, so the weights are updated once per completed batch.
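Concretely, the while loop above takes min(train_batch_size, remaining) instances per iteration, so only the final batch can be smaller. A short sketch of the arithmetic with made-up numbers:

import math

train_episodes = 10_000    # instances per epoch (illustrative value)
train_batch_size = 64      # instances per gradient step (illustrative value)

num_batches = math.ceil(train_episodes / train_batch_size)              # 157 batches
last_batch = train_episodes - (num_batches - 1) * train_batch_size      # 16 instances in the final batch
print(num_batches, last_batch)   # 157 16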
4. _train_one_batch(self, batch_size)
Function purpose
This function trains the model on a single batch: it rolls out the policy on a fresh batch of instances, computes the loss, and applies one optimizer step, gradually improving the model's score and reducing its loss.
Function code
def _train_one_batch(self, batch_size):

    # Prep
    ###############################################
    self.model.train()
    self.env.load_problems(batch_size)
    reset_state, _, _ = self.env.reset()
    self.model.pre_forward(reset_state)

    prob_list = torch.zeros(size=(batch_size, self.env.pomo_size, 0))
    # shape: (batch, pomo, 0~problem)

    # POMO Rollout
    ###############################################
    state, reward, done = self.env.pre_step()
    while not done:
        selected, prob = self.model(state)
        # shape: (batch, pomo)
        state, reward, done = self.env.step(selected)
        prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2)

    # Loss
    ###############################################
    advantage = reward - reward.float().mean(dim=1, keepdims=True)
    # shape: (batch, pomo)
    prob_list = prob_list.log()
    regret_weight = 1
    log_prob = torch.zeros((self.env.batch_size, self.env.pomo_size))
    for t in range(prob_list.shape[2]):
        regret_index = (self.env.regret_mask_matrix[:, :, t].unsqueeze(2)).nonzero()
        add_index = (self.env.add_mask_matrix[:, :, t].unsqueeze(2)).nonzero()
        log_prob[add_index[:, 0], add_index[:, 1]] = log_prob[add_index[:, 0], add_index[:, 1]].clone() + prob_list[add_index[:, 0], add_index[:, 1], t]
        log_prob[regret_index[:, 0], regret_index[:, 1]] = log_prob[regret_index[:, 0], regret_index[:, 1]].clone() + regret_weight * prob_list[regret_index[:, 0], regret_index[:, 1], t]
    loss = -advantage * log_prob  # Minus Sign: To Increase REWARD
    # shape: (batch, pomo)
    loss_mean = loss.mean()

    # Score
    ###############################################
    max_pomo_reward, _ = reward.max(dim=1)  # get best results from pomo
    score_mean = -max_pomo_reward.float().mean()  # negative sign to make positive value

    # Step & Return
    ###############################################
    self.model.zero_grad()
    loss_mean.backward()
    self.optimizer.step()
    return score_mean.item(), loss_mean.item()
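The loss is REINFORCE with POMO's shared baseline: each rollout's reward is compared against the mean reward over all POMO rollouts of the same instance, and the (regret-weighted) log-probability of the trajectory is scaled by that advantage. A toy numeric sketch of the advantage/loss/score computation with dummy tensors (2 instances, 3 POMO rollouts each; all values are made up):

import torch

reward = torch.tensor([[-10.0, -12.0, -11.0],    # negative tour lengths, shape (batch=2, pomo=3)
                       [ -8.0,  -9.0,  -7.0]])
log_prob = torch.tensor([[-5.0, -6.0, -5.5],      # summed log-probabilities of each rollout
                         [-4.0, -4.5, -3.5]])

advantage = reward - reward.mean(dim=1, keepdim=True)   # shared baseline per instance: [[1, -1, 0], [0, -1, 1]]
loss = (-advantage * log_prob).mean()                   # minus sign: gradient ascent on expected reward

score = -reward.max(dim=1)[0].mean()                    # average best tour length = (10 + 7) / 2 = 8.5
print(loss.item(), score.item())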
Appendix
Full code
import torch
from logging import getLogger

from CVRPEnv import CVRPEnv as Env
from CVRPModel import CVRPModel as Model

from torch.optim import Adam as Optimizer
from torch.optim.lr_scheduler import MultiStepLR as Scheduler

from utils.utils import *


class CVRPTrainer:
    def __init__(self,
                 env_params,
                 model_params,
                 optimizer_params,
                 trainer_params):

        # save arguments
        self.env_params = env_params
        self.model_params = model_params
        self.optimizer_params = optimizer_params
        self.trainer_params = trainer_params

        # result folder, logger
        self.logger = getLogger(name='trainer')
        self.result_folder = get_result_folder()
        self.result_log = LogData()

        # cuda: pick the device (CUDA/CPU)
        USE_CUDA = self.trainer_params['use_cuda']
        if USE_CUDA:
            cuda_device_num = self.trainer_params['cuda_device_num']
            torch.cuda.set_device(cuda_device_num)
            device = torch.device('cuda', cuda_device_num)
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            device = torch.device('cpu')
            torch.set_default_tensor_type('torch.FloatTensor')

        # Main Components
        self.model = Model(**self.model_params)  # build the model from model_params
        self.env = Env(**self.env_params)        # build the environment that generates instances and defines the problem
        self.optimizer = Optimizer(self.model.parameters(), **self.optimizer_params['optimizer'])
        # optimizer (Adam) over the model parameters
        self.scheduler = Scheduler(self.optimizer, **self.optimizer_params['scheduler'])
        # learning-rate scheduler (MultiStepLR) that adjusts the optimizer's LR during training

        # Restore: optionally resume from a saved checkpoint
        self.start_epoch = 1
        model_load = trainer_params['model_load']
        if model_load['enable']:
            checkpoint_fullname = '{path}/checkpoint-{epoch}.pt'.format(**model_load)
            checkpoint = torch.load(checkpoint_fullname, map_location=device)
            self.model.load_state_dict(checkpoint['model_state_dict'])
            self.start_epoch = 1 + model_load['epoch']
            self.result_log.set_raw_data(checkpoint['result_log'])
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            self.scheduler.last_epoch = model_load['epoch'] - 1
            self.logger.info('Saved Model Loaded !!')

        # utility
        self.time_estimator = TimeEstimator()

    def run(self):
        self.time_estimator.reset(self.start_epoch)
        for epoch in range(self.start_epoch, self.trainer_params['epochs']+1):
            self.logger.info('=================================================================')

            # LR Decay
            self.scheduler.step()

            # Train
            train_score, train_loss = self._train_one_epoch(epoch)
            self.result_log.append('train_score', epoch, train_score)
            self.result_log.append('train_loss', epoch, train_loss)

            ############################
            # Logs & Checkpoint
            ############################
            elapsed_time_str, remain_time_str = self.time_estimator.get_est_string(epoch, self.trainer_params['epochs'])
            self.logger.info("Epoch {:3d}/{:3d}: Time Est.: Elapsed[{}], Remain[{}]".format(
                epoch, self.trainer_params['epochs'], elapsed_time_str, remain_time_str))

            all_done = (epoch == self.trainer_params['epochs'])
            model_save_interval = self.trainer_params['logging']['model_save_interval']
            img_save_interval = self.trainer_params['logging']['img_save_interval']

            # Save latest images, every epoch
            if epoch > 1:
                self.logger.info("Saving log_image")
                image_prefix = '{}/latest'.format(self.result_folder)
                util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'],
                                               self.result_log, labels=['train_score'])
                util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_2'],
                                               self.result_log, labels=['train_loss'])

            # Save Model
            if all_done or (epoch % model_save_interval) == 0:
                self.logger.info("Saving trained_model")
                checkpoint_dict = {
                    'epoch': epoch,
                    'model_state_dict': self.model.state_dict(),
                    'optimizer_state_dict': self.optimizer.state_dict(),
                    'scheduler_state_dict': self.scheduler.state_dict(),
                    'result_log': self.result_log.get_raw_data()
                }
                torch.save(checkpoint_dict, '{}/checkpoint-{}.pt'.format(self.result_folder, epoch))

            # Save Image
            if all_done or (epoch % img_save_interval) == 0:
                image_prefix = '{}/img/checkpoint-{}'.format(self.result_folder, epoch)
                util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'],
                                               self.result_log, labels=['train_score'])
                util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_2'],
                                               self.result_log, labels=['train_loss'])

            # All-done announcement
            if all_done:
                self.logger.info(" *** Training Done *** ")
                self.logger.info("Now, printing log array...")
                util_print_log_array(self.logger, self.result_log)

    def _train_one_epoch(self, epoch):

        score_AM = AverageMeter()
        loss_AM = AverageMeter()

        train_num_episode = self.trainer_params['train_episodes']
        episode = 0
        loop_cnt = 0
        while episode < train_num_episode:

            remaining = train_num_episode - episode
            batch_size = min(self.trainer_params['train_batch_size'], remaining)

            avg_score, avg_loss = self._train_one_batch(batch_size)
            score_AM.update(avg_score, batch_size)
            loss_AM.update(avg_loss, batch_size)

            episode += batch_size

            # Log First 10 Batch, only at the first epoch
            if epoch == self.start_epoch:
                loop_cnt += 1
                if loop_cnt <= 10:
                    self.logger.info('Epoch {:3d}: Train {:3d}/{:3d}({:1.1f}%)  Score: {:.4f},  Loss: {:.4f}'
                                     .format(epoch, episode, train_num_episode, 100. * episode / train_num_episode,
                                             score_AM.avg, loss_AM.avg))

        # Log Once, for each epoch
        self.logger.info('Epoch {:3d}: Train ({:3.0f}%)  Score: {:.4f},  Loss: {:.4f}'
                         .format(epoch, 100. * episode / train_num_episode,
                                 score_AM.avg, loss_AM.avg))

        return score_AM.avg, loss_AM.avg

    def _train_one_batch(self, batch_size):

        # Prep
        ###############################################
        self.model.train()
        self.env.load_problems(batch_size)
        reset_state, _, _ = self.env.reset()
        self.model.pre_forward(reset_state)

        prob_list = torch.zeros(size=(batch_size, self.env.pomo_size, 0))
        # shape: (batch, pomo, 0~problem)

        # POMO Rollout
        ###############################################
        state, reward, done = self.env.pre_step()
        while not done:
            selected, prob = self.model(state)
            # shape: (batch, pomo)
            state, reward, done = self.env.step(selected)
            prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2)

        # Loss
        ###############################################
        advantage = reward - reward.float().mean(dim=1, keepdims=True)
        # shape: (batch, pomo)
        prob_list = prob_list.log()
        regret_weight = 1
        log_prob = torch.zeros((self.env.batch_size, self.env.pomo_size))
        for t in range(prob_list.shape[2]):
            regret_index = (self.env.regret_mask_matrix[:, :, t].unsqueeze(2)).nonzero()
            add_index = (self.env.add_mask_matrix[:, :, t].unsqueeze(2)).nonzero()
            log_prob[add_index[:, 0], add_index[:, 1]] = log_prob[add_index[:, 0], add_index[:, 1]].clone() + prob_list[add_index[:, 0], add_index[:, 1], t]
            log_prob[regret_index[:, 0], regret_index[:, 1]] = log_prob[regret_index[:, 0], regret_index[:, 1]].clone() + regret_weight * prob_list[regret_index[:, 0], regret_index[:, 1], t]
        loss = -advantage * log_prob  # Minus Sign: To Increase REWARD
        # shape: (batch, pomo)
        loss_mean = loss.mean()

        # Score
        ###############################################
        max_pomo_reward, _ = reward.max(dim=1)  # get best results from pomo
        score_mean = -max_pomo_reward.float().mean()  # negative sign to make positive value

        # Step & Return
        ###############################################
        self.model.zero_grad()
        loss_mean.backward()
        self.optimizer.step()
        return score_mean.item(), loss_mean.item()