1、训练集
训练集解压之后是一千个压缩包,需要二次解压,编写程序循环解压
脚本如下,将其保存为unzip.py
运行该文件,注意将脚本末尾的 imagenet_train_dir 修改为实际的训练集目录路径
import os
import tarfile
def extract_tar_files(directory):
    """
    Unpack every ``.tar`` archive located directly inside *directory*.

    Each archive ``<name>.tar`` is extracted into the sub-directory
    ``<directory>/<name>/``, which is created when it does not exist yet.
    A failure on one archive is printed and the remaining archives are
    still processed.

    The working directory of the process is not changed: all paths are
    built relative to *directory* instead of calling ``os.chdir``.

    :param directory: path of the directory that contains the .tar files
    :return: None; progress and error messages are printed to stdout
    """
    if not os.path.isdir(directory):
        print(f"Error: Directory '{directory}' does not exist.")
        return

    # Sorted so that archives are processed in a reproducible order.
    tar_files = sorted(f for f in os.listdir(directory) if f.endswith('.tar'))
    if not tar_files:
        print("No .tar files found in the specified directory.")
        return

    for tar_file in tar_files:
        # Target folder has the archive's name without its extension.
        folder_name = os.path.splitext(tar_file)[0]
        tar_path = os.path.join(directory, tar_file)
        target_dir = os.path.join(directory, folder_name)
        try:
            print(f"Extracting {tar_file} to {folder_name}/")
            # Created inside the try so that a name clash or permission
            # problem is reported per archive instead of aborting the run.
            os.makedirs(target_dir, exist_ok=True)
            with tarfile.open(tar_path, 'r') as tar_ref:
                if hasattr(tarfile, 'data_filter'):
                    # Refuse members that would escape target_dir; also
                    # avoids the "no filter" DeprecationWarning on 3.12+.
                    tar_ref.extractall(path=target_dir, filter='data')
                else:
                    tar_ref.extractall(path=target_dir)
            print(f"Finished extracting {tar_file}")
        except Exception as e:
            # Deliberately broad: this is a best-effort batch job, one
            # broken archive must not stop the others.
            print(f"Failed to extract {tar_file}: {e}")
# Directory containing the .tar archives to unpack; adjust it to your own path.
imagenet_train_dir = "train"
extract_tar_files(directory=imagenet_train_dir)
2、验证集
验证集比较麻烦,解压之后的图片都是堆在一起的,我们需要处理为和训练集一样,每个类别在一个文件夹。
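脚本如下,需要使用到 devkit 中的标签文件(data/meta.mat 和 data/ILSVRC2012_validation_ground_truth.txt),运行前注意修改 val_dir 和 devkit_dir 的默认路径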
from scipy import io
import os
import shutil
def move_valimg(val_dir='E:/ImageNet/imagenet2012/val', devkit_dir='E:/ImageNet/imagenet2012/ILSVRC2012_devkit_t12'):
    """
    Move the validation images into one sub-folder per class.

    Mapping chain for every image:
    val_id (starts at 1, parsed from the file name)
    -> ILSVRC_ID (starts at 1, read from the ground-truth file)
    -> WNID (looked up in meta.mat).

    Resulting layout:
        /val
            /n01440764
                images
            /n01443537
                images
            .....

    Only files located directly inside *val_dir* are handled, so images
    that already sit in a class folder are left alone. Files whose name
    does not end in ``_<number>`` before the first dot, or whose number
    has no ground-truth entry, are skipped with a message.

    :param val_dir: directory that holds the validation images
    :param devkit_dir: devkit directory; its ``data`` sub-folder must
        contain ``meta.mat`` and
        ``ILSVRC2012_validation_ground_truth.txt``
    :return: None
    """
    data_dir = os.path.join(devkit_dir, 'data')

    # Load the synset table once; it is indexed by ILSVRC_ID - 1 below.
    synsets = io.loadmat(os.path.join(data_dir, 'meta.mat'))['synsets']

    # One label per line. int() tolerates surrounding whitespace, so a
    # missing trailing newline on the last line no longer corrupts it.
    with open(os.path.join(data_dir, 'ILSVRC2012_validation_ground_truth.txt')) as ground_truth:
        labels = [int(line) for line in ground_truth if line.strip()]

    # First item yielded by os.walk describes val_dir itself.
    root, _, filenames = next(os.walk(val_dir))
    for filename in filenames:
        # val image name -> val_id
        try:
            val_id = int(filename.split('.')[0].split('_')[-1])
        except ValueError:
            print("skip %s: no validation id in file name" % filename)
            continue
        if not 1 <= val_id <= len(labels):
            print("skip %s: validation id %d has no ground-truth entry" % (filename, val_id))
            continue

        # val_id -> ILSVRC ID -> WNID
        ILSVRC_ID = labels[val_id - 1]
        wnid = synsets[ILSVRC_ID - 1][0][1][0]
        print("val_id:%d, ILSVRC_ID:%d, WIND:%s" % (val_id, ILSVRC_ID, wnid))

        # Move the image into its class folder, creating it on first use.
        output_dir = os.path.join(root, wnid)
        os.makedirs(output_dir, exist_ok=True)
        shutil.move(os.path.join(root, filename), os.path.join(output_dir, filename))
# Script entry point: reorganise the validation images using the default paths.
if __name__ == "__main__":
    move_valimg()