ImageNet数据集处理使用脚本【Python】

发布于:2025-02-11 ⋅ 阅读:(65) ⋅ 点赞:(0)

1、训练集

训练集解压之后是一千个压缩包,需要二次解压,编写程序循环解压

脚本如下,将其保存为unzip.py

运行该文件(python unzip.py),运行前注意将脚本末尾的 imagenet_train_dir 修改为存放这一千个 .tar 压缩包的目录路径

import os
import tarfile

def extract_tar_files(directory):
    """Extract every ``.tar`` archive found directly inside *directory*.

    Each ``<name>.tar`` is unpacked into ``<directory>/<name>/``, which is
    created when it does not exist yet. Problems are reported with ``print``
    instead of being raised: a missing directory or a directory without any
    ``.tar`` file ends the call early, and an archive that cannot be
    extracted is reported and skipped so the remaining archives are still
    processed.

    The process working directory is never changed; every path is built
    from *directory*, so relative paths used by the caller keep working
    after this function returns.

    :param directory: path of the directory containing the .tar files
    :return: None
    """
    if not os.path.isdir(directory):
        print(f"Error: Directory '{directory}' does not exist.")
        return

    # Sorted so that archives are processed in a predictable order.
    tar_files = sorted(f for f in os.listdir(directory) if f.endswith('.tar'))

    if not tar_files:
        print("No .tar files found in the specified directory.")
        return

    # Interpreters that ship tarfile extraction filters accept filter="data",
    # which rejects members that would be written outside the target folder.
    # Older interpreters do not know the keyword, so only pass it when present.
    extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

    for tar_file in tar_files:
        # Target folder has the same name as the archive, minus the extension.
        folder_name = os.path.splitext(tar_file)[0]
        target_dir = os.path.join(directory, folder_name)

        try:
            os.makedirs(target_dir, exist_ok=True)
            print(f"Extracting {tar_file} to {folder_name}/")
            with tarfile.open(os.path.join(directory, tar_file), 'r') as tar_ref:
                tar_ref.extractall(path=target_dir, **extract_kwargs)
            print(f"Finished extracting {tar_file}")
        except Exception as e:
            # Best effort: one broken archive must not stop the whole batch.
            print(f"Failed to extract {tar_file}: {e}")


# Directory that holds the per-class .tar archives; change it to your own path.
imagenet_train_dir = "train"
extract_tar_files(directory=imagenet_train_dir)

2、验证集

验证集比较麻烦,解压之后的图片都是堆在一起的,我们需要处理为和训练集一样,每个类别在一个文件夹。

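脚本如下,需要使用到 devkit(ILSVRC2012_devkit_t12)中 data 目录下的标签文件 meta.mat 和 ILSVRC2012_validation_ground_truth.txt,并依赖 scipy;运行前注意把 val_dir 和 devkit_dir 修改为自己的路径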

from scipy import io
import os
import shutil
 
def move_valimg(val_dir='E:/ImageNet/imagenet2012/val', devkit_dir='E:/ImageNet/imagenet2012/ILSVRC2012_devkit_t12'):
    """Move the flat validation images into one sub-folder per class.

    Lookup chain for every image file in *val_dir*:
    val_id (1-based number after the last ``_`` in the file name)
    -> ILSVRC_ID (1-based, read from line ``val_id`` of the ground-truth file)
    -> WNID (field index 1 of the matching ``synsets`` record in ``meta.mat``).

    Resulting layout::

        /val
           /n01440764
               images
           /n01443537
               images
            .....

    Files whose name does not end in a usable val_id (for example stray
    non-image files) are reported and left where they are.

    :param val_dir: directory holding the validation images; the class
        folders are created inside it.
    :param devkit_dir: devkit root; ``data/meta.mat`` and
        ``data/ILSVRC2012_validation_ground_truth.txt`` are read from it.
    :return: None
    """
    data_dir = os.path.join(devkit_dir, 'data')
    synset = io.loadmat(os.path.join(data_dir, 'meta.mat'))

    # One label per line. int() on the stripped line works with or without a
    # trailing newline on the last line, and the file is closed on exit.
    with open(os.path.join(data_dir, 'ILSVRC2012_validation_ground_truth.txt')) as ground_truth:
        labels = [int(line.strip()) for line in ground_truth if line.strip()]

    # Only regular files directly inside val_dir; class folders created by a
    # previous run are ignored.
    filenames = sorted(
        name for name in os.listdir(val_dir)
        if os.path.isfile(os.path.join(val_dir, name))
    )

    for filename in filenames:
        # val image name -> val_id
        try:
            val_id = int(filename.split('.')[0].split('_')[-1])
        except ValueError:
            print("skip %s: no val_id in file name" % filename)
            continue
        # Guard the 1-based index; val_id 0 would otherwise wrap to labels[-1].
        if not 1 <= val_id <= len(labels):
            print("skip %s: val_id %d has no ground-truth label" % (filename, val_id))
            continue

        # val_id -> ILSVRC ID -> WNID
        ilsvrc_id = labels[val_id - 1]
        wnid = str(synset['synsets'][ilsvrc_id - 1][0][1][0])
        print("val_id:%d, ILSVRC_ID:%d, WIND:%s" % (val_id, ilsvrc_id, wnid))

        # move val image into its class folder
        output_dir = os.path.join(val_dir, wnid)
        os.makedirs(output_dir, exist_ok=True)
        shutil.move(os.path.join(val_dir, filename), os.path.join(output_dir, filename))
 
# Script entry point: uses the default val_dir / devkit_dir of move_valimg,
# so edit those defaults (or pass your own paths here) before running.
if __name__ == "__main__":
    move_valimg()


网站公告

今日签到

点亮在社区的每一天
去签到