Objects365数据集提取类别及标签转化为Yolo格式
一、目的
在深度学习项目中,所需的某些目标类别在现有的公开数据集中已经有标注,因此可以直接从现有数据集中提取这些目标类别。Objects365数据集包含365种类别的数据。
二、代码及文件夹摆放
1.文件夹摆放
output文件夹
在train文件夹中,新建images文件夹,images文件夹中的图像实际是我将数据集train文件夹中的patch0.tar.gz解压后放入的图像。因为这个数据集太大了,只能一个一个小压缩包处理。
2.代码
按照上述文件夹摆放数据后,修改成自己的路径。
import os
import json
import shutil
def _bbox_to_yolo(bbox, img_width, img_height):
    """Turn an [x_min, y_min, width, height] pixel box into normalized (cx, cy, w, h)."""
    x = min(max(bbox[0] + bbox[2] / 2, 0) / img_width, 1)
    y = min(max(bbox[1] + bbox[3] / 2, 0) / img_height, 1)
    w = min(max(bbox[2] / img_width, 0), 1)
    h = min(max(bbox[3] / img_height, 0), 1)
    return x, y, w, h


def save_annotations(anno_file_path, imgs_file_path, output_dir, objects_2_my_classes):
    """Extract selected categories from an annotation JSON and emit YOLO labels.

    Only images that are both listed in the JSON and physically present in
    ``imgs_file_path`` are considered, and only annotations whose
    ``category_id`` is a key of ``objects_2_my_classes`` are kept. Each image
    with at least one kept annotation is copied to ``<output_dir>/images/train``
    and gets a label file written through ``write_txt``.

    Args:
        anno_file_path: Path to the annotation JSON. It must contain the
            top-level keys ``"images"`` and ``"annotations"``.
        imgs_file_path: Directory holding the image files to match against.
        output_dir: Root output directory; ``images/train`` and
            ``labels/train`` are created below it when missing.
        objects_2_my_classes: Mapping from source ``category_id`` to the new
            class id written to the label files.
    """
    # JSON text is UTF-8; do not rely on the platform's locale encoding.
    with open(anno_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    print("anno count:", len(data["annotations"]))
    print("image count:", len(data["images"]))

    # Create the destination folders up front. Without an existing directory,
    # shutil.copy would treat ".../images/train" as a *file* name and every
    # image would overwrite the previous one (or the copy would fail).
    images_out_dir = os.path.join(output_dir, "images", 'train')
    labels_out_dir = os.path.join(output_dir, "labels", 'train')
    os.makedirs(images_out_dir, exist_ok=True)
    os.makedirs(labels_out_dir, exist_ok=True)

    images_in_dir = set(os.listdir(imgs_file_path))
    img_map = {}
    img_2_anno = {}
    for img in data["images"]:
        # The JSON stores a relative path; only the base name is compared
        # with the directory listing.
        file_name = img["file_name"].split('/')[-1]
        if file_name in images_in_dir:
            img["file_name"] = file_name
            img_map[img["id"]] = img
            img_2_anno[img["id"]] = []

    for anno in data["annotations"]:
        image_id = anno["image_id"]
        category_id = anno["category_id"]
        if image_id in img_map and category_id in objects_2_my_classes:
            img_2_anno[image_id].append(
                (objects_2_my_classes[category_id], anno["bbox"])
            )

    for image_id, kept in img_2_anno.items():
        if not kept:
            continue
        img = img_map[image_id]
        img_width = img["width"]
        img_height = img["height"]
        if img_width <= 0 or img_height <= 0:
            # Normalizing by a zero dimension would raise ZeroDivisionError.
            print("skip image with invalid size:", img["file_name"])
            continue
        objs = [
            [my_class_id, *_bbox_to_yolo(bbox, img_width, img_height)]
            for my_class_id, bbox in kept
        ]
        shutil.copy(os.path.join(imgs_file_path, img["file_name"]), images_out_dir)
        write_txt(output_dir, img["file_name"], objs)
    print("conversion is done")
def write_txt(output_dir, img_name, objs):
    """Write the YOLO label file for one image.

    The file is created at ``<output_dir>/labels/train/<image stem>.txt`` with
    one line per object: ``class cx cy w h``.

    Args:
        output_dir: Root output directory; ``labels/train`` is created below
            it when missing.
        img_name: Image file name; its extension is replaced by ``.txt``.
        objs: Iterable of ``[class_id, x, y, w, h]`` rows.
    """
    # splitext handles extensions of any length (".jpg", ".jpeg", ...),
    # unlike slicing off a fixed four characters.
    stem = os.path.splitext(img_name)[0]
    labels_dir = os.path.join(output_dir, "labels", 'train')
    os.makedirs(labels_dir, exist_ok=True)
    list_name = os.path.join(labels_dir, stem + ".txt")
    with open(list_name, 'w', encoding='utf-8') as list_fs:
        for obj in objs:
            # Fixed-point formatting: truncating str(value) would turn a
            # value such as 1.2e-05 into the unrelated number "1.2".
            coords = " ".join("{:.6f}".format(value) for value in obj[1:5])
            list_fs.write(str(int(obj[0])) + " " + coords + "\n")
def main_object365(classes, input_dir, output_dir):
    """Run the conversion for the train annotations found under ``input_dir``.

    Args:
        classes: Source category ids to extract; the position of each id in
            this sequence becomes its new class id.
        input_dir: Directory containing ``zhiyuan_objv2_train.json`` and an
            ``images`` sub-directory.
        output_dir: Root directory handed on to ``save_annotations``.
    """
    # Source category id -> zero-based index in ``classes``.
    remap = {category_id: new_id for new_id, category_id in enumerate(classes)}
    save_annotations(
        os.path.join(input_dir, "zhiyuan_objv2_train.json"),
        os.path.join(input_dir, "images"),
        output_dir,
        remap,
    )
def main():
    """Run the extraction with the paths and category list hard-coded below."""
    # Category ids to extract, as listed in object365_dict.txt.
    classes = [84]
    input_dir = "D:/work/PythonCode/2023RD019_python/code/train"
    output_dir = "D:/work/PythonCode/2023RD019_python/code/Object365/output"
    main_object365(classes, input_dir, output_dir)
# Only run the conversion when this file is executed as a script.
if __name__ == "__main__":
    main()
三、结果
处理结果会保存在output文件夹中:图像复制到images/train,标签文件写入labels/train。类别编号从0开始,按classes列表中的顺序重新编号。
labelImg打开后可以查看效果如下: