FiftyOne 管理数据

发布于:2025-04-20 ⋅ 阅读:(11) ⋅ 点赞:(0)

FiftyOne 管理数据

下载安装FiftyOne

https://docs.voxel51.com/

下载 coco-2017

使用 FiftyOne 查看

import fiftyone as fo
import fiftyone.zoo as foz

# Locations of the COCO 2017 training images and the matching annotation
# file. Edit these two values to match your own environment.
image_path = '/media/wmx/ws3/AI/data/coco2017/train2017'
annotations_path = '/media/wmx/ws3/AI/data/coco2017/annotations_trainval2017/annotations/instances_train2017.json'

# Port the FiftyOne App is served on; any free port of your choice works.
app_port = 5151

# Import the COCO-format detection data as a FiftyOne dataset named
# "coco2017". The import options are collected first so they are easy to
# read and tweak in one place.
import_options = dict(
    dataset_type=fo.types.COCODetectionDataset,
    data_path=image_path,
    labels_path=annotations_path,
    name="coco2017",
)
dataset = fo.Dataset.from_dir(**import_options)

# Open the dataset in the FiftyOne App and block so the script does not
# exit while the App is being used.
session = fo.launch_app(dataset, port=app_port)
session.wait()

完整数据集比较大,这里只加载其中一部分(前 200 张),按类别过滤后随机抽取 100 张,并把 COCO 格式导出为 YOLO 格式:

import fiftyone as fo
import fiftyone.zoo as foz
import yaml
import os

# Custom paths - edit these variables to match your environment.
image_path = '/media/wmx/ws3/AI/data/coco2017/train2017'
annotations_path = '/media/wmx/ws3/AI/data/coco2017/annotations_trainval2017/annotations/instances_train2017.json'

# Single source of truth for the sample field that holds the detections.
# The import, the filter and the export below must all use the same field;
# previously the filter used "detections" while the export used
# "ground_truth".
label_field = "ground_truth"

# Split name used for the YOLO export. It must match the 'train' entry
# written to dataset.yaml below.
split = "train"

# Create a dataset from only the first 200 samples (the full split is large).
# NOTE(review): from_dir is expected to fail if a dataset named "coco2017"
# already exists in the FiftyOne database - delete or rename it first.
dataset = fo.Dataset.from_dir(
    dataset_type=fo.types.COCODetectionDataset,
    data_path=image_path,
    labels_path=annotations_path,
    label_field=label_field,
    max_samples=200,  # only load the first 200 samples
    name="coco2017"
)

# Classes to keep. The list can be produced on the Linux command line with:
#     jq -r '.categories[].name' /media/wmx/ws3/AI/data/coco2017/annotations_trainval2017/annotations/instances_train2017.json | sort -u
filter_names = [
    "airplane", "apple", "backpack", "banana", "baseball bat", "baseball glove",
    "bear", "bed", "bench", "bicycle", "bird", "boat", "book", "bottle", "bowl",
    "broccoli", "bus", "cake", "car", "carrot", "cat", "cell phone", "chair",
    "clock", "couch", "cow", "cup", "dining table", "dog", "donut", "elephant",
    "fire hydrant", "fork", "frisbee", "giraffe", "hair drier", "handbag", "horse",
    "hot dog", "keyboard", "kite", "knife", "laptop", "microwave", "motorcycle",
    "mouse", "orange", "oven", "parking meter", "person", "pizza", "potted plant",
    "refrigerator", "remote", "sandwich", "scissors", "sheep", "sink", "skateboard",
    "skis", "snowboard", "spoon", "sports ball", "stop sign", "suitcase", "surfboard",
    "teddy bear", "tennis racket", "tie", "toaster", "toilet", "toothbrush",
    "traffic light", "train", "truck", "tv", "umbrella", "vase", "wine glass", "zebra"
]

# Keep only the detections whose label is one of the listed classes.
filtered_dataset = dataset.filter_labels(
    label_field, fo.ViewField("label").is_in(filter_names)
)

# Randomly draw 100 samples from the filtered view.
random_subset = filtered_dataset.take(100)


# Export destination.
export_dir = '/media/wmx/ws3/AI/data/51/coco-yolo2017'

# Export in YOLOv5 format.
#  - split: the exporter defaults to "val"; export as "train" so that the
#    images end up under images/train/, which is what dataset.yaml points to.
#  - classes: fixes the class-index order used in the label files so it is
#    identical to the 'names' list written to dataset.yaml.
try:
    random_subset.export(
        export_dir=export_dir,
        dataset_type=fo.types.YOLOv5Dataset,
        label_field=label_field,
        split=split,
        classes=filter_names,
    )
except ValueError as e:
    # Best-effort: report the failure and still open the App below, but do
    # not claim success or write a yaml file for an export that did not run.
    print(f"Export failed: {e}. Please check the 'names' field in your dataset configuration.")
else:
    print(f"数据已成功导出到 {export_dir}")

    # Create or update the yaml file describing the exported dataset.
    yaml_file_path = os.path.join(export_dir, 'dataset.yaml')

    yaml_content = {
        'names': filter_names,
        'path': export_dir,
        'train': './images/train/'
    }

    with open(yaml_file_path, 'w') as yaml_file:
        yaml.dump(yaml_content, yaml_file)

    print(f"yaml 文件已成功创建/更新到 {yaml_file_path}")


# Inspect the sampled subset in the FiftyOne App and block so the script does
# not exit while the App is being used.
session = fo.launch_app(random_subset, port=5151)
session.wait()

其中,`filter_names` 里的类别名称可以用下面的 Linux 命令从标注文件中列出(需要先安装 jq):

  jq -r '.categories[].name' /media/wmx/ws3/AI/data/coco2017/annotations_trainval2017/annotations/instances_train2017.json | sort -u

在这里插入图片描述

在这里插入图片描述


网站公告

今日签到

点亮在社区的每一天
去签到