FiftyOne 管理数据
下载安装FiftyOne
下载 coco-2017
使用 FiftyOne 查看
import fiftyone as fo
import fiftyone.zoo as foz
# Custom locations - adjust these values to match your environment.
coco_root = '/media/wmx/ws3/AI/data/coco2017'
image_path = f'{coco_root}/train2017'
annotations_path = f'{coco_root}/annotations_trainval2017/annotations/instances_train2017.json'
# Collect the import arguments in one place, then build the dataset from the
# COCO image directory and its instances annotation file.
import_options = dict(
    dataset_type=fo.types.COCODetectionDataset,
    data_path=image_path,
    labels_path=annotations_path,
    name="coco2017",
)
dataset = fo.Dataset.from_dir(**import_options)
# Open the dataset in the FiftyOne App; any port of your choice works here.
app_port = 5151
session = fo.launch_app(dataset, port=app_port)
# Wait on the session so the script does not exit right after launching.
session.wait()
数据集比较大,我们只加载其中一部分样本,并将其从 COCO 格式导出为 YOLO 格式
import fiftyone as fo
import fiftyone.zoo as foz
import yaml
import os
# Load a small COCO subset, keep only the listed classes, export a random
# sample of it in YOLOv5 format, write the matching dataset.yaml and open the
# exported subset in the FiftyOne App.

# Custom paths - edit these variables to match your environment.
image_path = '/media/wmx/ws3/AI/data/coco2017/train2017'
annotations_path = '/media/wmx/ws3/AI/data/coco2017/annotations_trainval2017/annotations/instances_train2017.json'
# Single source of truth for the field that stores the detections. It is used
# for import, filtering and export so the three steps cannot drift apart
# (previously the filter read "detections" while the export read
# "ground_truth").
label_field = "ground_truth"
# Create the dataset.
# NOTE(review): this reuses the dataset name of the earlier snippet; if that
# dataset still exists in the FiftyOne database, creation is expected to fail
# - delete it first or pass overwrite=True. TODO confirm for your version.
dataset = fo.Dataset.from_dir(
    dataset_type=fo.types.COCODetectionDataset,
    data_path=image_path,
    labels_path=annotations_path,
    label_field=label_field,
    max_samples=200,  # only load the first 200 samples
    name="coco2017",
)
# Classes to keep.
# Linux command line used to list the category names:
# jq -r '.categories[].name' /media/wmx/ws3/AI/data/coco2017/annotations_trainval2017/annotations/instances_train2017.json | sort -u
filter_names = [
    "airplane", "apple", "backpack", "banana", "baseball bat", "baseball glove",
    "bear", "bed", "bench", "bicycle", "bird", "boat", "book", "bottle", "bowl",
    "broccoli", "bus", "cake", "car", "carrot", "cat", "cell phone", "chair",
    "clock", "couch", "cow", "cup", "dining table", "dog", "donut", "elephant",
    "fire hydrant", "fork", "frisbee", "giraffe", "hair drier", "handbag", "horse",
    "hot dog", "keyboard", "kite", "knife", "laptop", "microwave", "motorcycle",
    "mouse", "orange", "oven", "parking meter", "person", "pizza", "potted plant",
    "refrigerator", "remote", "sandwich", "scissors", "sheep", "sink", "skateboard",
    "skis", "snowboard", "spoon", "sports ball", "stop sign", "suitcase", "surfboard",
    "teddy bear", "tennis racket", "tie", "toaster", "toilet", "toothbrush",
    "traffic light", "train", "truck", "tv", "umbrella", "vase", "wine glass", "zebra"
]
# Keep only the labels whose class is in filter_names.
filtered_dataset = dataset.filter_labels(
    label_field, fo.ViewField("label").is_in(filter_names)
)
# Randomly pick 100 samples.
random_subset = filtered_dataset.take(100)
# Export location and split. The split is passed explicitly so the exported
# images/labels folders match the 'train' entry written to dataset.yaml below.
export_dir = '/media/wmx/ws3/AI/data/51/coco-yolo2017'
split = "train"
# Export in YOLOv5 format. `classes` pins the class order so the class indices
# in the exported label files line up with the 'names' list in dataset.yaml.
try:
    random_subset.export(
        export_dir=export_dir,
        dataset_type=fo.types.YOLOv5Dataset,
        label_field=label_field,
        split=split,
        classes=filter_names,
    )
except ValueError as e:
    print(f"Export failed: {e}. Please check the 'names' field in your dataset configuration.")
else:
    # Only report success and write the yaml when the export really succeeded.
    print(f"数据已成功导出到 {export_dir}")
    # Create or update the yaml file next to the exported data.
    yaml_file_path = os.path.join(export_dir, 'dataset.yaml')
    yaml_content = {
        'names': filter_names,
        'path': export_dir,
        'train': f'./images/{split}/'
    }
    with open(yaml_file_path, 'w') as yaml_file:
        yaml.dump(yaml_content, yaml_file)
    print(f"yaml 文件已成功创建/更新到 {yaml_file_path}")
# Open the sampled subset in the FiftyOne App and wait on the session.
session = fo.launch_app(random_subset, port=5151)
session.wait()
其中,可以用下面的命令查看数据集中所有的类别名称(需要先安装 jq):
Linux 命令行:
jq -r '.categories[].name' /media/wmx/ws3/AI/data/coco2017/annotations_trainval2017/annotations/instances_train2017.json | sort -u