Environment requirements
Any virtual environment with a basic CV setup will do; it just needs the packages that dlib-based face detection depends on, mainly:
pip install boost dlib opencv-python imutils tqdm
Install anything else as the error messages prompt you.
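Before running the demo, a quick import check is an easy way to confirm the environment is complete (a minimal sketch; the printed versions are simply whatever your environment resolved to):

# Sanity check: the demo below needs all four of these imports to succeed.
import cv2
import dlib
import imutils
import tqdm

print("opencv:", cv2.__version__)
print("dlib:", dlib.__version__)
print("tqdm:", tqdm.__version__)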
demo
Set the video path and the image output path, the crop size (default 256) and the number of frames to crop (default 64), and the script can be run as-is:
import os
import random

import cv2
import dlib
from imutils.face_utils import FaceAligner, rect_to_bb
from tqdm import tqdm  # progress bar

# Path configuration
dataset_path = r'D:\python_project\face-parsing\dataset'         # raw dataset root
output_path = r'D:\python_project\face-parsing\dataset\results'  # output root
crop_size = 256  # side length of the cropped face


# Detect the first valid face in an image and return it aligned
def get_face(fa, image):
    detector = dlib.get_frontal_face_detector()     # HOG frontal face detector
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # the detector works on grayscale
    thresh = gray.shape[0] // 4                     # minimum face width: a quarter of the frame height
    rects = detector(gray, 2)                       # detect faces, upsampling the image twice
    face_aligned = None
    for rect in rects:
        (x, y, w, h) = rect_to_bb(rect)             # face bounding box
        if w > thresh:                              # only accept sufficiently large faces
            face_aligned = fa.align(image, gray, rect)  # align the face
            break                                   # use only the first valid face
    return face_aligned


# Extract 64 consecutive frames from a video and save the aligned faces
def process_video(video_path, save_dir, fa):
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames < 64:  # skip videos shorter than 64 frames
        print(f"Warning: Video '{video_path}' has fewer than 64 frames. Skipping.")
        cap.release()
        return
    start_frame = random.randint(0, total_frames - 64)  # random starting frame
    frames = []
    for i in range(start_frame, start_frame + 64):  # grab 64 consecutive frames
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)         # seek to frame i
        ret, frame = cap.read()
        if ret:
            frames.append(frame)
    cap.release()
    for i, frame in enumerate(tqdm(frames, desc=f"Processing frames from {os.path.basename(video_path)}")):
        face_aligned = get_face(fa, frame)  # align the face in this frame
        if face_aligned is not None:
            img_name = f"{i + 1:05d}.jpg"   # zero-padded frame index
            save_path = os.path.join(save_dir, img_name)
            cv2.imwrite(save_path, face_aligned)
        else:
            print(f"Face not found in frame {i + 1}")


# Main entry: process every video in the dataset
def align_dlib():
    predictor = dlib.shape_predictor(r"../weights/shape_predictor_68_face_landmarks.dat")  # 68-landmark predictor
    fa = FaceAligner(predictor, desiredFaceWidth=crop_size)  # face aligner
    # Iterate over the top-level directories (Training, Development, Testing)
    main_dirs = ['Testing']
    for main_dir in main_dirs:
        main_dir_path = os.path.join(dataset_path, main_dir)
        if not os.path.isdir(main_dir_path):
            print(f"Skipping non-directory: {main_dir_path}")
            continue
        # Iterating over subdirectories (Northwind, Freeform, ...) is left
        # commented out here, since the videos sit directly under main_dir:
        # sub_dirs = os.listdir(main_dir_path)
        # for sub_dir in sub_dirs:
        #     sub_dir_path = os.path.join(main_dir_path, sub_dir)
        #     if not os.path.isdir(sub_dir_path):
        #         print(f"Skipping non-directory: {sub_dir_path}")
        #         continue
        # Iterate over the video files in this folder
        video_files = os.listdir(main_dir_path)
        for video_file in video_files:
            video_path = os.path.join(main_dir_path, video_file)
            if not os.path.isfile(video_path):
                continue
            # Video name without the file extension
            video_name = os.path.splitext(video_file)[0]
            # Output path, e.g. datasets/avec14/Training/Northwind/236_1_Northwind_video
            save_path = os.path.join(output_path, main_dir, video_name)
            os.makedirs(save_path, exist_ok=True)
            print(f"Processing video: {video_path}")
            process_video(video_path, save_path, fa)


if __name__ == "__main__":
    align_dlib()
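After a run it is worth checking how many frames were actually written per video, since any frame where no face clears the width threshold is skipped. A small sketch for that check (my own helper, not part of the original script):

import os

output_path = r'D:\python_project\face-parsing\dataset\results'  # same output root as in the demo

# Count the aligned frames saved for each video; fewer than 64 means
# some frames had no detected face and were skipped.
for root, dirs, files in os.walk(output_path):
    jpgs = [f for f in files if f.lower().endswith('.jpg')]
    if jpgs:
        print(f"{root}: {len(jpgs)}/64 frames aligned")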
debug
Some OpenCV versions require the eyesCenter passed to cv2.getRotationMatrix2D to be a tuple of floats; passing integer values straight through can raise:
Traceback (most recent call last):
  File "D:\python_project\face-parsing\utils\face_dect.py", line 99, in <module>
    align_dlib()  # run the main entry
  File "D:\python_project\face-parsing\utils\face_dect.py", line 95, in align_dlib
    process_video(video_path, save_path, fa)  # process this video
  File "D:\python_project\face-parsing\utils\face_dect.py", line 49, in process_video
    face_aligned = get_face(fa, frame)  # align the face in each frame
  File "D:\python_project\face-parsing\utils\face_dect.py", line 24, in get_face
    face_aligned = fa.align(image, gray, rect)  # align the face
  File "C:\Users\Fine\anaconda3\envs\torch2\lib\site-packages\imutils\face_utils\facealigner.py", line 68, in align
    M = cv2.getRotationMatrix2D(eyesCenter, float(angle), float(scale))
TypeError: Can't parse 'center'. Sequence item with index 0 has a wrong type
The fix is to make eyesCenter a float tuple in the face alignment code (imutils' facealigner.py):
# eyesCenter = ((leftEyeCenter[0] + rightEyeCenter[0]) // 2,
#               (leftEyeCenter[1] + rightEyeCenter[1]) // 2)
eyesCenter = ((leftEyeCenter[0] + rightEyeCenter[0]) / 2.0,
              (leftEyeCenter[1] + rightEyeCenter[1]) / 2.0)

# grab the rotation matrix for rotating and scaling the face
M = cv2.getRotationMatrix2D(eyesCenter, float(angle), float(scale))
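For what it's worth, the "wrong type" here is usually not a plain Python int: leftEyeCenter comes from a NumPy landmark array, so the integer division produces NumPy integer scalars, which some OpenCV builds refuse to parse as a point. A minimal reproduction under that assumption (the coordinate values below are made up):

import numpy as np
import cv2

# NumPy integer scalars, like those produced by integer arithmetic on landmark arrays
center = (np.int64(128), np.int64(96))
# cv2.getRotationMatrix2D(center, 15.0, 1.0)  # TypeError on affected OpenCV builds

# Plain Python floats are accepted everywhere
center = (float(center[0]), float(center[1]))
M = cv2.getRotationMatrix2D(center, 15.0, 1.0)
print(M.shape)  # (2, 3)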