人像视频预处理v1.1【时间裁剪+画面裁切+调整帧率】

发布于:2024-07-16 ⋅ 阅读:(132) ⋅ 点赞:(0)

针对人物淡入淡出过程中人脸检测仍然判定为“有人脸”的情况,本版本做了如下优化:

引入sample_rate参数来控制帧率采样,减少不必要的处理。
使用min_confidence参数来过滤低置信度的人脸检测结果。
使用滑动窗口计算置信度的移动平均,以检测淡出效果。fade_threshold参数用于决定何时认为人脸真正淡出。
我们还增加了timeout参数:当连续超过timeout秒没有检测到人脸时,停止继续搜索。
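注意,face_recognition库的face_encodings方法返回的是人脸特征编码,并不直接对应检测置信度,这里只是把编码的范数当作置信度的粗略代理。需要说明的是:编码只有在检测到人脸时才会生成;并且下面的代码是把范数当作正向指标来使用的(平均范数低于min_confidence时,该帧的置信度记为0),而不是反向指标。因此这个代理并不严格,min_confidence和fade_threshold的取值需要结合实际数据验证。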

这个版本的代码应该能够更准确地处理淡入淡出效果,并在保持较高检测灵敏度的同时,加快处理速度。不过,你可能需要根据实际情况调整sample_rate、min_confidence、fade_threshold和timeout等参数。

完整代码:

#  python data_utils/pre_video/cut_crop_fps.py

import cv2
import math
import numpy as np
import face_recognition
from moviepy.editor import VideoFileClip, concatenate_videoclips
from tqdm import tqdm

def find_host_face_location(video_path):
    """Return the location of the first face detected in the video.

    Frames are read in order from the start of the file and scanned at
    quarter resolution. The search stops at the first frame in which
    ``face_recognition.face_locations`` reports at least one face. The scan
    is not limited to the opening seconds: if no face shows up, every frame
    is read until the stream ends.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.

    Returns:
        A ``(top, right, bottom, left)`` tuple scaled back to full-resolution
        pixel coordinates, or ``None`` when no face is found or no frame
        could be read.
    """
    scale = 4  # detection runs on frames shrunk by this factor to save time
    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            small_frame = cv2.resize(frame, (0, 0), fx=1 / scale, fy=1 / scale)
            # cvtColor returns a contiguous RGB array. The reversed-slice
            # view ``[:, :, ::-1]`` is non-contiguous, which some dlib builds
            # reject as an unsupported image type.
            rgb_small_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)

            face_locations = face_recognition.face_locations(rgb_small_frame)
            if face_locations:
                # The first reported face is taken to be the host; map its
                # coordinates back to the original frame size.
                return tuple(coord * scale for coord in face_locations[0])
    finally:
        # Release the capture even if detection raises.
        cap.release()
    return None

def _fit_span(center, half, limit):
    """Return ``(start, end)`` of a ``2 * half`` span around ``center`` kept inside ``[0, limit]``."""
    start = center - half
    end = center + half
    if start < 0:
        # Slide the span forward instead of shrinking it.
        end -= start
        start = 0
    if end > limit:
        # Slide the span back; it can only shrink if the frame is too small.
        start -= end - limit
        end = limit
    return max(start, 0), end


def calculate_cropping_box(face_location, frame_shape, half_width=256, half_height=256):
    """Compute a crop box centred on the face and kept inside the frame.

    The box is ``2 * half_width`` by ``2 * half_height`` pixels. When the
    face is close to a border the box is shifted back into the frame rather
    than truncated, so it keeps its full size (and aspect ratio) whenever the
    frame is large enough. If the frame is smaller than the box along an
    axis, the box covers the whole frame along that axis.

    Args:
        face_location: ``(top, right, bottom, left)`` face rectangle in pixels.
        frame_shape: ``(height, width)`` of the frame.
        half_width: Half of the desired box width in pixels.
        half_height: Half of the desired box height in pixels.

    Returns:
        The crop box as ``(top, right, bottom, left)``.
    """
    top, right, bottom, left = face_location
    center_x = (left + right) // 2
    center_y = (top + bottom) // 2

    left_cropped, right_cropped = _fit_span(center_x, half_width, frame_shape[1])
    top_cropped, bottom_cropped = _fit_span(center_y, half_height, frame_shape[0])

    return (top_cropped, right_cropped, bottom_cropped, left_cropped)

def find_first_last_face(video_path, sample_rate=3, min_confidence=0.5, fade_threshold=0.5, timeout=2.0):
    """Find the timestamps of the first and last face appearance in a video.

    Every ``sample_rate``-th frame is scanned at quarter resolution with the
    HOG face detector. Scanning stops early once a face has been seen and no
    face has been detected for more than ``timeout`` seconds. If the video
    ends before that happens, a moving average over the per-sample scores
    (window of roughly ``timeout`` seconds) is used to look for a fade-out
    after the first face; failing that, the last detection time is returned.

    The per-sample score is the mean L2 norm of the face encodings found in
    the frame, forced to 0 when it is below ``min_confidence`` or when no
    face is found.
    NOTE(review): the encoding norm is a heuristic kept from the original
    code, not a calibrated detector confidence - confirm the thresholds
    against real data.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        sample_rate: Process one frame out of this many (values < 1 act as 1).
        min_confidence: Scores below this are recorded as 0.
        fade_threshold: A moving-average score below this marks a fade-out.
        timeout: Seconds without a face, after one has been seen, that end
            the scan.

    Returns:
        ``(first_face_time, last_face_time)`` in seconds, or ``(None, None)``
        when no face was detected at all.
    """
    sample_rate = max(int(sample_rate), 1)
    first_face_time = None
    last_face_time = None
    last_face_detected_time = None
    face_confidences = []
    timestamps = []
    frame_index = -1

    cap = cv2.VideoCapture(video_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Sample on the running frame index. Counting processed frames
            # instead would stall after the first one.
            frame_index += 1
            if frame_index % sample_rate != 0:
                continue

            timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000  # seconds

            small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
            # Contiguous RGB copy; a reversed-slice view is rejected by some
            # dlib builds.
            rgb_small_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)

            face_locations = face_recognition.face_locations(rgb_small_frame, model='hog')
            if face_locations:
                face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)
                scores = [np.linalg.norm(encoding) for encoding in face_encodings]
                avg_confidence = sum(scores) / len(scores)

                # ``is None`` so that a face at timestamp 0.0 is not lost.
                if first_face_time is None:
                    first_face_time = timestamp
                last_face_detected_time = timestamp
                face_confidences.append(avg_confidence if avg_confidence >= min_confidence else 0)
            else:
                face_confidences.append(0)
            timestamps.append(timestamp)

            # Measure the gap in real video time and only after a face has
            # been seen, so a long face-free intro does not abort the scan.
            if last_face_detected_time is not None and timestamp - last_face_detected_time > timeout:
                last_face_time = last_face_detected_time
                break
    finally:
        cap.release()

    if first_face_time is None:
        return None, None

    if last_face_time is None:
        # fps may be 0 or NaN for unreadable metadata; skip smoothing then.
        window_size = int(timeout * fps / sample_rate) if fps > 0 else 0
        if 1 < window_size <= len(face_confidences):
            moving_averages = np.convolve(face_confidences, np.ones(window_size) / window_size, mode='valid')
            for start, avg in enumerate(moving_averages):
                # Ignore windows that begin before the first face, otherwise
                # the face-free lead-in would be mistaken for a fade-out.
                if timestamps[start] >= first_face_time and avg < fade_threshold:
                    last_face_time = timestamps[start + window_size - 1]
                    break

        if last_face_time is None:
            last_face_time = last_face_detected_time
        else:
            # Never report a time later than the last actual detection.
            last_face_time = min(last_face_time, last_face_detected_time)

    return first_face_time, last_face_time

def process_video(input_path, output_path):
    """Trim, crop and re-time one video around the host's face.

    Steps: locate the host face, find the first and last face timestamps,
    keep only the whole-second span between them, crop a box around the
    face, resize it to 512x512, set the frame rate to 25 fps and write the
    result with libx264 video and AAC audio.

    The video is skipped (a message is printed, nothing is written) when no
    face is found, when the face time range cannot be determined, or when
    the rounded range is empty.

    Args:
        input_path: Path of the source video.
        output_path: Path of the video file to write.
    """
    host_face_location = find_host_face_location(input_path)
    if host_face_location is None:
        print(f"No face detected in video {input_path}")
        return

    first_face_time, last_face_time = find_first_last_face(input_path)
    print(f"First face time: {first_face_time}, Last face time: {last_face_time}")
    # Either value may be None; math.ceil/math.floor would raise on it.
    if first_face_time is None or last_face_time is None:
        print(f"Could not determine face time range in video {input_path}")
        return

    # Round inwards so the kept span lies fully inside the face interval.
    start_trim = math.ceil(first_face_time)
    end_trim = math.floor(last_face_time)
    print(f"Start trim: {start_trim}, End trim: {end_trim}")
    if end_trim <= start_trim:
        print(f"Face segment too short to trim in video {input_path}")
        return

    clip = VideoFileClip(input_path)
    try:
        # clip.size is (width, height); the crop helper expects (height, width).
        frame_shape = clip.size[::-1]
        top, right, bottom, left = calculate_cropping_box(host_face_location, frame_shape)

        trimmed_clip = clip.subclip(start_trim, end_trim)
        cropped_clip = trimmed_clip.crop(x1=left, y1=top, x2=right, y2=bottom)
        cropped_clip = cropped_clip.resize((512, 512))
        cropped_clip = cropped_clip.set_fps(25)

        cropped_clip.write_videofile(output_path, codec='libx264', audio_codec='aac')
    finally:
        # Close the source clip so its readers are released even on failure.
        clip.close()

if __name__ == "__main__":
    # Batch entry point: process dataset items numbered 1 through 75, reading
    # data/dataset/<i>/<i>.mp4 and writing data/dataset/<i>/<i>_fcc.mp4.
    video_ids = range(1, 76)
    for i in tqdm(video_ids, desc="Processing videos"):
        print("处理第", i, "个视频")
        process_video(
            f"data/dataset/{i}/{i}.mp4",
            f"data/dataset/{i}/{i}_fcc.mp4",
        )


网站公告

今日签到

点亮在社区的每一天
去签到