FFmpeg入门：最简单的音视频播放器-EW帮帮网

FFmpeg入门：最简单的音视频播放器

前两章，我们已经了解了分别如何构建一个简单和音频播放器和视频播放器。
FFmpeg入门：最简单的音频播放器
 FFmpeg入门：最简单的视频播放器

本章我们将结合上述两章的知识，看看如何融合成一个完整的音视频播放器，跟上我的节奏，本章将是咱们后续完成一个完整的音视频播放器的起点。

整体流程图

话不多说，先上图
在这里插入图片描述
这个图似乎有点复杂了，这里我会分别将每个模块拿出来讲述，方便大家一步一步分析整个流程。

第一步：初始化

在这里插入图片描述
我们首先关注整个流程图的最上面一部分，这部分其实和之前的流程一样，主要就是将做一些前置的初始化工作：

1：打开文件，获取文件上下文
2：找到对应的音频/视频流，获取到Codec上下文
3：打开解码器
4：分配输出空间缓存，用于后续存储解码的输出数据
5：音频/视频帧的格式转化上下文
6：初始化SDL组件，主要是视频的播放窗口和音频播放器

代码（省略了部分校验和参数初始化，方便阅读，原码见文章末尾）：

	/** 初始化函数 */
	init_video_state(&video_state);
	audio_param = video_state->audioParam;
	video_param = video_state->videoParam;
	
	avformat_network_init();
	// 1. 打开视频文件，获取格式上下文
	if(avformat_open_input(&video_state->formatCtx, argv[1], NULL, NULL)!=0){
		printf("Couldn't open input stream.\n");
		return -1;
	}
	
	// 2. 对文件探测流信息
	if(avformat_find_stream_info(video_state->formatCtx, NULL) < 0){
		printf("Couldn't find stream information.\n");
		return -1;
	}
	
	// 3. 找到对应的 音频流/视频流 索引
	video_state->audioStream=-1;
	video_state->videoStream=-1;
	for(int i=0; i < video_state->formatCtx->nb_streams; i++) {
		if(video_state->formatCtx->streams[i]->codecpar->codec_type==AVMEDIA_TYPE_AUDIO){
			video_state->audioStream=i;
		}
		if (video_state->formatCtx->streams[i]->codecpar->codec_type==AVMEDIA_TYPE_VIDEO) {
			video_state->videoStream=i;
		}
	}
	
	// 4. 将 音频流/视频流 编码参数写入上下文
	AVCodecParameters* aCodecParam = video_state->formatCtx->streams[video_state->audioStream]->codecpar;
	avcodec_parameters_to_context(video_state->aCodecCtx, aCodecParam);
	
	AVCodecParameters* vCodecParam = video_state->formatCtx->streams[video_state->videoStream]->codecpar;
	avcodec_parameters_to_context(video_state->vCodecCtx, vCodecParam);
	
	// 5. 查找流的编码器
	video_state->aCodec = avcodec_find_decoder(video_state->aCodecCtx->codec_id);
	video_state->vCodec = avcodec_find_decoder(video_state->vCodecCtx->codec_id);
	
	// 6. 打开流的编解码器
	if(avcodec_open2(video_state->aCodecCtx, video_state->aCodec, NULL)<0){
		printf("Could not open audio codec.\n");
		return -1;
	}
	if(avcodec_open2(video_state->vCodecCtx, video_state->vCodec, NULL)<0){
		printf("Could not open video codec.\n");
		return -1;
	}
	
	/** 音频输出信息构建 */
	audio_output_set(video_state);
	/** 视频输出信息构建 */
	video_output_set(video_state);
	
	
	//	SDL 初始化
#if USE_SDL
	if(SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
		printf( "Could not initialize SDL - %s\n", SDL_GetError());
		return -1;
	}
	
	/** 初始化音频SDL设备 */
	SDL_AudioSpec wanted_spec;
	wanted_spec.freq = audio_param->out_sample_rate;					// 采样率
	wanted_spec.format = AUDIO_S16SYS;									// 采样格式 16bit
	wanted_spec.channels = audio_param->out_channels;					// 通道数
	wanted_spec.silence = 0;
	wanted_spec.samples = audio_param->out_nb_samples;					// 单帧处理的采样点
	wanted_spec.callback = fill_audio;									// 回调函数
	wanted_spec.userdata = video_state->aCodecCtx;						// 回调函数的参数
	
	/** 初始化视频SDL设备 */
	SDL_Window*       window = NULL;
	SDL_Renderer*     renderer = NULL;
	SDL_Texture*      texture= NULL;
	/** 窗口 */
	window = SDL_CreateWindow("SDL2 window",
							  SDL_WINDOWPOS_CENTERED,
							  SDL_WINDOWPOS_CENTERED,
							  video_state->vCodecCtx->width,
							  video_state->vCodecCtx->height,
							  SDL_WINDOW_SHOWN);
	/** 渲染 */
	renderer = SDL_CreateRenderer(window,
								  -1,
								  SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC);
	/** 纹理 */
	texture = SDL_CreateTexture(renderer,
								SDL_PIXELFORMAT_YV12,
								SDL_TEXTUREACCESS_STREAMING,
								video_state->vCodecCtx->width,
								video_state->vCodecCtx->height);
	
	// 打开音频播放器
	if (SDL_OpenAudio(&wanted_spec, NULL)<0) {
		printf("can't open audio.\n");
		return -1;
	}
	
#endif
	
	// 音频上下文格式转换
	swr_alloc_set_opts2(&video_state->swrCtx,
						&audio_param->out_channel_layout,			// 输出layout
						audio_param->out_sample_fmt,				// 输出格式
						audio_param->out_sample_rate,				// 输出采样率
						&video_state->aCodecCtx->ch_layout,			// 输入layout
						video_state->aCodecCtx->sample_fmt,			// 输入格式
						video_state->aCodecCtx->sample_rate,		// 输入采样率
						0, NULL);
	swr_init(video_state->swrCtx);
	
	// 视频上下文格式转换
	video_state->swsCtx = sws_getContext(video_state->vCodecCtx->width,			// src 宽
										 video_state->vCodecCtx->height,		// src 高
										 video_state->vCodecCtx->pix_fmt,		// src 格式
										 video_param->width,					// dst 宽
										 video_param->height,					// dst 高
										 video_param->pix_fmt,					// dst 格式
										 SWS_BILINEAR,
										 NULL,NULL,NULL);
	
	// 开始播放
	SDL_PauseAudio(0);

第二步：packet队列写入

做完准备工作之后，我们就将源文件中的输入packet都取出来，放入到对应的音频packet队列和视频packet队列中，方便后续使用。然后然后分别启动音频解码进程和视频解码进程同时进行解码。

读出packet，判断packet类型

根据类型放入音频和视频packet队列

创建解码进程

在这里插入图片描述

// 循环1: 从文件中读取packet
	while(av_read_frame(video_state->formatCtx, packet)>=0){
		/** 写入音频pkt队列 */
		if(packet->stream_index==video_state->audioStream){
			packet_queue_push(video_state->aQueue, packet);
		}
		/** 写入视频pkt队列 */
		if (packet->stream_index==video_state->videoStream) {
			packet_queue_push(video_state->vQueue, packet);
		}
		av_packet_unref(packet);
		SDL_PollEvent(&event);
		switch(event.type) {
			case SDL_QUIT:
				SDL_Quit();
				exit(0);
				break;
			default:
				break;
		}
	}
	printf("audio queue.size=%d\n", video_state->aQueue->size);
	
	// 创建一个线程并启动
	SDL_CreateThread(audio_thread, "audio_thread", video_state);
	SDL_CreateThread(video_thread, "video_thread", video_state);

第三步：音频解码+播放

接下来会分别讲一下音频和视频解码进程，这两个进程是同时开始的。

首先音频解码进程的步骤可以参考之前的音频播放器文章。我简单说一下步骤

从音频packet队列中取出packet

对packet进行解码得到frame

按音频输出格式进行swr_convert转换得到输出值，并写入buffer。

SDL音频播放器通过回调函数从buffer不断读取数据播放。

在这里插入图片描述
代码如下：

/**
 音频线程
 */
int audio_thread(void *arg) {
	/**
	 1. 从packet_queue队列中取出packet
	 2. 将packet进行解码
	 3. 写入到sdl的缓冲区中
	 */ 	VideoState* video_state = (VideoState*) arg;
	AudioParam* audio_param = video_state->audioParam;
	PacketQueue* queue = video_state->aQueue;
	audio_param->index = 0;
	
	AVRational time_base = video_state->formatCtx->streams[video_state->audioStream]->time_base;
	int64_t av_start_time = av_gettime();								// 播放开始时间戳
	
	AVPacket 	packet;
	int 		ret;
	AVFrame* 	pFrame = av_frame_alloc();
	for(;;) {
		if (queue->size > 0) {
			
			packet_queue_pop(queue, &packet);
			// 将packet写入编解码器
			ret = avcodec_send_packet(video_state->aCodecCtx, &packet);
			
			// 获取解码后的帧
			while (!avcodec_receive_frame(video_state->aCodecCtx, pFrame)) {
				// 格式转化
				swr_convert(video_state->swrCtx, &audio_param->out_buffer, audio_param->out_buffer_size,
							(const uint8_t **)pFrame->data, pFrame->nb_samples);
				audio_param->index++;
				printf("第%d帧 | pts:%lld | 帧大小(采样点):%d | 实际播放点%.2fs | 预期播放点%.2fs\n",
					   audio_param->index,
					   packet.pts,
					   packet.size,
					   (double)(av_gettime() - av_start_time)/AV_TIME_BASE,
					   pFrame->pts * av_q2d(time_base));
				
#if USE_SDL
				// 设置读取的音频数据
				audio_info.audio_len = audio_param->out_buffer_size;
				audio_info.audio_pos = (Uint8 *) audio_param->out_buffer;
				// 等待SDL播放完成
				while(audio_info.audio_len > 0)
					SDL_Delay(0.5);
#endif
			}
			av_packet_unref(&packet);
		}
		else {
			break;
		}
	}
	
	av_frame_free(&pFrame);
	// 结束
	video_state->isEnd = 1;
	return 0;
}

第四步：视频解码（子线程）+播放（主线程）

说视频的解码和播放之前，先提一点：SDL的主窗口操作是需要在主线程中进行的。因此我们不能再解码子线程中直接渲染SDL窗口，否则会造成内存泄漏。知道这个知识之后，更能理解接下来的流程分析。

我们视频解码播放拆成两个部分：解码+播放

第一部分：解码子线程，在子线程中完成解码，通过标识符的方式通知到主线程帧已更新，并渲染出来。
第二部分：主线程播放，循环监听子线程的通知标识，并更新窗口帧进行显示。

在这里插入图片描述
代码如下：

视频解码子线程

/**
 视频线程
 */
int video_thread(void *arg) {
	/**
	 1. 从视频pkt队列中读出packet
	 2. 送入解码器解码并取出
	 3. 使用SDL进行渲染
	 4. 根据pts计算延迟SDL_DELAY
	 */
	VideoState* 	video_state = (VideoState*) arg;
	PacketQueue* 	video_queue = video_state->vQueue;
	AVCodecContext* pCodecCtx = video_state->vCodecCtx;
	AVFrame* 		out_frame = video_state->videoParam->out_frame;
	
	AVPacket packet;
	AVFrame* pFrame = av_frame_alloc();
	
	AVRational time_base = video_state->formatCtx->streams[video_state->videoStream]->time_base;
	int64_t av_start_time = av_gettime();							// 开始播放时间(ms*1000)
	int64_t frame_delay = av_q2d(time_base) * AV_TIME_BASE;		// pts单位(ms*1000)
	
	int64_t frame_start_time = av_gettime();
	
	for (;;) {
		if (video_queue->size > 0) {
			packet_queue_pop(video_queue, &packet);
			// 将packet写入编解码器
			int ret = avcodec_send_packet(pCodecCtx, &packet);
			// 从解码器中取出原始帧
			while (!avcodec_receive_frame(pCodecCtx, pFrame)) {
				// 帧格式转化，转为YUV420P
				sws_scale(video_state->swsCtx,						// sws_context转换
						  (uint8_t const * const *)pFrame->data,	// 输入 data
						  pFrame->linesize,							// 输入 每行数据的大小（对齐）
						  0,										// 输入 Y轴位置
						  pCodecCtx->height,						// 输入 height
						  out_frame->data,							// 输出 data
						  out_frame->linesize);						// 输出 linesize
				
				// 帧更新
				video_state->videoParam->frame_update = 1;
				
				// 计算延迟
				int64_t pts = pFrame->pts;											// pts
				int64_t actual_playback_time = av_start_time + pts * frame_delay;	// 实际播放时间
				int64_t current_time = av_gettime();
				if (actual_playback_time > current_time) {
					SDL_Delay((Uint32)(actual_playback_time-current_time)/1000);	// 延迟当前时间和实际播放时间
				}
				
				video_state->videoParam->index++;
				printf("第%i帧 | 属于%s | pts为%d | 时长为%.2fms | 实际播放点为%.2fs | 预期播放点为%.2fs\n ",
					   video_state->videoParam->index,
					   get_frame_type(pFrame),
					   (int)pFrame->pts,
					   (double)(av_gettime() - frame_start_time)/1000,
					   (double)(av_gettime() - av_start_time)/AV_TIME_BASE,
					   pFrame->pts * av_q2d(time_base));
				
				frame_start_time = av_gettime();
			}
			
			av_packet_unref(&packet);
		} else {
			break;
		}
	}
	
	av_frame_free(&pFrame);
	// 结束
	video_state->isEnd = 1;
	return 1;
}

渲染主线程

while (!video_state->isEnd) {
		// 处理事件（必须由主线程执行）
		while (SDL_PollEvent(&event)) {
			if (event.type == SDL_QUIT) {
				video_state->isEnd = 1;
			}
		}
		
		if (video_state->videoParam->frame_update) {
			// 将AVFrame的数据写入到texture中，然后渲染后windows上
			rect.x = 0;
			rect.y = 0;
			rect.w = video_state->vCodecCtx->width;
			rect.h = video_state->vCodecCtx->height;
			
			out_frame = video_state->videoParam->out_frame;
			
			// 更新纹理
			SDL_UpdateYUVTexture(texture, &rect,
								 out_frame->data[0], out_frame->linesize[0],	// 	Y
								 out_frame->data[1], out_frame->linesize[1],	// 	U
								 out_frame->data[2], out_frame->linesize[2]);	//  V
			// 渲染页面
			SDL_RenderClear(renderer);
			SDL_RenderCopy(renderer, texture, NULL, NULL);
			SDL_RenderPresent(renderer);
			
			// 重置标志
			video_state->videoParam->frame_update = 0;
		}
	}

完整代码

sample_player.h

//
//  sample_player.h
//  learning
//
//  Created by chenhuaiyi on 2025/2/26.
//

#ifndef sample_player_h
#define sample_player_h

#include <stdio.h>
// ffmpeg
#include "libavcodec/avcodec.h"
#include "libswresample/swresample.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavutil/imgutils.h"
#include "libavutil/time.h"
#include "libavutil/fifo.h"
#include "libavutil/channel_layout.h"
// SDL
#include "SDL.h"
#include "SDL_thread.h"

/**
 宏定义
 */
#define USE_SDL 1

typedef struct MyAVPacketList {
	AVPacket 		*pkt;
	int 			serial;
} MyAVPacketList;

/**
 packet队列
 */
typedef struct PacketQueue {
	AVFifo* 		pkt_list;		// fifo队列
	int 			size;			// 队列大小
	
	SDL_mutex*		mutex;			// 互斥信号量
	SDL_cond*		cond;			// 条件变量，阻塞线程
} PacketQueue;

/**
 数据类型定义
 */
typedef struct AudioInfo{
	Uint32  		audio_len;		// 缓冲区长度
	Uint8*			audio_pos;		// 缓冲区起始地址指针
} AudioInfo;

/**
 语音输出参数
 */
typedef struct AudioParam {
	AVChannelLayout 	out_channel_layout;			// layout
	int 				out_nb_samples;				// 每一帧的样本数
	enum AVSampleFormat out_sample_fmt;				// 格式
	int 				out_sample_rate;			// 采样率
	int 				out_channels;				// 输出通道数
	int					index;						// 音频帧总数
	
	int 				out_buffer_size;			// 音频输出缓冲区大小
	uint8_t* 			out_buffer;					// 音频输出缓冲区
} AudioParam;

/**
 视频输出参数
 */
typedef struct VideoParam {
	int 				width;						// 宽
	int 				height;						// 高
	enum AVPixelFormat	pix_fmt;					// 格式 YUV420P
	int 				num_bytes;					// 单帧字节数
	int					index;						// 视频帧总数
	
	AVFrame*			out_frame;					// 输出帧
	int					frame_update;				// 帧更新标识
} VideoParam;

/**
 全局参数
 */
typedef struct VideoState {
	AVFormatContext* 	formatCtx;			// format上下文
	
	int				 	audioStream;		// 音频流索引
	AVCodecContext*  	aCodecCtx;			// 音频codec上下文
	const AVCodec*	 	aCodec;				// 音频解码器
	AudioParam*		 	audioParam;			// 音频参数
	SwrContext*			swrCtx;				// 音频上线文转换
	
	int					videoStream;		// 视频流索引
	AVCodecContext*  	vCodecCtx;			// 视频codec上新闻
	const AVCodec*	 	vCodec;				// 视频解码器
	VideoParam*			videoParam;			// 视频参数
	struct SwsContext* 	swsCtx;				// 视频上下文转换
	
	PacketQueue*		aQueue;				// 音频pkt队列
	PacketQueue*		vQueue;				// 视频pkt队列
	
	int					isEnd;				// 结束标志
} VideoState;

/**
 全局变量
 */
extern AudioInfo audio_info;

#endif /* sample_player_h */

utils.h

//
//  utils.h
//  sample_player
//
//  Created by chenhuaiyi on 2025/2/27.
//

#ifndef utils_h
#define utils_h

#include "sample_player.h"

int init_video_state(VideoState** video_state);

int destory_video_state(VideoState** video_state);

int packet_queue_push(PacketQueue* q, AVPacket* pkt);

int packet_queue_init(PacketQueue** q, size_t max_size);

int packet_queue_pop(PacketQueue* q, AVPacket* pkt);

void packet_queue_destroy(PacketQueue** q);

char* get_frame_type(AVFrame* frame);

#endif /* utils_h */

manager.h

//
//  manager.h
//  sample_player
//
//  Created by chenhuaiyi on 2025/2/27.
//

#ifndef manager_h
#define manager_h

#include "sample_player.h"

/**
 音频输出信息设置
 */
int audio_output_set(VideoState* video_state);

/**
 视频输出信息设置
 */
int video_output_set(VideoState* video_state);

/**
 音频SDL初始化
 */
int audio_sdl_set(VideoState* video_state, SDL_AudioSpec* wanted_spec, void (*fn)(void*, Uint8*, int));

/**
 视频SDL初始化
 */
int video_sdl_set(VideoState* video_state, SDL_Window** window, SDL_Renderer** renderer, SDL_Texture** texture);

#endif /* manager_h */

utils.c

//
//  utils.c
//  sample_player
//
//  Created by chenhuaiyi on 2025/2/27.
//

#include "utils.h"

/**
 初始化VideoState
 */
int init_video_state(VideoState** video_state) {
	*video_state = av_malloc(sizeof(VideoState));
	(*video_state)->formatCtx = avformat_alloc_context();
	
	(*video_state)->audioStream = 0;
	(*video_state)->aCodecCtx = avcodec_alloc_context3(NULL);
	(*video_state)->audioParam = av_malloc(sizeof(AudioParam));
	
	(*video_state)->videoStream = 0;
	(*video_state)->vCodecCtx = avcodec_alloc_context3(NULL);
	(*video_state)->videoParam = av_malloc(sizeof(VideoParam));
	(*video_state)->videoParam->frame_update = 0;
	
	/** pkt队列初始化 */
	(*video_state)->aQueue = av_malloc(sizeof(PacketQueue));
	packet_queue_init(&(*video_state)->aQueue, 1);
	(*video_state)->vQueue = av_malloc(sizeof(PacketQueue));
	packet_queue_init(&(*video_state)->vQueue, 1);
	
	(*video_state)->isEnd = 0;
	
	return 1;
}

/**
 销毁VideoState
 */
int destory_video_state(VideoState** video_state){
	
	swr_free(&(*video_state)->swrCtx);

	avcodec_free_context(&(*video_state)->aCodecCtx);
	av_free((*video_state)->audioParam->out_buffer);
	av_free((*video_state)->audioParam);
	
	sws_freeContext((*video_state)->swsCtx);
	avcodec_free_context(&(*video_state)->vCodecCtx);
	av_frame_free(&(*video_state)->videoParam->out_frame);
	av_free((*video_state)->videoParam);
	
	/** 队列释放 */
	packet_queue_destroy(&(*video_state)->aQueue);
	packet_queue_destroy(&(*video_state)->vQueue);
	
	if ((*video_state)->formatCtx != NULL) {
		avformat_close_input(&(*video_state)->formatCtx);
		(*video_state)->formatCtx = NULL;
	}
	
	
	av_free(*video_state);
	return 1;
}

/**
 初始化队列
 */
int packet_queue_init(PacketQueue** q, size_t max_size) {
	// 创建一个 AVFifo 队列，每个元素的大小为 sizeof(AVPacket)
	*q = av_malloc(sizeof(PacketQueue));
	(*q)->pkt_list = av_fifo_alloc2(max_size, sizeof(MyAVPacketList), AV_FIFO_FLAG_AUTO_GROW);
	(*q)->size = 0;
	(*q)->mutex = SDL_CreateMutex();
	(*q)->cond = SDL_CreateCond();
	if (!(*q)->pkt_list) {
		return -1;
	}
	return 0;
}

/**
 写入队列
 */
int packet_queue_push(PacketQueue* q, AVPacket* pkt) {
	MyAVPacketList pNode;
	if (!q || !pkt) {
		return -1;
	}
	AVPacket* pkt1 = av_packet_alloc();
	if (!pkt1) {
		av_packet_unref(pkt);
		return -1;
	}
	
	SDL_LockMutex(q->mutex);
	av_packet_ref(pkt1, pkt);
	pNode.pkt = pkt1;
	// 将 pkt 压入队列
	if (av_fifo_write(q->pkt_list, &pNode, 1) < 0) {
		SDL_UnlockMutex(q->mutex);
		return -1;
	}
	q->size++;
	SDL_CondSignal(q->cond);
	SDL_UnlockMutex(q->mutex);
	return 0;
}

/**
 弹出队列
 */
int packet_queue_pop(PacketQueue* q, AVPacket* pkt) {
	if (!q || !pkt) {
		return -1;
	}
	SDL_LockMutex(q->mutex);
	MyAVPacketList pNode;
	// 从队列中弹出一个元素, 没找到则阻塞线程，等待生产者释放
	if (av_fifo_read(q->pkt_list, &pNode, 1) < 0) {
		SDL_CondWait(q->cond, q->mutex);
	}
	q->size--;
	av_packet_move_ref(pkt, pNode.pkt);
	av_packet_free(&pNode.pkt);
	SDL_UnlockMutex(q->mutex);
	return 0;
}

/**
 销毁队列
 */
void packet_queue_destroy(PacketQueue** q) {
	if ((*q) && (*q)->pkt_list) {
		// 释放队列中的所有 AVPacket
		MyAVPacketList pNode;
		SDL_LockMutex((*q)->mutex);
		while (av_fifo_read((*q)->pkt_list, &pNode, 1) >= 0) {
			av_packet_free(&pNode.pkt);  // 释放 AVPacket 的资源
		}
		SDL_UnlockMutex((*q)->mutex);
		
		// 释放 AVFifo 队列
		(*q)->size = 0;
		av_fifo_freep2(&(*q)->pkt_list);
		
		SDL_DestroyMutex((*q)->mutex);
		SDL_DestroyCond((*q)->cond);
		av_free(*q);
	}
}

/**
 获取帧类型
 */
char* get_frame_type(AVFrame* frame) {
	switch (frame->pict_type) {
		case AV_PICTURE_TYPE_I:
			return "I";
			break;
		case AV_PICTURE_TYPE_P:
			return "P";
			break;
		case AV_PICTURE_TYPE_B:
			return "B";
			break;
		case AV_PICTURE_TYPE_S:
			return "S";
			break;
		case AV_PICTURE_TYPE_SI:
			return "SI";
			break;
		case AV_PICTURE_TYPE_SP:
			return "SP";
			break;
		case AV_PICTURE_TYPE_BI:
			return "BI";
			break;
		default:
			return "N";
			break;
	}
}

manager.c

//
//  manager.c
//  sample_player
//
//  Created by chenhuaiyi on 2025/2/27.
//

#include "manager.h"

/**
 音频输出信息构建
 */
int audio_output_set(VideoState* video_state) {
	
	AudioParam* audio_param = video_state->audioParam;
	
	// 输出用到的信息
	av_channel_layout_default(&audio_param->out_channel_layout, 2);
	audio_param->out_nb_samples = video_state->aCodecCtx->frame_size;			// 编解码器每个帧需要处理或者输出的采样点的大小 AAC:1024  MP3:1152
	audio_param->out_sample_fmt = AV_SAMPLE_FMT_S16;							// 采样格式
	audio_param->out_sample_rate = 44100;										// 采样率
	audio_param->out_channels = audio_param->out_channel_layout.nb_channels;	// 通道数
	
	// 获取需要使用的缓冲区大小 -> 通道数，单通道样本数，位深 1024(单帧处理的采样点)*2(双通道)*2(16bit对应2字节)
	audio_param->out_buffer_size = av_samples_get_buffer_size(NULL, audio_param->out_channels,
													 audio_param->out_nb_samples,
													 audio_param->out_sample_fmt, 1);
	// 分配缓冲区空间
	audio_param->out_buffer = NULL;
	av_samples_alloc(&audio_param->out_buffer, NULL, audio_param->out_channels,
					 audio_param->out_nb_samples, audio_param->out_sample_fmt, 1);
	
	return 1;
}

/**
 视频输出信息构建
 */
int video_output_set(VideoState* video_state) {
	
	VideoParam* video_param = video_state->videoParam;
	
	// 基础信息
	video_param->width = video_state->vCodecCtx->width;
	video_param->height = video_state->vCodecCtx->height;
	video_param->pix_fmt = AV_PIX_FMT_YUV420P;
	
	// 计算单帧大小, 分配单帧内存
	video_param->num_bytes = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, video_param->width, video_param->height, 1);
	video_param->out_frame = av_frame_alloc();
	av_image_alloc(video_param->out_frame->data, video_param->out_frame->linesize,
				   video_param->width, video_param->height, AV_PIX_FMT_YUV420P, 1);
	
	return 1;
}

/**
 音频SDL初始化
 */
int audio_sdl_set(VideoState* video_state, SDL_AudioSpec* wanted_spec, void (*fn)(void*, Uint8*, int)) {
	AudioParam* audio_param = video_state->audioParam;
	
	wanted_spec->freq = audio_param->out_sample_rate;					// 采样率
	wanted_spec->format = AUDIO_S16SYS;									// 采样格式 16bit
	wanted_spec->channels = audio_param->out_channels;					// 通道数
	wanted_spec->silence = 0;
	wanted_spec->samples = audio_param->out_nb_samples;					// 单帧处理的采样点
	wanted_spec->callback = fn;											// 回调函数
	wanted_spec->userdata = video_state->aCodecCtx;						// 回调函数的参数
	
	
	return 1;
}

/**
 视频SDL初始化
 */
int video_sdl_set(VideoState* video_state, SDL_Window** window, SDL_Renderer** renderer, SDL_Texture** texture){
	
	AVCodecContext* pCodecCtx = video_state->vCodecCtx;
	
	/** 窗口 */
	*window = SDL_CreateWindow("SDL2 window",
										   SDL_WINDOWPOS_CENTERED,
										   SDL_WINDOWPOS_CENTERED,
										   pCodecCtx->width,
										   pCodecCtx->height,
										   SDL_WINDOW_SHOWN);
	if (!*window) {
		printf("SDL_CreateWindow Error: %s\n", SDL_GetError());
		SDL_Quit();
		return 1;
	}
	/** 渲染 */
	*renderer = SDL_CreateRenderer(*window,
								   -1,
								   SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC);
	if (!*renderer) {
		printf("SDL_CreateRenderer Error: %s\n", SDL_GetError());
		SDL_DestroyWindow(*window);
		SDL_Quit();
		return 1;
	}
	/** 纹理 */
	*texture = SDL_CreateTexture(*renderer,
								SDL_PIXELFORMAT_YV12,
								 SDL_TEXTUREACCESS_STREAMING,
								 pCodecCtx->width,
								 pCodecCtx->height);
	return 1;
}

main.c

//
//  main.c
//  sample_player
//
//  Created by chenhuaiyi on 2025/2/26.
//

#include "utils.h"
#include "manager.h"

AudioInfo audio_info;

/* udata: 传入的参数
 * stream: SDL音频缓冲区
 * len: SDL音频缓冲区大小
 * 回调函数
 */
void fill_audio(void *udata, Uint8 *stream, int len){
	SDL_memset(stream, 0, len);			// 必须重置，不然全是电音!!!
	if(audio_info.audio_len==0){					// 有音频数据时才调用
		return;
	}
	len = (len>audio_info.audio_len ? audio_info.audio_len : len);	// 最多填充缓冲区大小的数据
	SDL_MixAudio(stream, audio_info.audio_pos, len, SDL_MIX_MAXVOLUME);
	audio_info.audio_pos += len;
	audio_info.audio_len -= len;
}

/**
 音频线程
 */
int audio_thread(void *arg) {
	/**
	 1. 从packet_queue队列中取出packet
	 2. 将packet进行解码
	 3. 写入到sdl的缓冲区中
	 */ 	VideoState* video_state = (VideoState*) arg;
	AudioParam* audio_param = video_state->audioParam;
	PacketQueue* queue = video_state->aQueue;
	audio_param->index = 0;
	
	AVRational time_base = video_state->formatCtx->streams[video_state->audioStream]->time_base;
	int64_t av_start_time = av_gettime();								// 播放开始时间戳
	
	AVPacket 	packet;
	int 		ret;
	AVFrame* 	pFrame = av_frame_alloc();
	for(;;) {
		if (queue->size > 0) {
			
			packet_queue_pop(queue, &packet);
			// 将packet写入编解码器
			ret = avcodec_send_packet(video_state->aCodecCtx, &packet);
			if ( ret < 0 ) {
				printf("send packet error\n");
				return -1;
			}
			
			// 获取解码后的帧
			while (!avcodec_receive_frame(video_state->aCodecCtx, pFrame)) {
				// 格式转化
				swr_convert(video_state->swrCtx, &audio_param->out_buffer, audio_param->out_buffer_size,
							(const uint8_t **)pFrame->data, pFrame->nb_samples);
				audio_param->index++;
				printf("第%d帧 | pts:%lld | 帧大小(采样点):%d | 实际播放点%.2fs | 预期播放点%.2fs\n",
					   audio_param->index,
					   packet.pts,
					   packet.size,
					   (double)(av_gettime() - av_start_time)/AV_TIME_BASE,
					   pFrame->pts * av_q2d(time_base));
				
#if USE_SDL
				// 设置读取的音频数据
				audio_info.audio_len = audio_param->out_buffer_size;
				audio_info.audio_pos = (Uint8 *) audio_param->out_buffer;
				// 等待SDL播放完成
				while(audio_info.audio_len > 0)
					SDL_Delay(0.5);
#endif
			}
			av_packet_unref(&packet);
		}
		else {
			break;
		}
	}
	
	av_frame_free(&pFrame);
	// 结束
	video_state->isEnd = 1;
	return 0;
}

/**
 视频线程
 */
int video_thread(void *arg) {
	/**
	 1. 从视频pkt队列中读出packet
	 2. 送入解码器解码并取出
	 3. 使用SDL进行渲染
	 4. 根据pts计算延迟SDL_DELAY
	 */
	VideoState* 	video_state = (VideoState*) arg;
	PacketQueue* 	video_queue = video_state->vQueue;
	AVCodecContext* pCodecCtx = video_state->vCodecCtx;
	AVFrame* 		out_frame = video_state->videoParam->out_frame;
	
	AVPacket packet;
	AVFrame* pFrame = av_frame_alloc();
	
	AVRational time_base = video_state->formatCtx->streams[video_state->videoStream]->time_base;
	int64_t av_start_time = av_gettime();							// 开始播放时间(ms*1000)
	int64_t frame_delay = av_q2d(time_base) * AV_TIME_BASE;		// pts单位(ms*1000)
	
	int64_t frame_start_time = av_gettime();
	
	for (;;) {
		if (video_queue->size > 0) {
			packet_queue_pop(video_queue, &packet);
			// 将packet写入编解码器
			int ret = avcodec_send_packet(pCodecCtx, &packet);
			if (ret < 0) {
				printf("packet resolve error!");
				break;
			}
			// 从解码器中取出原始帧
			while (!avcodec_receive_frame(pCodecCtx, pFrame)) {
				// 帧格式转化，转为YUV420P
				sws_scale(video_state->swsCtx,						// sws_context转换
						  (uint8_t const * const *)pFrame->data,	// 输入 data
						  pFrame->linesize,							// 输入 每行数据的大小（对齐）
						  0,										// 输入 Y轴位置
						  pCodecCtx->height,						// 输入 height
						  out_frame->data,							// 输出 data
						  out_frame->linesize);						// 输出 linesize
				
				// 帧更新
				video_state->videoParam->frame_update = 1;
				
				// 计算延迟
				int64_t pts = pFrame->pts;											// pts
				int64_t actual_playback_time = av_start_time + pts * frame_delay;	// 实际播放时间
				int64_t current_time = av_gettime();
				if (actual_playback_time > current_time) {
					SDL_Delay((Uint32)(actual_playback_time-current_time)/1000);	// 延迟当前时间和实际播放时间
				}
				
				video_state->videoParam->index++;
				printf("第%i帧 | 属于%s | pts为%d | 时长为%.2fms | 实际播放点为%.2fs | 预期播放点为%.2fs\n ",
					   video_state->videoParam->index,
					   get_frame_type(pFrame),
					   (int)pFrame->pts,
					   (double)(av_gettime() - frame_start_time)/1000,
					   (double)(av_gettime() - av_start_time)/AV_TIME_BASE,
					   pFrame->pts * av_q2d(time_base));
				
				frame_start_time = av_gettime();
			}
			
			av_packet_unref(&packet);
		} else {
			break;
		}
	}
	
	av_frame_free(&pFrame);
	// 结束
	video_state->isEnd = 1;
	return 1;
}


int main(int argc, char* argv[])
{
	
	VideoState* 		video_state;
	AudioParam*			audio_param;
	VideoParam*			video_param;
	
	SDL_Event 			event;
	SDL_Rect      		rect;
	
	if(argc < 2) {
		fprintf(stderr, "Usage: test <file>\n");
		exit(1);
	}
	
	/** 初始化函数 */
	init_video_state(&video_state);
	audio_param = video_state->audioParam;
	video_param = video_state->videoParam;
	
	avformat_network_init();
	
	// 1. 打开视频文件，获取格式上下文
	if(avformat_open_input(&video_state->formatCtx, argv[1], NULL, NULL)!=0){
		printf("Couldn't open input stream.\n");
		return -1;
	}
	
	// 2. 对文件探测流信息
	if(avformat_find_stream_info(video_state->formatCtx, NULL) < 0){
		printf("Couldn't find stream information.\n");
		return -1;
	}
	
	// 打印信息
	av_dump_format(video_state->formatCtx, 0, argv[1], 0);
	
	// 3. 找到对应的 音频流/视频流 索引
	video_state->audioStream=-1;
	video_state->videoStream=-1;
	for(int i=0; i < video_state->formatCtx->nb_streams; i++) {
		if(video_state->formatCtx->streams[i]->codecpar->codec_type==AVMEDIA_TYPE_AUDIO){
			video_state->audioStream=i;
		}
		if (video_state->formatCtx->streams[i]->codecpar->codec_type==AVMEDIA_TYPE_VIDEO) {
			video_state->videoStream=i;
		}
	}
	if(video_state->audioStream==-1){
		printf("Didn't find a audio stream.\n");
		return -1;
	}
	if (video_state->videoStream==-1) {
		printf("Didn't find a video stream.\n");
		return -1;
	}
	
	// 4. 将 音频流/视频流 编码参数写入上下文
	AVCodecParameters* aCodecParam = video_state->formatCtx->streams[video_state->audioStream]->codecpar;
	avcodec_parameters_to_context(video_state->aCodecCtx, aCodecParam);
//	avcodec_parameters_free(&aCodecParam); 这个是不需要手动释放的
	
	AVCodecParameters* vCodecParam = video_state->formatCtx->streams[video_state->videoStream]->codecpar;
	avcodec_parameters_to_context(video_state->vCodecCtx, vCodecParam);
//	avcodec_parameters_free(&vCodecParam);
	
	// 5. 查找流的编码器
	video_state->aCodec = avcodec_find_decoder(video_state->aCodecCtx->codec_id);
	if(video_state->aCodec==NULL){
		printf("Audio codec not found.\n");
		return -1;
	}
	video_state->vCodec = avcodec_find_decoder(video_state->vCodecCtx->codec_id);
	if(video_state->vCodec==NULL){
		printf("Video codec not found.\n");
		return -1;
	}
	
	// 6. 打开流的编解码器
	if(avcodec_open2(video_state->aCodecCtx, video_state->aCodec, NULL)<0){
		printf("Could not open audio codec.\n");
		return -1;
	}
	if(avcodec_open2(video_state->vCodecCtx, video_state->vCodec, NULL)<0){
		printf("Could not open video codec.\n");
		return -1;
	}
	
	/** 音频输出信息构建 */
	audio_output_set(video_state);
	/** 视频输出信息构建 */
	video_output_set(video_state);
	
	
	//	SDL 初始化
#if USE_SDL
	if(SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
		printf( "Could not initialize SDL - %s\n", SDL_GetError());
		return -1;
	}
	
	// 在 main 函数开始处添加
	SDL_SetHint(SDL_HINT_VIDEO_MAC_FULLSCREEN_SPACES, "0");
	SDL_SetHint(SDL_HINT_MAC_BACKGROUND_APP, "1");
	
	/** 初始化音频SDL设备 */
	SDL_AudioSpec wanted_spec;
	//	audio_sdl_set(video_state, &wanted_spec, fill_audio);
	wanted_spec.freq = audio_param->out_sample_rate;					// 采样率
	wanted_spec.format = AUDIO_S16SYS;									// 采样格式 16bit
	wanted_spec.channels = audio_param->out_channels;					// 通道数
	wanted_spec.silence = 0;
	wanted_spec.samples = audio_param->out_nb_samples;					// 单帧处理的采样点
	wanted_spec.callback = fill_audio;									// 回调函数
	wanted_spec.userdata = video_state->aCodecCtx;						// 回调函数的参数
	
	/** 初始化视频SDL设备 */
	SDL_Window*       window = NULL;
	SDL_Renderer*     renderer = NULL;
	SDL_Texture*      texture= NULL;
	//	video_sdl_set(video_state, &window, &renderer, &texture);
	/** 窗口 */
	window = SDL_CreateWindow("SDL2 window",
							  SDL_WINDOWPOS_CENTERED,
							  SDL_WINDOWPOS_CENTERED,
							  video_state->vCodecCtx->width,
							  video_state->vCodecCtx->height,
							  SDL_WINDOW_SHOWN);
	if (!window) {
		printf("SDL_CreateWindow Error: %s\n", SDL_GetError());
		SDL_Quit();
		return 1;
	}
	/** 渲染 */
	renderer = SDL_CreateRenderer(window,
								  -1,
								  SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC);
	if (!renderer) {
		printf("SDL_CreateRenderer Error: %s\n", SDL_GetError());
		SDL_DestroyWindow(window);
		SDL_Quit();
		return 1;
	}
	/** 纹理 */
	texture = SDL_CreateTexture(renderer,
								SDL_PIXELFORMAT_YV12,
								SDL_TEXTUREACCESS_STREAMING,
								video_state->vCodecCtx->width,
								video_state->vCodecCtx->height);
	
	// 打开音频播放器
	if (SDL_OpenAudio(&wanted_spec, NULL)<0) {
		printf("can't open audio.\n");
		return -1;
	}
	
#endif
	
	// 音频上下文格式转换
	swr_alloc_set_opts2(&video_state->swrCtx,
						&audio_param->out_channel_layout,			// 输出layout
						audio_param->out_sample_fmt,				// 输出格式
						audio_param->out_sample_rate,				// 输出采样率
						&video_state->aCodecCtx->ch_layout,			// 输入layout
						video_state->aCodecCtx->sample_fmt,			// 输入格式
						video_state->aCodecCtx->sample_rate,		// 输入采样率
						0, NULL);
	swr_init(video_state->swrCtx);
	
	// 视频上下文格式转换
	video_state->swsCtx = sws_getContext(video_state->vCodecCtx->width,			// src 宽
										 video_state->vCodecCtx->height,		// src 高
										 video_state->vCodecCtx->pix_fmt,		// src 格式
										 video_param->width,					// dst 宽
										 video_param->height,					// dst 高
										 video_param->pix_fmt,					// dst 格式
										 SWS_BILINEAR,
										 NULL,NULL,NULL);
	
	// 开始播放
	SDL_PauseAudio(0);
	int64_t av_start_time = av_gettime();	// 播放开始时间戳
	
	AVPacket* packet = av_packet_alloc();	// packet初始化
	
	// 循环1: 从文件中读取packet
	while(av_read_frame(video_state->formatCtx, packet)>=0){
		/** 写入音频pkt队列 */
		if(packet->stream_index==video_state->audioStream){
			packet_queue_push(video_state->aQueue, packet);
		}
		/** 写入视频pkt队列 */
		if (packet->stream_index==video_state->videoStream) {
			packet_queue_push(video_state->vQueue, packet);
		}
		av_packet_unref(packet);
		SDL_PollEvent(&event);
		switch(event.type) {
			case SDL_QUIT:
				SDL_Quit();
				exit(0);
				break;
			default:
				break;
		}
	}
	printf("audio queue.size=%d\n", video_state->aQueue->size);
	
	// 创建一个线程并启动
	SDL_CreateThread(audio_thread, "audio_thread", video_state);
	SDL_CreateThread(video_thread, "video_thread", video_state);
	//	video_thread(video_state);
	
	AVFrame* out_frame = NULL;
	
	while (!video_state->isEnd) {
		// 处理事件（必须由主线程执行）
		while (SDL_PollEvent(&event)) {
			if (event.type == SDL_QUIT) {
				video_state->isEnd = 1;
			}
		}
		
		if (video_state->videoParam->frame_update) {
			// 将AVFrame的数据写入到texture中，然后渲染后windows上
			rect.x = 0;
			rect.y = 0;
			rect.w = video_state->vCodecCtx->width;
			rect.h = video_state->vCodecCtx->height;
			
			out_frame = video_state->videoParam->out_frame;
			
			// 更新纹理
			SDL_UpdateYUVTexture(texture, &rect,
								 out_frame->data[0], out_frame->linesize[0],	// 	Y
								 out_frame->data[1], out_frame->linesize[1],	// 	U
								 out_frame->data[2], out_frame->linesize[2]);	//  V
			// 渲染页面
			SDL_RenderClear(renderer);
			SDL_RenderCopy(renderer, texture, NULL, NULL);
			SDL_RenderPresent(renderer);
			
			// 重置标志
			video_state->videoParam->frame_update = 0;
		}
	}
	
	
	// 打印参数
	printf("格式: %s\n", video_state->formatCtx->iformat->name);
	printf("时长: %lld us\n", video_state->formatCtx->duration);
	
	printf("音频持续时长为 %.2f，音频帧总数为 %d\n", (double)(av_gettime()-av_start_time)/AV_TIME_BASE, audio_param->index);
	printf("码率: %lld\n", video_state->formatCtx->bit_rate);
	printf("编码器: %s (%s)\n", video_state->aCodecCtx->codec->long_name, avcodec_get_name(video_state->aCodecCtx->codec_id));
	printf("通道数: %d\n", video_state->aCodecCtx->ch_layout.nb_channels);
	printf("采样率: %d \n", video_state->aCodecCtx->sample_rate);
	printf("单通道每帧的采样点数目: %d\n", video_state->aCodecCtx->frame_size);
	printf("pts单位(ms*1000): %.2f\n", av_q2d(video_state->formatCtx->streams[video_state->audioStream]->time_base) * AV_TIME_BASE);
	
	
	// 释放空间
	av_packet_free(&packet);
	
#if USE_SDL
	SDL_CloseAudio();
	SDL_DestroyTexture(texture);
	SDL_DestroyRenderer(renderer);
	SDL_DestroyWindow(window);
	SDL_Quit();
#endif
	destory_video_state(&video_state);
	
	return 0;
}

结语和展望

终于做完了，恭喜你，完成了一个非常粗糙，而且有很多问题的简单音视频播放器。接下来几期，我们跟着大家一起对这个简单的播放器进行优化。当然我也是个小萌新，所以一步一步来嘛哈哈。先抛出几个问题：

时钟同步怎么做

如何边读出packet，边解码frame并播放

我们如何对输出的解码帧进行转化

ps. 鼓励大家阅读ffplay源码，所有的问题都能迎刃而解，哈哈哈！

FFmpeg入门：最简单的音视频播放器