Getting Started with FFmpeg: The Simplest Audio-Video Player
In the previous two chapters, we saw how to build a simple audio player and a simple video player:
Getting Started with FFmpeg: The Simplest Audio Player
Getting Started with FFmpeg: The Simplest Video Player
In this chapter we combine what those two chapters covered into a complete audio-video player. Stay with me: this chapter is the starting point for the full-featured player we will build in later installments.
Overall flow
Without further ado, here is the diagram.
The diagram may look a bit complex, so I will take it apart module by module and walk through the whole flow step by step.
Step 1: Initialization
Let's start with the top part of the flowchart. This part is the same as in the earlier posts; it does the up-front initialization work:
1. Open the file and obtain the format context
2. Find the audio/video streams and obtain their codec contexts
3. Open the decoders
4. Allocate output buffers for the decoded data
5. Create the audio/video format-conversion contexts
6. Initialize the SDL components: the video window and the audio device
Code (some validation and parameter setup is omitted for readability; the full source is at the end of the post):
/** Initialization */
init_video_state(&video_state);
audio_param = video_state->audioParam;
video_param = video_state->videoParam;
avformat_network_init();
// 1. Open the media file and obtain the format context
if(avformat_open_input(&video_state->formatCtx, argv[1], NULL, NULL)!=0){
printf("Couldn't open input stream.\n");
return -1;
}
// 2. Probe the file for stream information
if(avformat_find_stream_info(video_state->formatCtx, NULL) < 0){
printf("Couldn't find stream information.\n");
return -1;
}
// 3. Find the audio/video stream indices
video_state->audioStream=-1;
video_state->videoStream=-1;
for(int i=0; i < video_state->formatCtx->nb_streams; i++) {
if(video_state->formatCtx->streams[i]->codecpar->codec_type==AVMEDIA_TYPE_AUDIO){
video_state->audioStream=i;
}
if (video_state->formatCtx->streams[i]->codecpar->codec_type==AVMEDIA_TYPE_VIDEO) {
video_state->videoStream=i;
}
}
// 4. Copy the audio/video stream codec parameters into the codec contexts
AVCodecParameters* aCodecParam = video_state->formatCtx->streams[video_state->audioStream]->codecpar;
avcodec_parameters_to_context(video_state->aCodecCtx, aCodecParam);
AVCodecParameters* vCodecParam = video_state->formatCtx->streams[video_state->videoStream]->codecpar;
avcodec_parameters_to_context(video_state->vCodecCtx, vCodecParam);
// 5. Find decoders for the streams
video_state->aCodec = avcodec_find_decoder(video_state->aCodecCtx->codec_id);
video_state->vCodec = avcodec_find_decoder(video_state->vCodecCtx->codec_id);
// 6. Open the decoders
if(avcodec_open2(video_state->aCodecCtx, video_state->aCodec, NULL)<0){
printf("Could not open audio codec.\n");
return -1;
}
if(avcodec_open2(video_state->vCodecCtx, video_state->vCodec, NULL)<0){
printf("Could not open video codec.\n");
return -1;
}
/** Build the audio output parameters */
audio_output_set(video_state);
/** Build the video output parameters */
video_output_set(video_state);
// SDL initialization
#if USE_SDL
if(SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
printf( "Could not initialize SDL - %s\n", SDL_GetError());
return -1;
}
/** Set up the SDL audio device */
SDL_AudioSpec wanted_spec;
wanted_spec.freq = audio_param->out_sample_rate;     // sample rate
wanted_spec.format = AUDIO_S16SYS;                   // sample format, 16-bit
wanted_spec.channels = audio_param->out_channels;    // channel count
wanted_spec.silence = 0;
wanted_spec.samples = audio_param->out_nb_samples;   // samples per callback
wanted_spec.callback = fill_audio;                   // callback function
wanted_spec.userdata = video_state->aCodecCtx;       // userdata passed to the callback
/** Set up the SDL video objects */
SDL_Window* window = NULL;
SDL_Renderer* renderer = NULL;
SDL_Texture* texture= NULL;
/** Window */
window = SDL_CreateWindow("SDL2 window",
SDL_WINDOWPOS_CENTERED,
SDL_WINDOWPOS_CENTERED,
video_state->vCodecCtx->width,
video_state->vCodecCtx->height,
SDL_WINDOW_SHOWN);
/** Renderer */
renderer = SDL_CreateRenderer(window,
-1,
SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC);
/** Texture */
texture = SDL_CreateTexture(renderer,
SDL_PIXELFORMAT_YV12,
SDL_TEXTUREACCESS_STREAMING,
video_state->vCodecCtx->width,
video_state->vCodecCtx->height);
// Open the audio device
if (SDL_OpenAudio(&wanted_spec, NULL)<0) {
printf("can't open audio.\n");
return -1;
}
#endif
// Audio resampling context
swr_alloc_set_opts2(&video_state->swrCtx,
                    &audio_param->out_channel_layout,    // output channel layout
                    audio_param->out_sample_fmt,         // output sample format
                    audio_param->out_sample_rate,        // output sample rate
                    &video_state->aCodecCtx->ch_layout,  // input channel layout
                    video_state->aCodecCtx->sample_fmt,  // input sample format
                    video_state->aCodecCtx->sample_rate, // input sample rate
                    0, NULL);
swr_init(video_state->swrCtx);
// Video scaling/conversion context
video_state->swsCtx = sws_getContext(video_state->vCodecCtx->width,   // src width
                                     video_state->vCodecCtx->height,  // src height
                                     video_state->vCodecCtx->pix_fmt, // src pixel format
                                     video_param->width,              // dst width
                                     video_param->height,             // dst height
                                     video_param->pix_fmt,            // dst pixel format
                                     SWS_BILINEAR,
                                     NULL,NULL,NULL);
// Start playback
SDL_PauseAudio(0);
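An aside of my own (not part of the original flow): instead of the manual index loop in step 3 above, FFmpeg also provides a helper that picks the "best" stream of each type and reports failure through its return value. A minimal sketch:
// Possible replacement for the manual loop; a negative return means not found
video_state->audioStream = av_find_best_stream(video_state->formatCtx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
video_state->videoStream = av_find_best_stream(video_state->formatCtx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
if (video_state->audioStream < 0 || video_state->videoStream < 0) {
    printf("Couldn't find audio/video stream.\n");
    return -1;
}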
Step 2: Filling the packet queues
With the preparation done, we read every packet from the input file and push it onto the corresponding audio or video packet queue for later use, then start the audio and video decoding threads, which decode in parallel (see the sketch after the code for joining them cleanly):
- Read a packet and check which stream it belongs to
- Push it onto the audio or video packet queue accordingly
- Create the decoding threads
// Loop 1: read packets from the file
while(av_read_frame(video_state->formatCtx, packet)>=0){
    /** Push onto the audio packet queue */
    if(packet->stream_index==video_state->audioStream){
        packet_queue_push(video_state->aQueue, packet);
    }
    /** Push onto the video packet queue */
    if (packet->stream_index==video_state->videoStream) {
        packet_queue_push(video_state->vQueue, packet);
    }
}
av_packet_unref(packet);
SDL_PollEvent(&event);
switch(event.type) {
case SDL_QUIT:
SDL_Quit();
exit(0);
break;
default:
break;
}
}
printf("audio queue.size=%d\n", video_state->aQueue->size);
// Create and start the decoding threads
SDL_CreateThread(audio_thread, "audio_thread", video_state);
SDL_CreateThread(video_thread, "video_thread", video_state);
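A caveat of my own (not from the original code): SDL_CreateThread returns an SDL_Thread* handle, and discarding it as above means the threads are never joined; the program only works because main later spins until isEnd is set. If you keep the handles, you can shut down cleanly. A minimal sketch:
SDL_Thread* a_tid = SDL_CreateThread(audio_thread, "audio_thread", video_state);
SDL_Thread* v_tid = SDL_CreateThread(video_thread, "video_thread", video_state);
// ... run the main render loop ...
int status;
SDL_WaitThread(a_tid, &status); // blocks until audio_thread returns
SDL_WaitThread(v_tid, &status); // blocks until video_thread returns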
Step 3: Audio decoding and playback
Next let's look at the audio and video decoding threads; the two start at the same time.
The audio thread follows the same steps as the earlier audio player post. Briefly:
- Pop a packet from the audio packet queue
- Decode the packet into a frame
- Convert it to the output format with swr_convert and write the result into the output buffer
- The SDL audio device continuously pulls data from that buffer through the callback and plays it
The code:
/**
 Audio thread
 */
int audio_thread(void *arg) {
    /**
     1. Pop a packet from the packet queue
     2. Decode the packet
     3. Write the result into the SDL buffer
     */
    VideoState* video_state = (VideoState*) arg;
    AudioParam* audio_param = video_state->audioParam;
    PacketQueue* queue = video_state->aQueue;
    audio_param->index = 0;
    AVRational time_base = video_state->formatCtx->streams[video_state->audioStream]->time_base;
    int64_t av_start_time = av_gettime(); // playback start timestamp (microseconds)
    AVPacket packet;
    int ret;
    AVFrame* pFrame = av_frame_alloc();
    for(;;) {
        if (queue->size > 0) {
            packet_queue_pop(queue, &packet);
            // Send the packet to the decoder
            ret = avcodec_send_packet(video_state->aCodecCtx, &packet);
            // Drain the decoded frames
            while (!avcodec_receive_frame(video_state->aCodecCtx, pFrame)) {
                // Resample; the third argument is the output capacity in
                // samples per channel, not in bytes
                swr_convert(video_state->swrCtx, &audio_param->out_buffer, audio_param->out_nb_samples,
                            (const uint8_t **)pFrame->data, pFrame->nb_samples);
                audio_param->index++;
                printf("frame %d | pts:%lld | packet size (bytes):%d | actual play time %.2fs | expected play time %.2fs\n",
                       audio_param->index,
                       packet.pts,
                       packet.size,
                       (double)(av_gettime() - av_start_time)/AV_TIME_BASE,
                       pFrame->pts * av_q2d(time_base));
#if USE_SDL
                // Hand the converted data to the SDL callback
                audio_info.audio_len = audio_param->out_buffer_size;
                audio_info.audio_pos = (Uint8 *) audio_param->out_buffer;
                // Wait until SDL has consumed the buffer
                // (SDL_Delay takes whole milliseconds)
                while(audio_info.audio_len > 0)
                    SDL_Delay(1);
#endif
            }
            av_packet_unref(&packet);
        }
        else {
            break;
        }
    }
    av_frame_free(&pFrame);
    // Done
    video_state->isEnd = 1;
    return 0;
}
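A side note on the hand-off above: the audio thread busy-waits on audio_info.audio_len while the SDL callback drains the buffer. SDL2 also offers a push model, SDL_QueueAudio, which needs no callback at all. A minimal sketch of that alternative (my variation, not the post's code; it assumes the device was opened with wanted_spec.callback = NULL, in which case the legacy SDL_OpenAudio path uses device ID 1):
// After decoding a frame, push the converted bytes straight to the device.
int out_samples = swr_convert(video_state->swrCtx, &audio_param->out_buffer, audio_param->out_nb_samples,
                              (const uint8_t **)pFrame->data, pFrame->nb_samples);
if (out_samples > 0) {
    int out_bytes = out_samples * audio_param->out_channels * 2; // S16 = 2 bytes per sample
    SDL_QueueAudio(1, audio_param->out_buffer, out_bytes);       // 1 = legacy device ID
}
SDL buffers queued data internally, so the busy-wait loop and the AudioInfo globals disappear.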
Step 4: Video decoding (worker thread) + playback (main thread)
Before walking through video decoding and playback, one important point: SDL window and rendering operations must run on the main thread. We therefore cannot render to the SDL window directly from the decoding thread; doing so results in undefined behavior (crashes on some platforms, notably macOS). Keeping this in mind makes the flow below easier to follow.
We split video decoding and playback into two parts:
Part 1: the decoding thread. It decodes frames in the background and notifies the main thread through a flag that a new frame is ready.
Part 2: the main thread. It polls that flag in a loop and, when it is set, updates the window with the new frame.
The code:
Video decoding thread
/**
 Video thread
 */
int video_thread(void *arg) {
    /**
     1. Pop a packet from the video packet queue
     2. Send it to the decoder and receive frames
     3. Let SDL render the frames (via the main thread)
     4. Compute the SDL_Delay from the pts
     */
    VideoState* video_state = (VideoState*) arg;
    PacketQueue* video_queue = video_state->vQueue;
    AVCodecContext* pCodecCtx = video_state->vCodecCtx;
    AVFrame* out_frame = video_state->videoParam->out_frame;
    AVPacket packet;
    AVFrame* pFrame = av_frame_alloc();
    AVRational time_base = video_state->formatCtx->streams[video_state->videoStream]->time_base;
    int64_t av_start_time = av_gettime();                   // playback start time (microseconds)
    int64_t frame_delay = av_q2d(time_base) * AV_TIME_BASE; // one pts tick, in microseconds
    int64_t frame_start_time = av_gettime();
    for (;;) {
        if (video_queue->size > 0) {
            packet_queue_pop(video_queue, &packet);
            // Send the packet to the decoder
            int ret = avcodec_send_packet(pCodecCtx, &packet);
            // Drain the decoded frames
            while (!avcodec_receive_frame(pCodecCtx, pFrame)) {
                // Convert the frame to YUV420P
                sws_scale(video_state->swsCtx,                   // sws conversion context
                          (uint8_t const * const *)pFrame->data, // input data planes
                          pFrame->linesize,                      // input line sizes (with alignment)
                          0,                                     // starting Y position
                          pCodecCtx->height,                     // input height
                          out_frame->data,                       // output data planes
                          out_frame->linesize);                  // output line sizes
                // Mark the frame as updated
                video_state->videoParam->frame_update = 1;
                // Compute the delay
                int64_t pts = pFrame->pts;
                int64_t actual_playback_time = av_start_time + pts * frame_delay; // target wall-clock time
                int64_t current_time = av_gettime();
                if (actual_playback_time > current_time) {
                    SDL_Delay((Uint32)(actual_playback_time-current_time)/1000); // sleep until the target time
                }
                video_state->videoParam->index++;
                printf("frame %i | type %s | pts %d | duration %.2fms | actual play time %.2fs | expected play time %.2fs\n ",
                       video_state->videoParam->index,
                       get_frame_type(pFrame),
                       (int)pFrame->pts,
                       (double)(av_gettime() - frame_start_time)/1000,
                       (double)(av_gettime() - av_start_time)/AV_TIME_BASE,
                       pFrame->pts * av_q2d(time_base));
                frame_start_time = av_gettime();
            }
            av_packet_unref(&packet);
        } else {
            break;
        }
    }
    av_frame_free(&pFrame);
    // Done
    video_state->isEnd = 1;
    return 1;
}
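A quick worked example of the timing math above (numbers of my own choosing): if the stream's time_base is 1/25, then frame_delay = av_q2d(time_base) * AV_TIME_BASE = 40000 microseconds per pts tick. A frame with pts = 50 therefore has a target wall-clock time of av_start_time + 50 * 40000 microseconds, i.e. 2.0 s after playback started, and the thread sleeps for whatever part of that is still in the future.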
Main-thread rendering
while (!video_state->isEnd) {
    // Handle events (must run on the main thread)
    while (SDL_PollEvent(&event)) {
        if (event.type == SDL_QUIT) {
            video_state->isEnd = 1;
        }
    }
    if (video_state->videoParam->frame_update) {
        // Copy the AVFrame data into the texture, then render it to the window
        rect.x = 0;
        rect.y = 0;
        rect.w = video_state->vCodecCtx->width;
        rect.h = video_state->vCodecCtx->height;
        out_frame = video_state->videoParam->out_frame;
        // Update the texture
        SDL_UpdateYUVTexture(texture, &rect,
                             out_frame->data[0], out_frame->linesize[0], // Y
                             out_frame->data[1], out_frame->linesize[1], // U
                             out_frame->data[2], out_frame->linesize[2]); // V
        // Render
        SDL_RenderClear(renderer);
        SDL_RenderCopy(renderer, texture, NULL, NULL);
        SDL_RenderPresent(renderer);
        // Reset the flag
        video_state->videoParam->frame_update = 0;
    }
}
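One caveat worth flagging (my observation, not from the original post): frame_update is a plain int written by the video thread and read by the main thread with no synchronization, and out_frame can be overwritten while the main thread is still copying it into the texture, so this is a data race that mostly goes unnoticed only because the video thread sleeps between frames. SDL's atomics make at least the flag hand-off well-defined; a minimal sketch, assuming VideoParam.frame_update is changed to SDL_atomic_t:
// Video thread, after sws_scale:
SDL_AtomicSet(&video_state->videoParam->frame_update, 1);
// Main thread: atomically test-and-clear the flag
if (SDL_AtomicCAS(&video_state->videoParam->frame_update, 1, 0)) {
    // ... SDL_UpdateYUVTexture / SDL_RenderCopy / SDL_RenderPresent ...
}
A complete fix would also guard out_frame itself, e.g. with an SDL_mutex held around both sws_scale and SDL_UpdateYUVTexture, or with a small queue of decoded frames.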
Complete code
sample_player.h
//
// sample_player.h
// learning
//
// Created by chenhuaiyi on 2025/2/26.
//
#ifndef sample_player_h
#define sample_player_h
#include <stdio.h>
// ffmpeg
#include "libavcodec/avcodec.h"
#include "libswresample/swresample.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavutil/imgutils.h"
#include "libavutil/time.h"
#include "libavutil/fifo.h"
#include "libavutil/channel_layout.h"
// SDL
#include "SDL.h"
#include "SDL_thread.h"
/**
 Macros
 */
#define USE_SDL 1
typedef struct MyAVPacketList {
    AVPacket *pkt;
    int serial;
} MyAVPacketList;
/**
 Packet queue
 */
typedef struct PacketQueue {
    AVFifo* pkt_list;   // FIFO of MyAVPacketList entries
    int size;           // number of queued packets
    SDL_mutex* mutex;   // mutex protecting the queue
    SDL_cond* cond;     // condition variable for blocking consumers
} PacketQueue;
/**
 Data types
 */
typedef struct AudioInfo{
    Uint32 audio_len;   // bytes left in the buffer
    Uint8* audio_pos;   // current read position in the buffer
} AudioInfo;
/**
 Audio output parameters
 */
typedef struct AudioParam {
    AVChannelLayout out_channel_layout; // channel layout
    int out_nb_samples;                 // samples per frame
    enum AVSampleFormat out_sample_fmt; // sample format
    int out_sample_rate;                // sample rate
    int out_channels;                   // output channel count
    int index;                          // total audio frames decoded
    int out_buffer_size;                // output buffer size in bytes
    uint8_t* out_buffer;                // output buffer
} AudioParam;
/**
 Video output parameters
 */
typedef struct VideoParam {
    int width;                  // width
    int height;                 // height
    enum AVPixelFormat pix_fmt; // pixel format, YUV420P
    int num_bytes;              // bytes per frame
    int index;                  // total video frames decoded
    AVFrame* out_frame;         // output frame
    int frame_update;           // frame-updated flag
} VideoParam;
/**
 Global state
 */
typedef struct VideoState {
    AVFormatContext* formatCtx; // format context
    int audioStream;            // audio stream index
    AVCodecContext* aCodecCtx;  // audio codec context
    const AVCodec* aCodec;      // audio decoder
    AudioParam* audioParam;     // audio parameters
    SwrContext* swrCtx;         // audio resampling context
    int videoStream;            // video stream index
    AVCodecContext* vCodecCtx;  // video codec context
    const AVCodec* vCodec;      // video decoder
    VideoParam* videoParam;     // video parameters
    struct SwsContext* swsCtx;  // video scaling context
    PacketQueue* aQueue;        // audio packet queue
    PacketQueue* vQueue;        // video packet queue
    int isEnd;                  // end-of-playback flag
} VideoState;
/**
 Globals
 */
extern AudioInfo audio_info;
#endif /* sample_player_h */
utils.h
//
// utils.h
// sample_player
//
// Created by chenhuaiyi on 2025/2/27.
//
#ifndef utils_h
#define utils_h
#include "sample_player.h"
int init_video_state(VideoState** video_state);
int destory_video_state(VideoState** video_state);
int packet_queue_push(PacketQueue* q, AVPacket* pkt);
int packet_queue_init(PacketQueue** q, size_t max_size);
int packet_queue_pop(PacketQueue* q, AVPacket* pkt);
void packet_queue_destroy(PacketQueue** q);
char* get_frame_type(AVFrame* frame);
#endif /* utils_h */
manager.h
//
// manager.h
// sample_player
//
// Created by chenhuaiyi on 2025/2/27.
//
#ifndef manager_h
#define manager_h
#include "sample_player.h"
/**
 Configure audio output parameters
 */
int audio_output_set(VideoState* video_state);
/**
 Configure video output parameters
 */
int video_output_set(VideoState* video_state);
/**
 Initialize SDL audio
 */
int audio_sdl_set(VideoState* video_state, SDL_AudioSpec* wanted_spec, void (*fn)(void*, Uint8*, int));
/**
 Initialize SDL video
 */
int video_sdl_set(VideoState* video_state, SDL_Window** window, SDL_Renderer** renderer, SDL_Texture** texture);
#endif /* manager_h */
utils.c
//
// utils.c
// sample_player
//
// Created by chenhuaiyi on 2025/2/27.
//
#include "utils.h"
/**
 Initialize VideoState
 */
int init_video_state(VideoState** video_state) {
*video_state = av_malloc(sizeof(VideoState));
(*video_state)->formatCtx = avformat_alloc_context();
(*video_state)->audioStream = 0;
(*video_state)->aCodecCtx = avcodec_alloc_context3(NULL);
(*video_state)->audioParam = av_malloc(sizeof(AudioParam));
(*video_state)->videoStream = 0;
(*video_state)->vCodecCtx = avcodec_alloc_context3(NULL);
(*video_state)->videoParam = av_malloc(sizeof(VideoParam));
(*video_state)->videoParam->frame_update = 0;
/** Initialize the packet queues */
(*video_state)->aQueue = av_malloc(sizeof(PacketQueue));
packet_queue_init(&(*video_state)->aQueue, 1);
(*video_state)->vQueue = av_malloc(sizeof(PacketQueue));
packet_queue_init(&(*video_state)->vQueue, 1);
(*video_state)->isEnd = 0;
return 1;
}
/**
 Destroy VideoState
 */
int destory_video_state(VideoState** video_state){
swr_free(&(*video_state)->swrCtx);
avcodec_free_context(&(*video_state)->aCodecCtx);
av_free((*video_state)->audioParam->out_buffer);
av_free((*video_state)->audioParam);
sws_freeContext((*video_state)->swsCtx);
avcodec_free_context(&(*video_state)->vCodecCtx);
av_frame_free(&(*video_state)->videoParam->out_frame);
av_free((*video_state)->videoParam);
/** Free the queues */
packet_queue_destroy(&(*video_state)->aQueue);
packet_queue_destroy(&(*video_state)->vQueue);
if ((*video_state)->formatCtx != NULL) {
avformat_close_input(&(*video_state)->formatCtx);
(*video_state)->formatCtx = NULL;
}
av_free(*video_state);
return 1;
}
/**
 Initialize a queue
 */
int packet_queue_init(PacketQueue** q, size_t max_size) {
    // Create an AVFifo whose elements are MyAVPacketList entries, growing on demand
*q = av_malloc(sizeof(PacketQueue));
(*q)->pkt_list = av_fifo_alloc2(max_size, sizeof(MyAVPacketList), AV_FIFO_FLAG_AUTO_GROW);
(*q)->size = 0;
(*q)->mutex = SDL_CreateMutex();
(*q)->cond = SDL_CreateCond();
if (!(*q)->pkt_list) {
return -1;
}
return 0;
}
/**
 Push onto the queue
 */
int packet_queue_push(PacketQueue* q, AVPacket* pkt) {
MyAVPacketList pNode;
if (!q || !pkt) {
return -1;
}
AVPacket* pkt1 = av_packet_alloc();
if (!pkt1) {
av_packet_unref(pkt);
return -1;
}
    SDL_LockMutex(q->mutex);
    av_packet_ref(pkt1, pkt);
    pNode.pkt = pkt1;
    // Write the node into the FIFO
    if (av_fifo_write(q->pkt_list, &pNode, 1) < 0) {
        av_packet_free(&pkt1); // don't leak the copy on failure
        SDL_UnlockMutex(q->mutex);
        return -1;
    }
    q->size++;
    SDL_CondSignal(q->cond);
    SDL_UnlockMutex(q->mutex);
    return 0;
}
/**
 Pop from the queue
 */
int packet_queue_pop(PacketQueue* q, AVPacket* pkt) {
if (!q || !pkt) {
return -1;
}
    SDL_LockMutex(q->mutex);
    MyAVPacketList pNode;
    // Pop one element; if the queue is empty, block until a producer
    // signals, then retry the read
    while (av_fifo_read(q->pkt_list, &pNode, 1) < 0) {
        SDL_CondWait(q->cond, q->mutex);
    }
    q->size--;
av_packet_move_ref(pkt, pNode.pkt);
av_packet_free(&pNode.pkt);
SDL_UnlockMutex(q->mutex);
return 0;
}
/**
 Destroy the queue
 */
void packet_queue_destroy(PacketQueue** q) {
if ((*q) && (*q)->pkt_list) {
// Free every AVPacket still in the queue
MyAVPacketList pNode;
SDL_LockMutex((*q)->mutex);
while (av_fifo_read((*q)->pkt_list, &pNode, 1) >= 0) {
av_packet_free(&pNode.pkt); // release the packet's resources
}
SDL_UnlockMutex((*q)->mutex);
// Free the AVFifo itself
(*q)->size = 0;
av_fifo_freep2(&(*q)->pkt_list);
SDL_DestroyMutex((*q)->mutex);
SDL_DestroyCond((*q)->cond);
av_free(*q);
}
}
/**
 Get the frame's picture type
 */
char* get_frame_type(AVFrame* frame) {
switch (frame->pict_type) {
case AV_PICTURE_TYPE_I:
return "I";
break;
case AV_PICTURE_TYPE_P:
return "P";
break;
case AV_PICTURE_TYPE_B:
return "B";
break;
case AV_PICTURE_TYPE_S:
return "S";
break;
case AV_PICTURE_TYPE_SI:
return "SI";
break;
case AV_PICTURE_TYPE_SP:
return "SP";
break;
case AV_PICTURE_TYPE_BI:
return "BI";
break;
default:
return "N";
break;
}
}
manager.c
//
// manager.c
// sample_player
//
// Created by chenhuaiyi on 2025/2/27.
//
#include "manager.h"
/**
 Build the audio output parameters
 */
int audio_output_set(VideoState* video_state) {
    AudioParam* audio_param = video_state->audioParam;
    // Output parameters
    av_channel_layout_default(&audio_param->out_channel_layout, 2);
    audio_param->out_nb_samples = video_state->aCodecCtx->frame_size; // samples per codec frame: AAC 1024, MP3 1152
    audio_param->out_sample_fmt = AV_SAMPLE_FMT_S16;                  // sample format
    audio_param->out_sample_rate = 44100;                             // sample rate
    audio_param->out_channels = audio_param->out_channel_layout.nb_channels; // channel count
    // Required buffer size: samples x channels x bytes per sample,
    // e.g. 1024 samples x 2 channels x 2 bytes (16-bit) = 4096 bytes
    audio_param->out_buffer_size = av_samples_get_buffer_size(NULL, audio_param->out_channels,
                                                              audio_param->out_nb_samples,
                                                              audio_param->out_sample_fmt, 1);
    // Allocate the buffer
    audio_param->out_buffer = NULL;
    av_samples_alloc(&audio_param->out_buffer, NULL, audio_param->out_channels,
                     audio_param->out_nb_samples, audio_param->out_sample_fmt, 1);
return 1;
}
/**
 Build the video output parameters
 */
int video_output_set(VideoState* video_state) {
    VideoParam* video_param = video_state->videoParam;
    // Basic properties
    video_param->width = video_state->vCodecCtx->width;
    video_param->height = video_state->vCodecCtx->height;
    video_param->pix_fmt = AV_PIX_FMT_YUV420P;
    // Compute the per-frame size and allocate the frame buffer
    video_param->num_bytes = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, video_param->width, video_param->height, 1);
    video_param->out_frame = av_frame_alloc();
    av_image_alloc(video_param->out_frame->data, video_param->out_frame->linesize,
                   video_param->width, video_param->height, AV_PIX_FMT_YUV420P, 1);
return 1;
}
/**
 Initialize SDL audio
 */
int audio_sdl_set(VideoState* video_state, SDL_AudioSpec* wanted_spec, void (*fn)(void*, Uint8*, int)) {
    AudioParam* audio_param = video_state->audioParam;
    wanted_spec->freq = audio_param->out_sample_rate;   // sample rate
    wanted_spec->format = AUDIO_S16SYS;                 // sample format, 16-bit
    wanted_spec->channels = audio_param->out_channels;  // channel count
    wanted_spec->silence = 0;
    wanted_spec->samples = audio_param->out_nb_samples; // samples per callback
    wanted_spec->callback = fn;                         // callback function
    wanted_spec->userdata = video_state->aCodecCtx;     // userdata passed to the callback
return 1;
}
/**
 Initialize SDL video
 */
int video_sdl_set(VideoState* video_state, SDL_Window** window, SDL_Renderer** renderer, SDL_Texture** texture){
    AVCodecContext* pCodecCtx = video_state->vCodecCtx;
    /** Window */
*window = SDL_CreateWindow("SDL2 window",
SDL_WINDOWPOS_CENTERED,
SDL_WINDOWPOS_CENTERED,
pCodecCtx->width,
pCodecCtx->height,
SDL_WINDOW_SHOWN);
if (!*window) {
printf("SDL_CreateWindow Error: %s\n", SDL_GetError());
SDL_Quit();
return 1;
}
/** Renderer */
*renderer = SDL_CreateRenderer(*window,
-1,
SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC);
if (!*renderer) {
printf("SDL_CreateRenderer Error: %s\n", SDL_GetError());
SDL_DestroyWindow(*window);
SDL_Quit();
return 1;
}
/** Texture */
*texture = SDL_CreateTexture(*renderer,
SDL_PIXELFORMAT_YV12,
SDL_TEXTUREACCESS_STREAMING,
pCodecCtx->width,
pCodecCtx->height);
return 1;
}
main.c
//
// main.c
// sample_player
//
// Created by chenhuaiyi on 2025/2/26.
//
#include "utils.h"
#include "manager.h"
AudioInfo audio_info;
/* udata: user data passed in
 * stream: the SDL audio buffer
 * len: size of the SDL audio buffer
 * Audio callback
 */
void fill_audio(void *udata, Uint8 *stream, int len){
    SDL_memset(stream, 0, len); // must clear the buffer first, otherwise you get loud static!
    if(audio_info.audio_len==0){ // nothing to play yet
        return;
    }
    len = (len>audio_info.audio_len ? audio_info.audio_len : len); // copy at most a buffer's worth of data
    SDL_MixAudio(stream, audio_info.audio_pos, len, SDL_MIX_MAXVOLUME);
    audio_info.audio_pos += len;
    audio_info.audio_len -= len;
}
/**
 Audio thread
 */
int audio_thread(void *arg) {
    /**
     1. Pop a packet from the packet queue
     2. Decode the packet
     3. Write the result into the SDL buffer
     */
    VideoState* video_state = (VideoState*) arg;
    AudioParam* audio_param = video_state->audioParam;
    PacketQueue* queue = video_state->aQueue;
    audio_param->index = 0;
    AVRational time_base = video_state->formatCtx->streams[video_state->audioStream]->time_base;
    int64_t av_start_time = av_gettime(); // playback start timestamp (microseconds)
    AVPacket packet;
    int ret;
    AVFrame* pFrame = av_frame_alloc();
    for(;;) {
        if (queue->size > 0) {
            packet_queue_pop(queue, &packet);
            // Send the packet to the decoder
            ret = avcodec_send_packet(video_state->aCodecCtx, &packet);
            if ( ret < 0 ) {
                printf("send packet error\n");
                return -1;
            }
            // Drain the decoded frames
            while (!avcodec_receive_frame(video_state->aCodecCtx, pFrame)) {
                // Resample; the third argument is the output capacity in
                // samples per channel, not in bytes
                swr_convert(video_state->swrCtx, &audio_param->out_buffer, audio_param->out_nb_samples,
                            (const uint8_t **)pFrame->data, pFrame->nb_samples);
                audio_param->index++;
                printf("frame %d | pts:%lld | packet size (bytes):%d | actual play time %.2fs | expected play time %.2fs\n",
                       audio_param->index,
                       packet.pts,
                       packet.size,
                       (double)(av_gettime() - av_start_time)/AV_TIME_BASE,
                       pFrame->pts * av_q2d(time_base));
#if USE_SDL
                // Hand the converted data to the SDL callback
                audio_info.audio_len = audio_param->out_buffer_size;
                audio_info.audio_pos = (Uint8 *) audio_param->out_buffer;
                // Wait until SDL has consumed the buffer
                // (SDL_Delay takes whole milliseconds)
                while(audio_info.audio_len > 0)
                    SDL_Delay(1);
#endif
            }
            av_packet_unref(&packet);
        }
        else {
            break;
        }
    }
    av_frame_free(&pFrame);
    // Done
    video_state->isEnd = 1;
    return 0;
}
/**
 Video thread
 */
int video_thread(void *arg) {
    /**
     1. Pop a packet from the video packet queue
     2. Send it to the decoder and receive frames
     3. Let SDL render the frames (via the main thread)
     4. Compute the SDL_Delay from the pts
     */
    VideoState* video_state = (VideoState*) arg;
    PacketQueue* video_queue = video_state->vQueue;
    AVCodecContext* pCodecCtx = video_state->vCodecCtx;
    AVFrame* out_frame = video_state->videoParam->out_frame;
    AVPacket packet;
    AVFrame* pFrame = av_frame_alloc();
    AVRational time_base = video_state->formatCtx->streams[video_state->videoStream]->time_base;
    int64_t av_start_time = av_gettime();                   // playback start time (microseconds)
    int64_t frame_delay = av_q2d(time_base) * AV_TIME_BASE; // one pts tick, in microseconds
    int64_t frame_start_time = av_gettime();
    for (;;) {
        if (video_queue->size > 0) {
            packet_queue_pop(video_queue, &packet);
            // Send the packet to the decoder
            int ret = avcodec_send_packet(pCodecCtx, &packet);
            if (ret < 0) {
                printf("packet resolve error!");
                break;
            }
            // Drain the decoded frames
            while (!avcodec_receive_frame(pCodecCtx, pFrame)) {
                // Convert the frame to YUV420P
                sws_scale(video_state->swsCtx,                   // sws conversion context
                          (uint8_t const * const *)pFrame->data, // input data planes
                          pFrame->linesize,                      // input line sizes (with alignment)
                          0,                                     // starting Y position
                          pCodecCtx->height,                     // input height
                          out_frame->data,                       // output data planes
                          out_frame->linesize);                  // output line sizes
                // Mark the frame as updated
                video_state->videoParam->frame_update = 1;
                // Compute the delay
                int64_t pts = pFrame->pts;
                int64_t actual_playback_time = av_start_time + pts * frame_delay; // target wall-clock time
                int64_t current_time = av_gettime();
                if (actual_playback_time > current_time) {
                    SDL_Delay((Uint32)(actual_playback_time-current_time)/1000); // sleep until the target time
                }
                video_state->videoParam->index++;
                printf("frame %i | type %s | pts %d | duration %.2fms | actual play time %.2fs | expected play time %.2fs\n ",
                       video_state->videoParam->index,
                       get_frame_type(pFrame),
                       (int)pFrame->pts,
                       (double)(av_gettime() - frame_start_time)/1000,
                       (double)(av_gettime() - av_start_time)/AV_TIME_BASE,
                       pFrame->pts * av_q2d(time_base));
                frame_start_time = av_gettime();
            }
            av_packet_unref(&packet);
        } else {
            break;
        }
    }
    av_frame_free(&pFrame);
    // Done
    video_state->isEnd = 1;
    return 1;
}
int main(int argc, char* argv[])
{
VideoState* video_state;
AudioParam* audio_param;
VideoParam* video_param;
SDL_Event event;
SDL_Rect rect;
if(argc < 2) {
fprintf(stderr, "Usage: test <file>\n");
exit(1);
}
/** Initialization */
init_video_state(&video_state);
audio_param = video_state->audioParam;
video_param = video_state->videoParam;
avformat_network_init();
// 1. Open the media file and obtain the format context
if(avformat_open_input(&video_state->formatCtx, argv[1], NULL, NULL)!=0){
printf("Couldn't open input stream.\n");
return -1;
}
// 2. Probe the file for stream information
if(avformat_find_stream_info(video_state->formatCtx, NULL) < 0){
printf("Couldn't find stream information.\n");
return -1;
}
// Dump the format info
av_dump_format(video_state->formatCtx, 0, argv[1], 0);
// 3. Find the audio/video stream indices
video_state->audioStream=-1;
video_state->videoStream=-1;
for(int i=0; i < video_state->formatCtx->nb_streams; i++) {
if(video_state->formatCtx->streams[i]->codecpar->codec_type==AVMEDIA_TYPE_AUDIO){
video_state->audioStream=i;
}
if (video_state->formatCtx->streams[i]->codecpar->codec_type==AVMEDIA_TYPE_VIDEO) {
video_state->videoStream=i;
}
}
if(video_state->audioStream==-1){
printf("Didn't find a audio stream.\n");
return -1;
}
if (video_state->videoStream==-1) {
printf("Didn't find a video stream.\n");
return -1;
}
// 4. Copy the audio/video stream codec parameters into the codec contexts
AVCodecParameters* aCodecParam = video_state->formatCtx->streams[video_state->audioStream]->codecpar;
avcodec_parameters_to_context(video_state->aCodecCtx, aCodecParam);
// avcodec_parameters_free(&aCodecParam); not needed: codecpar is owned by the stream
AVCodecParameters* vCodecParam = video_state->formatCtx->streams[video_state->videoStream]->codecpar;
avcodec_parameters_to_context(video_state->vCodecCtx, vCodecParam);
// avcodec_parameters_free(&vCodecParam); same here
// 5. Find decoders for the streams
video_state->aCodec = avcodec_find_decoder(video_state->aCodecCtx->codec_id);
if(video_state->aCodec==NULL){
printf("Audio codec not found.\n");
return -1;
}
video_state->vCodec = avcodec_find_decoder(video_state->vCodecCtx->codec_id);
if(video_state->vCodec==NULL){
printf("Video codec not found.\n");
return -1;
}
// 6. Open the decoders
if(avcodec_open2(video_state->aCodecCtx, video_state->aCodec, NULL)<0){
printf("Could not open audio codec.\n");
return -1;
}
if(avcodec_open2(video_state->vCodecCtx, video_state->vCodec, NULL)<0){
printf("Could not open video codec.\n");
return -1;
}
/** Build the audio output parameters */
audio_output_set(video_state);
/** Build the video output parameters */
video_output_set(video_state);
// SDL initialization
#if USE_SDL
if(SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
printf( "Could not initialize SDL - %s\n", SDL_GetError());
return -1;
}
// Set these near the start of main (macOS-specific hints)
SDL_SetHint(SDL_HINT_VIDEO_MAC_FULLSCREEN_SPACES, "0");
SDL_SetHint(SDL_HINT_MAC_BACKGROUND_APP, "1");
/** Set up the SDL audio device */
SDL_AudioSpec wanted_spec;
// audio_sdl_set(video_state, &wanted_spec, fill_audio);
wanted_spec.freq = audio_param->out_sample_rate;     // sample rate
wanted_spec.format = AUDIO_S16SYS;                   // sample format, 16-bit
wanted_spec.channels = audio_param->out_channels;    // channel count
wanted_spec.silence = 0;
wanted_spec.samples = audio_param->out_nb_samples;   // samples per callback
wanted_spec.callback = fill_audio;                   // callback function
wanted_spec.userdata = video_state->aCodecCtx;       // userdata passed to the callback
/** Set up the SDL video objects */
SDL_Window* window = NULL;
SDL_Renderer* renderer = NULL;
SDL_Texture* texture= NULL;
// video_sdl_set(video_state, &window, &renderer, &texture);
/** Window */
window = SDL_CreateWindow("SDL2 window",
SDL_WINDOWPOS_CENTERED,
SDL_WINDOWPOS_CENTERED,
video_state->vCodecCtx->width,
video_state->vCodecCtx->height,
SDL_WINDOW_SHOWN);
if (!window) {
printf("SDL_CreateWindow Error: %s\n", SDL_GetError());
SDL_Quit();
return 1;
}
/** Renderer */
renderer = SDL_CreateRenderer(window,
-1,
SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC);
if (!renderer) {
printf("SDL_CreateRenderer Error: %s\n", SDL_GetError());
SDL_DestroyWindow(window);
SDL_Quit();
return 1;
}
/** Texture */
texture = SDL_CreateTexture(renderer,
SDL_PIXELFORMAT_YV12,
SDL_TEXTUREACCESS_STREAMING,
video_state->vCodecCtx->width,
video_state->vCodecCtx->height);
// Open the audio device
if (SDL_OpenAudio(&wanted_spec, NULL)<0) {
printf("can't open audio.\n");
return -1;
}
#endif
// Audio resampling context
swr_alloc_set_opts2(&video_state->swrCtx,
                    &audio_param->out_channel_layout,    // output channel layout
                    audio_param->out_sample_fmt,         // output sample format
                    audio_param->out_sample_rate,        // output sample rate
                    &video_state->aCodecCtx->ch_layout,  // input channel layout
                    video_state->aCodecCtx->sample_fmt,  // input sample format
                    video_state->aCodecCtx->sample_rate, // input sample rate
                    0, NULL);
swr_init(video_state->swrCtx);
// Video scaling/conversion context
video_state->swsCtx = sws_getContext(video_state->vCodecCtx->width,   // src width
                                     video_state->vCodecCtx->height,  // src height
                                     video_state->vCodecCtx->pix_fmt, // src pixel format
                                     video_param->width,              // dst width
                                     video_param->height,             // dst height
                                     video_param->pix_fmt,            // dst pixel format
                                     SWS_BILINEAR,
                                     NULL,NULL,NULL);
// Start playback
SDL_PauseAudio(0);
int64_t av_start_time = av_gettime();  // playback start timestamp (microseconds)
AVPacket* packet = av_packet_alloc();  // allocate the packet
// Loop 1: read packets from the file
while(av_read_frame(video_state->formatCtx, packet)>=0){
/** Push onto the audio packet queue */
if(packet->stream_index==video_state->audioStream){
packet_queue_push(video_state->aQueue, packet);
}
/** Push onto the video packet queue */
if (packet->stream_index==video_state->videoStream) {
packet_queue_push(video_state->vQueue, packet);
}
av_packet_unref(packet);
SDL_PollEvent(&event);
switch(event.type) {
case SDL_QUIT:
SDL_Quit();
exit(0);
break;
default:
break;
}
}
printf("audio queue.size=%d\n", video_state->aQueue->size);
// Create and start the decoding threads
SDL_CreateThread(audio_thread, "audio_thread", video_state);
SDL_CreateThread(video_thread, "video_thread", video_state);
// video_thread(video_state);
AVFrame* out_frame = NULL;
while (!video_state->isEnd) {
// Handle events (must run on the main thread)
while (SDL_PollEvent(&event)) {
if (event.type == SDL_QUIT) {
video_state->isEnd = 1;
}
}
if (video_state->videoParam->frame_update) {
// Copy the AVFrame data into the texture, then render it to the window
rect.x = 0;
rect.y = 0;
rect.w = video_state->vCodecCtx->width;
rect.h = video_state->vCodecCtx->height;
out_frame = video_state->videoParam->out_frame;
// Update the texture
SDL_UpdateYUVTexture(texture, &rect,
                     out_frame->data[0], out_frame->linesize[0], // Y
                     out_frame->data[1], out_frame->linesize[1], // U
                     out_frame->data[2], out_frame->linesize[2]); // V
// Render
SDL_RenderClear(renderer);
SDL_RenderCopy(renderer, texture, NULL, NULL);
SDL_RenderPresent(renderer);
// Reset the flag
video_state->videoParam->frame_update = 0;
}
}
// Print stats
printf("Format: %s\n", video_state->formatCtx->iformat->name);
printf("Duration: %lld us\n", video_state->formatCtx->duration);
printf("Audio played for %.2f s, total audio frames: %d\n", (double)(av_gettime()-av_start_time)/AV_TIME_BASE, audio_param->index);
printf("Bit rate: %lld\n", video_state->formatCtx->bit_rate);
printf("Audio codec: %s (%s)\n", video_state->aCodecCtx->codec->long_name, avcodec_get_name(video_state->aCodecCtx->codec_id));
printf("Channels: %d\n", video_state->aCodecCtx->ch_layout.nb_channels);
printf("Sample rate: %d \n", video_state->aCodecCtx->sample_rate);
printf("Samples per frame per channel: %d\n", video_state->aCodecCtx->frame_size);
printf("pts tick (microseconds): %.2f\n", av_q2d(video_state->formatCtx->streams[video_state->audioStream]->time_base) * AV_TIME_BASE);
// Clean up
av_packet_free(&packet);
#if USE_SDL
SDL_CloseAudio();
SDL_DestroyTexture(texture);
SDL_DestroyRenderer(renderer);
SDL_DestroyWindow(window);
SDL_Quit();
#endif
destory_video_state(&video_state);
return 0;
}
Closing thoughts and what's next
That's it! Congratulations: you have completed a very rough, simple audio-video player with plenty of known problems. Over the next few posts we will improve this simple player together. (I'm still a beginner myself, so we'll take it one step at a time.) A few questions to leave you with:
- How do we do clock synchronization?
- How do we read packets, decode frames, and play them all at the same time, instead of reading the whole file first?
- How should we convert the decoded output frames?
p.s. I encourage you to read the ffplay source code; it answers all of these questions!