Audio and Video Core Technology

Leixiaohua's FFmpeg source code structure diagram (decoding)

Leixiaohua's GitHub

Leixiaohua's FFmpeg blog

FFmpeg filters: basic examples and a full walkthrough

1. Study Outline

Common FFmpeg commands

  • Video recording commands
  • Demuxing/muxing commands for multimedia files
  • Trimming and concatenation commands
  • Image/video conversion commands
  • Live-streaming commands
  • Filter commands

Basic FFmpeg development

  • C language refresher
  • FFmpeg core concepts and common structs
  • Hands-on: demuxing and muxing multimedia files
  • Hands-on: converting between multimedia formats
  • Hands-on: cutting a clip from an MP4
  • Assignment: build a simple Xiaokaxiu-style dubbing app

Hands-on audio/video encoding and decoding

  • Hands-on: H264 decoding
  • Hands-on: H264 encoding
  • Hands-on: AAC audio decoding
  • Hands-on: AAC audio encoding
  • Hands-on: converting video to images

Hands-on audio/video rendering

  • SDL event handling
  • SDL video texture rendering
  • SDL audio rendering
  • Hands-on 1: YUV video playback
  • Hands-on 2: YUV playback at variable speed
  • Hands-on 3: a PCM player

Core player features with FFmpeg

  • Hands-on: video playback of an MP4 file
  • Hands-on: audio playback of an MP4 file
  • Hands-on: a basic player
  • Hands-on: audio/video synchronization
  • Hands-on: a player core

FFmpeg on Android in practice

  • Building FFmpeg for Android
  • Calling between Java and C
  • Hands-on: calling FFmpeg from Android

Study tips

  • Get a firm grip on the audio/video processing pipeline and understand its essence
  • Practice diligently; skill comes with repetition
  • Learn with concrete questions in mind; it pays off double

Where audio/video technology is used

  • Live streaming: audio/video conferencing, education, entertainment/game streaming
  • Short video: Douyin, Kuaishou, Xiaokaxiu
  • Online video: Youku, Tencent Video, iQIYI, etc.
  • Audio/video calls: WeChat, QQ, Skype, etc.
  • Video surveillance
  • Artificial intelligence: face recognition, smart speakers, etc. (more focused on algorithms)

Player architecture

Rendering pipeline

What FFmpeg can do

  • FFmpeg is an excellent multimedia framework
  • FFmpeg runs on Linux, macOS, Windows, and other platforms
  • It can decode, encode, transcode, mux, demux, and filter audio/video data

Downloading and installing FFmpeg


$ git clone https://git.ffmpeg.org/ffmpeg.git
$ ./configure --help
$ make && make install


2. Common FFmpeg Commands in Practice

By purpose, FFmpeg commands fall into the following categories:

  • Basic information queries
  • Recording
  • Demuxing / muxing
  • Raw-data processing
  • Filters
  • Cutting and merging
  • Image/video conversion
  • Live streaming

Apart from the basic information queries, every other command processes audio/video through the pipeline shown in the figure below.

$ ffplay -s 2560x1600 -pix_fmt uyvy422 out.yuv

3. Beginner Development Topics

  • Using FFmpeg's logging and directory operations
  • FFmpeg's basic concepts and common structs
  • Hands-on practice with muxing/demuxing and stream operations

FFmpeg code structure:

  • libavcodec: implementations of a wide range of encoders and decoders.
  • libavformat: streaming protocols, container formats, and basic I/O access.
  • libavutil: hashers, decompressors, and miscellaneous utility functions.
  • libavfilter: a variety of audio/video filters.
  • libavdevice: interfaces for capture and playback devices.
  • libswresample: audio mixing and resampling.
  • libswscale: color conversion and scaling.

3.1 The FFmpeg Logging System

#include <libavutil/log.h>

av_log_set_level(AV_LOG_DEBUG);

av_log(NULL, AV_LOG_INFO, "...%s\n", op);

  • AV_LOG_ERROR
  • AV_LOG_WARNING
  • AV_LOG_INFO

Using the FFmpeg logging system:

#include <stdio.h>
#include <libavutil/log.h>

int main(int argc, char *argv[])
{
    av_log_set_level(AV_LOG_DEBUG);

    av_log(NULL, AV_LOG_INFO, "hello world: %s!\n", "aaa");

    return 0;
}


3.2 File and Directory Operations in FFmpeg

Deleting and renaming files:

#include <libavformat/avformat.h>

avpriv_io_delete()

avpriv_io_move(src, dst)

File and directory operations with FFmpeg:

#include <stdio.h>
#include <libavutil/log.h>
#include <libavformat/avformat.h>

int main(int argc, char *argv[])
{
    int ret;
    ret = avpriv_io_delete("./mytestfile.txt");
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Failed to delete file mytestfile.txt\n");
        return -1;
    }

    ret = avpriv_io_move("111.txt", "222.txt");
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Failed to rename\n");
        return -1;
    }

    return 0;
}


$ clang -g -o ffmpeg_del ffmpeg_file.c `pkg-config --libs libavformat`

# pkg-config --libs libavformat prints the linker flags (library path and -l flag) for libavformat

$ pkg-config --libs libavformat
-L/usr/local/ffmpeg/lib -lavformat


3.3 Key Directory-Handling Functions in FFmpeg

avio_open_dir()
avio_read_dir()
avio_close_dir()

Key structs for directory handling:

  • AVIODirContext

    The context for directory operations.

  • AVIODirEntry

    A directory entry; holds the file name, file size, and similar information.

Listing a directory with FFmpeg:

#include <stdio.h>
#include <inttypes.h>
#include <libavutil/log.h>
#include <libavformat/avformat.h>

int main(int argc, char *argv[])
{
    av_log_set_level(AV_LOG_INFO);

    int ret;
    AVIODirContext *ctx = NULL;
    AVIODirEntry *entry = NULL;

    ret = avio_open_dir(&ctx, "./", NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Can't open dir: %s\n", av_err2str(ret));
        return -1;
    }

    while (1) {
        ret = avio_read_dir(ctx, &entry);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Can't read dir: %s\n", av_err2str(ret));
            goto __fail;
        }
        if (!entry) {
            break;
        }

        av_log(NULL, AV_LOG_INFO, "%12"PRId64" %s\n",
               entry->size,
               entry->name);

        avio_free_directory_entry(&entry);
    }

__fail:
    avio_close_dir(&ctx);
    return 0;
}


$ clang -g -o list ffmpeg_list.c `pkg-config --libs libavformat libavutil`


3.4 Basic Concepts of Multimedia Files

  • A multimedia file is really just a container
  • Inside the container are multiple streams (also called tracks)
  • Each stream is produced by a different encoder
  • Data read from a stream comes in packets
  • A packet contains one or more frames

A few important structs:

  • AVFormatContext
  • AVStream
  • AVPacket

Basic steps for working with stream data in FFmpeg:

demux —> get streams —> read packets —> free resources

3.5 [Hands-on] Printing Audio/Video Information

av_register_all()
avformat_open_input() / avformat_close_input()
av_dump_format()

#include <stdio.h>
#include <libavutil/log.h>
#include <libavformat/avformat.h>

int main(int argc, char *argv[])
{
    int ret;
    av_log_set_level(AV_LOG_INFO);

    AVFormatContext *fmt_ctx = NULL;

    av_register_all();

    ret = avformat_open_input(&fmt_ctx, "./test.mp4", NULL, NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Can't open file: %s\n", av_err2str(ret));
        return -1;
    }

    av_dump_format(fmt_ctx, 0, "./test.mp4", 0);

    avformat_close_input(&fmt_ctx);

    return 0;
}


3.6 [Hands-on] Extracting Audio Data

av_init_packet()
av_find_best_stream()
av_read_frame() / av_packet_unref()

#include <stdio.h>
#include <libavutil/log.h>
#include <libavformat/avformat.h>

int main(int argc, char *argv[])
{
    int ret;
    int len;
    int audio_index;

    char *src = NULL;
    char *dst = NULL;

    av_log_set_level(AV_LOG_INFO);

    AVPacket pkt;
    AVFormatContext *fmt_ctx = NULL;

    av_register_all();

    // 1. read the two parameters from the command line
    if (argc < 3) {
        av_log(NULL, AV_LOG_ERROR, "eg: %s in_file out_file\n", argv[0]);
        return -1;
    }
    src = argv[1];
    dst = argv[2];
    if (!src || !dst) {
        av_log(NULL, AV_LOG_ERROR, "src or dst is null\n");
        return -1;
    }

    ret = avformat_open_input(&fmt_ctx, src, NULL, NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Can't open file: %s\n", av_err2str(ret));
        return -1;
    }

    FILE *dst_fd = fopen(dst, "wb");
    if (!dst_fd) {
        av_log(NULL, AV_LOG_ERROR, "Can't open out file!\n");
        avformat_close_input(&fmt_ctx);
        return -1;
    }
    av_dump_format(fmt_ctx, 0, src, 0);

    // 2. find the best audio stream
    ret = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Can't find the best stream!\n");
        avformat_close_input(&fmt_ctx);
        fclose(dst_fd);
        return -1;
    }

    audio_index = ret;
    av_init_packet(&pkt);
    while (av_read_frame(fmt_ctx, &pkt) >= 0) {
        if (pkt.stream_index == audio_index) {
            // 3. write the audio data to the output file
            len = fwrite(pkt.data, 1, pkt.size, dst_fd);
            if (len != pkt.size) {
                av_log(NULL, AV_LOG_WARNING, "warning, length of data is not equal size of pkt!\n");
            }
        }
        av_packet_unref(&pkt);
    }

    avformat_close_input(&fmt_ctx);
    if (dst_fd) {
        fclose(dst_fd);
    }

    return 0;
}


$ clang -g -o extra_audio extra_audio.c `pkg-config --libs libavutil libavformat`
$ ./extra_audio test.mp4 killer.aac


3.7 [Hands-on] Extracting Video Data

  • Start code
  • SPS/PPS
  • codec -> extradata (see the sketch below)
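
In MP4, H264 is stored in AVCC form: every NALU carries a length prefix, and the SPS/PPS live in the codec's extradata. A playable .h264 file needs Annex-B form instead, i.e. 00 00 00 01 start codes with the SPS/PPS re-inserted into the stream. A minimal sketch, assuming fmt_ctx is already opened and video_index was found with av_find_best_stream(), that lets FFmpeg's h264_mp4toannexb bitstream filter do the rewriting:

#include <stdio.h>
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>

int write_annexb(AVFormatContext *fmt_ctx, int video_index, FILE *out_fd)
{
    const AVBitStreamFilter *bsf = av_bsf_get_by_name("h264_mp4toannexb");
    AVBSFContext *bsf_ctx = NULL;
    AVPacket pkt;

    av_bsf_alloc(bsf, &bsf_ctx);
    // the filter reads the SPS/PPS from the stream's extradata
    avcodec_parameters_copy(bsf_ctx->par_in,
                            fmt_ctx->streams[video_index]->codecpar);
    av_bsf_init(bsf_ctx);

    av_init_packet(&pkt);
    while (av_read_frame(fmt_ctx, &pkt) >= 0) {
        if (pkt.stream_index != video_index) {
            av_packet_unref(&pkt);
            continue;
        }
        av_bsf_send_packet(bsf_ctx, &pkt);          // the filter takes the packet
        while (av_bsf_receive_packet(bsf_ctx, &pkt) == 0) {
            fwrite(pkt.data, 1, pkt.size, out_fd);  // start codes + SPS/PPS added
            av_packet_unref(&pkt);
        }
    }
    av_bsf_free(&bsf_ctx);
    return 0;
}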

3.8 [Hands-on] Converting MP4 to FLV

avformat_alloc_output_context2() / avformat_free_context();
avformat_new_stream();
avcodec_parameters_copy();
avformat_write_header();
av_write_frame() / av_interleaved_write_frame();
av_write_trailer()
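
A minimal remuxing sketch built from exactly these calls (error handling omitted). No transcoding happens: every packet is copied as-is, and only its timestamps are rescaled from the input stream's time base to the output stream's. The container is guessed from the output file name, so out is expected to end in .flv:

#include <libavformat/avformat.h>

int mp4_to_flv(const char *in, const char *out)
{
    AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx = NULL;
    AVPacket pkt;
    int i;

    avformat_open_input(&ifmt_ctx, in, NULL, NULL);
    avformat_find_stream_info(ifmt_ctx, NULL);

    avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, out);
    for (i = 0; i < ifmt_ctx->nb_streams; i++) {
        AVStream *out_stream = avformat_new_stream(ofmt_ctx, NULL);
        avcodec_parameters_copy(out_stream->codecpar,
                                ifmt_ctx->streams[i]->codecpar);
        out_stream->codecpar->codec_tag = 0;  // let the FLV muxer pick its own tag
    }

    avio_open(&ofmt_ctx->pb, out, AVIO_FLAG_WRITE);
    avformat_write_header(ofmt_ctx, NULL);

    while (av_read_frame(ifmt_ctx, &pkt) >= 0) {
        // rescale pts/dts/duration between the two time bases
        av_packet_rescale_ts(&pkt,
                             ifmt_ctx->streams[pkt.stream_index]->time_base,
                             ofmt_ctx->streams[pkt.stream_index]->time_base);
        pkt.pos = -1;
        av_interleaved_write_frame(ofmt_ctx, &pkt);
        av_packet_unref(&pkt);
    }

    av_write_trailer(ofmt_ctx);
    avio_closep(&ofmt_ctx->pb);
    avformat_free_context(ofmt_ctx);
    avformat_close_input(&ifmt_ctx);
    return 0;
}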

3.9 [Hands-on] Cutting a Clip from an MP4

av_seek_frame()
Code for cutting a clip from an MP4:

#include <stdlib.h>
#include <string.h>
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>

static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt, const char *tag)
{
AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;

printf("%s: pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
tag,
av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
pkt->stream_index);
}

int cut_video(double from_seconds, double end_seconds, const char* in_filename, const char* out_filename) {
AVOutputFormat *ofmt = NULL;
AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx = NULL;
AVPacket pkt;
int ret, i;

av_register_all();

if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0) {
fprintf(stderr, "Could not open input file '%s'", in_filename);
goto end;
}

if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0) {
fprintf(stderr, "Failed to retrieve input stream information");
goto end;
}

av_dump_format(ifmt_ctx, 0, in_filename, 0);

avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, out_filename);
if (!ofmt_ctx) {
fprintf(stderr, "Could not create output context\n");
ret = AVERROR_UNKNOWN;
goto end;
}

ofmt = ofmt_ctx->oformat;

for (i = 0; i < ifmt_ctx->nb_streams; i++) {
AVStream *in_stream = ifmt_ctx->streams[i];
AVStream *out_stream = avformat_new_stream(ofmt_ctx, in_stream->codec->codec);
if (!out_stream) {
fprintf(stderr, "Failed allocating output stream\n");
ret = AVERROR_UNKNOWN;
goto end;
}

ret = avcodec_copy_context(out_stream->codec, in_stream->codec);
if (ret < 0) {
fprintf(stderr, "Failed to copy context from input to output stream codec context\n");
goto end;
}
out_stream->codec->codec_tag = 0;
if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
out_stream->codec->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
av_dump_format(ofmt_ctx, 0, out_filename, 1);

if (!(ofmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&ofmt_ctx->pb, out_filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open output file '%s'", out_filename);
goto end;
}
}

ret = avformat_write_header(ofmt_ctx, NULL);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file\n");
goto end;
}

// int indexs[8] = {0};


// int64_t start_from = 8*AV_TIME_BASE;
ret = av_seek_frame(ifmt_ctx, -1, from_seconds*AV_TIME_BASE, AVSEEK_FLAG_ANY);
if (ret < 0) {
fprintf(stderr, "Error seek\n");
goto end;
}

int64_t *dts_start_from = malloc(sizeof(int64_t) * ifmt_ctx->nb_streams);
memset(dts_start_from, 0, sizeof(int64_t) * ifmt_ctx->nb_streams);
int64_t *pts_start_from = malloc(sizeof(int64_t) * ifmt_ctx->nb_streams);
memset(pts_start_from, 0, sizeof(int64_t) * ifmt_ctx->nb_streams);

while (1) {
AVStream *in_stream, *out_stream;

ret = av_read_frame(ifmt_ctx, &pkt);
if (ret < 0)
break;

in_stream = ifmt_ctx->streams[pkt.stream_index];
out_stream = ofmt_ctx->streams[pkt.stream_index];

log_packet(ifmt_ctx, &pkt, "in");

if (av_q2d(in_stream->time_base) * pkt.pts > end_seconds) {
av_free_packet(&pkt);
break;
}

if (dts_start_from[pkt.stream_index] == 0) {
dts_start_from[pkt.stream_index] = pkt.dts;
printf("dts_start_from: %s\n", av_ts2str(dts_start_from[pkt.stream_index]));
}
if (pts_start_from[pkt.stream_index] == 0) {
pts_start_from[pkt.stream_index] = pkt.pts;
printf("pts_start_from: %s\n", av_ts2str(pts_start_from[pkt.stream_index]));
}

/* copy packet */
pkt.pts = av_rescale_q_rnd(pkt.pts - pts_start_from[pkt.stream_index], in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
pkt.dts = av_rescale_q_rnd(pkt.dts - dts_start_from[pkt.stream_index], in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
if (pkt.pts < 0) {
pkt.pts = 0;
}
if (pkt.dts < 0) {
pkt.dts = 0;
}
pkt.duration = (int)av_rescale_q((int64_t)pkt.duration, in_stream->time_base, out_stream->time_base);
pkt.pos = -1;
log_packet(ofmt_ctx, &pkt, "out");
printf("\n");

ret = av_interleaved_write_frame(ofmt_ctx, &pkt);
if (ret < 0) {
fprintf(stderr, "Error muxing packet\n");
break;
}
av_free_packet(&pkt);
}
free(dts_start_from);
free(pts_start_from);

av_write_trailer(ofmt_ctx);

end:
avformat_close_input(&ifmt_ctx);

/* close output */
if (ofmt_ctx && !(ofmt->flags & AVFMT_NOFILE))
avio_closep(&ofmt_ctx->pb);
avformat_free_context(ofmt_ctx);

if (ret < 0 && ret != AVERROR_EOF) {
fprintf(stderr, "Error occurred: %s\n", av_err2str(ret));
return 1;
}

return 0;
}

int main(int argc, char *argv[]){
    if (argc < 5) {
        fprintf(stderr, "Usage: %s start_time end_time src_file out_file\n", argv[0]);
        return -1;
    }

    double start_time = atof(argv[1]);
    double end_time = atof(argv[2]);
    cut_video(start_time, end_time, argv[3], argv[4]);

    return 0;
}


3.10 [Hands-on] A Simple Xiaokaxiu-Style App

  • Extract the audio track from one media file and the video track from another
  • Merge the audio and video tracks into a new file
  • Trim the audio and video tracks

4. Intermediate FFmpeg Development

  • FFmpeg H264 decoding
  • FFmpeg H264 encoding
  • FFmpeg AAC decoding
  • FFmpeg AAC encoding

4.1 FFmpeg H264 Decoding

#include <libavcodec/avcodec.h>

Common data structures:

  • AVCodec: the codec struct
  • AVCodecContext: the codec context
  • AVFrame: a decoded frame

Allocating and freeing these structs:

av_frame_alloc() / av_frame_free();
avcodec_alloc_context3();
avcodec_free_context();

Decoding steps (a minimal sketch follows the list):

  • Find the decoder (avcodec_find_decoder)
  • Open the decoder (avcodec_open2)
  • Decode (avcodec_decode_video2)
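
A minimal decoding sketch following these three steps. One caveat: newer FFmpeg deprecates avcodec_decode_video2() in favor of the avcodec_send_packet()/avcodec_receive_frame() pair used below; in a real player the decoder is found and opened once, then fed packets in a loop:

#include <stdio.h>
#include <libavcodec/avcodec.h>

static AVCodecContext *open_h264_decoder(void)
{
    AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264); // 1. find the decoder
    AVCodecContext *ctx = avcodec_alloc_context3(codec);
    avcodec_open2(ctx, codec, NULL);                         // 2. open the decoder
    return ctx;
}

static void decode_h264(AVCodecContext *ctx, AVFrame *frame, AVPacket *pkt)
{
    avcodec_send_packet(ctx, pkt);                 // 3. decode: feed one packet in,
    while (avcodec_receive_frame(ctx, frame) == 0) //    pull zero or more frames out
        printf("decoded frame %dx%d\n", frame->width, frame->height);
}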

4.2 FFmpeg H264 Encoding

The H264 encoding flow (a minimal sketch follows the list):

  • Find the encoder (avcodec_find_encoder_by_name)
  • Set the parameters and open the encoder (avcodec_open2)
  • Encode (avcodec_encode_video2)
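
A minimal encoding sketch for the same flow. "libx264" must be compiled into your FFmpeg, and the 640x480 / 25 fps / 400 kbps parameters are placeholders; newer FFmpeg replaces avcodec_encode_video2() with the send/receive pair:

#include <stdio.h>
#include <libavcodec/avcodec.h>

static void encode_one(AVFrame *frame, FILE *out)
{
    AVCodec *codec = avcodec_find_encoder_by_name("libx264"); // 1. find the encoder
    AVCodecContext *ctx = avcodec_alloc_context3(codec);
    AVPacket *pkt = av_packet_alloc();

    ctx->width     = 640;                         // 2. set the parameters ...
    ctx->height    = 480;
    ctx->time_base = (AVRational){1, 25};
    ctx->framerate = (AVRational){25, 1};
    ctx->pix_fmt   = AV_PIX_FMT_YUV420P;
    ctx->bit_rate  = 400000;
    ctx->gop_size  = 10;
    avcodec_open2(ctx, codec, NULL);              // ... and open it

    avcodec_send_frame(ctx, frame);               // 3. encode
    while (avcodec_receive_packet(ctx, pkt) == 0) {
        fwrite(pkt->data, 1, pkt->size, out);
        av_packet_unref(pkt);
    }

    av_packet_free(&pkt);
    avcodec_free_context(&ctx);
}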

4.3 Converting Video to Images

TODO

4.4 FFmpeg AAC Encoding

  • The encoding flow is the same as for video
  • The encoding function is avcodec_encode_audio2

5. Introduction to SDL

SDL official site

  • SDL (Simple DirectMedia Layer) is an open-source, cross-platform multimedia development library
  • A cross-platform open-source media library implemented in C
  • Widely used for games, emulators, media players, and other multimedia applications

Syntax and subsystems:

SDL divides its functionality into the following subsystems:

  • Video: graphics control, plus thread and event management.
  • Audio: sound control
  • Joystick: game controller input
  • CD-ROM: optical drive and media control
  • Window Management: integration with windowing environments
  • Event: event-driven processing

Below is a very simple SDL example written in C:

// Headers
#include "SDL.h"

// Main function
int main(int argc, char* argv[])
{
    // Initialize SDL
    if (SDL_Init(SDL_INIT_EVERYTHING) == -1)
        return 1;

    // Delay 2 seconds
    SDL_Delay(2000);

    // Quit SDL
    SDL_Quit();

    // Return
    return 0;
}

The program above initializes all SDL subsystems (exiting if that fails), pauses for two seconds, then shuts down SDL and exits.

5.1 Building and Installing SDL

  • Download the SDL source code
  • Generate the Makefile: ./configure --prefix=/usr/local
  • Build and install: sudo make -j 8 && make install

5.2 Basic Steps for Using SDL

  • Include the header: #include <SDL.h>
  • Initialize SDL
  • Quit SDL

SDL window and renderer APIs:

SDL_Init() / SDL_Quit();
SDL_CreateWindow() / SDL_DestroyWindow();
SDL_CreateRenderer(); // create the renderer
$ clang -g -o first_sdl first_sdl.c `pkg-config --libs sdl2`

Rendering with the renderer (a complete minimal example follows the list):

SDL_CreateRenderer() / SDL_DestroyRenderer();
SDL_RenderClear();
SDL_RenderPresent();
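
Putting the two API groups together, a minimal sketch that opens a window, attaches a renderer, clears the backbuffer to a solid color, and presents it:

#include <SDL.h>

int main(int argc, char *argv[])
{
    SDL_Window *win = NULL;
    SDL_Renderer *renderer = NULL;

    if (SDL_Init(SDL_INIT_VIDEO))
        return -1;

    win = SDL_CreateWindow("SDL Window",
                           SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED,
                           640, 480, SDL_WINDOW_SHOWN);
    renderer = SDL_CreateRenderer(win, -1, 0);

    SDL_SetRenderDrawColor(renderer, 0, 0, 255, 255); // opaque blue
    SDL_RenderClear(renderer);    // paint the whole backbuffer
    SDL_RenderPresent(renderer);  // flip it onto the screen

    SDL_Delay(3000);

    SDL_DestroyRenderer(renderer);
    SDL_DestroyWindow(win);
    SDL_Quit();
    return 0;
}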

5.3 SDL Event Fundamentals

  • SDL keeps all events in a single queue
  • Every event operation is really an operation on that queue

SDL event types:

  • SDL_WindowEvent: window events
  • SDL_KeyboardEvent: keyboard events
  • SDL_MouseMotionEvent: mouse events
  • Custom (user-defined) events

SDL event handling (a sketch follows):

SDL_PollEvent();        // polling, non-blocking
SDL_WaitEvent();        // blocking; the usual approach
SDL_WaitEventTimeout();
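
A minimal event-loop sketch: block in SDL_WaitEvent() and react to whatever comes out of SDL's single event queue (closing the window or pressing ESC quits):

#include <SDL.h>

static void event_loop(void)
{
    SDL_Event event;
    int quit = 0;

    while (!quit) {
        SDL_WaitEvent(&event);  // blocks until an event arrives
        switch (event.type) {
        case SDL_QUIT:          // the window was closed
            quit = 1;
            break;
        case SDL_KEYDOWN:       // a key was pressed; ESC quits
            if (event.key.keysym.sym == SDLK_ESCAPE)
                quit = 1;
            break;
        case SDL_MOUSEMOTION:
            SDL_Log("mouse at (%d, %d)", event.motion.x, event.motion.y);
            break;
        default:
            break;
        }
    }
}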

5.4 Texture Rendering

SDL rendering basics:

SDL texture APIs:

SDL_CreateTexture();
- format: the pixel format (YUV, RGB)
- access: the texture access type (Target, Streaming)

SDL_DestroyTexture();

SDL rendering APIs (a minimal sketch follows the list):

SDL_SetRenderTarget();
SDL_RenderClear();
SDL_RenderCopy();
SDL_RenderPresent();
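
A minimal sketch tying the texture and rendering APIs together: create a streaming texture, upload one frame of pixels with SDL_UpdateTexture(), then run the clear/copy/present sequence (the renderer is assumed to exist already):

#include <SDL.h>

static void draw_once(SDL_Renderer *renderer)
{
    static Uint32 pixels[320 * 240];  // one RGBA8888 frame
    SDL_Texture *texture = SDL_CreateTexture(renderer,
                                             SDL_PIXELFORMAT_RGBA8888,
                                             SDL_TEXTUREACCESS_STREAMING,
                                             320, 240);

    SDL_memset(pixels, 0x80, sizeof(pixels));  // a flat gray frame
    SDL_UpdateTexture(texture, NULL, pixels, 320 * sizeof(Uint32)); // pitch = bytes per row

    SDL_RenderClear(renderer);                      // wipe the backbuffer
    SDL_RenderCopy(renderer, texture, NULL, NULL);  // texture -> backbuffer
    SDL_RenderPresent(renderer);                    // show it

    SDL_DestroyTexture(texture);
}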

5.5 [Hands-on] A YUV Video Player

Creating a thread:

SDL_CreateThread();
- fn: the thread entry function
- name: the thread's name
- data: the argument passed to the entry function

Updating textures in SDL (a sketch follows):

SDL_UpdateTexture();
SDL_UpdateYUVTexture();
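
A minimal sketch of pushing one YUV420P frame into the texture. It assumes the texture was created with SDL_PIXELFORMAT_IYUV and SDL_TEXTUREACCESS_STREAMING, and that buf holds one whole frame read from a .yuv file (Y plane first, then U, then V):

#include <SDL.h>

static void show_yuv_frame(SDL_Renderer *renderer, SDL_Texture *texture,
                           Uint8 *buf, int w, int h)
{
    Uint8 *y = buf;            // w   * h   bytes
    Uint8 *u = y + w * h;      // w/2 * h/2 bytes
    Uint8 *v = u + w * h / 4;  // w/2 * h/2 bytes

    SDL_UpdateYUVTexture(texture, NULL,
                         y, w,       // Y plane and its pitch
                         u, w / 2,   // U plane and its pitch
                         v, w / 2);  // V plane and its pitch

    SDL_RenderClear(renderer);
    SDL_RenderCopy(renderer, texture, NULL, NULL);
    SDL_RenderPresent(renderer);
}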

5.6 Playing Audio with SDL

Basic audio playback flow:

Basic principles of audio playback:

  • The sound card pulls data from you; you do not push data to it
  • How much data it asks for is determined by the audio parameters

SDL audio APIs (a sketch follows the list):

SDL_OpenAudio() / SDL_CloseAudio();
SDL_PauseAudio();
SDL_MixAudio();
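
A minimal sketch of the pull model: SDL calls your callback whenever the device needs data, and the callback fills (or silences) the buffer it is handed. The 44100 Hz / 16-bit / stereo values are placeholders for whatever your PCM actually is:

#include <SDL.h>

static Uint8 *audio_pos = NULL;  // current read position in the PCM buffer
static Uint32 audio_len = 0;     // bytes left to play

// the sound card pulls len bytes from us
static void fill_audio(void *udata, Uint8 *stream, int len)
{
    SDL_memset(stream, 0, len);  // silence by default
    if (audio_len == 0)
        return;
    len = (len > (int)audio_len) ? (int)audio_len : len;
    SDL_MixAudio(stream, audio_pos, len, SDL_MIX_MAXVOLUME);
    audio_pos += len;
    audio_len -= len;
}

static int open_audio_device(void)
{
    SDL_AudioSpec spec;
    spec.freq     = 44100;         // sample rate
    spec.format   = AUDIO_S16SYS;  // 16-bit signed, native byte order
    spec.channels = 2;             // stereo
    spec.silence  = 0;
    spec.samples  = 1024;          // samples per callback
    spec.callback = fill_audio;
    spec.userdata = NULL;

    if (SDL_OpenAudio(&spec, NULL) < 0)
        return -1;
    SDL_PauseAudio(0);             // 0 = unpause, start pulling data
    return 0;
}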

5.7 Building a PCM Player

TODO

6. The Simplest Player

  • This player implements video playback only
  • It combines FFmpeg with SDL
  • FFmpeg decodes the video data
  • SDL does the rendering

$ clang -g -o player2 player2.c `pkg-config --cflags --libs sdl2 libavformat libavutil libswscale libavcodec libswresample`

The simplest player, part two:

  • Plays audio and video at the same time
  • Uses a queue to hold the audio packets

6.1 Multithreading and Locks

Why multithreading:

  • The benefits of multithreading
  • The problems it introduces

Thread mutual exclusion and synchronization:

  • Mutual exclusion

  • Synchronization

    A large task is split into many small tasks that coordinate through signals

Locks and semaphores:

  • Kinds of locks
  • Synchronization via signals

Kinds of locks:

  • Read-write locks
  • Spinlocks
  • Reentrant locks

Creating SDL threads:

SDL_CreateThread();
SDL_WaitThread();

SDL locks:

SDL_CreateMutex() / SDL_DestroyMutex();  // create / destroy a mutex
SDL_LockMutex() / SDL_UnlockMutex();     // lock / unlock a mutex

SDL condition variables (a minimal producer/consumer sketch follows):

SDL_CreateCond() / SDL_DestroyCond();
SDL_CondWait() / SDL_CondSignal();
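
A minimal producer/consumer sketch with these primitives: the main thread sleeps in SDL_CondWait(), which atomically releases the mutex while waiting, until the producer thread changes the shared state and signals:

#include <SDL.h>

static SDL_mutex *mutex;
static SDL_cond  *cond;
static int        ready = 0;

static int producer(void *arg)
{
    SDL_LockMutex(mutex);
    ready = 1;             // change the shared state ...
    SDL_CondSignal(cond);  // ... then wake the waiting thread
    SDL_UnlockMutex(mutex);
    return 0;
}

int main(int argc, char *argv[])
{
    mutex = SDL_CreateMutex();
    cond  = SDL_CreateCond();

    SDL_Thread *t = SDL_CreateThread(producer, "producer", NULL);

    SDL_LockMutex(mutex);
    while (!ready)                  // the loop guards against spurious wakeups
        SDL_CondWait(cond, mutex);  // unlock, sleep, relock on wakeup
    SDL_UnlockMutex(mutex);

    SDL_WaitThread(t, NULL);
    SDL_DestroyCond(cond);
    SDL_DestroyMutex(mutex);
    return 0;
}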

6.2 Using Locks and Condition Variables

TODO

6.3 The Player's Thread Model

6.4 How Threads Exit

  • The main thread receives the quit event
  • The demux thread checks quit in its packet-routing loop
  • The video decode thread checks quit when it takes a packet from the video queue
  • The audio decode path checks quit when it takes a packet from the audio queue
  • The audio decode loop checks quit on each iteration
  • quit is also checked whenever a condition-variable signal is received

6.5 Audio/Video Synchronization

Timestamps:

  • PTS: Presentation timestamp, when to render
  • DTS: Decoding timestamp, when to decode
  • I (intra) / B (bidirectional) / P (predicted) frames

Timestamp ordering:

  • Display order of the frames: I B B P
  • Stored (decode) order: I P B B
  • DTS of the stored sequence: 1 2 3 4
  • PTS of the stored sequence: 1 4 2 3

B frames scramble the presentation order, which is why DTS exists alongside PTS. In the common case without B frames, PTS and DTS are identical.

Where to get the PTS:

  • The PTS in AVPacket
  • The PTS in AVFrame
  • av_frame_get_best_effort_timestamp()

Time bases:

  • tbr: the frame rate
  • tbn: the time base of the stream
  • tbc: the time base of the codec

Computing the current frame's PTS (in seconds):

  • PTS = PTS * av_q2d(video_stream->time_base)
  • av_q2d(AVRational a){ return a.num / (double)a.den; }

Computing the next frame's PTS:

  • video_clock: the predicted PTS of the next video frame
  • frame_delay: 1/tbr
  • audio_clock: the timestamp the audio is currently playing at

To synchronize, compare audio_clock and video_clock: if the video frame's time is at or before the audio time, display it immediately; if it is after the audio time, delay for a while before displaying it (the delay is roughly video_clock - audio_clock).

Audio/video sync strategies:

  • Sync the video to the audio
  • Sync the audio to the video
  • Sync both audio and video to an external (system) clock

The basic idea of video playback:

  • The usual approach: after showing a video frame, get the PTS of the next frame to display, set a timer from it, and when the timer fires, refresh with the new video frame; repeat.

The simplest player:

#include <stdio.h>
#include <assert.h>
#include <math.h>

#include <SDL.h>

#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>

// compatibility with newer API
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55,28,1)
#define av_frame_alloc avcodec_alloc_frame
#define av_frame_free avcodec_free_frame
#endif

#define SDL_AUDIO_BUFFER_SIZE 1024
#define MAX_AUDIO_FRAME_SIZE 192000 //channels(2) * data_size(2) * sample_rate(48000)

#define MAX_AUDIOQ_SIZE (5 * 16 * 1024)
#define MAX_VIDEOQ_SIZE (5 * 256 * 1024)

#define AV_SYNC_THRESHOLD 0.01
#define AV_NOSYNC_THRESHOLD 10.0

#define SAMPLE_CORRECTION_PERCENT_MAX 10
#define AUDIO_DIFF_AVG_NB 20

#define FF_REFRESH_EVENT (SDL_USEREVENT)
#define FF_QUIT_EVENT (SDL_USEREVENT + 1)

#define VIDEO_PICTURE_QUEUE_SIZE 1
#define DEFAULT_AV_SYNC_TYPE AV_SYNC_AUDIO_MASTER //AV_SYNC_VIDEO_MASTER

typedef struct PacketQueue {
AVPacketList *first_pkt, *last_pkt;
int nb_packets;
int size;
SDL_mutex *mutex;
SDL_cond *cond;
} PacketQueue;

typedef struct VideoPicture {
AVPicture *bmp;
int width, height; /* source height & width */
int allocated;
double pts;
} VideoPicture;

typedef struct VideoState {
//multi-media file
char filename[1024];
AVFormatContext *pFormatCtx;
int videoStream, audioStream;

//sync
int av_sync_type;
double external_clock; /* external clock base */
int64_t external_clock_time;

double audio_diff_cum; /* used for AV difference average computation */
double audio_diff_avg_coef;
double audio_diff_threshold;
int audio_diff_avg_count;

double audio_clock;
double frame_timer;
double frame_last_pts;
double frame_last_delay;

double video_clock; ///<pts of last decoded frame / predicted pts of next decoded frame
double video_current_pts; ///<current displayed pts (different from video_clock if frame fifos are used)
int64_t video_current_pts_time; ///<time (av_gettime) at which we updated video_current_pts - used to have running video pts

//audio
AVStream *audio_st;
AVCodecContext *audio_ctx;
PacketQueue audioq;
uint8_t audio_buf[(MAX_AUDIO_FRAME_SIZE * 3) / 2];
unsigned int audio_buf_size;
unsigned int audio_buf_index;
AVFrame audio_frame;
AVPacket audio_pkt;
uint8_t *audio_pkt_data;
int audio_pkt_size;
int audio_hw_buf_size;

//video
AVStream *video_st;
AVCodecContext *video_ctx;
PacketQueue videoq;
struct SwsContext *video_sws_ctx;
struct SwrContext *audio_swr_ctx;

VideoPicture pictq[VIDEO_PICTURE_QUEUE_SIZE];
int pictq_size, pictq_rindex, pictq_windex;
SDL_mutex *pictq_mutex;
SDL_cond *pictq_cond;

SDL_Thread *parse_tid;
SDL_Thread *video_tid;

int quit;
} VideoState;

SDL_mutex *text_mutex;
SDL_Window *win = NULL;
SDL_Renderer *renderer;
SDL_Texture *texture;

enum {
AV_SYNC_AUDIO_MASTER,
AV_SYNC_VIDEO_MASTER,
AV_SYNC_EXTERNAL_MASTER,
};

FILE *yuvfd = NULL;
FILE *audiofd = NULL;

/* Since we only have one decoding thread, the Big Struct
can be global in case we need it. */
VideoState *global_video_state;

void packet_queue_init(PacketQueue *q) {
memset(q, 0, sizeof(PacketQueue));
q->mutex = SDL_CreateMutex();
q->cond = SDL_CreateCond();
}

int packet_queue_put(PacketQueue *q, AVPacket *pkt) {
AVPacketList *pkt1;
if(av_dup_packet(pkt) < 0) {
return -1;
}
pkt1 = av_malloc(sizeof(AVPacketList));
if (!pkt1)
return -1;
pkt1->pkt = *pkt;
pkt1->next = NULL;

SDL_LockMutex(q->mutex);

if (!q->last_pkt)
q->first_pkt = pkt1;
else
q->last_pkt->next = pkt1;
q->last_pkt = pkt1;
q->nb_packets++;
q->size += pkt1->pkt.size;

SDL_CondSignal(q->cond);
SDL_UnlockMutex(q->mutex);
return 0;
}

int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
{
AVPacketList *pkt1;
int ret;

SDL_LockMutex(q->mutex);

for(;;) {
if(global_video_state->quit) {
ret = -1;
break;
}

pkt1 = q->first_pkt;
if (pkt1) {
q->first_pkt = pkt1->next;
if (!q->first_pkt)
q->last_pkt = NULL;
q->nb_packets--;
q->size -= pkt1->pkt.size;
*pkt = pkt1->pkt;
av_free(pkt1);
ret = 1;
break;
} else if (!block) {
ret = 0;
break;
} else {
SDL_CondWait(q->cond, q->mutex);
}
}
SDL_UnlockMutex(q->mutex);
return ret;
}

double get_audio_clock(VideoState *is) {
double pts;
int hw_buf_size, bytes_per_sec, n;

pts = is->audio_clock; /* maintained in the audio thread */
hw_buf_size = is->audio_buf_size - is->audio_buf_index;
bytes_per_sec = 0;
n = is->audio_ctx->channels * 2;
if(is->audio_st) {
bytes_per_sec = is->audio_ctx->sample_rate * n;
}
if(bytes_per_sec) {
pts -= (double)hw_buf_size / bytes_per_sec;
}
return pts;
}

double get_video_clock(VideoState *is) {
double delta;
delta = (av_gettime() - is->video_current_pts_time) / 1000000.0;
return is->video_current_pts + delta;
}

double get_external_clock(VideoState *is) {
return av_gettime() / 1000000.0;
}

double get_master_clock(VideoState *is) {
if(is->av_sync_type == AV_SYNC_VIDEO_MASTER) {
return get_video_clock(is);
} else if(is->av_sync_type == AV_SYNC_AUDIO_MASTER) {
return get_audio_clock(is);
} else {
return get_external_clock(is);
}
}

/* Add or subtract samples to get a better sync, return new
audio buffer size */
int synchronize_audio(VideoState *is, short *samples,
int samples_size, double pts) {
int n;
double ref_clock;

n = 2 * is->audio_ctx->channels;

if(is->av_sync_type != AV_SYNC_AUDIO_MASTER) {
double diff, avg_diff;
int wanted_size, min_size, max_size /*, nb_samples */;

ref_clock = get_master_clock(is);
diff = get_audio_clock(is) - ref_clock;

if(diff < AV_NOSYNC_THRESHOLD) {
// accumulate the diffs
is->audio_diff_cum = diff + is->audio_diff_avg_coef
* is->audio_diff_cum;
if(is->audio_diff_avg_count < AUDIO_DIFF_AVG_NB) {
is->audio_diff_avg_count++;
} else {
avg_diff = is->audio_diff_cum * (1.0 - is->audio_diff_avg_coef);
if(fabs(avg_diff) >= is->audio_diff_threshold) {
wanted_size = samples_size + ((int)(diff * is->audio_ctx->sample_rate) * n);
min_size = samples_size * ((100 - SAMPLE_CORRECTION_PERCENT_MAX) / 100);
max_size = samples_size * ((100 + SAMPLE_CORRECTION_PERCENT_MAX) / 100);
if(wanted_size < min_size) {
wanted_size = min_size;
} else if (wanted_size > max_size) {
wanted_size = max_size;
}
if(wanted_size < samples_size) {
/* remove samples */
samples_size = wanted_size;
} else if(wanted_size > samples_size) {
uint8_t *samples_end, *q;
int nb;

/* add samples by copying final sample*/
nb = (samples_size - wanted_size);
samples_end = (uint8_t *)samples + samples_size - n;
q = samples_end + n;
while(nb > 0) {
memcpy(q, samples_end, n);
q += n;
nb -= n;
}
samples_size = wanted_size;
}
}
}
} else {
/* difference is TOO big; reset diff stuff */
is->audio_diff_avg_count = 0;
is->audio_diff_cum = 0;
}
}
return samples_size;
}

int audio_decode_frame(VideoState *is, uint8_t *audio_buf,
int buf_size, double *pts_ptr) {
int len1, data_size = 0;
AVPacket *pkt = &is->audio_pkt;
double pts;
int n;

for(;;) {
while(is->audio_pkt_size > 0) {
int got_frame = 0;
len1 = avcodec_decode_audio4(is->audio_ctx, &is->audio_frame, &got_frame, pkt);
if(len1 < 0) {
/* if error, skip frame */
is->audio_pkt_size = 0;
break;
}
data_size = 0;
if(got_frame) {
/*
data_size = av_samples_get_buffer_size(NULL,
is->audio_ctx->channels,
is->audio_frame.nb_samples,
is->audio_ctx->sample_fmt,
1);
*/
data_size = 2 * is->audio_frame.nb_samples * 2;
assert(data_size <= buf_size);

swr_convert(is->audio_swr_ctx,
&audio_buf,
MAX_AUDIO_FRAME_SIZE*3/2,
(const uint8_t **)is->audio_frame.data,
is->audio_frame.nb_samples);

fwrite(audio_buf, 1, data_size, audiofd);
//memcpy(audio_buf, is->audio_frame.data[0], data_size);
}
is->audio_pkt_data += len1;
is->audio_pkt_size -= len1;
if(data_size <= 0) {
/* No data yet, get more frames */
continue;
}
pts = is->audio_clock;
*pts_ptr = pts;
n = 2 * is->audio_ctx->channels;
is->audio_clock += (double)data_size /
(double)(n * is->audio_ctx->sample_rate);
/* We have data, return it and come back for more later */
return data_size;
}
if(pkt->data)
av_free_packet(pkt);

if(is->quit) {
return -1;
}
/* next packet */
if(packet_queue_get(&is->audioq, pkt, 1) < 0) {
return -1;
}
is->audio_pkt_data = pkt->data;
is->audio_pkt_size = pkt->size;
/* if update, update the audio clock w/pts */
if(pkt->pts != AV_NOPTS_VALUE) {
is->audio_clock = av_q2d(is->audio_st->time_base)*pkt->pts;
}
}
}

void audio_callback(void *userdata, Uint8 *stream, int len) {
VideoState *is = (VideoState *)userdata;
int len1, audio_size;
double pts;

SDL_memset(stream, 0, len);

while(len > 0) {
if(is->audio_buf_index >= is->audio_buf_size) {
/* We have already sent all our data; get more */
audio_size = audio_decode_frame(is, is->audio_buf, sizeof(is->audio_buf), &pts);
if(audio_size < 0) {
/* If error, output silence */
is->audio_buf_size = 1024 * 2 * 2;
memset(is->audio_buf, 0, is->audio_buf_size);
} else {
audio_size = synchronize_audio(is, (int16_t *)is->audio_buf, audio_size, pts);
is->audio_buf_size = audio_size;
}
is->audio_buf_index = 0;
}
len1 = is->audio_buf_size - is->audio_buf_index;
if(len1 > len)
len1 = len;
SDL_MixAudio(stream,(uint8_t *)is->audio_buf + is->audio_buf_index, len1, SDL_MIX_MAXVOLUME);
//memcpy(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, len1);
len -= len1;
stream += len1;
is->audio_buf_index += len1;
}
}

static Uint32 sdl_refresh_timer_cb(Uint32 interval, void *opaque) {
SDL_Event event;
event.type = FF_REFRESH_EVENT;
event.user.data1 = opaque;
SDL_PushEvent(&event);
return 0; /* 0 means stop timer */
}

/* schedule a video refresh in 'delay' ms */
static void schedule_refresh(VideoState *is, int delay) {
SDL_AddTimer(delay, sdl_refresh_timer_cb, is);
}

void video_display(VideoState *is) {
SDL_Rect rect;
VideoPicture *vp;
float aspect_ratio;
int w, h, x, y;
int i;

vp = &is->pictq[is->pictq_rindex];
if(vp->bmp) {

SDL_UpdateYUVTexture(texture, NULL,
vp->bmp->data[0], vp->bmp->linesize[0],
vp->bmp->data[1], vp->bmp->linesize[1],
vp->bmp->data[2], vp->bmp->linesize[2]);

rect.x = 0;
rect.y = 0;
rect.w = is->video_ctx->width;
rect.h = is->video_ctx->height;
SDL_LockMutex(text_mutex);
SDL_RenderClear( renderer );
SDL_RenderCopy( renderer, texture, NULL, &rect);
SDL_RenderPresent( renderer );
SDL_UnlockMutex(text_mutex);
}
}

void video_refresh_timer(void *userdata) {
VideoState *is = (VideoState *)userdata;
VideoPicture *vp;
double actual_delay, delay, sync_threshold, ref_clock, diff;

if(is->video_st) {
if(is->pictq_size == 0) {
schedule_refresh(is, 1);
//fprintf(stderr, "no picture in the queue!!!\n");
} else {
//fprintf(stderr, "get picture from queue!!!\n");
vp = &is->pictq[is->pictq_rindex];

is->video_current_pts = vp->pts;
is->video_current_pts_time = av_gettime();
delay = vp->pts - is->frame_last_pts; /* the pts from last time */
if(delay <= 0 || delay >= 1.0) {
/* if incorrect delay, use previous one */
delay = is->frame_last_delay;
}
/* save for next time */
is->frame_last_delay = delay;
is->frame_last_pts = vp->pts;

/* update delay to sync to audio if not master source */
if(is->av_sync_type != AV_SYNC_VIDEO_MASTER) {
ref_clock = get_master_clock(is);
diff = vp->pts - ref_clock;

/* Skip or repeat the frame. Take delay into account
FFPlay still doesn't "know if this is the best guess." */
sync_threshold = (delay > AV_SYNC_THRESHOLD) ? delay : AV_SYNC_THRESHOLD;
if(fabs(diff) < AV_NOSYNC_THRESHOLD) {
if(diff <= -sync_threshold) {
delay = 0;
} else if(diff >= sync_threshold) {
delay = 2 * delay;
}
}
}
is->frame_timer += delay;
/* computer the REAL delay */
actual_delay = is->frame_timer - (av_gettime() / 1000000.0);
if(actual_delay < 0.010) {
/* Really it should skip the picture instead */
actual_delay = 0.010;
}
schedule_refresh(is, (int)(actual_delay * 1000 + 0.5));

/* show the picture! */
video_display(is);

/* update queue for next picture! */
if(++is->pictq_rindex == VIDEO_PICTURE_QUEUE_SIZE) {
is->pictq_rindex = 0;
}
SDL_LockMutex(is->pictq_mutex);
is->pictq_size--;
SDL_CondSignal(is->pictq_cond);
SDL_UnlockMutex(is->pictq_mutex);
}
} else {
schedule_refresh(is, 100);
}
}

void alloc_picture(void *userdata) {
int ret;

VideoState *is = (VideoState *)userdata;
VideoPicture *vp;

vp = &is->pictq[is->pictq_windex];
if(vp->bmp) {
// we already have one make another, bigger/smaller
avpicture_free(vp->bmp);
free(vp->bmp);
vp->bmp = NULL;
}

// Allocate a place to put our YUV image on that screen
SDL_LockMutex(text_mutex);

vp->bmp = (AVPicture*)malloc(sizeof(AVPicture));
ret = avpicture_alloc(vp->bmp, AV_PIX_FMT_YUV420P, is->video_ctx->width, is->video_ctx->height);
if (ret < 0) {
fprintf(stderr, "Could not allocate temporary picture: %s\n", av_err2str(ret));
}

SDL_UnlockMutex(text_mutex);

vp->width = is->video_ctx->width;
vp->height = is->video_ctx->height;
vp->allocated = 1;
}

int queue_picture(VideoState *is, AVFrame *pFrame, double pts) {
VideoPicture *vp;

/* wait until we have space for a new pic */
SDL_LockMutex(is->pictq_mutex);
while(is->pictq_size >= VIDEO_PICTURE_QUEUE_SIZE &&
!is->quit) {
SDL_CondWait(is->pictq_cond, is->pictq_mutex);
}
SDL_UnlockMutex(is->pictq_mutex);

if(is->quit)
return -1;

// windex is set to 0 initially
vp = &is->pictq[is->pictq_windex];

/* allocate or resize the buffer! */
if(!vp->bmp ||
vp->width != is->video_ctx->width ||
vp->height != is->video_ctx->height) {

vp->allocated = 0;
alloc_picture(is);
if(is->quit) {
return -1;
}
}

/* We have a place to put our picture on the queue */
if(vp->bmp) {
vp->pts = pts;

// Convert the image into YUV format that SDL uses
sws_scale(is->video_sws_ctx, (uint8_t const * const *)pFrame->data,
pFrame->linesize, 0, is->video_ctx->height,
vp->bmp->data, vp->bmp->linesize);

/* now we inform our display thread that we have a pic ready */
if(++is->pictq_windex == VIDEO_PICTURE_QUEUE_SIZE) {
is->pictq_windex = 0;
}
SDL_LockMutex(is->pictq_mutex);
is->pictq_size++;
SDL_UnlockMutex(is->pictq_mutex);
}
return 0;
}

double synchronize_video(VideoState *is, AVFrame *src_frame, double pts) {
double frame_delay;

if(pts != 0) {
/* if we have pts, set video clock to it */
is->video_clock = pts;
} else {
/* if we aren't given a pts, set it to the clock */
pts = is->video_clock;
}
/* update the video clock */
frame_delay = av_q2d(is->video_ctx->time_base);
/* if we are repeating a frame, adjust clock accordingly */
frame_delay += src_frame->repeat_pict * (frame_delay * 0.5);
is->video_clock += frame_delay;
return pts;
}

int decode_video_thread(void *arg) {
VideoState *is = (VideoState *)arg;
AVPacket pkt1, *packet = &pkt1;
int frameFinished;
AVFrame *pFrame;
double pts;

pFrame = av_frame_alloc();

for(;;) {
if(packet_queue_get(&is->videoq, packet, 1) < 0) {
// means we quit getting packets
break;
}
pts = 0;

// Decode video frame
avcodec_decode_video2(is->video_ctx, pFrame, &frameFinished, packet);

if((pts = av_frame_get_best_effort_timestamp(pFrame)) != AV_NOPTS_VALUE) {
} else {
pts = 0;
}
pts *= av_q2d(is->video_st->time_base);

// Did we get a video frame?
if(frameFinished) {
pts = synchronize_video(is, pFrame, pts);
if(queue_picture(is, pFrame, pts) < 0) {
break;
}
}
av_free_packet(packet);
}
av_frame_free(&pFrame);
return 0;
}

int stream_component_open(VideoState *is, int stream_index) {
AVFormatContext *pFormatCtx = is->pFormatCtx;
AVCodecContext *codecCtx = NULL;
AVCodec *codec = NULL;
SDL_AudioSpec wanted_spec, spec;

if(stream_index < 0 || stream_index >= pFormatCtx->nb_streams) {
return -1;
}

codecCtx = avcodec_alloc_context3(NULL);

int ret = avcodec_parameters_to_context(codecCtx, pFormatCtx->streams[stream_index]->codecpar);
if (ret < 0)
return -1;

codec = avcodec_find_decoder(codecCtx->codec_id);
if(!codec) {
fprintf(stderr, "Unsupported codec!\n");
return -1;
}

if(codecCtx->codec_type == AVMEDIA_TYPE_AUDIO) {
// Set audio settings from codec info
wanted_spec.freq = codecCtx->sample_rate;
wanted_spec.format = AUDIO_S16SYS;
wanted_spec.channels = 2;//codecCtx->channels;
wanted_spec.silence = 0;
wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
wanted_spec.callback = audio_callback;
wanted_spec.userdata = is;

fprintf(stderr, "wanted spec: channels:%d, sample_fmt:%d, sample_rate:%d \n",
2, AUDIO_S16SYS, codecCtx->sample_rate);

if(SDL_OpenAudio(&wanted_spec, &spec) < 0) {
fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
return -1;
}
is->audio_hw_buf_size = spec.size;
}

if(avcodec_open2(codecCtx, codec, NULL) < 0) {
fprintf(stderr, "Unsupported codec!\n");
return -1;
}

switch(codecCtx->codec_type) {
case AVMEDIA_TYPE_AUDIO:
is->audioStream = stream_index;
is->audio_st = pFormatCtx->streams[stream_index];
is->audio_ctx = codecCtx;
is->audio_buf_size = 0;
is->audio_buf_index = 0;
memset(&is->audio_pkt, 0, sizeof(is->audio_pkt));
packet_queue_init(&is->audioq);

//Out Audio Param
uint64_t out_channel_layout=AV_CH_LAYOUT_STEREO;

//AAC:1024 MP3:1152
int out_nb_samples= is->audio_ctx->frame_size;
//AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16;

int out_sample_rate=is->audio_ctx->sample_rate;
int out_channels=av_get_channel_layout_nb_channels(out_channel_layout);
//Out Buffer Size
/*
int out_buffer_size=av_samples_get_buffer_size(NULL,
out_channels,
out_nb_samples,
AV_SAMPLE_FMT_S16,
1);
*/

//uint8_t *out_buffer=(uint8_t *)av_malloc(MAX_AUDIO_FRAME_SIZE*2);
int64_t in_channel_layout=av_get_default_channel_layout(is->audio_ctx->channels);

struct SwrContext *audio_convert_ctx;
audio_convert_ctx = swr_alloc();
swr_alloc_set_opts(audio_convert_ctx,
out_channel_layout,
AV_SAMPLE_FMT_S16,
out_sample_rate,
in_channel_layout,
is->audio_ctx->sample_fmt,
is->audio_ctx->sample_rate,
0,
NULL);
fprintf(stderr, "swr opts: out_channel_layout:%lld, out_sample_fmt:%d, out_sample_rate:%d, in_channel_layout:%lld, in_sample_fmt:%d, in_sample_rate:%d",
out_channel_layout,
AV_SAMPLE_FMT_S16,
out_sample_rate,
in_channel_layout,
is->audio_ctx->sample_fmt,
is->audio_ctx->sample_rate);
swr_init(audio_convert_ctx);
is->audio_swr_ctx = audio_convert_ctx;

SDL_PauseAudio(0);
break;
case AVMEDIA_TYPE_VIDEO:
is->videoStream = stream_index;
is->video_st = pFormatCtx->streams[stream_index];
is->video_ctx = codecCtx;

is->frame_timer = (double)av_gettime() / 1000000.0;
is->frame_last_delay = 40e-3;
is->video_current_pts_time = av_gettime();

packet_queue_init(&is->videoq);
is->video_sws_ctx = sws_getContext(
is->video_ctx->width, is->video_ctx->height,
is->video_ctx->pix_fmt, is->video_ctx->width,
is->video_ctx->height, AV_PIX_FMT_YUV420P,
SWS_BILINEAR, NULL, NULL, NULL);
is->video_tid = SDL_CreateThread(decode_video_thread, "decode_video_thread", is);
break;
default:
break;
}

return 0;
}

int demux_thread(void *arg) {
int err_code;
char errors[1024] = {0,};

VideoState *is = (VideoState *)arg;
AVFormatContext *pFormatCtx;
AVPacket pkt1, *packet = &pkt1;

int video_index = -1;
int audio_index = -1;
int i;

is->videoStream=-1;
is->audioStream=-1;

global_video_state = is;

/* open input file, and allocate format context */
if ((err_code=avformat_open_input(&pFormatCtx, is->filename, NULL, NULL)) < 0) {
av_strerror(err_code, errors, 1024);
fprintf(stderr, "Could not open source file %s, %d(%s)\n", is->filename, err_code, errors);
return -1;
}

is->pFormatCtx = pFormatCtx;

// Retrieve stream information
if(avformat_find_stream_info(pFormatCtx, NULL)<0)
return -1; // Couldn't find stream information

// Dump information about file onto standard error
av_dump_format(pFormatCtx, 0, is->filename, 0);

// Find the first video stream

for(i=0; i<pFormatCtx->nb_streams; i++) {
if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO &&
video_index < 0) {
video_index=i;
}
if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO &&
audio_index < 0) {
audio_index=i;
}
}
if(audio_index >= 0) {
stream_component_open(is, audio_index);
}
if(video_index >= 0) {
stream_component_open(is, video_index);
}

if(is->videoStream < 0 || is->audioStream < 0) {
fprintf(stderr, "%s: could not open codecs\n", is->filename);
goto fail;
}

// create the SDL window
win = SDL_CreateWindow("Media Player",
SDL_WINDOWPOS_UNDEFINED,
SDL_WINDOWPOS_UNDEFINED,
is->video_ctx->width, is->video_ctx->height,
SDL_WINDOW_OPENGL|SDL_WINDOW_RESIZABLE);
if(!win) {
fprintf(stderr, "SDL: could not set video mode - exiting\n");
exit(1);
}

renderer = SDL_CreateRenderer(win, -1, 0);

//IYUV: Y + U + V (3 planes)
//YV12: Y + V + U (3 planes)
Uint32 pixformat= SDL_PIXELFORMAT_IYUV;

//create texture for render
texture = SDL_CreateTexture(renderer,
pixformat,
SDL_TEXTUREACCESS_STREAMING,
is->video_ctx->width,
is->video_ctx->height);

// main decode loop
for(;;) {
if(is->quit) {
break;
}
// seek stuff goes here
if(is->audioq.size > MAX_AUDIOQ_SIZE ||
is->videoq.size > MAX_VIDEOQ_SIZE) {
SDL_Delay(10);
continue;
}
if(av_read_frame(is->pFormatCtx, packet) < 0) {
if(is->pFormatCtx->pb->error == 0) {
SDL_Delay(100); /* no error; wait for user input */
continue;
} else {
break;
}
}
// Is this a packet from the video stream?
if(packet->stream_index == is->videoStream) {
packet_queue_put(&is->videoq, packet);
} else if(packet->stream_index == is->audioStream) {
packet_queue_put(&is->audioq, packet);
} else {
av_free_packet(packet);
}
}
/* all done - wait for it */
while(!is->quit) {
SDL_Delay(100);
}

fail:
if(1){
SDL_Event event;
event.type = FF_QUIT_EVENT;
event.user.data1 = is;
SDL_PushEvent(&event);
}
return 0;
}

int main(int argc, char *argv[]) {
SDL_Event event;
VideoState *is;

is = av_mallocz(sizeof(VideoState));
if(argc < 2) {
fprintf(stderr, "Usage: test <file>\n");
exit(1);
}

yuvfd = fopen("testout.yuv", "wb+");
audiofd = fopen("testout.pcm", "wb+");
// Register all formats and codecs
av_register_all();

if(SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
exit(1);
}

text_mutex = SDL_CreateMutex();
av_strlcpy(is->filename, argv[1], sizeof(is->filename));
is->pictq_mutex = SDL_CreateMutex();
is->pictq_cond = SDL_CreateCond();

schedule_refresh(is, 40);
is->av_sync_type = DEFAULT_AV_SYNC_TYPE;
is->parse_tid = SDL_CreateThread(demux_thread,"demux_thread", is);
if(!is->parse_tid) {
av_free(is);
return -1;
}
for(;;) {
SDL_WaitEvent(&event);
switch(event.type) {
case FF_QUIT_EVENT:
case SDL_QUIT:
is->quit = 1;
SDL_Quit();
return 0;
break;
case FF_REFRESH_EVENT:
video_refresh_timer(event.user.data1);
break;
default:
break;
}
}

fclose(yuvfd);
fclose(audiofd);
return 0;
}


7. Using FFmpeg on Android

Android architecture:

Topics:

  • Calling back and forth between Java and C
  • Building FFmpeg for Android
  • Using FFmpeg on Android

Your first JNI program:

TODO

JNI basics:

  • JNIEnv
  • JavaVM: an Android app has exactly one JavaVM, and one JavaVM can have multiple JNIEnvs
  • Threads: each thread has its own JNIEnv

Calling C/C++ from Java, approach one:

  • Declare a native method in the Java layer

  • Approach one: implement a

    Java_packname_classname_methodname function on the C/C++ side

Calling C/C++ from Java, approach two:

What a signature is:

  • When Java and C/C++ call each other, it is the descriptor for the function's parameters and return type
  • Input parameters go inside the parentheses; the return type goes after them
  • Parameters are listed in order, and reference-type descriptors end with ";" (for example, "(ILjava/lang/String;)V" takes an int and a String and returns void)

Calling Java from C/C++ (a minimal sketch follows the list):

  • FindClass
  • GetMethodID / GetFieldID
  • NewObject
  • Call<TYPE>Method / [G/S]et<type>Field
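
A minimal JNI sketch in C showing both directions (the package, class, and method names are made up for illustration). The exported function name follows the Java_package_ClassName_methodName convention from approach one, and inside it we call back into Java by looking up a method ID from a signature:

#include <jni.h>

/* Hypothetical Java side:
 *
 *   package com.example;
 *   public class Player {
 *       public native void start();          // bound to the C function below
 *       public void onProgress(int pct) { }  // called back from C
 *   }
 */
JNIEXPORT void JNICALL
Java_com_example_Player_start(JNIEnv *env, jobject thiz)
{
    // C -> Java: get the class, look the method up by name and signature,
    // then call it. "(I)V" means one int parameter, void return.
    jclass cls = (*env)->GetObjectClass(env, thiz);
    jmethodID mid = (*env)->GetMethodID(env, cls, "onProgress", "(I)V");
    if (mid == NULL)
        return;
    (*env)->CallVoidMethod(env, thiz, mid, 50);
}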

7.1 [Hands-on] A Player on Android

TODO

8. Using FFmpeg on iOS

TODO

9. Advanced Audio/Video Topics

  • Using FFmpeg filters
  • Trimming down and optimizing FFmpeg
  • Video rendering (OpenGL / Metal)
  • Audio effects
  • Network transport
  • WebRTC: real-time interaction, live streaming, P2P audio/video transport
  • AR technology
  • OpenCV

Industry pain points:

  • Echo cancellation
  • Noise suppression
  • Instant video startup
  • Multi-party, multi-stream real-time interaction
  • Real-time video across PC, mobile apps, and the web
  • Real-time interaction under heavy concurrent load

FFmpeg audio/video sync: principles and implementation

Audio/video sync solutions

An FFmpeg-based audio/video sync algorithm

How Audio/Video Sync Works

If you simply play audio at its sample rate and video at its frame rate, differences in machine speed, decoding efficiency, and other timing factors make it very hard to stay in sync, and the audio/video drift grows linearly. So there are three ways to synchronize:

  • Sync both audio and video to an external clock. This was my first thought, but it is not great: for biological reasons, people are more sensitive to changes in sound than to changes in what they see, so frequently adjusting audio playback produces harsh or noisy artifacts and hurts the user experience. (ps: free biology trivia; I feel very sophisticated.)
  • Use video as the master and sync audio to it. Not used, for the reason above.
  • Use audio as the master and sync video to it. This is the approach taken.

So the principle is: take the audio time as the reference, judge whether the video is running fast or slow, and adjust the video's pace accordingly. It is really a dynamic process of catching up and waiting.

Some Concepts

Both audio and video carry DTS and PTS.

  • DTS, Decoding Time Stamp: tells the decoder in what order to decode packets.
  • PTS, Presentation Time Stamp: indicates the order in which the data decoded from packets should be displayed.
  • For audio the two are identical, but for video, B frames (bidirectional prediction) make the decode order differ from the display order, so DTS and PTS are not necessarily the same.

The time base: see the FFmpeg source:

AVRational time_base;
/**
* rational number numerator/denominator
*/
typedef struct AVRational{
int num; ///< numerator
int den; ///< denominator
} AVRational;

My understanding: it is simply FFmpeg representing the time unit as a fraction, with num the numerator and den the denominator. FFmpeg also provides a conversion helper:

/**
* Convert rational to double.
* @param a rational to convert
* @return (double) a
*/
static inline double av_q2d(AVRational a){
return a.num / (double) a.den;
}

So the display time of a given frame in the video is computed as (in seconds):

time = pts * av_q2d(time_base);

Sync Code

Audio side

clock is the audio playback position (the time from the start up to now):

if (packet->pts != AV_NOPTS_VALUE) {
audio->clock = av_q2d(audio->time_base) * packet->pts;
}

Then add the playback duration of the data in this packet:

double time = datalen/((double) 44100 *2 * 2);
audio->clock = audio->clock +time;

datalen is the length of the data in bytes. The sample rate is 44100, the sample size 16 bits, and the channel count 2, so the duration is data length / bytes per second; for example, 4096 bytes / (44100 × 2 × 2) ≈ 0.023 s.

ps: this calculation is not perfect and has quite a few issues; I will revisit it later.

Video side

First, define a few variables:

double last_play,       // playback time of the previous frame
       play,            // playback time of the current frame
       last_delay,      // the inter-frame delay used last time
       delay,           // delay between two video frames
       audio_clock,     // actual playback time of the audio track
       diff,            // time difference between the audio and video frames
       sync_threshold,  // the acceptable range
       start_time,      // absolute time since the first frame
       pts,
       actual_delay;    // the delay we actually need

start_time = av_gettime() / 1000000.0;
// get the pts
if ((pts = av_frame_get_best_effort_timestamp(frame)) == AV_NOPTS_VALUE) {
    pts = 0;
}
play = pts * av_q2d(vedio->time_base);
// correct the playback time
play = vedio->synchronize(frame, play);
delay = play - last_play;
if (delay <= 0 || delay > 1) {
    delay = last_delay;
}
audio_clock = vedio->audio->clock;
last_delay = delay;
last_play = play;
// time difference between audio and video
diff = vedio->clock - audio_clock;
// only delay or speed up when outside the acceptable range
sync_threshold = (delay > 0.01 ? 0.01 : delay);
if (fabs(diff) < 10) {
    if (diff <= -sync_threshold) {
        delay = 0;
    } else if (diff >= sync_threshold) {
        delay = 2 * delay;
    }
}
start_time += delay;
actual_delay = start_time - av_gettime() / 1000000.0;
if (actual_delay < 0.01) {
    actual_delay = 0.01;
}
// sleep; ffmpeg suggests writing it this way (exactly why is still to be studied)
av_usleep(actual_delay * 1000000.0 + 6000);

// The method that corrects play (the playback time); repeat_pict / (2 * fps)
// comes from ffmpeg's own comments
double synchronize(AVFrame *frame, double play) {
    // clock is the current playback position
    if (play != 0)
        clock = play;
    else  // if pts is 0, set it to the previous frame's time first
        play = clock;
    // pts may be 0, so advance clock ourselves;
    // we need to work out the extra delay:
    double repeat_pict = frame->repeat_pict;
    // use the AVCodecContext's time_base, not the stream's
    double frame_delay = av_q2d(codec->time_base);
    // fps
    double fps = 1 / frame_delay;
    // pts plus this delay is the display time
    double extra_delay = repeat_pict / (2 * fps);
    double delay = extra_delay + frame_delay;
    clock += delay;
    return play;
}


Solving the Pain Points

Echo cancellation solutions:

Adaptive acoustic echo cancellation (AEC) algorithms for speech

How echo cancellation (AEC) works

Research and implementation of audio noise suppression in 58 Live

Instant video startup:

Instant startup and optimization for live video

The technology and optimization experience behind instant live-video startup

A few things about "instant playback" for short video

Baidu LSS live audio/video instant startup

The player's "compromises" are what make video "instant startup" possible!

Multi-party real-time video:

The state of WebRTC and an analysis of multi-party video calls

Multi-party video co-streaming: an efficient way to interact in live streaming

Real-time interaction and heavy concurrent load:

An RTP live-distribution server cluster design

Exploring architectures for real-time interactive live streaming at massive scale

Architecture questions about live-streaming technology encountered during development

Thanks for your support; it keeps me working hard to share useful techniques and knowledge.