Audio and Video Core Technology

Leixiaohua's FFmpeg source code structure diagram (decoding)

Leixiaohua's GitHub

Leixiaohua's FFmpeg blog

FFmpeg filters: basic examples and a full walkthrough

1. Study Outline

Common FFmpeg commands

  • Video recording commands
  • Demuxing/muxing commands for multimedia files
  • Trimming and concatenation commands
  • Image/video conversion commands
  • Live-streaming commands
  • Filter commands

Basic FFmpeg development

  • C language refresher
  • FFmpeg core concepts and common structs
  • Hands-on: demuxing and muxing multimedia files
  • Hands-on: converting between multimedia formats
  • Hands-on: cutting a clip from an MP4
  • Assignment: build a simple Xiaokaxiu-style dubbing app

Hands-on audio/video encoding and decoding

  • Hands-on: H264 decoding
  • Hands-on: H264 encoding
  • Hands-on: AAC audio decoding
  • Hands-on: AAC audio encoding
  • Hands-on: converting video to images

Hands-on audio/video rendering

  • SDL event handling
  • SDL video texture rendering
  • SDL audio rendering
  • Hands-on 1: YUV video playback
  • Hands-on 2: YUV playback at variable speed
  • Hands-on 3: a PCM player

Core player features with FFmpeg

  • Hands-on: video playback of an MP4 file
  • Hands-on: audio playback of an MP4 file
  • Hands-on: a basic player
  • Hands-on: audio/video synchronization
  • Hands-on: a player core

FFmpeg on Android in practice

  • Building FFmpeg for Android
  • Calling between Java and C
  • Hands-on: calling FFmpeg from Android

Study tips

  • Get a firm grip on the audio/video processing pipeline and understand its essence
  • Practice diligently; skill comes with repetition
  • Learn with concrete questions in mind; it pays off double

Where audio/video technology is used

  • Live streaming: audio/video conferencing, education, entertainment/game streaming
  • Short video: Douyin, Kuaishou, Xiaokaxiu
  • Online video: Youku, Tencent Video, iQIYI, etc.
  • Audio/video calls: WeChat, QQ, Skype, etc.
  • Video surveillance
  • Artificial intelligence: face recognition, smart speakers, etc. (more focused on algorithms)

Player architecture

Rendering pipeline

What FFmpeg can do

  • FFmpeg is an excellent multimedia framework
  • FFmpeg runs on Linux, macOS, Windows, and other platforms
  • It can decode, encode, transcode, mux, demux, and filter audio/video data

Downloading and installing FFmpeg


$ git clone https://git.ffmpeg.org/ffmpeg.git
$ ./configure --help
$ make && make install


2. Common FFmpeg Commands in Practice

By purpose, FFmpeg commands fall into the following categories:

  • Basic information queries
  • Recording
  • Demuxing / muxing
  • Raw-data processing
  • Filters
  • Cutting and merging
  • Image/video conversion
  • Live streaming

Apart from the basic information queries, every other command processes audio/video through the pipeline shown in the figure below.

$ ffplay -s 2560x1600 -pix_fmt uyvy422 out.yuv

3. Beginner Development Topics

  • Using FFmpeg's logging and directory operations
  • FFmpeg's basic concepts and common structs
  • Hands-on practice with muxing/demuxing and stream operations

FFmpeg code structure:

  • libavcodec: implementations of a wide range of encoders and decoders.
  • libavformat: streaming protocols, container formats, and basic I/O access.
  • libavutil: hashers, decompressors, and miscellaneous utility functions.
  • libavfilter: a variety of audio/video filters.
  • libavdevice: interfaces for capture and playback devices.
  • libswresample: audio mixing and resampling.
  • libswscale: color conversion and scaling.

3.1 The FFmpeg Logging System

#include <libavutil/log.h>

av_log_set_level(AV_LOG_DEBUG);

av_log(NULL, AV_LOG_INFO, "...%s\n", op);

  • AV_LOG_ERROR
  • AV_LOG_WARNING
  • AV_LOG_INFO

Using the FFmpeg logging system:

#include <stdio.h>
#include <libavutil/log.h>

int main(int argc, char *argv[])
{
    av_log_set_level(AV_LOG_DEBUG);

    av_log(NULL, AV_LOG_INFO, "hello world: %s!\n", "aaa");

    return 0;
}


3.2 File and Directory Operations in FFmpeg

Deleting and renaming files:

#include <libavformat/avformat.h>

avpriv_io_delete()

avpriv_io_move(src, dst)

File and directory operations with FFmpeg:

#include <stdio.h>
#include <libavutil/log.h>
#include <libavformat/avformat.h>

int main(int argc, char *argv[])
{
    int ret;
    ret = avpriv_io_delete("./mytestfile.txt");
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Failed to delete file mytestfile.txt\n");
        return -1;
    }

    ret = avpriv_io_move("111.txt", "222.txt");
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Failed to rename\n");
        return -1;
    }

    return 0;
}


$ clang -g -o ffmpeg_del ffmpeg_file.c `pkg-config --libs libavformat`

# pkg-config --libs libavformat prints the linker flags (library path and -l flag) for libavformat

$ pkg-config --libs libavformat
-L/usr/local/ffmpeg/lib -lavformat


3.3 Key Directory-Handling Functions in FFmpeg

avio_open_dir()
avio_read_dir()
avio_close_dir()

Key structs for directory handling:

  • AVIODirContext

    The context for directory operations.

  • AVIODirEntry

    A directory entry; holds the file name, file size, and similar information.

Listing a directory with FFmpeg:

#include <stdio.h>
#include <inttypes.h>
#include <libavutil/log.h>
#include <libavformat/avformat.h>

int main(int argc, char *argv[])
{
    av_log_set_level(AV_LOG_INFO);

    int ret;
    AVIODirContext *ctx = NULL;
    AVIODirEntry *entry = NULL;

    ret = avio_open_dir(&ctx, "./", NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Can't open dir: %s\n", av_err2str(ret));
        return -1;
    }

    while (1) {
        ret = avio_read_dir(ctx, &entry);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Can't read dir: %s\n", av_err2str(ret));
            goto __fail;
        }
        if (!entry) {
            break;
        }

        av_log(NULL, AV_LOG_INFO, "%12"PRId64" %s\n",
               entry->size,
               entry->name);

        avio_free_directory_entry(&entry);
    }

__fail:
    avio_close_dir(&ctx);
    return 0;
}


$ clang -g -o list ffmpeg_list.c `pkg-config --libs libavformat libavutil`


3.4 Basic Concepts of Multimedia Files

  • A multimedia file is really just a container
  • Inside the container are multiple streams (also called tracks)
  • Each stream is produced by a different encoder
  • Data read from a stream comes in packets
  • A packet contains one or more frames

A few important structs:

  • AVFormatContext
  • AVStream
  • AVPacket

Basic steps for working with stream data in FFmpeg:

demux —> get streams —> read packets —> free resources

3.5 [Hands-on] Printing Audio/Video Information

av_register_all()
avformat_open_input() / avformat_close_input()
av_dump_format()

#include <stdio.h>
#include <libavutil/log.h>
#include <libavformat/avformat.h>

int main(int argc, char *argv[])
{
    int ret;
    av_log_set_level(AV_LOG_INFO);

    AVFormatContext *fmt_ctx = NULL;

    av_register_all();

    ret = avformat_open_input(&fmt_ctx, "./test.mp4", NULL, NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Can't open file: %s\n", av_err2str(ret));
        return -1;
    }

    av_dump_format(fmt_ctx, 0, "./test.mp4", 0);

    avformat_close_input(&fmt_ctx);

    return 0;
}


3.6 [Hands-on] Extracting Audio Data

av_init_packet()
av_find_best_stream()
av_read_frame() / av_packet_unref()

#include <stdio.h>
#include <libavutil/log.h>
#include <libavformat/avformat.h>

int main(int argc, char *argv[])
{
    int ret;
    int len;
    int audio_index;

    char *src = NULL;
    char *dst = NULL;

    av_log_set_level(AV_LOG_INFO);

    AVPacket pkt;
    AVFormatContext *fmt_ctx = NULL;

    av_register_all();

    // 1. read the two parameters from the command line
    if (argc < 3) {
        av_log(NULL, AV_LOG_ERROR, "eg: %s in_file out_file\n", argv[0]);
        return -1;
    }
    src = argv[1];
    dst = argv[2];
    if (!src || !dst) {
        av_log(NULL, AV_LOG_ERROR, "src or dst is null\n");
        return -1;
    }

    ret = avformat_open_input(&fmt_ctx, src, NULL, NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Can't open file: %s\n", av_err2str(ret));
        return -1;
    }

    FILE *dst_fd = fopen(dst, "wb");
    if (!dst_fd) {
        av_log(NULL, AV_LOG_ERROR, "Can't open out file!\n");
        avformat_close_input(&fmt_ctx);
        return -1;
    }
    av_dump_format(fmt_ctx, 0, src, 0);

    // 2. find the best audio stream
    ret = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Can't find the best stream!\n");
        avformat_close_input(&fmt_ctx);
        fclose(dst_fd);
        return -1;
    }

    audio_index = ret;
    av_init_packet(&pkt);
    while (av_read_frame(fmt_ctx, &pkt) >= 0) {
        if (pkt.stream_index == audio_index) {
            // 3. write the audio data to the output file
            len = fwrite(pkt.data, 1, pkt.size, dst_fd);
            if (len != pkt.size) {
                av_log(NULL, AV_LOG_WARNING, "warning, length of data is not equal size of pkt!\n");
            }
        }
        av_packet_unref(&pkt);
    }

    avformat_close_input(&fmt_ctx);
    if (dst_fd) {
        fclose(dst_fd);
    }

    return 0;
}


$ clang -g -o extra_audio extra_audio.c `pkg-config --libs libavutil libavformat`
$ ./extra_audio test.mp4 killer.aac


3.7 [Hands-on] Extracting Video Data

  • Start code
  • SPS/PPS
  • codec -> extradata (see the sketch below)
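
In MP4, H264 is stored in AVCC form: every NALU carries a length prefix, and the SPS/PPS live in the codec's extradata. A playable .h264 file needs Annex-B form instead, i.e. 00 00 00 01 start codes with the SPS/PPS re-inserted into the stream. A minimal sketch, assuming fmt_ctx is already opened and video_index was found with av_find_best_stream(), that lets FFmpeg's h264_mp4toannexb bitstream filter do the rewriting:

#include <stdio.h>
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>

int write_annexb(AVFormatContext *fmt_ctx, int video_index, FILE *out_fd)
{
    const AVBitStreamFilter *bsf = av_bsf_get_by_name("h264_mp4toannexb");
    AVBSFContext *bsf_ctx = NULL;
    AVPacket pkt;

    av_bsf_alloc(bsf, &bsf_ctx);
    // the filter reads the SPS/PPS from the stream's extradata
    avcodec_parameters_copy(bsf_ctx->par_in,
                            fmt_ctx->streams[video_index]->codecpar);
    av_bsf_init(bsf_ctx);

    av_init_packet(&pkt);
    while (av_read_frame(fmt_ctx, &pkt) >= 0) {
        if (pkt.stream_index != video_index) {
            av_packet_unref(&pkt);
            continue;
        }
        av_bsf_send_packet(bsf_ctx, &pkt);          // the filter takes the packet
        while (av_bsf_receive_packet(bsf_ctx, &pkt) == 0) {
            fwrite(pkt.data, 1, pkt.size, out_fd);  // start codes + SPS/PPS added
            av_packet_unref(&pkt);
        }
    }
    av_bsf_free(&bsf_ctx);
    return 0;
}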

3.8 [Hands-on] Converting MP4 to FLV

avformat_alloc_output_context2() / avformat_free_context();
avformat_new_stream();
avcodec_parameters_copy();
avformat_write_header();
av_write_frame() / av_interleaved_write_frame();
av_write_trailer()
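
A minimal remuxing sketch built from exactly these calls (error handling omitted). No transcoding happens: every packet is copied as-is, and only its timestamps are rescaled from the input stream's time base to the output stream's. The container is guessed from the output file name, so out is expected to end in .flv:

#include <libavformat/avformat.h>

int mp4_to_flv(const char *in, const char *out)
{
    AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx = NULL;
    AVPacket pkt;
    int i;

    avformat_open_input(&ifmt_ctx, in, NULL, NULL);
    avformat_find_stream_info(ifmt_ctx, NULL);

    avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, out);
    for (i = 0; i < ifmt_ctx->nb_streams; i++) {
        AVStream *out_stream = avformat_new_stream(ofmt_ctx, NULL);
        avcodec_parameters_copy(out_stream->codecpar,
                                ifmt_ctx->streams[i]->codecpar);
        out_stream->codecpar->codec_tag = 0;  // let the FLV muxer pick its own tag
    }

    avio_open(&ofmt_ctx->pb, out, AVIO_FLAG_WRITE);
    avformat_write_header(ofmt_ctx, NULL);

    while (av_read_frame(ifmt_ctx, &pkt) >= 0) {
        // rescale pts/dts/duration between the two time bases
        av_packet_rescale_ts(&pkt,
                             ifmt_ctx->streams[pkt.stream_index]->time_base,
                             ofmt_ctx->streams[pkt.stream_index]->time_base);
        pkt.pos = -1;
        av_interleaved_write_frame(ofmt_ctx, &pkt);
        av_packet_unref(&pkt);
    }

    av_write_trailer(ofmt_ctx);
    avio_closep(&ofmt_ctx->pb);
    avformat_free_context(ofmt_ctx);
    avformat_close_input(&ifmt_ctx);
    return 0;
}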

3.9 [Hands-on] Cutting a Clip from an MP4

av_seek_frame()
Code for cutting a clip from an MP4:

#include <stdlib.h>
#include <string.h>
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>

static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt, const char *tag)
{
AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;

printf("%s: pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
tag,
av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
pkt->stream_index);
}

int cut_video(double from_seconds, double end_seconds, const char* in_filename, const char* out_filename) {
AVOutputFormat *ofmt = NULL;
AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx = NULL;
AVPacket pkt;
int ret, i;

av_register_all();

if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0) {
fprintf(stderr, "Could not open input file '%s'", in_filename);
goto end;
}

if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0) {
fprintf(stderr, "Failed to retrieve input stream information");
goto end;
}

av_dump_format(ifmt_ctx, 0, in_filename, 0);

avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, out_filename);
if (!ofmt_ctx) {
fprintf(stderr, "Could not create output context\n");
ret = AVERROR_UNKNOWN;
goto end;
}

ofmt = ofmt_ctx->oformat;

for (i = 0; i < ifmt_ctx->nb_streams; i++) {
AVStream *in_stream = ifmt_ctx->streams[i];
AVStream *out_stream = avformat_new_stream(ofmt_ctx, in_stream->codec->codec);
if (!out_stream) {
fprintf(stderr, "Failed allocating output stream\n");
ret = AVERROR_UNKNOWN;
goto end;
}

ret = avcodec_copy_context(out_stream->codec, in_stream->codec);
if (ret < 0) {
fprintf(stderr, "Failed to copy context from input to output stream codec context\n");
goto end;
}
out_stream->codec->codec_tag = 0;
if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
out_stream->codec->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
av_dump_format(ofmt_ctx, 0, out_filename, 1);

if (!(ofmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&ofmt_ctx->pb, out_filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open output file '%s'", out_filename);
goto end;
}
}

ret = avformat_write_header(ofmt_ctx, NULL);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file\n");
goto end;
}

// int indexs[8] = {0};


// int64_t start_from = 8*AV_TIME_BASE;
ret = av_seek_frame(ifmt_ctx, -1, from_seconds*AV_TIME_BASE, AVSEEK_FLAG_ANY);
if (ret < 0) {
fprintf(stderr, "Error seek\n");
goto end;
}

int64_t *dts_start_from = malloc(sizeof(int64_t) * ifmt_ctx->nb_streams);
memset(dts_start_from, 0, sizeof(int64_t) * ifmt_ctx->nb_streams);
int64_t *pts_start_from = malloc(sizeof(int64_t) * ifmt_ctx->nb_streams);
memset(pts_start_from, 0, sizeof(int64_t) * ifmt_ctx->nb_streams);

while (1) {
AVStream *in_stream, *out_stream;

ret = av_read_frame(ifmt_ctx, &pkt);
if (ret < 0)
break;

in_stream = ifmt_ctx->streams[pkt.stream_index];
out_stream = ofmt_ctx->streams[pkt.stream_index];

log_packet(ifmt_ctx, &pkt, "in");

if (av_q2d(in_stream->time_base) * pkt.pts > end_seconds) {
av_free_packet(&pkt);
break;
}

if (dts_start_from[pkt.stream_index] == 0) {
dts_start_from[pkt.stream_index] = pkt.dts;
printf("dts_start_from: %s\n", av_ts2str(dts_start_from[pkt.stream_index]));
}
if (pts_start_from[pkt.stream_index] == 0) {
pts_start_from[pkt.stream_index] = pkt.pts;
printf("pts_start_from: %s\n", av_ts2str(pts_start_from[pkt.stream_index]));
}

/* copy packet */
pkt.pts = av_rescale_q_rnd(pkt.pts - pts_start_from[pkt.stream_index], in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
pkt.dts = av_rescale_q_rnd(pkt.dts - dts_start_from[pkt.stream_index], in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
if (pkt.pts < 0) {
pkt.pts = 0;
}
if (pkt.dts < 0) {
pkt.dts = 0;
}
pkt.duration = (int)av_rescale_q((int64_t)pkt.duration, in_stream->time_base, out_stream->time_base);
pkt.pos = -1;
log_packet(ofmt_ctx, &pkt, "out");
printf("\n");

ret = av_interleaved_write_frame(ofmt_ctx, &pkt);
if (ret < 0) {
fprintf(stderr, "Error muxing packet\n");
break;
}
av_free_packet(&pkt);
}
free(dts_start_from);
free(pts_start_from);

av_write_trailer(ofmt_ctx);

end:
avformat_close_input(&ifmt_ctx);

/* close output */
if (ofmt_ctx && !(ofmt->flags & AVFMT_NOFILE))
avio_closep(&ofmt_ctx->pb);
avformat_free_context(ofmt_ctx);

if (ret < 0 && ret != AVERROR_EOF) {
fprintf(stderr, "Error occurred: %s\n", av_err2str(ret));
return 1;
}

return 0;
}

int main(int argc, char *argv[]){
    if (argc < 5) {
        fprintf(stderr, "Usage: %s start_time end_time src_file out_file\n", argv[0]);
        return -1;
    }

    double start_time = atof(argv[1]);
    double end_time = atof(argv[2]);
    cut_video(start_time, end_time, argv[3], argv[4]);

    return 0;
}


3.10 [Hands-on] A Simple Xiaokaxiu-Style App

  • Extract the audio track from one media file and the video track from another
  • Merge the audio and video tracks into a new file
  • Trim the audio and video tracks

4. Intermediate FFmpeg Development

  • FFmpeg H264 decoding
  • FFmpeg H264 encoding
  • FFmpeg AAC decoding
  • FFmpeg AAC encoding

4.1 FFmpeg H264 Decoding

#include <libavcodec/avcodec.h>

Common data structures:

  • AVCodec: the codec struct
  • AVCodecContext: the codec context
  • AVFrame: a decoded frame

Allocating and freeing these structs:

av_frame_alloc() / av_frame_free();
avcodec_alloc_context3();
avcodec_free_context();

Decoding steps (a minimal sketch follows the list):

  • Find the decoder (avcodec_find_decoder)
  • Open the decoder (avcodec_open2)
  • Decode (avcodec_decode_video2)
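
A minimal decoding sketch following these three steps. One caveat: newer FFmpeg deprecates avcodec_decode_video2() in favor of the avcodec_send_packet()/avcodec_receive_frame() pair used below; in a real player the decoder is found and opened once, then fed packets in a loop:

#include <stdio.h>
#include <libavcodec/avcodec.h>

static AVCodecContext *open_h264_decoder(void)
{
    AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264); // 1. find the decoder
    AVCodecContext *ctx = avcodec_alloc_context3(codec);
    avcodec_open2(ctx, codec, NULL);                         // 2. open the decoder
    return ctx;
}

static void decode_h264(AVCodecContext *ctx, AVFrame *frame, AVPacket *pkt)
{
    avcodec_send_packet(ctx, pkt);                 // 3. decode: feed one packet in,
    while (avcodec_receive_frame(ctx, frame) == 0) //    pull zero or more frames out
        printf("decoded frame %dx%d\n", frame->width, frame->height);
}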

4.2 FFmpeg H264 Encoding

The H264 encoding flow (a minimal sketch follows the list):

  • Find the encoder (avcodec_find_encoder_by_name)
  • Set the parameters and open the encoder (avcodec_open2)
  • Encode (avcodec_encode_video2)
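
A minimal encoding sketch for the same flow. "libx264" must be compiled into your FFmpeg, and the 640x480 / 25 fps / 400 kbps parameters are placeholders; newer FFmpeg replaces avcodec_encode_video2() with the send/receive pair:

#include <stdio.h>
#include <libavcodec/avcodec.h>

static void encode_one(AVFrame *frame, FILE *out)
{
    AVCodec *codec = avcodec_find_encoder_by_name("libx264"); // 1. find the encoder
    AVCodecContext *ctx = avcodec_alloc_context3(codec);
    AVPacket *pkt = av_packet_alloc();

    ctx->width     = 640;                         // 2. set the parameters ...
    ctx->height    = 480;
    ctx->time_base = (AVRational){1, 25};
    ctx->framerate = (AVRational){25, 1};
    ctx->pix_fmt   = AV_PIX_FMT_YUV420P;
    ctx->bit_rate  = 400000;
    ctx->gop_size  = 10;
    avcodec_open2(ctx, codec, NULL);              // ... and open it

    avcodec_send_frame(ctx, frame);               // 3. encode
    while (avcodec_receive_packet(ctx, pkt) == 0) {
        fwrite(pkt->data, 1, pkt->size, out);
        av_packet_unref(pkt);
    }

    av_packet_free(&pkt);
    avcodec_free_context(&ctx);
}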

4.3 Converting Video to Images

TODO

4.4 FFmpeg AAC Encoding

  • The encoding flow is the same as for video
  • The encoding function is avcodec_encode_audio2

5. Introduction to SDL

SDL official site

  • SDL (Simple DirectMedia Layer) is an open-source, cross-platform multimedia development library
  • A cross-platform open-source media library implemented in C
  • Widely used for games, emulators, media players, and other multimedia applications

Syntax and subsystems:

SDL divides its functionality into the following subsystems:

  • Video: graphics control, plus thread and event management.
  • Audio: sound control
  • Joystick: game controller input
  • CD-ROM: optical drive and media control
  • Window Management: integration with windowing environments
  • Event: event-driven processing

Below is a very simple SDL example written in C:

// Headers
#include "SDL.h"

// Main function
int main(int argc, char* argv[])
{
    // Initialize SDL
    if (SDL_Init(SDL_INIT_EVERYTHING) == -1)
        return 1;

    // Delay 2 seconds
    SDL_Delay(2000);

    // Quit SDL
    SDL_Quit();

    // Return
    return 0;
}

The program above initializes all SDL subsystems (exiting if that fails), pauses for two seconds, then shuts down SDL and exits.

5.1 Building and Installing SDL

  • Download the SDL source code
  • Generate the Makefile: ./configure --prefix=/usr/local
  • Build and install: sudo make -j 8 && make install

5.2 Basic Steps for Using SDL

  • Include the header: #include <SDL.h>
  • Initialize SDL
  • Quit SDL

SDL window and renderer APIs:

SDL_Init() / SDL_Quit();
SDL_CreateWindow() / SDL_DestroyWindow();
SDL_CreateRenderer(); // create the renderer
$ clang -g -o first_sdl first_sdl.c `pkg-config --libs sdl2`

Rendering with the renderer (a complete minimal example follows the list):

SDL_CreateRenderer() / SDL_DestroyRenderer();
SDL_RenderClear();
SDL_RenderPresent();
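
Putting the two API groups together, a minimal sketch that opens a window, attaches a renderer, clears the backbuffer to a solid color, and presents it:

#include <SDL.h>

int main(int argc, char *argv[])
{
    SDL_Window *win = NULL;
    SDL_Renderer *renderer = NULL;

    if (SDL_Init(SDL_INIT_VIDEO))
        return -1;

    win = SDL_CreateWindow("SDL Window",
                           SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED,
                           640, 480, SDL_WINDOW_SHOWN);
    renderer = SDL_CreateRenderer(win, -1, 0);

    SDL_SetRenderDrawColor(renderer, 0, 0, 255, 255); // opaque blue
    SDL_RenderClear(renderer);    // paint the whole backbuffer
    SDL_RenderPresent(renderer);  // flip it onto the screen

    SDL_Delay(3000);

    SDL_DestroyRenderer(renderer);
    SDL_DestroyWindow(win);
    SDL_Quit();
    return 0;
}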

5.3 SDL Event Fundamentals

  • SDL keeps all events in a single queue
  • Every event operation is really an operation on that queue

SDL event types:

  • SDL_WindowEvent: window events
  • SDL_KeyboardEvent: keyboard events
  • SDL_MouseMotionEvent: mouse events
  • Custom (user-defined) events

SDL event handling (a sketch follows):

SDL_PollEvent();        // polling, non-blocking
SDL_WaitEvent();        // blocking; the usual approach
SDL_WaitEventTimeout();
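
A minimal event-loop sketch: block in SDL_WaitEvent() and react to whatever comes out of SDL's single event queue (closing the window or pressing ESC quits):

#include <SDL.h>

static void event_loop(void)
{
    SDL_Event event;
    int quit = 0;

    while (!quit) {
        SDL_WaitEvent(&event);  // blocks until an event arrives
        switch (event.type) {
        case SDL_QUIT:          // the window was closed
            quit = 1;
            break;
        case SDL_KEYDOWN:       // a key was pressed; ESC quits
            if (event.key.keysym.sym == SDLK_ESCAPE)
                quit = 1;
            break;
        case SDL_MOUSEMOTION:
            SDL_Log("mouse at (%d, %d)", event.motion.x, event.motion.y);
            break;
        default:
            break;
        }
    }
}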

5.4 Texture Rendering

SDL rendering basics:

SDL texture APIs:

SDL_CreateTexture();
- format: the pixel format (YUV, RGB)
- access: the texture access type (Target, Streaming)

SDL_DestroyTexture();

SDL rendering APIs (a minimal sketch follows the list):

SDL_SetRenderTarget();
SDL_RenderClear();
SDL_RenderCopy();
SDL_RenderPresent();
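
A minimal sketch tying the texture and rendering APIs together: create a streaming texture, upload one frame of pixels with SDL_UpdateTexture(), then run the clear/copy/present sequence (the renderer is assumed to exist already):

#include <SDL.h>

static void draw_once(SDL_Renderer *renderer)
{
    static Uint32 pixels[320 * 240];  // one RGBA8888 frame
    SDL_Texture *texture = SDL_CreateTexture(renderer,
                                             SDL_PIXELFORMAT_RGBA8888,
                                             SDL_TEXTUREACCESS_STREAMING,
                                             320, 240);

    SDL_memset(pixels, 0x80, sizeof(pixels));  // a flat gray frame
    SDL_UpdateTexture(texture, NULL, pixels, 320 * sizeof(Uint32)); // pitch = bytes per row

    SDL_RenderClear(renderer);                      // wipe the backbuffer
    SDL_RenderCopy(renderer, texture, NULL, NULL);  // texture -> backbuffer
    SDL_RenderPresent(renderer);                    // show it

    SDL_DestroyTexture(texture);
}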

5.5 [Hands-on] A YUV Video Player

Creating a thread:

SDL_CreateThread();
- fn: the thread entry function
- name: the thread's name
- data: the argument passed to the entry function

Updating textures in SDL (a sketch follows):

SDL_UpdateTexture();
SDL_UpdateYUVTexture();
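
A minimal sketch of pushing one YUV420P frame into the texture. It assumes the texture was created with SDL_PIXELFORMAT_IYUV and SDL_TEXTUREACCESS_STREAMING, and that buf holds one whole frame read from a .yuv file (Y plane first, then U, then V):

#include <SDL.h>

static void show_yuv_frame(SDL_Renderer *renderer, SDL_Texture *texture,
                           Uint8 *buf, int w, int h)
{
    Uint8 *y = buf;            // w   * h   bytes
    Uint8 *u = y + w * h;      // w/2 * h/2 bytes
    Uint8 *v = u + w * h / 4;  // w/2 * h/2 bytes

    SDL_UpdateYUVTexture(texture, NULL,
                         y, w,       // Y plane and its pitch
                         u, w / 2,   // U plane and its pitch
                         v, w / 2);  // V plane and its pitch

    SDL_RenderClear(renderer);
    SDL_RenderCopy(renderer, texture, NULL, NULL);
    SDL_RenderPresent(renderer);
}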

5.6 Playing Audio with SDL

Basic audio playback flow:

Basic principles of audio playback:

  • The sound card pulls data from you; you do not push data to it
  • How much data it asks for is determined by the audio parameters

SDL audio APIs (a sketch follows the list):

SDL_OpenAudio() / SDL_CloseAudio();
SDL_PauseAudio();
SDL_MixAudio();
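
A minimal sketch of the pull model: SDL calls your callback whenever the device needs data, and the callback fills (or silences) the buffer it is handed. The 44100 Hz / 16-bit / stereo values are placeholders for whatever your PCM actually is:

#include <SDL.h>

static Uint8 *audio_pos = NULL;  // current read position in the PCM buffer
static Uint32 audio_len = 0;     // bytes left to play

// the sound card pulls len bytes from us
static void fill_audio(void *udata, Uint8 *stream, int len)
{
    SDL_memset(stream, 0, len);  // silence by default
    if (audio_len == 0)
        return;
    len = (len > (int)audio_len) ? (int)audio_len : len;
    SDL_MixAudio(stream, audio_pos, len, SDL_MIX_MAXVOLUME);
    audio_pos += len;
    audio_len -= len;
}

static int open_audio_device(void)
{
    SDL_AudioSpec spec;
    spec.freq     = 44100;         // sample rate
    spec.format   = AUDIO_S16SYS;  // 16-bit signed, native byte order
    spec.channels = 2;             // stereo
    spec.silence  = 0;
    spec.samples  = 1024;          // samples per callback
    spec.callback = fill_audio;
    spec.userdata = NULL;

    if (SDL_OpenAudio(&spec, NULL) < 0)
        return -1;
    SDL_PauseAudio(0);             // 0 = unpause, start pulling data
    return 0;
}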

5.7 Building a PCM Player

TODO

6. The Simplest Player

  • This player implements video playback only
  • It combines FFmpeg with SDL
  • FFmpeg decodes the video data
  • SDL does the rendering

$ clang -g -o player2 player2.c `pkg-config --cflags --libs sdl2 libavformat libavutil libswscale libavcodec libswresample`

The simplest player, part two:

  • Plays audio and video at the same time
  • Uses a queue to hold the audio packets

6.1 Multithreading and Locks

Why multithreading:

  • The benefits of multithreading
  • The problems it introduces

Thread mutual exclusion and synchronization:

  • Mutual exclusion

  • Synchronization

    A large task is split into many small tasks that coordinate through signals

Locks and semaphores:

  • Kinds of locks
  • Synchronization via signals

Kinds of locks:

  • Read-write locks
  • Spinlocks
  • Reentrant locks

Creating SDL threads:

SDL_CreateThread();
SDL_WaitThread();

SDL locks:

SDL_CreateMutex() / SDL_DestroyMutex();  // create / destroy a mutex
SDL_LockMutex() / SDL_UnlockMutex();     // lock / unlock a mutex

SDL condition variables (a minimal producer/consumer sketch follows):

SDL_CreateCond() / SDL_DestroyCond();
SDL_CondWait() / SDL_CondSignal();
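
A minimal producer/consumer sketch with these primitives: the main thread sleeps in SDL_CondWait(), which atomically releases the mutex while waiting, until the producer thread changes the shared state and signals:

#include <SDL.h>

static SDL_mutex *mutex;
static SDL_cond  *cond;
static int        ready = 0;

static int producer(void *arg)
{
    SDL_LockMutex(mutex);
    ready = 1;             // change the shared state ...
    SDL_CondSignal(cond);  // ... then wake the waiting thread
    SDL_UnlockMutex(mutex);
    return 0;
}

int main(int argc, char *argv[])
{
    mutex = SDL_CreateMutex();
    cond  = SDL_CreateCond();

    SDL_Thread *t = SDL_CreateThread(producer, "producer", NULL);

    SDL_LockMutex(mutex);
    while (!ready)                  // the loop guards against spurious wakeups
        SDL_CondWait(cond, mutex);  // unlock, sleep, relock on wakeup
    SDL_UnlockMutex(mutex);

    SDL_WaitThread(t, NULL);
    SDL_DestroyCond(cond);
    SDL_DestroyMutex(mutex);
    return 0;
}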

6.2 Using Locks and Condition Variables

TODO

6.3 The Player's Thread Model

6.4 How Threads Exit

  • The main thread receives the quit event
  • The demux thread checks quit in its packet-routing loop
  • The video decode thread checks quit when it takes a packet from the video queue
  • The audio decode path checks quit when it takes a packet from the audio queue
  • The audio decode loop checks quit on each iteration
  • quit is also checked whenever a condition-variable signal is received

6.5 Audio/Video Synchronization

Timestamps:

  • PTS: Presentation timestamp, when to render
  • DTS: Decoding timestamp, when to decode
  • I (intra) / B (bidirectional) / P (predicted) frames

Timestamp ordering:

  • Display order of the frames: I B B P
  • Stored (decode) order: I P B B
  • DTS of the stored sequence: 1 2 3 4
  • PTS of the stored sequence: 1 4 2 3

B frames scramble the presentation order, which is why DTS exists alongside PTS. In the common case without B frames, PTS and DTS are identical.

Where to get the PTS:

  • The PTS in AVPacket
  • The PTS in AVFrame
  • av_frame_get_best_effort_timestamp()

Time bases:

  • tbr: the frame rate
  • tbn: the time base of the stream
  • tbc: the time base of the codec

Computing the current frame's PTS (in seconds):

  • PTS = PTS * av_q2d(video_stream->time_base)
  • av_q2d(AVRational a){ return a.num / (double)a.den; }

Computing the next frame's PTS:

  • video_clock: the predicted PTS of the next video frame
  • frame_delay: 1/tbr
  • audio_clock: the timestamp the audio is currently playing at

To synchronize, compare audio_clock and video_clock: if the video frame's time is at or before the audio time, display it immediately; if it is after the audio time, delay for a while before displaying it (the delay is roughly video_clock - audio_clock).

Audio/video sync strategies:

  • Sync the video to the audio
  • Sync the audio to the video
  • Sync both audio and video to an external (system) clock

The basic idea of video playback:

  • The usual approach: after showing a video frame, get the PTS of the next frame to display, set a timer from it, and when the timer fires, refresh with the new video frame; repeat.

The simplest player:

#include <stdio.h>
#include <assert.h>
#include <math.h>

#include <SDL.h>

#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>

// compatibility with newer API
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55,28,1)
#define av_frame_alloc avcodec_alloc_frame
#define av_frame_free avcodec_free_frame
#endif

#define SDL_AUDIO_BUFFER_SIZE 1024
#define MAX_AUDIO_FRAME_SIZE 192000 //channels(2) * data_size(2) * sample_rate(48000)

#define MAX_AUDIOQ_SIZE (5 * 16 * 1024)
#define MAX_VIDEOQ_SIZE (5 * 256 * 1024)

#define AV_SYNC_THRESHOLD 0.01
#define AV_NOSYNC_THRESHOLD 10.0

#define SAMPLE_CORRECTION_PERCENT_MAX 10
#define AUDIO_DIFF_AVG_NB 20

#define FF_REFRESH_EVENT (SDL_USEREVENT)
#define FF_QUIT_EVENT (SDL_USEREVENT + 1)

#define VIDEO_PICTURE_QUEUE_SIZE 1
#define DEFAULT_AV_SYNC_TYPE AV_SYNC_AUDIO_MASTER //AV_SYNC_VIDEO_MASTER

typedef struct PacketQueue {
AVPacketList *first_pkt, *last_pkt;
int nb_packets;
int size;
SDL_mutex *mutex;
SDL_cond *cond;
} PacketQueue;

typedef struct VideoPicture {
AVPicture *bmp;
int width, height; /* source height & width */
int allocated;
double pts;
} VideoPicture;

typedef struct VideoState {
//multi-media file
char filename[1024];
AVFormatContext *pFormatCtx;
int videoStream, audioStream;

//sync
int av_sync_type;
double external_clock; /* external clock base */
int64_t external_clock_time;

double audio_diff_cum; /* used for AV difference average computation */
double audio_diff_avg_coef;
double audio_diff_threshold;
int audio_diff_avg_count;

double audio_clock;
double frame_timer;
double frame_last_pts;
double frame_last_delay;

double video_clock; ///<pts of last decoded frame / predicted pts of next decoded frame
double video_current_pts; ///<current displayed pts (different from video_clock if frame fifos are used)
int64_t video_current_pts_time; ///<time (av_gettime) at which we updated video_current_pts - used to have running video pts

//audio
AVStream *audio_st;
AVCodecContext *audio_ctx;
PacketQueue audioq;
uint8_t audio_buf[(MAX_AUDIO_FRAME_SIZE * 3) / 2];
unsigned int audio_buf_size;
unsigned int audio_buf_index;
AVFrame audio_frame;
AVPacket audio_pkt;
uint8_t *audio_pkt_data;
int audio_pkt_size;
int audio_hw_buf_size;

//video
AVStream *video_st;
AVCodecContext *video_ctx;
PacketQueue videoq;
struct SwsContext *video_sws_ctx;
struct SwrContext *audio_swr_ctx;

VideoPicture pictq[VIDEO_PICTURE_QUEUE_SIZE];
int pictq_size, pictq_rindex, pictq_windex;
SDL_mutex *pictq_mutex;
SDL_cond *pictq_cond;

SDL_Thread *parse_tid;
SDL_Thread *video_tid;

int quit;
} VideoState;

SDL_mutex *text_mutex;
SDL_Window *win = NULL;
SDL_Renderer *renderer;
SDL_Texture *texture;

enum {
AV_SYNC_AUDIO_MASTER,
AV_SYNC_VIDEO_MASTER,
AV_SYNC_EXTERNAL_MASTER,
};

FILE *yuvfd = NULL;
FILE *audiofd = NULL;

/* Since we only have one decoding thread, the Big Struct
can be global in case we need it. */
VideoState *global_video_state;

void packet_queue_init(PacketQueue *q) {
memset(q, 0, sizeof(PacketQueue));
q->mutex = SDL_CreateMutex();
q->cond = SDL_CreateCond();
}

int packet_queue_put(PacketQueue *q, AVPacket *pkt) {
AVPacketList *pkt1;
if(av_dup_packet(pkt) < 0) {
return -1;
}
pkt1 = av_malloc(sizeof(AVPacketList));
if (!pkt1)
return -1;
pkt1->pkt = *pkt;
pkt1->next = NULL;

SDL_LockMutex(q->mutex);

if (!q->last_pkt)
q->first_pkt = pkt1;
else
q->last_pkt->next = pkt1;
q->last_pkt = pkt1;
q->nb_packets++;
q->size += pkt1->pkt.size;

SDL_CondSignal(q->cond);
SDL_UnlockMutex(q->mutex);
return 0;
}

int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
{
AVPacketList *pkt1;
int ret;

SDL_LockMutex(q->mutex);

for(;;) {
if(global_video_state->quit) {
ret = -1;
break;
}

pkt1 = q->first_pkt;
if (pkt1) {
q->first_pkt = pkt1->next;
if (!q->first_pkt)
q->last_pkt = NULL;
q->nb_packets--;
q->size -= pkt1->pkt.size;
*pkt = pkt1->pkt;
av_free(pkt1);
ret = 1;
break;
} else if (!block) {
ret = 0;
break;
} else {
SDL_CondWait(q->cond, q->mutex);
}
}
SDL_UnlockMutex(q->mutex);
return ret;
}

double get_audio_clock(VideoState *is) {
double pts;
int hw_buf_size, bytes_per_sec, n;

pts = is->audio_clock; /* maintained in the audio thread */
hw_buf_size = is->audio_buf_size - is->audio_buf_index;
bytes_per_sec = 0;
n = is->audio_ctx->channels * 2;
if(is->audio_st) {
bytes_per_sec = is->audio_ctx->sample_rate * n;
}
if(bytes_per_sec) {
pts -= (double)hw_buf_size / bytes_per_sec;
}
return pts;
}

double get_video_clock(VideoState *is) {
double delta;
delta = (av_gettime() - is->video_current_pts_time) / 1000000.0;
return is->video_current_pts + delta;
}

double get_external_clock(VideoState *is) {
return av_gettime() / 1000000.0;
}

double get_master_clock(VideoState *is) {
if(is->av_sync_type == AV_SYNC_VIDEO_MASTER) {
return get_video_clock(is);
} else if(is->av_sync_type == AV_SYNC_AUDIO_MASTER) {
return get_audio_clock(is);
} else {
return get_external_clock(is);
}
}

/* Add or subtract samples to get a better sync, return new
audio buffer size */
int synchronize_audio(VideoState *is, short *samples,
int samples_size, double pts) {
int n;
double ref_clock;

n = 2 * is->audio_ctx->channels;

if(is->av_sync_type != AV_SYNC_AUDIO_MASTER) {
double diff, avg_diff;
int wanted_size, min_size, max_size /*, nb_samples */;

ref_clock = get_master_clock(is);
diff = get_audio_clock(is) - ref_clock;

if(diff < AV_NOSYNC_THRESHOLD) {
// accumulate the diffs
is->audio_diff_cum = diff + is->audio_diff_avg_coef
* is->audio_diff_cum;
if(is->audio_diff_avg_count < AUDIO_DIFF_AVG_NB) {
is->audio_diff_avg_count++;
} else {
avg_diff = is->audio_diff_cum * (1.0 - is->audio_diff_avg_coef);
if(fabs(avg_diff) >= is->audio_diff_threshold) {
wanted_size = samples_size + ((int)(diff * is->audio_ctx->sample_rate) * n);
min_size = samples_size * ((100 - SAMPLE_CORRECTION_PERCENT_MAX) / 100);
max_size = samples_size * ((100 + SAMPLE_CORRECTION_PERCENT_MAX) / 100);
if(wanted_size < min_size) {
wanted_size = min_size;
} else if (wanted_size > max_size) {
wanted_size = max_size;
}
if(wanted_size < samples_size) {
/* remove samples */
samples_size = wanted_size;
} else if(wanted_size > samples_size) {
uint8_t *samples_end, *q;
int nb;

/* add samples by copying final sample*/
nb = (samples_size - wanted_size);
samples_end = (uint8_t *)samples + samples_size - n;
q = samples_end + n;
while(nb > 0) {
memcpy(q, samples_end, n);
q += n;
nb -= n;
}
samples_size = wanted_size;
}
}
}
} else {
/* difference is TOO big; reset diff stuff */
is->audio_diff_avg_count = 0;
is->audio_diff_cum = 0;
}
}
return samples_size;
}

int audio_decode_frame(VideoState *is, uint8_t *audio_buf,
int buf_size, double *pts_ptr) {
int len1, data_size = 0;
AVPacket *pkt = &is->audio_pkt;
double pts;
int n;

for(;;) {
while(is->audio_pkt_size > 0) {
int got_frame = 0;
len1 = avcodec_decode_audio4(is->audio_ctx, &is->audio_frame, &got_frame, pkt);
if(len1 < 0) {
/* if error, skip frame */
is->audio_pkt_size = 0;
break;
}
data_size = 0;
if(got_frame) {
/*
data_size = av_samples_get_buffer_size(NULL,
is->audio_ctx->channels,
is->audio_frame.nb_samples,
is->audio_ctx->sample_fmt,
1);
*/
data_size = 2 * is->audio_frame.nb_samples * 2;
assert(data_size <= buf_size);

swr_convert(is->audio_swr_ctx,
&audio_buf,
MAX_AUDIO_FRAME_SIZE*3/2,
(const uint8_t **)is->audio_frame.data,
is->audio_frame.nb_samples);

fwrite(audio_buf, 1, data_size, audiofd);
//memcpy(audio_buf, is->audio_frame.data[0], data_size);
}
is->audio_pkt_data += len1;
is->audio_pkt_size -= len1;
if(data_size <= 0) {
/* No data yet, get more frames */
continue;
}
pts = is->audio_clock;
*pts_ptr = pts;
n = 2 * is->audio_ctx->channels;
is->audio_clock += (double)data_size /
(double)(n * is->audio_ctx->sample_rate);
/* We have data, return it and come back for more later */
return data_size;
}
if(pkt->data)
av_free_packet(pkt);

if(is->quit) {
return -1;
}
/* next packet */
if(packet_queue_get(&is->audioq, pkt, 1) < 0) {
return -1;
}
is->audio_pkt_data = pkt->data;
is->audio_pkt_size = pkt->size;
/* if update, update the audio clock w/pts */
if(pkt->pts != AV_NOPTS_VALUE) {
is->audio_clock = av_q2d(is->audio_st->time_base)*pkt->pts;
}
}
}

void audio_callback(void *userdata, Uint8 *stream, int len) {
VideoState *is = (VideoState *)userdata;
int len1, audio_size;
double pts;

SDL_memset(stream, 0, len);

while(len > 0) {
if(is->audio_buf_index >= is->audio_buf_size) {
/* We have already sent all our data; get more */
audio_size = audio_decode_frame(is, is->audio_buf, sizeof(is->audio_buf), &pts);
if(audio_size < 0) {
/* If error, output silence */
is->audio_buf_size = 1024 * 2 * 2;
memset(is->audio_buf, 0, is->audio_buf_size);
} else {
audio_size = synchronize_audio(is, (int16_t *)is->audio_buf, audio_size, pts);
is->audio_buf_size = audio_size;
}
is->audio_buf_index = 0;
}
len1 = is->audio_buf_size - is->audio_buf_index;
if(len1 > len)
len1 = len;
SDL_MixAudio(stream,(uint8_t *)is->audio_buf + is->audio_buf_index, len1, SDL_MIX_MAXVOLUME);
//memcpy(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, len1);
len -= len1;
stream += len1;
is->audio_buf_index += len1;
}
}

static Uint32 sdl_refresh_timer_cb(Uint32 interval, void *opaque) {
SDL_Event event;
event.type = FF_REFRESH_EVENT;
event.user.data1 = opaque;
SDL_PushEvent(&event);
return 0; /* 0 means stop timer */
}

/* schedule a video refresh in 'delay' ms */
static void schedule_refresh(VideoState *is, int delay) {
SDL_AddTimer(delay, sdl_refresh_timer_cb, is);
}

void video_display(VideoState *is) {
SDL_Rect rect;
VideoPicture *vp;
float aspect_ratio;
int w, h, x, y;
int i;

vp = &is->pictq[is->pictq_rindex];
if(vp->bmp) {

SDL_UpdateYUVTexture(texture, NULL,
vp->bmp->data[0], vp->bmp->linesize[0],
vp->bmp->data[1], vp->bmp->linesize[1],
vp->bmp->data[2], vp->bmp->linesize[2]);

rect.x = 0;
rect.y = 0;
rect.w = is->video_ctx->width;
rect.h = is->video_ctx->height;
SDL_LockMutex(text_mutex);
SDL_RenderClear( renderer );
SDL_RenderCopy( renderer, texture, NULL, &rect);
SDL_RenderPresent( renderer );
SDL_UnlockMutex(text_mutex);
}
}

void video_refresh_timer(void *userdata) {
VideoState *is = (VideoState *)userdata;
VideoPicture *vp;
double actual_delay, delay, sync_threshold, ref_clock, diff;

if(is->video_st) {
if(is->pictq_size == 0) {
schedule_refresh(is, 1);
//fprintf(stderr, "no picture in the queue!!!\n");
} else {
//fprintf(stderr, "get picture from queue!!!\n");
vp = &is->pictq[is->pictq_rindex];

is->video_current_pts = vp->pts;
is->video_current_pts_time = av_gettime();
delay = vp->pts - is->frame_last_pts; /* the pts from last time */
if(delay <= 0 || delay >= 1.0) {
/* if incorrect delay, use previous one */
delay = is->frame_last_delay;
}
/* save for next time */
is->frame_last_delay = delay;
is->frame_last_pts = vp->pts;

/* update delay to sync to audio if not master source */
if(is->av_sync_type != AV_SYNC_VIDEO_MASTER) {
ref_clock = get_master_clock(is);
diff = vp->pts - ref_clock;

/* Skip or repeat the frame. Take delay into account
FFPlay still doesn't "know if this is the best guess." */
sync_threshold = (delay > AV_SYNC_THRESHOLD) ? delay : AV_SYNC_THRESHOLD;
if(fabs(diff) < AV_NOSYNC_THRESHOLD) {
if(diff <= -sync_threshold) {
delay = 0;
} else if(diff >= sync_threshold) {
delay = 2 * delay;
}
}
}
is->frame_timer += delay;
/* computer the REAL delay */
actual_delay = is->frame_timer - (av_gettime() / 1000000.0);
if(actual_delay < 0.010) {
/* Really it should skip the picture instead */
actual_delay = 0.010;
}
schedule_refresh(is, (int)(actual_delay * 1000 + 0.5));

/* show the picture! */
video_display(is);

/* update queue for next picture! */
if(++is->pictq_rindex == VIDEO_PICTURE_QUEUE_SIZE) {
is->pictq_rindex = 0;
}
SDL_LockMutex(is->pictq_mutex);
is->pictq_size--;
SDL_CondSignal(is->pictq_cond);
SDL_UnlockMutex(is->pictq_mutex);
}
} else {
schedule_refresh(is, 100);
}
}

void alloc_picture(void *userdata) {
int ret;

VideoState *is = (VideoState *)userdata;
VideoPicture *vp;

vp = &is->pictq[is->pictq_windex];
if(vp->bmp) {
// we already have one make another, bigger/smaller
avpicture_free(vp->bmp);
free(vp->bmp);
vp->bmp = NULL;
}

// Allocate a place to put our YUV image on that screen
SDL_LockMutex(text_mutex);

vp->bmp = (AVPicture*)malloc(sizeof(AVPicture));
ret = avpicture_alloc(vp->bmp, AV_PIX_FMT_YUV420P, is->video_ctx->width, is->video_ctx->height);
if (ret < 0) {
fprintf(stderr, "Could not allocate temporary picture: %s\n", av_err2str(ret));
}

SDL_UnlockMutex(text_mutex);

vp->width = is->video_ctx->width;
vp->height = is->video_ctx->height;
vp->allocated = 1;
}

int queue_picture(VideoState *is, AVFrame *pFrame, double pts) {
VideoPicture *vp;

/* wait until we have space for a new pic */
SDL_LockMutex(is->pictq_mutex);
while(is->pictq_size >= VIDEO_PICTURE_QUEUE_SIZE &&
!is->quit) {
SDL_CondWait(is->pictq_cond, is->pictq_mutex);
}
SDL_UnlockMutex(is->pictq_mutex);

if(is->quit)
return -1;

// windex is set to 0 initially
vp = &is->pictq[is->pictq_windex];

/* allocate or resize the buffer! */
if(!vp->bmp ||
vp->width != is->video_ctx->width ||
vp->height != is->video_ctx->height) {

vp->allocated = 0;
alloc_picture(is);
if(is->quit) {
return -1;
}
}

/* We have a place to put our picture on the queue */
if(vp->bmp) {
vp->pts = pts;

// Convert the image into YUV format that SDL uses
sws_scale(is->video_sws_ctx, (uint8_t const * const *)pFrame->data,
pFrame->linesize, 0, is->video_ctx->height,
vp->bmp->data, vp->bmp->linesize);

/* now we inform our display thread that we have a pic ready */
if(++is->pictq_windex == VIDEO_PICTURE_QUEUE_SIZE) {
is->pictq_windex = 0;
}
SDL_LockMutex(is->pictq_mutex);
is->pictq_size++;
SDL_UnlockMutex(is->pictq_mutex);
}
return 0;
}

double synchronize_video(VideoState *is, AVFrame *src_frame, double pts) {
double frame_delay;

if(pts != 0) {
/* if we have pts, set video clock to it */
is->video_clock = pts;
} else {
/* if we aren't given a pts, set it to the clock */
pts = is->video_clock;
}
/* update the video clock */
frame_delay = av_q2d(is->video_ctx->time_base);
/* if we are repeating a frame, adjust clock accordingly */
frame_delay += src_frame->repeat_pict * (frame_delay * 0.5);
is->video_clock += frame_delay;
return pts;
}

int decode_video_thread(void *arg) {
VideoState *is = (VideoState *)arg;
AVPacket pkt1, *packet = &pkt1;
int frameFinished;
AVFrame *pFrame;
double pts;

pFrame = av_frame_alloc();

for(;;) {
if(packet_queue_get(&is->videoq, packet, 1) < 0) {
// means we quit getting packets
break;
}
pts = 0;

// Decode video frame
avcodec_decode_video2(is->video_ctx, pFrame, &frameFinished, packet);

if((pts = av_frame_get_best_effort_timestamp(pFrame)) != AV_NOPTS_VALUE) {
} else {
pts = 0;
}
pts *= av_q2d(is->video_st->time_base);

// Did we get a video frame?
if(frameFinished) {
pts = synchronize_video(is, pFrame, pts);
if(queue_picture(is, pFrame, pts) < 0) {
break;
}
}
av_free_packet(packet);
}
av_frame_free(&pFrame);
return 0;
}

int stream_component_open(VideoState *is, int stream_index) {
AVFormatContext *pFormatCtx = is->pFormatCtx;
AVCodecContext *codecCtx = NULL;
AVCodec *codec = NULL;
SDL_AudioSpec wanted_spec, spec;

if(stream_index < 0 || stream_index >= pFormatCtx->nb_streams) {
return -1;
}

codecCtx = avcodec_alloc_context3(NULL);

int ret = avcodec_parameters_to_context(codecCtx, pFormatCtx->streams[stream_index]->codecpar);
if (ret < 0)
return -1;

codec = avcodec_find_decoder(codecCtx->codec_id);
if(!codec) {
fprintf(stderr, "Unsupported codec!\n");
return -1;
}

if(codecCtx->codec_type == AVMEDIA_TYPE_AUDIO) {
// Set audio settings from codec info
wanted_spec.freq = codecCtx->sample_rate;
wanted_spec.format = AUDIO_S16SYS;
wanted_spec.channels = 2;//codecCtx->channels;
wanted_spec.silence = 0;
wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
wanted_spec.callback = audio_callback;
wanted_spec.userdata = is;

fprintf(stderr, "wanted spec: channels:%d, sample_fmt:%d, sample_rate:%d \n",
2, AUDIO_S16SYS, codecCtx->sample_rate);

if(SDL_OpenAudio(&wanted_spec, &spec) < 0) {
fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
return -1;
}
is->audio_hw_buf_size = spec.size;
}

if(avcodec_open2(codecCtx, codec, NULL) < 0) {
fprintf(stderr, "Unsupported codec!\n");
return -1;
}

switch(codecCtx->codec_type) {
case AVMEDIA_TYPE_AUDIO:
is->audioStream = stream_index;
is->audio_st = pFormatCtx->streams[stream_index];
is->audio_ctx = codecCtx;
is->audio_buf_size = 0;
is->audio_buf_index = 0;
memset(&is->audio_pkt, 0, sizeof(is->audio_pkt));
packet_queue_init(&is->audioq);

//Out Audio Param
uint64_t out_channel_layout=AV_CH_LAYOUT_STEREO;

//AAC:1024 MP3:1152
int out_nb_samples= is->audio_ctx->frame_size;
//AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16;

int out_sample_rate=is->audio_ctx->sample_rate;
int out_channels=av_get_channel_layout_nb_channels(out_channel_layout);
//Out Buffer Size
/*
int out_buffer_size=av_samples_get_buffer_size(NULL,
out_channels,
out_nb_samples,
AV_SAMPLE_FMT_S16,
1);
*/

//uint8_t *out_buffer=(uint8_t *)av_malloc(MAX_AUDIO_FRAME_SIZE*2);
int64_t in_channel_layout=av_get_default_channel_layout(is->audio_ctx->channels);

struct SwrContext *audio_convert_ctx;
audio_convert_ctx = swr_alloc();
swr_alloc_set_opts(audio_convert_ctx,
out_channel_layout,
AV_SAMPLE_FMT_S16,
out_sample_rate,
in_channel_layout,
is->audio_ctx->sample_fmt,
is->audio_ctx->sample_rate,
0,
NULL);
fprintf(stderr, "swr opts: out_channel_layout:%lld, out_sample_fmt:%d, out_sample_rate:%d, in_channel_layout:%lld, in_sample_fmt:%d, in_sample_rate:%d",
out_channel_layout,
AV_SAMPLE_FMT_S16,
out_sample_rate,
in_channel_layout,
is->audio_ctx->sample_fmt,
is->audio_ctx->sample_rate);
swr_init(audio_convert_ctx);
is->audio_swr_ctx = audio_convert_ctx;

SDL_PauseAudio(0);
break;
case AVMEDIA_TYPE_VIDEO:
is->videoStream = stream_index;
is->video_st = pFormatCtx->streams[stream_index];
is->video_ctx = codecCtx;

is->frame_timer = (double)av_gettime() / 1000000.0;
is->frame_last_delay = 40e-3;
is->video_current_pts_time = av_gettime();

packet_queue_init(&is->videoq);
is->video_sws_ctx = sws_getContext(
is->video_ctx->width, is->video_ctx->height,
is->video_ctx->pix_fmt, is->video_ctx->width,
is->video_ctx->height, AV_PIX_FMT_YUV420P,
SWS_BILINEAR, NULL, NULL, NULL);
is->video_tid = SDL_CreateThread(decode_video_thread, "decode_video_thread", is);
break;
default:
break;
}

return 0;
}

int demux_thread(void *arg) {
int err_code;
char errors[1024] = {0,};

VideoState *is = (VideoState *)arg;
AVFormatContext *pFormatCtx;
AVPacket pkt1, *packet = &pkt1;

int video_index = -1;
int audio_index = -1;
int i;

is->videoStream=-1;
is->audioStream=-1;

global_video_state = is;

/* open input file, and allocate format context */
if ((err_code=avformat_open_input(&pFormatCtx, is->filename, NULL, NULL)) < 0) {
av_strerror(err_code, errors, 1024);
fprintf(stderr, "Could not open source file %s, %d(%s)\n", is->filename, err_code, errors);
return -1;
}

is->pFormatCtx = pFormatCtx;

// Retrieve stream information
if(avformat_find_stream_info(pFormatCtx, NULL)<0)
return -1; // Couldn't find stream information

// Dump information about file onto standard error
av_dump_format(pFormatCtx, 0, is->filename, 0);

// Find the first video stream

for(i=0; i<pFormatCtx->nb_streams; i++) {
if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO &&
video_index < 0) {
video_index=i;
}
if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO &&
audio_index < 0) {
audio_index=i;
}
}
if(audio_index >= 0) {
stream_component_open(is, audio_index);
}
if(video_index >= 0) {
stream_component_open(is, video_index);
}

if(is->videoStream < 0 || is->audioStream < 0) {
fprintf(stderr, "%s: could not open codecs\n", is->filename);
goto fail;
}

// create the SDL window
win = SDL_CreateWindow("Media Player",
SDL_WINDOWPOS_UNDEFINED,
SDL_WINDOWPOS_UNDEFINED,
is->video_ctx->width, is->video_ctx->height,
SDL_WINDOW_OPENGL|SDL_WINDOW_RESIZABLE);
if(!win) {
fprintf(stderr, "SDL: could not set video mode - exiting\n");
exit(1);
}

renderer = SDL_CreateRenderer(win, -1, 0);

//IYUV: Y + U + V (3 planes)
//YV12: Y + V + U (3 planes)
Uint32 pixformat= SDL_PIXELFORMAT_IYUV;

//create texture for render
texture = SDL_CreateTexture(renderer,
pixformat,
SDL_TEXTUREACCESS_STREAMING,
is->video_ctx->width,
is->video_ctx->height);

// main decode loop
for(;;) {
if(is->quit) {
break;
}
// seek stuff goes here
if(is->audioq.size > MAX_AUDIOQ_SIZE ||
is->videoq.size > MAX_VIDEOQ_SIZE) {
SDL_Delay(10);
continue;
}
if(av_read_frame(is->pFormatCtx, packet) < 0) {
if(is->pFormatCtx->pb->error == 0) {
SDL_Delay(100); /* no error; wait for user input */
continue;
} else {
break;
}
}
// Is this a packet from the video stream?
if(packet->stream_index == is->videoStream) {
packet_queue_put(&is->videoq, packet);
} else if(packet->stream_index == is->audioStream) {
packet_queue_put(&is->audioq, packet);
} else {
av_free_packet(packet);
}
}
/* all done - wait for it */
while(!is->quit) {
SDL_Delay(100);
}

fail:
if(1){
SDL_Event event;
event.type = FF_QUIT_EVENT;
event.user.data1 = is;
SDL_PushEvent(&event);
}
return 0;
}

int main(int argc, char *argv[]) {
SDL_Event event;
VideoState *is;

is = av_mallocz(sizeof(VideoState));
if(argc < 2) {
fprintf(stderr, "Usage: test <file>\n");
exit(1);
}

yuvfd = fopen("testout.yuv", "wb+");
audiofd = fopen("testout.pcm", "wb+");
// Register all formats and codecs
av_register_all();

if(SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
exit(1);
}

text_mutex = SDL_CreateMutex();
av_strlcpy(is->filename, argv[1], sizeof(is->filename));
is->pictq_mutex = SDL_CreateMutex();
is->pictq_cond = SDL_CreateCond();

schedule_refresh(is, 40);
is->av_sync_type = DEFAULT_AV_SYNC_TYPE;
is->parse_tid = SDL_CreateThread(demux_thread,"demux_thread", is);
if(!is->parse_tid) {
av_free(is);
return -1;
}
for(;;) {
SDL_WaitEvent(&event);
switch(event.type) {
case FF_QUIT_EVENT:
case SDL_QUIT:
is->quit = 1;
SDL_Quit();
return 0;
break;
case FF_REFRESH_EVENT:
video_refresh_timer(event.user.data1);
break;
default:
break;
}
}

fclose(yuvfd);
fclose(audiofd);
return 0;
}


7. Using FFmpeg on Android

Android architecture:

Topics:

  • Calling back and forth between Java and C
  • Building FFmpeg for Android
  • Using FFmpeg on Android

Your first JNI program:

TODO

JNI basics:

  • JNIEnv
  • JavaVM: an Android app has exactly one JavaVM, and one JavaVM can have multiple JNIEnvs
  • Threads: each thread has its own JNIEnv

Calling C/C++ from Java, approach one:

  • Declare a native method in the Java layer

  • Approach one: implement a

    Java_packname_classname_methodname function on the C/C++ side

Calling C/C++ from Java, approach two:

What a signature is:

  • When Java and C/C++ call each other, it is the descriptor for the function's parameters and return type
  • Input parameters go inside the parentheses; the return type goes after them
  • Parameters are listed in order, and reference-type descriptors end with ";" (for example, "(ILjava/lang/String;)V" takes an int and a String and returns void)

Calling Java from C/C++ (a minimal sketch follows the list):

  • FindClass
  • GetMethodID / GetFieldID
  • NewObject
  • Call<TYPE>Method / [G/S]et<type>Field
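
A minimal JNI sketch in C showing both directions (the package, class, and method names are made up for illustration). The exported function name follows the Java_package_ClassName_methodName convention from approach one, and inside it we call back into Java by looking up a method ID from a signature:

#include <jni.h>

/* Hypothetical Java side:
 *
 *   package com.example;
 *   public class Player {
 *       public native void start();          // bound to the C function below
 *       public void onProgress(int pct) { }  // called back from C
 *   }
 */
JNIEXPORT void JNICALL
Java_com_example_Player_start(JNIEnv *env, jobject thiz)
{
    // C -> Java: get the class, look the method up by name and signature,
    // then call it. "(I)V" means one int parameter, void return.
    jclass cls = (*env)->GetObjectClass(env, thiz);
    jmethodID mid = (*env)->GetMethodID(env, cls, "onProgress", "(I)V");
    if (mid == NULL)
        return;
    (*env)->CallVoidMethod(env, thiz, mid, 50);
}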

7.1 [Hands-on] A Player on Android

TODO

8. Using FFmpeg on iOS

TODO

9. Advanced Audio/Video Topics

  • Using FFmpeg filters
  • Trimming down and optimizing FFmpeg
  • Video rendering (OpenGL / Metal)
  • Audio effects
  • Network transport
  • WebRTC: real-time interaction, live streaming, P2P audio/video transport
  • AR technology
  • OpenCV

Industry pain points:

  • Echo cancellation
  • Noise suppression
  • Instant video startup
  • Multi-party, multi-stream real-time interaction
  • Real-time video across PC, mobile apps, and the web
  • Real-time interaction under heavy concurrent load

FFmpeg audio/video sync: principles and implementation

Audio/video sync solutions

An FFmpeg-based audio/video sync algorithm

How Audio/Video Sync Works

If you simply play audio at its sample rate and video at its frame rate, differences in machine speed, decoding efficiency, and other timing factors make it very hard to stay in sync, and the audio/video drift grows linearly. So there are three ways to synchronize:

  • Sync both audio and video to an external clock. This was my first thought, but it is not great: for biological reasons, people are more sensitive to changes in sound than to changes in what they see, so frequently adjusting audio playback produces harsh or noisy artifacts and hurts the user experience. (ps: free biology trivia; I feel very sophisticated.)
  • Use video as the master and sync audio to it. Not used, for the reason above.
  • Use audio as the master and sync video to it. This is the approach taken.

So the principle is: take the audio time as the reference, judge whether the video is running fast or slow, and adjust the video's pace accordingly. It is really a dynamic process of catching up and waiting.

Some Concepts

Both audio and video carry DTS and PTS.

  • DTS, Decoding Time Stamp: tells the decoder in what order to decode packets.
  • PTS, Presentation Time Stamp: indicates the order in which the data decoded from packets should be displayed.
  • For audio the two are identical, but for video, B frames (bidirectional prediction) make the decode order differ from the display order, so DTS and PTS are not necessarily the same.

The time base: see the FFmpeg source:

AVRational time_base;
/**
* rational number numerator/denominator
*/
typedef struct AVRational{
int num; ///< numerator
int den; ///< denominator
} AVRational;

My understanding: it is simply FFmpeg representing the time unit as a fraction, with num the numerator and den the denominator. FFmpeg also provides a conversion helper:

/**
* Convert rational to double.
* @param a rational to convert
* @return (double) a
*/
static inline double av_q2d(AVRational a){
return a.num / (double) a.den;
}

So the display time of a given frame in the video is computed as (in seconds):

time = pts * av_q2d(time_base);

Sync Code

Audio side

clock is the audio playback position (the time from the start up to now):

if (packet->pts != AV_NOPTS_VALUE) {
audio->clock = av_q2d(audio->time_base) * packet->pts;
}

Then add the playback duration of the data in this packet:

double time = datalen/((double) 44100 *2 * 2);
audio->clock = audio->clock +time;

datalen is the length of the data in bytes. The sample rate is 44100, the sample size 16 bits, and the channel count 2, so the duration is data length / bytes per second; for example, 4096 bytes / (44100 × 2 × 2) ≈ 0.023 s.

ps: this calculation is not perfect and has quite a few issues; I will revisit it later.

Video side

First, define a few variables:

double last_play,       // playback time of the previous frame
       play,            // playback time of the current frame
       last_delay,      // the inter-frame delay used last time
       delay,           // delay between two video frames
       audio_clock,     // actual playback time of the audio track
       diff,            // time difference between the audio and video frames
       sync_threshold,  // the acceptable range
       start_time,      // absolute time since the first frame
       pts,
       actual_delay;    // the delay we actually need

start_time = av_gettime() / 1000000.0;
// get the pts
if ((pts = av_frame_get_best_effort_timestamp(frame)) == AV_NOPTS_VALUE) {
    pts = 0;
}
play = pts * av_q2d(vedio->time_base);
// correct the playback time
play = vedio->synchronize(frame, play);
delay = play - last_play;
if (delay <= 0 || delay > 1) {
    delay = last_delay;
}
audio_clock = vedio->audio->clock;
last_delay = delay;
last_play = play;
// time difference between audio and video
diff = vedio->clock - audio_clock;
// only delay or speed up when outside the acceptable range
sync_threshold = (delay > 0.01 ? 0.01 : delay);
if (fabs(diff) < 10) {
    if (diff <= -sync_threshold) {
        delay = 0;
    } else if (diff >= sync_threshold) {
        delay = 2 * delay;
    }
}
start_time += delay;
actual_delay = start_time - av_gettime() / 1000000.0;
if (actual_delay < 0.01) {
    actual_delay = 0.01;
}
// sleep; ffmpeg suggests writing it this way (exactly why is still to be studied)
av_usleep(actual_delay * 1000000.0 + 6000);

// The method that corrects play (the playback time); repeat_pict / (2 * fps)
// comes from ffmpeg's own comments
double synchronize(AVFrame *frame, double play) {
    // clock is the current playback position
    if (play != 0)
        clock = play;
    else  // if pts is 0, set it to the previous frame's time first
        play = clock;
    // pts may be 0, so advance clock ourselves;
    // we need to work out the extra delay:
    double repeat_pict = frame->repeat_pict;
    // use the AVCodecContext's time_base, not the stream's
    double frame_delay = av_q2d(codec->time_base);
    // fps
    double fps = 1 / frame_delay;
    // pts plus this delay is the display time
    double extra_delay = repeat_pict / (2 * fps);
    double delay = extra_delay + frame_delay;
    clock += delay;
    return play;
}


Solving the Pain Points

Echo cancellation solutions:

Adaptive acoustic echo cancellation (AEC) algorithms for speech

How echo cancellation (AEC) works

Research and implementation of audio noise suppression in 58 Live

Instant video startup:

Instant startup and optimization for live video

The technology and optimization experience behind instant live-video startup

A few things about "instant playback" for short video

Baidu LSS live audio/video instant startup

The player's "compromises" are what make video "instant startup" possible!

Multi-party real-time video:

The state of WebRTC and an analysis of multi-party video calls

Multi-party video co-streaming: an efficient way to interact in live streaming

Real-time interaction and heavy concurrent load:

An RTP live-distribution server cluster design

Exploring architectures for real-time interactive live streaming at massive scale

Architecture questions about live-streaming technology encountered during development

Thanks for your support; it keeps me working hard to share useful techniques and knowledge.