[
  {
    "path": ".github/workflows/docker-image.yml",
    "content": "name: Docker Image CI\n\non:\n  push:\n    branches: [ \"master\" ]\n  pull_request:\n    branches: [ \"master\" ]\n\njobs:\n\n  build:\n\n    runs-on: ubuntu-latest\n\n    steps:\n    - uses: actions/checkout@v4\n    - name: Run hello example\n      run: make make_hello\n    - name: Run remuxing\n      run: make make_remuxing\n    - name: Run transcoding\n      run: make make_transcoding\n      \n"
  },
  {
    "path": ".gitignore",
    "content": "*pgm\nbuild/*\nbunny_1080p_60fps.mp4\nbunny_1s_gop.mp4\nbunny_1s_gop.mp4.ts\nbunny_1s_gop.mp4.webm\n.vscode\n.clangd\ncompile_commands.json"
  },
  {
    "path": "0_hello_world.c",
    "content": "/*\n * http://ffmpeg.org/doxygen/trunk/index.html\n *\n * Main components\n *\n * Format (Container) - a wrapper, providing sync, metadata and muxing for the streams.\n * Stream - a continuous stream (audio or video) of data over time.\n * Codec - defines how data are enCOded (from Frame to Packet)\n *        and DECoded (from Packet to Frame).\n * Packet - are the data (kind of slices of the stream data) to be decoded as raw frames.\n * Frame - a decoded raw frame (to be encoded or filtered).\n */\n\n#include <libavcodec/avcodec.h>\n#include <libavformat/avformat.h>\n#include <stdio.h>\n#include <stdarg.h>\n#include <stdlib.h>\n#include <string.h>\n#include <inttypes.h>\n\n// print out the steps and errors\nstatic void logging(const char *fmt, ...);\n// decode packets into frames\nstatic int decode_packet(AVPacket *pPacket, AVCodecContext *pCodecContext, AVFrame *pFrame);\n// save a frame into a .pgm file\nstatic void save_gray_frame(unsigned char *buf, int wrap, int xsize, int ysize, char *filename);\n\nint main(int argc, const char *argv[])\n{\n\n  if (argc < 2) {\n    printf(\"You need to specify a media file.\\n\");\n    return -1;\n  }\n  \n  logging(\"initializing all the containers, codecs and protocols.\");\n\n  // AVFormatContext holds the header information from the format (Container)\n  // Allocating memory for this component\n  // http://ffmpeg.org/doxygen/trunk/structAVFormatContext.html\n  AVFormatContext *pFormatContext = avformat_alloc_context();\n  if (!pFormatContext) {\n    logging(\"ERROR could not allocate memory for Format Context\");\n    return -1;\n  }\n\n  logging(\"opening the input file (%s) and loading format (container) header\", argv[1]);\n  // Open the file and read its header. 
The codecs are not opened.\n  // The function arguments are:\n  // AVFormatContext (the component we allocated memory for),\n  // url (filename),\n  // AVInputFormat (if you pass NULL it'll do the auto detect)\n  // and AVDictionary (which are options to the demuxer)\n  // http://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga31d601155e9035d5b0e7efedc894ee49\n  if (avformat_open_input(&pFormatContext, argv[1], NULL, NULL) != 0) {\n    logging(\"ERROR could not open the file\");\n    return -1;\n  }\n\n  // now we have access to some information about our file\n  // since we read its header we can say what format (container) it is\n  // and some other information related to the format itself.\n  logging(\"format %s, duration %lld us, bit_rate %lld\", pFormatContext->iformat->name, pFormatContext->duration, pFormatContext->bit_rate);\n\n  logging(\"finding stream info from format\");\n  // read Packets from the Format to get stream information\n  // this function populates pFormatContext->streams\n  // (of size equal to pFormatContext->nb_streams)\n  // the arguments are:\n  // the AVFormatContext\n  // and options contains options for codec corresponding to i-th stream.\n  // On return each dictionary will be filled with options that were not found.\n  // https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#gad42172e27cddafb81096939783b157bb\n  if (avformat_find_stream_info(pFormatContext,  NULL) < 0) {\n    logging(\"ERROR could not get the stream info\");\n    return -1;\n  }\n\n  // the component that knows how to enCOde and DECode the stream\n  // it's the codec (audio or video)\n  // http://ffmpeg.org/doxygen/trunk/structAVCodec.html\n  AVCodec *pCodec = NULL;\n  // this component describes the properties of a codec used by the stream i\n  // https://ffmpeg.org/doxygen/trunk/structAVCodecParameters.html\n  AVCodecParameters *pCodecParameters =  NULL;\n  int video_stream_index = -1;\n\n  // loop through all the streams and print its main 
information\n  for (int i = 0; i < pFormatContext->nb_streams; i++)\n  {\n    AVCodecParameters *pLocalCodecParameters =  NULL;\n    pLocalCodecParameters = pFormatContext->streams[i]->codecpar;\n    logging(\"AVStream->time_base before open coded %d/%d\", pFormatContext->streams[i]->time_base.num, pFormatContext->streams[i]->time_base.den);\n    logging(\"AVStream->r_frame_rate before open coded %d/%d\", pFormatContext->streams[i]->r_frame_rate.num, pFormatContext->streams[i]->r_frame_rate.den);\n    logging(\"AVStream->start_time %\" PRId64, pFormatContext->streams[i]->start_time);\n    logging(\"AVStream->duration %\" PRId64, pFormatContext->streams[i]->duration);\n\n    logging(\"finding the proper decoder (CODEC)\");\n\n    AVCodec *pLocalCodec = NULL;\n\n    // finds the registered decoder for a codec ID\n    // https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga19a0ca553277f019dd5b0fec6e1f9dca\n    pLocalCodec = avcodec_find_decoder(pLocalCodecParameters->codec_id);\n\n    if (pLocalCodec==NULL) {\n      logging(\"ERROR unsupported codec!\");\n      // In this example if the codec is not found we just skip it\n      continue;\n    }\n\n    // when the stream is a video we store its index, codec parameters and codec\n    if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_VIDEO) {\n      if (video_stream_index == -1) {\n        video_stream_index = i;\n        pCodec = pLocalCodec;\n        pCodecParameters = pLocalCodecParameters;\n      }\n\n      logging(\"Video Codec: resolution %d x %d\", pLocalCodecParameters->width, pLocalCodecParameters->height);\n    } else if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_AUDIO) {\n      logging(\"Audio Codec: %d channels, sample rate %d\", pLocalCodecParameters->channels, pLocalCodecParameters->sample_rate);\n    }\n\n    // print its name, id and bitrate\n    logging(\"\\tCodec %s ID %d bit_rate %lld\", pLocalCodec->name, pLocalCodec->id, pLocalCodecParameters->bit_rate);\n  }\n\n  if 
(video_stream_index == -1) {\n    logging(\"File %s does not contain a video stream!\", argv[1]);\n    return -1;\n  }\n\n  // https://ffmpeg.org/doxygen/trunk/structAVCodecContext.html\n  AVCodecContext *pCodecContext = avcodec_alloc_context3(pCodec);\n  if (!pCodecContext)\n  {\n    logging(\"failed to allocated memory for AVCodecContext\");\n    return -1;\n  }\n\n  // Fill the codec context based on the values from the supplied codec parameters\n  // https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#gac7b282f51540ca7a99416a3ba6ee0d16\n  if (avcodec_parameters_to_context(pCodecContext, pCodecParameters) < 0)\n  {\n    logging(\"failed to copy codec params to codec context\");\n    return -1;\n  }\n\n  // Initialize the AVCodecContext to use the given AVCodec.\n  // https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d\n  if (avcodec_open2(pCodecContext, pCodec, NULL) < 0)\n  {\n    logging(\"failed to open codec through avcodec_open2\");\n    return -1;\n  }\n\n  // https://ffmpeg.org/doxygen/trunk/structAVFrame.html\n  AVFrame *pFrame = av_frame_alloc();\n  if (!pFrame)\n  {\n    logging(\"failed to allocate memory for AVFrame\");\n    return -1;\n  }\n  // https://ffmpeg.org/doxygen/trunk/structAVPacket.html\n  AVPacket *pPacket = av_packet_alloc();\n  if (!pPacket)\n  {\n    logging(\"failed to allocate memory for AVPacket\");\n    return -1;\n  }\n\n  int response = 0;\n  int how_many_packets_to_process = 8;\n\n  // fill the Packet with data from the Stream\n  // https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga4fdb3084415a82e3810de6ee60e46a61\n  while (av_read_frame(pFormatContext, pPacket) >= 0)\n  {\n    // if it's the video stream\n    if (pPacket->stream_index == video_stream_index) {\n    logging(\"AVPacket->pts %\" PRId64, pPacket->pts);\n      response = decode_packet(pPacket, pCodecContext, pFrame);\n      if (response < 0)\n        break;\n      // stop it, otherwise we'll be saving hundreds 
of frames\n      if (--how_many_packets_to_process <= 0) break;\n    }\n    // https://ffmpeg.org/doxygen/trunk/group__lavc__packet.html#ga63d5a489b419bd5d45cfd09091cbcbc2\n    av_packet_unref(pPacket);\n  }\n\n  logging(\"releasing all the resources\");\n\n  avformat_close_input(&pFormatContext);\n  av_packet_free(&pPacket);\n  av_frame_free(&pFrame);\n  avcodec_free_context(&pCodecContext);\n  return 0;\n}\n\nstatic void logging(const char *fmt, ...)\n{\n    va_list args;\n    fprintf( stderr, \"LOG: \" );\n    va_start( args, fmt );\n    vfprintf( stderr, fmt, args );\n    va_end( args );\n    fprintf( stderr, \"\\n\" );\n}\n\nstatic int decode_packet(AVPacket *pPacket, AVCodecContext *pCodecContext, AVFrame *pFrame)\n{\n  // Supply raw packet data as input to a decoder\n  // https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3\n  int response = avcodec_send_packet(pCodecContext, pPacket);\n\n  if (response < 0) {\n    logging(\"Error while sending a packet to the decoder: %s\", av_err2str(response));\n    return response;\n  }\n\n  while (response >= 0)\n  {\n    // Return decoded output data (into a frame) from a decoder\n    // https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c\n    response = avcodec_receive_frame(pCodecContext, pFrame);\n    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n      break;\n    } else if (response < 0) {\n      logging(\"Error while receiving a frame from the decoder: %s\", av_err2str(response));\n      return response;\n    }\n\n    if (response >= 0) {\n      logging(\n          \"Frame %d (type=%c, size=%d bytes, format=%d) pts %d key_frame %d [DTS %d]\",\n          pCodecContext->frame_number,\n          av_get_picture_type_char(pFrame->pict_type),\n          pFrame->pkt_size,\n          pFrame->format,\n          pFrame->pts,\n          pFrame->key_frame,\n          pFrame->coded_picture_number\n      );\n\n      char 
frame_filename[1024];\n      snprintf(frame_filename, sizeof(frame_filename), \"%s-%d.pgm\", \"frame\", pCodecContext->frame_number);\n      // Check if the frame is a planar YUV 4:2:0, 12bpp\n      // That is the format of the provided .mp4 file\n      // RGB formats will definitely not give a gray image\n      // Other YUV image may do so, but untested, so give a warning\n      if (pFrame->format != AV_PIX_FMT_YUV420P)\n      {\n        logging(\"Warning: the generated file may not be a grayscale image, but could e.g. be just the R component if the video format is RGB\");\n      }\n      // save a grayscale frame into a .pgm file\n      save_gray_frame(pFrame->data[0], pFrame->linesize[0], pFrame->width, pFrame->height, frame_filename);\n    }\n  }\n  return 0;\n}\n\nstatic void save_gray_frame(unsigned char *buf, int wrap, int xsize, int ysize, char *filename)\n{\n    FILE *f;\n    int i;\n    f = fopen(filename,\"w\");\n    // writing the minimal required header for a pgm file format\n    // portable graymap format -> https://en.wikipedia.org/wiki/Netpbm_format#PGM_example\n    fprintf(f, \"P5\\n%d %d\\n%d\\n\", xsize, ysize, 255);\n\n    // writing line by line\n    for (i = 0; i < ysize; i++)\n        fwrite(buf + i * wrap, 1, xsize, f);\n    fclose(f);\n}\n"
  },
  {
    "path": "2_remuxing.c",
    "content": "// based on https://ffmpeg.org/doxygen/trunk/remuxing_8c-example.html\n#include <libavutil/timestamp.h>\n#include <libavformat/avformat.h>\n\nint main(int argc, char **argv)\n{\n  AVFormatContext *input_format_context = NULL, *output_format_context = NULL;\n  AVPacket packet;\n  const char *in_filename, *out_filename;\n  int ret, i;\n  int stream_index = 0;\n  int *streams_list = NULL;\n  int number_of_streams = 0;\n  int fragmented_mp4_options = 0;\n\n  if (argc < 3) {\n    printf(\"You need to pass at least two parameters.\\n\");\n    return -1;\n  } else if (argc == 4) {\n    fragmented_mp4_options = 1;\n  }\n\n  in_filename  = argv[1];\n  out_filename = argv[2];\n\n  if ((ret = avformat_open_input(&input_format_context, in_filename, NULL, NULL)) < 0) {\n    fprintf(stderr, \"Could not open input file '%s'\", in_filename);\n    goto end;\n  }\n  if ((ret = avformat_find_stream_info(input_format_context, NULL)) < 0) {\n    fprintf(stderr, \"Failed to retrieve input stream information\");\n    goto end;\n  }\n\n  avformat_alloc_output_context2(&output_format_context, NULL, NULL, out_filename);\n  if (!output_format_context) {\n    fprintf(stderr, \"Could not create output context\\n\");\n    ret = AVERROR_UNKNOWN;\n    goto end;\n  }\n\n  number_of_streams = input_format_context->nb_streams;\n  streams_list = av_mallocz_array(number_of_streams, sizeof(*streams_list));\n\n  if (!streams_list) {\n    ret = AVERROR(ENOMEM);\n    goto end;\n  }\n\n  for (i = 0; i < input_format_context->nb_streams; i++) {\n    AVStream *out_stream;\n    AVStream *in_stream = input_format_context->streams[i];\n    AVCodecParameters *in_codecpar = in_stream->codecpar;\n    if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&\n        in_codecpar->codec_type != AVMEDIA_TYPE_VIDEO &&\n        in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {\n      streams_list[i] = -1;\n      continue;\n    }\n    streams_list[i] = stream_index++;\n    out_stream = 
avformat_new_stream(output_format_context, NULL);\n    if (!out_stream) {\n      fprintf(stderr, \"Failed allocating output stream\\n\");\n      ret = AVERROR_UNKNOWN;\n      goto end;\n    }\n    ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar);\n    if (ret < 0) {\n      fprintf(stderr, \"Failed to copy codec parameters\\n\");\n      goto end;\n    }\n  }\n  // https://ffmpeg.org/doxygen/trunk/group__lavf__misc.html#gae2645941f2dc779c307eb6314fd39f10\n  av_dump_format(output_format_context, 0, out_filename, 1);\n\n  // unless it's a no file (we'll talk later about that) write to the disk (FLAG_WRITE)\n  // but basically it's a way to save the file to a buffer so you can store it\n  // wherever you want.\n  if (!(output_format_context->oformat->flags & AVFMT_NOFILE)) {\n    ret = avio_open(&output_format_context->pb, out_filename, AVIO_FLAG_WRITE);\n    if (ret < 0) {\n      fprintf(stderr, \"Could not open output file '%s'\", out_filename);\n      goto end;\n    }\n  }\n  AVDictionary* opts = NULL;\n\n  if (fragmented_mp4_options) {\n    // https://developer.mozilla.org/en-US/docs/Web/API/Media_Source_Extensions_API/Transcoding_assets_for_MSE\n    av_dict_set(&opts, \"movflags\", \"frag_keyframe+empty_moov+default_base_moof\", 0);\n  }\n  // https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga18b7b10bb5b94c4842de18166bc677cb\n  ret = avformat_write_header(output_format_context, &opts);\n  if (ret < 0) {\n    fprintf(stderr, \"Error occurred when opening output file\\n\");\n    goto end;\n  }\n  while (1) {\n    AVStream *in_stream, *out_stream;\n    ret = av_read_frame(input_format_context, &packet);\n    if (ret < 0)\n      break;\n    in_stream  = input_format_context->streams[packet.stream_index];\n    if (packet.stream_index >= number_of_streams || streams_list[packet.stream_index] < 0) {\n      av_packet_unref(&packet);\n      continue;\n    }\n    packet.stream_index = streams_list[packet.stream_index];\n    out_stream = 
output_format_context->streams[packet.stream_index];\n    /* copy packet */\n    packet.pts = av_rescale_q_rnd(packet.pts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);\n    packet.dts = av_rescale_q_rnd(packet.dts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);\n    packet.duration = av_rescale_q(packet.duration, in_stream->time_base, out_stream->time_base);\n    // https://ffmpeg.org/doxygen/trunk/structAVPacket.html#ab5793d8195cf4789dfb3913b7a693903\n    packet.pos = -1;\n\n    //https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1\n    ret = av_interleaved_write_frame(output_format_context, &packet);\n    if (ret < 0) {\n      fprintf(stderr, \"Error muxing packet\\n\");\n      break;\n    }\n    av_packet_unref(&packet);\n  }\n  //https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga7f14007e7dc8f481f054b21614dfec13\n  av_write_trailer(output_format_context);\nend:\n  avformat_close_input(&input_format_context);\n  /* close output */\n  if (output_format_context && !(output_format_context->oformat->flags & AVFMT_NOFILE))\n    avio_closep(&output_format_context->pb);\n  avformat_free_context(output_format_context);\n  av_freep(&streams_list);\n  if (ret < 0 && ret != AVERROR_EOF) {\n    fprintf(stderr, \"Error occurred: %s\\n\", av_err2str(ret));\n    return 1;\n  }\n  return 0;\n}\n\n"
  },
  {
    "path": "3_transcoding.c",
    "content": "#include <libavcodec/avcodec.h>\n#include <libavformat/avformat.h>\n#include <libavutil/timestamp.h>\n#include <stdio.h>\n#include <stdarg.h>\n#include <stdlib.h>\n#include <libavutil/opt.h>\n#include <string.h>\n#include <inttypes.h>\n#include \"video_debugging.h\"\n\ntypedef struct StreamingParams {\n  char copy_video;\n  char copy_audio;\n  char *output_extension;\n  char *muxer_opt_key;\n  char *muxer_opt_value;\n  char *video_codec;\n  char *audio_codec;\n  char *codec_priv_key;\n  char *codec_priv_value;\n} StreamingParams;\n\ntypedef struct StreamingContext {\n  AVFormatContext *avfc;\n  AVCodec *video_avc;\n  AVCodec *audio_avc;\n  AVStream *video_avs;\n  AVStream *audio_avs;\n  AVCodecContext *video_avcc;\n  AVCodecContext *audio_avcc;\n  int video_index;\n  int audio_index;\n  char *filename;\n} StreamingContext;\n\nint fill_stream_info(AVStream *avs, AVCodec **avc, AVCodecContext **avcc) {\n  *avc = avcodec_find_decoder(avs->codecpar->codec_id);\n  if (!*avc) {logging(\"failed to find the codec\"); return -1;}\n\n  *avcc = avcodec_alloc_context3(*avc);\n  if (!*avcc) {logging(\"failed to alloc memory for codec context\"); return -1;}\n\n  if (avcodec_parameters_to_context(*avcc, avs->codecpar) < 0) {logging(\"failed to fill codec context\"); return -1;}\n\n  if (avcodec_open2(*avcc, *avc, NULL) < 0) {logging(\"failed to open codec\"); return -1;}\n  return 0;\n}\n\nint open_media(const char *in_filename, AVFormatContext **avfc) {\n  *avfc = avformat_alloc_context();\n  if (!*avfc) {logging(\"failed to alloc memory for format\"); return -1;}\n\n  if (avformat_open_input(avfc, in_filename, NULL, NULL) != 0) {logging(\"failed to open input file %s\", in_filename); return -1;}\n\n  if (avformat_find_stream_info(*avfc, NULL) < 0) {logging(\"failed to get stream info\"); return -1;}\n  return 0;\n}\n\nint prepare_decoder(StreamingContext *sc) {\n  for (int i = 0; i < sc->avfc->nb_streams; i++) {\n    if 
(sc->avfc->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {\n      sc->video_avs = sc->avfc->streams[i];\n      sc->video_index = i;\n\n      if (fill_stream_info(sc->video_avs, &sc->video_avc, &sc->video_avcc)) {return -1;}\n    } else if (sc->avfc->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {\n      sc->audio_avs = sc->avfc->streams[i];\n      sc->audio_index = i;\n\n      if (fill_stream_info(sc->audio_avs, &sc->audio_avc, &sc->audio_avcc)) {return -1;}\n    } else {\n      logging(\"skipping streams other than audio and video\");\n    }\n  }\n\n  return 0;\n}\n\nint prepare_video_encoder(StreamingContext *sc, AVCodecContext *decoder_ctx, AVRational input_framerate, StreamingParams sp) {\n  sc->video_avs = avformat_new_stream(sc->avfc, NULL);\n\n  sc->video_avc = avcodec_find_encoder_by_name(sp.video_codec);\n  if (!sc->video_avc) {logging(\"could not find the proper codec\"); return -1;}\n\n  sc->video_avcc = avcodec_alloc_context3(sc->video_avc);\n  if (!sc->video_avcc) {logging(\"could not allocated memory for codec context\"); return -1;}\n\n  av_opt_set(sc->video_avcc->priv_data, \"preset\", \"fast\", 0);\n  if (sp.codec_priv_key && sp.codec_priv_value)\n    av_opt_set(sc->video_avcc->priv_data, sp.codec_priv_key, sp.codec_priv_value, 0);\n\n  sc->video_avcc->height = decoder_ctx->height;\n  sc->video_avcc->width = decoder_ctx->width;\n  sc->video_avcc->sample_aspect_ratio = decoder_ctx->sample_aspect_ratio;\n  if (sc->video_avc->pix_fmts)\n    sc->video_avcc->pix_fmt = sc->video_avc->pix_fmts[0];\n  else\n    sc->video_avcc->pix_fmt = decoder_ctx->pix_fmt;\n\n  sc->video_avcc->bit_rate = 2 * 1000 * 1000;\n  sc->video_avcc->rc_buffer_size = 4 * 1000 * 1000;\n  sc->video_avcc->rc_max_rate = 2 * 1000 * 1000;\n  sc->video_avcc->rc_min_rate = 2.5 * 1000 * 1000;\n\n  sc->video_avcc->time_base = av_inv_q(input_framerate);\n  sc->video_avs->time_base = sc->video_avcc->time_base;\n\n  if (avcodec_open2(sc->video_avcc, sc->video_avc, NULL) < 0) 
{logging(\"could not open the codec\"); return -1;}\n  avcodec_parameters_from_context(sc->video_avs->codecpar, sc->video_avcc);\n  return 0;\n}\n\nint prepare_audio_encoder(StreamingContext *sc, int sample_rate, StreamingParams sp){\n  sc->audio_avs = avformat_new_stream(sc->avfc, NULL);\n\n  sc->audio_avc = avcodec_find_encoder_by_name(sp.audio_codec);\n  if (!sc->audio_avc) {logging(\"could not find the proper codec\"); return -1;}\n\n  sc->audio_avcc = avcodec_alloc_context3(sc->audio_avc);\n  if (!sc->audio_avcc) {logging(\"could not allocated memory for codec context\"); return -1;}\n\n  int OUTPUT_CHANNELS = 2;\n  int OUTPUT_BIT_RATE = 196000;\n  sc->audio_avcc->channels       = OUTPUT_CHANNELS;\n  sc->audio_avcc->channel_layout = av_get_default_channel_layout(OUTPUT_CHANNELS);\n  sc->audio_avcc->sample_rate    = sample_rate;\n  sc->audio_avcc->sample_fmt     = sc->audio_avc->sample_fmts[0];\n  sc->audio_avcc->bit_rate       = OUTPUT_BIT_RATE;\n  sc->audio_avcc->time_base      = (AVRational){1, sample_rate};\n\n  sc->audio_avcc->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;\n\n  sc->audio_avs->time_base = sc->audio_avcc->time_base;\n\n  if (avcodec_open2(sc->audio_avcc, sc->audio_avc, NULL) < 0) {logging(\"could not open the codec\"); return -1;}\n  avcodec_parameters_from_context(sc->audio_avs->codecpar, sc->audio_avcc);\n  return 0;\n}\n\nint prepare_copy(AVFormatContext *avfc, AVStream **avs, AVCodecParameters *decoder_par) {\n  *avs = avformat_new_stream(avfc, NULL);\n  avcodec_parameters_copy((*avs)->codecpar, decoder_par);\n  return 0;\n}\n\nint remux(AVPacket **pkt, AVFormatContext **avfc, AVRational decoder_tb, AVRational encoder_tb) {\n  av_packet_rescale_ts(*pkt, decoder_tb, encoder_tb);\n  if (av_interleaved_write_frame(*avfc, *pkt) < 0) { logging(\"error while copying stream packet\"); return -1; }\n  return 0;\n}\n\nint encode_video(StreamingContext *decoder, StreamingContext *encoder, AVFrame *input_frame) {\n  if (input_frame) 
input_frame->pict_type = AV_PICTURE_TYPE_NONE;\n\n  AVPacket *output_packet = av_packet_alloc();\n  if (!output_packet) {logging(\"could not allocate memory for output packet\"); return -1;}\n\n  int response = avcodec_send_frame(encoder->video_avcc, input_frame);\n\n  while (response >= 0) {\n    response = avcodec_receive_packet(encoder->video_avcc, output_packet);\n    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n      break;\n    } else if (response < 0) {\n      logging(\"Error while receiving packet from encoder: %s\", av_err2str(response));\n      return -1;\n    }\n\n    output_packet->stream_index = decoder->video_index;\n    output_packet->duration = encoder->video_avs->time_base.den / encoder->video_avs->time_base.num / decoder->video_avs->avg_frame_rate.num * decoder->video_avs->avg_frame_rate.den;\n\n    av_packet_rescale_ts(output_packet, decoder->video_avs->time_base, encoder->video_avs->time_base);\n    response = av_interleaved_write_frame(encoder->avfc, output_packet);\n    if (response != 0) { logging(\"Error %d while receiving packet from decoder: %s\", response, av_err2str(response)); return -1;}\n  }\n  av_packet_unref(output_packet);\n  av_packet_free(&output_packet);\n  return 0;\n}\n\nint encode_audio(StreamingContext *decoder, StreamingContext *encoder, AVFrame *input_frame) {\n  AVPacket *output_packet = av_packet_alloc();\n  if (!output_packet) {logging(\"could not allocate memory for output packet\"); return -1;}\n\n  int response = avcodec_send_frame(encoder->audio_avcc, input_frame);\n\n  while (response >= 0) {\n    response = avcodec_receive_packet(encoder->audio_avcc, output_packet);\n    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n      break;\n    } else if (response < 0) {\n      logging(\"Error while receiving packet from encoder: %s\", av_err2str(response));\n      return -1;\n    }\n\n    output_packet->stream_index = decoder->audio_index;\n\n    av_packet_rescale_ts(output_packet, 
decoder->audio_avs->time_base, encoder->audio_avs->time_base);\n    response = av_interleaved_write_frame(encoder->avfc, output_packet);\n    if (response != 0) { logging(\"Error %d while receiving packet from decoder: %s\", response, av_err2str(response)); return -1;}\n  }\n  av_packet_unref(output_packet);\n  av_packet_free(&output_packet);\n  return 0;\n}\n\nint transcode_audio(StreamingContext *decoder, StreamingContext *encoder, AVPacket *input_packet, AVFrame *input_frame) {\n  int response = avcodec_send_packet(decoder->audio_avcc, input_packet);\n  if (response < 0) {logging(\"Error while sending packet to decoder: %s\", av_err2str(response)); return response;}\n\n  while (response >= 0) {\n    response = avcodec_receive_frame(decoder->audio_avcc, input_frame);\n    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n      break;\n    } else if (response < 0) {\n      logging(\"Error while receiving frame from decoder: %s\", av_err2str(response));\n      return response;\n    }\n\n    if (response >= 0) {\n      if (encode_audio(decoder, encoder, input_frame)) return -1;\n    }\n    av_frame_unref(input_frame);\n  }\n  return 0;\n}\n\nint transcode_video(StreamingContext *decoder, StreamingContext *encoder, AVPacket *input_packet, AVFrame *input_frame) {\n  int response = avcodec_send_packet(decoder->video_avcc, input_packet);\n  if (response < 0) {logging(\"Error while sending packet to decoder: %s\", av_err2str(response)); return response;}\n\n  while (response >= 0) {\n    response = avcodec_receive_frame(decoder->video_avcc, input_frame);\n    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n      break;\n    } else if (response < 0) {\n      logging(\"Error while receiving frame from decoder: %s\", av_err2str(response));\n      return response;\n    }\n\n    if (response >= 0) {\n      if (encode_video(decoder, encoder, input_frame)) return -1;\n    }\n    av_frame_unref(input_frame);\n  }\n  return 0;\n}\n\nint main(int argc, 
char *argv[])\n{\n  /*\n   * H264 -> H265\n   * Audio -> remuxed (untouched)\n   * MP4 - MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx265\";\n  sp.codec_priv_key = \"x265-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * Audio -> remuxed (untouched)\n   * MP4 - MP4\n   */\n  //StreamingParams sp = {0};\n  //sp.copy_audio = 1;\n  //sp.copy_video = 0;\n  //sp.video_codec = \"libx264\";\n  //sp.codec_priv_key = \"x264-params\";\n  //sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * Audio -> remuxed (untouched)\n   * MP4 - fragmented MP4\n   */\n  //StreamingParams sp = {0};\n  //sp.copy_audio = 1;\n  //sp.copy_video = 0;\n  //sp.video_codec = \"libx264\";\n  //sp.codec_priv_key = \"x264-params\";\n  //sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n  //sp.muxer_opt_key = \"movflags\";\n  //sp.muxer_opt_value = \"frag_keyframe+empty_moov+delay_moov+default_base_moof\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * Audio -> AAC\n   * MP4 - MPEG-TS\n   */\n  //StreamingParams sp = {0};\n  //sp.copy_audio = 0;\n  //sp.copy_video = 0;\n  //sp.video_codec = \"libx264\";\n  //sp.codec_priv_key = \"x264-params\";\n  //sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n  //sp.audio_codec = \"aac\";\n  //sp.output_extension = \".ts\";\n\n  /*\n   * H264 -> VP9\n   * Audio -> Vorbis\n   * MP4 - WebM\n   */\n  //StreamingParams sp = {0};\n  //sp.copy_audio = 0;\n  //sp.copy_video = 0;\n  //sp.video_codec = \"libvpx-vp9\";\n  //sp.audio_codec = \"libvorbis\";\n  //sp.output_extension = \".webm\";\n\n  StreamingContext *decoder = (StreamingContext*) calloc(1, sizeof(StreamingContext));\n  decoder->filename = argv[1];\n\n  StreamingContext *encoder = (StreamingContext*) calloc(1, sizeof(StreamingContext));\n  encoder->filename = 
argv[2];\n\n  if (sp.output_extension)\n    strcat(encoder->filename, sp.output_extension);\n\n  if (open_media(decoder->filename, &decoder->avfc)) return -1;\n  if (prepare_decoder(decoder)) return -1;\n\n  avformat_alloc_output_context2(&encoder->avfc, NULL, NULL, encoder->filename);\n  if (!encoder->avfc) {logging(\"could not allocate memory for output format\");return -1;}\n\n  for (int i = 0; i < decoder->avfc->nb_streams; i++) {\n    if (decoder->avfc->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) \n    {\n      if (!sp.copy_video) {\n        AVRational input_framerate = av_guess_frame_rate(decoder->avfc, decoder->video_avs, NULL);\n        prepare_video_encoder(encoder, decoder->video_avcc, input_framerate, sp);\n      } else {\n        if (prepare_copy(encoder->avfc, &encoder->video_avs, decoder->video_avs->codecpar)) {return -1;}\n      }\n    }\n\n    if (decoder->avfc->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)\n    {\n      if (!sp.copy_audio) {\n        if (prepare_audio_encoder(encoder, decoder->audio_avcc->sample_rate, sp)) {return -1;}\n      } else {\n        if (prepare_copy(encoder->avfc, &encoder->audio_avs, decoder->audio_avs->codecpar)) {return -1;}\n      }\n    } \n  }\n\n  if (encoder->avfc->oformat->flags & AVFMT_GLOBALHEADER)\n    encoder->avfc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;\n\n  if (!(encoder->avfc->oformat->flags & AVFMT_NOFILE)) {\n    if (avio_open(&encoder->avfc->pb, encoder->filename, AVIO_FLAG_WRITE) < 0) {\n      logging(\"could not open the output file\");\n      return -1;\n    }\n  }\n\n  AVDictionary* muxer_opts = NULL;\n\n  if (sp.muxer_opt_key && sp.muxer_opt_value) {\n    av_dict_set(&muxer_opts, sp.muxer_opt_key, sp.muxer_opt_value, 0);\n  }\n\n  if (avformat_write_header(encoder->avfc, &muxer_opts) < 0) {logging(\"an error occurred when opening output file\"); return -1;}\n\n  AVFrame *input_frame = av_frame_alloc();\n  if (!input_frame) {logging(\"failed to allocated memory for AVFrame\"); 
return -1;}\n\n  AVPacket *input_packet = av_packet_alloc();\n  if (!input_packet) {logging(\"failed to allocated memory for AVPacket\"); return -1;}\n\n  while (av_read_frame(decoder->avfc, input_packet) >= 0)\n  {\n    if (decoder->avfc->streams[input_packet->stream_index]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {\n      if (!sp.copy_video) {\n        // TODO: refactor to be generic for audio and video (receiving a function pointer to the differences)\n        if (transcode_video(decoder, encoder, input_packet, input_frame)) return -1;\n        av_packet_unref(input_packet);\n      } else {\n        if (remux(&input_packet, &encoder->avfc, decoder->video_avs->time_base, encoder->video_avs->time_base)) return -1;\n      }\n    } else if (decoder->avfc->streams[input_packet->stream_index]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)  {\n      if (!sp.copy_audio) {\n        if (transcode_audio(decoder, encoder, input_packet, input_frame)) return -1;\n        av_packet_unref(input_packet);\n      } else {\n        if (remux(&input_packet, &encoder->avfc, decoder->audio_avs->time_base, encoder->audio_avs->time_base)) return -1;\n      }\n    } else {\n      logging(\"ignoring all non video or audio packets\");\n    }\n  }\n\n  if (!sp.copy_video)\n    if (encode_video(decoder, encoder, NULL)) return -1;\n  if (!sp.copy_audio)\n    if (encode_audio(decoder, encoder, NULL)) return -1;\n\n  av_write_trailer(encoder->avfc);\n\n  if (muxer_opts != NULL) {\n    av_dict_free(&muxer_opts);\n    muxer_opts = NULL;\n  }\n\n  if (input_frame != NULL) {\n    av_frame_free(&input_frame);\n    input_frame = NULL;\n  }\n\n  if (input_packet != NULL) {\n    av_packet_free(&input_packet);\n    input_packet = NULL;\n  }\n\n  avformat_close_input(&decoder->avfc);\n\n  avformat_free_context(decoder->avfc); decoder->avfc = NULL;\n  avformat_free_context(encoder->avfc); encoder->avfc = NULL;\n\n  avcodec_free_context(&decoder->video_avcc); decoder->video_avcc = NULL;\n  
avcodec_free_context(&decoder->audio_avcc); decoder->audio_avcc = NULL;\n\n  free(decoder); decoder = NULL;\n  free(encoder); encoder = NULL;\n  return 0;\n}\n\n"
  },
  {
    "path": "CMakeLists.txt",
    "content": "cmake_minimum_required(VERSION 3.17)\nproject(libav_tutorial)\n\n# set out directory\nset(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)\nset(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)\nset(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)\n\n# set ffmpeg root directory\nif(NOT FFMPEG_DEV_ROOT)\n    message(FATAL_ERROR \"set FFMPEG_DEV_ROOT to use ffmpeg libraries\")\nendif()\n\n# set ffmpeg develop environment\ninclude_directories(${FFMPEG_DEV_ROOT}/include)\nlink_directories(${FFMPEG_DEV_ROOT}/lib)\nlink_libraries(\n    avcodec\n    avformat\n    avfilter\n    avdevice\n    swresample\n    swscale\n    avutil\n)\n\n# copy dlls \nfile(GLOB ffmpeg_shared_libries ${FFMPEG_DEV_ROOT}/bin/*dll)\nfile(COPY ${ffmpeg_shared_libries} DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})\n\n# copy test file\nfile(COPY ${CMAKE_CURRENT_SOURCE_DIR}/small_bunny_1080p_60fps.mp4 DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})\n\n\n# add library\nset(debug_src ${CMAKE_CURRENT_SOURCE_DIR}/video_debugging.c)\nadd_library(video_debug ${debug_src})\nlink_libraries(video_debug)\n\n# add project/executables\nfile(GLOB srcs *.c)\nlist(REMOVE_ITEM srcs ${debug_src})\nforeach(src  ${srcs})\n    get_filename_component(TARGET ${src} NAME)\n    add_executable(${TARGET} ${src})\n    message(STATUS \"${TARGET} added\")\nendforeach()\n\n\n"
  },
  {
    "path": "Dockerfile",
    "content": "# ffmpeg - http://ffmpeg.org/download.html\n#\n# From https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu\n#\n# https://hub.docker.com/r/jrottenberg/ffmpeg/\n#\n#\nFROM        ubuntu:20.04 AS base\n\nWORKDIR     /tmp/workdir\n\nRUN     apt-get -yqq update && \\\n        apt-get install -yq --no-install-recommends ca-certificates expat libgomp1 && \\\n        apt-get autoremove -y && \\\n        apt-get clean -y\n\nFROM base as build\n\nENV         FFMPEG_VERSION=4.4 \\\n            AOM_VERSION=v1.0.0 \\\n            FDKAAC_VERSION=0.1.5 \\\n            FONTCONFIG_VERSION=2.12.4 \\\n            FREETYPE_VERSION=2.10.4 \\\n            FRIBIDI_VERSION=0.19.7 \\\n            KVAZAAR_VERSION=2.0.0 \\\n            LAME_VERSION=3.100 \\\n            LIBASS_VERSION=0.13.7 \\\n            LIBPTHREAD_STUBS_VERSION=0.4 \\\n            LIBVIDSTAB_VERSION=1.1.0 \\\n            LIBXCB_VERSION=1.13.1 \\\n            XCBPROTO_VERSION=1.13 \\\n            OGG_VERSION=1.3.2 \\\n            OPENCOREAMR_VERSION=0.1.5 \\\n            OPUS_VERSION=1.2 \\\n            OPENJPEG_VERSION=2.1.2 \\\n            THEORA_VERSION=1.1.1 \\\n            VORBIS_VERSION=1.3.5 \\\n            VPX_VERSION=1.8.0 \\\n            WEBP_VERSION=1.0.2 \\\n            X264_VERSION=20170226-2245-stable \\\n            X265_VERSION=3.4 \\\n            XAU_VERSION=1.0.9 \\\n            XORG_MACROS_VERSION=1.19.2 \\\n            XPROTO_VERSION=7.0.31 \\\n            XVID_VERSION=1.3.4 \\\n            LIBXML2_VERSION=2.9.10 \\\n            LIBBLURAY_VERSION=1.1.2 \\\n            LIBZMQ_VERSION=4.3.2 \\\n            LIBSRT_VERSION=1.4.1 \\\n            LIBARIBB24_VERSION=1.0.3 \\\n            LIBPNG_VERSION=1.6.9 \\\n            LIBVMAF_VERSION=2.1.1 \\\n            SRC=/usr/local\n\nARG         FREETYPE_SHA256SUM=\"5eab795ebb23ac77001cfb68b7d4d50b5d6c7469247b0b01b2c953269f658dac freetype-2.10.4.tar.gz\"\nARG         
FRIBIDI_SHA256SUM=\"3fc96fa9473bd31dcb5500bdf1aa78b337ba13eb8c301e7c28923fea982453a8 0.19.7.tar.gz\"\nARG         LIBASS_SHA256SUM=\"8fadf294bf701300d4605e6f1d92929304187fca4b8d8a47889315526adbafd7 0.13.7.tar.gz\"\nARG         LIBVIDSTAB_SHA256SUM=\"14d2a053e56edad4f397be0cb3ef8eb1ec3150404ce99a426c4eb641861dc0bb v1.1.0.tar.gz\"\nARG         OGG_SHA256SUM=\"e19ee34711d7af328cb26287f4137e70630e7261b17cbe3cd41011d73a654692 libogg-1.3.2.tar.gz\"\nARG         OPUS_SHA256SUM=\"77db45a87b51578fbc49555ef1b10926179861d854eb2613207dc79d9ec0a9a9 opus-1.2.tar.gz\"\nARG         THEORA_SHA256SUM=\"40952956c47811928d1e7922cda3bc1f427eb75680c3c37249c91e949054916b libtheora-1.1.1.tar.gz\"\nARG         VORBIS_SHA256SUM=\"6efbcecdd3e5dfbf090341b485da9d176eb250d893e3eb378c428a2db38301ce libvorbis-1.3.5.tar.gz\"\nARG         XVID_SHA256SUM=\"4e9fd62728885855bc5007fe1be58df42e5e274497591fec37249e1052ae316f xvidcore-1.3.4.tar.gz\"\nARG         LIBXML2_SHA256SUM=\"f07dab13bf42d2b8db80620cce7419b3b87827cc937c8bb20fe13b8571ee9501  libxml2-v2.9.10.tar.gz\"\nARG         LIBBLURAY_SHA256SUM=\"a3dd452239b100dc9da0d01b30e1692693e2a332a7d29917bf84bb10ea7c0b42 libbluray-1.1.2.tar.bz2\"\nARG         LIBZMQ_SHA256SUM=\"02ecc88466ae38cf2c8d79f09cfd2675ba299a439680b64ade733e26a349edeb v4.3.2.tar.gz\"\nARG         LIBARIBB24_SHA256SUM=\"f61560738926e57f9173510389634d8c06cabedfa857db4b28fb7704707ff128 v1.0.3.tar.gz\"\nARG         LIBVMAF_SHA256SUM=\"e7fc00ae1322a7eccfcf6d4f1cdf9c67eec8058709887c8c6c3795c617326f77 v2.1.1.tar.gz\"\n\n\nARG         LD_LIBRARY_PATH=/opt/ffmpeg/lib\nARG         MAKEFLAGS=\"-j2\"\nARG         PKG_CONFIG_PATH=\"/opt/ffmpeg/share/pkgconfig:/opt/ffmpeg/lib/pkgconfig:/opt/ffmpeg/lib64/pkgconfig\"\nARG         PREFIX=/opt/ffmpeg\nARG         LD_LIBRARY_PATH=\"/opt/ffmpeg/lib:/opt/ffmpeg/lib64\"\n\n\nARG DEBIAN_FRONTEND=noninteractive\n\nRUN      buildDeps=\"autoconf \\\n                    automake \\\n                    cmake \\\n                    curl \\\n                    
bzip2 \\\n                    libexpat1-dev \\\n                    g++ \\\n                    gcc \\\n                    git \\\n                    gperf \\\n                    libtool \\\n                    make \\\n                    meson \\\n                    nasm \\\n                    perl \\\n                    pkg-config \\\n                    python \\\n                    libssl-dev \\\n                    yasm \\\n                    zlib1g-dev\" && \\\n        apt-get -yqq update && \\\n        apt-get install -yq --no-install-recommends ${buildDeps}\n## libvmaf https://github.com/Netflix/vmaf\nRUN \\\n        if which meson || false; then \\\n                echo \"Building VMAF.\" && \\\n                DIR=/tmp/vmaf && \\\n                mkdir -p ${DIR} && \\\n                cd ${DIR} && \\\n                curl -sLO https://github.com/Netflix/vmaf/archive/v${LIBVMAF_VERSION}.tar.gz && \\\n                tar -xz --strip-components=1 -f v${LIBVMAF_VERSION}.tar.gz && \\\n                cd /tmp/vmaf/libvmaf && \\\n                meson build --buildtype release --prefix=${PREFIX} && \\\n                ninja -vC build && \\\n                ninja -vC build install && \\\n                mkdir -p ${PREFIX}/share/model/ && \\\n                cp -r /tmp/vmaf/model/* ${PREFIX}/share/model/ && \\\n                rm -rf ${DIR}; \\\n        else \\\n                echo \"VMAF skipped.\"; \\\n        fi\n\n## opencore-amr https://sourceforge.net/projects/opencore-amr/\nRUN \\\n        DIR=/tmp/opencore-amr && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sL https://versaweb.dl.sourceforge.net/project/opencore-amr/opencore-amr/opencore-amr-${OPENCOREAMR_VERSION}.tar.gz | \\\n        tar -zx --strip-components=1 && \\\n        ./configure --prefix=\"${PREFIX}\" --enable-shared  && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n## x264 http://www.videolan.org/developers/x264.html\nRUN \\\n  
      DIR=/tmp/x264 && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sL https://download.videolan.org/pub/videolan/x264/snapshots/x264-snapshot-${X264_VERSION}.tar.bz2 | \\\n        tar -jx --strip-components=1 && \\\n        ./configure --prefix=\"${PREFIX}\" --enable-shared --enable-pic --disable-cli && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n### x265 http://x265.org/\nRUN \\\n        DIR=/tmp/x265 && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sL https://github.com/videolan/x265/archive/refs/tags/${X265_VERSION}.tar.gz | \\\n        tar -zx && \\\n        cd x265-${X265_VERSION}/build/linux && \\\n        sed -i \"/-DEXTRA_LIB/ s/$/ -DCMAKE_INSTALL_PREFIX=\\${PREFIX}/\" multilib.sh && \\\n        sed -i \"/^cmake/ s/$/ -DENABLE_CLI=OFF/\" multilib.sh && \\\n        ./multilib.sh && \\\n        make -C 8bit install && \\\n        rm -rf ${DIR}\n### libogg https://www.xiph.org/ogg/\nRUN \\\n        DIR=/tmp/ogg && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO http://downloads.xiph.org/releases/ogg/libogg-${OGG_VERSION}.tar.gz && \\\n        echo ${OGG_SHA256SUM} | sha256sum --check && \\\n        tar -zx --strip-components=1 -f libogg-${OGG_VERSION}.tar.gz && \\\n        ./configure --prefix=\"${PREFIX}\" --enable-shared  && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n### libopus https://www.opus-codec.org/\nRUN \\\n        DIR=/tmp/opus && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://archive.mozilla.org/pub/opus/opus-${OPUS_VERSION}.tar.gz && \\\n        echo ${OPUS_SHA256SUM} | sha256sum --check && \\\n        tar -zx --strip-components=1 -f opus-${OPUS_VERSION}.tar.gz && \\\n        autoreconf -fiv && \\\n        ./configure --prefix=\"${PREFIX}\" --enable-shared && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n### libvorbis 
https://xiph.org/vorbis/\nRUN \\\n        DIR=/tmp/vorbis && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO http://downloads.xiph.org/releases/vorbis/libvorbis-${VORBIS_VERSION}.tar.gz && \\\n        echo ${VORBIS_SHA256SUM} | sha256sum --check && \\\n        tar -zx --strip-components=1 -f libvorbis-${VORBIS_VERSION}.tar.gz && \\\n        ./configure --prefix=\"${PREFIX}\" --with-ogg=\"${PREFIX}\" --enable-shared && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n### libtheora http://www.theora.org/\nRUN \\\n        DIR=/tmp/theora && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO http://downloads.xiph.org/releases/theora/libtheora-${THEORA_VERSION}.tar.gz && \\\n        echo ${THEORA_SHA256SUM} | sha256sum --check && \\\n        tar -zx --strip-components=1 -f libtheora-${THEORA_VERSION}.tar.gz && \\\n        ./configure --prefix=\"${PREFIX}\" --with-ogg=\"${PREFIX}\" --enable-shared && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n### libvpx https://www.webmproject.org/code/\nRUN \\\n        DIR=/tmp/vpx && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sL https://codeload.github.com/webmproject/libvpx/tar.gz/v${VPX_VERSION} | \\\n        tar -zx --strip-components=1 && \\\n        ./configure --prefix=\"${PREFIX}\" --enable-vp8 --enable-vp9 --enable-vp9-highbitdepth --enable-pic --enable-shared \\\n        --disable-debug --disable-examples --disable-docs --disable-install-bins  && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n### libwebp https://developers.google.com/speed/webp/\nRUN \\\n        DIR=/tmp/vebp && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sL https://storage.googleapis.com/downloads.webmproject.org/releases/webp/libwebp-${WEBP_VERSION}.tar.gz | \\\n        tar -zx --strip-components=1 && \\\n        ./configure --prefix=\"${PREFIX}\" 
--enable-shared  && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n### libmp3lame http://lame.sourceforge.net/\nRUN \\\n        DIR=/tmp/lame && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sL https://versaweb.dl.sourceforge.net/project/lame/lame/$(echo ${LAME_VERSION} | sed -e 's/[^0-9]*\\([0-9]*\\)[.]\\([0-9]*\\)[.]\\([0-9]*\\)\\([0-9A-Za-z-]*\\)/\\1.\\2/')/lame-${LAME_VERSION}.tar.gz | \\\n        tar -zx --strip-components=1 && \\\n        ./configure --prefix=\"${PREFIX}\" --bindir=\"${PREFIX}/bin\" --enable-shared --enable-nasm --disable-frontend && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n### xvid https://www.xvid.com/\nRUN \\\n        DIR=/tmp/xvid && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://xvid.com/downloads/xvidcore-${XVID_VERSION}.tar.gz && \\\n        echo ${XVID_SHA256SUM} | sha256sum --check && \\\n        tar -zx -f xvidcore-${XVID_VERSION}.tar.gz && \\\n        cd xvidcore/build/generic && \\\n        ./configure --prefix=\"${PREFIX}\" --bindir=\"${PREFIX}/bin\" && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n### fdk-aac https://github.com/mstorsjo/fdk-aac\nRUN \\\n        DIR=/tmp/fdk-aac && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sL https://github.com/mstorsjo/fdk-aac/archive/v${FDKAAC_VERSION}.tar.gz | \\\n        tar -zx --strip-components=1 && \\\n        autoreconf -fiv && \\\n        ./configure --prefix=\"${PREFIX}\" --enable-shared --datadir=\"${DIR}\" && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n## openjpeg https://github.com/uclouvain/openjpeg\nRUN \\\n        DIR=/tmp/openjpeg && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sL https://github.com/uclouvain/openjpeg/archive/v${OPENJPEG_VERSION}.tar.gz | \\\n        tar -zx --strip-components=1 && \\\n        cmake 
-DBUILD_THIRDPARTY:BOOL=ON -DCMAKE_INSTALL_PREFIX=\"${PREFIX}\" . && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n## freetype https://www.freetype.org/\nRUN  \\\n        DIR=/tmp/freetype && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://download.savannah.gnu.org/releases/freetype/freetype-${FREETYPE_VERSION}.tar.gz && \\\n        echo ${FREETYPE_SHA256SUM} | sha256sum --check && \\\n        tar -zx --strip-components=1 -f freetype-${FREETYPE_VERSION}.tar.gz && \\\n        ./configure --prefix=\"${PREFIX}\" --disable-static --enable-shared && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n## libvstab https://github.com/georgmartius/vid.stab\nRUN  \\\n        DIR=/tmp/vid.stab && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://github.com/georgmartius/vid.stab/archive/v${LIBVIDSTAB_VERSION}.tar.gz && \\\n        echo ${LIBVIDSTAB_SHA256SUM} | sha256sum --check &&  \\\n        tar -zx --strip-components=1 -f v${LIBVIDSTAB_VERSION}.tar.gz && \\\n        cmake -DCMAKE_INSTALL_PREFIX=\"${PREFIX}\" . 
&& \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n## fridibi https://www.fribidi.org/\nRUN  \\\n        DIR=/tmp/fribidi && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://github.com/fribidi/fribidi/archive/${FRIBIDI_VERSION}.tar.gz && \\\n        echo ${FRIBIDI_SHA256SUM} | sha256sum --check && \\\n        tar -zx --strip-components=1 -f ${FRIBIDI_VERSION}.tar.gz && \\\n        sed -i 's/^SUBDIRS =.*/SUBDIRS=gen.tab charset lib bin/' Makefile.am && \\\n        ./bootstrap --no-config --auto && \\\n        ./configure --prefix=\"${PREFIX}\" --disable-static --enable-shared && \\\n        make -j1 && \\\n        make install && \\\n        rm -rf ${DIR}\n## fontconfig https://www.freedesktop.org/wiki/Software/fontconfig/\nRUN  \\\n        DIR=/tmp/fontconfig && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://www.freedesktop.org/software/fontconfig/release/fontconfig-${FONTCONFIG_VERSION}.tar.bz2 && \\\n        tar -jx --strip-components=1 -f fontconfig-${FONTCONFIG_VERSION}.tar.bz2 && \\\n        ./configure --prefix=\"${PREFIX}\" --disable-static --enable-shared && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n## libass https://github.com/libass/libass\nRUN  \\\n        DIR=/tmp/libass && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://github.com/libass/libass/archive/${LIBASS_VERSION}.tar.gz && \\\n        echo ${LIBASS_SHA256SUM} | sha256sum --check && \\\n        tar -zx --strip-components=1 -f ${LIBASS_VERSION}.tar.gz && \\\n        ./autogen.sh && \\\n        ./configure --prefix=\"${PREFIX}\" --disable-static --enable-shared && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n## kvazaar https://github.com/ultravideo/kvazaar\nRUN \\\n        DIR=/tmp/kvazaar && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO 
https://github.com/ultravideo/kvazaar/archive/v${KVAZAAR_VERSION}.tar.gz && \\\n        tar -zx --strip-components=1 -f v${KVAZAAR_VERSION}.tar.gz && \\\n        ./autogen.sh && \\\n        ./configure --prefix=\"${PREFIX}\" --disable-static --enable-shared && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n\nRUN \\\n        DIR=/tmp/aom && \\\n        git clone --branch ${AOM_VERSION} --depth 1 https://aomedia.googlesource.com/aom ${DIR} ; \\\n        cd ${DIR} ; \\\n        rm -rf CMakeCache.txt CMakeFiles ; \\\n        mkdir -p ./aom_build ; \\\n        cd ./aom_build ; \\\n        cmake -DCMAKE_INSTALL_PREFIX=\"${PREFIX}\" -DBUILD_SHARED_LIBS=1 ..; \\\n        make ; \\\n        make install ; \\\n        rm -rf ${DIR}\n\n## libxcb (and supporting libraries) for screen capture https://xcb.freedesktop.org/\nRUN \\\n        DIR=/tmp/xorg-macros && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://www.x.org/archive//individual/util/util-macros-${XORG_MACROS_VERSION}.tar.gz && \\\n        tar -zx --strip-components=1 -f util-macros-${XORG_MACROS_VERSION}.tar.gz && \\\n        ./configure --srcdir=${DIR} --prefix=\"${PREFIX}\" && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n\nRUN \\\n        DIR=/tmp/xproto && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://www.x.org/archive/individual/proto/xproto-${XPROTO_VERSION}.tar.gz && \\\n        tar -zx --strip-components=1 -f xproto-${XPROTO_VERSION}.tar.gz && \\\n        ./configure --srcdir=${DIR} --prefix=\"${PREFIX}\" && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n\nRUN \\\n        DIR=/tmp/libXau && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://www.x.org/archive/individual/lib/libXau-${XAU_VERSION}.tar.gz && \\\n        tar -zx --strip-components=1 -f libXau-${XAU_VERSION}.tar.gz && \\\n        ./configure 
--srcdir=${DIR} --prefix=\"${PREFIX}\" && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n\nRUN \\\n        DIR=/tmp/libpthread-stubs && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://xcb.freedesktop.org/dist/libpthread-stubs-${LIBPTHREAD_STUBS_VERSION}.tar.gz && \\\n        tar -zx --strip-components=1 -f libpthread-stubs-${LIBPTHREAD_STUBS_VERSION}.tar.gz && \\\n        ./configure --prefix=\"${PREFIX}\" && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n\nRUN \\\n        DIR=/tmp/libxcb-proto && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://xcb.freedesktop.org/dist/xcb-proto-${XCBPROTO_VERSION}.tar.gz && \\\n        tar -zx --strip-components=1 -f xcb-proto-${XCBPROTO_VERSION}.tar.gz && \\\n        ACLOCAL_PATH=\"${PREFIX}/share/aclocal\" ./autogen.sh && \\\n        ./configure --prefix=\"${PREFIX}\" && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n\nRUN \\\n        DIR=/tmp/libxcb && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://xcb.freedesktop.org/dist/libxcb-${LIBXCB_VERSION}.tar.gz && \\\n        tar -zx --strip-components=1 -f libxcb-${LIBXCB_VERSION}.tar.gz && \\\n        ACLOCAL_PATH=\"${PREFIX}/share/aclocal\" ./autogen.sh && \\\n        ./configure --prefix=\"${PREFIX}\" --disable-static --enable-shared && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n\n## libxml2 - for libbluray\nRUN \\\n        DIR=/tmp/libxml2 && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://gitlab.gnome.org/GNOME/libxml2/-/archive/v${LIBXML2_VERSION}/libxml2-v${LIBXML2_VERSION}.tar.gz && \\\n        echo ${LIBXML2_SHA256SUM} | sha256sum --check && \\\n        tar -xz --strip-components=1 -f libxml2-v${LIBXML2_VERSION}.tar.gz && \\\n        ./autogen.sh --prefix=\"${PREFIX}\" --with-ftp=no --with-http=no 
--with-python=no && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n\n## libbluray - Requires libxml, freetype, and fontconfig\nRUN \\\n        DIR=/tmp/libbluray && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://download.videolan.org/pub/videolan/libbluray/${LIBBLURAY_VERSION}/libbluray-${LIBBLURAY_VERSION}.tar.bz2 && \\\n        echo ${LIBBLURAY_SHA256SUM} | sha256sum --check && \\\n        tar -jx --strip-components=1 -f libbluray-${LIBBLURAY_VERSION}.tar.bz2 && \\\n        ./configure --prefix=\"${PREFIX}\" --disable-examples --disable-bdjava-jar --disable-static --enable-shared && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n\n## libzmq https://github.com/zeromq/libzmq/\nRUN \\\n        DIR=/tmp/libzmq && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://github.com/zeromq/libzmq/archive/v${LIBZMQ_VERSION}.tar.gz && \\\n        echo ${LIBZMQ_SHA256SUM} | sha256sum --check && \\\n        tar -xz --strip-components=1 -f v${LIBZMQ_VERSION}.tar.gz && \\\n        ./autogen.sh && \\\n        ./configure --prefix=\"${PREFIX}\" && \\\n        make && \\\n        make check && \\\n        make install && \\\n        rm -rf ${DIR}\n\n## libsrt https://github.com/Haivision/srt\nRUN \\\n        DIR=/tmp/srt && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://github.com/Haivision/srt/archive/v${LIBSRT_VERSION}.tar.gz && \\\n        tar -xz --strip-components=1 -f v${LIBSRT_VERSION}.tar.gz && \\\n        cmake -DCMAKE_INSTALL_PREFIX=\"${PREFIX}\" . 
&& \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n\n## libpng\nRUN \\\n        DIR=/tmp/png && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        git clone https://git.code.sf.net/p/libpng/code ${DIR} -b v${LIBPNG_VERSION} --depth 1 && \\\n        ./autogen.sh && \\\n        ./configure --prefix=\"${PREFIX}\" && \\\n        make check && \\\n        make install && \\\n        rm -rf ${DIR}\n\n## libaribb24\nRUN \\\n        DIR=/tmp/b24 && \\\n        mkdir -p ${DIR} && \\\n        cd ${DIR} && \\\n        curl -sLO https://github.com/nkoriyama/aribb24/archive/v${LIBARIBB24_VERSION}.tar.gz && \\\n        echo ${LIBARIBB24_SHA256SUM} | sha256sum --check && \\\n        tar -xz --strip-components=1 -f v${LIBARIBB24_VERSION}.tar.gz && \\\n        autoreconf -fiv && \\\n        ./configure CFLAGS=\"-I${PREFIX}/include -fPIC\" --prefix=\"${PREFIX}\" && \\\n        make && \\\n        make install && \\\n        rm -rf ${DIR}\n\n## ffmpeg https://ffmpeg.org/\nRUN  \\\n        DIR=/tmp/ffmpeg && mkdir -p ${DIR} && cd ${DIR} && \\\n        curl -sLO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.bz2 && \\\n        tar -jx --strip-components=1 -f ffmpeg-${FFMPEG_VERSION}.tar.bz2\n\n\n\nRUN \\\n        DIR=/tmp/ffmpeg && mkdir -p ${DIR} && cd ${DIR} && \\\n        ./configure \\\n        --disable-debug \\\n        --disable-doc \\\n        --disable-ffplay \\\n        --enable-shared \\\n        --enable-avresample \\\n        --enable-libopencore-amrnb \\\n        --enable-libopencore-amrwb \\\n        --enable-gpl \\\n        --enable-libass \\\n        --enable-fontconfig \\\n        --enable-libfreetype \\\n        --enable-libvidstab \\\n        --enable-libmp3lame \\\n        --enable-libopus \\\n        --enable-libtheora \\\n        --enable-libvorbis \\\n        --enable-libvpx \\\n        --enable-libwebp \\\n        --enable-libxcb \\\n        --enable-libx265 \\\n        --enable-libxvid \\\n        
--enable-libx264 \\\n        --enable-nonfree \\\n        --enable-openssl \\\n        --enable-libfdk_aac \\\n        --enable-postproc \\\n        --enable-small \\\n        --enable-version3 \\\n        --enable-libbluray \\\n        --enable-libzmq \\\n        --extra-libs=-ldl \\\n        --prefix=\"${PREFIX}\" \\\n        --enable-libopenjpeg \\\n        --enable-libkvazaar \\\n        --enable-libaom \\\n        --extra-libs=-lpthread \\\n        --enable-libsrt \\\n        --enable-libaribb24 \\\n        --enable-libvmaf \\\n        --extra-cflags=\"-I${PREFIX}/include\" \\\n        --extra-ldflags=\"-L${PREFIX}/lib\" && \\\n        make && \\\n        make install && \\\n        make tools/zmqsend && cp tools/zmqsend ${PREFIX}/bin/ && \\\n        make distclean && \\\n        hash -r && \\\n        cd tools && \\\n        make qt-faststart && cp qt-faststart ${PREFIX}/bin/\n\n# Let's make sure the app built correctly\n# Convenient to verify on https://hub.docker.com/r/jrottenberg/ffmpeg/builds/ console output\n\nFROM        base AS release\nENV         LD_LIBRARY_PATH /opt/ffmpeg/lib:/usr/local/lib\nRUN     apt-get -yqq update && \\\n        apt-get install -yq --no-install-recommends build-essential && \\\n        apt-get autoremove -y && \\\n        apt-get clean -y\n\nCOPY --from=build /opt/ffmpeg /opt/ffmpeg\n"
  },
  {
    "path": "LICENSE",
    "content": "BSD 3-Clause License\n\nCopyright (c) 2017, Leandro Moreira\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n* Redistributions of source code must retain the above copyright notice, this\n  list of conditions and the following disclaimer.\n\n* Redistributions in binary form must reproduce the above copyright notice,\n  this list of conditions and the following disclaimer in the documentation\n  and/or other materials provided with the distribution.\n\n* Neither the name of the copyright holder nor the names of its\n  contributors may be used to endorse or promote products derived from\n  this software without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
  },
  {
    "path": "Makefile",
    "content": "usage:\n\techo \"make fetch_small_bunny_video && make run_hello\"\n\nall: clean fetch_bbb_video make_hello run_hello make_remuxing run_remuxing_ts run_remuxing_fragmented_mp4 make_transcoding\n.PHONY: all\n\nclean:\n\t@rm -rf ./build/*\n\nfetch_small_bunny_video:\n\t./fetch_bbb_video.sh\n\nmake_hello: clean\n\tdocker run -i -w /files --rm  -v `pwd`:/files leandromoreira/ffmpeg-devel:4.4 \\\n\t  gcc -L/opt/ffmpeg/lib -I/opt/ffmpeg/include/ /files/0_hello_world.c \\\n\t  -lavcodec -lavformat -lavfilter -lavdevice -lswresample -lswscale -lavutil \\\n\t  -o /files/build/hello\n\nrun_hello: make_hello\n\tdocker run -i -w /files --rm  -v `pwd`:/files leandromoreira/ffmpeg-devel:4.4 /files/build/hello /files/small_bunny_1080p_60fps.mp4\n\nmake_remuxing: clean\n\tdocker run -i -w /files --rm  -v `pwd`:/files leandromoreira/ffmpeg-devel:4.4 \\\n\t  gcc -L/opt/ffmpeg/lib -I/opt/ffmpeg/include/ /files/2_remuxing.c \\\n\t  -lavcodec -lavformat -lavfilter -lavdevice -lswresample -lswscale -lavutil \\\n\t  -o /files/build/remuxing\n\nrun_remuxing_ts: make_remuxing\n\tdocker run -i -w /files --rm -v `pwd`:/files leandromoreira/ffmpeg-devel:4.4 /files/build/remuxing /files/small_bunny_1080p_60fps.mp4 /files/remuxed_small_bunny_1080p_60fps.ts\n\nrun_remuxing_fragmented_mp4: make_remuxing\n\tdocker run -i -w /files --rm -v `pwd`:/files leandromoreira/ffmpeg-devel:4.4 /files/build/remuxing /files/small_bunny_1080p_60fps.mp4 /files/fragmented_small_bunny_1080p_60fps.mp4 fragmented\n\nmake_transcoding: clean\n\tdocker run -i -w /files --rm -v `pwd`:/files leandromoreira/ffmpeg-devel:4.4 \\\n\t  gcc -g -Wall -L/opt/ffmpeg/lib -I/opt/ffmpeg/include/ /files/3_transcoding.c /files/video_debugging.c \\\n\t  -lavcodec -lavformat -lavfilter -lavdevice -lswresample -lswscale -lavutil \\\n\t  -o /files/build/3_transcoding\n\nrun_transcoding: make_transcoding\n\tdocker run -i -w /files --rm -v `pwd`:/files leandromoreira/ffmpeg-devel:4.4 ./build/3_transcoding 
/files/small_bunny_1080p_60fps.mp4 /files/bunny_1s_gop.mp4\n"
  },
  {
    "path": "README-cn.md",
    "content": "[![license](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)\n\n起初我在寻找可以学习使用FFmpeg库(又名 libav)的教程或书籍，然后找到了名为[\"如何在1k行代码内实现视频播放器\"](http://dranger.com/ffmpeg/)的指南。但该项目已经停止维护，因此我决定撰写此教程。\n\n此项目主要使用C语言开发，**但请不用担心**：项目内容非常通俗易懂。FFmpeg libav具有许多其他语言的实现，例如[python](https://pyav.org/)，[go](https://github.com/imkira/go-libav)。即使其中没有你熟悉的编程语言，仍然可以通过  `ffi` 为它提供支持（这是一个 [Lua](https://github.com/daurnimator/ffmpeg-lua-ffi/blob/master/init.lua) 的示例）。\n\n下文将会简单介绍什么是视频、音频、编解码和容器，然后我们将尝试使用 FFmpeg 命令行工具，最终使用代码实现一些功能。如果你拥有一些经验，可以随时跳过这些内容，直接阅读 [笨办法学 FFmpeg libav](#笨办法学-FFmpeg-libav) 章节。\n\n许多人认为网络视频流媒体是传统 TV 的未来。无论如何，FFmpeg 值得我们深入学习。\n\n__目录__\n\n* [介绍](#介绍)\n  * [视频 - 目光所见](#视频---目光所见)\n  * [音频 - 耳朵所听](#音频---耳朵所听)\n  * [编解码 - 压缩数据](#编解码---压缩数据)\n  * [容器 - 整合音频和视频](#容器---整合音视频)\n* [FFmpeg - 命令行](#FFmpeg---命令行)\n  * [FFmpeg 命令行工具 101](#FFmpeg-命令行工具-101)\n* [通用视频操作](#通用视频操作)\n  * [转码](#转码)\n  * [转封装](#转封装)\n  * [转码率](#转码率)\n  * [转分辨率](#转分辨率)\n  * [自适应流](#自适应流)\n  * [更多](#更多)\n* [笨办法学 FFmpeg libav](#笨办法学-FFmpeg-libav)\n  * [章节0 - 臭名昭著的 hello world](#章节0---臭名昭著的-hello-world)\n    * [FFmpeg libav 架构](#FFmpeg-libav-架构)\n  * [章节1 - 音视频同步](#章节-1---音视频同步)\n  * [章节2 - 重新封装](#章节-2---重新封装)\n  * [章节3 - 转码](#章节-3---转码)\n\n# 介绍\n\n## 视频 - 目光所见\n\n如果以一定的频率播放一组图片([比如每秒24张图片](https://www.filmindependent.org/blog/hacking-film-24-frames-per-second/))，人将会产生[视觉暂留现象](https://en.wikipedia.org/wiki/Persistence_of_vision)。\n概括来讲，视频的本质就是: **以给定频率播放的一系列图片/帧**.\n\n<img src=\"https://upload.wikimedia.org/wikipedia/commons/1/1f/Linnet_kineograph_1886.jpg\" title=\"flip book\" height=\"280\"></img>\n\n当代插画 (1886)\n\n## 音频 - 耳朵所听\n\n尽管一个没有声音的视频也可以表达很多感受和情绪，但加入音频会带来更多的体验乐趣。\n\n声音是指以压力波形式通过空气或其他介质（例如气体、液体或者固体）传播的振动。\n\n> 在数字音频系统中，麦克风将声音转换为模拟电信号，然后通常使用脉冲编码调制（[PCM](https://en.wikipedia.org/wiki/Pulse-code_modulation)）的模数转换器（ADC）将模拟信号转换为数字信号。\n\n![audio analog to 
digital](https://upload.wikimedia.org/wikipedia/commons/thumb/c/c7/CPT-Sound-ADC-DAC.svg/640px-CPT-Sound-ADC-DAC.svg.png \"audio analog to digital\")\n\n>[图片来源](https://commons.wikimedia.org/wiki/File:CPT-Sound-ADC-DAC.svg)\n\n## 编解码 - 压缩数据\n\n> CODEC是用于压缩或解压缩数字音频/视频的硬件或软件。 它提供将原始（未压缩的）数字音频/视频与压缩格式相互转换的能力。\n>\n> https://en.wikipedia.org/wiki/Video_codec\n\n如果我们选择打包数百万张图片来生成一个视频文件，那么该文件的大小将会非常惊人。让我们来计算一下：\n\n假如我们创建一个 `1080x1920` (高x宽)的视频，每个像素占用 `3 bytes` 对颜色进行编码(或使用 [24 bit](https://en.wikipedia.org/wiki/Color_depth#True_color_.2824-bit.29) 真色彩, 这可以提供 16,777,216 种不同的颜色)，每秒 24 帧，视频时长为 30 分钟。\n\n```c\ntoppf = 1080 * 1920 // 每帧所有的像素点\ncpp = 3 // 每个像素的大小(bytes)\ntis = 30 * 60 // 时长(秒)\nfps = 24 // 每秒帧数\n\nrequired_storage = tis * fps * toppf * cpp\n```\n\n计算结果显示，此视频需要大约 `250.28G` 的存储空间或 `1.19Gbps` 的带宽。这就是我们为什么需要使用 [CODEC](https://github.com/leandromoreira/digital_video_introduction#how-does-a-video-codec-work) 的原因。\n\n## 容器 - 整合音视频\n\n> 容器或者封装格式描述了不同的数据元素和元数据是如何在计算机文件中共存的。\n> https://en.wikipedia.org/wiki/Digital_container_format\n\n**单个这样的文件包含所有的流**（主要是音频和视频），并提供**同步和通用元数据**，比如标题、分辨率等等。\n\n一般我们可以通过文件的后缀来判断文件格式：比如 video.webm 通常是一个使用 [`webm`](https://www.webmproject.org/) 容器格式的视频。\n\n![container](/img/container.png)\n\n# FFmpeg - 命令行\n\n> 这是一个完整的跨平台解决方案，可用于音视频的录制、转换和流式传输等。\n\n我们使用非常优秀的工具/库 [FFmpeg](https://www.ffmpeg.org/) 来处理多媒体文件。你可能对它有些了解，也可能已经直接或者间接的在使用它了（你用过 [Chrome](https://www.chromium.org/developers/design-documents/video) 吗？）\n\n`ffmpeg` 是该方案中简单而强大的命令行工具。例如，可以通过以下命令将一个 `mp4` 文件转换成 `avi` 格式：\n\n```bash\n$ ffmpeg -i input.mp4 output.avi\n```\n\n通过上述步骤，我们做了一次重新封装，从一个容器转换为另外一个容器。FFmpeg 也可以用于转码，我们稍后再针对它进行讨论。\n\n## **FFmpeg 命令行工具 101**\n\nFFmpeg 有一个非常完善的[文档](https://www.ffmpeg.org/ffmpeg.html)来介绍它是如何工作的。\n\n简单来说，FFmpeg 命令行程序需要以下参数格式来执行操作： `ffmpeg {1} {2} -i {3} {4} {5}`，分别是:\n\n1. 全局参数\n2. 输入文件参数\n3. 输入文件\n4. 输出文件参数\n5. 
输出文件\n\n选项 2、3、4、5 可以根据自己的需求进行添加。以下是一个易于理解的示例：\n\n``` bash\n# 警告：这个文件大约 300MB\n$ wget -O bunny_1080p_60fps.mp4 http://distribution.bbb3d.renderfarming.net/video/mp4/bbb_sunflower_1080p_60fps_normal.mp4\n\n$ ffmpeg \\\n-y \\ # 全局参数\n-c:a libfdk_aac \\ # 输入文件参数\n-i bunny_1080p_60fps.mp4 \\ # 输入文件\n-c:v libvpx-vp9 -c:a libvorbis \\ # 输出文件参数\nbunny_1080p_60fps_vp9.webm # 输出文件\n```\n\n这个命令行作用是将一个 `mp4` 文件（包含了 `aac` 格式的音频流，`h264` 编码格式的视频流）转换为 `webm`，同时改变了音视频的编码格式。\n\n我们可以简化上述命令行，但请注意 FFmpeg 会猜测或采用默认值。例如我们仅输入 `ffmpeg -i input.avi output.mp4` 时，FFmpeg 会使用哪种音频/视频编码来生成 `output.mp4` 呢？\n\nWerner Robitza 写了一篇 [关于 ffmpeg 编码和编辑的教程](https://slhck.info/ffmpeg-encoding-course/#/)。\n\n# 通用视频操作\n\n在处理音频/视频时，我们通常会执行一系列操作。\n\n## 转码\n\n![transcoding](/img/transcoding.png)\n\n**是什么?** 将其中一个流（视频流或音频流）从一种编码格式转换成另一种\n\n**为什么?** 有时候有些设备（TV，智能手机等等）不支持 X ，但是支持 Y 和一些更新的编码方式，这些方式能提供更好的压缩比\n\n**如何做?** 转换 `H264`（AVC）视频为 `H265`（HEVC）\n\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-c:v libx265 \\\nbunny_1080p_60fps_h265.mp4\n```\n\n## 转封装\n\n![transmuxing](/img/transmuxing.png)\n\n**是什么?** 将视频/音频从某一种格式（容器）转换成另一种\n\n**为什么?** 有时候有些设备（TV，智能手机等等）不支持 X ，但是支持 Y 和一些新的容器，这些格式提供了更现代的功能/特征\n\n**如何做?** 转换一个 `mp4` 为 `ts`\n\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-c copy \\ # 令 ffmpeg 跳过编解码过程\nbunny_1080p_60fps.ts\n```\n\n## 转码率\n\n![transrating](/img/transrating.png)\n\n**是什么?** 改变码率或生成其他版本。\n\n**为什么?** 有的人使用较为落后的智能手机通过 `2G` (edge) 的网络连接来观看视频，有些人使用 4K 电视及光纤网络来观看视频，因此我们需要提供不同的码率的视频来满足不同的需求。\n\n**如何做?** 生成视频码率在 964K 和 3856K 之间的版本。\n\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-minrate 964K -maxrate 3856K -bufsize 2000K \\\nbunny_1080p_60fps_transrating_964_3856.mp4\n```\n\n我们通常会同时使用改变码率和分辨率的操作。Werner Robitza 写了另一篇关于 [FFmpeg 码率控制](https://slhck.info/posts/) 的必知必会系列文章。\n\n## 转分辨率\n\n![transsizing](/img/transsizing.png)\n\n**是什么?** 将视频从一种分辨率转为其他分辨率的操作。正如上文所述，改变分辨率的操作通常与改变码率的操作同时使用。\n\n**为什么?** 原因与转码率相同，需要满足不同情况下的不同需求。\n\n**如何做?** 将视频从 `1080p` 转换为  `480p` \n\n```bash\n$ ffmpeg \\\n-i 
bunny_1080p_60fps.mp4 \\\n-vf scale=480:-1 \\\nbunny_1080p_60fps_transsizing_480.mp4\n```\n\n## 自适应流\n\n![adaptive streaming](/img/adaptive-streaming.png)\n\n**是什么?** 生成很多不同分辨率/码率的视频并分块，通过http进行传输。\n\n**为什么?** 为了在不同的终端和网络环境下提供更加灵活的观看体验，比如低端智能手机或者4K电视。这也使得扩展和部署更为简单方便，但是会增加延迟。\n\n**如何做?** 用 DASH 创建一个自适应的 WebM。\n\n```bash\n# 视频流\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 160x90 -b:v 250k -keyint_min 150 -g 150 -an -f webm -dash 1 video_160x90_250k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 320x180 -b:v 500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_320x180_500k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 750k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_750k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 1000k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_1000k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 1280x720 -b:v 1500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_1280x720_1500k.webm\n\n# 音频流\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:a libvorbis -b:a 128k -vn -f webm -dash 1 audio_128k.webm\n\n# DASH 格式\n$ ffmpeg \\\n -f webm_dash_manifest -i video_160x90_250k.webm \\\n -f webm_dash_manifest -i video_320x180_500k.webm \\\n -f webm_dash_manifest -i video_640x360_750k.webm \\\n -f webm_dash_manifest -i video_640x360_1000k.webm \\\n -f webm_dash_manifest -i video_1280x720_1500k.webm \\\n -f webm_dash_manifest -i audio_128k.webm \\\n -c copy -map 0 -map 1 -map 2 -map 3 -map 4 -map 5 \\\n -f webm_dash_manifest \\\n -adaptation_sets \"id=0,streams=0,1,2,3,4 id=1,streams=5\" \\\n manifest.mpd\n```\n\nPS: 该样例借鉴自 [使用 DASH 播放自适应 WebM](http://wiki.webmproject.org/adaptive-streaming/instructions-to-playback-adaptive-webm-using-dash)\n\n## 更多\n\nFFmpeg 还有很多[其他用法](https://github.com/leandromoreira/digital_video_introduction/blob/master/encoding_pratical_examples.md#split-and-merge-smoothly)。我会利用 FFmpeg 结合 iMovie 为 YouTube 编辑视频，你当然也可以更专业地使用它。\n\n# 
笨办法学 FFmpeg libav\n\n> Don't you wonder sometimes 'bout sound and vision?\n> **David Robert Jones**\n\n既然 [FFmpeg](#ffmpeg---command-line) 作为命令行工具对多媒体文件进行基本处理这么有效，那么我们如何在自己的程序里使用它呢？\n\nFFmpeg 是由几个可以集成到程序里的[lib库](https://www.ffmpeg.org/doxygen/trunk/index.html)组成的。通常在安装FFmpeg时，会自动安装这些库。我们将这些库统一叫做 **FFmpeg libav**。\n\n> 这个标题是对 Zed Shaw 的[笨办法学XX](https://learncodethehardway.org/)系列丛书的致敬，特别是笨办法学C语言。\n\n## 章节0 - 臭名昭著的 hello world\n\n这里说的 hello world 实际上不是在终端里输出 “hello world” :tongue:，而是**输出视频信息**，例如：格式、时长、分辨率、音频轨道，最后我们将**解码一些帧，并保存为图片**。\n\n\n### FFmpeg libav 架构\n\n在我们开始之前，我们需要先了解一下**FFmpeg libav 架构**的工作流程和各个组件之间的工作方式。\n\n下面是一张视频解码的处理流程图：\n\n![ffmpeg libav architecture - decoding process](/img/decoding.png)\n\n首先，我们需要加载媒体文件到 [AVFormatContext](https://ffmpeg.org/doxygen/trunk/structAVFormatContext.html) 组件（为便于理解，容器看作是文件格式即可）。这个过程并不是加载整个文件，它通常只是加载了文件头。\n\n我们加载**容器的头部信息**后，就可以访问媒体文件流（流可以认为是基本的音频和视频数据）。每个流在 [`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html) 组件中可用。\n\n> 流是数据流的一个昵称\n\n假设我们的视频文件包含两个流：一个是 [AAC](https://en.wikipedia.org/wiki/Advanced_Audio_Coding) 音频流，一个是 [H264（AVC）](https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC)视频流。我们可以从每一个流中提取出被称为数据包的数据片段（切片），这些数据包将被加载到 [AVPacket](https://ffmpeg.org/doxygen/trunk/structAVPacket.html) 组件中。\n\n**数据包中的数据仍然是被编码的**（被压缩），为了解码这些数据，我们需要将这些数据给到 [`AVCodec`](https://ffmpeg.org/doxygen/trunk/structAVCodec.html)。\n\n`AVCodec` 将解码这些数据到 [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html)，最后我们将得到**解码后的帧**。注意，视频流和音频流共用此处理流程。\n\n### 构建要求\n\n由于有些人编译或者运行示例时会遇到许多[问题](https://github.com/leandromoreira/ffmpeg-libav-tutorial/issues?utf8=%E2%9C%93&q=is%3Aissue+is%3Aopen+compiling)，因此我们使用 `Docker` 来构建开发/运行环境。我们将使用一个 Big Buck Bunny 的视频来作为示例，如果你没有这个视频，运行 `make fetch_small_bunny_video` 来获取。\n\n### 章节 0 - 代码一览\n\n> 展示[代码](/0_hello_world.c)并执行。\n>\n> ```bash\n> $ make run_hello\n> ```\n\n我们将跳过一些细节，不过不用担心，[代码](https://github.com/leandromoreira/ffmpeg-libav-tutorial/blob/master/0_hello_world.c)都在Github上维护。\n\n我们首先为 
[`AVFormatContext`](https://ffmpeg.org/doxygen/trunk/structAVFormatContext.html) 分配内存，利用它可以获得相关格式（容器）的信息。\n\n```c\nAVFormatContext *pFormatContext = avformat_alloc_context();\n```\n\n我们将打开一个文件并读取文件的头信息，利用相关格式的简要信息填充 `AVFormatContext`（注意，编解码器通常不会被打开）。需要使用 [`avformat_open_input`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga31d601155e9035d5b0e7efedc894ee49) 函数，该函数需要 `AVFormatContext`、文件名和两个可选参数：[`AVInputFormat`](https://ffmpeg.org/doxygen/trunk/structAVInputFormat.html)（如果为NULL，FFmpeg将猜测格式）、[`AVDictionary`](https://ffmpeg.org/doxygen/trunk/structAVDictionary.html)（解封装参数）。\n\n```c\navformat_open_input(&pFormatContext, filename, NULL, NULL);\n```\n\n可以输出视频的格式和时长：\n\n```c\nprintf(\"Format %s, duration %lld us\", pFormatContext->iformat->long_name, pFormatContext->duration);\n```\n\n为了访问数据流，我们需要从媒体文件中读取数据。需要利用函数 [`avformat_find_stream_info`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#gad42172e27cddafb81096939783b157bb)完成此步骤。`pFormatContext->nb_streams` 将获取所有的流信息，并且通过  `pFormatContext->streams[i]` 获取到指定的 `i` 数据流（[`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html))。\n\n```c\navformat_find_stream_info(pFormatContext,  NULL);\n```\n\n可以使用循环来获取所有流数据：\n\n```c\nfor (int i = 0; i < pFormatContext->nb_streams; i++)\n{\n  //\n}\n```\n\n针对每个流维护一个对应的 [`AVCodecParameters`](https://ffmpeg.org/doxygen/trunk/structAVCodecParameters.html)，该结构体描述了被编码流的各种属性。\n\n```c\nAVCodecParameters *pLocalCodecParameters = pFormatContext->streams[i]->codecpar;\n```\n\n通过codec id和 [`avcodec_find_decoder`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga19a0ca553277f019dd5b0fec6e1f9dca) 函数可以找到对应已经注册的解码器，返回 [`AVCodec`](https://ffmpeg.org/doxygen/trunk/structAVCodec.html) 指针，该组件能让我们知道如何编解码这个流。\n\n```c\nAVCodec *pLocalCodec = avcodec_find_decoder(pLocalCodecParameters->codec_id);\n```\n\n现在可以输出一些编解码信息。\n\n```c\n// 用于视频和音频\nif (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_VIDEO) {\n  printf(\"Video Codec: resolution %d x %d\", 
pLocalCodecParameters->width, pLocalCodecParameters->height);\n} else if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_AUDIO) {\n  printf(\"Audio Codec: %d channels, sample rate %d\", pLocalCodecParameters->channels, pLocalCodecParameters->sample_rate);\n}\n// 通用\nprintf(\"\\tCodec %s ID %d bit_rate %lld\", pLocalCodec->long_name, pLocalCodec->id, pCodecParameters->bit_rate);\n```\n\n利用刚刚获取的 `AVCodec` 为 [`AVCodecContext`](https://ffmpeg.org/doxygen/trunk/structAVCodecContext.html) 分配内存，它将维护解码/编码过程的上下文。 然后需要使用 [`avcodec_parameters_to_context`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#gac7b282f51540ca7a99416a3ba6ee0d16)和被编码流的参数(`AVCodecParameters`) 来填充 `AVCodecContext`。\n\n完成上下文填充后，使用 [`avcodec_open2`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d) 来打开解码器。\n\n```c\nAVCodecContext *pCodecContext = avcodec_alloc_context3(pCodec);\navcodec_parameters_to_context(pCodecContext, pCodecParameters);\navcodec_open2(pCodecContext, pCodec, NULL);\n```\n\n现在我们将从流中读取数据包并将它们解码为帧。但首先，需要为  [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html) 和 [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html) 分配内存。\n\n```c\nAVPacket *pPacket = av_packet_alloc();\nAVFrame *pFrame = av_frame_alloc();\n```\n\n使用函数 [`av_read_frame`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga4fdb3084415a82e3810de6ee60e46a61) 读取帧数据来填充数据包。\n\n```c\nwhile (av_read_frame(pFormatContext, pPacket) >= 0) {\n  //...\n}\n```\n\n使用函数 [`avcodec_send_packet`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3) 来把**原始数据包**（未解压的帧）发送给解码器。\n\n```c\navcodec_send_packet(pCodecContext, pPacket);\n```\n\n使用函数 [`avcodec_receive_frame`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c) 从解码器接受原始数据帧（解压后的帧）。\n\n```c\navcodec_receive_frame(pCodecContext, pFrame);\n```\n\n可以输出 frame 
编号、[PTS](https://en.wikipedia.org/wiki/Presentation_timestamp)、DTS、[frame 类型](https://en.wikipedia.org/wiki/Video_compression_picture_types)等其他信息。\n\n```c\nprintf(\n    \"Frame %c (%d) pts %d dts %d key_frame %d [coded_picture_number %d, display_picture_number %d]\",\n    av_get_picture_type_char(pFrame->pict_type),\n    pCodecContext->frame_number,\n    pFrame->pts,\n    pFrame->pkt_dts,\n    pFrame->key_frame,\n    pFrame->coded_picture_number,\n    pFrame->display_picture_number\n);\n```\n\n最后，我们可以将解码后的帧保存为[灰度图](https://en.wikipedia.org/wiki/Netpbm#PGM_example)。处理过程非常简单，使用 `pFrame->data`，它的索引与 [Y, Cb 和 Cr 分量](https://en.wikipedia.org/wiki/YCbCr) 相关联。我们只选择 `0`（Y 分量）数据保存灰度图。\n\n```c\nsave_gray_frame(pFrame->data[0], pFrame->linesize[0], pFrame->width, pFrame->height, frame_filename);\n\nstatic void save_gray_frame(unsigned char *buf, int wrap, int xsize, int ysize, char *filename)\n{\n    FILE *f;\n    int i;\n    f = fopen(filename,\"w\");\n    // 编写 pgm 格式所需的最小文件头\n    // portable graymap format -> https://en.wikipedia.org/wiki/Netpbm_format#PGM_example\n    fprintf(f, \"P5\\n%d %d\\n%d\\n\", xsize, ysize, 255);\n\n    // 逐行写入\n    for (i = 0; i < ysize; i++)\n        fwrite(buf + i * wrap, 1, xsize, f);\n    fclose(f);\n}\n```\n\n现在将得到一张2MB大小的灰度图：\n\n![saved frame](/img/generated_frame.png)\n\n## 章节 1 - 音视频同步\n\n> **Be the player** - 一个年轻 JS 开发者开发的新 MSE 视频播放器。\n\n在我们学习 [重新封装](#章节-2---重新封装) 之前，我们来谈谈timing（时机/时间点），或者说播放器如何知道在正确的时间来播放每一帧。\n\n在上一个例子中，我们保存了一些帧：\n\n![frame 0](/img/hello_world_frames/frame0.png)\n![frame 1](/img/hello_world_frames/frame1.png)\n![frame 2](/img/hello_world_frames/frame2.png)\n![frame 3](/img/hello_world_frames/frame3.png)\n![frame 4](/img/hello_world_frames/frame4.png)\n![frame 
5](/img/hello_world_frames/frame5.png)\n\n当我们在设计一个播放器的时候，需要**以给定的速度播放每一帧**。否则，我们很难获得好的体验，因为在观看的过程中很可能播放得太快或者太慢。\n\n因此我们需要引入一些机制来流畅地播放每一帧。每一帧都将拥有一个**播放时间戳**（PTS）。它是一个将**timebase**（时基，FFmpeg中一种特殊的时间度量单位，**timescale**可以认为是它的倒数）作为单位的递增数字。\n\n我们来模仿几个场景，通过以下示例可以更迅速地理解。\n\n例如 `fps=60/1` ， `timebase=1/60000`，PTS 将以 `timescale / fps = 1000` 进行递增，因此每一帧对应的 PTS 如下（假设开始为0）:\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 1000, PTS_TIME = PTS * timebase = 0.016`\n* `frame=2, PTS = 2000, PTS_TIME = PTS * timebase = 0.033`\n\n相同情况下，将 timebase 修改为 `1/60`：\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 1, PTS_TIME = PTS * timebase = 0.016`\n* `frame=2, PTS = 2, PTS_TIME = PTS * timebase = 0.033`\n* `frame=3, PTS = 3, PTS_TIME = PTS * timebase = 0.050`\n\n如 `fps=25`，`timebase=1/75`，PTS 将以 `timescale / fps = 3` 进行递增，因此每一帧对应的 PTS 如下（假设开始为0）：\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 3, PTS_TIME = PTS * timebase = 0.04`\n* `frame=2, PTS = 6, PTS_TIME = PTS * timebase = 0.08`\n* `frame=3, PTS = 9, PTS_TIME = PTS * timebase = 0.12`\n* ...\n* `frame=24, PTS = 72, PTS_TIME = PTS * timebase = 0.96`\n* ...\n* `frame=4064, PTS = 12192, PTS_TIME = PTS * timebase = 162.56`\n\n通过 `pts_time`， 我们可以找到一种渲染它和音频的 `pts_time` 或系统时钟进行同步的方式。FFmpeg libav 提供了获取这些信息的接口：\n\n- fps = [`AVStream->avg_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a946e1e9b89eeeae4cab8a833b482c1ad)\n- tbr = [`AVStream->r_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#ad63fb11cc1415e278e09ddc676e8a1ad)\n- tbn = [`AVStream->time_base`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a9db755451f14e2bf590d4b85d82b32e6)\n\n被保存的帧按照 DTS 顺序发送(frames：1,6,4,2,3,5)，按照 PTS 顺序播放(frames：1,2,3,4,5)。同时，我们可以发现B帧相对于P帧和I帧压缩率更高，更加节省空间。\n\n```\nLOG: AVStream->r_frame_rate 60/1\nLOG: AVStream->time_base 1/60000\n...\nLOG: Frame 1 (type=I, size=153797 bytes) pts 6000 key_frame 1 [DTS 0]\nLOG: Frame 2 (type=B, size=8117 bytes) pts 7000 key_frame 0 [DTS 3]\nLOG: Frame 3 (type=B, 
size=8226 bytes) pts 8000 key_frame 0 [DTS 4]\nLOG: Frame 4 (type=B, size=17699 bytes) pts 9000 key_frame 0 [DTS 2]\nLOG: Frame 5 (type=B, size=6253 bytes) pts 10000 key_frame 0 [DTS 5]\nLOG: Frame 6 (type=P, size=34992 bytes) pts 11000 key_frame 0 [DTS 1]\n```\n\n## 章节 2 - 重新封装\n\n重新封装是将文件从一种格式转换为另一种格式。例如：我们可以非常容易地利用 FFmpeg 将 [MPEG-4](https://en.wikipedia.org/wiki/MPEG-4_Part_14) 格式的视频 转换成 [MPEG-TS](https://en.wikipedia.org/wiki/MPEG_transport_stream) 格式。\n\n```bash\nffmpeg -i input.mp4 -c copy output.ts\n```\n\n以上命令将在不编解码的情况下（`-c copy`）来对 mp4 做解封装，然后重新封装为 `mpegts` 文件。如果不用 `-f` 参数来指定格式的话，ffmpeg 会根据文件扩展名来进行猜测。\n\nFFmpeg 或 libav 的一般用法遵循以下模式/架构或工作流：\n\n* **[协议层](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** -  接收一个输入（例如一个文件，也可以是 `rtmp` 或 `http`）\n* **[格式层](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - 解封装数据内容，暴露出元数据和流信息\n* **[编码层](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - 解码原始数据流 <sup>*可选*</sup>\n* **[像素层](https://ffmpeg.org/doxygen/trunk/group__lavfi.html)** - 可以对原始帧应用一些 `filters`（例如调整大小）<sup>*可选*</sup>\n* 然后反过来做相同的操作\n* **[编码层](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - 编码（重新编码或者转码）原始帧<sup>*可选*</sup>\n* **[格式层](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - 封装（或重新封装）原始数据流（压缩数据）\n* **[协议层](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** - 将封装后数据输出 (另外的文件或远程服务器)\n\n![ffmpeg libav workflow](/img/ffmpeg_libav_workflow.jpeg)\n\n> 这张图的灵感来自 [Leixiaohua's](https://leixiaohua1020.github.io/#ffmpeg-development-examples) 和 [Slhck's](https://slhck.info/ffmpeg-encoding-course/#/9) 的作品。\n\n现在我们将使用 libav 编写一个示例，完成与此命令行相同的效果:  `ffmpeg -i input.mp4 -c copy output.ts`\n\n我们读取一个输入文件（`input_format_context`)，并且将修改保存至输出（`output_format_context`)。\n\n```c\nAVFormatContext *input_format_context = NULL;\nAVFormatContext *output_format_context = NULL;\n```\n\n通常我们的做法是分配内存并打开输入文件。对于这个示例，我们将打开一个输入文件并为一个输出文件分配内存。\n\n```c\nif ((ret = avformat_open_input(&input_format_context, in_filename, NULL, NULL)) < 0) {\n  fprintf(stderr, \"Could not 
open input file '%s'\", in_filename);\n  goto end;\n}\nif ((ret = avformat_find_stream_info(input_format_context, NULL)) < 0) {\n  fprintf(stderr, \"Failed to retrieve input stream information\");\n  goto end;\n}\n\navformat_alloc_output_context2(&output_format_context, NULL, NULL, out_filename);\nif (!output_format_context) {\n  fprintf(stderr, \"Could not create output context\\n\");\n  ret = AVERROR_UNKNOWN;\n  goto end;\n}\n```\n\n我们将重新封装视频、音频、字幕流，因此需要将用到的这些流存入一个数组中。\n\n```c\nnumber_of_streams = input_format_context->nb_streams;\nstreams_list = av_mallocz_array(number_of_streams, sizeof(*streams_list));\n```\n\n分配完所需要的内存之后，我们将遍历所有的流，然后利用 [avformat_new_stream](https://ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827) 为每一个流创建一个对应的输出流。注意，当前只需要针对视频、音频、字幕流进行处理。\n\n```c\nfor (i = 0; i < input_format_context->nb_streams; i++) {\n  AVStream *out_stream;\n  AVStream *in_stream = input_format_context->streams[i];\n  AVCodecParameters *in_codecpar = in_stream->codecpar;\n  if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&\n      in_codecpar->codec_type != AVMEDIA_TYPE_VIDEO &&\n      in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {\n    streams_list[i] = -1;\n    continue;\n  }\n  streams_list[i] = stream_index++;\n  out_stream = avformat_new_stream(output_format_context, NULL);\n  if (!out_stream) {\n    fprintf(stderr, \"Failed allocating output stream\\n\");\n    ret = AVERROR_UNKNOWN;\n    goto end;\n  }\n  ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar);\n  if (ret < 0) {\n    fprintf(stderr, \"Failed to copy codec parameters\\n\");\n    goto end;\n  }\n}\n```\n\n现在，我们需要创建一个输出文件。\n\n```c\nif (!(output_format_context->oformat->flags & AVFMT_NOFILE)) {\n  ret = avio_open(&output_format_context->pb, out_filename, AVIO_FLAG_WRITE);\n  if (ret < 0) {\n    fprintf(stderr, \"Could not open output file '%s'\", out_filename);\n    goto end;\n  }\n}\n\nret = avformat_write_header(output_format_context, NULL);\nif (ret < 
0) {\n  fprintf(stderr, \"Error occurred when opening output file\\n\");\n  goto end;\n}\n```\n\n完成上述操作之后，我们就可以将输入流逐个数据包复制到输出流。我们通过（`av_read_frame`）循环读取每一个数据包。对于每一数据包，我们都要重新计算 PTS 和 DTS，最终通过 `av_interleaved_write_frame` 写入输出格式的上下文。\n\n```c\nwhile (1) {\n  AVStream *in_stream, *out_stream;\n  ret = av_read_frame(input_format_context, &packet);\n  if (ret < 0)\n    break;\n  in_stream  = input_format_context->streams[packet.stream_index];\n  if (packet.stream_index >= number_of_streams || streams_list[packet.stream_index] < 0) {\n    av_packet_unref(&packet);\n    continue;\n  }\n  packet.stream_index = streams_list[packet.stream_index];\n  out_stream = output_format_context->streams[packet.stream_index];\n  /* 赋值数据包 */\n  packet.pts = av_rescale_q_rnd(packet.pts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);\n  packet.dts = av_rescale_q_rnd(packet.dts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);\n  packet.duration = av_rescale_q(packet.duration, in_stream->time_base, out_stream->time_base);\n  // https://ffmpeg.org/doxygen/trunk/structAVPacket.html#ab5793d8195cf4789dfb3913b7a693903\n  packet.pos = -1;\n\n  //https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1\n  ret = av_interleaved_write_frame(output_format_context, &packet);\n  if (ret < 0) {\n    fprintf(stderr, \"Error muxing packet\\n\");\n    break;\n  }\n  av_packet_unref(&packet);\n}\n```\n\n最后我们要使用函数 [av_write_trailer](https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga7f14007e7dc8f481f054b21614dfec13) 输出文件尾。\n\n```c\nav_write_trailer(output_format_context);\n```\n\n现在可以进行测试了，首先我们将文件从 MP4 转换成 MPEG-TS 格式。使用 libav 来代替命令行 `ffmpeg input.mp4 -c copy output.ts `的作用。\n\n```bash\nmake run_remuxing_ts\n```\n\n它起作用了！！！不相信吗？我们可以使用 ffprobe 来检测一下：\n\n```bash\nffprobe -i remuxed_small_bunny_1080p_60fps.ts\n\nInput #0, mpegts, from 'remuxed_small_bunny_1080p_60fps.ts':\n  Duration: 
00:00:10.03, start: 0.000000, bitrate: 2751 kb/s\n  Program 1\n    Metadata:\n      service_name    : Service01\n      service_provider: FFmpeg\n    Stream #0:0[0x100]: Video: h264 (High) ([27][0][0][0] / 0x001B), yuv420p(progressive), 1920x1080 [SAR 1:1 DAR 16:9], 60 fps, 60 tbr, 90k tbn, 120 tbc\n    Stream #0:1[0x101]: Audio: ac3 ([129][0][0][0] / 0x0081), 48000 Hz, 5.1(side), fltp, 320 kb/s\n```\n\n下图中总结了我们所做的工作，我们可以回顾一下之前关于[libav如何工作](https://github.com/leandromoreira/ffmpeg-libav-tutorial#ffmpeg-libav-architecture)的介绍。但图中也表明我们跳过了编解码的部分。\n\n![remuxing libav components](/img/remuxing_libav_components.png)\n\n在结束本章之前，我想展示一下重新封装中的一个重要功能 — — **使用选项**。比如我们想要 [MPEG-DASH](https://developer.mozilla.org/en-US/docs/Web/Apps/Fundamentals/Audio_and_video_delivery/Setting_up_adaptive_streaming_media_sources#MPEG-DASH_Encoding) 格式的文件，需要使用 [fragmented mp4](https://stackoverflow.com/a/35180327)（有时称为fmp4）而不是 MPEG-TS 或者普通的 MPEG-4。\n\n使用[命令行](https://developer.mozilla.org/en-US/docs/Web/API/Media_Source_Extensions_API/Transcoding_assets_for_MSE#Fragmenting)可以简单地实现该功能：\n\n```\nffmpeg -i non_fragmented.mp4 -movflags frag_keyframe+empty_moov+default_base_moof fragmented.mp4\n```\n\n使用 libav 进行实现也非常简单，只需要在写入输出头时（复制数据包之前），传递相应选项即可。\n\n```c\nAVDictionary* opts = NULL;\nav_dict_set(&opts, \"movflags\", \"frag_keyframe+empty_moov+default_base_moof\", 0);\nret = avformat_write_header(output_format_context, &opts);\n```\n\n现在可以生成 fragmented mp4 文件：\n\n```bash\nmake run_remuxing_fragmented_mp4\n```\n\n可以使用非常优秀的 [gpac/mp4box.js](https://gpac.github.io/mp4box.js/)，或者在线工具 [http://mp4parser.com/](http://mp4parser.com/) 来对比差异。首先加载普通mp4：\n\n![mp4 boxes](/img/boxes_normal_mp4.png)\n\n如你所见，`mdat` atom/box 是**存放实际音视频帧数据**的地方。现在我们加载 fragmented mp4，看看它是如何组织 `mdat` 的。\n\n![fragmented mp4 boxes](/img/boxes_fragmente_mp4.png)\n\n## 章节 3 - 转码\n\n> #### 展示代码并执行\n>\n> ```bash\n> $ make run_transcoding\n> ```\n>\n> 
我们将跳过一些细节，但是请不用担心：[代码](https://github.com/leandromoreira/ffmpeg-libav-tutorial/blob/master/3_transcoding.c)维护在 github。\n\n在这一章，我们将用 C 写一个精简的转码器，使用 **FFmpeg/libav库**，特别是[libavcodec](https://ffmpeg.org/libavcodec.html)、libavformat 和 libavutil，将 H264 编码的视频转换为 H265。\n\n![media transcoding flow](/img/transcoding_flow.png)\n\n> 简单回顾：[**AVFormatContext**](https://www.ffmpeg.org/doxygen/trunk/structAVFormatContext.html) 是多媒体文件格式的抽象（例如：MKV，MP4，Webm，TS）。 [**AVStream**](https://www.ffmpeg.org/doxygen/trunk/structAVStream.html) 代表给定格式的数据类型（例如：音频，视频，字幕，元数据）。 [**AVPacket**](https://www.ffmpeg.org/doxygen/trunk/structAVPacket.html) 是从 `AVStream` 获得的压缩数据的切片，可由 [**AVCodec**](https://www.ffmpeg.org/doxygen/trunk/structAVCodec.html)（例如av1，h264，vp9，hevc）解码，从而生成称为 [**AVFrame**](https://www.ffmpeg.org/doxygen/trunk/structAVFrame.html) 的原始数据。\n\n### 转封装\n\n我们将从简单的转封装操作开始，然后在此代码基础上进行构建，第一步需要**加载输入文件**。\n\n```c\n// 为 AVFormatContext 分配内存\navfc = avformat_alloc_context();\n// 打开一个输入流并读取头信息\navformat_open_input(avfc, in_filename, NULL, NULL);\n// 读取文件数据包以获取流信息\navformat_find_stream_info(avfc, NULL);\n```\n\n现在需要设置解码器，`AVFormatContext` 将使我们能够访问所有 `AVStream` 组件，获取它们的 `AVCodec` 并创建特定的 `AVCodecContext`，最后我们可以打开给定的编解码器进行解码。\n\n>  [**AVCodecContext**](https://www.ffmpeg.org/doxygen/trunk/structAVCodecContext.html) 保存相关媒体文件的数据包括：码率，帧率，采样率，通道、高度等等。\n\n```c\nfor (int i = 0; i < avfc->nb_streams; i++)\n{\n  AVStream *avs = avfc->streams[i];\n  AVCodec *avc = avcodec_find_decoder(avs->codecpar->codec_id);\n  AVCodecContext *avcc = avcodec_alloc_context3(*avc);\n  avcodec_parameters_to_context(*avcc, avs->codecpar);\n  avcodec_open2(*avcc, *avc, NULL);\n}\n```\n\n现在我们需要准备输出文件，首先为 `AVFormatContext` **分配内存**。我们为输出的格式创建**每一个流**。为了正确打包这些流，我们从解码器中**复制编解码参数**。\n\n通过设置 `AV_CODEC_FLAG_GLOBAL_HEADER` 来告诉编码器可以使用这个全局头信息，最终打开输出文件写入文件头。\n\n```c\navformat_alloc_output_context2(&encoder_avfc, NULL, NULL, out_filename);\n\nAVStream *avs = avformat_new_stream(encoder_avfc, NULL);\navcodec_parameters_copy(avs->codecpar, 
decoder_avs->codecpar);\n\nif (encoder_avfc->oformat->flags & AVFMT_GLOBALHEADER)\n  encoder_avfc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;\n\navio_open(&encoder_avfc->pb, encoder->filename, AVIO_FLAG_WRITE);\navformat_write_header(encoder->avfc, &muxer_opts);\n\n```\n\n我们从解码器获得 `AVPacket`，调整时间戳后写到输出文件。尽管 `av_interleaved_write_frame` 从函数名上来看是 “写入帧信息”，但我们实际是在存储数据包。最后通过写入文件尾来结束转封装操作。\n\n```c\nAVFrame *input_frame = av_frame_alloc();\nAVPacket *input_packet = av_packet_alloc();\n\nwhile (av_read_frame(decoder_avfc, input_packet) >= 0)\n{\n  av_packet_rescale_ts(input_packet, decoder_video_avs->time_base, encoder_video_avs->time_base);\n  av_interleaved_write_frame(*avfc, input_packet) < 0));\n}\n\nav_write_trailer(encoder_avfc);\n```\n\n### 转码\n\n前面的章节展示了一个转封装的程序，现在我们将添加对文件做编码的功能，具体是将视频从 `h264` 编码转换为 `h265`。\n\n在我们设置解码器之后及准备输出文件之前，需要设置编码器。\n\n* 使用 [`avformat_new_stream`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827) 和编码器创建 `AVStream`\n* 使用名为 `libx265` 的 `AVCodec`，利用 [`avcodec_find_encoder_by_name`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__encoding.html#gaa614ffc38511c104bdff4a3afa086d37) 获取\n* 利用 [`avcodec_alloc_context3`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#gae80afec6f26df6607eaacf39b561c315) 及编解码器创建 `AVCodecContext`\n* 为编解码设置基础属性\n* 打开编解码器，使用 [`avcodec_open2`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d) 和 [`avcodec_parameters_from_context`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga0c7058f764778615e7978a1821ab3cfe) 将参数从上下文复制到流中\n\n```c\nAVRational input_framerate = av_guess_frame_rate(decoder_avfc, decoder_video_avs, NULL);\nAVStream *video_avs = avformat_new_stream(encoder_avfc, NULL);\n\nchar *codec_name = \"libx265\";\nchar *codec_priv_key = \"x265-params\";\n// 我们将对 x265 使用内置的参数\n// 禁用场景切换并且把 GOP 调整为 60 帧\nchar *codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0\";\n\nAVCodec *video_avc = 
avcodec_find_encoder_by_name(codec_name);\nAVCodecContext *video_avcc = avcodec_alloc_context3(video_avc);\n// 编码参数\nav_opt_set(sc->video_avcc->priv_data, codec_priv_key, codec_priv_value, 0);\nvideo_avcc->height = decoder_ctx->height;\nvideo_avcc->width = decoder_ctx->width;\nvideo_avcc->pix_fmt = video_avc->pix_fmts[0];\n// 控制码率\nvideo_avcc->bit_rate = 2 * 1000 * 1000;\nvideo_avcc->rc_buffer_size = 4 * 1000 * 1000;\nvideo_avcc->rc_max_rate = 2 * 1000 * 1000;\nvideo_avcc->rc_min_rate = 2.5 * 1000 * 1000;\n// 时间基数\nvideo_avcc->time_base = av_inv_q(input_framerate);\nvideo_avs->time_base = sc->video_avcc->time_base;\n\navcodec_open2(sc->video_avcc, sc->video_avc, NULL);\navcodec_parameters_from_context(sc->video_avs->codecpar, sc->video_avcc);\n```\n\n为了视频流转码，我们需要拓展解码的步骤：\n\n- 利用 [`avcodec_send_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3) 发送空的 `AVPacket` 给解码器\n- 利用 [`avcodec_receive_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c) 接收未压缩的 `AVFrame`\n- 开始转码原始数据\n- 使用 [`avcodec_send_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga9395cb802a5febf1f00df31497779169) 发送原始数据\n- 基于编解码器和 `AVPacket`，利用 [`avcodec_receive_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga5b8eff59cf259747cf0b31563e38ded6) 接受编码数据\n- 设置时间戳，调用 [`av_packet_rescale_ts`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__packet.html#gae5c86e4d93f6e7aa62ef2c60763ea67e)\n- 写入输出文件 [`av_interleaved_write_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1)\n\n```c\nAVFrame *input_frame = av_frame_alloc();\nAVPacket *input_packet = av_packet_alloc();\n\nwhile (av_read_frame(decoder_avfc, input_packet) >= 0)\n{\n  int response = avcodec_send_packet(decoder_video_avcc, input_packet);\n  while (response >= 0) {\n    response = avcodec_receive_frame(decoder_video_avcc, input_frame);\n    
if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n      break;\n    } else if (response < 0) {\n      return response;\n    }\n    if (response >= 0) {\n      encode(encoder_avfc, decoder_video_avs, encoder_video_avs, decoder_video_avcc, input_packet->stream_index);\n    }\n    av_frame_unref(input_frame);\n  }\n  av_packet_unref(input_packet);\n}\nav_write_trailer(encoder_avfc);\n\n// used function\nint encode(AVFormatContext *avfc, AVStream *dec_video_avs, AVStream *enc_video_avs, AVCodecContext video_avcc int index) {\n  AVPacket *output_packet = av_packet_alloc();\n  int response = avcodec_send_frame(video_avcc, input_frame);\n\n  while (response >= 0) {\n    response = avcodec_receive_packet(video_avcc, output_packet);\n    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n      break;\n    } else if (response < 0) {\n      return -1;\n    }\n\n    output_packet->stream_index = index;\n    output_packet->duration = enc_video_avs->time_base.den / enc_video_avs->time_base.num / dec_video_avs->avg_frame_rate.num * dec_video_avs->avg_frame_rate.den;\n\n    av_packet_rescale_ts(output_packet, dec_video_avs->time_base, enc_video_avs->time_base);\n    response = av_interleaved_write_frame(avfc, output_packet);\n  }\n  av_packet_unref(output_packet);\n  av_packet_free(&output_packet);\n  return 0;\n}\n\n```\n\n我们将媒体流从 `h264` 编码转换为 `h265`，和预期的一样，`h265` 编码的文件相较于 h264 更小。本次[创建的程序](/3_transcoding.c)能够完成以下转换：\n\n```c\n  /*\n   * H264 -> H265\n   * Audio -> remuxed (untouched)\n   * MP4 - MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx265\";\n  sp.codec_priv_key = \"x265-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * Audio -> remuxed (untouched)\n   * MP4 - MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  
sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * Audio -> remuxed (untouched)\n   * MP4 - fragmented MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n  sp.muxer_opt_key = \"movflags\";\n  sp.muxer_opt_value = \"frag_keyframe+empty_moov+delay_moov+default_base_moof\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * Audio -> AAC\n   * MP4 - MPEG-TS\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 0;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n  sp.audio_codec = \"aac\";\n  sp.output_extension = \".ts\";\n\n  /* WIP :P  -> it's not playing on VLC, the final bit rate is huge\n   * H264 -> VP9\n   * Audio -> Vorbis\n   * MP4 - WebM\n   */\n  //StreamingParams sp = {0};\n  //sp.copy_audio = 0;\n  //sp.copy_video = 0;\n  //sp.video_codec = \"libvpx-vp9\";\n  //sp.audio_codec = \"libvorbis\";\n  //sp.output_extension = \".webm\";\n```\n\n> 老实说，完成这个教程[比我想象中的难](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54)，必须深入理解 [FFmpeg 命令行源码](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54#issuecomment-570746749)并进行大量测试。而且我想我肯定遗漏了一些细节，因为我必须强制执行 `force-cfr` 才能使 h264 正常工作，并且现在仍然会出现一些 warning 信息，例如 `warning messages (forced frame type (5) at 80 was changed to frame type (3))`。\n"
  },
  {
    "path": "README-es.md",
    "content": "[![license](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)\n\n\n\n\nEstaba buscando un tutorial/libro que pudiera enseñarme como usar [FFmpeg](https://www.ffmpeg.org/) como una librería (alias libav) y encontré el tutorial de [\"How to write a video player in less than 1k lines\"](http://dranger.com/ffmpeg/). Desafortunadamente estaba obsoleto, así que decidí escribir el siguiente tutorial.\n\n\n\nLa mayoría del código aquí estará en C, **pero no te preocupes**: tu podrás entenderlo fácilmente y aplicarlo a tu lenguaje preferido. FFmpeg libav tiene montones de bindings para muchos lenguajes como [python](https://pyav.org/), [go](https://github.com/imkira/go-libav) e incluso si tu lenguaje no lo tiene, aún es posible darle soporte mediante `ffi` (aquí hay un ejemplo en [Lua](https://github.com/daurnimator/ffmpeg-lua-ffi/blob/master/init.lua)).\n\nEmpezaremos con una lección rápida de lo que es video, audio, códec y contenedor,  entonces iremos a un curso rápido en como usar el comando `FFmpeg` y finalmente, escribiremos algo de código, siéntete libre de saltar directamente[ ](http://newmediarockstars.com/wp-content/uploads/2015/11/nintendo-direct-iwata.jpg \"Secret Leandro´s Easter Egg\")a la sección [Aprender FFmpeg libav de la manera difícil.](#learn-ffmpeg-libav-the-hard-way) \n\nAlgunas personas solían decir que la transmisión de video por internet era el futuro de la televisión tradicional, en cualquier caso, FFmpeg es algo que vale la pena estudiar.\n\n__Tabla de Contenido__\n\n* [Intro](#intro)\n  * [video - ¡lo que ves!](#video---¡lo-que-ves!)\n  * [audio - ¡lo que escuchas!](#audio---¡lo-que-escuchas!)\n  * [códec - comprimiendo datos](#códec---comprimiendo-datos)\n  * [Contenedor - un lugar cómodo para audio y video](#Contenedor---un-lugar-cómodo-para-audio-y-video)\n* [FFmpeg - línea de comandos](#FFmpeg---línea-de-comandos)\n  * [FFmpeg herramienta de línea de 
comandos 101](#FFmpeg-herramienta-de-línea-de-comandos-101)\n* [Operaciones de video comunes](#Operaciones-de-video-comunes)\n  * [Transcoding](#transcoding)\n  * [Transmuxing](#transmuxing)\n  * [Transrating](#transrating)\n  * [Transsizing](#transsizing)\n  * [Round Bonus:  Transmisión adaptativa](#Round-Bonus-Transmisión-adaptativa)\n  * [Ve más allá](#Ve-más-allá)\n* [Aprende FFmpeg libav de la manera difícil](#Aprende-FFmpeg-libav-de-la-manera-difícil)\n  * [Capítulo 0 - El infame hola mundo](#Capítulo-0---El-infame-hola-mundo)\n    * [Arquitectura de FFmpeg libav](#Arquitectura-de-FFmpeg-libav)\n  * [Capítulo 1 - timing](#Capítulo-1---sincronizando-audio-y-video)\n  * [Capítulo 2 - remuxing](#Capítulo-2---remuxing)\n  * [Capítulo 3 - transcoding](#Capítulo-3---transcoding)\n\n# Intro\n\n## Video - ¡lo que ves!\n\nSi tu tienes una secuencia de imágenes en serie y las cambias a cierta frecuencia (digamos [24 imagenes por segundo](https://www.filmindependent.org/blog/hacking-film-24-frames-per-second/)), crearías una [ilusion de movimiento](https://en.wikipedia.org/wiki/Persistence_of_vision).\nEn resumen, esta es una muy básica idea detrás de un video: **una serie de imágenes / cuadros, corriendo a una velocidad dada**.\n\n<img src=\"https://upload.wikimedia.org/wikipedia/commons/1/1f/Linnet_kineograph_1886.jpg\" title=\"flip book\" height=\"280\"></img>\n\n\nIlustración Zeitgenössische (1886)\n\n## Audio - ¡lo que escuchas!\n\nAunque un video mudo puede expresar una variedad de sentimientos, el agregarle sonido lo vuelve una experiencia mas placentera.\n\nEl sonido es la vibración que se propaga como una onda de presión, a través del aire o de cualquier otro medio de transmisión, como un gas, líquido o sólido.\n\n> En un sistema de audio digital, el micrófono convierte sonido a una señal eléctrica analógica, después un convertidor analógico-a-digital  (ADC) — típicamente se usa [pulse-code modulation (PCM)](https://en.wikipedia.org/wiki/Pulse-code_modulation)  
- que convierte la señal analógica en una señal digital.\n\n![audio analog to digital](https://upload.wikimedia.org/wikipedia/commons/thumb/c/c7/CPT-Sound-ADC-DAC.svg/640px-CPT-Sound-ADC-DAC.svg.png \"audio analógico a digital\")\n>[Fuente](https://commons.wikimedia.org/wiki/File:CPT-Sound-ADC-DAC.svg)\n\n## Códec - comprimiendo datos\n\n> CODEC es un circuito electrónico o software que **comprime o descomprime audio/video digital.** \n\nConvierte audio/video digital en bruto (raw) a un formato comprimido o viceversa.\n\n> https://en.wikipedia.org/wiki/Video_codec\n\nPero si deseamos empaquetar millones de imágenes dentro de un solo archivo y generamos una película, entonces terminaríamos con un archivo enorme. Veamos las matemáticas:\n\nSupongamos que creamos el video con una resolución de `1080 x 1920` (altura x anchura) y que utilizaremos `3 bytes` por píxel (la unidad mínima en una pantalla) para codificar el color (o [un color de 24 bit](https://en.wikipedia.org/wiki/Color_depth#True_color_.2824-bit.29), que nos da 16,777,216 diferentes colores), que este video se reproduce a `24 cuadros por segundo` y que tiene `30 minutos` de duración.\n\n```c\ntoppf = 1080 * 1920 //total_de_pixeles_por_cuadro\ncpp = 3 //costo_por_pixel\ntis = 30 * 60 //tiempo_en_segundos\nfps = 24 //cuadros_por_segundo\n\nalmacenamiento_requerido = tis * fps * toppf * cpp\n```\n\n¡Este video requeriría aproximadamente `250.28GB` de almacenamiento o `1.19 Gbps` de ancho de banda! 
Es por esto que necesitamos hacer uso de un [CODEC](https://github.com/leandromoreira/digital_video_introduction#how-does-a-video-codec-work).\n\n## Contenedor - un lugar cómodo para audio y video\n\n> Un contenedor o formato de envoltura es un formato de meta-archivos cuyas especificaciones describen cómo diferentes elementos de datos y metadatos coexisten en un mismo archivo de computadora.\n>\n>  https://en.wikipedia.org/wiki/Digital_container_format\n\nEs un **solo archivo que contiene todos los streams (en su mayoría de audio y video) y también provee una sincronización y metadatos generales**, como un título, resolución, etc.\n\nUsualmente, podemos inferir el formato de un archivo al ver su extensión: por ejemplo un `video.webm` es probablemente un video usando el contenedor [`webm`](https://www.webmproject.org/).\n\n![container](/img/container.png)\n\n# FFmpeg - línea de comandos\n\n> Una completa solución multi-plataforma para grabar, convertir y transmitir audio y video.\n\nPara trabajar con multimedia podemos hacer uso de esta MARAVILLOSA herramienta/librería llamada [FFmpeg](https://www.ffmpeg.org/). Es posible que ya la conozcas/uses, directa o indirectamente (¿usas [Chrome](https://www.chromium.org/developers/design-documents/video)?).\n\nÉste tiene un programa para línea de comandos llamado `ffmpeg`, un binario muy simple y poderoso. Por ejemplo, puedes convertir desde un contenedor `mp4` a uno `avi` solo escribiendo el siguiente comando:\n\n```bash\n$ ffmpeg -i input.mp4 output.avi\n```\nAcabamos de hacer **remuxing** (remultiplexación) aquí, el cual consiste en convertir de un contenedor a otro. Técnicamente, FFmpeg puede también hacer un transcoding, pero hablaremos de eso después. 
\n\n## FFmpeg herramienta de línea de comandos 101\n\nFFmpeg posee [documentación](https://www.ffmpeg.org/ffmpeg.html) que hace un gran trabajo explicando como funciona.\n\n```bash\n# tambien puedes ver la documentacion usando la linea de comandos\n\nffmpeg -h full | grep -A 10 -B 10 avoid_negative_ts\n```\n\nPara ser breves, el comando de línea para FFmpeg espera el siguiente formato de argumentos para realizar sus acciones `ffmpeg {1} {2} -i {3} {4} {5}`, donde:\n\n1. Opciones globales\n2. Opciones de archivo de entrada\n3. URL de entrada\n4. Opciones de archivo de salida\n5. URL de salida\n\nLas partes 2,3,4 y 5 pueden ser tantas como sean necesarias.\n\nEs mas fácil entender este formato de argumentos en acción:\n\n``` bash\n# ADVERTENCIA: este archivo pesa alrededor de 300MB\n$ wget -O bunny_1080p_60fps.mp4 http://distribution.bbb3d.renderfarming.net/video/mp4/bbb_sunflower_1080p_60fps_normal.mp4\n\n$ ffmpeg \\\n-y \\ # opciones globales\n-c:a libfdk_aac \\ # opciones de entrada\n-i bunny_1080p_60fps.mp4 \\ # url de entrada\n-c:v libvpx-vp9 -c:a libvorbis \\ # opciones de salida\nbunny_1080p_60fps_vp9.webm # url de salida\n```\n\nEste comando toma el archivo de entrada `mp4` que contiene 2 streams (un audio codificado con el CODEC `aac` y el video codificado usando el CODEC `h264`) y va a convertirlo a `webm`, cambiando también los CODECs de audio y video.\n\nPodríamos simplificar el comando de arriba pero tenemos que saber que FFmpeg adoptará o supondrá los valores predeterminados por ti.\n\nPor ejemplo, cuando tu introduces `ffmpeg -i input.avi output.mp4` ¿qué CODEC para audio/video va a usar para producir `output.mp4`?\n\nWerner Robitza escribió un [tutorial acerca de codificacion y edicion con FFmpeg](http://slhck.info/ffmpeg-encoding-course/#/) que se tiene que leer/realizar para una mejor comprensión.\n\n# Operaciones de video comunes\n\nCuando trabajamos con audio/video nosotros usualmente hacemos una serie de tareas con archivos multimedia.\n\n## 
Transcoding\n\n![transcoding](/img/transcoding.png)\n\n**¿Qué?** el acto de convertir uno de los flujos de transmisión (audio o video) de un CODEC a otro.\n\n**¿Por qué?** en ocasiones algunos dispositivos (TVs, smartphones, consolas, etc.) no soportan X pero sí Y y nuevos CODECs proveen mejor tasa de compresión. \n\n**¿Cómo?** convirtiendo un video `H264` (AVC) a un `H265` (HEVC).\n\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-c:v libx265 \\\nbunny_1080p_60fps_h265.mp4\n```\n\n## Transmuxing \n\n![transmuxing](/img/transmuxing.png)\n\n**¿Qué?** el acto de convertir un formato (contenedor) a otro.\n\n**¿Por qué?** en ocasiones algunos dispositivos (TVs, smartphones, consolas, etc.) no soportan X pero sí Y y a veces nuevos contenedores proveen características modernas que son requeridas.\n\n**¿Cómo?** convirtiendo de `mp4` a `webm`.\n\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-c copy \\ # con esto se dice a ffmpeg que se salte la codificación\nbunny_1080p_60fps.webm\n```\n\n## Transrating\n\n![transrating](/img/transrating.png)\n\n**¿Qué?** el acto de cambiar la tasa de bits, o de producir otras presentaciones.\n\n**¿Por qué?** las personas intentarán ver tu video usando una conexión `2G` (edge) en un smartphone de baja gama o una conexión por `fibra` a Internet en los televisores a 4K, por lo tanto tú deberías ofrecer más de una presentación para el mismo video a diferente tasa de bits.\n\n**¿Cómo?** produciendo una presentación con una tasa de bits entre 964K y 3856K.\n\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-minrate 964K -maxrate 3856K -bufsize 2000K \\\nbunny_1080p_60fps_transrating_964_3856.mp4\n```\n\nUsualmente vamos a estar usando transrating con transsizing. Werner Robitza escribió otra [serie de posts acerca del control de tasa para FFmpeg](http://slhck.info/posts/) que debes leer/realizar.\n\n## Transsizing\n\n![transsizing](/img/transsizing.png)\n\n**¿Qué?** el acto de convertir desde una resolución a otra. 
Como antes se dijo, transsizing es usualmente usado con transrating.\n\n**¿Por qué?** las razones serian las mismas que las de transrating.\n\n**¿Cómo?** convirtiendo de una resolución de `1080p` a `480p`.\n\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-vf scale=480:-1 \\\nbunny_1080p_60fps_transsizing_480.mp4\n```\n\n## Round Bonus: Transmisión adaptativa\n\n![adaptive streaming](/img/adaptive-streaming.png)\n\n**¿Qué?** el acto de producir varias resoluciones (tasas de bits) y dividir el contenido en porciones y después servirlos mediante http.\n\n**¿Por qué?** para proveer un contenido flexible que puede ser observado en un smartphone de baja gama o en una televisión en 4K, también es fácil de escalar y desplegar pero puede agregar latencia.\n\n**¿Cómo?** creando un WebM adaptativo usando DASH.\n```bash\n# emisiones de video\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 160x90 -b:v 250k -keyint_min 150 -g 150 -an -f webm -dash 1 video_160x90_250k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 320x180 -b:v 500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_320x180_500k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 750k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_750k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 1000k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_1000k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 1280x720 -b:v 1500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_1280x720_1500k.webm\n\n# emisiones de audio\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:a libvorbis -b:a 128k -vn -f webm -dash 1 audio_128k.webm\n\n# el manifiesto DASH\n$ ffmpeg \\\n -f webm_dash_manifest -i video_160x90_250k.webm \\\n -f webm_dash_manifest -i video_320x180_500k.webm \\\n -f webm_dash_manifest -i video_640x360_750k.webm \\\n -f webm_dash_manifest -i video_640x360_1000k.webm \\\n -f webm_dash_manifest -i video_1280x720_500k.webm \\\n -f 
webm_dash_manifest -i audio_128k.webm \\\n -c copy -map 0 -map 1 -map 2 -map 3 -map 4 -map 5 \\\n -f webm_dash_manifest \\\n -adaptation_sets \"id=0,streams=0,1,2,3,4 id=1,streams=5\" \\\n manifest.mpd\n```\n\nPD: Tomé este ejemplo desde las [Instrucciones de la reproducción de WebM adaptativo usando DASH](http://wiki.webmproject.org/adaptive-streaming/instructions-to-playback-adaptive-webm-using-dash)\n\n## Ve más allá\n\nHay [muchísimos más usos para FFmpeg](https://github.com/leandromoreira/digital_video_introduction/blob/master/encoding_pratical_examples.md#split-and-merge-smoothly).\nYo lo uso en conjunto con *iMovie* para producir/editar algunos videos de Youtube y tú ciertamente puedes usarlo de manera profesional.\n\n# Aprende FFmpeg libav de la manera difícil\n\n> ¿A veces no te preguntas acerca del sonido y la visión?\n> **David Robert Jones**\n\n[FFmpeg](https://www.ffmpeg.org/) es muy útil como herramienta de línea de comandos para realizar tareas esenciales en archivos multimedia, pero ¿cómo se puede usar en nuestros programas?\n\nFFmpeg está [compuesto de múltiples librerías](https://www.ffmpeg.org/doxygen/trunk/index.html) que pueden ser integradas en nuestros propios programas.\n\nUsualmente, cuando instalas FFmpeg, se instalan automáticamente todas esas librerías. De aquí en adelante, me voy a referir a este conjunto de librerías como **FFmpeg libav**.\n\n> Este título es un homenaje a las series de Zed Shaw [Aprende X de la manera difícil](https://learncodethehardway.org/), particularmente a su libro Aprende C de la manera difícil.\n\n## Capítulo 0 - El infame hola mundo\n\nÉste hola mundo, de hecho, no mostrará el mensaje de `\"hola mundo\"` en la terminal :tongue: En su lugar, vamos a **imprimir la información acerca del video**, 
cosas como su formato (contenedor), duración, resolución, canales de audio y, al final, vamos a **decodificar algunos cuadros y a guardarlos como archivos de imagen**.\n\n### Arquitectura de FFmpeg libav\n\nPero antes de que podamos empezar a codificar, vamos a aprender como la **Arquitectura de FFmpeg libav** funciona y como sus componentes se comunican con otros.\n\nAquí hay un diagrama del proceso de decodificación de video:\n\n![ffmpeg libav architecture - decoding process](/img/decoding.png)\n\nPrimero, vas a necesitar cargar tu archivo multimedia dentro de un componente llamado [`AVFormatContext`](https://ffmpeg.org/doxygen/trunk/structAVFormatContext.html)(el contenedor de video es también conocido como formato). \n\nDe hecho, no se carga todo el archivo: usualmente solo lee el encabezado (header) del mismo.\n\nUna vez cargamos el **encabezado de nuestro contenedor** en su forma mínima, nosotros podemos acceder a sus streams (piensa de ellos como datos rudimentarios de audio y video).\n\nCada stream estará disponible en un componente llamado [`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html).\n\n> Stream es un nombre elegante para un flujo continuo de datos.\n\nSupongamos que nuestro video tiene dos streams: un audio codificado con [AAC CODEC](https://en.wikipedia.org/wiki/Advanced_Audio_Coding) y un video codificado con [H264 (AVC) CODEC](https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC). 
Por cada stream, nosotros podemos extraer **piezas de datos** llamados paquetes, los que serán cargados en componentes llamados [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html).\n\nLos **datos dentro de los paquetes siguen codificados** (comprimidos) y para decodificar los paquetes, necesitamos pasarlos a un [`AVCodec`](https://ffmpeg.org/doxygen/trunk/structAVCodec.html) específico.\n\nEl `AVCodec` va a decodificarlos dentro de un [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html) y finalmente, este componente nos da **el cuadro (frame) descomprimido**. Ten en cuenta que la misma terminología y el mismo proceso se usan tanto para un stream de audio como para uno de video.\n\n### Requerimientos\n\nDebido a que algunas personas estuvieron [enfrentándose a varios problemas durante la compilación o ejecución de los ejemplos](https://github.com/leandromoreira/ffmpeg-libav-tutorial/issues?utf8=%E2%9C%93&q=is%3Aissue+is%3Aopen+compiling) **vamos a usar [`Docker`](https://docs.docker.com/install/) como nuestro entorno de desarrollo/ejecución**, también haremos uso del video: \"The Big Buck Bunny\", que en caso de no contar con él de manera local, solo ejecuta el comando `make fetch_small_bunny_video`.\n\n### Capítulo 0 - el código, paso a paso\n\n> #### TLDR; enséñame el [código](/0_hello_world.c) y ejecuta.\n> ```bash\n> $ make run_hello\n> ```\n\nVamos a saltarnos unos detalles, pero no te preocupes: el [código fuente está disponible en GitHub](/0_hello_world.c).\n\nVamos a acomodar (allocate) la memoria para el componente [`AVFormatContext`](http://ffmpeg.org/doxygen/trunk/structAVFormatContext.html), el cual va a contener la información acerca del formato (contenedor).\n\n```c\nAVFormatContext *pFormatContext = avformat_alloc_context();\n```\n\nAhora vamos a abrir el archivo y leer su encabezado para llenar el `AVFormatContext` con la información mínima acerca del formato (note que usualmente los códecs no son abiertos).\n\nLa 
función usada para hacer esto es [`avformat_open_input`](http://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga31d601155e9035d5b0e7efedc894ee49). Éste espera un `AVFormatContext`, un archivo (`filename`) y dos argumentos opcionales: el [`AVInputFormat`](https://ffmpeg.org/doxygen/trunk/structAVInputFormat.html) (si tu colocas un `NULL`, FFmpeg va a suponer el formato por ti) y el [`AVDictionary`](https://ffmpeg.org/doxygen/trunk/structAVDictionary.html) (el cual son las opciones para el desmultiplexador).\n\n```c\navformat_open_input(&pFormatContext, filename, NULL, NULL);\n```\n\nPodemos imprimir el nombre del formato y la duración media:\n\n```c\nprintf(\"Format %s, duration %lld us\", pFormatContext->iformat->long_name, pFormatContext->duration);\n```\n\nPara acceder a los `streams`, necesitamos leer los datos del archivo. La función [`avformat_find_stream_info`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#gad42172e27cddafb81096939783b157bb) hace eso.\n\nAhora, el `pFormatContext->nb_streams` contendrá el numero de streams y el `pFormatContext->streams[i]` nos dará el  stream `i`([`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html)).\n\n```c\navformat_find_stream_info(pFormatContext,  NULL);\n```\nAhora, navegaremos por todos los streams.\n```c\nfor (int i = 0; i < pFormatContext->nb_streams; i++)\n{\n  //\n}\n```\nPor cada stream, vamos a mantener los [`AVCodecParameters`](https://ffmpeg.org/doxygen/trunk/structAVCodecParameters.html), los cuales describen las propiedades de un códec usado por el stream `i`.\n\n```c\nAVCodecParameters *pLocalCodecParameters = pFormatContext->streams[i]->codecpar;\n```\nYa con las propiedades del códec, podremos ver el CODEC apropiado solicitándolo a la función [`avcodec_find_decoder`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga19a0ca553277f019dd5b0fec6e1f9dca)  y encontrar el decodificador para un códec id y regresar un 
[`AVCodec`](http://ffmpeg.org/doxygen/trunk/structAVCodec.html), el componente que conoce como **CO**dificar y **DEC**odificar el stream.\n\n```c\nAVCodec *pLocalCodec = avcodec_find_decoder(pLocalCodecParameters->codec_id);\n```\n\nAhora, vamos a imprimir la información acerca de los códecs.\n\n```c\n// especifico para video y audio\nif (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_VIDEO) {\n  printf(\"Video Codec: resolution %d x %d\", pLocalCodecParameters->width, pLocalCodecParameters->height);\n} else if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_AUDIO) {\n  printf(\"Audio Codec: %d channels, sample rate %d\", pLocalCodecParameters->channels, pLocalCodecParameters->sample_rate);\n}\n// general\nprintf(\"\\tCodec %s ID %d bit_rate %lld\", pLocalCodec->long_name, pLocalCodec->id, pLocalCodecParameters->bit_rate);\n```\n\nCon el códec, podemos acomodar memoria para el [`AVCodecContext`](https://ffmpeg.org/doxygen/trunk/structAVCodecContext.html), el cual va a contener el contexto para nuestro proceso de decodificación/codificación, pero antes debemos llenar el contexto del códec con los parámetros CODEC; esto lo hacemos con [`avcodec_parameters_to_context`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#gac7b282f51540ca7a99416a3ba6ee0d16).\n\nUna vez llenado el contexto del códec, necesitamos abrirlo. 
Entonces tenemos que llamar a la función [`avcodec_open2`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d) y después de ello,  lo podremos usar.\n\n```c\nAVCodecContext *pCodecContext = avcodec_alloc_context3(pCodec);\navcodec_parameters_to_context(pCodecContext, pCodecParameters);\navcodec_open2(pCodecContext, pCodec, NULL);\n```\n\nAhora, vamos a leer los paquetes desde el stream y decodificarlos dentro de cuadros, vamos a necesitar acomodar la memoria para ambos componentes, el [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html) y [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html).\n\n```c\nAVPacket *pPacket = av_packet_alloc();\nAVFrame *pFrame = av_frame_alloc();\n```\n\nHay que sustraer nuestros paquetes desde los streams con la función [`av_read_frame`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga4fdb3084415a82e3810de6ee60e46a61) mientras contenga paquetes.\n\n```c\nwhile (av_read_frame(pFormatContext, pPacket) >= 0) {\n  //...\n}\n```\n\nAhora, hay que **mandar los paquetes de datos en bruto** (cuadro comprimido) al decodificador, mediante el contexto del códec, usando la función  [`avcodec_send_packet`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3).\n\n```c\navcodec_send_packet(pCodecContext, pPacket);\n```\n\nY vamos a **recibir el cuadro de datos en bruto** (cuadro descomprimido) desde el decodificador, mediante el mismo contexto del códec, usando la función [`avcodec_receive_frame`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c).\n\n```c\navcodec_receive_frame(pCodecContext, pFrame);\n```\n\nPodemos imprimir el numero de cuadro, el [PTS](https://en.wikipedia.org/wiki/Presentation_timestamp), DTS, [frame type](https://en.wikipedia.org/wiki/Video_compression_picture_types), etc.\n\n```c\nprintf(\n    \"Frame %c (%d) pts %d dts %d key_frame %d [coded_picture_number %d, 
display_picture_number %d]\",\n    av_get_picture_type_char(pFrame->pict_type),\n    pCodecContext->frame_number,\n    pFrame->pts,\n    pFrame->pkt_dts,\n    pFrame->key_frame,\n    pFrame->coded_picture_number,\n    pFrame->display_picture_number\n);\n```\n\nFinalmente, podemos guardar nuestro cuadro decodificado dentro de una [imagen gris simple](https://en.wikipedia.org/wiki/Netpbm_format#PGM_example). El proceso es muy sencillo, nosotros usaremos el `pFrame->data,`, donde el índice esta relacionado con los [planos Y, Cb y Cr](https://en.wikipedia.org/wiki/YCbCr), nosotros solo seleccionamos  `0` (Y) para guardar nuestra imagen gris.\n\n```c\nsave_gray_frame(pFrame->data[0], pFrame->linesize[0], pFrame->width, pFrame->height, frame_filename);\n\nstatic void save_gray_frame(unsigned char *buf, int wrap, int xsize, int ysize, char *filename)\n{\n    FILE *f;\n    int i;\n    f = fopen(filename,\"w\");\n    // escribiendo el encabezado mínimo para un formato de un archivo pgm\n    // portable graymap format -> https://en.wikipedia.org/wiki/Netpbm_format#PGM_example\n    fprintf(f, \"P5\\n%d %d\\n%d\\n\", xsize, ysize, 255);\n\n    // escribiendo linea por linea\n    for (i = 0; i < ysize; i++)\n        fwrite(buf + i * wrap, 1, xsize, f);\n    fclose(f);\n}\n```\n\n¡Y voilà! 
Ahora nosotros tenemos una imagen en escala de grises de 2MB:\n\n![saved frame](/img/generated_frame.png)\n\n## Capítulo 1 - sincronizando audio y video\n\n> **Sé el jugador** - un joven desarrollador de JS escribiendo un nuevo reproductor de video MSE.\n\nAntes de que nos movamos a [codificar un ejemplo de transcoding](#capítulo-3---transcoding) vamos a hablar acerca de la **sincronización (timing)**, o cómo un reproductor de video sabe cuál es el momento correcto para reproducir un cuadro.\n\nEn el último ejemplo, hemos guardado algunos cuadros que pueden verse aquí:\n\n![frame 0](/img/hello_world_frames/frame0.png)\n![frame 1](/img/hello_world_frames/frame1.png)\n![frame 2](/img/hello_world_frames/frame2.png)\n![frame 3](/img/hello_world_frames/frame3.png)\n![frame 4](/img/hello_world_frames/frame4.png)\n![frame 5](/img/hello_world_frames/frame5.png)\n\nCuando nosotros estamos diseñando un reproductor de video, nosotros necesitamos **reproducir cada cuadro a su debido tiempo**, de otra forma sería difícil ver un video de manera agradable, porque se estaría reproduciendo demasiado rápido o lento.\n\nPor lo tanto, necesitamos introducir algo de lógica para reproducir sin complicaciones cada cuadro. 
Para ello, cada cuadro tiene un **Timestamp de presentación** (PTS) el cual tiene un numero creciente factorizado en un **timebase (tiempo base)**, que es un numero racional (donde el denominador es conocido como **timescale**) divisible por el **frame rate (fps)**.\n\nEs fácil entender cuando vemos algunos ejemplos, vamos a simular varios escenarios.\n\nPara un `fps=60/1` y `timebase=1/60000` cada PTS se incrementará `timescale / fps = 1000`, por lo tanto el **PTS en tiempo real** por cada cuadro podría ser (suponiendo que empieza en 0):\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 1000, PTS_TIME = PTS * timebase = 0.016`\n* `frame=2, PTS = 2000, PTS_TIME = PTS * timebase = 0.033`\n\nPara casi el mismo escenario pero con un timebase igual a `1/60`.\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 1, PTS_TIME = PTS * timebase = 0.016`\n* `frame=2, PTS = 2, PTS_TIME = PTS * timebase = 0.033`\n* `frame=3, PTS = 3, PTS_TIME = PTS * timebase = 0.050`\n\nPara un `fps=25/1` y `timebase=1/75` cada PTS se incrementará `timescale / fps = 3` y el tiempo PTS podría ser:\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 3, PTS_TIME = PTS * timebase = 0.04`\n* `frame=2, PTS = 6, PTS_TIME = PTS * timebase = 0.08`\n* `frame=3, PTS = 9, PTS_TIME = PTS * timebase = 0.12`\n* ...\n* `frame=24, PTS = 72, PTS_TIME = PTS * timebase = 0.96`\n* ...\n* `frame=4064, PTS = 12192, PTS_TIME = PTS * timebase = 162.56`\n\nAhora con el `pts_time` podemos encontrar una forma de renderizarlo, esto es sincronizándolo con el audio `pts_time` o con el reloj del sistema. 
El FFmpeg libav provee esa información a través de su API:\n\n- fps = [`AVStream->avg_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a946e1e9b89eeeae4cab8a833b482c1ad)\n- tbr = [`AVStream->r_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#ad63fb11cc1415e278e09ddc676e8a1ad)\n- tbn = [`AVStream->time_base`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a9db755451f14e2bf590d4b85d82b32e6)\n\nPor pura curiosidad, observa que los cuadros fueron guardados en el orden DTS (cuadros: 1, 6, 4, 2, 3, 5) pero reproducidos en un orden PTS (cuadros: 1, 2, 3, 4, 5, 6). Además, nota lo poco que cuestan los cuadros-B en comparación con los cuadros-P o cuadros-I.\n\n```\nLOG: AVStream->r_frame_rate 60/1\nLOG: AVStream->time_base 1/60000\n...\nLOG: Frame 1 (type=I, size=153797 bytes) pts 6000 key_frame 1 [DTS 0]\nLOG: Frame 2 (type=B, size=8117 bytes) pts 7000 key_frame 0 [DTS 3]\nLOG: Frame 3 (type=B, size=8226 bytes) pts 8000 key_frame 0 [DTS 4]\nLOG: Frame 4 (type=B, size=17699 bytes) pts 9000 key_frame 0 [DTS 2]\nLOG: Frame 5 (type=B, size=6253 bytes) pts 10000 key_frame 0 [DTS 5]\nLOG: Frame 6 (type=P, size=34992 bytes) pts 11000 key_frame 0 [DTS 1]\n```\n\n## Capítulo 2 - remuxing\n\nRemuxing (remultiplexar) es el acto de cambiar de un formato (contenedor) a otro, por ejemplo, nosotros podemos cambiar un video [MPEG-4](https://en.wikipedia.org/wiki/MPEG-4_Part_14) a uno  [MPEG-TS](https://en.wikipedia.org/wiki/MPEG_transport_stream) sin muchos problemas usando FFmpeg:\n\n```bash\nffmpeg input.mp4 -c copy output.ts\n```\n\nEsto va a desmultiplexar (demux) el mp4 pero no lo va a decodificar o codificar (`-c copy`) y al final, esto lo multiplexa (mux) dentro de un archivo `mpegts`. 
Si tú no provees el formato con `-f`, entonces FFmpeg va a tener que determinarlo con base en la extensión del archivo.\n\nEl uso general de FFmpeg o libav sigue un patrón/arquitectura o flujo de trabajo:\n\n* **[protocol layer](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** - este acepta una entrada (`input`) (un archivo o `file`, o por ejemplo la entrada también podría ser `rtmp` o `HTTP`)\n* **[format layer](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - este desmultiplexa (`demuxes`) su contenido, revelando, en mayor parte, los metadatos y sus streams\n* **[codec layer](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - esto decodifica (`decodes`) sus datos de stream comprimidos<sup>*opcional*</sup>\n* **[pixel layer](https://ffmpeg.org/doxygen/trunk/group__lavfi.html)** - aquí también se pueden aplicar filtros (`filters`) a los cuadros en bruto (como resizing)<sup>*opcional*</sup>\n* y después lo hace en el sentido contrario.\n* **[codec layer](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - esto codifica (`encodes`) (o re-encodifica (`re-encodes`) o incluso transcodifica o `transcodes`) los cuadros en bruto<sup>*opcional*</sup>\n* **[format layer](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - esto multiplexa (`muxes`) (o remultiplexa (`remuxes`)) los streams en bruto (los datos comprimidos)\n* **[protocol layer](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** - y finalmente los datos multiplexados son enviados a una salida o `output` (otro archivo o quizás, un servidor remoto en la red)\n\n![ffmpeg libav workflow](/img/ffmpeg_libav_workflow.jpeg)\n\n> Esta imagen está fuertemente inspirada por los trabajos de [Leixiaohua](http://leixiaohua1020.github.io/#ffmpeg-development-examples) y [Slhck](https://slhck.info/ffmpeg-encoding-course/#/9).\n\nAhora vamos a codificar un ejemplo usando libav para proveer el mismo efecto que en `ffmpeg -i input.mp4 -c copy output.ts`.\n\n```c\nAVFormatContext *input_format_context = 
NULL;\nAVFormatContext *output_format_context = NULL;\n```\n\nComo en los ejemplos anteriores, empezaremos por acomodar la memoria y abrir el formato de la entrada. Para este caso en específico, vamos a abrir un archivo de entrada y acomodar memoria para un archivo de salida.\n\n```c\nif ((ret = avformat_open_input(&input_format_context, in_filename, NULL, NULL)) < 0) {\n  fprintf(stderr, \"Could not open input file '%s'\", in_filename);\n  goto end;\n}\nif ((ret = avformat_find_stream_info(input_format_context, NULL)) < 0) {\n  fprintf(stderr, \"Failed to retrieve input stream information\");\n  goto end;\n}\n\navformat_alloc_output_context2(&output_format_context, NULL, NULL, out_filename);\nif (!output_format_context) {\n  fprintf(stderr, \"Could not create output context\\n\");\n  ret = AVERROR_UNKNOWN;\n  goto end;\n}\n```\n\nVamos a remultiplexar solamente los tipos de streams de video, audio y subtítulos, así que vamos a guardar qué streams vamos a usar dentro de un arreglo de índices.\n\n ```c\nnumber_of_streams = input_format_context->nb_streams;\nstreams_list = av_mallocz_array(number_of_streams, sizeof(*streams_list));\n ```\n\nDespués de haber acomodado la memoria requerida, vamos a navegar por todos los streams, por cada uno necesitaremos crear un nuevo stream dentro de nuestro contexto de formato de salida, usando la función [avformat_new_stream](https://ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827). 
Nota como estamos marcando todos los streams que no son video, audio o subtitulo, así que podemos saltarlos para luego.\n\n```c\nfor (i = 0; i < input_format_context->nb_streams; i++) {\n  AVStream *out_stream;\n  AVStream *in_stream = input_format_context->streams[i];\n  AVCodecParameters *in_codecpar = in_stream->codecpar;\n  if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&\n      in_codecpar->codec_type != AVMEDIA_TYPE_VIDEO &&\n      in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {\n    streams_list[i] = -1;\n    continue;\n  }\n  streams_list[i] = stream_index++;\n  out_stream = avformat_new_stream(output_format_context, NULL);\n  if (!out_stream) {\n    fprintf(stderr, \"Failed allocating output stream\\n\");\n    ret = AVERROR_UNKNOWN;\n    goto end;\n  }\n  ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar);\n  if (ret < 0) {\n    fprintf(stderr, \"Failed to copy codec parameters\\n\");\n    goto end;\n  }\n}\n```\n\nAhora, podemos crear un archivo de salida.\n\n```c\nif (!(output_format_context->oformat->flags & AVFMT_NOFILE)) {\n  ret = avio_open(&output_format_context->pb, out_filename, AVIO_FLAG_WRITE);\n  if (ret < 0) {\n    fprintf(stderr, \"Could not open output file '%s'\", out_filename);\n    goto end;\n  }\n}\n\nret = avformat_write_header(output_format_context, NULL);\nif (ret < 0) {\n  fprintf(stderr, \"Error occurred when opening output file\\n\");\n  goto end;\n}\n```\n\nDespués, podemos copiar los streams, paquete por paquete, desde nuestros streams de entrada a los de salida. 
Continuaremos navegando por los paquetes, mientras estos sigan llegando (`av_read_frame`), por cada paquete vamos a necesitar recalcular el PTS y el DTS, para finalmente escribirlo (`av_interleaved_write_frame`) a nuestro contexto de formato de salida.\n\n```c\nwhile (1) {\n  AVStream *in_stream, *out_stream;\n  ret = av_read_frame(input_format_context, &packet);\n  if (ret < 0)\n    break;\n  in_stream  = input_format_context->streams[packet.stream_index];\n  if (packet.stream_index >= number_of_streams || streams_list[packet.stream_index] < 0) {\n    av_packet_unref(&packet);\n    continue;\n  }\n  packet.stream_index = streams_list[packet.stream_index];\n  out_stream = output_format_context->streams[packet.stream_index];\n  /* copiar paquete */\n  packet.pts = av_rescale_q_rnd(packet.pts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);\n  packet.dts = av_rescale_q_rnd(packet.dts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);\n  packet.duration = av_rescale_q(packet.duration, in_stream->time_base, out_stream->time_base);\n  // https://ffmpeg.org/doxygen/trunk/structAVPacket.html#ab5793d8195cf4789dfb3913b7a693903\n  packet.pos = -1;\n\n  //https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1\n  ret = av_interleaved_write_frame(output_format_context, &packet);\n  if (ret < 0) {\n    fprintf(stderr, \"Error muxing packet\\n\");\n    break;\n  }\n  av_packet_unref(&packet);\n}\n```\n\nPara finalizar, necesitamos escribir el stream trailer a un archivo multimedia de salida con la función [av_write_trailer](https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga7f14007e7dc8f481f054b21614dfec13).\n\n```c\nav_write_trailer(output_format_context);\n```\n\nAhora, estamos listos para probarlo y la primera prueba va a ser una conversión de formato (contenedor de video) de un video MP4 a un video MPEG-TS. 
Estamos básicamente realizando la línea de comando `ffmpeg -i input.mp4 -c copy output.ts` con libav.\n\n```bash\nmake run_remuxing_ts\n```\n\n¡Funciona! ¡¿No me crees?! No deberías; podemos checarlo con `ffprobe`:\n\n```bash\nffprobe -i remuxed_small_bunny_1080p_60fps.ts\n\nInput #0, mpegts, from 'remuxed_small_bunny_1080p_60fps.ts':\n  Duration: 00:00:10.03, start: 0.000000, bitrate: 2751 kb/s\n  Program 1\n    Metadata:\n      service_name    : Service01\n      service_provider: FFmpeg\n    Stream #0:0[0x100]: Video: h264 (High) ([27][0][0][0] / 0x001B), yuv420p(progressive), 1920x1080 [SAR 1:1 DAR 16:9], 60 fps, 60 tbr, 90k tbn, 120 tbc\n    Stream #0:1[0x101]: Audio: ac3 ([129][0][0][0] / 0x0081), 48000 Hz, 5.1(side), fltp, 320 kb/s\n```\n\nPara resumir en una imagen todo lo que hicimos, podemos revisitar nuestra [idea inicial acerca de cómo libav funciona](https://github.com/leandromoreira/ffmpeg-libav-tutorial#ffmpeg-libav-architecture) pero observa que nos saltamos la parte del códec.\n\n![remuxing libav components](/img/remuxing_libav_components.png)\n\nAntes de terminar este capítulo, me gustaría enseñarte una parte importante del proceso de remultiplexación, **tú puedes pasar opciones al multiplexor**. 
Digamos que se desea entregar un formato [MPEG-DASH](https://developer.mozilla.org/en-US/docs/Web/Apps/Fundamentals/Audio_and_video_delivery/Setting_up_adaptive_streaming_media_sources#MPEG-DASH_Encoding), para eso, necesitamos usar [mp4 fragmentado](https://stackoverflow.com/a/35180327) (a veces es referido como `fmp4`) en lugar de MPEG-TS o MPEG-4 plano.\n\nCon la [línea de comando, podemos hacer eso fácilmente](https://developer.mozilla.org/en-US/docs/Web/API/Media_Source_Extensions_API/Transcoding_assets_for_MSE#Fragmenting).\n\n```\nffmpeg -i non_fragmented.mp4 -movflags frag_keyframe+empty_moov+default_base_moof fragmented.mp4\n```\n\nCasi igual de fácil como en la línea de comando, para su versión en libav, solamente debemos pasar las opciones y después escribir el encabezado de salida, justo antes de copiar los paquetes.\n\n ```c\nAVDictionary* opts = NULL;\nav_dict_set(&opts, \"movflags\", \"frag_keyframe+empty_moov+default_base_moof\", 0);\nret = avformat_write_header(output_format_context, &opts);\n ```\n\nAhora podemos generar este archivo mp4 fragmentado:\n\n```bash\nmake run_remuxing_fragmented_mp4\n```\n\nPara asegurarte que no te estoy mintiendo. Puedes usar esta maravillosa página/herramienta [gpac/mp4box.js](http://download.tsi.telecom-paristech.fr/gpac/mp4box.js/filereader.html) o el sitio [http://mp4parser.com/](http://mp4parser.com/) para ver las diferencias, primero carga el mp4 \"común\".\n\n![mp4 boxes](/img/boxes_normal_mp4.png)\n\nComo podrás ver, este tiene un solo atom (o caja) `mdat`, **este es el espacio donde se encuentran los cuadros de video y audio**. 
Ahora carga el mp4 fragmentado y observa cómo se distribuyen las cajas `mdat`.\n\n![fragmented mp4 boxes](/img/boxes_fragmente_mp4.png)\n\n## Capítulo 3 - transcoding\n\n> #### TLDR; enséñame el [código](/3_transcoding.c) y ejecuta.\n> ```bash\n> $ make run_transcoding\n> ```\n> Vamos a saltarnos unos detalles, pero no te preocupes: [el código fuente está disponible en GitHub](/3_transcoding.c).\n\nEn este capítulo, vamos a crear un transcoder minimalista, escrito en C, que pueda convertir videos codificados en H264 a H265 usando la librería **FFmpeg/libav**, específicamente [libavcodec](https://ffmpeg.org/libavcodec.html), libavformat, y libavutil.\n\n![media transcoding flow](/img/transcoding_flow.png)\n\n> _Solo una recapitulación rápida:_ El [**AVFormatContext**](https://www.ffmpeg.org/doxygen/trunk/structAVFormatContext.html) es la abstracción del formato para un archivo multimedia, alias contenedor (ej. MKV, MP4, Webm, TS). El [**AVStream**](https://www.ffmpeg.org/doxygen/trunk/structAVStream.html) representa cada tipo de datos para un formato dado (ej: audio, video, subtítulo, metadatos). 
El [**AVPacket**](https://www.ffmpeg.org/doxygen/trunk/structAVPacket.html) es una porción de datos comprimidos, los cuales son adquiridos desde `AVStream` y que pueden ser decodificados por un [**AVCodec**](https://www.ffmpeg.org/doxygen/trunk/structAVCodec.html) (ej: av1, h264, vp9, hevc) generando datos en bruto, llamados [**AVFrame**](https://www.ffmpeg.org/doxygen/trunk/structAVFrame.html).\n\n### Transmuxing \n\nVamos a empezar con una simple operación de transmultiplexación (transmuxing) y después podemos construir sobre este código, el primer paso es **cargar el archivo de entrada**.\n\n```c\n// Acomoda un AVFormatContext\navfc = avformat_alloc_context();\n// Abre un stream de entrada y lee el encabezado.\navformat_open_input(avfc, in_filename, NULL, NULL);\n// Lee los paquetes del archivo para obtener la informacion de streams.\navformat_find_stream_info(avfc, NULL);\n```\n\nAhora vamos a poner en pie el decodificador, el `AVFormatContext` nos va a dar acceso a todos los componentes `AVStream` y por cada uno de ellos, podremos obtener su `AVCodec` y crear su `AVCodecContext` en particular y finalmente podremos abrir el códec dado, así entonces podremos proceder con el proceso de decodificación.\n\n>  El [**AVCodecContext**](https://www.ffmpeg.org/doxygen/trunk/structAVCodecContext.html) contiene datos acerca de la configuración del archivo como la tasa de bits (bit rate), tasa de cuadros (frame rate), tasa de muestreo (sample rate), canales (channels), altura (height), así como muchos otros.\n\n```c\nfor (int i = 0; i < avfc->nb_streams; i++)\n{\n  AVStream *avs = avfc->streams[i];\n  AVCodec *avc = avcodec_find_decoder(avs->codecpar->codec_id);\n  AVCodecContext *avcc = avcodec_alloc_context3(*avc);\n  avcodec_parameters_to_context(*avcc, avs->codecpar);\n  avcodec_open2(*avcc, *avc, NULL);\n}\n```\n\nNecesitamos preparar el archivo multimedia para transmultiplexación también, primero debemos **acomodar memoria** para la salida `AVFormatContext`. 
Creamos **cada uno de los streams** en el formato de salida. Para poder empaquetar propiamente el stream, **copiamos los parámetros del códec** desde el decodificador.\n\n**Establecemos la bandera** `AV_CODEC_FLAG_GLOBAL_HEADER` el cual le dice al encodificador que puede usar los encabezados globales y finalmente abrimos el **archivo de salida para vaciar los datos** y mantener los encabezados.\n\n```c\navformat_alloc_output_context2(&encoder_avfc, NULL, NULL, out_filename);\n\nAVStream *avs = avformat_new_stream(encoder_avfc, NULL);\navcodec_parameters_copy(avs->codecpar, decoder_avs->codecpar);\n\nif (encoder_avfc->oformat->flags & AVFMT_GLOBALHEADER)\n  encoder_avfc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;\n\navio_open(&encoder_avfc->pb, encoder->filename, AVIO_FLAG_WRITE);\navformat_write_header(encoder->avfc, &muxer_opts);\n\n```\n\nNosotros conseguiremos los `AVPacket` desde el decodificador, ajustando los timestamps, y así poder escribir apropiadamente el paquete en el archivo de salida. Aunque la función `av_interleaved_write_frame` dice \"write frame\" (escribir cuadro), estamos guardando el paquete. 
Terminaremos el proceso de transmultiplexación escribiendo el stream del trailer, que se encuentra dentro del archivo.\n\n```c\nAVFrame *input_frame = av_frame_alloc();\nAVPacket *input_packet = av_packet_alloc();\n\nwhile (av_read_frame(decoder_avfc, input_packet) >= 0)\n{\n  av_packet_rescale_ts(input_packet, decoder_video_avs->time_base, encoder_video_avs->time_base);\n  av_interleaved_write_frame(*avfc, input_packet) < 0));\n}\n\nav_write_trailer(encoder_avfc);\n```\n\n### Transcoding\n\nLa sección previa mostró un programa transmultiplexador, ahora vamos a agregar la capacidad para codificar los archivos, específicamente, vamos a habilitarlo para transcodificar videos desde `h264` a `h265`.\n\nDespués de que preparamos el decodificador, pero antes de acomodar el archivo de salida multimedia, vamos a configurar el encodificador.\n\n* Crea el video `AVStream` en el encodificador, [`avformat_new_stream`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827)\n* Usa el `AVCodec` llamado `libx265`, [`avcodec_find_encoder_by_name`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__encoding.html#gaa614ffc38511c104bdff4a3afa086d37)\n* Crear el `AVCodecContext` basado en el códec creado, [`avcodec_alloc_context3`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#gae80afec6f26df6607eaacf39b561c315)\n* Configurar los atributos básicos para la sesión de transcodificación, y\n* Abre el códec y copia los parámetros del contexto al stream. 
[`avcodec_open2`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d) y [`avcodec_parameters_from_context`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga0c7058f764778615e7978a1821ab3cfe) \n\n```c\nAVRational input_framerate = av_guess_frame_rate(decoder_avfc, decoder_video_avs, NULL);\nAVStream *video_avs = avformat_new_stream(encoder_avfc, NULL);\n\nchar *codec_name = \"libx265\";\nchar *codec_priv_key = \"x265-params\";\n// vamos a usar las opciones internas para x265\n// esto deshabilita la deteccion de cambio de escena y despues fija\n// GOP en 60 cuadros.\nchar *codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0\";\n\nAVCodec *video_avc = avcodec_find_encoder_by_name(codec_name);\nAVCodecContext *video_avcc = avcodec_alloc_context3(video_avc);\n// parametros de codec para el encoder \nav_opt_set(sc->video_avcc->priv_data, codec_priv_key, codec_priv_value, 0);\nvideo_avcc->height = decoder_ctx->height;\nvideo_avcc->width = decoder_ctx->width;\nvideo_avcc->pix_fmt = video_avc->pix_fmts[0];\n// control de tasa\nvideo_avcc->bit_rate = 2 * 1000 * 1000;\nvideo_avcc->rc_buffer_size = 4 * 1000 * 1000;\nvideo_avcc->rc_max_rate = 2 * 1000 * 1000;\nvideo_avcc->rc_min_rate = 2.5 * 1000 * 1000;\n// tiempo base\nvideo_avcc->time_base = av_inv_q(input_framerate);\nvideo_avs->time_base = sc->video_avcc->time_base;\n\navcodec_open2(sc->video_avcc, sc->video_avc, NULL);\navcodec_parameters_from_context(sc->video_avs->codecpar, sc->video_avcc);\n```\n\nNecesitamos expandir nuestro ciclo decodificador para la transcodificación del stream de video:\n\n* Envía el `AVPacket` vacío al decodificador, [`avcodec_send_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3)\n* Recibir el `AVFrame` descomprimido, [`avcodec_receive_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c)\n* Empezar a transcodificar este 
cuadro en bruto,\n* Enviar este cuadro en bruto, [`avcodec_send_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga9395cb802a5febf1f00df31497779169)\n* Recibe el contenido comprimido, basado en nuestro códec, `AVPacket`, [`avcodec_receive_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga5b8eff59cf259747cf0b31563e38ded6)\n* Establece el timestamp, y[`av_packet_rescale_ts`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__packet.html#gae5c86e4d93f6e7aa62ef2c60763ea67e)\n* Escríbelo a un archivo de salida. [`av_interleaved_write_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1)\n\n```c\nAVFrame *input_frame = av_frame_alloc();\nAVPacket *input_packet = av_packet_alloc();\n\nwhile (av_read_frame(decoder_avfc, input_packet) >= 0)\n{\n  int response = avcodec_send_packet(decoder_video_avcc, input_packet);\n  while (response >= 0) {\n    response = avcodec_receive_frame(decoder_video_avcc, input_frame);\n    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n      break;\n    } else if (response < 0) {\n      return response;\n    }\n    if (response >= 0) {\n      encode(encoder_avfc, decoder_video_avs, encoder_video_avs, decoder_video_avcc, input_packet->stream_index);\n    }\n    av_frame_unref(input_frame);\n  }\n  av_packet_unref(input_packet);\n}\nav_write_trailer(encoder_avfc);\n\n// funcion usada\nint encode(AVFormatContext *avfc, AVStream *dec_video_avs, AVStream *enc_video_avs, AVCodecContext video_avcc int index) {\n  AVPacket *output_packet = av_packet_alloc();\n  int response = avcodec_send_frame(video_avcc, input_frame);\n\n  while (response >= 0) {\n    response = avcodec_receive_packet(video_avcc, output_packet);\n    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n      break;\n    } else if (response < 0) {\n      return -1;\n    }\n\n    output_packet->stream_index = index;\n    output_packet->duration = 
enc_video_avs->time_base.den / enc_video_avs->time_base.num / dec_video_avs->avg_frame_rate.num * dec_video_avs->avg_frame_rate.den;\n\n    av_packet_rescale_ts(output_packet, dec_video_avs->time_base, enc_video_avs->time_base);\n    response = av_interleaved_write_frame(avfc, output_packet);\n  }\n  av_packet_unref(output_packet);\n  av_packet_free(&output_packet);\n  return 0;\n}\n\n```\n\nVamos a convertir el stream desde `h264` a `h265`, como se espera de la versión `h265`, el archivo es más pequeño que el `h264` sin embargo el [programa creado](/3_transcoding.c) es capaz de:\n\n```c\n\n  /*\n   * H264 -> H265\n   * Audio -> remuxed (untouched)\n   * MP4 - MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx265\";\n  sp.codec_priv_key = \"x265-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * Audio -> remuxed (untouched)\n   * MP4 - MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * Audio -> remuxed (untouched)\n   * MP4 - fragmented MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n  sp.muxer_opt_key = \"movflags\";\n  sp.muxer_opt_value = \"frag_keyframe+empty_moov+delay_moov+default_base_moof\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * Audio -> AAC\n   * MP4 - MPEG-TS\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 0;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n  sp.audio_codec = \"aac\";\n 
 sp.output_extension = \".ts\";\n\n  /* WIP :P  -> it's not playing on VLC, the final bit rate is huge\n   * H264 -> VP9\n   * Audio -> Vorbis\n   * MP4 - WebM\n   */\n  //StreamingParams sp = {0};\n  //sp.copy_audio = 0;\n  //sp.copy_video = 0;\n  //sp.video_codec = \"libvpx-vp9\";\n  //sp.audio_codec = \"libvorbis\";\n  //sp.output_extension = \".webm\";\n\n```\n\n> Ahora, para ser honesto, esto fue [más difícil de lo que pensé](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54): tuve que meterme dentro de [el código fuente de la línea de comandos de FFmpeg](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54#issuecomment-570746749) y probarlo bastante, y aun así pienso que estoy olvidando algo, ya que tuve que forzar `force-cfr` para que el `h264` funcione y me sigue arrojando algunos mensajes de advertencia como `warning messages (forced frame type (5) at 80 was changed to frame type (3))`.\n"
  },
  {
    "path": "README-ko.md",
    "content": "[![license](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)\n\n[FFmpeg](https://www.ffmpeg.org/)을 라이브러리처럼(a.k.a. libav) 사용하려면 어떻게 시작해야할지 알려줄 만한 튜토리얼/책을 찾아봤었습니다. 그리고는 [\"How to write a video player in less than 1k lines\"](http://dranger.com/ffmpeg/) 라는 튜토리얼을 찾았죠.\n하지만 안타깝게도 그건 더이상 관리가 안되고 있어서 이 글을 쓰기로 결정했습니다.\n\n여기서 사용된 대부분의 코드는 C로 되어있습니다. **하지만 걱정하지 마세요**: 당신도 쉽게 이해할 것이고 선호하는 언어에도 적용하실 수 있을겁니다.\nFFmpeg libav는 [python](https://pyav.org/), [go](https://github.com/imkira/go-libav)와 같은 다양한 언어로 된 많은 bindings을 제공합니다. 만약 사용하려는 언어에 그것이 없다면 `ffi`를 통해서도 지원할 수 있습니다. ([Lua](https://github.com/daurnimator/ffmpeg-lua-ffi/blob/master/init.lua) 예시)\n\n우리는 비디오와 오디오, 코덱, 컨테이너가 무엇인지에 대해 빠르게 학습한 후에 `FFmpeg` 명령을 어떻게 사용하는지에 대해서 파헤쳐보고 마지막으로 코드도 작성해볼 것입니다, [삽질하면서 FFmpeg libav 배우기](#삽질하면서-FFmpeg-libav-배우기) 섹션으로 바로 넘어가셔도 좋습니다.\n\n혹자는 인터넷 비디오 스트리밍이 전통적인 TV의 미래라고 이야기하기도 합니다. 어떻게 되든 FFmpeg은 공부해둘만한 가치가 있는 것입니다.\n\n__목차__\n\n* [소개](#소개)\n  * [비디오 - 당신이 무엇을 보는지!](#비디오---당신이-무엇을-보는지!)\n  * [오디오 - 당신이 무엇을 듣는지!](#오디오---당신이-무엇을-듣는지!)\n  * [코덱 - 데이터를 줄이기](#코덱---데이터를-줄이기)\n  * [컨테이너 - 오디오와 비디오의 안식처](#컨테이너---오디오와-비디오의-안식처)\n* [FFmpeg - 명령줄 도구](#FFmpeg---명령줄-도구)\n  * [FFmpeg 명령줄 도구 101](#FFmpeg-명령줄-도구-101)\n* [공통 비디오 연산](#공통-비디오-연산)\n  * [트랜스코딩 (Transcoding)](#트랜스코딩-(Transcoding))\n  * [트랜스먹싱 (Transmuxing)](#트랜스먹싱-(Transmuxing))\n  * [트랜스레이팅 (Transrating)](#트랜스레이팅-(Transrating))\n  * [트랜스사이징 (Transsizing)](#트랜스사이징-(Transsizing))\n  * [보너스: 적응형 스트리밍 (Adaptive Streaming)](#보너스:-적응형-스트리밍-(Adaptive-Streaming))\n  * [더 들어가기](#더-들어가기)\n* [삽질하면서 FFmpeg libav 배우기](#삽질하면서-FFmpeg-libav-배우기)\n  * [챕터 0 - 악명 높은 hello world](#챕터-0---악명-높은-hello-world)\n    * [FFmpeg libav 아키텍처](#FFmpeg-libav-아키텍처)\n  * [챕터 1 - 타이밍 (timing)](#챕터-1---오디오와-비디오-동기화)\n  * [챕터 2 - 리먹싱 (remuxing)](#챕터-2---리먹싱-(remuxing))\n  * [챕터 3 - 트랜스코딩 (transcoding)](#챕터-3---트랜스코딩-(transcoding))\n\n# 소개\n\n## 비디오 - 당신이 무엇을 보는지!\n\n만약 당신이 여러 연속된 이미지들을 가지고 있고 
이것들을 주어진 주파수에 맞게 변화시킨다면 (이를테면 [초당 24장의 이미지](https://www.filmindependent.org/blog/hacking-film-24-frames-per-second/)), [움직임의 잔상](https://en.wikipedia.org/wiki/Persistence_of_vision)을 만들게 될 것입니다.\n요약하면 이게 비디오라는 것의 가장 기본적인 아이디어입니다: **정해진 속도에 맞게 돌아가는 연속된 사진들 / 프레임들**.\n\n<img src=\"https://upload.wikimedia.org/wikipedia/commons/1/1f/Linnet_kineograph_1886.jpg\" title=\"flip book\" height=\"280\"></img>\n\nZeitgenössische Illustration (1886)\n\n## 오디오 - 당신이 무엇을 듣는지!\n\n음소거된 비디오만으로도 다양한 감정들을 표현할 수는 있지만 여기에 소리를 더해준다면 훨씬 더 즐거운 경험을 가져다 줄 것입니다.  \n\n소리는 공기 혹은 가스, 액체, 고체와 같은 다른 매체들을 통해 압력의 파동 형태로 전파되는 진동입니다.\n\n> 디지털 오디오 시스템에서는 마이크가 소리를 아날로그 전기 신호로 전환하고, 아날로그-디지털 변환기 (ADC) - 보통 [펄스-부호 변조 (PCM)](https://en.wikipedia.org/wiki/Pulse-code_modulation)를 이용하여 - 아날로그 신호를 디지탈 신호로 변환합니다.\n\n![audio analog to digital](https://upload.wikimedia.org/wikipedia/commons/thumb/c/c7/CPT-Sound-ADC-DAC.svg/640px-CPT-Sound-ADC-DAC.svg.png \"audio analog to digital\")\n>[원문](https://commons.wikimedia.org/wiki/File:CPT-Sound-ADC-DAC.svg)\n\n## 코덱 - 데이터를 줄이기\n\n> CODEC은 **디지털 오디오/비디오를 압축하거나 압축해제하는** 전자회로나 소프트웨어입니다. 이것은 raw (압축이안된) 디지털 오디오/비디오를 압축된 형태로 혹은 그 반대로 변환합니다.\n> https://en.wikipedia.org/wiki/Video_codec\n\n만약 우리가 수많은 이미지들을 차곡차곡 채워서 영화라고 부르는 하나의 파일로 만든다면, 결과적으로 엄청나게 큰 하나의 파일을 접하게 될 것 입니다. 한번 계산해봅시다: \n\n한번 가정해봅시다. 해상도가 `1080 x 1920` (높이 x 너비)인 비디오를 하나 만들건데 색을 인코딩하는데 픽셀당 `3 bytes` (화면의 최소 화소)를 쓸 것입니다. (혹은 [24비트 컬러](https://en.wikipedia.org/wiki/Color_depth#True_color_.2824-bit.29), 16,777,216개의 다른 색상을 제공) 그리고 이 비디오는 `초당 24프레임`으로 재생되고 `30분` 정도 길이입니다. \n\n```c\ntoppf = 1080 * 1920 //total_of_pixels_per_frame\ncpp = 3 //cost_per_pixel\ntis = 30 * 60 //time_in_seconds\nfps = 24 //frames_per_second\n\nrequired_storage = tis * fps * toppf * cpp\n```\n\n이 비디오는 거의 `250.28GB`의 저장 용량이 필요하며 `1.19Gbps`의 대역폭이 요구됩니다! 
이것이 바로 우리가 [CODEC](https://github.com/leandromoreira/digital_video_introduction#how-does-a-video-codec-work)을 사용해야하는 이유입니다.\n\n## 컨테이너 - 오디오와 비디오의 안식처\n\n> 컨테이너 혹은 래퍼(wrapper) 포맷은 데이터와 메타데이터의 다양한 요소들이 어떻게 하나의 컴퓨터 파일에 구성되어있는지를 기술하는 스펙을 담은 메타파일 포맷입니다.\n> https://en.wikipedia.org/wiki/Digital_container_format\n\n**하나의 파일이 모든 스트림을 담고 있고** (주로 오디오와 비디오) 이것은 또 동기화와 제목, 해상도 등과 같은 일반적인 메타데이터도 제공합니다.\n\n보통 우리는 파일의 확장자를 보고 포맷을 유추할 수 있습니다: 예를들면 `video.webm`은 아마도 [`webm`](https://www.webmproject.org/)를 컨테이너로 사용하는 비디오겠죠.\n\n![container](/img/container.png)\n\n# FFmpeg - 명령줄 도구\n\n> 오디오와 비디오를 녹화하고 변환하고 스트리밍할 수 있는 완전한 크로스-플랫폼 솔루션.\n\n멀티미디어 작업을 한다면 우리는 [FFmpeg](https://www.ffmpeg.org/)이라고 하는 정말 쩌는 툴/라이브러리를 사용할 수 있습니다. 아마도 여러분도 이것을 직간접적으로 알고있거나/사용했던 기회가 있었을 것입니다. ([Chrome](https://www.chromium.org/developers/design-documents/video) 사용시죠?). \n\n이것은 `ffmpeg`이라고하는 아주 단순하지만 파워풀한 바이너리 형태의 명령줄 프로그램도 제공합니다.\n예를들어, 아래 명령을 치는 것만으로도 컨테이너를 `mp4`에서 `avi`로 변환할 수 있습니다:\n\n```bash\n$ ffmpeg -i input.mp4 output.avi\n```\n\n우리는 방금 어떤 컨테이너에서 다른 컨테이너로 변환하는 과정인 **remuxing**을 해보았습니다.\n기술적으로 FFmpeg은 트랜스코딩(transcoding)도 할 수 있습니다만 이것들에 대해서는 뒤에서 다시 이야기하겠습니다.\n\n## FFmpeg 명령줄 도구 101\n\nFFmpeg이 어떻게 동작하는지를 아주 잘 설명하고 있는 [문서](https://www.ffmpeg.org/ffmpeg.html)가 있습니다. \n\n간단히 정리하면, FFmpeg 명령줄 프로그램은 실행하기 위해 다음과 같은 형식의 인자를 갖춰야합니다 `ffmpeg {1} {2} -i {3} {4} {5}`, 여기서:\n\n1. 전역 옵션\n2. 입력 파일 옵션\n3. 입력 url\n4. 출력 파일 옵션\n5. 
출력 url\n\n2, 3, 4, 5 부분은 필요한만큼 많아질 수 있습니다.\n실제로 수행해보면 이 인자 형식을 더 쉽게 이해할 수 있습니다:\n\n``` bash\n# WARNING: this file is around 300MB\n$ wget -O bunny_1080p_60fps.mp4 http://distribution.bbb3d.renderfarming.net/video/mp4/bbb_sunflower_1080p_60fps_normal.mp4\n\n$ ffmpeg \\\n-y \\ # 전역 옵션\n-c:a libfdk_aac \\ # 입력 파일 옵션\n-i bunny_1080p_60fps.mp4 \\ # 입력 url\n-c:v libvpx-vp9 -c:a libvorbis \\ # 출력 파일 옵션\nbunny_1080p_60fps_vp9.webm # 출력 url\n```\n이 명령은 두개의 스트림(`aac` 코덱으로 인코딩된 오디오와 `h264` 코덱으로 인코딩된 비디오)을 포함하는 `mp4`를 입력 파일로 받고 이를 `webm`으로 변환합니다, 물론 그 안의 오디오와 비디오 코덱들도 변환하고 있죠.\n\n위의 명령을 더 단순화할 수도 있는데 그러면 FFmpeg이 기본값들을 사용하거나 추측하게될 것입니다.\n예를들어 `ffmpeg -i input.avi output.mp4` 이렇게만 친다면 어떤 오디오/비디오 코덱이 `output.mp4`를 만들기 위해 사용될까요?\n\nWerner Robitza가 작성한 꼭 읽고/실행해볼만한 [FFmpeg으로 인코딩하고 편집하는 것에 대한 튜토리얼](http://slhck.info/ffmpeg-encoding-course/#/)이 있습니다.\n\n# 공통 비디오 연산\n\n오디오/비디오 작업 중 보통 미디어에 대해 일련의 작업을 수행하게 됩니다.\n\n## 트랜스코딩 (Transcoding)\n\n![transcoding](/img/transcoding.png)\n\n**무엇인가?** 스트림 (오디오 또는 비디오) 중에 하나를 기존 코덱에서 다른 코덱으로 변환하는 작업.\n\n**왜?** 가끔 어떤 장치들은 (텔레비전, 스마트폰, 콘솔 등) X는 지원하지 않지만 Y를 지원합니다. 그리고 더 새로운 코덱들은 더 나은 압축률을 제공하기도 합니다.\n\n**어떻게?** `H264` (AVC) 비디오를 `H265` (HEVC)로 변환하기.\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-c:v libx265 \\\nbunny_1080p_60fps_h265.mp4\n```\n\n## 트랜스먹싱 (Transmuxing)\n\n![transmuxing](/img/transmuxing.png)\n\n**무엇인가?** 하나의 포맷을 (컨테이너) 다른 포맷으로 변환하는 작업.\n\n**왜?** 가끔 어떤 장치들은 (텔레비전, 스마트폰, 콘솔 등) X는 지원하지 않지만 Y를 지원합니다. 그리고 때때로 더 새로운 컨테이터들은 최신으로 요구되는 피처들을 제공합니다.\n\n**어떻게?** `mp4`에서 `webm`으로 변환하기.\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-c copy \\ # just saying to ffmpeg to skip encoding\nbunny_1080p_60fps.webm\n```\n\n## 트랜스레이팅 (Transrating)\n\n![transrating](/img/transrating.png)\n\n**무엇인가?** 비트레이트를 변환하거나 다른 변환본(renditions)을 만드는 작업.\n\n**왜?** 사람들은 `2G` (edge)가 연결된 저사양의 스마트폰에서든 `광통신` 인터넷이 연결된 4K 텔레비전에든 당신의 비디오 볼 것이다. 
그래서 같은 비디오라도 여러 비트레이트를 가진 하나 이상의 변환본을 제공해야합니다.\n\n**어떻게?** 964K와 3856K 사이의 비트레이트를 가진 변환본을 생성하기.\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-minrate 964K -maxrate 3856K -bufsize 2000K \\\nbunny_1080p_60fps_transrating_964_3856.mp4\n```\n\n보통 트랜스레이팅(transrating)은 트랜스사이징(transsizing)과 함께 사용합니다. Werner Robitza가 작성한 또 다른 필독/실행물 [FFmpeg rate 제어에 대한 연재 포스팅](http://slhck.info/posts/)이 있습니다.\n\n## 트랜스사이징 (Transsizing)\n\n![transsizing](/img/transsizing.png)\n\n**무엇인가?** 하나의 해상도에서 다른 것으로 변환하는 작업. 이전에 언급한 것처럼 트랜스사이징(transsizing)은 주로 트랜스레이팅(transrating)과 함께 사용됩니다.\n\n**왜?** 트랜스레이팅(transrating)에서의 이유와 동일함.\n\n**어떻게?** `1080p`의 해상도를 `480p`로 변환하기.\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-vf scale=480:-1 \\\nbunny_1080p_60fps_transsizing_480.mp4\n```\n\n## 보너스: 적응형 스트리밍 (Adaptive Streaming)\n\n![adaptive streaming](/img/adaptive-streaming.png)\n\n**무엇인가?** 다양한 (비트레이트의) 해상도를 생성하고 미디어들을 여러 청크로 나눠서 http를 통해 서비스하는 작업.\n\n**왜?** 저사양 스마트폰 혹은 4K TV에서 시청할 수 있는 유연한 미디어를 제공하기 위해. 또한 이렇게 하면 확장이나 배포하기가 쉽습니다. 
다만 지연시간이 생길 수 있습니다.\n\n**어떻게?** DASH를 이용하여 적응형 WebM을 생성하기.\n```bash\n# video streams\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 160x90 -b:v 250k -keyint_min 150 -g 150 -an -f webm -dash 1 video_160x90_250k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 320x180 -b:v 500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_320x180_500k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 750k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_750k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 1000k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_1000k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 1280x720 -b:v 1500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_1280x720_1500k.webm\n\n# audio streams\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:a libvorbis -b:a 128k -vn -f webm -dash 1 audio_128k.webm\n\n# the DASH manifest\n$ ffmpeg \\\n -f webm_dash_manifest -i video_160x90_250k.webm \\\n -f webm_dash_manifest -i video_320x180_500k.webm \\\n -f webm_dash_manifest -i video_640x360_750k.webm \\\n -f webm_dash_manifest -i video_640x360_1000k.webm \\\n -f webm_dash_manifest -i video_1280x720_500k.webm \\\n -f webm_dash_manifest -i audio_128k.webm \\\n -c copy -map 0 -map 1 -map 2 -map 3 -map 4 -map 5 \\\n -f webm_dash_manifest \\\n -adaptation_sets \"id=0,streams=0,1,2,3,4 id=1,streams=5\" \\\n manifest.mpd\n```\n\nPS: 저는 이 예제를 [DASH를 이용한 Adaptive WebM 재생에 대한 지침](http://wiki.webmproject.org/adaptive-streaming/instructions-to-playback-adaptive-webm-using-dash)에서 가져왔습니다.\n\n## 더 들어가기\n\n[FFmpeg에 대한 아주 수많은 다른 사용방법들이](https://github.com/leandromoreira/digital_video_introduction/blob/master/encoding_pratical_examples.md#split-and-merge-smoothly) 있습니다.\n저는 이걸 YouTube 용 동영상들을 만들고/편집하는데 *iMovie*와 함께 사용합니다. 
물론 여러분도 프로페셔널처럼 사용하실 수 있습니다.\n\n# 삽질하면서 FFmpeg libav 배우기\n\n> 가끔 '소리나는 것과 보이는 것이' 궁금하지 않으세요?\n> **David Robert Jones**\n\n[FFmpeg](#ffmpeg---command-line)는 미디어 파일들에 대한 필수 작업들을 수행하는 명령줄 도구로써 매우 유용합니다. 어떻게 우리의 프로그램에 이용할 수 있을까요?\n\nFFmpeg는 우리의 프로그램에 통합될 수 있는 [여러 라이브러리들로 구성](https://www.ffmpeg.org/doxygen/trunk/index.html)되어있습니다.\n보통, FFmpeg을 설치할때 이 모든 라이브러리들도 자동으로 설치됩니다. 이 라이브러리 모음들을 **FFmpeg libav**라고 해보죠.\n\n> 이 제목은 Zed Shaw의 [Learn X the Hard Way](https://learncodethehardway.org/) 시리즈, 특히 그의 책 Learn C the Hard Way에 대한 오마주입니다.\n\n## 챕터 0 - 악명 높은 hello world\n이 hello world는 실제로 `\"hello world\"` 메시지를 터미널에 보여주진 않습니다. :tongue:\n대신 우리는 **비디오의 정보를 출력**할 것입니다. 비디오의 포맷 (컨테이너), 길이, 해상도, 오디오 채널들 같은 것들을 말이죠. 그리고 마지막으로 **몇몇 프레임들을 디코딩하고 이미지 파일로 저장**해보겠습니다.\n\n### FFmpeg libav 아키텍처\n\n하지만 코딩을 시작하기 전에, **FFmpeg libav 아키텍처**가 어떻게 동작하는지 이것들의 컴포넌트들이 서로 어떻게 통신하는지를 배워봅시다. \n\n여기 비디오가 디코딩되는 프로세스를 담은 다이어그램이 하나 있습니다.\n\n![ffmpeg libav architecture - decoding process](/img/decoding.png)\n\n우선 여러분의 미디어 파일을 [`AVFormatContext`](https://ffmpeg.org/doxygen/trunk/structAVFormatContext.html) (비디오 컨테이너는 포맷이라고도 합니다)라고 불리는 컴포넌트로 불러올 필요가 있습니다.\n이건 사실 파일 전체를 불러오는건 아닙니다: 종종 헤더만을 읽죠.\n\n일단 최소한의 **컨테이너 헤더**를 불러왔다면, 우리는 이것의 스트림 (기본적이고 필수적인 오디오와 비디오 데이터라고 간주하시면 됩니다)에 접근할 수 있습니다.\n각 스트림은 [`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html)라고 하는 컴포넌트로 접근 가능합니다.\n\n> 스트림은 데이터의 연속적인 흐름을 의미하는 fancy한 이름입니다.\n\n비디오가 두개의 스트림을 가지고 있다고 해봅시다: 오디오는 [AAC CODEC](https://en.wikipedia.org/wiki/Advanced_Audio_Coding)로 인코딩되어있고 비디오는 [H264 (AVC) CODEC](https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC)로 인코딩되어있습니다. 각 스트림으로부터 [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html) 컴포넌트로 로드될 패킷이라 칭하는 **데이터의 조각들**을 추출할 수 있습니다.\n\n**패킷안의 데이터는 여전히 인코딩되어 있습니다** (압축된상태). 이 패킷을 디코딩하기 위해서 우리는 이것들을 특정한 [`AVCodec`](https://ffmpeg.org/doxygen/trunk/structAVCodec.html)에 넘겨야합니다.\n\n`AVCodec`은 그것들을 [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html)으로 디코딩하며 최종적으로 우리에게 **압축 해제된 프레임**을 넘겨줍니다. 
오디오 및 비디오 스트림에서 동일한 용어/프로세스가 사용된다는 점을 유의하십시오.\n\n### 요구 사항\n\n간혹 예제를 컴파일하고 실행하는데 이슈들을 겪는 분들이 계셔서 **우리의 개발/실행 환경으로 [`Docker`](https://docs.docker.com/install/)를 사용할 것입니다,** 우리는 또한 big buck bunny 비디오를 사용할 것인데 따로 로컬에 가지고 있지 않다면 `make fetch_small_bunny_video` 명령만 실행해주시면 됩니다.\n\n### 챕터 0 - 몸풀기 코드\n\n> #### TLDR; [코드](/0_hello_world.c)랑 실행하는거나 보여주세요.\n> ```bash\n> $ make run_hello\n> ```\n> 좀 상세한 부분은 넘어가겠습니다. 그러나 걱정하진 마세요: [소스 코드는 github에 있습니다](/0_hello_world.c). \n\n포맷 (컨테이너)에 관한 정보를 담고 있는 [`AVFormatContext`](http://ffmpeg.org/doxygen/trunk/structAVFormatContext.html) 컴포넌트에게 메모리를 할당합니다.\n\n```c\nAVFormatContext *pFormatContext = avformat_alloc_context();\n```\n\n이제 우리는 파일을 열고 헤더를 읽어서 `AVFormatContext`에 포맷에 관한 기본적인 정보를 채워줄 것입니다 (보통 코덱은 열리지 않음).\n이를 위해 사용할 함수는 [`avformat_open_input`](http://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga31d601155e9035d5b0e7efedc894ee49)입니다. 이 함수는 `AVFormatContext`와 `filename`, 그리고 두개의 옵셔널 인자를 받습니다: [`AVInputFormat`](https://ffmpeg.org/doxygen/trunk/structAVInputFormat.html) (`NULL`을 넘기면 FFmpeg이 포맷을 추측)과 [`AVDictionary`](https://ffmpeg.org/doxygen/trunk/structAVDictionary.html) (demuxer에 대한 옵션)\n\n```c\navformat_open_input(&pFormatContext, filename, NULL, NULL);\n```\n\n포맷 이름과 미디어 길이를 출력할 수 있습니다:\n\n```c\nprintf(\"Format %s, duration %lld us\", pFormatContext->iformat->long_name, pFormatContext->duration);\n```\n\n`streams`에 접근하기 위해서는, 미디어로부터 데이터를 읽어야합니다. 
[`avformat_find_stream_info`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#gad42172e27cddafb81096939783b157bb) 함수가 그 일을 하죠.\n`pFormatContext->nb_streams`가 스트림의 개수를 가지고 있고 `pFormatContext->streams[i]`는 `i`번째 스트림 ([`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html))을 반환합니다.\n\n```c\navformat_find_stream_info(pFormatContext,  NULL);\n```\n\n이제 모든 스트림에 대해 루프를 돌아보겠습니다.\n\n```c\nfor (int i = 0; i < pFormatContext->nb_streams; i++)\n{\n  //\n}\n```\n\n각 스트림에 대해서, `i`번째 스트림에 사용된 코덱 속성들을 담고있는 [`AVCodecParameters`](https://ffmpeg.org/doxygen/trunk/structAVCodecParameters.html)를 가져오겠습니다.\n\n```c\nAVCodecParameters *pLocalCodecParameters = pFormatContext->streams[i]->codecpar;\n```\n\n이 코덱 속성을 이용하여 [`avcodec_find_decoder`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga19a0ca553277f019dd5b0fec6e1f9dca) 함수를 통해 적절한 코덱을 찾을 수 있습니다. 코덱 id에 맞는 등록된 디코더를 찾고 스트림을 어떻게 en**CO**de와 **DEC**ode할지를 알고 있는 [`AVCodec`](http://ffmpeg.org/doxygen/trunk/structAVCodec.html) 컴포넌트를 반환받을 수 있습니다.\n\n```c\nAVCodec *pLocalCodec = avcodec_find_decoder(pLocalCodecParameters->codec_id);\n```\n\n이제 코덱에 관한 정보를 출력할 수 있습니다.\n\n```c\n// specific for video and audio\nif (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_VIDEO) {\n  printf(\"Video Codec: resolution %d x %d\", pLocalCodecParameters->width, pLocalCodecParameters->height);\n} else if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_AUDIO) {\n  printf(\"Audio Codec: %d channels, sample rate %d\", pLocalCodecParameters->channels, pLocalCodecParameters->sample_rate);\n}\n// general\nprintf(\"\\tCodec %s ID %d bit_rate %lld\", pLocalCodec->long_name, pLocalCodec->id, pLocalCodecParameters->bit_rate);\n```\n\n이 코덱을 기반으로 디코딩/인코딩 프로세스에 대한 컨텍스트를 담고있는 [`AVCodecContext`](https://ffmpeg.org/doxygen/trunk/structAVCodecContext.html)의 메모리를 할당할 수 있습니다. 
그 다음 코덱 파라미터로 코덱 컨텍스트를 채워줍니다; [`avcodec_parameters_to_context`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#gac7b282f51540ca7a99416a3ba6ee0d16)로 가능합니다.\n\n일단 코덱 컨텍스트를 채웠다면 이제 코덱을 열 수 있습니다. [`avcodec_open2`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d)로 가능합니다. \n\n```c\nAVCodecContext *pCodecContext = avcodec_alloc_context3(pCodec);\navcodec_parameters_to_context(pCodecContext, pCodecParameters);\navcodec_open2(pCodecContext, pCodec, NULL);\n```\n\n이제 스트림으로부터 패킷을 읽고 디코딩하여 프레임으로 만들어볼 예정입니다. 그러나 우선, [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html)와 [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html) 두 컴포넌트에 대해 메모리 할당이 필요합니다.\n\n```c\nAVPacket *pPacket = av_packet_alloc();\nAVFrame *pFrame = av_frame_alloc();\n```\n\n패킷이 존재하는 동안 루프를 돌면서 [`av_read_frame`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga4fdb3084415a82e3810de6ee60e46a61) 함수를 이용해 스트림으로부터 패킷을 받아오겠습니다. \n\n```c\nwhile (av_read_frame(pFormatContext, pPacket) >= 0) {\n  //...\n}\n```\n\n코덱 컨텍스트를 [`avcodec_send_packet`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3) 함수를 통해 디코더에 **raw 데이터 패킷 (압축된 프레임)을 보내**봅시다.\n\n```c\navcodec_send_packet(pCodecContext, pPacket);\n```\n\n그리고 마찬가지로 코덱 컨텍스트를 [`avcodec_receive_frame`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c) 함수를 통해 디코더로부터 **raw 데이터 프레임 (압축 해제된 프레임)를 받아**봅시다.\n\n```c\navcodec_receive_frame(pCodecContext, pFrame);\n```\n\n프레임 번호, [PTS](https://en.wikipedia.org/wiki/Presentation_timestamp), DTS, [프레임 타입](https://en.wikipedia.org/wiki/Video_compression_picture_types) 등을 출력해볼 수 있습니다.\n\n```c\nprintf(\n    \"Frame %c (%d) pts %d dts %d key_frame %d [coded_picture_number %d, display_picture_number %d]\",\n    av_get_picture_type_char(pFrame->pict_type),\n    pCodecContext->frame_number,\n    pFrame->pts,\n    pFrame->pkt_dts,\n    pFrame->key_frame,\n    
pFrame->coded_picture_number,\n    pFrame->display_picture_number\n);\n```\n\n마지막으로 디코딩된 프레임을 [심플 흑백 이미지](https://en.wikipedia.org/wiki/Netpbm_format#PGM_example)로 저장해볼 수 있습니다. 이 과정은 매우 단순합니다, 인덱스가 [planes Y, Cb, Cr](https://en.wikipedia.org/wiki/YCbCr)를 참조하고 있는 `pFrame->data`를 사용할 것입니다. 우리는 흑백 이미지를 저장하기 위해 `0` (Y) 인덱스를 선택했습니다.\n\n```c\nsave_gray_frame(pFrame->data[0], pFrame->linesize[0], pFrame->width, pFrame->height, frame_filename);\n\nstatic void save_gray_frame(unsigned char *buf, int wrap, int xsize, int ysize, char *filename)\n{\n    FILE *f;\n    int i;\n    f = fopen(filename,\"w\");\n    // writing the minimal required header for a pgm file format\n    // portable graymap format -> https://en.wikipedia.org/wiki/Netpbm_format#PGM_example\n    fprintf(f, \"P5\\n%d %d\\n%d\\n\", xsize, ysize, 255);\n\n    // writing line by line\n    for (i = 0; i < ysize; i++)\n        fwrite(buf + i * wrap, 1, xsize, f);\n    fclose(f);\n}\n```\n\nvoilà! 이제 우리는 2MB짜리 흑백 이미지를 얻어냈습니다:\n\n![saved frame](/img/generated_frame.png)\n\n## 챕터 1 - 오디오와 비디오 동기화\n\n> **플레이어가 되세요** - 신규 MSE 비디오 플레이어를 작성 중인 젊은 JS 개발자\n\n[트랜스코딩 예제 코드](#챕터-3---트랜스코딩-(transcoding))로 넘어가기 전에 **타이밍** 혹은 어떻게 비디오 플레이어가 하나의 프레임을 제시간에 재생해야하는지에 대해서 이야기해봅시다.\n\n지난 예제에서, 우리는 이렇게 보이는 프레임들을 저장했습니다.\n\n![frame 0](/img/hello_world_frames/frame0.png)\n![frame 1](/img/hello_world_frames/frame1.png)\n![frame 2](/img/hello_world_frames/frame2.png)\n![frame 3](/img/hello_world_frames/frame3.png)\n![frame 4](/img/hello_world_frames/frame4.png)\n![frame 5](/img/hello_world_frames/frame5.png)\n\n비디오 플레이어를 디자인 할때 **각 프레임을 주어진 속도에 재생**해야합니다, 그렇지 않으면 너무 빠르거나 너무 느리게 재생되기 때문에 비디오를 제대로 즐기기 어려울 것입니다.\n\n그래서 뭔가 프레임을 원활하게 재생할 수 있는 로직을 소개할 필요가 있습니다. 
이 이슈를 위해, 각 프레임은 **프리젠테이션 타임스탬프** (PTS)를 갖게 되는데 이것은 **프레임속도(fps)** 로 나누어지는 **타임베이스(timebase)** 라고 하는 유리수(분모가 **타임스케일(timescale)** 로 알려진)로 구성된(factored) 증가하는 숫자입니다.\n\n예제를 좀 본다면 이해가 더 쉬울 것입니다, 몇개의 시나리오를 시뮬레이션해보죠.\n\n`fps=60/1` 이고 `timebase=1/60000` 라면 각 PTS는 `timescale / fps = 1000`만큼 증가할 것입니다. 그래서 각 프레임의 **PTS 실제 시간**은 이렇게 됩니다 (0부터 시작한다고 하면):\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 1000, PTS_TIME = PTS * timebase = 0.016`\n* `frame=2, PTS = 2000, PTS_TIME = PTS * timebase = 0.033`\n\n동일한 시나리오지만 타임베이스가 `1/60`이라면.\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 1, PTS_TIME = PTS * timebase = 0.016`\n* `frame=2, PTS = 2, PTS_TIME = PTS * timebase = 0.033`\n* `frame=3, PTS = 3, PTS_TIME = PTS * timebase = 0.050`\n\n`fps=25/1`와 `timebase=1/75`에 대해서는 각 PTS는 `timescale / fps = 3`만큼 증가할 것이고 PTS 시간은 이렇게 될 것입니다:\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 3, PTS_TIME = PTS * timebase = 0.04`\n* `frame=2, PTS = 6, PTS_TIME = PTS * timebase = 0.08`\n* `frame=3, PTS = 9, PTS_TIME = PTS * timebase = 0.12`\n* ...\n* `frame=24, PTS = 72, PTS_TIME = PTS * timebase = 0.96`\n* ...\n* `frame=4064, PTS = 12192, PTS_TIME = PTS * timebase = 162.56`\n\n이제 이 `pts_time`으로 오디오의 `pts_time` 혹은 시스템 시간과 동기화해서 재생할 방법을 찾을 수 있습니다. FFmpeg libav는 그 정보들을 아래 API를 통해 제공합니다.\n\n- fps = [`AVStream->avg_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a946e1e9b89eeeae4cab8a833b482c1ad)\n- tbr = [`AVStream->r_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#ad63fb11cc1415e278e09ddc676e8a1ad)\n- tbn = [`AVStream->time_base`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a9db755451f14e2bf590d4b85d82b32e6)\n\n호기심에 보자면, 우리가 저장했던 프레임들은 DTS 순으로 (frames: 1,6,4,2,3,5) 보내졌지만 재생은 PTS 순 (frames: 1,2,3,4,5,6)로 되었습니다. 
또한, B-프레임이 P 혹은 I-프레임 대비 얼마나 저렴한지도 알 수 있죠.\n\n```\nLOG: AVStream->r_frame_rate 60/1\nLOG: AVStream->time_base 1/60000\n...\nLOG: Frame 1 (type=I, size=153797 bytes) pts 6000 key_frame 1 [DTS 0]\nLOG: Frame 2 (type=B, size=8117 bytes) pts 7000 key_frame 0 [DTS 3]\nLOG: Frame 3 (type=B, size=8226 bytes) pts 8000 key_frame 0 [DTS 4]\nLOG: Frame 4 (type=B, size=17699 bytes) pts 9000 key_frame 0 [DTS 2]\nLOG: Frame 5 (type=B, size=6253 bytes) pts 10000 key_frame 0 [DTS 5]\nLOG: Frame 6 (type=P, size=34992 bytes) pts 11000 key_frame 0 [DTS 1]\n```\n\n## 챕터 2 - 리먹싱 (remuxing)\n\nRemuxing은 하나의 포맷 (컨테이너)에서 다른 것으로 변경하는 작업입니다. 다음 예제처럼 FFmpeg을 쓰면 별로 어렵지 않게 [MPEG-4](https://en.wikipedia.org/wiki/MPEG-4_Part_14) 비디오를 [MPEG-TS](https://en.wikipedia.org/wiki/MPEG_transport_stream)로 변경할 수 있습니다:\n\n```bash\nffmpeg input.mp4 -c copy output.ts\n```\n\n이것은 mp4를 demux하지만 디코딩이나 인코딩은 하지 않습니다. (`-c copy`) 최종적으로 `mpegts` 파일로 mux할 것입니다. 만약 포맷을 의미하는 `-f`를 제공하지 않으면 ffmpeg은 파일 확장자로 포맷을 추측할 것입니다.\n\nFFmpeg 혹은 libav의 일반적인 사용법은 아래 패턴/아키텍처 또는 워크플로우를 따릅니다: \n* **[프로토콜 레이어](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** - `input`을 받음 (예를들면 `file`이지만 `rtmp` 또는 `HTTP` 입력도 가능).\n* **[포맷 레이어](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - 컨텐츠를 `demuxes`, 대부분 메타데이터와 스트림을 열어봄\n* **[코덱 레이어](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - 압축된 스트림 데이터를 `decodes` <sup>*optional*</sup>\n* **[픽셀 레이어](https://ffmpeg.org/doxygen/trunk/group__lavfi.html)** - raw 프레임에 대해 (리사이징 같은) `filters`를 적용할 수도 있음 <sup>*optional*</sup>\n* and then it does the reverse path\n* **[코덱 레이어](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - raw 프레임을 `encodes` (또는 `re-encodes` 혹은 `transcodes` 까지도) <sup>*optional*</sup>\n* **[포맷 레이어](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - raw 스트림 (압축된 데이터)를 `muxes` (또는 `remuxes`)\n* **[프로토콜 레이어](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** - 그리고 마지막으로 muxed된 데이터를 `output`으로 전송 (또다른 파일 혹은 네트워크 원격 서버일 수도 있음)\n\n![ffmpeg libav 
workflow](/img/ffmpeg_libav_workflow.jpeg)\n> 이 그래프는 [Leixiaohua's](http://leixiaohua1020.github.io/#ffmpeg-development-examples)와 [Slhck's](https://slhck.info/ffmpeg-encoding-course/#/9)의 작업으로부터 큰 영감을 받은 것입니다.\n\n자 이제 `ffmpeg input.mp4 -c copy output.ts`와 동일한 효과를 제공할 수 있도록 libav 를 이용한 예제를 하나 구현해봅시다.\n\n입력 (`input_format_context`)으로부터 읽은 것을 다른 출력 (`output_format_context`)으로 변환해보겠습니다.\n\n```c\nAVFormatContext *input_format_context = NULL;\nAVFormatContext *output_format_context = NULL;\n```\n\n일반적으로 메모리 할당을 시작하고 입력 포맷을 엽니다. 이번같은 특정한 경우에는, 입력 파일을 열고나서 출력 파일을 위한 메모리를 할당하겠습니다. \n\n```c\nif ((ret = avformat_open_input(&input_format_context, in_filename, NULL, NULL)) < 0) {\n  fprintf(stderr, \"Could not open input file '%s'\", in_filename);\n  goto end;\n}\nif ((ret = avformat_find_stream_info(input_format_context, NULL)) < 0) {\n  fprintf(stderr, \"Failed to retrieve input stream information\");\n  goto end;\n}\n\navformat_alloc_output_context2(&output_format_context, NULL, NULL, out_filename);\nif (!output_format_context) {\n  fprintf(stderr, \"Could not create output context\\n\");\n  ret = AVERROR_UNKNOWN;\n  goto end;\n}\n```\n\n비디오, 오디오, 자막 타입의 스트림만 remux할 것이며 사용하게될 스트림을 인덱스 배열에 들고 있겠습니다. \n\n```c\nnumber_of_streams = input_format_context->nb_streams;\nstreams_list = av_mallocz_array(number_of_streams, sizeof(*streams_list));\n```\n\n필요한만큼의 메모리를 할당한 후, 모든 스트림에 대해 각각 루프를 돌면서 [avformat_new_stream](https://ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827) 함수를 통해 출력 포맷 컨텍스트에다가 새로운 출력 스트림을 생성해야합니다. 
비디오, 오디오, 자막이 아닌 모든 스트림들에 대해서는 마킹을 해서 나중에 스킵할 수 있게 하겠습니다.\n\n```c\nfor (i = 0; i < input_format_context->nb_streams; i++) {\n  AVStream *out_stream;\n  AVStream *in_stream = input_format_context->streams[i];\n  AVCodecParameters *in_codecpar = in_stream->codecpar;\n  if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&\n      in_codecpar->codec_type != AVMEDIA_TYPE_VIDEO &&\n      in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {\n    streams_list[i] = -1;\n    continue;\n  }\n  streams_list[i] = stream_index++;\n  out_stream = avformat_new_stream(output_format_context, NULL);\n  if (!out_stream) {\n    fprintf(stderr, \"Failed allocating output stream\\n\");\n    ret = AVERROR_UNKNOWN;\n    goto end;\n  }\n  ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar);\n  if (ret < 0) {\n    fprintf(stderr, \"Failed to copy codec parameters\\n\");\n    goto end;\n  }\n}\n```\n\n이제 출력 파일을 생성할 수 있습니다.\n\n```c\nif (!(output_format_context->oformat->flags & AVFMT_NOFILE)) {\n  ret = avio_open(&output_format_context->pb, out_filename, AVIO_FLAG_WRITE);\n  if (ret < 0) {\n    fprintf(stderr, \"Could not open output file '%s'\", out_filename);\n    goto end;\n  }\n}\n\nret = avformat_write_header(output_format_context, NULL);\nif (ret < 0) {\n  fprintf(stderr, \"Error occurred when opening output file\\n\");\n  goto end;\n}\n```\n\n그런 후에, 입력 스트림에서 패킷을 하나씩 출력 스트림으로 복사하겠습니다. 
패킷이 존재하는 동안 (`av_read_frame`), 각 패킷에 대해 PTS와 DTS를 다시 계산하고 마지막으로 포맷 컨텍스트에 (`av_interleaved_write_frame`) 씁니다.\n\n```c\nwhile (1) {\n  AVStream *in_stream, *out_stream;\n  ret = av_read_frame(input_format_context, &packet);\n  if (ret < 0)\n    break;\n  in_stream  = input_format_context->streams[packet.stream_index];\n  if (packet.stream_index >= number_of_streams || streams_list[packet.stream_index] < 0) {\n    av_packet_unref(&packet);\n    continue;\n  }\n  packet.stream_index = streams_list[packet.stream_index];\n  out_stream = output_format_context->streams[packet.stream_index];\n  /* copy packet */\n  packet.pts = av_rescale_q_rnd(packet.pts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);\n  packet.dts = av_rescale_q_rnd(packet.dts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);\n  packet.duration = av_rescale_q(packet.duration, in_stream->time_base, out_stream->time_base);\n  // https://ffmpeg.org/doxygen/trunk/structAVPacket.html#ab5793d8195cf4789dfb3913b7a693903\n  packet.pos = -1;\n\n  //https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1\n  ret = av_interleaved_write_frame(output_format_context, &packet);\n  if (ret < 0) {\n    fprintf(stderr, \"Error muxing packet\\n\");\n    break;\n  }\n  av_packet_unref(&packet);\n}\n```\n\n마무리를 위해 [av_write_trailer](https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga7f14007e7dc8f481f054b21614dfec13) 함수를 통해 스트림 트레일러(trailer)를 출력 미디어 파일에 씁니다.\n\n```c\nav_write_trailer(output_format_context);\n```\n\n이제 테스트할 준비가 되었습니다. 첫번째 테스트는 MP4에서 MPEG-TS 비디오 파일로의 포맷 (비디오 컨테이너) 변환입니다. 우리는 기본적으로 `ffmpeg input.mp4 -c copy output.ts` 명령줄을 libav를 이용해 만든 것입니다.\n\n```bash\nmake run_remuxing_ts\n```\n\n동작합니다!!! 절 믿지 않았나요?! 
그러시면 안되죠, `ffprobe`로 한번 확인해보겠습니다:\n\n```bash\nffprobe -i remuxed_small_bunny_1080p_60fps.ts\n\nInput #0, mpegts, from 'remuxed_small_bunny_1080p_60fps.ts':\n  Duration: 00:00:10.03, start: 0.000000, bitrate: 2751 kb/s\n  Program 1\n    Metadata:\n      service_name    : Service01\n      service_provider: FFmpeg\n    Stream #0:0[0x100]: Video: h264 (High) ([27][0][0][0] / 0x001B), yuv420p(progressive), 1920x1080 [SAR 1:1 DAR 16:9], 60 fps, 60 tbr, 90k tbn, 120 tbc\n    Stream #0:1[0x101]: Audio: ac3 ([129][0][0][0] / 0x0081), 48000 Hz, 5.1(side), fltp, 320 kb/s\n```\n\n우리가 했던 것을 그래프로 정리하기 위해, 초반 [libav의 동작 방식에 대한 아이디어](https://github.com/leandromoreira/ffmpeg-libav-tutorial#ffmpeg-libav-architecture)를 다시 한번 살펴보면 코덱 부분만 건너뛴걸 볼 수 있습니다.\n\n![remuxing libav components](/img/remuxing_libav_components.png)\n\n이 챕터를 끝내기 전에 리먹싱(remuxing) 프로세스의 중요한 부분을 보여드리고자 합니다, **muxer에 옵션을 줄 수 있다**는 것인데요. 만약에 전송을 [MPEG-DASH](https://developer.mozilla.org/en-US/docs/Web/Apps/Fundamentals/Audio_and_video_delivery/Setting_up_adaptive_streaming_media_sources#MPEG-DASH_Encoding) 포맷으로 하고 싶다면 MPEG-TS나 기본 MPEG-4 대신 (`fmp4`라고 부르는) [fragmented mp4](https://stackoverflow.com/a/35180327)를 사용해야합니다.\n\n[명령줄로는 이렇게 쉽게 할 수 있습니다](https://developer.mozilla.org/en-US/docs/Web/API/Media_Source_Extensions_API/Transcoding_assets_for_MSE#Fragmenting).\n\n```\nffmpeg -i non_fragmented.mp4 -movflags frag_keyframe+empty_moov+default_base_moof fragmented.mp4\n```\n\nlibav 버전도 거의 명령줄 만큼이나 쉽습니다. 패킷을 복사하기 바로 전, 출력 헤더를 쓸때 해당 옵션을 넘겨주기만 하면 됩니다. \n\n```c\nAVDictionary* opts = NULL;\nav_dict_set(&opts, \"movflags\", \"frag_keyframe+empty_moov+default_base_moof\", 0);\nret = avformat_write_header(output_format_context, &opts);\n```\n\n이제 fragmented mp4 파일을 생성할 수 있습니다:\n\n```bash\nmake run_remuxing_fragmented_mp4\n```\n\n제가 여러분께 거짓말하고 있지 않다는걸 보여드리죠. 
결과물의 차이를 확인하기 위해 [gpac/mp4box.js](http://download.tsi.telecom-paristech.fr/gpac/mp4box.js/filereader.html) 혹은 [http://mp4parser.com/](http://mp4parser.com/) 같은 아주 훌륭한 사이트/툴을 이용할 수 있습니다. 일단 \"common\" mp4 파일을 로드해보세요. \n\n![mp4 boxes](/img/boxes_normal_mp4.png)\n\n보시다시피 단 하나의 `mdat` 박스(atom)가 있습니다, **여기에 비디오와 오디오 프레임이 담겨있습니다**. 이번엔 fragmented mp4를 로드해서 `mdat` 박스가 어떻게 흩어져있는지 보시겠습니다.\n\n![fragmented mp4 boxes](/img/boxes_fragmente_mp4.png)\n\n## 챕터 3 - 트랜스코딩 (transcoding)\n\n> #### TLDR; [코드](/3_transcoding.c)랑 실행하는거나 보여주세요.\n> ```bash\n> $ make run_transcoding\n> ```\n> 좀 상세한 부분은 넘어가겠습니다, 그러나 걱정하진 마세요: [소스 코드는 github에 있습니다](/3_transcoding.c). \n\n이번 챕터에서는 아주 간단한 트랜스코더를 만들어보겠습니다, C로 작성할 것이고, 이것으로 H264로 인코딩된 비디오를 H265로 변환할 수 있을겁니다. **FFmpeg/libav** 라이브러리, 특히 [libavcodec](https://ffmpeg.org/libavcodec.html), libavformat, libavutil를 이용하겠습니다. \n\n![media transcoding flow](/img/transcoding_flow.png)\n\n> _빠르게 복습해보면:_ [**AVFormatContext**](https://www.ffmpeg.org/doxygen/trunk/structAVFormatContext.html)는 컨테이너 (ex: MKV, MP4, Webm, TS) 같은 미디어 파일 포맷에 대한 추상화 구조체입니다. [**AVStream**](https://www.ffmpeg.org/doxygen/trunk/structAVStream.html)는 주어진 포맷 (ex: 오디오, 비디오, 자막, 메타데이터)에 대한 각 데이터 유형을 나타냅니다. [**AVPacket**](https://www.ffmpeg.org/doxygen/trunk/structAVPacket.html)은 `AVStream`으로부터 얻어진 압축된 데이터의 조각입니다. 그리고 이것은 [**AVCodec**](https://www.ffmpeg.org/doxygen/trunk/structAVCodec.html) (ex: av1, h264, vp9, hevc)에 의해 디코딩되어 [**AVFrame**](https://www.ffmpeg.org/doxygen/trunk/structAVFrame.html)라고 불리는 raw 데이터로 만들어집니다.\n\n### 트랜스먹싱 (Transmuxing)\n\n간단한 트랜스먹싱 작업을 시작해봅시다. 그리고나서 이 코드 기반으로 빌드할 수 있을겁니다. 첫번째 단계는 **입력 파일 로드하기**입니다.\n\n```c\n// Allocate an AVFormatContext\navfc = avformat_alloc_context();\n// Open an input stream and read the header.\navformat_open_input(avfc, in_filename, NULL, NULL);\n// Read packets of a media file to get stream information.\navformat_find_stream_info(avfc, NULL);\n```\n\n이제 디코더를 설정할 것인데, `AVFormatContext`가 모든 `AVStream` 컴포넌트에 접근할 수 있게 해줄 것입니다. 
그리고 각각의 스트림에 대해서, `AVCodec`을 가져와서 특정 `AVCodecContext`를 생성합니다. 그리고 마지막으로 주어진 코덱을 열게되고 디코딩 프로세스를 수행할 수 있습니다.\n\n> [**AVCodecContext**](https://www.ffmpeg.org/doxygen/trunk/structAVCodecContext.html)는 비트레이트, 프레임 속도, 샘플레이트, 채널, 높이 등과 같은 미디어 설정에 대한 데이터를 가지고 있습니다.\n\n```c\nfor (int i = 0; i < avfc->nb_streams; i++)\n{\n  AVStream *avs = avfc->streams[i];\n  AVCodec *avc = avcodec_find_decoder(avs->codecpar->codec_id);\n  AVCodecContext *avcc = avcodec_alloc_context3(*avc);\n  avcodec_parameters_to_context(*avcc, avs->codecpar);\n  avcodec_open2(*avcc, *avc, NULL);\n}\n```\n\n마찬가지로 트랜스먹싱에서도 출력 미디어 파일을 준비해둬야합니다, 우선 출력 `AVFormatContext`에 대해 **메모리를 할당**합니다. 이 출력 포맷에 **각 스트림**을 생성합니다. 스트림을 제대로 적재시키기 위해 디코더로부터 **코덱 파라미터를 복사**합니다.\n\n인코더가 글로벌 헤더를 사용할 수 있도록 지정하는 `AV_CODEC_FLAG_GLOBAL_HEADER` **플래그를 설정**합니다. 그리고 출력으로 **쓰기 위한 파일**을 열고 헤더를 저장합니다.\n\n```c\navformat_alloc_output_context2(&encoder_avfc, NULL, NULL, out_filename);\n\nAVStream *avs = avformat_new_stream(encoder_avfc, NULL);\navcodec_parameters_copy(avs->codecpar, decoder_avs->codecpar);\n\nif (encoder_avfc->oformat->flags & AVFMT_GLOBALHEADER)\n  encoder_avfc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;\n\navio_open(&encoder_avfc->pb, encoder->filename, AVIO_FLAG_WRITE);\navformat_write_header(encoder->avfc, &muxer_opts);\n\n```\n\n디코더로부터 `AVPacket`을 얻어서, 타임스탬프를 조정하고, 패킷을 출력 파일에 제대로 씁니다. `av_interleaved_write_frame` 이 함수 이름이 \"write frame\"라고 되어있긴 하지만 이것은 패킷을 저장합니다. 이제 파일에 스트림 트레일러를 쓰면서 트랜스먹싱 프로세스를 마무리합니다. \n\n```c\nAVFrame *input_frame = av_frame_alloc();\nAVPacket *input_packet = av_packet_alloc();\n\nwhile (av_read_frame(decoder_avfc, input_packet) >= 0)\n{\n  av_packet_rescale_ts(input_packet, decoder_video_avs->time_base, encoder_video_avs->time_base);\n  av_interleaved_write_frame(*avfc, input_packet) < 0));\n}\n\nav_write_trailer(encoder_avfc);\n```\n\n### 트랜스코딩 (Transcoding)\n\n이전 섹션에서 간단한 트랜스먹서 프로그램을 봤는데요, 이번엔 여기에 인코딩 기능을 추가해보겠습니다. 
특히, `h264`에서 `h265`로 비디오를 트랜스코딩할 수 있게 하겠습니다.\n\n디코더를 준비한 후, 그리고 출력 미디어 파일을 다루기 전에 인코더를 설정할 것입니다.\n\n* 인코더에 비디오 `AVStream`를 생성합니다, [`avformat_new_stream`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827)\n* `libx265`라고 하는 `AVCodec`을 사용합니다, [`avcodec_find_encoder_by_name`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__encoding.html#gaa614ffc38511c104bdff4a3afa086d37)\n* 생성한 코덱을 기반으로 `AVCodecContext`를 생성합니다,[`avcodec_alloc_context3`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#gae80afec6f26df6607eaacf39b561c315)\n* 트랜스코딩 세션에 대해 기본적인 속성들을 설정합니다, 그리고\n* 코덱을 열고 컨텍스트에서 스트림으로 파라미터를 복사합니다. [`avcodec_open2`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d), [`avcodec_parameters_from_context`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga0c7058f764778615e7978a1821ab3cfe)\n\n```c\nAVRational input_framerate = av_guess_frame_rate(decoder_avfc, decoder_video_avs, NULL);\nAVStream *video_avs = avformat_new_stream(encoder_avfc, NULL);\n\nchar *codec_name = \"libx265\";\nchar *codec_priv_key = \"x265-params\";\n// we're going to use internal options for the x265\n// it disables the scene change detection and fix then\n// GOP on 60 frames.\nchar *codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0\";\n\nAVCodec *video_avc = avcodec_find_encoder_by_name(codec_name);\nAVCodecContext *video_avcc = avcodec_alloc_context3(video_avc);\n// encoder codec params\nav_opt_set(sc->video_avcc->priv_data, codec_priv_key, codec_priv_value, 0);\nvideo_avcc->height = decoder_ctx->height;\nvideo_avcc->width = decoder_ctx->width;\nvideo_avcc->pix_fmt = video_avc->pix_fmts[0];\n// control rate\nvideo_avcc->bit_rate = 2 * 1000 * 1000;\nvideo_avcc->rc_buffer_size = 4 * 1000 * 1000;\nvideo_avcc->rc_max_rate = 2 * 1000 * 1000;\nvideo_avcc->rc_min_rate = 2.5 * 1000 * 1000;\n// time base\nvideo_avcc->time_base = av_inv_q(input_framerate);\nvideo_avs->time_base = 
sc->video_avcc->time_base;\n\navcodec_open2(sc->video_avcc, sc->video_avc, NULL);\navcodec_parameters_from_context(sc->video_avs->codecpar, sc->video_avcc);\n```\n\n비디오 스트림의 트랜스코딩을 위해 디코딩 루프를 확장해야합니다.\n\n* 디코더에 빈 `AVPacket`를 전송합니다, [`avcodec_send_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3)\n* 압축이 해제된 `AVFrame`를 받아옵니다, [`avcodec_receive_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c)\n* 이 raw 프레임의 트랜스코딩을 시작합니다,\n* raw 프레임을 (인코더에) 보내고, [`avcodec_send_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga9395cb802a5febf1f00df31497779169)\n* 코덱에 맞게 압축된 `AVPacket`을 받아옵니다, [`avcodec_receive_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga5b8eff59cf259747cf0b31563e38ded6)\n* 타임스탬프를 설정하고, [`av_packet_rescale_ts`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__packet.html#gae5c86e4d93f6e7aa62ef2c60763ea67e)\n* 패킷을 출력 파일에 씁니다. 
[`av_interleaved_write_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1)\n\n```c\nAVFrame *input_frame = av_frame_alloc();\nAVPacket *input_packet = av_packet_alloc();\n\nwhile (av_read_frame(decoder_avfc, input_packet) >= 0)\n{\n  int response = avcodec_send_packet(decoder_video_avcc, input_packet);\n  while (response >= 0) {\n    response = avcodec_receive_frame(decoder_video_avcc, input_frame);\n    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n      break;\n    } else if (response < 0) {\n      return response;\n    }\n    if (response >= 0) {\n      encode(encoder_avfc, decoder_video_avs, encoder_video_avs, decoder_video_avcc, input_packet->stream_index);\n    }\n    av_frame_unref(input_frame);\n  }\n  av_packet_unref(input_packet);\n}\nav_write_trailer(encoder_avfc);\n\n// used function\nint encode(AVFormatContext *avfc, AVStream *dec_video_avs, AVStream *enc_video_avs, AVCodecContext video_avcc int index) {\n  AVPacket *output_packet = av_packet_alloc();\n  int response = avcodec_send_frame(video_avcc, input_frame);\n\n  while (response >= 0) {\n    response = avcodec_receive_packet(video_avcc, output_packet);\n    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n      break;\n    } else if (response < 0) {\n      return -1;\n    }\n\n    output_packet->stream_index = index;\n    output_packet->duration = enc_video_avs->time_base.den / enc_video_avs->time_base.num / dec_video_avs->avg_frame_rate.num * dec_video_avs->avg_frame_rate.den;\n\n    av_packet_rescale_ts(output_packet, dec_video_avs->time_base, enc_video_avs->time_base);\n    response = av_interleaved_write_frame(avfc, output_packet);\n  }\n  av_packet_unref(output_packet);\n  av_packet_free(&output_packet);\n  return 0;\n}\n\n```\n\n아시다시피 `h265` 버전의 미디어 파일이 `h264`보다 사이즈가 작기 때문에 미디어 스트림을 `h264`에서 `h265`로 변환했습니다. 
하지만 [작성한 프로그램](/3_transcoding.c)은 다음의 작업들도 수행할 수 있습니다:\n\n```c\n\n  /*\n   * H264 -> H265\n   * Audio -> remuxed (untouched)\n   * MP4 - MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx265\";\n  sp.codec_priv_key = \"x265-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * Audio -> remuxed (untouched)\n   * MP4 - MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * Audio -> remuxed (untouched)\n   * MP4 - fragmented MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n  sp.muxer_opt_key = \"movflags\";\n  sp.muxer_opt_value = \"frag_keyframe+empty_moov+delay_moov+default_base_moof\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * Audio -> AAC\n   * MP4 - MPEG-TS\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 0;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n  sp.audio_codec = \"aac\";\n  sp.output_extension = \".ts\";\n\n  /* WIP :P  -> it's not playing on VLC, the final bit rate is huge\n   * H264 -> VP9\n   * Audio -> Vorbis\n   * MP4 - WebM\n   */\n  //StreamingParams sp = {0};\n  //sp.copy_audio = 0;\n  //sp.copy_video = 0;\n  //sp.video_codec = \"libvpx-vp9\";\n  //sp.audio_codec = \"libvorbis\";\n  //sp.output_extension = \".webm\";\n\n```\n\n> 이제서야 솔직히 말하자면, [제가 생각했던 것보다 더 삽질](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/53)했는데요. 
[FFmpeg 명령줄 소스코드](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54#issuecomment-570746749)를 파봐야했고 테스트도 엄청 돌려봤습니다. 그리고 제가 뭔가 놓치는게 있는 것 같은데요, 왜냐하면 `force-cfr`을 강제로 넣어줘야지만 `h264`가 작용하고 `warning messages (forced frame type (5) at 80 was changed to frame type (3))` 같은 경고 메시지도 여전히 나고 있기 때문이죠.\n"
  },
  {
    "path": "README-pt.md",
    "content": "[![license](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)\n\nEu estava procurando por um tutorial/livro que me ensinasse a começar a usar o [FFmpeg](https://www.ffmpeg.org/) como biblioteca (também conhecida como libav) e então encontrei o tutorial [\"Como escrever um player de vídeo em menos de 1k linhas\"](http://dranger.com/ffmpeg/).\nInfelizmente, ele foi descontinuado, então decidi escrever este.\n\nA maior parte do código aqui será em C **mas não se preocupe**: você pode facilmente entender e aplicá-lo à sua linguagem preferida.\nO FFmpeg libav tem muitas ligações para várias linguagens, como [python](https://pyav.org/), [go](https://github.com/imkira/go-libav) e mesmo que sua linguagem não tenha, ainda é possível suportá-la através do `ffi` (aqui está um exemplo com [Lua](https://github.com/daurnimator/ffmpeg-lua-ffi/blob/master/init.lua)).\n\nComeçaremos com uma breve lição sobre o que é vídeo, áudio, codec e contêiner e depois faremos um curso intensivo sobre como usar a linha de comando do `FFmpeg` e, finalmente, escreveremos código. 
Sinta-se à vontade para pular diretamente para a seção [Aprenda o FFmpeg libav do jeito difícil.](#aprenda-o-ffmpeg-libav-do-modo-difícil)\n\nAlgumas pessoas costumavam dizer que o streaming de vídeo na internet é o futuro da TV tradicional, de qualquer forma, o FFmpeg é algo que vale a pena estudar.\n\n__Índice__\n\n* [Introdução](#introdução)\n\t* [Vídeo - O que você vê!](#vídeo---o-que-você-vê)\n\t* [Áudio - O que você ouve!](#áudio---o-que-você-ouve)\n\t* [Codec - reduzindo dados](#codec---reduzindo-dados)\n\t* [Container - um lugar confortável para áudio e vídeo](#container---um-lugar-confortável-para-áudio-e-vídeo)\n* [FFmpeg - linha de comando](#ffmpeg---linha-de-comando)\n\t* [Ferramenta de linha de comando do FFmpeg 101](#ferramenta-de-linha-de-comando-do-ffmpeg-101)\n* [Operações comuns de vídeo](#operações-comuns-de-vídeo)\n\t* [Transcodificação](#transcodificação)\n\t* [Transmuxing](#transmuxing)\n\t* [Transcodificação de Taxa de Bits](#transcodificação-de-taxa-de-bits)\n\t* [Transdimensionamento](#transdimensionamento)\n\t* [Bônus: Streaming Adaptativo](#bônus-streaming-adaptativo)\n\t* [Indo além](#indo-além)\n* [Aprenda o FFmpeg libav do modo difícil](#aprenda-o-ffmpeg-libav-do-modo-difícil)\n  \t* [Capítulo 0 - O famoso \"hello world\"](#capítulo-0---o-famoso-hello-world)\n\t\t* [Arquitetura da biblioteca FFmpeg libav](#arquitetura-da-biblioteca-ffmpeg-libav)\n\t* [Capítulo 1 - Sincronização de áudio e vídeo](#capítulo-1---sincronização-de-áudio-e-vídeo)\n\t* [Capítulo 2 - Remuxing](#capítulo-2---remuxing)\n\t* [Capítulo 3 - Transcoding](#capítulo-3---transcoding)\n\n# Introdução\n\n## Vídeo - O que você vê!\n\nSe você tem uma sequência de imagens e as altera com uma determinada frequência (digamos [24 imagens por segundo](https://www.filmindependent.org/blog/hacking-film-24-frames-per-second/)), você criará uma [ilusão de movimento](https://en.wikipedia.org/wiki/Persistence_of_vision).\nEm resumo, essa é a ideia básica por trás de um vídeo: **uma 
série de imagens/quadros sendo executados a uma determinada taxa**.\n\n<img src=\"https://upload.wikimedia.org/wikipedia/commons/1/1f/Linnet_kineograph_1886.jpg\" title=\"flip book\" height=\"280\"></img>\n\nZeitgenössische Illustration (1886)\n\n## Áudio - O que você ouve!\n\nEmbora um vídeo sem som possa expressar uma variedade de sentimentos, adicionar som a ele traz mais prazer à experiência.\n\nO som é a vibração que se propaga como uma onda de pressão, através do ar ou qualquer outro meio de transmissão, como um gás, líquido ou sólido.\n\n> Em um sistema de áudio digital, um microfone converte o som em um sinal elétrico analógico, em seguida, um conversor analógico-digital (ADC) - tipicamente usando [modulação por código de pulso (PCM)](https://en.wikipedia.org/wiki/Pulse-code_modulation) - converte o sinal analógico em um sinal digital.\n\n![audio analog to digital](https://upload.wikimedia.org/wikipedia/commons/thumb/c/c7/CPT-Sound-ADC-DAC.svg/640px-CPT-Sound-ADC-DAC.svg.png \"audio analog to digital\")\n>[Fonte](https://commons.wikimedia.org/wiki/File:CPT-Sound-ADC-DAC.svg)\n\n## Codec - reduzindo dados\n\n> CODEC é um circuito eletrônico ou software que **comprime ou descomprime áudio/vídeo digital.** Ele converte áudio/vídeo digital bruto (não comprimido) para um formato comprimido ou vice-versa.\n> https://en.wikipedia.org/wiki/Video_codec\n\nMas se escolhermos empacotar milhões de imagens em um único arquivo e chamá-lo de filme, podemos acabar com um arquivo enorme. 
Vamos fazer as contas:\n\nSuponha que estamos criando um vídeo com resolução `1080 x 1920` (altura x largura) e que gastaremos `3 bytes` por pixel (o ponto mínimo em uma tela) para codificar a cor (ou [cor de 24 bits](https://en.wikipedia.org/wiki/Color_depth#True_color_.2824-bit.29), o que nos dá 16.777.216 cores diferentes) e este vídeo é executado a `24 quadros por segundo` e tem `30 minutos` de duração.\n\n```c\ntoppf = 1080 * 1920 // total_de_pixels_por_quadro\ncpp = 3 //custo_por_pixel\ntis = 30 * 60 //tempo_em_segundos\nfps = 24 //quadros_por_segundo\n\narmazenamento_necessário = tis * fps * toppf * cpp\n```\n\nEste vídeo exigiria aproximadamente `250,28 GB` de armazenamento ou `1,19 Gbps` de largura de banda! É por isso que precisamos usar um [CODEC](https://github.com/leandromoreira/digital_video_introduction#how-does-a-video-codec-work).\n\n## container - um lugar confortável para áudio e vídeo\n\n> Um formato de container ou envoltório é um formato de metafile cuja especificação descreve como diferentes elementos de dados e metadados coexistem em um arquivo de computador.\n> https://en.wikipedia.org/wiki/Digital_container_format\n\nUm **único arquivo que contém todos os fluxos** (principalmente áudio e vídeo) e também fornece **sincronização e metadados gerais**, como título, resolução, entre outros.\n\nNormalmente, podemos inferir o formato de um arquivo ao olhar para sua extensão: por exemplo, um `video.webm` provavelmente é um vídeo usando o container [`webm`](https://www.webmproject.org/).\n\n![container](/img/container.png)\n\n# FFmpeg - linha de comando\n\n> Uma solução completa e multiplataforma para gravar, converter e transmitir áudio e vídeo.\n\nPara trabalhar com multimídia, podemos usar a FERRAMENTA/BIBLIOTECA incrível chamada [FFmpeg](https://www.ffmpeg.org/). 
Provavelmente, você já a conhece/usa diretamente ou indiretamente (você usa o [Chrome?](https://www.chromium.org/developers/design-documents/video)).\n\nEle tem um programa de linha de comando chamado `ffmpeg`, um binário muito simples, porém poderoso.\nPor exemplo, você pode converter de `mp4` para o contêiner `avi` apenas digitando o seguinte comando:\n\n```bash\n$ ffmpeg -i input.mp4 output.avi\n```\n\nAcabamos de fazer um **remuxing** aqui, que é converter de um contêiner para outro.\nTecnicamente, o FFmpeg também poderia estar fazendo uma transcodificação, mas falaremos sobre isso mais tarde.\n\n## Ferramenta de linha de comando do FFmpeg 101\n\nO FFmpeg possui uma [documentação](https://www.ffmpeg.org/ffmpeg.html) que faz um ótimo trabalho explicando como ele funciona.\n\n```bash\n# você também pode procurar a documentação usando a linha de comando\n\nffmpeg -h full | grep -A 10 -B 10 avoid_negative_ts\n```\n\nResumidamente, o programa de linha de comando do FFmpeg espera o seguinte formato de argumento para executar suas ações: `ffmpeg {1} {2} -i {3} {4} {5}`, onde:\n\n1. opções globais\n2. opções do arquivo de entrada\n3. URL de entrada\n4. opções do arquivo de saída\n5. 
URL de saída\n\nAs partes 2, 3, 4 e 5 podem ser quantas você precisar.\nÉ mais fácil entender esse formato de argumento na prática:\n\n```bash\n# ATENÇÃO: este arquivo tem cerca de 300MB\n$ wget -O bunny_1080p_60fps.mp4 http://distribution.bbb3d.renderfarming.net/video/mp4/bbb_sunflower_1080p_60fps_normal.mp4\n\n$ ffmpeg \\\n-y \\ # opções globais\n-c:a libfdk_aac \\ # opções de entrada\n-i bunny_1080p_60fps.mp4 \\ # URL de entrada\n-c:v libvpx-vp9 -c:a libvorbis \\ # opções de saída\nbunny_1080p_60fps_vp9.webm # URL de saída\n```\nEste comando leva um arquivo de entrada `mp4` contendo dois fluxos (um áudio codificado com `aac` CODEC e um vídeo codificado usando `h264` CODEC) e o converte para `webm`, mudando seus CODECs de áudio e vídeo também.\n\nPodemos simplificar o comando acima, mas esteja ciente de que o FFmpeg adotará ou adivinhará os valores padrão para você.\nPor exemplo, quando você apenas digita `ffmpeg -i input.avi output.mp4`, que CODEC de áudio/vídeo ele usa para produzir o `output.mp4`?\n\nWerner Robitza escreveu um [tutorial obrigatório para ler/executar sobre codificação e edição com FFmpeg](http://slhck.info/ffmpeg-encoding-course/#/).\n\n# Operações comuns de vídeo\n\nAo trabalhar com áudio/vídeo, geralmente realizamos um conjunto de tarefas com a mídia.\n\n## Transcodificação\n\n![transcodificação](/img/transcoding.png)\n\n**O que é?** É o ato de converter um dos fluxos (áudio ou vídeo) de um CODEC para outro.\n\n**Por que?** Às vezes, alguns dispositivos (TVs, smartphones, consoles etc.) não suportam X, mas sim Y, e os novos CODECs fornecem melhor taxa de compressão.\n\n**Como?** Convertendo um vídeo `H264` (AVC) para `H265` (HEVC).\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-c:v libx265 \\\nbunny_1080p_60fps_h265.mp4\n```\n\n## Transmuxing\n\n![transmuxing](/img/transmuxing.png)\n\n**O que é?** É o ato de converter de um formato (container) para outro.\n\n**Por que?** Às vezes, alguns dispositivos (TVs, smartphones, consoles, etc.) 
não suportam o formato X, mas suportam o Y e, às vezes, os novos formatos (containers) fornecem recursos modernos necessários.\n\n**Como?** Converter um arquivo `mp4` para `ts`.\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-c copy \\ # apenas diz ao ffmpeg para pular a codificação\nbunny_1080p_60fps.ts\n```\n\n## Transcodificação de Taxa de Bits\n\n![transrating](/img/transrating.png)\n\n**O que é?** É a alteração da taxa de bits de um vídeo, ou a produção de outras versões do mesmo vídeo.\n\n**Por que fazer?** As pessoas podem tentar assistir ao seu vídeo em uma conexão de rede `2G` (edge) usando um smartphone menos potente ou em uma conexão de fibra óptica em suas TVs 4K. Portanto, você deve oferecer mais de uma versão do mesmo vídeo com diferentes taxas de bits.\n\n**Como fazer?** Produzindo uma versão com taxa de bits entre 964K e 3856K.\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-minrate 964K -maxrate 3856K -bufsize 2000K \\\nbunny_1080p_60fps_transrating_964_3856.mp4\n```\n\nGeralmente, a transcodificação de taxa de bits é usada em conjunto com a transcodificação de tamanho de vídeo. Werner Robitza escreveu outra série de posts que deve ser lida/executada sobre o controle de taxa do FFmpeg (http://slhck.info/posts/).\n\n## Transdimensionamento\n\n![transsizing](/img/transsizing.png)\n\n**O que é?** a ação de converter de uma resolução para outra. 
Como mencionado antes, o transdimensionamento é frequentemente usado junto com o transrating.\n\n**Por quê?** as razões são as mesmas que para o transrating.\n\n**Como?** convertendo uma resolução `1080p` para `480p`.\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-vf scale=480:-1 \\\nbunny_1080p_60fps_transsizing_480.mp4\n```\n\n## Bônus: Streaming Adaptativo\n\n![Streaming adaptativo](/img/adaptive-streaming.png)\n\n**O que é?** A produção de várias resoluções (taxas de bits) e a divisão da mídia em pedaços para serem servidos por HTTP.\n\n**Por que?** Para fornecer uma mídia flexível que possa ser assistida em um smartphone de baixo desempenho ou em uma TV 4K, além de ser fácil de dimensionar e implantar, mas pode adicionar latência.\n\n**Como?** Criando um WebM adaptativo usando o DASH.\n```bash\n# video streams\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 160x90 -b:v 250k -keyint_min 150 -g 150 -an -f webm -dash 1 video_160x90_250k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 320x180 -b:v 500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_320x180_500k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 750k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_750k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 1000k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_1000k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 1280x720 -b:v 1500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_1280x720_1500k.webm\n\n# audio streams\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:a libvorbis -b:a 128k -vn -f webm -dash 1 audio_128k.webm\n\n# the DASH manifest\n$ ffmpeg \\\n -f webm_dash_manifest -i video_160x90_250k.webm \\\n -f webm_dash_manifest -i video_320x180_500k.webm \\\n -f webm_dash_manifest -i video_640x360_750k.webm \\\n -f webm_dash_manifest -i video_640x360_1000k.webm \\\n -f webm_dash_manifest -i video_1280x720_1500k.webm \\\n -f webm_dash_manifest -i 
audio_128k.webm \\\n -c copy -map 0 -map 1 -map 2 -map 3 -map 4 -map 5 \\\n -f webm_dash_manifest \\\n -adaptation_sets \"id=0,streams=0,1,2,3,4 id=1,streams=5\" \\\n manifest.mpd\n```\n\nPS: Eu roubei esse exemplo das [Instruções para reproduzir Adaptive WebM usando DASH](http://wiki.webmproject.org/adaptive-streaming/instructions-to-playback-adaptive-webm-using-dash)\n\n## Indo além\n\nExistem [muitos outros usos para o FFmpeg](https://github.com/leandromoreira/digital_video_introduction/blob/master/encoding_pratical_examples.md#split-and-merge-smoothly).\nEu uso em conjunto com o *iMovie* para produzir/editar alguns vídeos para o YouTube e certamente você pode usá-lo profissionalmente.\n\n# Aprenda o FFmpeg libav do modo difícil\n\n> Você nunca se perguntou sobre som e visão?\n> **David Robert Jones**\n\nJá que o [FFmpeg](#ffmpeg---linha-de-comando) é tão útil como uma ferramenta de linha de comando para realizar tarefas essenciais em arquivos de mídia, como podemos usá-lo em nossos programas?\n\nO FFmpeg é [composto por diversas bibliotecas](https://www.ffmpeg.org/doxygen/trunk/index.html) que podem ser integradas em nossos próprios programas. Geralmente, quando você instala o FFmpeg, ele instala automaticamente todas essas bibliotecas. 
Estarei me referindo a esse conjunto de bibliotecas como **FFmpeg libav**.\n\n> Este título é uma homenagem à série de Zed Shaw [Aprenda X do Modo Difícil](https://learncodethehardway.org/), em particular seu livro Aprenda C do Modo Difícil.\n\n## Capítulo 0 - O famoso \"hello world\"\nEste \"hello world\" na verdade não mostrará a mensagem \"hello world\" no terminal :tongue:\nEm vez disso, vamos **imprimir informações sobre o vídeo**, como seu formato (container), duração, resolução, canais de áudio e, no final, vamos **decodificar alguns quadros e salvá-los como arquivos de imagem**.\n\n### Arquitetura da biblioteca FFmpeg libav\n\nMas antes de começarmos a programar, vamos aprender como funciona a **arquitetura da biblioteca FFmpeg libav** e como seus componentes se comunicam entre si.\n\nAqui está um diagrama do processo de decodificação de um vídeo:\n\n![ffmpeg libav architecture - processo de decodificação](/img/decoding.png)\n\nVocê primeiro precisará carregar seu arquivo de mídia em um componente chamado [`AVFormatContext`](https://ffmpeg.org/doxygen/trunk/structAVFormatContext.html) (o contêiner de vídeo também é conhecido como formato).\nNa verdade, ele não carrega todo o arquivo: muitas vezes ele lê apenas o cabeçalho.\n\nDepois de carregar o **cabeçalho mínimo do nosso contêiner**, podemos acessar suas streams (pense nelas como dados de áudio e vídeo rudimentares).\nCada stream estará disponível em um componente chamado [`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html).\n\n> Stream é um nome elegante para um fluxo contínuo de dados.\n\nSuponha que nosso vídeo tenha duas streams: um áudio codificado com [AAC CODEC](https://en.wikipedia.org/wiki/Advanced_Audio_Coding) e um vídeo codificado com [H264 (AVC) CODEC](https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC). 
De cada stream podemos extrair **pedaços (slices) de dados** chamados pacotes que serão carregados em componentes chamados [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html).\n\nOs **dados dentro dos pacotes ainda estão codificados** (comprimidos) e, para decodificar os pacotes, precisamos passá-los para um [`AVCodec`](https://ffmpeg.org/doxygen/trunk/structAVCodec.html) específico.\n\nO `AVCodec` os decodificará em [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html) e, finalmente, este componente nos fornecerá **o quadro não comprimido**. Observe que a mesma terminologia/processo é usada tanto para fluxo de áudio quanto de vídeo.\n\n### Requisitos\n\nComo algumas pessoas estavam [enfrentando problemas ao compilar ou executar os exemplos](https://github.com/leandromoreira/ffmpeg-libav-tutorial/issues?utf8=%E2%9C%93&q=is%3Aissue+is%3Aopen+compiling), **vamos usar o [`Docker`](https://docs.docker.com/install/) como nosso ambiente de desenvolvimento/execução**, também usaremos o vídeo Big Buck Bunny, então se você não o tiver localmente, basta executar o comando `make fetch_small_bunny_video`.\n\n### Capítulo 0 - apresentação do código\n\n> #### TLDR; mostre-me o [código](/0_hello_world.c) e a execução.\n> ```bash\n> $ make run_hello\n> ```\n\nVamos pular alguns detalhes, mas não se preocupe: o [código-fonte está disponível no GitHub](/0_hello_world.c).\n\nVamos alocar memória para o componente [`AVFormatContext`](http://ffmpeg.org/doxygen/trunk/structAVFormatContext.html) que conterá informações sobre o formato (container).\n\n```c\nAVFormatContext *pFormatContext = avformat_alloc_context();\n```\n\nAgora vamos abrir o arquivo e ler seu cabeçalho e preencher o `AVFormatContext` com informações mínimas sobre o formato (observe que geralmente os codecs não são abertos).\nA função usada para isso é [`avformat_open_input`](http://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga31d601155e9035d5b0e7efedc894ee49). 
Ele espera um `AVFormatContext`, um `filename` e dois argumentos opcionais: o [`AVInputFormat`](https://ffmpeg.org/doxygen/trunk/structAVInputFormat.html) (se você passar `NULL`, o FFmpeg adivinhará o formato) e o [`AVDictionary`](https://ffmpeg.org/doxygen/trunk/structAVDictionary.html) (que são as opções para o demuxer).\n\n```c\navformat_open_input(&pFormatContext, filename, NULL, NULL);\n```\n\nPodemos imprimir o nome do formato e a duração da mídia:\n\n```c\nprintf(\"Format %s, duration %lld us\", pFormatContext->iformat->long_name, pFormatContext->duration);\n```\n\nPara acessar as `streams`, precisamos ler os dados da mídia. A função [`avformat_find_stream_info`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#gad42172e27cddafb81096939783b157bb) faz isso. Agora, o `pFormatContext->nb_streams` irá armazenar a quantidade de streams e o `pFormatContext->streams[i]` nos fornecerá a `i`-ésima stream (um [`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html)).\n\n```c\navformat_find_stream_info(pFormatContext,  NULL);\n```\n\nAgora vamos iterar por todos os fluxos (streams).\n\n```c\nfor (int i = 0; i < pFormatContext->nb_streams; i++)\n{\n\t//\n}\n```\n\nPara cada fluxo, vamos manter os [`AVCodecParameters`](https://ffmpeg.org/doxygen/trunk/structAVCodecParameters.html), que descreve as propriedades de um codec usado pelo fluxo `i`.\n\n```c\nAVCodecParameters *pLocalCodecParameters = pFormatContext->streams[i]->codecpar;\n```\n\nCom as propriedades do codec, podemos procurar o CODEC apropriado consultando a função [`avcodec_find_decoder`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga19a0ca553277f019dd5b0fec6e1f9dca) e encontrar o decodificador registrado para o id do codec e retornar um [`AVCodec`](http://ffmpeg.org/doxygen/trunk/structAVCodec.html), o componente que sabe como **CO**dificar e **DEC**odificar o fluxo.\n```c\nAVCodec *pLocalCodec = avcodec_find_decoder(pLocalCodecParameters->codec_id);\n```\n\nAgora podemos 
imprimir informações sobre os codecs.\n\n```c\n// especifico para video e audio\nif (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_VIDEO) {\n\tprintf(\"Video Codec: resolution %d x %d\", pLocalCodecParameters->width, pLocalCodecParameters->height);\n} else if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_AUDIO) {\n\tprintf(\"Audio Codec: %d channels, sample rate %d\", pLocalCodecParameters->channels, pLocalCodecParameters->sample_rate);\n}\n// geral\nprintf(\"\\tCodec %s ID %d bit_rate %lld\", pLocalCodec->long_name, pLocalCodec->id, pLocalCodecParameters->bit_rate);\n```\n\nCom o codec, podemos alocar memória para o [`AVCodecContext`](https://ffmpeg.org/doxygen/trunk/structAVCodecContext.html), que conterá o contexto para nosso processo de decodificação/ codificação, mas precisamos preencher este contexto do codec com os parâmetros do CODEC; fazemos isso com [`avcodec_parameters_to_context`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#gac7b282f51540ca7a99416a3ba6ee0d16).\n\nUma vez preenchido o contexto do codec, precisamos abrir o codec. 
Chamamos a função [`avcodec_open2`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d) e, em seguida, podemos usá-lo.\n\n```c\nAVCodecContext *pCodecContext = avcodec_alloc_context3(pCodec);\navcodec_parameters_to_context(pCodecContext, pCodecParameters);\navcodec_open2(pCodecContext, pCodec, NULL);\n```\n\nAgora vamos ler os pacotes do fluxo e decodificá-los em quadros, mas antes disso, precisamos alocar memória para ambos os componentes, o [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html) e [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html).\n\n```c\nAVPacket *pPacket = av_packet_alloc();\nAVFrame *pFrame = av_frame_alloc();\n```\n\nVamos alimentar nossos pacotes das streams com a função [`av_read_frame`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga4fdb3084415a82e3810de6ee60e46a61) enquanto houver pacotes.\n\n\n```c\nwhile (av_read_frame(pFormatContext, pPacket) >= 0) {\n\t//...\n}\n```\n\nVamos **enviar o pacote de dados bruto** (quadro comprimido) para o decodificador, por meio do contexto do codec, usando a função [`avcodec_send_packet`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3).\n\n```c\navcodec_send_packet(pCodecContext, pPacket);\n```\n\nE vamos **receber o quadro de dados bruto** (quadro descomprimido) do decodificador, através do mesmo contexto do codec, usando a função [`avcodec_receive_frame`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c).\n\n```c\navcodec_receive_frame(pCodecContext, pFrame);\n```\n\nPodemos imprimir o número do quadro, o [PTS](https://en.wikipedia.org/wiki/Presentation_timestamp), DTS, [tipo de quadro](https://en.wikipedia.org/wiki/Video_compression_picture_types) e etc.\n\n```c\nprintf(\n\t\t\"Frame %c (%d) pts %d dts %d key_frame %d [coded_picture_number %d, display_picture_number 
%d]\",\n\t\tav_get_picture_type_char(pFrame->pict_type),\n\t\tpCodecContext->frame_number,\n\t\tpFrame->pts,\n\t\tpFrame->pkt_dts,\n\t\tpFrame->key_frame,\n\t\tpFrame->coded_picture_number,\n\t\tpFrame->display_picture_number\n);\n```\n\nFinalmente, podemos salvar nosso quadro decodificado em uma [imagem simples em tons de cinza](https://en.wikipedia.org/wiki/Netpbm_format#PGM_example). O processo é muito simples, usaremos `pFrame->data`, onde o índice está relacionado aos [planos Y, Cb e Cr](https://en.wikipedia.org/wiki/YCbCr), escolhemos apenas `0` (Y) para salvar nossa imagem em tons de cinza.\n\n```c\nsave_gray_frame(pFrame->data[0], pFrame->linesize[0], pFrame->width, pFrame->height, frame_filename);\n\nstatic void save_gray_frame(unsigned char *buf, int wrap, int xsize, int ysize, char *filename)\n{\n\t\tFILE *f;\n\t\tint i;\n\t\tf = fopen(filename,\"w\");\n\t\t// writing the minimal required header for a pgm file format\n\t\t// portable graymap format -> https://en.wikipedia.org/wiki/Netpbm_format#PGM_example\n\t\tfprintf(f, \"P5\\n%d %d\\n%d\\n\", xsize, ysize, 255);\n\n\t\t// writing line by line\n\t\tfor (i = 0; i < ysize; i++)\n\t\t\t\tfwrite(buf + i * wrap, 1, xsize, f);\n\t\tfclose(f);\n}\n```\n\nE voilà! 
Agora temos uma imagem em escala de cinza com 2MB:\n\n![saved frame](/img/generated_frame.png)\n\n## Capítulo 1 - sincronização de áudio e vídeo\n\n> **Seja o player** - um jovem desenvolvedor de JS criando um novo player de vídeo MSE.\n\nAntes de avançarmos para [codificar um exemplo de transcodificação](#capítulo-2---remuxing), vamos falar sobre **tempo**, ou como um player de vídeo sabe a hora certa de exibir um quadro.\n\nNo último exemplo, salvamos alguns quadros que podem ser vistos aqui:\n\n![frame 0](/img/hello_world_frames/frame0.png)\n![frame 1](/img/hello_world_frames/frame1.png)\n![frame 2](/img/hello_world_frames/frame2.png)\n![frame 3](/img/hello_world_frames/frame3.png)\n![frame 4](/img/hello_world_frames/frame4.png)\n![frame 5](/img/hello_world_frames/frame5.png)\n\nQuando estamos projetando um player de vídeo, precisamos **reproduzir cada quadro em um ritmo definido**, caso contrário, seria difícil visualizar o vídeo de forma agradável, seja porque está reproduzindo muito rápido ou muito devagar.\n\nPortanto, precisamos introduzir alguma lógica para reproduzir cada quadro suavemente. 
Para esse fim, cada quadro tem um **carimbo de tempo de apresentação** (PTS), que é um número crescente multiplicado por uma **base de tempo** que é um número racional (onde o denominador é conhecido como **timescale**) divisível pela **taxa de quadros (fps)**.\n\nÉ mais fácil entender quando olhamos alguns exemplos, vamos simular alguns cenários.\n\nPara um `fps=60/1` e `timebase=1/60000` cada PTS aumentará `timescale / fps = 1000` portanto, o **tempo real do PTS** para cada quadro poderia ser (supondo que começou em 0):\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 1000, PTS_TIME = PTS * timebase = 0.016`\n* `frame=2, PTS = 2000, PTS_TIME = PTS * timebase = 0.033`\n\nPara um cenário quase idêntico, mas com uma base de tempo igual a `1/60`.\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 1, PTS_TIME = PTS * timebase = 0.016`\n* `frame=2, PTS = 2, PTS_TIME = PTS * timebase = 0.033`\n* `frame=3, PTS = 3, PTS_TIME = PTS * timebase = 0.050`\n\nPara um `fps=25/1` e `timebase=1/75` cada PTS aumentará `timescale/fps = 3` e o tempo de PTS pode ser:\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 3, PTS_TIME = PTS * timebase = 0.04`\n* `frame=2, PTS = 6, PTS_TIME = PTS * timebase = 0.08`\n* `frame=3, PTS = 9, PTS_TIME = PTS * timebase = 0.12`\n* ...\n* `frame=24, PTS = 72, PTS_TIME = PTS * timebase = 0.96`\n* ...\n* `frame=4064, PTS = 12192, PTS_TIME = PTS * timebase = 162.56`\n\nAgora com o `pts_time` podemos encontrar uma maneira de renderizar isso sincronizado com o `pts_time` de áudio ou com o relógio do sistema. 
O libav do FFmpeg fornece essas informações por meio de sua API:\n\n- fps = [`AVStream->avg_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a946e1e9b89eeeae4cab8a833b482c1ad)\n- tbr = [`AVStream->r_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#ad63fb11cc1415e278e09ddc676e8a1ad)\n- tbn = [`AVStream->time_base`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a9db755451f14e2bf590d4b85d82b32e6)\n\nApenas por curiosidade, os quadros que salvamos foram enviados em uma ordem DTS (quadros: 1,6,4,2,3,5), mas tocados em uma ordem PTS (quadros: 1,2,3,4,5,6). Além disso, observe como os quadros B são baratos em comparação com os quadros P ou I.\n\n```\nLOG: AVStream->r_frame_rate 60/1\nLOG: AVStream->time_base 1/60000\n...\nLOG: Frame 1 (type=I, size=153797 bytes) pts 6000 key_frame 1 [DTS 0]\nLOG: Frame 2 (type=B, size=8117 bytes) pts 7000 key_frame 0 [DTS 3]\nLOG: Frame 3 (type=B, size=8226 bytes) pts 8000 key_frame 0 [DTS 4]\nLOG: Frame 4 (type=B, size=17699 bytes) pts 9000 key_frame 0 [DTS 2]\nLOG: Frame 5 (type=B, size=6253 bytes) pts 10000 key_frame 0 [DTS 5]\nLOG: Frame 6 (type=P, size=34992 bytes) pts 11000 key_frame 0 [DTS 1]\n```\n\n## Capítulo 2 - Remuxing\n\nRemuxar é o ato de mudar de um formato (container) para outro, por exemplo, podemos mudar um vídeo [MPEG-4](https://en.wikipedia.org/wiki/MPEG-4_Part_14) para um [MPEG-TS](https://en.wikipedia.org/wiki/MPEG_transport_stream) sem muito esforço usando o FFmpeg:\n\n```bash\nffmpeg -i input.mp4 -c copy output.ts\n```\n\nEle irá demultiplexar o mp4, mas não o decodificará ou codificará (`-c copy`) e, no final, o multiplexará em um arquivo `mpegts`. 
Se você não fornecer o formato `-f`, o ffmpeg tentará adivinhá-lo com base na extensão do arquivo.\n\nO uso geral do FFmpeg ou do libav segue um padrão/arquitetura ou fluxo de trabalho:\n\n* **[camada de protocolo](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** - aceita uma entrada (`input`) (um arquivo, por exemplo, mas também pode ser uma entrada `rtmp` ou `HTTP`)\n* **[camada de formato](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - faz a desmultiplexação de seu conteúdo, revelando principalmente metadados e seus fluxos\n* **[camada de codec](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - decodifica os dados de fluxos comprimidos <sup>*opcional*</sup>\n* **[camada de pixel](https://ffmpeg.org/doxygen/trunk/group__lavfi.html)** - também pode aplicar alguns `filtros` aos quadros brutos (como redimensionamento)<sup>*opcional*</sup>\n* e então ele segue o caminho inverso\n* **[camada de codec](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - codifica (ou re-codifica ou mesmo transcodifica) os quadros brutos<sup>*opcional*</sup>\n* **[camada de formato](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - multiplexa (ou remultiplexa) os fluxos brutos (os dados comprimidos)\n* **[camada de protocolo](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** - e finalmente, os dados multiplexados são enviados para uma saída (outro arquivo ou talvez um servidor remoto de rede)\n\n![fluxo de trabalho do ffmpeg libav](/img/ffmpeg_libav_workflow.jpeg)\n> Este gráfico é fortemente inspirado nos trabalhos de [Leixiaohua](http://leixiaohua1020.github.io/#ffmpeg-development-examples) e [Slhck](https://slhck.info/ffmpeg-encoding-course/#/9).\n\nAgora vamos codificar um exemplo usando o libav para fornecer o mesmo efeito de `ffmpeg -i input.mp4 -c copy output.ts`.\n\nVamos ler de uma entrada (`input_format_context`) e convertê-la para outra saída (`output_format_context`).\n\n```c\nAVFormatContext *input_format_context = NULL;\nAVFormatContext 
*output_format_context = NULL;\n```\n\nComeçamos alocando a memória necessária e abrindo o formato de entrada. Para este caso específico, vamos abrir um arquivo de entrada e alocar memória para um arquivo de saída.\n\n```c\nif ((ret = avformat_open_input(&input_format_context, in_filename, NULL, NULL)) < 0) {\n\tfprintf(stderr, \"Could not open input file '%s'\", in_filename);\n\tgoto end;\n}\nif ((ret = avformat_find_stream_info(input_format_context, NULL)) < 0) {\n\tfprintf(stderr, \"Failed to retrieve input stream information\");\n\tgoto end;\n}\n\navformat_alloc_output_context2(&output_format_context, NULL, NULL, out_filename);\nif (!output_format_context) {\n\tfprintf(stderr, \"Could not create output context\\n\");\n\tret = AVERROR_UNKNOWN;\n\tgoto end;\n}\n```\n\nVamos remuxar apenas os tipos de fluxos de vídeo, áudio e legenda, portanto, estamos armazenando em um array de índices quais fluxos serão usados.\n\n```c\nnumber_of_streams = input_format_context->nb_streams;\nstreams_list = av_mallocz_array(number_of_streams, sizeof(*streams_list));\n```\n\nLogo após alocarmos a memória necessária, vamos fazer um loop em todos os fluxos e, para cada um, precisamos criar um novo fluxo de saída em nosso contexto de formato de saída, usando a função [avformat_new_stream](https://ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827). 
Observe que estamos marcando todos os fluxos que não são de vídeo, áudio ou legenda para que possamos ignorá-los posteriormente.\n\n```c\nfor (i = 0; i < input_format_context->nb_streams; i++) {\n\tAVStream *out_stream;\n\tAVStream *in_stream = input_format_context->streams[i];\n\tAVCodecParameters *in_codecpar = in_stream->codecpar;\n\tif (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&\n\t\t\tin_codecpar->codec_type != AVMEDIA_TYPE_VIDEO &&\n\t\t\tin_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {\n\t\tstreams_list[i] = -1;\n\t\tcontinue;\n\t}\n\tstreams_list[i] = stream_index++;\n\tout_stream = avformat_new_stream(output_format_context, NULL);\n\tif (!out_stream) {\n\t\tfprintf(stderr, \"Failed allocating output stream\\n\");\n\t\tret = AVERROR_UNKNOWN;\n\t\tgoto end;\n\t}\n\tret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar);\n\tif (ret < 0) {\n\t\tfprintf(stderr, \"Failed to copy codec parameters\\n\");\n\t\tgoto end;\n\t}\n}\n```\n\nAgora podemos criar o arquivo de saída.\n\n```c\nif (!(output_format_context->oformat->flags & AVFMT_NOFILE)) {\n\tret = avio_open(&output_format_context->pb, out_filename, AVIO_FLAG_WRITE);\n\tif (ret < 0) {\n\t\tfprintf(stderr, \"Could not open output file '%s'\", out_filename);\n\t\tgoto end;\n\t}\n}\n\nret = avformat_write_header(output_format_context, NULL);\nif (ret < 0) {\n\tfprintf(stderr, \"Error occurred when opening output file\\n\");\n\tgoto end;\n}\n```\n\nDepois disso, podemos copiar os fluxos, pacote por pacote, dos nossos fluxos de entrada para os nossos fluxos de saída. 
Vamos fazer um loop enquanto tiver pacotes (`av_read_frame`), para cada pacote, precisamos recalcular o PTS e DTS para finalmente escrevê-lo (`av_interleaved_write_frame`) no nosso contexto de formato de saída.\n\n```c\nwhile (1) {\n\tAVStream *in_stream, *out_stream;\n\tret = av_read_frame(input_format_context, &packet);\n\tif (ret < 0)\n\t\tbreak;\n\tin_stream  = input_format_context->streams[packet.stream_index];\n\tif (packet.stream_index >= number_of_streams || streams_list[packet.stream_index] < 0) {\n\t\tav_packet_unref(&packet);\n\t\tcontinue;\n\t}\n\tpacket.stream_index = streams_list[packet.stream_index];\n\tout_stream = output_format_context->streams[packet.stream_index];\n\t/* copy packet */\n\tpacket.pts = av_rescale_q_rnd(packet.pts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);\n\tpacket.dts = av_rescale_q_rnd(packet.dts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);\n\tpacket.duration = av_rescale_q(packet.duration, in_stream->time_base, out_stream->time_base);\n\t// https://ffmpeg.org/doxygen/trunk/structAVPacket.html#ab5793d8195cf4789dfb3913b7a693903\n\tpacket.pos = -1;\n\n\t//https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1\n\tret = av_interleaved_write_frame(output_format_context, &packet);\n\tif (ret < 0) {\n\t\tfprintf(stderr, \"Error muxing packet\\n\");\n\t\tbreak;\n\t}\n\tav_packet_unref(&packet);\n}\n```\n\nPara finalizar, precisamos escrever o trailer do fluxo em um arquivo de mídia de saída com a função [av_write_trailer](https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga7f14007e7dc8f481f054b21614dfec13).\n\n```c\nav_write_trailer(output_format_context);\n```\n\nAgora estamos prontos para testar e o primeiro teste será a conversão de formato (container de vídeo) de um arquivo MP4 para um arquivo de vídeo MPEG-TS. 
Basicamente, estamos executando a linha de comando `ffmpeg -i input.mp4 -c copy output.ts` com o libav.\n\n```bash\nmake run_remuxing_ts\n```\n\nEstá funcionando!!! Você não confia em mim?! Você não deveria, podemos verificar com `ffprobe`:\n\n```bash\nffprobe -i remuxed_small_bunny_1080p_60fps.ts\n\nInput #0, mpegts, from 'remuxed_small_bunny_1080p_60fps.ts':\n\tDuration: 00:00:10.03, start: 0.000000, bitrate: 2751 kb/s\n\tProgram 1\n\t\tMetadata:\n\t\t\tservice_name    : Service01\n\t\t\tservice_provider: FFmpeg\n\t\tStream #0:0[0x100]: Video: h264 (High) ([27][0][0][0] / 0x001B), yuv420p(progressive), 1920x1080 [SAR 1:1 DAR 16:9], 60 fps, 60 tbr, 90k tbn, 120 tbc\n\t\tStream #0:1[0x101]: Audio: ac3 ([129][0][0][0] / 0x0081), 48000 Hz, 5.1(side), fltp, 320 kb/s\n```\n\nPara resumir o que fizemos aqui em um gráfico, podemos revisitar nossa [ideia inicial sobre como o libav funciona](https://github.com/leandromoreira/ffmpeg-libav-tutorial#ffmpeg-libav-architecture) mostrando que pulamos a parte do codec.\n\n![remuxing libav components](/img/remuxing_libav_components.png)\n\nAntes de encerrarmos este capítulo, gostaria de mostrar uma parte importante do processo de remuxing, você pode passar opções para o muxer. 
Digamos que queremos entregar o formato [MPEG-DASH](https://developer.mozilla.org/en-US/docs/Web/Apps/Fundamentals/Audio_and_video_delivery/Setting_up_adaptive_streaming_media_sources#MPEG-DASH_Encoding) para isso precisamos usar [fragmented mp4](https://stackoverflow.com/a/35180327) (às vezes referido como `fmp4`) em vez de MPEG-TS ou MPEG-4 simples.\n\nCom a [linha de comando, podemos fazer isso facilmente](https://developer.mozilla.org/en-US/docs/Web/API/Media_Source_Extensions_API/Transcoding_assets_for_MSE#Fragmenting).\n\n```\nffmpeg -i non_fragmented.mp4 -movflags frag_keyframe+empty_moov+default_base_moof fragmented.mp4\n```\n\nQuase tão fácil quanto a linha de comando é a versão da biblioteca libav, só precisamos passar as opções ao escrever o cabeçalho de saída, logo antes da cópia dos pacotes.\n\n```c\nAVDictionary* opts = NULL;\nav_dict_set(&opts, \"movflags\", \"frag_keyframe+empty_moov+default_base_moof\", 0);\nret = avformat_write_header(output_format_context, &opts);\n```\n\nAgora podemos gerar este arquivo mp4 fragmentado:\n\n```bash\nmake run_remuxing_fragmented_mp4\n```\n\nMas para ter certeza de que não estou mentindo para você, você pode usar o incrível site/ferramenta [gpac/mp4box.js](http://download.tsi.telecom-paristech.fr/gpac/mp4box.js/filereader.html) ou o site [http://mp4parser.com/](http://mp4parser.com/) para ver as diferenças. Primeiro carregue o mp4 \"comum\".\n\n![mp4 boxes](/img/boxes_normal_mp4.png)\n\nComo você pode ver, ele tem apenas um átomo/box `mdat`, **este é o local onde estão os quadros de vídeo e áudio**. 
Agora carregue o mp4 fragmentado para ver como ele espalha as caixas `mdat`.\n\n![fragmented mp4 boxes](/img/boxes_fragmente_mp4.png)\n\n## Capítulo 3 - Transcoding\n\n> #### TLDR; mostre-me o [código](/3_transcoding.c) e a execução.\n> ```bash\n> $ make run_transcoding\n> ```\n> Vamos pular alguns detalhes, mas não se preocupe: o [código-fonte está disponível no github](/3_transcoding.c).\n\n\nNeste capítulo, vamos criar um transcodificador minimalista, escrito em C, que pode converter vídeos codificados em H264 para H265 usando a biblioteca **FFmpeg/libav**, especificamente [libavcodec](https://ffmpeg.org/libavcodec.html), libavformat e libavutil.\n\n![media transcoding flow](/img/transcoding_flow.png)\n\n> _Apenas um rápido resumo:_ O [**AVFormatContext**](https://www.ffmpeg.org/doxygen/trunk/structAVFormatContext.html) é a abstração para o formato do arquivo de mídia, também conhecido como contêiner (ex: MKV, MP4, Webm, TS). O [**AVStream**](https://www.ffmpeg.org/doxygen/trunk/structAVStream.html) representa cada tipo de dados para um determinado formato (ex: áudio, vídeo, legenda, metadados). O [**AVPacket**](https://www.ffmpeg.org/doxygen/trunk/structAVPacket.html) é uma fatia de dados comprimidos obtidos do `AVStream` que pode ser decodificado por um [**AVCodec**](https://www.ffmpeg.org/doxygen/trunk/structAVCodec.html) (ex: av1, h264, vp9, hevc) gerando um dado bruto chamado [**AVFrame**](https://www.ffmpeg.org/doxygen/trunk/structAVFrame.html).\n\n### Transmuxing\n\nVamos começar com a operação simples de transmuxing e depois podemos desenvolver este código. 
O primeiro passo é **carregar o arquivo de entrada**.\n\n```c\n// Allocate an AVFormatContext\navfc = avformat_alloc_context();\n// Open an input stream and read the header.\navformat_open_input(avfc, in_filename, NULL, NULL);\n// Read packets of a media file to get stream information.\navformat_find_stream_info(avfc, NULL);\n```\n\nAgora vamos configurar o decodificador, o `AVFormatContext` nos dará acesso a todos os componentes do `AVStream` e, para cada um deles, podemos obter seu `AVCodec` e criar o `AVCodecContext` correspondente e, finalmente, podemos abrir o codec fornecido para que possamos prosseguir com o processo de decodificação.\n\n> O [**AVCodecContext**](https://www.ffmpeg.org/doxygen/trunk/structAVCodecContext.html) contém dados sobre a configuração de mídia, como taxa de bits, taxa de quadros, taxa de amostragem, canais, altura e muitos outros.\n\n```c\nfor (int i = 0; i < avfc->nb_streams; i++)\n{\n\tAVStream *avs = avfc->streams[i];\n\tAVCodec *avc = avcodec_find_decoder(avs->codecpar->codec_id);\n\tAVCodecContext *avcc = avcodec_alloc_context3(*avc);\n\tavcodec_parameters_to_context(*avcc, avs->codecpar);\n\tavcodec_open2(*avcc, *avc, NULL);\n}\n```\n\nPrecisamos preparar o arquivo de mídia de saída para a transmuxação também, primeiro **alocamos memória** para o `AVFormatContext` de saída. Criamos **cada fluxo** no formato de saída. 
Para empacotar o fluxo adequadamente, **copiamos os parâmetros do codec** do decodificador.\n\n**Definimos a flag** `AV_CODEC_FLAG_GLOBAL_HEADER`, que informa ao codificador que ele pode usar os cabeçalhos globais e, finalmente, abrimos o arquivo de saída para escrever e persistimos os cabeçalhos.\n\n```c\navformat_alloc_output_context2(&encoder_avfc, NULL, NULL, out_filename);\n\nAVStream *avs = avformat_new_stream(encoder_avfc, NULL);\navcodec_parameters_copy(avs->codecpar, decoder_avs->codecpar);\n\nif (encoder_avfc->oformat->flags & AVFMT_GLOBALHEADER)\n\tencoder_avfc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;\n\navio_open(&encoder_avfc->pb, encoder->filename, AVIO_FLAG_WRITE);\navformat_write_header(encoder->avfc, &muxer_opts);\n\n```\n\nEstamos recebendo os `AVPacket` do decodificador, ajustando os timestamps e escrevendo o pacote corretamente no arquivo de saída. Embora a função `av_interleaved_write_frame` diga \"escrever quadro\", estamos armazenando o pacote. Finalizamos o processo de transmuxing escrevendo o trailer do fluxo no arquivo.\n\n```c\nAVFrame *input_frame = av_frame_alloc();\nAVPacket *input_packet = av_packet_alloc();\n\nwhile (av_read_frame(decoder_avfc, input_packet) >= 0)\n{\n\tav_packet_rescale_ts(input_packet, decoder_video_avs->time_base, encoder_video_avs->time_base);\n\tav_interleaved_write_frame(*avfc, input_packet) < 0));\n}\n\nav_write_trailer(encoder_avfc);\n```\n\n### Transcodificação\n\nA seção anterior mostrou um programa de transmuxer simples, agora vamos adicionar a capacidade de codificar arquivos, especificamente, vamos habilitá-lo para transcodificar vídeos de `h264` para `h265`.\n\nApós prepararmos o decodificador, mas antes de organizarmos o arquivo de mídia de saída, vamos configurar o codificador.\n\n* Criar o `AVStream` de vídeo no codificador, [`avformat_new_stream`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827)\n* Usar o `AVCodec` chamado `libx265`, 
[`avcodec_find_encoder_by_name`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__encoding.html#gaa614ffc38511c104bdff4a3afa086d37)\n* Criar o `AVCodecContext` com base no codec criado, [`avcodec_alloc_context3`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#gae80afec6f26df6607eaacf39b561c315)\n* Configurar atributos básicos para a sessão de transcodificação, e\n* Abrir o codec e copiar parâmetros do contexto para o stream. [`avcodec_open2`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d) e [`avcodec_parameters_from_context`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga0c7058f764778615e7978a1821ab3cfe)\n\n```c\nAVRational input_framerate = av_guess_frame_rate(decoder_avfc, decoder_video_avs, NULL);\nAVStream *video_avs = avformat_new_stream(encoder_avfc, NULL);\n\nchar *codec_name = \"libx265\";\nchar *codec_priv_key = \"x265-params\";\n// we're going to use internal options for the x265\n// it disables the scene change detection and fix then\n// GOP on 60 frames.\nchar *codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0\";\n\nAVCodec *video_avc = avcodec_find_encoder_by_name(codec_name);\nAVCodecContext *video_avcc = avcodec_alloc_context3(video_avc);\n// encoder codec params\nav_opt_set(sc->video_avcc->priv_data, codec_priv_key, codec_priv_value, 0);\nvideo_avcc->height = decoder_ctx->height;\nvideo_avcc->width = decoder_ctx->width;\nvideo_avcc->pix_fmt = video_avc->pix_fmts[0];\n// control rate\nvideo_avcc->bit_rate = 2 * 1000 * 1000;\nvideo_avcc->rc_buffer_size = 4 * 1000 * 1000;\nvideo_avcc->rc_max_rate = 2 * 1000 * 1000;\nvideo_avcc->rc_min_rate = 2.5 * 1000 * 1000;\n// time base\nvideo_avcc->time_base = av_inv_q(input_framerate);\nvideo_avs->time_base = sc->video_avcc->time_base;\n\navcodec_open2(sc->video_avcc, sc->video_avc, NULL);\navcodec_parameters_from_context(sc->video_avs->codecpar, sc->video_avcc);\n```\n\nPrecisamos expandir nosso loop de decodificação para a 
transcodificação do fluxo de vídeo:\n\n* Enviar o `AVPacket` vazio para o decodificador, [`avcodec_send_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3)\n* Receber o `AVFrame` não comprimido, [`avcodec_receive_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c)\n* Começar a transcodificar este frame bruto,\n* Enviar o frame bruto, [`avcodec_send_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga9395cb802a5febf1f00df31497779169)\n* Receber o `AVPacket` comprimido, com base no nosso codec, [`avcodec_receive_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga5b8eff59cf259747cf0b31563e38ded6)\n* Configurar o timestamp e [`av_packet_rescale_ts`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__packet.html#gae5c86e4d93f6e7aa62ef2c60763ea67e)\n* Escrever no arquivo de saída. [`av_interleaved_write_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1)\n\n```c\nAVFrame *input_frame = av_frame_alloc();\nAVPacket *input_packet = av_packet_alloc();\n\nwhile (av_read_frame(decoder_avfc, input_packet) >= 0)\n{\n\tint response = avcodec_send_packet(decoder_video_avcc, input_packet);\n\twhile (response >= 0) {\n\t\tresponse = avcodec_receive_frame(decoder_video_avcc, input_frame);\n\t\tif (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n\t\t\tbreak;\n\t\t} else if (response < 0) {\n\t\t\treturn response;\n\t\t}\n\t\tif (response >= 0) {\n\t\t\tencode(encoder_avfc, decoder_video_avs, encoder_video_avs, decoder_video_avcc, input_packet->stream_index);\n\t\t}\n\t\tav_frame_unref(input_frame);\n\t}\n\tav_packet_unref(input_packet);\n}\nav_write_trailer(encoder_avfc);\n\n// used function\nint encode(AVFormatContext *avfc, AVStream *dec_video_avs, AVStream *enc_video_avs, AVCodecContext video_avcc int index) {\n\tAVPacket *output_packet = 
av_packet_alloc();\n\tint response = avcodec_send_frame(video_avcc, input_frame);\n\n\twhile (response >= 0) {\n\t\tresponse = avcodec_receive_packet(video_avcc, output_packet);\n\t\tif (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n\t\t\tbreak;\n\t\t} else if (response < 0) {\n\t\t\treturn -1;\n\t\t}\n\n\t\toutput_packet->stream_index = index;\n\t\toutput_packet->duration = enc_video_avs->time_base.den / enc_video_avs->time_base.num / dec_video_avs->avg_frame_rate.num * dec_video_avs->avg_frame_rate.den;\n\n\t\tav_packet_rescale_ts(output_packet, dec_video_avs->time_base, enc_video_avs->time_base);\n\t\tresponse = av_interleaved_write_frame(avfc, output_packet);\n\t}\n\tav_packet_unref(output_packet);\n\tav_packet_free(&output_packet);\n\treturn 0;\n}\n\n```\n\nNós convertemos o fluxo de mídia de `h264` para `h265`, como esperado a versão `h265` do arquivo de mídia é menor que a versão `h264`, no entanto o [programa criado](/3_transcoding.c) é capaz de:\n\n```c\n\n\t/*\n\t * H264 -> H265\n\t * Audio -> remuxed (untouched)\n\t * MP4 - MP4\n\t */\n\tStreamingParams sp = {0};\n\tsp.copy_audio = 1;\n\tsp.copy_video = 0;\n\tsp.video_codec = \"libx265\";\n\tsp.codec_priv_key = \"x265-params\";\n\tsp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0\";\n\n\t/*\n\t * H264 -> H264 (fixed gop)\n\t * Audio -> remuxed (untouched)\n\t * MP4 - MP4\n\t */\n\tStreamingParams sp = {0};\n\tsp.copy_audio = 1;\n\tsp.copy_video = 0;\n\tsp.video_codec = \"libx264\";\n\tsp.codec_priv_key = \"x264-params\";\n\tsp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n\n\t/*\n\t * H264 -> H264 (fixed gop)\n\t * Audio -> remuxed (untouched)\n\t * MP4 - fragmented MP4\n\t */\n\tStreamingParams sp = {0};\n\tsp.copy_audio = 1;\n\tsp.copy_video = 0;\n\tsp.video_codec = \"libx264\";\n\tsp.codec_priv_key = \"x264-params\";\n\tsp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n\tsp.muxer_opt_key = \"movflags\";\n\tsp.muxer_opt_value = 
\"frag_keyframe+empty_moov+delay_moov+default_base_moof\";\n\n\t/*\n\t * H264 -> H264 (fixed gop)\n\t * Audio -> AAC\n\t * MP4 - MPEG-TS\n\t */\n\tStreamingParams sp = {0};\n\tsp.copy_audio = 0;\n\tsp.copy_video = 0;\n\tsp.video_codec = \"libx264\";\n\tsp.codec_priv_key = \"x264-params\";\n\tsp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n\tsp.audio_codec = \"aac\";\n\tsp.output_extension = \".ts\";\n\n\t/* WIP :P  -> it's not playing on VLC, the final bit rate is huge\n\t * H264 -> VP9\n\t * Audio -> Vorbis\n\t * MP4 - WebM\n\t */\n\t//StreamingParams sp = {0};\n\t//sp.copy_audio = 0;\n\t//sp.copy_video = 0;\n\t//sp.video_codec = \"libvpx-vp9\";\n\t//sp.audio_codec = \"libvorbis\";\n\t//sp.output_extension = \".webm\";\n\n```\n\n> Para ser honesto, isso foi mais difícil do que eu pensava que seria (https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54) e tive que mergulhar no código-fonte da linha de comando do FFmpeg (https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54#issuecomment-570746749) e testar muito, e acho que ainda estou perdendo alguma coisa, pois tive que forçar `force-cfr` para o `h264` funcionar e ainda estou vendo algumas mensagens de aviso, como `warning messages (forced frame type (5) at 80 was changed to frame type (3))`."
  },
  {
    "path": "README-ru.md",
    "content": "[🇨🇳](/README-cn.md \"Simplified Chinese\")\n[🇰🇷](/README-ko.md \"Korean\")\n[🇪🇸](/README-es.md \"Spanish\")\n[🇻🇳](/README-vn.md \"Vietnamese\")\n[🇧🇷](/README-pt.md \"Portuguese\")\n\n[![license](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)\n\nЯ искал туториал/книгу, которая научит использовать [FFmpeg](https://www.ffmpeg.org/) с помощью libav, и наткнулся на руководство [\"How to write a video player in less than 1k lines\"](http://dranger.com/ffmpeg/).\nК сожалению, оно устарело, поэтому я решил написать этот гайд.\n\nБольшая часть кода здесь на C, **но не пугайтесь**: понять и перенести идеи на любимый язык несложно.\nДля FFmpeg/libav есть биндинги ко многим языкам — например, [python](https://pyav.org/), [go](https://github.com/imkira/go-libav); даже если для вашего языка биндингов нет, можно подключиться через `ffi` (вот пример с [Lua](https://github.com/daurnimator/ffmpeg-lua-ffi/blob/master/init.lua)).\n\nМы начнём с краткого ликбеза: что такое видео, аудио, кодек и контейнер, затем вкратце пройдемся по командной строке `FFmpeg`, и, наконец, перейдём к коду. Если вам это неинтересно, смело пролистывайте прямо к разделу [Изучаем FFmpeg и libav трудным путем](#learn-ffmpeg-libav-the-hard-way) (да, ссылка с картинкой посреди фразы — это отсылка к мемам, не баг).\n\nНекоторые люди раньше говорили, что видеостриминг в Интернете — будущее традиционного ТВ. 
Как бы то ни было, FFmpeg — вещь, которую стоит изучить.\n\n**Оглавление**\n\n* [Введение](#введение)\n\n  * [видео — то, что ты видишь!](#видео--то-что-ты-видишь)\n  * [аудио — то, что ты слышишь!](#аудио--то-что-ты-слышишь)\n  * [кодек — сжатие данных](#кодек--сжатие-данных)\n  * [контейнер — дом для аудио и видео](#контейнер--дом-для-аудио-и-видео)\n* [FFmpeg — командная строка](#ffmpeg--командная-строка)\n\n  * [FFmpeg CLI 101](#ffmpeg-cli-101)\n* [Типовые операции с видео](#типовые-операции-с-видео)\n\n  * [Transcoding (перекодирование)](#transcoding-перекодирование)\n  * [Transmuxing (перемультиплексирование)](#transmuxing-перемультиплексирование)\n  * [Transrating (изменение битрейта)](#transrating-изменение-битрейта)\n  * [Transsizing (изменение разрешения)](#transsizing-изменение-разрешения)\n  * [Бонус: адаптивный стриминг](#bonus-round-adaptive-streaming)\n  * [Дальше — больше](#дальше--больше)\n* [Learning FFmpeg libav the hard way](#learn-ffmpeg-libav-the-hard-way)\n\n  * [Глава 0 — печально известный hello world](#глава-0--печально-известный-hello-world)\n\n    * [Архитектура FFmpeg libav](#архитектура-ffmpeg-libav)\n  * [Глава 1 — тайминги/синхронизация](#глава-1--таймингисинхронизация)\n  * [Глава 2 — remuxing](#глава-2--remuxing)\n  * [Глава 3 — транскодирование](#глава-3--транскодирование)\n\n# Введение\n\n## видео — то, что ты видишь\n\nЕсли последовательно показывать серию изображений с заданной частотой (скажем, [24 кадра в секунду](https://www.filmindependent.org/blog/hacking-film-24-frames-per-second/)), возникнет [иллюзия движения](https://en.wikipedia.org/wiki/Persistence_of_vision).\nВ итоге получаем базовую идею, стоящую за видео: **ряд картинок/кадров, идущих с заданной частотой**.\n\n<img src=\"https://upload.wikimedia.org/wikipedia/commons/1/1f/Linnet_kineograph_1886.jpg\" title=\"flip book\" height=\"280\"></img>\n\nZeitgenössische Illustration (1886)\n\n## аудио — то, что ты слышишь!\n\nХотя видео без звука может выражать самые разные чувства, добавление звука делает просмотр более 
приятным.\n\nЗвук — это вибрация, распространяющаяся как волна давления через воздух или любую другую среду — газ, жидкость, твёрдое тело.\n\n> В цифровой аудиосистеме микрофон преобразует звук в аналоговый электрический сигнал, затем АЦП — обычно с использованием [PCM](https://en.wikipedia.org/wiki/Pulse-code_modulation) — превращает аналог в цифровой.\n\n![audio analog to digital](https://upload.wikimedia.org/wikipedia/commons/thumb/c/c7/CPT-Sound-ADC-DAC.svg/640px-CPT-Sound-ADC-DAC.svg.png \"audio analog to digital\")\n\n> [Источник](https://commons.wikimedia.org/wiki/File:CPT-Sound-ADC-DAC.svg)\n\n## кодек — сжатие данных\n\n> CODEC — это электронная схема или ПО, которое **сжимает/распаковывает цифровое аудио/видео.** Оно переводит сырые (несжатые) данные в сжатый формат и обратно.\n> [https://en.wikipedia.org/wiki/Video_codec](https://en.wikipedia.org/wiki/Video_codec)\n\nЕсли же просто сложить миллионы картинок в один файл и назвать это фильмом, размер получится чудовищным. Посчитаем:\n\nПусть есть видео с разрешением `1080 x 1920` (высота × ширина), на каждый пиксель тратим `3 байта` (цвет в [24 битах](https://en.wikipedia.org/wiki/Color_depth#True_color_.2824-bit.29), т.е. 16 777 216 цветов), частота `24 кадра/с`, длительность `30 минут`.\n\n```c\ntoppf = 1080 * 1920 // total_of_pixels_per_frame — всего пикселей на кадр\ncpp = 3 // cost_per_pixel — байт на пиксель\ntis = 30 * 60 // time_in_seconds — время в секундах\nfps = 24 // frames_per_second — кадры в секунду\n\nrequired_storage = tis * fps * toppf * cpp\n```\n\nТакое видео займёт примерно `250.28GB` или потребует `1.19 Gbps` пропускной способности! 
Поэтому нам и нужен [CODEC](https://github.com/leandromoreira/digital_video_introduction#how-does-a-video-codec-work).\n\n## контейнер — дом для аудио и видео\n\n> Контейнер (wrapper format) — это метафайл-формат, спецификация которого описывает, как в одном компьютерном файле сосуществуют разные элементы данных и метаданных.\n> [https://en.wikipedia.org/wiki/Digital_container_format](https://en.wikipedia.org/wiki/Digital_container_format)\n\n**Один файл, содержащий все потоки** (обычно аудио и видео) и обеспечивающий **синхронизацию и общие метаданные** — название, разрешение и т.п.\n\nЧасто формат файла можно понять по расширению: например, `video.webm` — это, скорее всего, видео в контейнере [`webm`](https://www.webmproject.org/).\n\n![container](/img/container.png)\n\n# FFmpeg — командная строка\n\n> Полноценное кроссплатформенное решение для записи, конвертации и стриминга аудио и видео.\n\nДля работы с мультимедиа есть великолепный инструмент/библиотека [FFmpeg](https://www.ffmpeg.org/). Скорее всего, ты уже знаком с ним напрямую или косвенно (пользуешься [Chrome?](https://www.chromium.org/developers/design-documents/video)).\n\nЕсть CLI-программа `ffmpeg` — простая, но мощная.\nНапример, чтобы преобразовать контейнер `mp4` в `avi`, достаточно:\n\n```bash\n$ ffmpeg -i input.mp4 output.avi\n```\n\nМы только что сделали **ремультиплексирование** (remuxing) — конвертировали один контейнер в другой.\nТехнически FFmpeg мог и перекодировать потоки, но об этом позже.\n\n## FFmpeg CLI 101\n\nУ FFmpeg есть [документация](https://www.ffmpeg.org/ffmpeg.html), отлично объясняющая, как он работает.\n\n```bash\n# документацию можно смотреть и из командной строки\n\nffmpeg -h full | grep -A 10 -B 10 avoid_negative_ts\n```\n\nЕсли кратко, `ffmpeg` получает аргументы в формате `ffmpeg {1} {2} -i {3} {4} {5}`, где:\n\n1. глобальные опции\n2. опции входного файла\n3. входной URL/путь\n4. опции выходного файла\n5. 
выходной URL/путь\n\nБлоки 2, 3, 4 и 5 можно повторять сколько угодно.\nПорядок аргументов проще понять на примере:\n\n```bash\n# ВНИМАНИЕ: файл около 300 МБ\n$ wget -O bunny_1080p_60fps.mp4 http://distribution.bbb3d.renderfarming.net/video/mp4/bbb_sunflower_1080p_60fps_normal.mp4\n\n$ ffmpeg \\\n-y \\ # глобальные опции\n-c:a libfdk_aac \\ # опции входа\n-i bunny_1080p_60fps.mp4 \\ # входной url\n-c:v libvpx-vp9 -c:a libvorbis \\ # опции выхода\nbunny_1080p_60fps_vp9.webm # выходной url\n```\n\nЭта команда берёт на входе файл mp4 с двумя потоками (аудио закодированное кодеком aac и видео закодированное с помощью кодека h264) и конвертирует в webm, изменяя изначальные аудио, и видео кодеки.\n\nКоманду можно упростить, но тогда FFmpeg подставит/угадает значения по умолчанию.\nНапример, если написать `ffmpeg -i input.avi output.mp4`, то какой аудио/видео кодек будет выбран для `output.mp4`?\n\nУ Вернера Робитцы есть обязательный к прочтению/выполнению [курс по кодированию и редактированию в FFmpeg](http://slhck.info/ffmpeg-encoding-course/#/).\n\n# Типовые операции с видео\n\nРаботая с аудио/видео, мы обычно решаем ряд типовых задач.\n\n## Transcoding (перекодирование)\n\n![transcoding](/img/transcoding.png)\n\n**Что это?** преобразование одного из потоков (аудио или видео) из одного кодека в другой.\n\n**Зачем?** иногда некоторые устройства (TV, смартфон, консоль и т.д.) 
не поддерживают кодек X, но поддерживают кодек Y; также новые кодеки нередко предоставляют лучшую степень сжатия.\n\n**Как?** конвертируем видео `H264` (AVC) в `H265` (HEVC).\n\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-c:v libx265 \\\nbunny_1080p_60fps_h265.mp4\n```\n\n## Transmuxing (перемультиплексирование)\n\n![transmuxing](/img/transmuxing.png)\n\n**Что это?** конвертация из одного формата (контейнера) в другой.\n\n**Зачем?** некоторые устройства не поддерживают контейнер X, но поддерживают Y; новые контейнеры иногда дают современные фичи.\n\n**Как?** конвертируем `mp4` в `ts`.\n\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-c copy \\ # просим ffmpeg пропустить перекодирование\nbunny_1080p_60fps.ts\n```\n\n## Transrating (изменение битрейта)\n\n![transrating](/img/transrating.png)\n\n**Что это?** изменение битрейта или создание альтернативных версий видео (renditions).\n\n**Зачем?** кто-то будет смотреть через `2G` на слабом смартфоне, а кто-то — через `оптоволокно` на 4K-TV; стоит предложить несколько версий одного видео с разным битрейтом под возможности зрителя.\n\n**Как?** делаем версию с битрейтом между 964K и 3856K.\n\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-minrate 964K -maxrate 3856K -bufsize 2000K \\\nbunny_1080p_60fps_transrating_964_3856.mp4\n```\n\nОбычно transrating используют вместе с transsizing. У Вернера Робитцы есть ещё одна отличная [серия постов о контроле битрейта в FFmpeg](http://slhck.info/posts/).\n\n## Transsizing (изменение разрешения)\n\n![transsizing](/img/transsizing.png)\n\n**Что это?** изменение разрешения. 
Как уже сказано, transsizing часто идёт в паре с transrating.\n\n**Зачем?** причины те же, что и для transrating.\n\n**Как?** конвертируем `1080p` в `480p`.\n\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-vf scale=480:-1 \\\nbunny_1080p_60fps_transsizing_480.mp4\n```\n\n## Bonus Round: Adaptive Streaming\n\n![adaptive streaming](/img/adaptive-streaming.png)\n\n**Что это?** производим несколько разрешений (битрейтов), режем медиа на фрагменты и отдаём по HTTP.\n\n**Зачем?** гибкость — чтобы обеспечить гибкий медиаконтент, который можно смотреть как на маломощном смартфоне, так и на 4K-телевизоре, его также легко масштабировать и развертывать, но это может добавить задержку.\n\n**Как?** создаём адаптивное WebM через DASH.\n\n```bash\n# видеопотоки\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 160x90 -b:v 250k -keyint_min 150 -g 150 -an -f webm -dash 1 video_160x90_250k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 320x180 -b:v 500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_320x180_500k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 750k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_750k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 1000k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_1000k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 1280x720 -b:v 1500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_1280x720_1500k.webm\n\n# аудиопотоки\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:a libvorbis -b:a 128k -vn -f webm -dash 1 audio_128k.webm\n\n# DASH-манифест\n$ ffmpeg \\\n -f webm_dash_manifest -i video_160x90_250k.webm \\\n -f webm_dash_manifest -i video_320x180_500k.webm \\\n -f webm_dash_manifest -i video_640x360_750k.webm \\\n -f webm_dash_manifest -i video_640x360_1000k.webm \\\n -f webm_dash_manifest -i video_1280x720_1500k.webm \\\n -f webm_dash_manifest -i audio_128k.webm \\\n -c copy -map 0 -map 1 -map 2 -map 3 -map 4 -map 5 \\\n -f 
webm_dash_manifest \\\n -adaptation_sets \"id=0,streams=0,1,2,3,4 id=1,streams=5\" \\\n manifest.mpd\n```\n\nPS: Я позаимствовал пример из [Instructions to playback Adaptive WebM using DASH](http://wiki.webmproject.org/adaptive-streaming/instructions-to-playback-adaptive-webm-using-dash)\n\n## Дальше — больше\n\n[Существует множество других способов использования FFmpeg](https://github.com/leandromoreira/digital_video_introduction/blob/master/encoding_pratical_examples.md#split-and-merge-smoothly).\nЯ использую его вместе с *iMovie* для создания/монтажа роликов для YouTube — и вы можете применять его профессионально.\n\n# Learn FFmpeg libav the Hard Way\n\n> Don't you wonder sometimes 'bout sound and vision?\n> **David Robert Jones**\n\nРаз уж [FFmpeg](#ffmpeg--командная-строка) настолько полезен в CLI для базовых операций с медиафайлами, как использовать его в своих программах?\n\nFFmpeg — это [набор библиотек](https://www.ffmpeg.org/doxygen/trunk/index.html), которые можно интегрировать в свои приложения.\nОбычно при установке FFmpeg ставятся и эти библиотеки. 
Я буду называть набор этих библиотек **FFmpeg libav**.\n\n> Это название — дань уважения серии Zed Shaw [Learn X the Hard Way](https://learncodethehardway.org/), в частности его книге Learn C the Hard Way.\n\n## Глава 0 — печально известный hello world\n\nЭтот hello world не выведет `\"hello world\"` в терминал :tongue:\nВместо этого мы **распечатаем информацию о видео** — формат (контейнер), длительность, разрешение, аудио каналы; и напоследок **декодируем несколько кадров и сохраним их как изображения**.\n\n### Архитектура FFmpeg libav\n\nПрежде чем писать код, разберёмся, **как устроена FFmpeg libav** и как разные компоненты взаимодействуют между собой.\n\nСхема процесса декодирования видео:\n\n![ffmpeg libav architecture - decoding process](/img/decoding.png)\n\nСначала нужно загрузить медиафайл в компонент [`AVFormatContext`](https://ffmpeg.org/doxygen/trunk/structAVFormatContext.html) (контейнер также называют форматом).\nНа самом деле полностью файл не читается: зачастую парсится только заголовок.\n\nПосле чтения минимального **заголовка контейнера** можно получить доступ к его потокам (думайте о них как о примитивных аудио и видеоданных).\nКаждый поток доступен через компонент, называемый [`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html).\n\n> Stream — красивое слово для «непрерывного потока данных».\n\nДопустим, у нашего видео есть два потока: аудио в [AAC](https://en.wikipedia.org/wiki/Advanced_Audio_Coding) Кодеке и видео в [H264 (AVC)](https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC). 
Из каждого потока мы извлекаем **кусочки данных**, называемые пакетами, которые будут загружены в компонент, называемый [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html).\n\n**Данные в пакетах всё ещё закодированы** (сжаты), и чтобы их декодировать, передаём их соответствующему [`AVCodec`](https://ffmpeg.org/doxygen/trunk/structAVCodec.html).\n\n`AVCodec` декодирует их в [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html), и, наконец, мы получаем **несжатый кадр**. Обрати внимание: терминология/процесс одинаковы и для аудио, и для видео.\n\n### Требования\n\nПоскольку некоторые сталкивались с [проблемами при сборке/запуске примеров](https://github.com/leandromoreira/ffmpeg-libav-tutorial/issues?utf8=%E2%9C%93&q=is%3Aissue+is%3Aopen+compiling), **мы будем использовать [`Docker`](https://docs.docker.com/install/) как окружение разработки/запуска**. Также используем ролик big buck bunny; если его нет локально, выполните `make fetch_small_bunny_video`.\n\n### Глава 0 - пройдемся по коду\n\n> #### TLDR; покажи [код](/0_hello_world.c) и как его запускать.\n>\n> ```bash\n> $ make run_hello\n> ```\n\nОпустим некоторые детали, но не переживайте: [исходники на GitHub](/0_hello_world.c).\n\nВыделим память под [`AVFormatContext`](http://ffmpeg.org/doxygen/trunk/structAVFormatContext.html), который будет хранить данные о формате (контейнере).\n\n```c\nAVFormatContext *pFormatContext = avformat_alloc_context();\n```\n\nТеперь откроем файл, прочитаем заголовок и заполним `AVFormatContext` минимальной информацией о формате (заметь, кодеки обычно не открываются).\nИспользуем [`avformat_open_input`](http://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga31d601155e9035d5b0e7efedc894ee49). 
На вход — `AVFormatContext`, `filename` и два необязательных аргумента: [`AVInputFormat`](https://ffmpeg.org/doxygen/trunk/structAVInputFormat.html) (если `NULL`, FFmpeg угадает формат) и [`AVDictionary`](https://ffmpeg.org/doxygen/trunk/structAVDictionary.html) (опции демультиплексора).\n\n```c\navformat_open_input(&pFormatContext, filename, NULL, NULL);\n```\n\nМожно вывести название формата и длительность:\n\n```c\nprintf(\"Format %s, duration %lld us\", pFormatContext->iformat->long_name, pFormatContext->duration);\n```\n\nЧтобы получить `streams`, нужно прочитать данные из медиа. Функция [`avformat_find_stream_info`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#gad42172e27cddafb81096939783b157bb) делает это.\nТеперь `pFormatContext->nb_streams` — число потоков, а `pFormatContext->streams[i]` — сам `i`-й поток (`AVStream`).\n\n```c\navformat_find_stream_info(pFormatContext,  NULL);\n```\n\nПройдёмся по всем потокам:\n\n```c\nfor (int i = 0; i < pFormatContext->nb_streams; i++)\n{\n  //\n}\n```\n\nДля каждого потока сохраним [`AVCodecParameters`](https://ffmpeg.org/doxygen/trunk/structAVCodecParameters.html) — свойства кодека, которым закодирован поток `i`.\n\n```c\nAVCodecParameters *pLocalCodecParameters = pFormatContext->streams[i]->codecpar;\n```\n\nЗная свойства кодека, ищем подходящий декодер через [`avcodec_find_decoder`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga19a0ca553277f019dd5b0fec6e1f9dca) — получаем зарегистрированный декодер по `codec_id`, т.е. 
[`AVCodec`](http://ffmpeg.org/doxygen/trunk/structAVCodec.html) — компонент, который умеет **enCO**de/**DEC**ode поток.\n\n```c\nAVCodec *pLocalCodec = avcodec_find_decoder(pLocalCodecParameters->codec_id);\n```\n\nТеперь можно вывести информацию о кодеках.\n\n```c\n// видео и аудио\nif (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_VIDEO) {\n  printf(\"Video Codec: resolution %d x %d\", pLocalCodecParameters->width, pLocalCodecParameters->height);\n} else if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_AUDIO) {\n  printf(\"Audio Codec: %d channels, sample rate %d\", pLocalCodecParameters->channels, pLocalCodecParameters->sample_rate);\n}\n// общее\nprintf(\"\\tCodec %s ID %d bit_rate %lld\", pLocalCodec->long_name, pLocalCodec->id, pLocalCodecParameters->bit_rate);\n```\n\nС кодеком можно выделить память под [`AVCodecContext`](https://ffmpeg.org/doxygen/trunk/structAVCodecContext.html) — контекст для процессов кодирования/декодирования, затем заполнить его параметрами кодека через [`avcodec_parameters_to_context`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#gac7b282f51540ca7a99416a3ba6ee0d16).\n\nПосле заполнения контекста — открыть кодек функцией [`avcodec_open2`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d).\n\n```c\nAVCodecContext *pCodecContext = avcodec_alloc_context3(pCodec);\navcodec_parameters_to_context(pCodecContext, pCodecParameters);\navcodec_open2(pCodecContext, pCodec, NULL);\n```\n\nТеперь мы будем читать пакеты из потока и декодировать их в кадры, но сперва выделим память для [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html) и [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html).\n\n```c\nAVPacket *pPacket = av_packet_alloc();\nAVFrame *pFrame = av_frame_alloc();\n```\n\nСчитываем пакеты из потоков через [`av_read_frame`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga4fdb3084415a82e3810de6ee60e46a61), пока они есть.\n\n```c\nwhile 
(av_read_frame(pFormatContext, pPacket) >= 0) {\n  //...\n}\n```\n\n**Отправляем сжатый пакет** (compressed frame) в декодер через контекст кодека — [`avcodec_send_packet`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3).\n\n```c\navcodec_send_packet(pCodecContext, pPacket);\n```\n\nИ **получаем несжатый кадр** из декодера через тот же контекст — [`avcodec_receive_frame`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c).\n\n```c\navcodec_receive_frame(pCodecContext, pFrame);\n```\n\nМожно вывести номер кадра, [PTS](https://en.wikipedia.org/wiki/Presentation_timestamp), DTS, [тип кадра](https://en.wikipedia.org/wiki/Video_compression_picture_types) и т.д.\n\n```c\nprintf(\n    \"Frame %c (%d) pts %d dts %d key_frame %d [coded_picture_number %d, display_picture_number %d]\",\n    av_get_picture_type_char(pFrame->pict_type),\n    pCodecContext->frame_number,\n    pFrame->pts,\n    pFrame->pkt_dts,\n    pFrame->key_frame,\n    pFrame->coded_picture_number,\n    pFrame->display_picture_number\n);\n```\n\nИ наконец можно сохранить декодированный кадр как [простое «серое» изображение](https://en.wikipedia.org/wiki/Netpbm_format#PGM_example). 
Всё просто: берём `pFrame->data`, где индексы соответствуют [плоскостям Y, Cb и Cr](https://en.wikipedia.org/wiki/YCbCr), и берём `0` (Y), чтобы сохранить градации серого.\n\n```c\nsave_gray_frame(pFrame->data[0], pFrame->linesize[0], pFrame->width, pFrame->height, frame_filename);\n\nstatic void save_gray_frame(unsigned char *buf, int wrap, int xsize, int ysize, char *filename)\n{\n    FILE *f;\n    int i;\n    f = fopen(filename,\"w\");\n    // пишем минимальный заголовок для формата pgm\n    // portable graymap format -> https://en.wikipedia.org/wiki/Netpbm_format#PGM_example\n    fprintf(f, \"P5\\n%d %d\\n%d\\n\", xsize, ysize, 255);\n\n    // пишем построчно\n    for (i = 0; i < ysize; i++)\n        fwrite(buf + i * wrap, 1, xsize, f);\n    fclose(f);\n}\n```\n\nVoilà! У нас есть оттенки серого ~2 МБ:\n\n![saved frame](/img/generated_frame.png)\n\n## Глава 1 — тайминги/синхронизация\n\n> **Будь плеером** — молодой JS-разработчик пишет новый MSE-видеоплеер.\n\nПрежде чем [перейти к примеру транскодирования](#chapter-2---transcoding), поговорим про **тайминг** — как плеер понимает, когда показывать кадр.\n\nВ прошлом примере мы сохранили несколько кадров:\n\n![frame 0](/img/hello_world_frames/frame0.png)\n![frame 1](/img/hello_world_frames/frame1.png)\n![frame 2](/img/hello_world_frames/frame2.png)\n![frame 3](/img/hello_world_frames/frame3.png)\n![frame 4](/img/hello_world_frames/frame4.png)\n![frame 5](/img/hello_world_frames/frame5.png)\n\nПри разработке видеоплеера нужно **проигрывать каждый кадр в нужный момент** — иначе будет либо слишком быстро, либо слишком медленно.\n\nДля плавного воспроизведения у каждого кадра есть **PTS (presentation timestamp)** — возрастающее число в **timebase** (рациональное число, где знаменатель — **timescale**), кратное **fps**.\n\nПроще на примерах.\n\nДля `fps=60/1` и `timebase=1/60000` PTS увеличивается на `timescale / fps = 1000`, значит **реальное время PTS** каждого кадра (если старт с 0):\n\n* `frame=0, PTS = 0, 
PTS_TIME = 0`\n* `frame=1, PTS = 1000, PTS_TIME = PTS * timebase = 0.016`\n* `frame=2, PTS = 2000, PTS_TIME = PTS * timebase = 0.033`\n\nПочти тот же сценарий с `timebase=1/60`:\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 1, PTS_TIME = 0.016`\n* `frame=2, PTS = 2, PTS_TIME = 0.033`\n* `frame=3, PTS = 3, PTS_TIME = 0.050`\n\nДля `fps=25/1` и `timebase=1/75` PTS растёт на `3`, времена:\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 3, PTS_TIME = 0.04`\n* `frame=2, PTS = 6, PTS_TIME = 0.08`\n* `frame=3, PTS = 9, PTS_TIME = 0.12`\n* ...\n* `frame=24, PTS = 72, PTS_TIME = 0.96`\n* ...\n* `frame=4064, PTS = 12192, PTS_TIME = 162.56`\n\nЗная `pts_time`, можно рендерить, синхронизируя с аудио `pts_time` или системными часами. FFmpeg libav отдаёт эти параметры через API:\n\n* fps = [`AVStream->avg_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a946e1e9b89eeeae4cab8a833b482c1ad)\n* tbr = [`AVStream->r_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#ad63fb11cc1415e278e09ddc676e8a1ad)\n* tbn = [`AVStream->time_base`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a9db755451f14e2bf590d4b85d82b32e6)\n\nК слову: кадры, которые мы сохранили, приходили в порядке DTS (1,6,4,2,3,5), но воспроизводились по PTS (1,2,3,4,5). И обрати внимание, насколько «дешевле» B-кадры по сравнению с P и I.\n\n```\nLOG: AVStream->r_frame_rate 60/1\nLOG: AVStream->time_base 1/60000\n...\nLOG: Frame 1 (type=I, size=153797 bytes) pts 6000 key_frame 1 [DTS 0]\nLOG: Frame 2 (type=B, size=8117 bytes) pts 7000 key_frame 0 [DTS 3]\nLOG: Frame 3 (type=B, size=8226 bytes) pts 8000 key_frame 0 [DTS 4]\nLOG: Frame 4 (type=B, size=17699 bytes) pts 9000 key_frame 0 [DTS 2]\nLOG: Frame 5 (type=B, size=6253 bytes) pts 10000 key_frame 0 [DTS 5]\nLOG: Frame 6 (type=P, size=34992 bytes) pts 11000 key_frame 0 [DTS 1]\n```\n\n## Глава 2 — remuxing\n\nRemuxing — смена формата (контейнера). 
Например, можно без боли поменять [MPEG-4](https://en.wikipedia.org/wiki/MPEG-4_Part_14) на [MPEG-TS](https://en.wikipedia.org/wiki/MPEG_transport_stream) с FFmpeg:\n\n```bash\nffmpeg input.mp4 -c copy output.ts\n```\n\nFFmpeg демультиплексирует mp4, но **не** декодирует/кодирует (`-c copy`), а в конце мультиплексирует в `mpegts`. Если не указать формат через `-f`, FFmpeg попытается угадать по расширению.\n\nОбщая архитектура использования FFmpeg/libav такова:\n\n* **[protocol layer](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** — принимает `input` (например, `file`, но может быть `rtmp`/`HTTP`)\n* **[format layer](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** — `demuxes` (демультиплексирует) содержимое, отдаёт метаданные и потоки\n* **[codec layer](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** — `decodes` (декодирует) сжатые данные потоков <sup>*необязательно*</sup>\n* **[pixel layer](https://ffmpeg.org/doxygen/trunk/group__lavfi.html)** — применяет `filters` к сырым кадрам (например, масштабирование) <sup>*необязательно*</sup>\n* потом обратный путь:\n* **[codec layer](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** — `encodes`/`re-encodes`/`transcodes` сырые кадры <sup>*необязательно*</sup>\n* **[format layer](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** — `muxes`/`remuxes` сжатые данные\n* **[protocol layer](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** — выдаёт результат на `output` (файл или сеть)\n\n![ffmpeg libav workflow](/img/ffmpeg_libav_workflow.jpeg)\n\n> График вдохновлён работами [Leixiaohua](http://leixiaohua1020.github.io/#ffmpeg-development-examples) и [Slhck](https://slhck.info/ffmpeg-encoding-course/#/9).\n\nТеперь напишем пример на libav, эквивалентный `ffmpeg input.mp4 -c copy output.ts`.\n\nМы читаем из входа (`input_format_context`) и пишем в выход (`output_format_context`).\n\n```c\nAVFormatContext *input_format_context = NULL;\nAVFormatContext *output_format_context = NULL;\n```\n\nСначала 
— обычные шаги: выделить память и открыть вход. В этом кейсе — открыть входной файл и выделить память под выходной.\n\n```c\nif ((ret = avformat_open_input(&input_format_context, in_filename, NULL, NULL)) < 0) {\n  fprintf(stderr, \"Could not open input file '%s'\", in_filename);\n  goto end;\n}\nif ((ret = avformat_find_stream_info(input_format_context, NULL)) < 0) {\n  fprintf(stderr, \"Failed to retrieve input stream information\");\n  goto end;\n}\n\navformat_alloc_output_context2(&output_format_context, NULL, NULL, out_filename);\nif (!output_format_context) {\n  fprintf(stderr, \"Could not create output context\\n\");\n  ret = AVERROR_UNKNOWN;\n  goto end;\n}\n```\n\nРемуксить будем только видео, аудио и субтитры, поэтому держим индексы нужных потоков в массиве.\n\n```\nnumber_of_streams = input_format_context->nb_streams;\nstreams_list = av_mallocz_array(number_of_streams, sizeof(*streams_list));\n```\n\nПосле выделения памяти проходим по всем потокам; для каждого создаём выходной поток в `output_format_context` через [avformat_new_stream](https://ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827). 
Потоки не типа видео/аудио/субтитры помечаем, чтобы их потом пропустить.\n\n```c\nfor (i = 0; i < input_format_context->nb_streams; i++) {\n  AVStream *out_stream;\n  AVStream *in_stream = input_format_context->streams[i];\n  AVCodecParameters *in_codecpar = in_stream->codecpar;\n  if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&\n      in_codecpar->codec_type != AVMEDIA_TYPE_VIDEO &&\n      in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {\n    streams_list[i] = -1;\n    continue;\n  }\n  streams_list[i] = stream_index++;\n  out_stream = avformat_new_stream(output_format_context, NULL);\n  if (!out_stream) {\n    fprintf(stderr, \"Failed allocating output stream\\n\");\n    ret = AVERROR_UNKNOWN;\n    goto end;\n  }\n  ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar);\n  if (ret < 0) {\n    fprintf(stderr, \"Failed to copy codec parameters\\n\");\n    goto end;\n  }\n}\n```\n\nТеперь создаём выходной файл.\n\n```c\nif (!(output_format_context->oformat->flags & AVFMT_NOFILE)) {\n  ret = avio_open(&output_format_context->pb, out_filename, AVIO_FLAG_WRITE);\n  if (ret < 0) {\n    fprintf(stderr, \"Could not open output file '%s'\", out_filename);\n    goto end;\n  }\n}\n\nret = avformat_write_header(output_format_context, NULL);\nif (ret < 0) {\n  fprintf(stderr, \"Error occurred when opening output file\\n\");\n  goto end;\n}\n```\n\nПосле этого копируем потоки пакет за пакетом из входа в выход. 
В цикле, пока есть пакеты (`av_read_frame`): пересчитываем PTS/DTS и пишем (`av_interleaved_write_frame`) в выходной контекст.\n\n```c\nwhile (1) {\n  AVStream *in_stream, *out_stream;\n  ret = av_read_frame(input_format_context, &packet);\n  if (ret < 0)\n    break;\n  in_stream  = input_format_context->streams[packet.stream_index];\n  if (packet.stream_index >= number_of_streams || streams_list[packet.stream_index] < 0) {\n    av_packet_unref(&packet);\n    continue;\n  }\n  packet.stream_index = streams_list[packet.stream_index];\n  out_stream = output_format_context->streams[packet.stream_index];\n  /* копируем пакет */\n  packet.pts = av_rescale_q_rnd(packet.pts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);\n  packet.dts = av_rescale_q_rnd(packet.dts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);\n  packet.duration = av_rescale_q(packet.duration, in_stream->time_base, out_stream->time_base);\n  // https://ffmpeg.org/doxygen/trunk/structAVPacket.html#ab5793d8195cf4789dfb3913b7a693903\n  packet.pos = -1;\n\n  // https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1\n  ret = av_interleaved_write_frame(output_format_context, &packet);\n  if (ret < 0) {\n    fprintf(stderr, \"Error muxing packet\\n\");\n    break;\n  }\n  av_packet_unref(&packet);\n}\n```\n\nЧтобы завершить, записываем «хвост» потока в файл функцией [av_write_trailer](https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga7f14007e7dc8f481f054b21614dfec13).\n\n```c\nav_write_trailer(output_format_context);\n```\n\nТеперь протестируем — первый тест: смена контейнера из MP4 в MPEG-TS. По сути, повторяем `ffmpeg input.mp4 -c copy output.ts` на libav.\n\n```bash\nmake run_remuxing_ts\n```\n\nРаботает!!! Не верите? 
И правильно — проверим с помощью `ffprobe`:\n\n```bash\nffprobe -i remuxed_small_bunny_1080p_60fps.ts\n\nInput #0, mpegts, from 'remuxed_small_bunny_1080p_60fps.ts':\n  Duration: 00:00:10.03, start: 0.000000, bitrate: 2751 kb/s\n  Program 1\n    Metadata:\n      service_name    : Service01\n      service_provider: FFmpeg\n    Stream #0:0[0x100]: Video: h264 (High) ([27][0][0][0] / 0x001B), yuv420p(progressive), 1920x1080 [SAR 1:1 DAR 16:9], 60 fps, 60 tbr, 90k tbn, 120 tbc\n    Stream #0:1[0x101]: Audio: ac3 ([129][0][0][0] / 0x0081), 48000 Hz, 5.1(side), fltp, 320 kb/s\n```\n\nИтог на картинке: возвращаемся к [идее архитектуры libav](https://github.com/leandromoreira/ffmpeg-libav-tutorial#ffmpeg-libav-architecture), но показываем, что кодек-слой мы пропустили.\n\n![remuxing libav components](/img/remuxing_libav_components.png)\n\nПеред закрытием главы — важный момент: **можно передавать опции мультиплексору**. Допустим, хотим выдавать [MPEG-DASH](https://developer.mozilla.org/en-US/docs/Web/Apps/Fundamentals/Audio_and_video_delivery/Setting_up_adaptive_streaming_media_sources#MPEG-DASH_Encoding); для этого нужен [fragmented mp4](https://stackoverflow.com/a/35180327) (`fmp4`) вместо MPEG-TS или обычного MP4.\n\nЧерез CLI это просто:\n\n```\nffmpeg -i non_fragmented.mp4 -movflags frag_keyframe+empty_moov+default_base_moof fragmented.mp4\n```\n\nПочти столь же просто — на libav: передаём опции при записи заголовка (до копирования пакетов).\n\n```c\nAVDictionary* opts = NULL;\nav_dict_set(&opts, \"movflags\", \"frag_keyframe+empty_moov+default_base_moof\", 0);\nret = avformat_write_header(output_format_context, &opts);\n```\n\nТеперь можно сгенерировать fragmented mp4:\n\n```bash\nmake run_remuxing_fragmented_mp4\n```\n\nЧтобы убедиться, что я не вру, можно воспользоваться шикарными тулзами [gpac/mp4box.js](http://download.tsi.telecom-paristech.fr/gpac/mp4box.js/filereader.html) или [http://mp4parser.com/](http://mp4parser.com/). 
Сначала загрузите «обычный» mp4.\n\n![mp4 boxes](/img/boxes_normal_mp4.png)\n\nВидим один `mdat` атом/бокс — **здесь лежат видео/аудио кадры**. Теперь загрузите fragmented mp4 и увидите, что `mdat` разбит на части.\n\n![fragmented mp4 boxes](/img/boxes_fragmente_mp4.png)\n\n## Глава 3 — транскодирование\n\n> #### TLDR; покажи [код](/3_transcoding.c) и как его запускать.\n>\n> ```bash\n> $ make run_transcoding\n> ```\n>\n> Детали частично опустим — [исходники на GitHub](/3_transcoding.c).\n\nВ этой главе создадим минималистичный транскодер на C, который конвертирует видео из H264 в H265, используя **FFmpeg/libav**: [libavcodec](https://ffmpeg.org/libavcodec.html), libavformat и libavutil.\n\n![media transcoding flow](/img/transcoding_flow.png)\n\n> *Короткий повтор:* [**AVFormatContext**](https://www.ffmpeg.org/doxygen/trunk/structAVFormatContext.html) — абстракция формата (контейнера: MKV, MP4, WebM, TS). [**AVStream**](https://www.ffmpeg.org/doxygen/trunk/structAVStream.html) — тип данных в формате (audio, video, subtitle, metadata). [**AVPacket**](https://www.ffmpeg.org/doxygen/trunk/structAVPacket.html) — фрагмент сжатых данных из `AVStream`, который декодируется [**AVCodec**](https://www.ffmpeg.org/doxygen/trunk/structAVCodec.html) (av1, h264, vp9, hevc) в сырые [**AVFrame**](https://www.ffmpeg.org/doxygen/trunk/structAVFrame.html).\n\n### Transmuxing\n\nНачнём с простого ремукса, затем просто дополним его. Первый шаг — **загрузка входного файла**.\n\n```c\n// Выделяем AVFormatContext\navfc = avformat_alloc_context();\n// Открываем вход и читаем заголовок.\navformat_open_input(avfc, in_filename, NULL, NULL);\n// Читаем пакеты, чтобы получить информацию о потоках.\navformat_find_stream_info(avfc, NULL);\n```\n\nДалее готовим декодер: `AVFormatContext` даёт доступ ко всем `AVStream`. 
Для каждого находим `AVCodec`, создаём `AVCodecContext` и открываем кодек — после этого можно декодировать.\n\n> [**AVCodecContext**](https://www.ffmpeg.org/doxygen/trunk/structAVCodecContext.html) хранит конфигурацию медиа: битрейт, fps, sample rate, channels, высоту/ширину и многое другое.\n\n```c\nfor (int i = 0; i < avfc->nb_streams; i++)\n{\n  AVStream *avs = avfc->streams[i];\n  AVCodec *avc = avcodec_find_decoder(avs->codecpar->codec_id);\n  AVCodecContext *avcc = avcodec_alloc_context3(*avc);\n  avcodec_parameters_to_context(*avcc, avs->codecpar);\n  avcodec_open2(*avcc, *avc, NULL);\n}\n```\n\nНужно подготовить и выходной медиафайл для ремакса: **выделяем память** под `AVFormatContext` для выхода. Создаём **каждый поток** в выходном формате. Чтобы корректно упаковалось, **копируем параметры кодека** из декодера.\n\nСтавим флаг `AV_CODEC_FLAG_GLOBAL_HEADER`, говоря энкодеру использовать глобальные заголовки, затем открываем **файл на запись** и сохраняем заголовки.\n\n```c\navformat_alloc_output_context2(&encoder_avfc, NULL, NULL, out_filename);\n\nAVStream *avs = avformat_new_stream(encoder_avfc, NULL);\navcodec_parameters_copy(avs->codecpar, decoder_avs->codecpar);\n\nif (encoder_avfc->oformat->flags & AVFMT_GLOBALHEADER)\n  encoder_avfc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;\n\navio_open(&encoder_avfc->pb, encoder->filename, AVIO_FLAG_WRITE);\navformat_write_header(encoder->avfc, &muxer_opts);\n```\n\nМы берём `AVPacket` из декодера, корректируем таймстампы и записываем его в выходной файл. Несмотря на название `av_interleaved_write_frame`, записывается пакет. 
Завершаем ремакс записью трейлера.\n\n```c\nAVFrame *input_frame = av_frame_alloc();\nAVPacket *input_packet = av_packet_alloc();\n\nwhile (av_read_frame(decoder_avfc, input_packet) >= 0)\n{\n  av_packet_rescale_ts(input_packet, decoder_video_avs->time_base, encoder_video_avs->time_base);\n  av_interleaved_write_frame(*avfc, input_packet) < 0));\n}\n\nav_write_trailer(encoder_avfc);\n```\n\n### Transcoding\n\nВ предыдущем разделе был простой ремуксер. Теперь добавим кодирование — научим программу транскодировать видео `h264` → `h265`.\n\nПосле подготовки декодера, но до настройки выходного медиафайла — настраиваем энкодер.\n\n* Создаём видео-`AVStream` в энкодере, [`avformat_new_stream`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827)\n* Берём `AVCodec` по имени `libx265`, [`avcodec_find_encoder_by_name`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__encoding.html#gaa614ffc38511c104bdff4a3afa086d37)\n* Создаём `AVCodecContext` для этого кодека, [`avcodec_alloc_context3`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#gae80afec6f26df6607eaacf39b561c315)\n* Настраиваем базовые параметры сессии\n* Открываем кодек и копируем параметры из контекста в поток: [`avcodec_open2`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d), [`avcodec_parameters_from_context`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga0c7058f764778615e7978a1821ab3cfe)\n\n```c\nAVRational input_framerate = av_guess_frame_rate(decoder_avfc, decoder_video_avs, NULL);\nAVStream *video_avs = avformat_new_stream(encoder_avfc, NULL);\n\nchar *codec_name = \"libx265\";\nchar *codec_priv_key = \"x265-params\";\n// используем внутренние опции x265:\n// отключаем детекцию смены сцены и фиксируем\n// GOP на 60 кадров.\nchar *codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0\";\n\nAVCodec *video_avc = avcodec_find_encoder_by_name(codec_name);\nAVCodecContext *video_avcc = 
avcodec_alloc_context3(video_avc);\n// параметры энкодера\nav_opt_set(sc->video_avcc->priv_data, codec_priv_key, codec_priv_value, 0);\nvideo_avcc->height = decoder_ctx->height;\nvideo_avcc->width  = decoder_ctx->width;\nvideo_avcc->pix_fmt = video_avc->pix_fmts[0];\n// контроль битрейта\nvideo_avcc->bit_rate       = 2 * 1000 * 1000;\nvideo_avcc->rc_buffer_size = 4 * 1000 * 1000;\nvideo_avcc->rc_max_rate    = 2 * 1000 * 1000;\nvideo_avcc->rc_min_rate    = 2.5 * 1000 * 1000;\n// time base\nvideo_avcc->time_base = av_inv_q(input_framerate);\nvideo_avs->time_base  = sc->video_avcc->time_base;\n\navcodec_open2(sc->video_avcc, sc->video_avc, NULL);\navcodec_parameters_from_context(sc->video_avs->codecpar, sc->video_avcc);\n```\n\nРасширим цикл декодирования для видеотранскодирования:\n\n* Отправляем входной `AVPacket` в декодер — [`avcodec_send_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3)\n* Получаем сырой `AVFrame` — [`avcodec_receive_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c)\n* Начинаем кодировать этот сырой кадр,\n* Отправляем кадр — [`avcodec_send_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga9395cb802a5febf1f00df31497779169)\n* Получаем сжатый `AVPacket` — [`avcodec_receive_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga5b8eff59cf259747cf0b31563e38ded6)\n* Выставляем таймстампы — [`av_packet_rescale_ts`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__packet.html#gae5c86e4d93f6e7aa62ef2c60763ea67e)\n* Пишем в выходной файл — [`av_interleaved_write_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1)\n\n```c\nAVFrame *input_frame = av_frame_alloc();\nAVPacket *input_packet = av_packet_alloc();\n\nwhile (av_read_frame(decoder_avfc, input_packet) >= 0)\n{\n  int response = avcodec_send_packet(decoder_video_avcc, input_packet);\n  
while (response >= 0) {\n    response = avcodec_receive_frame(decoder_video_avcc, input_frame);\n    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n      break;\n    } else if (response < 0) {\n      return response;\n    }\n    if (response >= 0) {\n      encode(encoder_avfc, decoder_video_avs, encoder_video_avs, decoder_video_avcc, input_packet->stream_index);\n    }\n    av_frame_unref(input_frame);\n  }\n  av_packet_unref(input_packet);\n}\nav_write_trailer(encoder_avfc);\n\n// используемая функция\nint encode(AVFormatContext *avfc, AVStream *dec_video_avs, AVStream *enc_video_avs, AVCodecContext video_avcc int index) {\n  AVPacket *output_packet = av_packet_alloc();\n  int response = avcodec_send_frame(video_avcc, input_frame);\n\n  while (response >= 0) {\n    response = avcodec_receive_packet(video_avcc, output_packet);\n    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n      break;\n    } else if (response < 0) {\n      return -1;\n    }\n\n    output_packet->stream_index = index;\n    output_packet->duration = enc_video_avs->time_base.den / enc_video_avs->time_base.num / dec_video_avs->avg_frame_rate.num * dec_video_avs->avg_frame_rate.den;\n\n    av_packet_rescale_ts(output_packet, dec_video_avs->time_base, enc_video_avs->time_base);\n    response = av_interleaved_write_frame(avfc, output_packet);\n  }\n  av_packet_unref(output_packet);\n  av_packet_free(&output_packet);\n  return 0;\n}\n```\n\nМы конвертировали видеопоток `h264` → `h265`. Как и ожидалось, `h265`-версия меньше `h264`. 
При этом [написанная программа](/3_transcoding.c) умеет проводить следующие операции:\n\n```c\n\n  /*\n   * H264 -> H265\n   * Audio -> remuxed (без изменений)\n   * MP4 - MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx265\";\n  sp.codec_priv_key = \"x265-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0\";\n\n  /*\n   * H264 -> H264 (фиксированный GOP)\n   * Audio -> remuxed (без изменений)\n   * MP4 - MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n\n  /*\n   * H264 -> H264 (фиксированный GOP)\n   * Audio -> remuxed (без изменений)\n   * MP4 - fragmented MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n  sp.muxer_opt_key = \"movflags\";\n  sp.muxer_opt_value = \"frag_keyframe+empty_moov+delay_moov+default_base_moof\";\n\n  /*\n   * H264 -> H264 (фиксированный GOP)\n   * Audio -> AAC\n   * MP4 - MPEG-TS\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 0;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n  sp.audio_codec = \"aac\";\n  sp.output_extension = \".ts\";\n\n  /* В работе :P  -> не играет во VLC, итоговый битрейт огромен\n   * H264 -> VP9\n   * Audio -> Vorbis\n   * MP4 - WebM\n   */\n  //StreamingParams sp = {0};\n  //sp.copy_audio = 0;\n  //sp.copy_video = 0;\n  //sp.video_codec = \"libvpx-vp9\";\n  //sp.audio_codec = \"libvorbis\";\n  //sp.output_extension = \".webm\";\n\n```\n\n> Честно говоря, это оказалось [сложнее, чем я 
думал](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54): пришлось копать [исходники FFmpeg CLI](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54#issuecomment-570746749) и много тестировать. Похоже, я ещё что-то упускаю: пришлось включить `force-cfr`, чтобы `h264` заработал, и я всё ещё вижу предупреждения вроде `warning messages (forced frame type (5) at 80 was changed to frame type (3))`.\n"
  },
  {
    "path": "README-vn.md",
    "content": "[![license](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)\n\nTôi đang tìm một bài hướng dẫn về cách sử dụng [FFmpeg](https://www.ffmpeg.org/) như một thư viện (được biết đến là libav) và sau đó tôi đã tìm thấy bài viết [\"Cách viết video player ít hơn 1000 dòng\"](http://dranger.com/ffmpeg/).\nThật không may, nó không còn được dùng nữa, vì vậy tôi quyết định viết bài hướng dẫn này.\n\nTất cả dòng code ở đây được viết bằng ngôn ngữ C, nhưng đừng lo lắng: bạn có thể dễ dàng hiểu và áp dụng nó với ngôn ngữ bạn mong muốn.\nThư viện FFmpeg libav có rất nhiều biến thể cho các ngôn ngữ khác nhau như [python](https://pyav.org/), [go](https://github.com/imkira/go-libav) và thậm chí nếu ngôn ngữ bạn sử dụng không có thư viện này, bạn vẫn được hỗ trợ qua `ffi` (đây là một ví dụ với [Lua](https://github.com/daurnimator/ffmpeg-lua-ffi/blob/master/init.lua)).\n\nChúng ta sẽ bắt đầu với một tiết học nhanh về video, audio, codec và container; tiếp đó, chúng ta đi vào khoá học sâu hơn về cách sử dụng câu lệnh `FFmpeg` và cuối cùng chúng ta sẽ viết code. 
Đừng ngại bỏ qua phần đầu và nhảy thẳng đến phần [Tìm hiểu thư viện FFmpeg libav sâu hơn](#tìm-hiểu-thư-viện-ffmpeg-libav-sâu-hơn).\n\nMột vài người thường nói phát trực tuyến video trên Internet là tương lai của TV truyền thống, dù bất cứ tình huống gì, FFmpeg là một thứ đáng để học.\n\n__Mục lục__\n\n- [Giới thiệu](#giới-thiệu)\n  - [Video - Điều bạn thấy!](#video---điều-bạn-thấy)\n  - [Audio - Điều bạn nghe!](#audio---điều-bạn-nghe)\n  - [Codec - Nén dữ liệu](#codec---nén-dữ-liệu)\n  - [Container - Định dạng tệp lưu trữ chung video và audio](#container---định-dạng-tệp-lưu-trữ-chung-video-và-audio)\n- [FFmpeg - Bộ công cụ dưới dạng câu lệnh](#ffmpeg---bộ-công-cụ-dưới-dạng-câu-lệnh)\n  - [Bộ công cụ câu lệnh FFmpeg 101](#bộ-công-cụ-câu-lệnh-ffmpeg-101)\n- [Những hành động xử lý video phổ biến](#những-hành-động-xử-lý-video-phổ-biến)\n  - [Chuyển đổi chuẩn nén - Transcoding](#chuyển-đổi-chuẩn-nén---transcoding)\n  - [Chuyển đổi định dạng tệp - Transmuxing](#chuyển-đổi-định-dạng-tệp---transmuxing)\n  - [Thay đổi tốc độ bit - Transrating](#thay-đổi-tốc-độ-bit---transrating)\n  - [Thay đổi độ phân giải - Transsizing](#thay-đổi-độ-phân-giải---transsizing)\n  - [Mở rộng: phát trực tuyến thích ứng (Adaptive-streaming)](#mở-rộng-phát-trực-tuyến-thích-ứng-adaptive-streaming)\n  - [Hơn thế nữa](#hơn-thế-nữa)\n- [Tìm hiểu thư viện FFmpeg libav sâu hơn](#tìm-hiểu-thư-viện-ffmpeg-libav-sâu-hơn)\n  - [Chapter 0 - Hello world nổi tiếng](#chapter-0---hello-world-nổi-tiếng)\n    - [Kiến trúc thư viện FFmpeg libav](#kiến-trúc-thư-viện-ffmpeg-libav)\n    - [Các yêu cầu](#các-yêu-cầu)\n    - [Chương 0 - lướt qua các dòng code](#chương-0---lướt-qua-các-dòng-code)\n  - [Chapter 1 - Đồng bộ audio và video](#chapter-1---đồng-bộ-audio-và-video)\n  - [Chapter 2 - Remuxing](#chapter-2---remuxing)\n  - [Chapter 3 - Transcoding](#chapter-3---transcoding)\n    - [Transmuxing](#transmuxing)\n    - [Transcoding](#transcoding)\n  \n# Giới thiệu\n\n## Video - Điều bạn thấy!\n\nNếu bạn có một chuỗi tuần 
tự các hình ảnh và thay đổi chúng ở một tần số đã biết (hãy ví dụ như [24 hình trên giây](https://www.filmindependent.org/blog/hacking-film-24-frames-per-second/)), bạn sẽ tạo ra [ảo giác về sự chuyển động](https://en.wikipedia.org/wiki/Persistence_of_vision).\nTóm lại, đây là nguyên lý cơ bản đằng sau video: **một chuỗi các hình ảnh chạy với tốc độ cho trước**. \n\n<img src=\"https://upload.wikimedia.org/wikipedia/commons/1/1f/Linnet_kineograph_1886.jpg\" title=\"flip book\" height=\"280\"></img>\n\nMinh hoạ đương thời (1886)\n\n## Audio - Điều bạn nghe!\n\nMặc dù video không âm thanh có thể mang đến rất nhiều cảm xúc, nhưng việc bổ sung thêm âm thanh sẽ mang lại nhiều trải nghiệm hứng khởi hơn.\n\nÂm thanh là sự rung động lan truyền như sóng áp suất, thông qua không khí hoặc bất cứ phương tiện truyền dẫn khác, như khí gas, chất lỏng hoặc đất.\n\n> Trong một hệ thống âm thanh kỹ thuật số, microphone chuyển đổi âm thanh thành tín hiệu điện tương tự, sau đó qua bộ chuyển đổi tương tự - số (analog-to-digital converter ADC) - tiêu biểu sử dụng [điều chế mã xung (pulse-code modulation PCM)](https://en.wikipedia.org/wiki/Pulse-code_modulation) - chuyển đổi tín hiệu tương tự sang tín hiệu số.\n\n![chuyển đổi tín hiệu âm thanh tương tự sang tín hiệu số](https://upload.wikimedia.org/wikipedia/commons/thumb/c/c7/CPT-Sound-ADC-DAC.svg/640px-CPT-Sound-ADC-DAC.svg.png \"audio analog to digital\")\n>[Nguồn](https://commons.wikimedia.org/wiki/File:CPT-Sound-ADC-DAC.svg)\n\n## Codec - Nén dữ liệu\n\n> CODEC là một mạch điện tử hoặc phần mềm dùng để **nén hoặc giải nén dữ liệu video/audio kỹ thuật số.** Nó chuyển đổi dữ liệu video/audio số hoá nguyên thuỷ (chưa nén) sang định dạng nén hoặc ngược lại.\n> https://en.wikipedia.org/wiki/Video_codec\n\nNhưng nếu chúng ta chọn đóng gói hàng triệu hình ảnh vào trong tệp tài liệu và gọi nó là một bộ phim, chúng ta có thể nhận được một tệp tài liệu khổng lồ. 
Hãy thử tính toán một chút:\n\nGiả sử chúng ta đang tạo một video với độ phân giải `1080 x 1920` (cao x rộng) và chúng ta dành `3 bytes` cho mỗi điểm ảnh (pixel - đơn vị nhỏ nhất của một màn hình) để mã hoá màu sắc (hoặc [màu sắc 24 bit](https://en.wikipedia.org/wiki/Color_depth#True_color_.2824-bit.29), nó đại diện cho 16,777,216 màu sắc khác nhau), và video này chạy ở tốc độ `24 hình trên giây`, kéo dài `30 phút`.\n\n```c\ntoppf = 1080 * 1920 //tong_so_diem_anh_tren_mot_hinh\ncpp = 3 //gia_tri_cho_moi_diem_anh\ntis = 30 * 60 //thoi_gian_tinh_bang_giay\nfps = 24 //so_hinh_tren_giay\n\nbo_nho_yeu_cau = tis * fps * toppf * cpp\n```\n\nVideo này sẽ yêu cầu xấp xỉ bộ nhớ `250.28GB` hoặc băng thông `1.19Gbps`! Đó là lý do tại sao chúng ta cần dùng [CODEC](https://github.com/leandromoreira/digital_video_introduction#how-does-a-video-codec-work).\n\n## Container - Định dạng tệp lưu trữ chung video và audio\n\n> Một container hay định dạng tệp là một định dạng tập tin mà thông số của nó miêu tả những thành phần khác nhau của dữ liệu và thông tin cũng tồn tại như thế nào trong một tập tin máy tính.\n> https://en.wikipedia.org/wiki/Digital_container_format\n\nMột **tệp tin chứa tất cả các luồng dữ liệu** (bao gồm tất cả audio và video) và nó cũng cung cấp cơ chế đồng bộ và thông tin chung, như tựa đề, độ phân giải,...\n\nThông thường chúng ta có thể suy luận định dạng của tệp dữ liệu bằng cách nhìn vào phần mở rộng tên tệp: ví dụ như một tệp có tên `video.webm` là một video sử dụng định dạng container [`webm`](https://www.webmproject.org/).\n\n![container](/img/container.png)\n\n# FFmpeg - Bộ công cụ dưới dạng câu lệnh\n\n> Một giải pháp hoàn thiện, đa nền tảng để ghi lại, chuyển đổi và phát trực tuyến luồng audio và video.\n\nĐể làm việc với truyền thông đa phương tiện, chúng ta có thể sử dụng công cụ/thư viện hữu ích gọi là [FFmpeg](https://www.ffmpeg.org/). 
Rất có thể bạn đã từng biết/sử dụng nó một cách trực tiếp hoặc gián tiếp (bạn có sử dụng [Chrome?](https://www.chromium.org/developers/design-documents/video)).\n\nNó có một chương trình chạy lệnh gọi là `ffmpeg`, một chương trình mã nhị phân đơn giản nhưng vô cùng mạnh mẽ.\nVí dụ như bạn có thể chuyển đổi từ định dạng `mp4` sang định dạng container `avi` chỉ bằng cách gõ câu lệnh sau:\n\n```bash\n$ ffmpeg -i input.mp4 output.avi\n```\n\nChúng ta chỉ thực hiện một bước **định dạng lại (remuxing)** ở đây, nghĩa là nó đang chuyển đổi từ định dạng container này sang một định dạng container khác.\nVề mặt kỹ thuật FFmpeg cũng có thể thực hiện thêm một bước chuyển đổi chuẩn nén (transcode) nhưng chúng ta sẽ nói về nó sau.\n\n## Bộ công cụ câu lệnh FFmpeg 101\n\nFFmpeg có một trang chủ [tài liệu](https://www.ffmpeg.org/ffmpeg.html) đã giải thích rõ ràng đầy đủ về nguyên lý hoạt động của nó. \n\nNgắn gọn mà nói, chương trình câu lệnh FFmpeg cần định dạng đối số sau để thực hiện hành động của nó `ffmpeg {1} {2} -i {3} {4} {5}` trong đó:\n\n1. tuỳ chọn toàn cục\n2. tuỳ chọn đầu vào\n3. đường dẫn đầu vào\n4. tuỳ chọn đầu ra\n5. 
đường dẫn đầu ra\n\nCác phần 2, 3, 4 và 5 có thể là một hoặc nhiều theo như yêu cầu của bạn.\nThật dễ dàng để hiểu những định dạng đối số này trong câu lệnh dưới đây:\n\n``` bash\n# WARNING: kích thước file xấp xỉ 300MB\n$ wget -O bunny_1080p_60fps.mp4 http://distribution.bbb3d.renderfarming.net/video/mp4/bbb_sunflower_1080p_60fps_normal.mp4\n\n$ ffmpeg \\\n-y \\ # lựa chọn toàn cục\n-c:a libfdk_aac \\ # tuỳ chọn đầu vào\n-i bunny_1080p_60fps.mp4 \\ # đường dẫn đầu vào\n-c:v libvpx-vp9 -c:a libvorbis \\ # tuỳ chọn đầu ra\nbunny_1080p_60fps_vp9.webm # đường dẫn đầu ra\n```\nCâu lệnh này nhận tệp đầu vào định dạng `mp4` chứa 2 luồng dữ liệu (một luồng audio nén với chuẩn nén `aac` và một luồng video sử dụng chuẩn nén `h264`) và chuyển đổi nó sang định dạng tệp `webm`, cũng thay đổi chuẩn nén audio và video của nó.\n\nChúng ta có thể đơn giản hoá các câu lệnh trên nhưng hãy lưu ý rằng FFmpeg sẽ nhận hoặc dự đoán các giá trị mặc định cho bạn.\nVí dụ, khi bạn gõ `ffmpeg -i input.avi output.mp4`, chuẩn nén audio/video sẽ được sử dụng để xuất `output.mp4` là gì?\n\nWerner Robitza đã viết một bài hướng dẫn nên đọc về [nén và chỉnh sửa với FFmpeg](http://slhck.info/ffmpeg-encoding-course/#/).\n\n# Những hành động xử lý video phổ biến\n\nTrong khi làm việc với audio/video, chúng ta thường thực hiện một bộ các tác vụ cụ thể với nội dung đa phương tiện.\n\n## Chuyển đổi chuẩn nén - Transcoding\n\n![transcoding](/img/transcoding.png)\n\n**Là gì?** là hành động chuyển đổi một luồng dữ liệu (có thể là audio hoặc video) từ chuẩn nén này sang chuẩn nén khác.\n\n**Tại sao?** thỉnh thoảng chúng ta bắt gặp trường hợp một vài thiết bị (Tivi, điện thoại thông minh, bảng điều khiển,...) 
không hỗ trợ loại X nhưng lại hỗ trợ loại Y và những chuẩn nén mới cung cấp tỉ lệ nén tốt hơn.\n\n**Như thế nào?** chuyển đổi một video từ chuẩn nén `H264` (AVC) sang chuẩn nén `H265` (HEVC) bằng cách sau:\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-c:v libx265 \\\nbunny_1080p_60fps_h265.mp4\n```\n\n## Chuyển đổi định dạng tệp - Transmuxing\n\n![transmuxing](/img/transmuxing.png)\n\n**Là gì?** là hành động chuyển đổi từ một định dạng tệp (container) này sang một định dạng tệp khác.\n\n**Tại sao?** thỉnh thoảng một vài thiết bị (Tivi, điện thoại thông minh, bảng điều khiển,...) không hỗ trợ loại X nhưng lại hỗ trợ loại Y và thỉnh thoảng những định dạng mới cung cấp những tính năng hiện đại được yêu cầu.\n\n**Như thế nào?** thực hiện chuyển đổi từ định dạng `mp4` sang định dạng `webm`.\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-c copy \\ # dieu_khien_ffmpeg_bo_qua_buoc_giai_ma_va_ma_hoa\nbunny_1080p_60fps.webm\n```\n\n## Thay đổi tốc độ bit - Transrating\n\n![transrating](/img/transrating.png)\n\n**Là gì?** là hành động thay đổi tốc độ bit của video/audio, hoặc xuất ra những biến thể (renditions) khác.\n\n**Tại sao?** mọi người có thể thử xem video của bạn với kết nối mạng `2G`(edge) bằng cách sử dụng các thiết bị điện thoại thông minh hiệu năng thấp hoặc bằng kết nối Internet `cáp quang` (fiber) trên thiết bị Tivi 4K của họ. Do đó, bạn nên đề xuất nhiều hơn một biến thể của cùng một video với tốc độ bit khác nhau.\n\n**Như thế nào?** tiến hành xuất một biến thể với tốc độ bit giữa 964K và 3856K.\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-minrate 964K -maxrate 3856K -bufsize 2000K \\\nbunny_1080p_60fps_transrating_964_3856.mp4\n```\n\nThông thường chúng ta sẽ cùng sử dụng 2 tác vụ thay đổi tốc độ và thay đổi kích thước. 
Werner Robitza đã viết một chuỗi các bài viết nên đọc về [điều khiển tỉ lệ trong FFmpeg](http://slhck.info/posts/).\n\n## Thay đổi độ phân giải - Transsizing\n\n![transsizing](/img/transsizing.png)\n\n**Là gì?** là hành động thay đổi chất lượng video từ độ phân giải này sang một độ phân giải khác. Như đã nói trước đó, tác vụ thay đổi kích thước thường đi kèm với tác vụ thay đổi tốc độ.\n\n**Tại sao?** lý do tương tự như với tác vụ thay đổi tốc độ.\n\n**Như thế nào?** thay đổi độ phân giải từ `1080p` thành `480p`.\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-vf scale=480:-1 \\\nbunny_1080p_60fps_transsizing_480.mp4\n```\n\n## Mở rộng: phát trực tuyến thích ứng (Adaptive-streaming)\n\n![adaptive streaming](/img/adaptive-streaming.png)\n\n**Là gì?** là hành động xuất nhiều độ phân giải (hoặc tốc độ bit) và chia nội dung đa phương tiện thành các đoạn và truyền tải chúng thông qua giao thức http.\n\n**Tại sao?** để cung cấp nội dung đa phương tiện linh hoạt để có thể xem trên điện thoại thông minh hiệu năng thấp hoặc tivi 4K, nó cũng dễ dàng mở rộng và triển khai nhưng có thể tăng thêm độ trễ.\n\n**Như thế nào?** tạo ra một nội dung định dạng WebM thích ứng (adaptive) bằng cách sử dụng giao thức DASH.\n```bash\n# luồng video\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 160x90 -b:v 250k -keyint_min 150 -g 150 -an -f webm -dash 1 video_160x90_250k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 320x180 -b:v 500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_320x180_500k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 750k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_750k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 1000k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_1000k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 1280x720 -b:v 1500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_1280x720_1500k.webm\n\n# luồng audio\n$ ffmpeg -i 
bunny_1080p_60fps.mp4 -c:a libvorbis -b:a 128k -vn -f webm -dash 1 audio_128k.webm\n\n# tệp kê khai DASH\n$ ffmpeg \\\n -f webm_dash_manifest -i video_160x90_250k.webm \\\n -f webm_dash_manifest -i video_320x180_500k.webm \\\n -f webm_dash_manifest -i video_640x360_750k.webm \\\n -f webm_dash_manifest -i video_640x360_1000k.webm \\\n -f webm_dash_manifest -i video_1280x720_1500k.webm \\\n -f webm_dash_manifest -i audio_128k.webm \\\n -c copy -map 0 -map 1 -map 2 -map 3 -map 4 -map 5 \\\n -f webm_dash_manifest \\\n -adaptation_sets \"id=0,streams=0,1,2,3,4 id=1,streams=5\" \\\n manifest.mpd\n```\n\nPS: Tôi đã lấy ví dụ này từ bài [Giới thiệu cách thức xem lại WebM thích ứng bằng giao thức DASH](http://wiki.webmproject.org/adaptive-streaming/instructions-to-playback-adaptive-webm-using-dash)\n\n## Hơn thế nữa\n\nCòn [rất nhiều cách sử dụng khác nữa của FFmpeg](https://github.com/leandromoreira/digital_video_introduction/blob/master/encoding_pratical_examples.md#split-and-merge-smoothly).\nTôi sử dụng nó khi kết hợp với *iMovie* để xuất ra/chỉnh sửa một vài video cho nền tảng Youtube và bạn chắc chắn có thể sử dụng nó một cách chuyên nghiệp hơn.\n\n# Tìm hiểu thư viện FFmpeg libav sâu hơn\n\n> Bạn không nên lo lắng quá nhiều về âm thanh và hình ảnh?\n> **David Robert Jones**\n\nBởi vì [FFmpeg](#ffmpeg---bộ-công-cụ-dưới-dạng-câu-lệnh) là một câu lệnh rất hữu dụng để làm những tác vụ thiết yếu trên các tệp tin đa phương tiện, bằng cách nào chúng ta có thể sử dụng nó trong chương trình của chúng ta?\n\nFFmpeg được [kết hợp bởi một vài thư viện](https://www.ffmpeg.org/doxygen/trunk/index.html) mà có thể tích hợp vào trong chương trình của chúng ta.\nThông thường, khi bạn cài đặt FFmpeg, nó sẽ tự động cài tất cả các thư viện đó. 
Tôi sẽ tham chiếu đến tập các thư viện gọi là **FFmpeg libav**.\n\n>> Tựa đề này là lời tri ân dành cho chuỗi các bài viết của Zed Shaw [\"Học X chuyên sâu\"](https://learncodethehardway.org/), đặc biệt là cuốn sách của anh ấy \"Học ngôn ngữ C chuyên sâu\" (Learn C the Hard Way).\n\n## Chapter 0 - Hello world nổi tiếng\n\nChương trình Hello world này thực chất sẽ không hiển thị tin nhắn `\"hello world\"` trên màn hình terminal :tongue: Thay vào đó, chúng ta sẽ in ra thông tin của video, ví dụ như là định dạng tệp (container) của nó, thời lượng, độ phân giải, các kênh audio và cuối cùng, chúng ta sẽ **giải nén một số khung hình (frames) và lưu chúng lại như tệp tin hình ảnh.**\n\n### Kiến trúc thư viện FFmpeg libav\n\nTrước khi chúng ta bắt đầu viết chương trình, hãy học cách **kiến trúc thư viện FFmpeg libav** hoạt động và các thành phần của nó giao tiếp với nhau như thế nào.\n\nĐây là sơ đồ tiến trình giải nén một video:\n\n![kiến trúc thư viện ffmpeg libav - tiến trình giải nén](/img/decoding.png)\n\nĐầu tiên bạn sẽ cần tải lên một tệp tin đa phương tiện của bạn vào thành phần gọi là [`AVFormatContext`](https://ffmpeg.org/doxygen/trunk/structAVFormatContext.html) (Container của video còn được gọi là định dạng).\nNó thực chất không tải toàn bộ tệp tin: nó thường chỉ đọc phần đầu header của tệp tin.\n\nMột khi chúng ta đọc được ít nhất **phần đầu (header) của container**, chúng ta có thể truy cập vào các luồng dữ liệu của nó (nghĩ chúng như là phần thông tin chung của dữ liệu audio và video).\nMỗi luồng (stream) sẽ được lưu trong thành phần gọi là [`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html).\n\n> Luồng là một cái tên ưa thích đại diện cho một dòng dữ liệu liên tục.\n\nGiả sử chúng ta có một video chứa hai luồng dữ liệu: một luồng là audio được nén với [chuẩn nén AAC](https://en.wikipedia.org/wiki/Advanced_Audio_Coding) và luồng còn lại là video được nén với [chuẩn nén H264 (AVC)](https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC). 
Từ mỗi luồng, chúng ta có thể giải nén **từng mảnh (slices) của dữ liệu** gọi là gói (packet) mà chúng sẽ được tải vào những phần tử [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html).\n\nPhần **dữ liệu trong các gói vẫn được nén** và để giải nén các gói, chúng ta cần đưa chúng vào [`AVCodec`](https://ffmpeg.org/doxygen/trunk/structAVCodec.html) cụ thể.\n\nThành phần `AVCodec` sẽ giải mã chúng thành phần tử [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html) và cuối cùng, những phần tử này sẽ cho chúng ta những khung hình gốc không nén. Có thể nhận ra rằng thuật ngữ/tiến trình đều được sử dụng bởi cả luồng audio và video.\n\n### Các yêu cầu\n\nBởi có một số người đã [gặp vấn đề trong khi biên dịch hoặc chạy các ví dụ mẫu](https://github.com/leandromoreira/ffmpeg-libav-tutorial/issues?utf8=%E2%9C%93&q=is%3Aissue+is%3Aopen+compiling) **chúng ta sẽ sử dụng [`Docker`](https://docs.docker.com/install/) như là môi trường phát triển hay chạy thử,** chúng ta cũng sẽ sử dụng video \"The big buck bunny\" vì thế nếu bạn không có nó ở trên máy tính thì hãy chạy lệnh `make fetch_small_bunny_video`.\n\n### Chương 0 - lướt qua các dòng code\n\n> #### TLDR; hãy mở [code](/0_hello_world.c) và thực thi nó.\n> ```bash\n> $ make run_hello\n> ```\n\nChúng ta sẽ bỏ qua một số chi tiết, nhưng đừng lo lắng: [source code có sẵn trên github](/0_hello_world.c).\n\nChúng ta sẽ khởi tạo vùng nhớ cho thành phần [`AVFormatContext`](http://ffmpeg.org/doxygen/trunk/structAVFormatContext.html) để giữ các thông tin về định dạng tệp (container).\n\n```c\nAVFormatContext *pFormatContext = avformat_alloc_context();\n```\n\nBây giờ chúng ta sẽ mở tệp tin và đọc phần đầu (header) của nó và điền vào `AVFormatContext` với thông tin tối thiểu về định dạng (lưu ý rằng các chuẩn nén vẫn chưa được xác định).\nHàm được sử dụng để làm điều đó là [`avformat_open_input`](http://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga31d601155e9035d5b0e7efedc894ee49). 
Nó cần đầu vào là một `AVFormatContext`, một `tên file (filename)` và đối số tuỳ chọn: [`AVInputFormat`](https://ffmpeg.org/doxygen/trunk/structAVInputFormat.html) (nếu bạn đưa vào `NULL`, FFmpeg sẽ dự đoán định dạng) và [`AVDictionary`](https://ffmpeg.org/doxygen/trunk/structAVDictionary.html) (là các tuỳ chọn cho bộ demuxer).  \n\n```c\navformat_open_input(&pFormatContext, filename, NULL, NULL);\n```\n\nChúng ta có thể in tên định dạng và thời lượng đa phương tiện:\n\n```c\nprintf(\"Format %s, duration %lld us\", pFormatContext->iformat->long_name, pFormatContext->duration);\n```\n\nĐể truy cập vào `các luồng`, chúng ta cần đọc dữ liệu từ đa phương tiện. Hàm [`avformat_find_stream_info`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#gad42172e27cddafb81096939783b157bb) thực hiên điều đó.\nBây giờ, thành phần `pFormatContext->nb_streams` sẽ giữ số lượng các luồng và `pFormatContext->streams[i]` sẽ cho chúng ta các thông tin về luồng `i` (tương ứng với một [`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html)).\n\n```c\navformat_find_stream_info(pFormatContext,  NULL);\n```\n\nBây giờ chúng ta sẽ chạy vòng lặp qua tất cả các luồng.\n\n```c\nfor (int i = 0; i < pFormatContext->nb_streams; i++)\n{\n  //\n}\n```\n\nVới mỗi luồng, chúng ta sẽ cần [`AVCodecParameters`](https://ffmpeg.org/doxygen/trunk/structAVCodecParameters.html), nó miêu tả các thuộc tính của chuẩn nén được sử dụng với luồng `i`.\n\n```c\nAVCodecParameters *pLocalCodecParameters = pFormatContext->streams[i]->codecpar;\n```\n\nvới thuộc tính của chuẩn nén, chúng ta có thể tìm chuẩn nén thích hợp thông qua hàm [`avcodec_find_decoder`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga19a0ca553277f019dd5b0fec6e1f9dca) và tìm bộ giải nén sẵn có với mã định danh của chuẩn nén đó (code id) và trả về một [`AVCodec`](http://ffmpeg.org/doxygen/trunk/structAVCodec.html), thành phần biết cách thức thực hiện nén (en**CO**de) và giải nén (**DEC**ode) luồng dữ 
liệu.\n```c\nAVCodec *pLocalCodec = avcodec_find_decoder(pLocalCodecParameters->codec_id);\n```\n\nĐến giờ chúng ta có thể in thông tin về chuẩn nén\n\n```c\n// specific for video and audio\nif (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_VIDEO) {\n  printf(\"Video Codec: resolution %d x %d\", pLocalCodecParameters->width, pLocalCodecParameters->height);\n} else if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_AUDIO) {\n  printf(\"Audio Codec: %d channels, sample rate %d\", pLocalCodecParameters->channels, pLocalCodecParameters->sample_rate);\n}\n// general\nprintf(\"\\tCodec %s ID %d bit_rate %lld\", pLocalCodec->long_name, pLocalCodec->id, pLocalCodecParameters->bit_rate);\n```\n\nVới thông tin chuẩn nén, chúng ta khởi tạo vùng nhớ cho [`AVCodecContext`](https://ffmpeg.org/doxygen/trunk/structAVCodecContext.html), nó sẽ giữ nội dung của tiến trình giải mã/mã hoá, nhưng sau đó chúng ta cần điền nội dung chuẩn nén với các thông số đã xác định; chúng ta làm thực hiện với hàm [`avcodec_parameters_to_context`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#gac7b282f51540ca7a99416a3ba6ee0d16).\n\nMột khi chúng ta đã điền vào nội dung bộ mã hoá, chúng ta có thể mở bộ mã hoá. 
Chúng ta gọi hàm [`avcodec_open2`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d) và sau đó chúng ta có thể sử dụng nó.\n\n```c\nAVCodecContext *pCodecContext = avcodec_alloc_context3(pCodec);\navcodec_parameters_to_context(pCodecContext, pCodecParameters);\navcodec_open2(pCodecContext, pCodec, NULL);\n```\n\nBây giờ, chúng ta sẽ đọc các gói dữ liệu từ luồng stream và giải mã chúng thành các khung hình nhưng trước tiên, chúng ta cần khởi tạo bộ nhớ cho cả hai thành phần, [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html) và [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html).\n\n```c\nAVPacket *pPacket = av_packet_alloc();\nAVFrame *pFrame = av_frame_alloc();\n```\n\nHãy lấy các gói dữ liệu từ luồng stream với hàm [`av_read_frame`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga4fdb3084415a82e3810de6ee60e46a61) trong khi nó có dữ liệu.\n\n```c\nwhile (av_read_frame(pFormatContext, pPacket) >= 0) {\n  //...\n}\n```\n\n**Đưa gói dữ liệu thô** (hình đã nén) vào bộ giải mã, đi qua bộ mã hoá, bằng hàm [`avcodec_send_packet`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3).\n\n```c\navcodec_send_packet(pCodecContext, pPacket);\n```\n\nVà **Nhận hình ảnh thô** (hình đã giải nén) từ bộ giải mã, thông qua bộ mã hoá tương tự, bằng hàm [`avcodec_receive_frame`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c).\n\n```c\navcodec_receive_frame(pCodecContext, pFrame);\n```\n\nChúng ta có thể in ra số lượng khung hình, thông số [PTS](https://en.wikipedia.org/wiki/Presentation_timestamp), DTS, [frame type](https://en.wikipedia.org/wiki/Video_compression_picture_types) và nhiều hơn thế.\n\n```c\nprintf(\n    \"Frame %c (%d) pts %d dts %d key_frame %d [coded_picture_number %d, display_picture_number %d]\",\n    av_get_picture_type_char(pFrame->pict_type),\n    pCodecContext->frame_number,\n    
pFrame->pts,\n    pFrame->pkt_dts,\n    pFrame->key_frame,\n    pFrame->coded_picture_number,\n    pFrame->display_picture_number\n);\n```\n\nCuối cùng chúng ta có thể lưu lại những khung hình đã được giải nén thành một ảnh xám đơn giản [simple gray image](https://en.wikipedia.org/wiki/Netpbm_format#PGM_example). Quá trình này rất đơn giản, chúng ta sẽ dùng `pFrame->data` nơi mà index liên quan đến không gian màu [planes Y, Cb and Cr](https://en.wikipedia.org/wiki/YCbCr), chúng ta chỉ cần lấy phần tử đầu tiên với chỉ mục `0` (Y) để lưu thành hình ảnh.\n\n```c\nsave_gray_frame(pFrame->data[0], pFrame->linesize[0], pFrame->width, pFrame->height, frame_filename);\n\nstatic void save_gray_frame(unsigned char *buf, int wrap, int xsize, int ysize, char *filename)\n{\n    FILE *f;\n    int i;\n    f = fopen(filename,\"w\");\n    // writing the minimal required header for a pgm file format\n    // portable graymap format -> https://en.wikipedia.org/wiki/Netpbm_format#PGM_example\n    fprintf(f, \"P5\\n%d %d\\n%d\\n\", xsize, ysize, 255);\n\n    // writing line by line\n    for (i = 0; i < ysize; i++)\n        fwrite(buf + i * wrap, 1, xsize, f);\n    fclose(f);\n}\n```\n\nVậy là cuối cùng chúng ta có một ảnh xám với kích thước 2MB:\n\n![saved frame](/img/generated_frame.png)\n\n## Chapter 1 - Đồng bộ audio và video\n\n> **Player** - một nhà phát triển JS hoàn thành một trình phát video mới.\n\nTrước khi chúng ta đến với [ví dụ về  transcoding](#chapter-3---transcoding) hãy nói về **đồng bộ thời gian** , hoặc cách thức một trình phát video biết khi nào cần hiển thị hình ảnh.\n\nTrong ví dụ cuối cùng, chúng ta đã lưu một số khung hình có thể xem được ở đây:\n\n![frame 0](/img/hello_world_frames/frame0.png)\n![frame 1](/img/hello_world_frames/frame1.png)\n![frame 2](/img/hello_world_frames/frame2.png)\n![frame 3](/img/hello_world_frames/frame3.png)\n![frame 4](/img/hello_world_frames/frame4.png)\n![frame 5](/img/hello_world_frames/frame5.png)\n\nKhi chúng ta thiết kế trình 
phát video, chúng ta cần hiển thị từng khung hình theo một tốc độ nhất định, nếu không, sẽ rất khó để xem video một cách thoải mái bởi vì nó phát rất nhanh hoặc rất chậm.\n\nDo đó, chúng ta cần xác định một số logic để phát mỗi khung hình một cách mượt mà. Để xử lý vấn đề này, mỗi khung hình có một **mốc thời gian hiển thị** (PTS) tăng dần theo hệ số **timebase**, là một số hữu tỉ (trong đó mẫu số được biết đến như **timescale**), chia cho **tốc độ khung hình (fps)** \n\nSẽ dễ dàng để hiểu khi chúng ta nhìn vào một số ví dụ, hãy thực hiện một số kịch bản.\n\nVới `fps=60/1` và `timebase=1/60000`, mỗi PTS sẽ tăng lên `timescale / fps = 1000`, do đó **PTS thời gian thực** cho mỗi khung hình sẽ là (giả định bắt đầu từ 0):\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 1000, PTS_TIME = PTS * timebase = 0.016`\n* `frame=2, PTS = 2000, PTS_TIME = PTS * timebase = 0.033`\n\nVới kịch bản tương tự nhưng timebase bằng `1/60`. \n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 1, PTS_TIME = PTS * timebase = 0.016`\n* `frame=2, PTS = 2, PTS_TIME = PTS * timebase = 0.033`\n* `frame=3, PTS = 3, PTS_TIME = PTS * timebase = 0.050`\n\nVới `fps=25/1` và `timebase=1/75`, mỗi PTS sẽ tăng một khoảng `timescale / fps = 3` và mốc thời gian PTS sẽ là:\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 3, PTS_TIME = PTS * timebase = 0.04`\n* `frame=2, PTS = 6, PTS_TIME = PTS * timebase = 0.08`\n* `frame=3, PTS = 9, PTS_TIME = PTS * timebase = 0.12`\n* ...\n* `frame=24, PTS = 72, PTS_TIME = PTS * timebase = 0.96`\n* ...\n* `frame=4064, PTS = 12192, PTS_TIME = PTS * timebase = 162.56`\n\nBây giờ với `pts_time` chúng ta có thể tìm được cách kết xuất video đồng bộ với `pts_time` của audio hoặc với nhịp xung hệ thống. 
Thư viện FFmpeg libav cung cấp những thông tin này thông qua API:  \n\n- fps = [`AVStream->avg_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a946e1e9b89eeeae4cab8a833b482c1ad)\n- tbr = [`AVStream->r_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#ad63fb11cc1415e278e09ddc676e8a1ad)\n- tbn = [`AVStream->time_base`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a9db755451f14e2bf590d4b85d82b32e6)\n\nXem xét sâu hơn, những khung hình chúng ta lưu được gửi theo thứ tự DTS (frames: 1,6,4,2,3,5) nhưng phát theo thứ tự PTS (frames: 1,2,3,4,5). Cũng để ý xem hiệu quả của khung hình loại B so với khung hình loại P hoặc I.\n\n```\nLOG: AVStream->r_frame_rate 60/1\nLOG: AVStream->time_base 1/60000\n...\nLOG: Frame 1 (type=I, size=153797 bytes) pts 6000 key_frame 1 [DTS 0]\nLOG: Frame 2 (type=B, size=8117 bytes) pts 7000 key_frame 0 [DTS 3]\nLOG: Frame 3 (type=B, size=8226 bytes) pts 8000 key_frame 0 [DTS 4]\nLOG: Frame 4 (type=B, size=17699 bytes) pts 9000 key_frame 0 [DTS 2]\nLOG: Frame 5 (type=B, size=6253 bytes) pts 10000 key_frame 0 [DTS 5]\nLOG: Frame 6 (type=P, size=34992 bytes) pts 11000 key_frame 0 [DTS 1]\n```\n\n## Chapter 2 - Remuxing\n\nRemuxing là hành động thay đổi từ định dạng tệp (container) này sang định dạng tệp khác, ví dụ, chúng ta thay đổi một video định dạng [MPEG-4](https://en.wikipedia.org/wiki/MPEG-4_Part_14) sang định dạng [MPEG-TS](https://en.wikipedia.org/wiki/MPEG_transport_stream) mà không gặp nhiều khó khăn khi sử dụng FFmpeg: \n\n```bash\nffmpeg -i input.mp4 -c copy output.ts\n```\n\nNó sẽ bóc tách định dạng mp4 nhưng nó sẽ không giải mã hay mã hoá lại (`-c copy`) và cuối cùng, nó sẽ sắp xếp lại theo định dạng `mpegts`. 
Nếu bạn không cung cấp định dạng `-f`, ffmpeg sẽ cố gắng đoán nó bằng tên mở rộng của tập tin đầu ra.\n\nCách sử dụng thông thường của FFmpeg hoặc thư viện libav theo kiến trúc/ mẫu hoặc theo trình tự như sau:\n* **[lớp giao thức](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** - nó nhận `đầu vào`  (có thể là một `tập tin` hoặc là giao thức `rtmp` hay `HTTP`)\n* **[lớp định dạng](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - nó `bóc tách` nội dung trong đó, lấy được hầu hết siêu dữ liệu và các luồng của nó\n* **[lớp mã hoá](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - nó `giải mã` dữ liệu được nén trong các luồng <sup>*tuỳ chọn*</sup>\n* **[lớp điểm ảnh](https://ffmpeg.org/doxygen/trunk/group__lavfi.html)** - nó cũng cung cấp  `bộ lọc` tương tác với từng khung hình gốc (như thay đổi kích thước)<sup>*tuỳ chọn*</sup>\n* và sau đó thực hiện ngược lại các bước\n* **[lớp mã hoá](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - nó `mã hoá` (hoặc `mã hoá lại` hoặc `transcode`) những frame gốc<sup>*tuỳ chọn*</sup>\n* **[lớp định dạng](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - nó `sắp xếp` (hoặc `tái sắp xếp`) các luồng dữ liệu (dữ liệu nén)\n* **[lớp giao thức](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** - và cuối cùng những dữ liệu được sắp xếp sẽ được gửi đến `đầu ra` (tập tin khác hoặc có thể là một máy chủ mạng)\n\n![ffmpeg libav workflow](/img/ffmpeg_libav_workflow.jpeg)\n> Sơ đồ này được truyền cảm hứng từ sự nỗ lực của [Leixiaohua's](http://leixiaohua1020.github.io/#ffmpeg-development-examples) và [Slhck's](https://slhck.info/ffmpeg-encoding-course/#/9).\n\nBây giờ hãy xem ví dụ sử dụng libav để đưa ra hiệu ứng tương tự như trong `ffmpeg -i input.mp4 -c copy output.ts`.\n\nChúng ta đọc từ đầu vào (`input_format_context`) và thay đổi nó thành đầu ra khác (`output_format_context`).\n\n```c\nAVFormatContext *input_format_context = NULL;\nAVFormatContext *output_format_context = NULL;\n```\n\nChúng ta bắt đầu 
thực hiện khởi tạo vùng nhớ và mở định dạng đầu vào. Cho trường hợp này, chúng ta cần mở tập tin đầu vào và khởi tạo vùng nhớ cho tập tin đầu ra.\n\n```c\nif ((ret = avformat_open_input(&input_format_context, in_filename, NULL, NULL)) < 0) {\n  fprintf(stderr, \"Could not open input file '%s'\", in_filename);\n  goto end;\n}\nif ((ret = avformat_find_stream_info(input_format_context, NULL)) < 0) {\n  fprintf(stderr, \"Failed to retrieve input stream information\");\n  goto end;\n}\n\navformat_alloc_output_context2(&output_format_context, NULL, NULL, out_filename);\nif (!output_format_context) {\n  fprintf(stderr, \"Could not create output context\\n\");\n  ret = AVERROR_UNKNOWN;\n  goto end;\n}\n```\n\nChúng ta cũng cần sắp xếp lại các luồng stream video, audio và subtitle, vì để giữ chúng, chúng ta sẽ lưu thông tin của chúng trong mảng.\n\n```c\nnumber_of_streams = input_format_context->nb_streams;\nstreams_list = av_mallocz_array(number_of_streams, sizeof(*streams_list));\n```\n\nSau đó, chúng ta khởi tạo vùng nhớ yêu cầu, chúng ta thực hiện vòng lặp tất cả các luồng stream và với mỗi luồng stream chúng ta cần tạo một luồng stream đầu ra cho định dạng đầu ra, bằng cách dùng hàm [avformat_new_stream](https://ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827). 
Chú ý chúng ta sẽ đánh đấu tất cả các luồng stream kể cả không phải video, audio hoặc subtitle, vì vậy chúng ta có thể bỏ qua chúng.\n\n```c\nfor (i = 0; i < input_format_context->nb_streams; i++) {\n  AVStream *out_stream;\n  AVStream *in_stream = input_format_context->streams[i];\n  AVCodecParameters *in_codecpar = in_stream->codecpar;\n  if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&\n      in_codecpar->codec_type != AVMEDIA_TYPE_VIDEO &&\n      in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {\n    streams_list[i] = -1;\n    continue;\n  }\n  streams_list[i] = stream_index++;\n  out_stream = avformat_new_stream(output_format_context, NULL);\n  if (!out_stream) {\n    fprintf(stderr, \"Failed allocating output stream\\n\");\n    ret = AVERROR_UNKNOWN;\n    goto end;\n  }\n  ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar);\n  if (ret < 0) {\n    fprintf(stderr, \"Failed to copy codec parameters\\n\");\n    goto end;\n  }\n}\n```\n\nBây giờ chúng ta có thể tạo tập tin đầu ra.\n\n```c\nif (!(output_format_context->oformat->flags & AVFMT_NOFILE)) {\n  ret = avio_open(&output_format_context->pb, out_filename, AVIO_FLAG_WRITE);\n  if (ret < 0) {\n    fprintf(stderr, \"Could not open output file '%s'\", out_filename);\n    goto end;\n  }\n}\n\nret = avformat_write_header(output_format_context, NULL);\nif (ret < 0) {\n  fprintf(stderr, \"Error occurred when opening output file\\n\");\n  goto end;\n}\n```\n\nSau đó, chúng ta có thể sao chép các luồng stream, từng gói dữ liệu packet, từ luồng đầu vào đến luồng đầu ra. 
Thực hiện vòng lặp khi có gói dự liệu (`av_read_frame`), với mỗi gói dữ liệu, chúng ta cần tính lại PTS và DTS để kết thúc ghi nó lại (`av_interleaved_write_frame`) tới bộ định dạng đầu ra.\n\n```c\nwhile (1) {\n  AVStream *in_stream, *out_stream;\n  ret = av_read_frame(input_format_context, &packet);\n  if (ret < 0)\n    break;\n  in_stream  = input_format_context->streams[packet.stream_index];\n  if (packet.stream_index >= number_of_streams || streams_list[packet.stream_index] < 0) {\n    av_packet_unref(&packet);\n    continue;\n  }\n  packet.stream_index = streams_list[packet.stream_index];\n  out_stream = output_format_context->streams[packet.stream_index];\n  /* copy packet */\n  packet.pts = av_rescale_q_rnd(packet.pts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);\n  packet.dts = av_rescale_q_rnd(packet.dts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);\n  packet.duration = av_rescale_q(packet.duration, in_stream->time_base, out_stream->time_base);\n  // https://ffmpeg.org/doxygen/trunk/structAVPacket.html#ab5793d8195cf4789dfb3913b7a693903\n  packet.pos = -1;\n\n  //https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1\n  ret = av_interleaved_write_frame(output_format_context, &packet);\n  if (ret < 0) {\n    fprintf(stderr, \"Error muxing packet\\n\");\n    break;\n  }\n  av_packet_unref(&packet);\n}\n```\n\nĐể kết thúc chúng ta cần viết phần kết thúc luồng stream tới tập tin đầu ra với hàm [av_write_trailer](https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga7f14007e7dc8f481f054b21614dfec13)\n\n```c\nav_write_trailer(output_format_context);\n```\n\nTừ giờ chúng ta đã sẵn sàng để kiểm tra nó và bài kiểm tra đầu tiên sẽ là chuyển đổi định dạng video từ MP4 sang MPEG-TS. Chúng ta cơ bản thực hiện lệnh  `ffmpeg input.mp4 -c copy output.ts` với thư viện libav.\n\n```bash\nmake run_remuxing_ts\n```\n\nNó đã làm việc!!! 
Bạn không tin tôi ư?! Để chắc chắn, chúng ta có thể kiểm tra nó với `ffprobe`\n\n```bash\nffprobe -i remuxed_small_bunny_1080p_60fps.ts\n\nInput #0, mpegts, from 'remuxed_small_bunny_1080p_60fps.ts':\n  Duration: 00:00:10.03, start: 0.000000, bitrate: 2751 kb/s\n  Program 1\n    Metadata:\n      service_name    : Service01\n      service_provider: FFmpeg\n    Stream #0:0[0x100]: Video: h264 (High) ([27][0][0][0] / 0x001B), yuv420p(progressive), 1920x1080 [SAR 1:1 DAR 16:9], 60 fps, 60 tbr, 90k tbn, 120 tbc\n    Stream #0:1[0x101]: Audio: ac3 ([129][0][0][0] / 0x0081), 48000 Hz, 5.1(side), fltp, 320 kb/s\n```\n\nTổng hợp lại những gì chúng ta đã làm theo sơ đồ, chúng ta xem lại bài mở đầu [ý tưởng libav hoạt động](https://github.com/leandromoreira/ffmpeg-libav-tutorial#ffmpeg-libav-architecture) và như đã thấy chúng ta bỏ qua phần mã hoá.\n\n![remuxing libav components](/img/remuxing_libav_components.png)\n\nTrước khi kết thúc chương này, tôi muốn chỉ ra phần quan trọng nhất của tiến trình remuxing, **bạn có thể đưa các tuỳ chọn vào bộ muxer**. 
Hãy nói chúng ta muốn chuyển định dạng [MPEG-DASH](https://developer.mozilla.org/en-US/docs/Web/Apps/Fundamentals/Audio_and_video_delivery/Setting_up_adaptive_streaming_media_sources#MPEG-DASH_Encoding), để giải quyết vấn đề này, chúng ta cần sử dụng định dạng [fragmented mp4](https://stackoverflow.com/a/35180327) (thỉnh thoảng được giới thiệu như `fmp4`) thay vì MPEG-TS hoặc thuần MPEG-4.\n\nVới [việc thực hiện dễ dàng bằng câu lệnh](https://developer.mozilla.org/en-US/docs/Web/API/Media_Source_Extensions_API/Transcoding_assets_for_MSE#Fragmenting).\n\n```\nffmpeg -i non_fragmented.mp4 -movflags frag_keyframe+empty_moov+default_base_moof fragmented.mp4\n```\n\nDùng libav cũng dễ dàng như câu lệnh, chúng ta cần đưa các tuỳ chọn khi ghi header đầu ra, trước khi sao chép các gói dữ liệu packet.\n\n```c\nAVDictionary* opts = NULL;\nav_dict_set(&opts, \"movflags\", \"frag_keyframe+empty_moov+default_base_moof\", 0);\nret = avformat_write_header(output_format_context, &opts);\n```\n\nChúng ta có thể tạo tập tin fragmented mp4:\n\n```bash\nmake run_remuxing_fragmented_mp4\n```\n\nNhưng để chắc chắn rằng tôi không nói dối, bạn có thể sử dụng một công cụ tiện ích [gpac/mp4box.js](http://download.tsi.telecom-paristech.fr/gpac/mp4box.js/filereader.html) hoặc [http://mp4parser.com/](http://mp4parser.com/) để nhìn sự khác biệt, đầu tiên tải lên tập tin mp4 \"thông thường\"\n\n![mp4 boxes](/img/boxes_normal_mp4.png)\n\nNhư bạn thấy, nó chỉ có duy nhất một box `mdat`, **nơi chứa khung hình video và audio**. 
Giờ hãy tải lên tệp tin định dạng fragmented mp4 để nhìn chúng được chia thành nhiều box `mdat`\n\n![fragmented mp4 boxes](/img/boxes_fragmente_mp4.png)\n\n## Chapter 3 - Transcoding\n\n> #### TLDR; chỉ ra bộ mã hoá [code](/3_transcoding.c) và thực thi.\n> ```bash\n> $ make run_transcoding\n> ```\n> Chúng ta sẽ bỏ qua chi tiết, nhưng đừng lo lắng: [source code có sẵn trên github](/3_transcoding.c).\n\n\nỞ chương này, chúng ta sẽ tạo một bộ chuyển đổi chuẩn nén transcoder tối giản nhất, viết bằng ngôn ngữ C, có thể chuyển đổi video từ chuẩn nén H264 thành H265 bằng thư viện **FFmpeg/libav**, cụ thể là  [libavcodec](https://ffmpeg.org/libavcodec.html), libavformat, và libavutil.\n\n![media transcoding flow](/img/transcoding_flow.png)\n\n> _Tóm tắt nhanh:_ [**AVFormatContext**](https://www.ffmpeg.org/doxygen/trunk/structAVFormatContext.html) là sự trừu tượng cho các định dạng tệp tin đa phương tiện, hay còn gọi là container (ví dụ: MKV, MP4, Webm, TS). [**AVStream**](https://www.ffmpeg.org/doxygen/trunk/structAVStream.html) đại diện mỗi loại dữ liệu của định dạng đã cho (ví dụ: audio, video, subtitle, metadata). [**AVPacket**](https://www.ffmpeg.org/doxygen/trunk/structAVPacket.html) là một phần của dữ liệu đã nén chứa trong `AVStream`, nó có thể được giải mã bởi [**AVCodec**](https://www.ffmpeg.org/doxygen/trunk/structAVCodec.html) (ví dụ: av1, h264, vp9, hevc), tạo ra dữ liệu gốc gọi là [**AVFrame**](https://www.ffmpeg.org/doxygen/trunk/structAVFrame.html).\n\n### Transmuxing\n\nHãy bắt đầu với sự hoạt động transmuxing đơn giản và sau đó chúng ta có thể xây dựng dựa trên code đó, bước đầu tiên là **tải tập tin đầu vào**. 
\n\n```c\n// Allocate an AVFormatContext\navfc = avformat_alloc_context();\n// Open an input stream and read the header.\navformat_open_input(avfc, in_filename, NULL, NULL);\n// Read packets of a media file to get stream information.\navformat_find_stream_info(avfc, NULL);\n```\n\nChúng ta sẽ cài đặt một bộ giải mã, `AVFormatContext` sẽ cho phép chúng ta truy cập tất cả thành phần `AVStream` và mỗi thành phần trong số chúng, chúng ta có thể nhận `AVCodec` và tạo `AVCodecContext` chi tiết và cuối cùng chúng ta có thể mở codec nhận được, do đó chúng ta có thể thực hiện quá trình giải mã.  \n\n>  Thành phần [**AVCodecContext**](https://www.ffmpeg.org/doxygen/trunk/structAVCodecContext.html) giữ những dữ liệu về cấu hình đa phương tiện như tốc độ bit, tốc độ khung hình, tốc độ mẫu, các kênh, chiều cao và rất nhiều thứ khác nữa.\n\n```c\nfor (int i = 0; i < avfc->nb_streams; i++)\n{\n  AVStream *avs = avfc->streams[i];\n  AVCodec *avc = avcodec_find_decoder(avs->codecpar->codec_id);\n  AVCodecContext *avcc = avcodec_alloc_context3(*avc);\n  avcodec_parameters_to_context(*avcc, avs->codecpar);\n  avcodec_open2(*avcc, *avc, NULL);\n}\n```\n\nChúng ta cần chuẩn bị tập tin đầu ra cho việc transmuxing, đầu tiên chúng ta **khởi tạo vùng nhớ** cho `AVFormatContext` đầu ra. Chúng ta tạo **từng luồng stream** cho định dạng đầu ra. 
Để đóng gói luồng thích hợp, chúng ta **sao chép các thông số codec** từ bộ giải mã.\n\nChúng ta **bật cờ** `AV_CODEC_FLAG_GLOBAL_HEADER` để nói cho bộ mã hoá rằng nó có thể sử dụng global header và cuối cùng chúng ta mở **tập tin để ghi** đầu ra và giữ header.\n\n```c\navformat_alloc_output_context2(&encoder_avfc, NULL, NULL, out_filename);\n\nAVStream *avs = avformat_new_stream(encoder_avfc, NULL);\navcodec_parameters_copy(avs->codecpar, decoder_avs->codecpar);\n\nif (encoder_avfc->oformat->flags & AVFMT_GLOBALHEADER)\n  encoder_avfc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;\n\navio_open(&encoder_avfc->pb, encoder->filename, AVIO_FLAG_WRITE);\navformat_write_header(encoder->avfc, &muxer_opts);\n\n```\n\nChúng ta nhận `AVPacket` từ bộ giải mã, điều chỉnh timestamp, và ghi gói dữ liệu packet thích hợp với tập tin đầu ra. Mặc dù hàm `av_interleaved_write_frame` nói \"ghi khung hình\", nhưng chúng ta đang lưu các gói packet. Chúng ta kết thúc quá trình transmuxing bằng cách ghi phần đuôi (trailer) vào tập tin.\n\n```c\nAVFrame *input_frame = av_frame_alloc();\nAVPacket *input_packet = av_packet_alloc();\n\nwhile (av_read_frame(decoder_avfc, input_packet) >= 0)\n{\n  av_packet_rescale_ts(input_packet, decoder_video_avs->time_base, encoder_video_avs->time_base);\n  av_interleaved_write_frame(*avfc, input_packet) < 0));\n}\n\nav_write_trailer(encoder_avfc);\n```\n\n### Transcoding\n\nPhần trước đã đưa ra chương trình transmuxer đơn giản, bây giờ chúng ta sẽ thêm vào khả năng cho tập tin mã hoá, đặc biệt chúng ta sẽ thực hiện transcode video từ `h264` sang `h265`\n\nSau khi chúng ta chuẩn bị bộ giải mã, trước khi chúng ta sắp xếp các tập tin đầu ra, chúng ta sẽ cài đặt bộ mã hoá.\n\n* Tạo video `AVStream` trong bộ mã hoá, [`avformat_new_stream`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827)\n* Sử dụng `AVCodec` là `libx265`, 
[`avcodec_find_encoder_by_name`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__encoding.html#gaa614ffc38511c104bdff4a3afa086d37)\n* Tạo `AVCodecContext` dựa vào codec được tạo, [`avcodec_alloc_context3`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#gae80afec6f26df6607eaacf39b561c315)\n* Cài đặt thuộc tính cơ sở cho phiên transcoding, và\n* Mở codec và sao chép thông số từ context tới luồng stream. [`avcodec_open2`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d) và [`avcodec_parameters_from_context`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga0c7058f764778615e7978a1821ab3cfe)\n\n```c\nAVRational input_framerate = av_guess_frame_rate(decoder_avfc, decoder_video_avs, NULL);\nAVStream *video_avs = avformat_new_stream(encoder_avfc, NULL);\n\nchar *codec_name = \"libx265\";\nchar *codec_priv_key = \"x265-params\";\n// we're going to use internal options for the x265\n// it disables the scene change detection and fix then\n// GOP on 60 frames.\nchar *codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0\";\n\nAVCodec *video_avc = avcodec_find_encoder_by_name(codec_name);\nAVCodecContext *video_avcc = avcodec_alloc_context3(video_avc);\n// encoder codec params\nav_opt_set(sc->video_avcc->priv_data, codec_priv_key, codec_priv_value, 0);\nvideo_avcc->height = decoder_ctx->height;\nvideo_avcc->width = decoder_ctx->width;\nvideo_avcc->pix_fmt = video_avc->pix_fmts[0];\n// control rate\nvideo_avcc->bit_rate = 2 * 1000 * 1000;\nvideo_avcc->rc_buffer_size = 4 * 1000 * 1000;\nvideo_avcc->rc_max_rate = 2 * 1000 * 1000;\nvideo_avcc->rc_min_rate = 2.5 * 1000 * 1000;\n// time base\nvideo_avcc->time_base = av_inv_q(input_framerate);\nvideo_avs->time_base = sc->video_avcc->time_base;\n\navcodec_open2(sc->video_avcc, sc->video_avc, NULL);\navcodec_parameters_from_context(sc->video_avs->codecpar, sc->video_avcc);\n```\n\nChúng ta cần mở rộng vòng lặp giải mã cho việc transcoding luồng video:\n\n* Gửi 
`AVPacket` rỗng tới bộ giải mã, [`avcodec_send_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3)\n* Nhận `AVFrame` đã giải nén, [`avcodec_receive_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c)\n* Bắt đầu transcode khung hình gốc này,\n* Gửi khung hình gốc, [`avcodec_send_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga9395cb802a5febf1f00df31497779169)\n* Nhận dữ liệu nén lại dựa trên codec, `AVPacket`, [`avcodec_receive_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga5b8eff59cf259747cf0b31563e38ded6)\n* Cài đặt timestamp, và [`av_packet_rescale_ts`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__packet.html#gae5c86e4d93f6e7aa62ef2c60763ea67e)\n* Ghi nó vào tập tin đầu ra. [`av_interleaved_write_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1)\n\n```c\nAVFrame *input_frame = av_frame_alloc();\nAVPacket *input_packet = av_packet_alloc();\n\nwhile (av_read_frame(decoder_avfc, input_packet) >= 0)\n{\n  int response = avcodec_send_packet(decoder_video_avcc, input_packet);\n  while (response >= 0) {\n    response = avcodec_receive_frame(decoder_video_avcc, input_frame);\n    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n      break;\n    } else if (response < 0) {\n      return response;\n    }\n    if (response >= 0) {\n      encode(encoder_avfc, decoder_video_avs, encoder_video_avs, decoder_video_avcc, input_packet->stream_index);\n    }\n    av_frame_unref(input_frame);\n  }\n  av_packet_unref(input_packet);\n}\nav_write_trailer(encoder_avfc);\n\n// used function\nint encode(AVFormatContext *avfc, AVStream *dec_video_avs, AVStream *enc_video_avs, AVCodecContext video_avcc int index) {\n  AVPacket *output_packet = av_packet_alloc();\n  int response = avcodec_send_frame(video_avcc, input_frame);\n\n  while (response >= 0) 
{\n    response = avcodec_receive_packet(video_avcc, output_packet);\n    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n      break;\n    } else if (response < 0) {\n      return -1;\n    }\n\n    output_packet->stream_index = index;\n    output_packet->duration = enc_video_avs->time_base.den / enc_video_avs->time_base.num / dec_video_avs->avg_frame_rate.num * dec_video_avs->avg_frame_rate.den;\n\n    av_packet_rescale_ts(output_packet, dec_video_avs->time_base, enc_video_avs->time_base);\n    response = av_interleaved_write_frame(avfc, output_packet);\n  }\n  av_packet_unref(output_packet);\n  av_packet_free(&output_packet);\n  return 0;\n}\n\n```\n\nChúng ta chuyển đổi luồng media từ `h264` tới `h265`, như phiên bản `h265` mong đợi của tập tin media sẽ có kích thước nhỏ hơn `h264` tuy nhiên [chương trình được tạo](/3_transcoding.c) có khả năng:\n\n```c\n\n  /*\n   * H264 -> H265\n   * Audio -> remuxed (untouched)\n   * MP4 - MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx265\";\n  sp.codec_priv_key = \"x265-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * Audio -> remuxed (untouched)\n   * MP4 - MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * Audio -> remuxed (untouched)\n   * MP4 - fragmented MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n  sp.muxer_opt_key = \"movflags\";\n  sp.muxer_opt_value = \"frag_keyframe+empty_moov+delay_moov+default_base_moof\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * 
Audio -> AAC\n   * MP4 - MPEG-TS\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 0;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n  sp.audio_codec = \"aac\";\n  sp.output_extension = \".ts\";\n\n  /* WIP :P  -> it's not playing on VLC, the final bit rate is huge\n   * H264 -> VP9\n   * Audio -> Vorbis\n   * MP4 - WebM\n   */\n  //StreamingParams sp = {0};\n  //sp.copy_audio = 0;\n  //sp.copy_video = 0;\n  //sp.video_codec = \"libvpx-vp9\";\n  //sp.audio_codec = \"libvorbis\";\n  //sp.output_extension = \".webm\";\n\n```\n\n> Bây giờ, thành thật mà nói, điều này [khó hơn tôi nghĩ](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54) nó là như vậy và tôi phải đào sâu hơn [source code câu lệnh FFmpeg](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54#issuecomment-570746749) và kiểm tra nó rất nhiều và tôi nghĩ tôi đang bỏ quên một số thứ bởi vì tôi phải thực hiện `force-cfr` cho `h264` để làm việc và tôi vẫn xem một số tin nhắn cảnh báo như `warning messages (forced frame type (5) at 80 was changed to frame type (3))`.\n"
  },
  {
    "path": "README.md",
    "content": "[🇨🇳](/README-cn.md \"Simplified Chinese\")\n[🇰🇷](/README-ko.md \"Korean\")\n[🇪🇸](/README-es.md \"Spanish\")\n[🇻🇳](/README-vn.md \"Vietnamese\")\n[🇧🇷](/README-pt.md \"Portuguese\")\n[🇷🇺](/README-ru.md \"Russian\")\n\n[![license](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)\n\nI was looking for a tutorial/book that would teach me how to start to use [FFmpeg](https://www.ffmpeg.org/) as a library (a.k.a. libav) and then I found the [\"How to write a video player in less than 1k lines\"](http://dranger.com/ffmpeg/) tutorial.\nUnfortunately it was deprecated, so I decided to write this one.\n\nMost of the code in here will be in C **but don't worry**: you can easily understand and apply it to your preferred language.\nFFmpeg libav has lots of bindings for many languages like [python](https://pyav.org/), [go](https://github.com/imkira/go-libav) and even if your language doesn't have it, you can still support it through the `ffi` (here's an example with [Lua](https://github.com/daurnimator/ffmpeg-lua-ffi/blob/master/init.lua)).\n\nWe'll start with a quick lesson about what is video, audio, codec and container and then we'll go to a crash course on how to use `FFmpeg` command line and finally we'll write code, feel free to skip directly to[ ](http://newmediarockstars.com/wp-content/uploads/2015/11/nintendo-direct-iwata.jpg)the section [Learn FFmpeg libav the Hard Way.](#learn-ffmpeg-libav-the-hard-way)\n\nSome people used to say that the Internet video streaming is the future of the traditional TV, in any case, the FFmpeg is something that is worth studying.\n\n__Table of Contents__\n\n* [Intro](#intro)\n  * [video - what you see!](#video---what-you-see)\n  * [audio - what you listen!](#audio---what-you-listen)\n  * [codec - shrinking data](#codec---shrinking-data)\n  * [container - a comfy place for audio and video](#container---a-comfy-place-for-audio-and-video)\n* [FFmpeg - 
command line](#ffmpeg---command-line)\n  * [FFmpeg command line tool 101](#ffmpeg-command-line-tool-101)\n* [Common video operations](#common-video-operations)\n  * [Transcoding](#transcoding)\n  * [Transmuxing](#transmuxing)\n  * [Transrating](#transrating)\n  * [Transsizing](#transsizing)\n  * [Bonus Round: Adaptive Streaming](#bonus-round-adaptive-streaming)\n  * [Going beyond](#going-beyond)\n* [Learn FFmpeg libav the Hard Way](#learn-ffmpeg-libav-the-hard-way)\n  * [Chapter 0 - The infamous hello world](#chapter-0---the-infamous-hello-world)\n    * [FFmpeg libav architecture](#ffmpeg-libav-architecture)\n  * [Chapter 1 - timing](#chapter-1---syncing-audio-and-video)\n  * [Chapter 2 - remuxing](#chapter-2---remuxing)\n  * [Chapter 3 - transcoding](#chapter-3---transcoding)\n\n# Intro\n\n## video - what you see!\n\nIf you have a sequence of images and change them at a given frequency (let's say [24 images per second](https://www.filmindependent.org/blog/hacking-film-24-frames-per-second/)), you will create an [illusion of movement](https://en.wikipedia.org/wiki/Persistence_of_vision).\nIn summary this is the very basic idea behind a video: **a series of pictures / frames running at a given rate**.\n\n<img src=\"https://upload.wikimedia.org/wikipedia/commons/1/1f/Linnet_kineograph_1886.jpg\" title=\"flip book\" height=\"280\"></img>\n\nZeitgenössische Illustration (1886)\n\n## audio - what you listen!\n\nAlthough a muted video can express a variety of feelings, adding sound to it brings more pleasure to the experience.\n\nSound is the vibration that propagates as a wave of pressure, through the air or any other transmission medium, such as a gas, liquid or solid.\n\n> In a digital audio system, a microphone converts sound to an analog electrical signal, then an analog-to-digital converter (ADC) — typically using [pulse-code modulation (PCM)](https://en.wikipedia.org/wiki/Pulse-code_modulation) - converts the analog signal into a digital signal.\n\n![audio 
analog to digital](https://upload.wikimedia.org/wikipedia/commons/thumb/c/c7/CPT-Sound-ADC-DAC.svg/640px-CPT-Sound-ADC-DAC.svg.png \"audio analog to digital\")\n>[Source](https://commons.wikimedia.org/wiki/File:CPT-Sound-ADC-DAC.svg)\n\n## codec - shrinking data\n\n> CODEC is an electronic circuit or software that **compresses or decompresses digital audio/video.** It converts raw (uncompressed) digital audio/video to a compressed format or vice versa.\n> https://en.wikipedia.org/wiki/Video_codec\n\nBut if we chose to pack millions of images in a single file and called it a movie, we might end up with a huge file. Let's do the math:\n\nSuppose we are creating a video with a resolution of `1080 x 1920` (height x width) and that we'll spend `3 bytes` per pixel (the minimal point on a screen) to encode the color (or [24 bit color](https://en.wikipedia.org/wiki/Color_depth#True_color_.2824-bit.29), which gives us 16,777,216 different colors) and this video runs at `24 frames per second` and it is `30 minutes` long.\n\n```c\ntoppf = 1080 * 1920 //total_of_pixels_per_frame\ncpp = 3 //cost_per_pixel\ntis = 30 * 60 //time_in_seconds\nfps = 24 //frames_per_second\n\nrequired_storage = tis * fps * toppf * cpp\n```\n\nThis video would require approximately `250.28GB` of storage or `1.19 Gbps` of bandwidth! 
That's why we need to use a [CODEC](https://github.com/leandromoreira/digital_video_introduction#how-does-a-video-codec-work).\n\n## container - a comfy place for audio and video\n\n> A container or wrapper format is a metafile format whose specification describes how different elements of data and metadata coexist in a computer file.\n> https://en.wikipedia.org/wiki/Digital_container_format\n\nA **single file that contains all the streams** (mostly the audio and video) and it also provides **synchronization and general metadata**, such as title, resolution and etc.\n\nUsually we can infer the format of a file by looking at its extension: for instance a `video.webm` is probably a video using the container [`webm`](https://www.webmproject.org/).\n\n![container](/img/container.png)\n\n# FFmpeg - command line\n\n> A complete, cross-platform solution to record, convert and stream audio and video.\n\nTo work with multimedia we can use the AMAZING tool/library called [FFmpeg](https://www.ffmpeg.org/). 
Chances are you already know/use it directly or indirectly (do you use [Chrome?](https://www.chromium.org/developers/design-documents/video)).\n\nIt has a command line program called `ffmpeg`, a very simple yet powerful binary.\nFor instance, you can convert from `mp4` to the container `avi` just by typing the following command:\n\n```bash\n$ ffmpeg -i input.mp4 output.avi\n```\n\nWe just made a **remuxing** here, which is converting from one container to another one.\nTechnically FFmpeg could also be doing a transcoding but we'll talk about that later.\n\n## FFmpeg command line tool 101\n\nFFmpeg does have a [documentation](https://www.ffmpeg.org/ffmpeg.html) that does a great job of explaining how it works.\n\n```bash\n# you can also look for the documentation using the command line\n\nffmpeg -h full | grep -A 10 -B 10 avoid_negative_ts\n```\n\nTo make things short, the FFmpeg command line program expects the following argument format to perform its actions `ffmpeg {1} {2} -i {3} {4} {5}`, where:\n\n1. global options\n2. input file options\n3. input url\n4. output file options\n5. 
output url\n\nThe parts 2, 3, 4 and 5 can be as many as you need.\nIt's easier to understand this argument format in action:\n\n``` bash\n# WARNING: this file is around 300MB\n$ wget -O bunny_1080p_60fps.mp4 http://distribution.bbb3d.renderfarming.net/video/mp4/bbb_sunflower_1080p_60fps_normal.mp4\n\n$ ffmpeg \\\n-y \\ # global options\n-c:a libfdk_aac \\ # input options\n-i bunny_1080p_60fps.mp4 \\ # input url\n-c:v libvpx-vp9 -c:a libvorbis \\ # output options\nbunny_1080p_60fps_vp9.webm # output url\n```\nThis command takes an input file `mp4` containing two streams (an audio encoded with `aac` CODEC and a video encoded using `h264` CODEC) and converts it to `webm`, changing its audio and video CODECs too.\n\nWe could simplify the command above but then be aware that FFmpeg will adopt or guess the default values for you.\nFor instance when you just type `ffmpeg -i input.avi output.mp4` what audio/video CODEC does it use to produce the `output.mp4`?\n\nWerner Robitza wrote a must read/execute [tutorial about encoding and editing with FFmpeg](http://slhck.info/ffmpeg-encoding-course/#/).\n\n# Common video operations\n\nWhile working with audio/video we usually do a set of tasks with the media.\n\n## Transcoding\n\n![transcoding](/img/transcoding.png)\n\n**What?** the act of converting one of the streams (audio or video) from one CODEC to another one.\n\n**Why?** sometimes some devices (TVs, smartphones, console and etc) don't support X but Y and newer CODECs provide better compression rate.\n\n**How?** converting an `H264` (AVC) video to an `H265` (HEVC).\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-c:v libx265 \\\nbunny_1080p_60fps_h265.mp4\n```\n\n## Transmuxing\n\n![transmuxing](/img/transmuxing.png)\n\n**What?** the act of converting from one format (container) to another one.\n\n**Why?** sometimes some devices (TVs, smartphones, console and etc) don't support X but Y and sometimes newer containers provide modern required features.\n\n**How?** 
converting a `mp4` to a `ts`.\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-c copy \\ # just saying to ffmpeg to skip encoding\nbunny_1080p_60fps.ts\n```\n\n## Transrating\n\n![transrating](/img/transrating.png)\n\n**What?** the act of changing the bit rate, or producing other renditions.\n\n**Why?** people will try to watch your video in a `2G` (edge) connection using a less powerful smartphone or in a `fiber` Internet connection on their 4K TVs therefore you should offer more than one rendition of the same video with different bit rate.\n\n**How?** producing a rendition with bit rate between 964K and 3856K.\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-minrate 964K -maxrate 3856K -bufsize 2000K \\\nbunny_1080p_60fps_transrating_964_3856.mp4\n```\n\nUsually we'll be using transrating with transsizing. Werner Robitza wrote another must read/execute [series of posts about FFmpeg rate control](http://slhck.info/posts/).\n\n## Transsizing\n\n![transsizing](/img/transsizing.png)\n\n**What?** the act of converting from one resolution to another one. 
As said before transsizing is often used with transrating.\n\n**Why?** reasons are about the same as for the transrating.\n\n**How?** converting a `1080p` to a `480p` resolution.\n```bash\n$ ffmpeg \\\n-i bunny_1080p_60fps.mp4 \\\n-vf scale=480:-1 \\\nbunny_1080p_60fps_transsizing_480.mp4\n```\n\n## Bonus Round: Adaptive Streaming\n\n![adaptive streaming](/img/adaptive-streaming.png)\n\n**What?** the act of producing many resolutions (bit rates) and split the media into chunks and serve them via http.\n\n**Why?** to provide a flexible media that can be watched on a low end smartphone or on a 4K TV, it's also easy to scale and deploy but it can add latency.\n\n**How?** creating an adaptive WebM using DASH.\n```bash\n# video streams\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 160x90 -b:v 250k -keyint_min 150 -g 150 -an -f webm -dash 1 video_160x90_250k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 320x180 -b:v 500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_320x180_500k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 750k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_750k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 640x360 -b:v 1000k -keyint_min 150 -g 150 -an -f webm -dash 1 video_640x360_1000k.webm\n\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:v libvpx-vp9 -s 1280x720 -b:v 1500k -keyint_min 150 -g 150 -an -f webm -dash 1 video_1280x720_1500k.webm\n\n# audio streams\n$ ffmpeg -i bunny_1080p_60fps.mp4 -c:a libvorbis -b:a 128k -vn -f webm -dash 1 audio_128k.webm\n\n# the DASH manifest\n$ ffmpeg \\\n -f webm_dash_manifest -i video_160x90_250k.webm \\\n -f webm_dash_manifest -i video_320x180_500k.webm \\\n -f webm_dash_manifest -i video_640x360_750k.webm \\\n -f webm_dash_manifest -i video_640x360_1000k.webm \\\n -f webm_dash_manifest -i video_1280x720_1500k.webm \\\n -f webm_dash_manifest -i audio_128k.webm \\\n -c copy -map 0 -map 1 -map 2 -map 3 -map 4 -map 5 \\\n -f webm_dash_manifest \\\n 
-adaptation_sets \"id=0,streams=0,1,2,3,4 id=1,streams=5\" \\\n manifest.mpd\n```\n\nPS: I stole this example from the [Instructions to playback Adaptive WebM using DASH](http://wiki.webmproject.org/adaptive-streaming/instructions-to-playback-adaptive-webm-using-dash)\n\n## Going beyond\n\nThere are [many and many other usages for FFmpeg](https://github.com/leandromoreira/digital_video_introduction/blob/master/encoding_pratical_examples.md#split-and-merge-smoothly).\nI use it in conjunction with *iMovie* to produce/edit some videos for YouTube and you can certainly use it professionally.\n\n# Learn FFmpeg libav the Hard Way\n\n> Don't you wonder sometimes 'bout sound and vision?\n> **David Robert Jones**\n\nSince the [FFmpeg](#ffmpeg---command-line) is so useful as a command line tool to do essential tasks over the media files, how can we use it in our programs?\n\nFFmpeg is [composed by several libraries](https://www.ffmpeg.org/doxygen/trunk/index.html) that can be integrated into our own programs.\nUsually, when you install FFmpeg, it installs automatically all these libraries. 
I'll be referring to the set of these libraries as **FFmpeg libav**.\n\n> This title is a homage to Zed Shaw's series [Learn X the Hard Way](https://learncodethehardway.org/), particularly his book Learn C the Hard Way.\n\n## Chapter 0 - The infamous hello world\nThis hello world actually won't show the message `\"hello world\"` in the terminal :tongue:\nInstead we're going to **print out information about the video**, things like its format (container), duration, resolution, audio channels and, in the end, we'll **decode some frames and save them as image files**.\n\n### FFmpeg libav architecture\n\nBut before we start to code, let's learn how **FFmpeg libav architecture** works and how its components communicate with others.\n\nHere's a diagram of the process of decoding a video:\n\n![ffmpeg libav architecture - decoding process](/img/decoding.png)\n\nYou'll first need to load your media file into a component called [`AVFormatContext`](https://ffmpeg.org/doxygen/trunk/structAVFormatContext.html) (the video container is also known as format).\nIt actually doesn't fully load the whole file: it often only reads the header.\n\nOnce we loaded the minimal **header of our container**, we can access its streams (think of them as a rudimentary audio and video data).\nEach stream will be available in a component called [`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html).\n\n> Stream is a fancy name for a continuous flow of data.\n\nSuppose our video has two streams: an audio encoded with [AAC CODEC](https://en.wikipedia.org/wiki/Advanced_Audio_Coding) and a video encoded with [H264 (AVC) CODEC](https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC). 
From each stream we can extract **pieces (slices) of data** called packets that will be loaded into components named [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html).\n\nThe **data inside the packets are still coded** (compressed) and in order to decode the packets, we need to pass them to a specific [`AVCodec`](https://ffmpeg.org/doxygen/trunk/structAVCodec.html).\n\nThe `AVCodec` will decode them into [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html) and finally, this component gives us **the uncompressed frame**.  Notice that the same terminology/process is used by both audio and video streams.\n\n### Requirements\n\nSince some people were [facing issues while compiling or running the examples](https://github.com/leandromoreira/ffmpeg-libav-tutorial/issues?utf8=%E2%9C%93&q=is%3Aissue+is%3Aopen+compiling) **we're going to use [`Docker`](https://docs.docker.com/install/) as our development/runner environment,** we'll also use the big buck bunny video so if you don't have it locally just run the command `make fetch_small_bunny_video`.\n\n### Chapter 0 - code walkthrough\n\n> #### TLDR; show me the [code](/0_hello_world.c) and execution.\n> ```bash\n> $ make run_hello\n> ```\n\nWe'll skip some details, but don't worry: the [source code is available at github](/0_hello_world.c).\n\nWe're going to allocate memory to the component [`AVFormatContext`](http://ffmpeg.org/doxygen/trunk/structAVFormatContext.html) that will hold  information about the format (container).\n\n```c\nAVFormatContext *pFormatContext = avformat_alloc_context();\n```\n\nNow we're going to open the file and read its header and fill the `AVFormatContext` with minimal information about the format (notice that usually the codecs are not opened).\nThe function used to do this is [`avformat_open_input`](http://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga31d601155e9035d5b0e7efedc894ee49). 
It expects an `AVFormatContext`, a `filename` and two optional arguments: the [`AVInputFormat`](https://ffmpeg.org/doxygen/trunk/structAVInputFormat.html) (if you pass `NULL`, FFmpeg will guess the format) and the [`AVDictionary`](https://ffmpeg.org/doxygen/trunk/structAVDictionary.html) (which are the options to the demuxer).\n\n```c\navformat_open_input(&pFormatContext, filename, NULL, NULL);\n```\n\nWe can print the format name and the media duration:\n\n```c\nprintf(\"Format %s, duration %lld us\", pFormatContext->iformat->long_name, pFormatContext->duration);\n```\n\nTo access the `streams`, we need to read data from the media. The function [`avformat_find_stream_info`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#gad42172e27cddafb81096939783b157bb) does that.\nNow, the `pFormatContext->nb_streams` will hold the amount of streams and the `pFormatContext->streams[i]` will give us the `i` stream (an [`AVStream`](https://ffmpeg.org/doxygen/trunk/structAVStream.html)).\n\n```c\navformat_find_stream_info(pFormatContext,  NULL);\n```\n\nNow we'll loop through all the streams.\n\n```c\nfor (int i = 0; i < pFormatContext->nb_streams; i++)\n{\n  //\n}\n```\n\nFor each stream, we're going to keep the [`AVCodecParameters`](https://ffmpeg.org/doxygen/trunk/structAVCodecParameters.html), which describes the properties of a codec used by the stream `i`.\n\n```c\nAVCodecParameters *pLocalCodecParameters = pFormatContext->streams[i]->codecpar;\n```\n\nWith the codec properties we can look up the proper CODEC querying the function [`avcodec_find_decoder`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga19a0ca553277f019dd5b0fec6e1f9dca) and find the registered decoder for the codec id and return an [`AVCodec`](http://ffmpeg.org/doxygen/trunk/structAVCodec.html), the component that knows how to en**CO**de and **DEC**ode the stream.\n```c\nAVCodec *pLocalCodec = avcodec_find_decoder(pLocalCodecParameters->codec_id);\n```\n\nNow we can print 
information about the codecs.\n\n```c\n// specific for video and audio\nif (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_VIDEO) {\n  printf(\"Video Codec: resolution %d x %d\", pLocalCodecParameters->width, pLocalCodecParameters->height);\n} else if (pLocalCodecParameters->codec_type == AVMEDIA_TYPE_AUDIO) {\n  printf(\"Audio Codec: %d channels, sample rate %d\", pLocalCodecParameters->channels, pLocalCodecParameters->sample_rate);\n}\n// general\nprintf(\"\\tCodec %s ID %d bit_rate %lld\", pLocalCodec->long_name, pLocalCodec->id, pLocalCodecParameters->bit_rate);\n```\n\nWith the codec, we can allocate memory for the [`AVCodecContext`](https://ffmpeg.org/doxygen/trunk/structAVCodecContext.html), which will hold the context for our decode/encode process, but then we need to fill this codec context with CODEC parameters; we do that with [`avcodec_parameters_to_context`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#gac7b282f51540ca7a99416a3ba6ee0d16).\n\nOnce we filled the codec context, we need to open the codec. 
We call the function [`avcodec_open2`](https://ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d) and then we can use it.\n\n```c\nAVCodecContext *pCodecContext = avcodec_alloc_context3(pCodec);\navcodec_parameters_to_context(pCodecContext, pCodecParameters);\navcodec_open2(pCodecContext, pCodec, NULL);\n```\n\nNow we're going to read the packets from the stream and decode them into frames but first, we need to allocate memory for both components, the [`AVPacket`](https://ffmpeg.org/doxygen/trunk/structAVPacket.html) and [`AVFrame`](https://ffmpeg.org/doxygen/trunk/structAVFrame.html).\n\n```c\nAVPacket *pPacket = av_packet_alloc();\nAVFrame *pFrame = av_frame_alloc();\n```\n\nLet's feed our packets from the streams with the function [`av_read_frame`](https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga4fdb3084415a82e3810de6ee60e46a61) while it has packets.\n\n```c\nwhile (av_read_frame(pFormatContext, pPacket) >= 0) {\n  //...\n}\n```\n\nLet's **send the raw data packet** (compressed frame) to the decoder, through the codec context, using the function [`avcodec_send_packet`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3).\n\n```c\navcodec_send_packet(pCodecContext, pPacket);\n```\n\nAnd let's **receive the raw data frame** (uncompressed frame) from the decoder, through the same codec context, using the function [`avcodec_receive_frame`](https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c).\n\n```c\navcodec_receive_frame(pCodecContext, pFrame);\n```\n\nWe can print the frame number, the [PTS](https://en.wikipedia.org/wiki/Presentation_timestamp), DTS, [frame type](https://en.wikipedia.org/wiki/Video_compression_picture_types) and etc.\n\n```c\nprintf(\n    \"Frame %c (%d) pts %d dts %d key_frame %d [coded_picture_number %d, display_picture_number %d]\",\n    av_get_picture_type_char(pFrame->pict_type),\n    
pCodecContext->frame_number,\n    pFrame->pts,\n    pFrame->pkt_dts,\n    pFrame->key_frame,\n    pFrame->coded_picture_number,\n    pFrame->display_picture_number\n);\n```\n\nFinally we can save our decoded frame into a [simple gray image](https://en.wikipedia.org/wiki/Netpbm_format#PGM_example). The process is very simple, we'll use the `pFrame->data` where the index is related to the [planes Y, Cb and Cr](https://en.wikipedia.org/wiki/YCbCr), we just picked `0` (Y) to save our gray image.\n\n```c\nsave_gray_frame(pFrame->data[0], pFrame->linesize[0], pFrame->width, pFrame->height, frame_filename);\n\nstatic void save_gray_frame(unsigned char *buf, int wrap, int xsize, int ysize, char *filename)\n{\n    FILE *f;\n    int i;\n    f = fopen(filename,\"w\");\n    // writing the minimal required header for a pgm file format\n    // portable graymap format -> https://en.wikipedia.org/wiki/Netpbm_format#PGM_example\n    fprintf(f, \"P5\\n%d %d\\n%d\\n\", xsize, ysize, 255);\n\n    // writing line by line\n    for (i = 0; i < ysize; i++)\n        fwrite(buf + i * wrap, 1, xsize, f);\n    fclose(f);\n}\n```\n\nAnd voilà! 
Now we have a gray scale image with 2MB:\n\n![saved frame](/img/generated_frame.png)\n\n## Chapter 1 - syncing audio and video\n\n> **Be the player** - a young JS developer writing a new MSE video player.\n\nBefore we move to [code a transcoding example](#chapter-3---transcoding) let's talk about **timing**, or how a video player knows the right time to play a frame.\n\nIn the last example, we saved some frames that can be seen here:\n\n![frame 0](/img/hello_world_frames/frame0.png)\n![frame 1](/img/hello_world_frames/frame1.png)\n![frame 2](/img/hello_world_frames/frame2.png)\n![frame 3](/img/hello_world_frames/frame3.png)\n![frame 4](/img/hello_world_frames/frame4.png)\n![frame 5](/img/hello_world_frames/frame5.png)\n\nWhen we're designing a video player we need to **play each frame at a given pace**, otherwise it would be hard to pleasantly see the video either because it's playing too fast or too slow.\n\nTherefore we need to introduce some logic to play each frame smoothly. For that matter, each frame has a **presentation timestamp** (PTS) which is an increasing number factored in a **timebase** that is a rational number (where the denominator is known as **timescale**) divisible by the **frame rate (fps)**.\n\nIt's easier to understand when we look at some examples, let's simulate some scenarios.\n\nFor a `fps=60/1` and `timebase=1/60000` each PTS will increase `timescale / fps = 1000` therefore the **PTS real time** for each frame could be (supposing it started at 0):\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 1000, PTS_TIME = PTS * timebase = 0.016`\n* `frame=2, PTS = 2000, PTS_TIME = PTS * timebase = 0.033`\n\nFor almost the same scenario but with a timebase equal to `1/60`.\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 1, PTS_TIME = PTS * timebase = 0.016`\n* `frame=2, PTS = 2, PTS_TIME = PTS * timebase = 0.033`\n* `frame=3, PTS = 3, PTS_TIME = PTS * timebase = 0.050`\n\nFor a `fps=25/1` and `timebase=1/75` each PTS will increase 
`timescale / fps = 3` and the PTS time could be:\n\n* `frame=0, PTS = 0, PTS_TIME = 0`\n* `frame=1, PTS = 3, PTS_TIME = PTS * timebase = 0.04`\n* `frame=2, PTS = 6, PTS_TIME = PTS * timebase = 0.08`\n* `frame=3, PTS = 9, PTS_TIME = PTS * timebase = 0.12`\n* ...\n* `frame=24, PTS = 72, PTS_TIME = PTS * timebase = 0.96`\n* ...\n* `frame=4064, PTS = 12192, PTS_TIME = PTS * timebase = 162.56`\n\nNow with the `pts_time` we can find a way to render this synced with audio `pts_time` or with a system clock. The FFmpeg libav provides this information through its API:\n\n- fps = [`AVStream->avg_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a946e1e9b89eeeae4cab8a833b482c1ad)\n- tbr = [`AVStream->r_frame_rate`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#ad63fb11cc1415e278e09ddc676e8a1ad)\n- tbn = [`AVStream->time_base`](https://ffmpeg.org/doxygen/trunk/structAVStream.html#a9db755451f14e2bf590d4b85d82b32e6)\n\nJust out of curiosity, the frames we saved were sent in DTS order (frames: 1,6,4,2,3,5) but played in PTS order (frames: 1,2,3,4,5,6). 
Also, notice how cheap B-Frames are in comparison to P or I-Frames.\n\n```\nLOG: AVStream->r_frame_rate 60/1\nLOG: AVStream->time_base 1/60000\n...\nLOG: Frame 1 (type=I, size=153797 bytes) pts 6000 key_frame 1 [DTS 0]\nLOG: Frame 2 (type=B, size=8117 bytes) pts 7000 key_frame 0 [DTS 3]\nLOG: Frame 3 (type=B, size=8226 bytes) pts 8000 key_frame 0 [DTS 4]\nLOG: Frame 4 (type=B, size=17699 bytes) pts 9000 key_frame 0 [DTS 2]\nLOG: Frame 5 (type=B, size=6253 bytes) pts 10000 key_frame 0 [DTS 5]\nLOG: Frame 6 (type=P, size=34992 bytes) pts 11000 key_frame 0 [DTS 1]\n```\n\n## Chapter 2 - remuxing\n\nRemuxing is the act of changing from one format (container) to another, for instance, we can change a [MPEG-4](https://en.wikipedia.org/wiki/MPEG-4_Part_14) video to a [MPEG-TS](https://en.wikipedia.org/wiki/MPEG_transport_stream) one without much pain using FFmpeg:\n\n```bash\nffmpeg -i input.mp4 -c copy output.ts\n```\n\nIt'll demux the mp4 but it won't decode or encode it (`-c copy`) and in the end, it'll mux it into a `mpegts` file. 
If you don't provide the format `-f` the ffmpeg will try to guess it based on the file's extension.\n\nThe general usage of FFmpeg or the libav follows a pattern/architecture or workflow:\n* **[protocol layer](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** - it accepts an `input` (a `file` for instance but it could be a `rtmp` or `HTTP` input as well)\n* **[format layer](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - it `demuxes` its content, revealing mostly metadata and its streams\n* **[codec layer](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - it `decodes` its compressed streams data <sup>*optional*</sup>\n* **[pixel layer](https://ffmpeg.org/doxygen/trunk/group__lavfi.html)** - it can also apply some `filters` to the raw frames (like resizing)<sup>*optional*</sup>\n* and then it does the reverse path\n* **[codec layer](https://ffmpeg.org/doxygen/trunk/group__libavc.html)** - it `encodes` (or `re-encodes` or even `transcodes`) the raw frames<sup>*optional*</sup>\n* **[format layer](https://ffmpeg.org/doxygen/trunk/group__libavf.html)** - it `muxes` (or `remuxes`) the raw streams (the compressed data)\n* **[protocol layer](https://ffmpeg.org/doxygen/trunk/protocols_8c.html)** - and finally the muxed data is sent to an `output` (another file or maybe a network remote server)\n\n![ffmpeg libav workflow](/img/ffmpeg_libav_workflow.jpeg)\n> This graph is strongly inspired by [Leixiaohua's](http://leixiaohua1020.github.io/#ffmpeg-development-examples) and [Slhck's](https://slhck.info/ffmpeg-encoding-course/#/9) works.\n\nNow let's code an example using libav to provide the same effect as in `ffmpeg input.mp4 -c copy output.ts`.\n\nWe're going to read from an input (`input_format_context`) and change it to another output (`output_format_context`).\n\n```c\nAVFormatContext *input_format_context = NULL;\nAVFormatContext *output_format_context = NULL;\n```\n\nWe start doing the usually allocate memory and open the input format. 
For this specific case, we're going to open an input file and allocate memory for an output file.\n\n```c\nif ((ret = avformat_open_input(&input_format_context, in_filename, NULL, NULL)) < 0) {\n  fprintf(stderr, \"Could not open input file '%s'\", in_filename);\n  goto end;\n}\nif ((ret = avformat_find_stream_info(input_format_context, NULL)) < 0) {\n  fprintf(stderr, \"Failed to retrieve input stream information\");\n  goto end;\n}\n\navformat_alloc_output_context2(&output_format_context, NULL, NULL, out_filename);\nif (!output_format_context) {\n  fprintf(stderr, \"Could not create output context\\n\");\n  ret = AVERROR_UNKNOWN;\n  goto end;\n}\n```\n\nWe're going to remux only the video, audio and subtitle types of streams so we're holding what streams we'll be using into an array of indexes.\n\n```c\nnumber_of_streams = input_format_context->nb_streams;\nstreams_list = av_mallocz_array(number_of_streams, sizeof(*streams_list));\n```\n\nJust after we allocated the required memory, we're going to loop throughout all the streams and for each one we need to create new out stream into our output format context, using the [avformat_new_stream](https://ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827) function. 
Notice that we're marking all the streams that aren't video, audio or subtitle so we can skip them after.\n\n```c\nfor (i = 0; i < input_format_context->nb_streams; i++) {\n  AVStream *out_stream;\n  AVStream *in_stream = input_format_context->streams[i];\n  AVCodecParameters *in_codecpar = in_stream->codecpar;\n  if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&\n      in_codecpar->codec_type != AVMEDIA_TYPE_VIDEO &&\n      in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {\n    streams_list[i] = -1;\n    continue;\n  }\n  streams_list[i] = stream_index++;\n  out_stream = avformat_new_stream(output_format_context, NULL);\n  if (!out_stream) {\n    fprintf(stderr, \"Failed allocating output stream\\n\");\n    ret = AVERROR_UNKNOWN;\n    goto end;\n  }\n  ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar);\n  if (ret < 0) {\n    fprintf(stderr, \"Failed to copy codec parameters\\n\");\n    goto end;\n  }\n}\n```\n\nNow we can create the output file.\n\n```c\nif (!(output_format_context->oformat->flags & AVFMT_NOFILE)) {\n  ret = avio_open(&output_format_context->pb, out_filename, AVIO_FLAG_WRITE);\n  if (ret < 0) {\n    fprintf(stderr, \"Could not open output file '%s'\", out_filename);\n    goto end;\n  }\n}\n\nret = avformat_write_header(output_format_context, NULL);\nif (ret < 0) {\n  fprintf(stderr, \"Error occurred when opening output file\\n\");\n  goto end;\n}\n```\n\nAfter that, we can copy the streams, packet by packet, from our input to our output streams. 
We'll loop while it has packets (`av_read_frame`), for each packet we need to re-calculate the PTS and DTS to finally write it (`av_interleaved_write_frame`) to our output format context.\n\n```c\nwhile (1) {\n  AVStream *in_stream, *out_stream;\n  ret = av_read_frame(input_format_context, &packet);\n  if (ret < 0)\n    break;\n  in_stream  = input_format_context->streams[packet.stream_index];\n  if (packet.stream_index >= number_of_streams || streams_list[packet.stream_index] < 0) {\n    av_packet_unref(&packet);\n    continue;\n  }\n  packet.stream_index = streams_list[packet.stream_index];\n  out_stream = output_format_context->streams[packet.stream_index];\n  /* copy packet */\n  packet.pts = av_rescale_q_rnd(packet.pts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);\n  packet.dts = av_rescale_q_rnd(packet.dts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);\n  packet.duration = av_rescale_q(packet.duration, in_stream->time_base, out_stream->time_base);\n  // https://ffmpeg.org/doxygen/trunk/structAVPacket.html#ab5793d8195cf4789dfb3913b7a693903\n  packet.pos = -1;\n\n  //https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1\n  ret = av_interleaved_write_frame(output_format_context, &packet);\n  if (ret < 0) {\n    fprintf(stderr, \"Error muxing packet\\n\");\n    break;\n  }\n  av_packet_unref(&packet);\n}\n```\n\nTo finalize we need to write the stream trailer to an output media file with [av_write_trailer](https://ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga7f14007e7dc8f481f054b21614dfec13) function.\n\n```c\nav_write_trailer(output_format_context);\n```\n\nNow we're ready to test it and the first test will be a format (video container) conversion from a MP4 to a MPEG-TS video file. We're basically making the command line `ffmpeg input.mp4 -c copy output.ts` with libav.\n\n```bash\nmake run_remuxing_ts\n```\n\nIt's working!!! 
don't you trust me?! you shouldn't, we can check it with `ffprobe`:\n\n```bash\nffprobe -i remuxed_small_bunny_1080p_60fps.ts\n\nInput #0, mpegts, from 'remuxed_small_bunny_1080p_60fps.ts':\n  Duration: 00:00:10.03, start: 0.000000, bitrate: 2751 kb/s\n  Program 1\n    Metadata:\n      service_name    : Service01\n      service_provider: FFmpeg\n    Stream #0:0[0x100]: Video: h264 (High) ([27][0][0][0] / 0x001B), yuv420p(progressive), 1920x1080 [SAR 1:1 DAR 16:9], 60 fps, 60 tbr, 90k tbn, 120 tbc\n    Stream #0:1[0x101]: Audio: ac3 ([129][0][0][0] / 0x0081), 48000 Hz, 5.1(side), fltp, 320 kb/s\n```\n\nTo sum up what we did here in a graph, we can revisit our initial [idea about how libav works](https://github.com/leandromoreira/ffmpeg-libav-tutorial#ffmpeg-libav-architecture) but showing that we skipped the codec part.\n\n![remuxing libav components](/img/remuxing_libav_components.png)\n\nBefore we end this chapter I'd like to show an important part of the remuxing process, **you can pass options to the muxer**. 
Let's say we want to deliver the [MPEG-DASH](https://developer.mozilla.org/en-US/docs/Web/Apps/Fundamentals/Audio_and_video_delivery/Setting_up_adaptive_streaming_media_sources#MPEG-DASH_Encoding) format; for that we need to use [fragmented mp4](https://stackoverflow.com/a/35180327) (sometimes referred to as `fmp4`) instead of MPEG-TS or plain MPEG-4.\n\nWith the [command line we can do that easily](https://developer.mozilla.org/en-US/docs/Web/API/Media_Source_Extensions_API/Transcoding_assets_for_MSE#Fragmenting).\n\n```\nffmpeg -i non_fragmented.mp4 -movflags frag_keyframe+empty_moov+default_base_moof fragmented.mp4\n```\n\nAlmost equally easy as the command line is the libav version of it, we just need to pass the options when writing the output header, just before copying the packets.\n\n```c\nAVDictionary* opts = NULL;\nav_dict_set(&opts, \"movflags\", \"frag_keyframe+empty_moov+default_base_moof\", 0);\nret = avformat_write_header(output_format_context, &opts);\n```\n\nWe now can generate this fragmented mp4 file:\n\n```bash\nmake run_remuxing_fragmented_mp4\n```\n\nBut to make sure that I'm not lying to you, you can use the amazing site/tool [gpac/mp4box.js](http://download.tsi.telecom-paristech.fr/gpac/mp4box.js/filereader.html) or the site [http://mp4parser.com/](http://mp4parser.com/) to see the differences, first load up the \"common\" mp4.\n\n![mp4 boxes](/img/boxes_normal_mp4.png)\n\nAs you can see it has a single `mdat` atom/box, **this is the place where the video and audio frames are**. 
Now load the fragmented mp4 to see how it spreads the `mdat` boxes.\n\n![fragmented mp4 boxes](/img/boxes_fragmente_mp4.png)\n\n## Chapter 3 - transcoding\n\n> #### TLDR; show me the [code](/3_transcoding.c) and execution.\n> ```bash\n> $ make run_transcoding\n> ```\n> We'll skip some details, but don't worry: the [source code is available at github](/3_transcoding.c).\n\n\n\nIn this chapter, we're going to create a minimalist transcoder, written in C, that can convert videos coded in H264 to H265 using the **FFmpeg/libav** library, specifically [libavcodec](https://ffmpeg.org/libavcodec.html), libavformat, and libavutil.\n\n![media transcoding flow](/img/transcoding_flow.png)\n\n> _Just a quick recap:_ The [**AVFormatContext**](https://www.ffmpeg.org/doxygen/trunk/structAVFormatContext.html) is the abstraction for the format of the media file, aka container (ex: MKV, MP4, Webm, TS). The [**AVStream**](https://www.ffmpeg.org/doxygen/trunk/structAVStream.html) represents each type of data for a given format (ex: audio, video, subtitle, metadata). 
The [**AVPacket**](https://www.ffmpeg.org/doxygen/trunk/structAVPacket.html) is a slice of compressed data obtained from the `AVStream` that can be decoded by an [**AVCodec**](https://www.ffmpeg.org/doxygen/trunk/structAVCodec.html) (ex: av1, h264, vp9, hevc) generating a raw data called [**AVFrame**](https://www.ffmpeg.org/doxygen/trunk/structAVFrame.html).\n\n### Transmuxing\n\nLet's start with the simple transmuxing operation and then we can build upon this code, the first step is to **load the input file**.\n\n```c\n// Allocate an AVFormatContext\navfc = avformat_alloc_context();\n// Open an input stream and read the header.\navformat_open_input(avfc, in_filename, NULL, NULL);\n// Read packets of a media file to get stream information.\navformat_find_stream_info(avfc, NULL);\n```\n\nNow we're going to set up the decoder, the `AVFormatContext` will give us access to all the `AVStream` components and for each one of them, we can get their `AVCodec` and create the particular `AVCodecContext` and finally we can open the given codec so we can proceed to the decoding process.\n\n>  The [**AVCodecContext**](https://www.ffmpeg.org/doxygen/trunk/structAVCodecContext.html) holds data about media configuration such as bit rate, frame rate, sample rate, channels, height, and many others.\n\n```c\nfor (int i = 0; i < avfc->nb_streams; i++)\n{\n  AVStream *avs = avfc->streams[i];\n  AVCodec *avc = avcodec_find_decoder(avs->codecpar->codec_id);\n  AVCodecContext *avcc = avcodec_alloc_context3(*avc);\n  avcodec_parameters_to_context(*avcc, avs->codecpar);\n  avcodec_open2(*avcc, *avc, NULL);\n}\n```\n\nWe need to prepare the output media file for transmuxing as well, we first **allocate memory** for the output `AVFormatContext`. We create **each stream** in the output format. 
In order to pack the stream properly, we **copy the codec parameters** from the decoder.\n\nWe **set the flag** `AV_CODEC_FLAG_GLOBAL_HEADER` which tells the encoder that it can use the global headers and finally we open the output **file for write** and persist the headers.\n\n```c\navformat_alloc_output_context2(&encoder_avfc, NULL, NULL, out_filename);\n\nAVStream *avs = avformat_new_stream(encoder_avfc, NULL);\navcodec_parameters_copy(avs->codecpar, decoder_avs->codecpar);\n\nif (encoder_avfc->oformat->flags & AVFMT_GLOBALHEADER)\n  encoder_avfc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;\n\navio_open(&encoder_avfc->pb, encoder->filename, AVIO_FLAG_WRITE);\navformat_write_header(encoder->avfc, &muxer_opts);\n\n```\n\nWe're getting the `AVPacket`'s from the decoder, adjusting the timestamps, and write the packet properly to the output file. Even though the function `av_interleaved_write_frame` says \"write frame\" we are storing the packet. We finish the transmuxing process by writing the stream trailer to the file.\n\n```c\nAVFrame *input_frame = av_frame_alloc();\nAVPacket *input_packet = av_packet_alloc();\n\nwhile (av_read_frame(decoder_avfc, input_packet) >= 0)\n{\n  av_packet_rescale_ts(input_packet, decoder_video_avs->time_base, encoder_video_avs->time_base);\n  av_interleaved_write_frame(*avfc, input_packet) < 0));\n}\n\nav_write_trailer(encoder_avfc);\n```\n\n### Transcoding\n\nThe previous section showed a simple transmuxer program, now we're going to add the capability to encode files, specifically we're going to enable it to transcode videos from `h264` to `h265`.\n\nAfter we prepared the decoder but before we arrange the output media file we're going to set up the encoder.\n\n* Create the video `AVStream` in the encoder, [`avformat_new_stream`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__core.html#gadcb0fd3e507d9b58fe78f61f8ad39827)\n* Use the `AVCodec` called `libx265`, 
[`avcodec_find_encoder_by_name`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__encoding.html#gaa614ffc38511c104bdff4a3afa086d37)\n* Create the `AVCodecContext` based in the created codec, [`avcodec_alloc_context3`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#gae80afec6f26df6607eaacf39b561c315)\n* Set up basic attributes for the transcoding session, and\n* Open the codec and copy parameters from the context to the stream. [`avcodec_open2`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga11f785a188d7d9df71621001465b0f1d) and [`avcodec_parameters_from_context`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__core.html#ga0c7058f764778615e7978a1821ab3cfe)\n\n```c\nAVRational input_framerate = av_guess_frame_rate(decoder_avfc, decoder_video_avs, NULL);\nAVStream *video_avs = avformat_new_stream(encoder_avfc, NULL);\n\nchar *codec_name = \"libx265\";\nchar *codec_priv_key = \"x265-params\";\n// we're going to use internal options for the x265\n// it disables the scene change detection and fix then\n// GOP on 60 frames.\nchar *codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0\";\n\nAVCodec *video_avc = avcodec_find_encoder_by_name(codec_name);\nAVCodecContext *video_avcc = avcodec_alloc_context3(video_avc);\n// encoder codec params\nav_opt_set(sc->video_avcc->priv_data, codec_priv_key, codec_priv_value, 0);\nvideo_avcc->height = decoder_ctx->height;\nvideo_avcc->width = decoder_ctx->width;\nvideo_avcc->pix_fmt = video_avc->pix_fmts[0];\n// control rate\nvideo_avcc->bit_rate = 2 * 1000 * 1000;\nvideo_avcc->rc_buffer_size = 4 * 1000 * 1000;\nvideo_avcc->rc_max_rate = 2 * 1000 * 1000;\nvideo_avcc->rc_min_rate = 2.5 * 1000 * 1000;\n// time base\nvideo_avcc->time_base = av_inv_q(input_framerate);\nvideo_avs->time_base = sc->video_avcc->time_base;\n\navcodec_open2(sc->video_avcc, sc->video_avc, NULL);\navcodec_parameters_from_context(sc->video_avs->codecpar, sc->video_avcc);\n```\n\nWe need to expand our decoding loop for the video 
stream transcoding:\n\n* Send the empty `AVPacket` to the decoder, [`avcodec_send_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3)\n* Receive the uncompressed `AVFrame`, [`avcodec_receive_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c)\n* Start to transcode this raw frame,\n* Send the raw frame, [`avcodec_send_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga9395cb802a5febf1f00df31497779169)\n* Receive the compressed, based on our codec, `AVPacket`, [`avcodec_receive_packet`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga5b8eff59cf259747cf0b31563e38ded6)\n* Set up the timestamp, and [`av_packet_rescale_ts`](https://www.ffmpeg.org/doxygen/trunk/group__lavc__packet.html#gae5c86e4d93f6e7aa62ef2c60763ea67e)\n* Write it to the output file. [`av_interleaved_write_frame`](https://www.ffmpeg.org/doxygen/trunk/group__lavf__encoding.html#ga37352ed2c63493c38219d935e71db6c1)\n\n```c\nAVFrame *input_frame = av_frame_alloc();\nAVPacket *input_packet = av_packet_alloc();\n\nwhile (av_read_frame(decoder_avfc, input_packet) >= 0)\n{\n  int response = avcodec_send_packet(decoder_video_avcc, input_packet);\n  while (response >= 0) {\n    response = avcodec_receive_frame(decoder_video_avcc, input_frame);\n    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n      break;\n    } else if (response < 0) {\n      return response;\n    }\n    if (response >= 0) {\n      encode(encoder_avfc, decoder_video_avs, encoder_video_avs, decoder_video_avcc, input_packet->stream_index);\n    }\n    av_frame_unref(input_frame);\n  }\n  av_packet_unref(input_packet);\n}\nav_write_trailer(encoder_avfc);\n\n// used function\nint encode(AVFormatContext *avfc, AVStream *dec_video_avs, AVStream *enc_video_avs, AVCodecContext video_avcc int index) {\n  AVPacket *output_packet = av_packet_alloc();\n  int response = 
avcodec_send_frame(video_avcc, input_frame);\n\n  while (response >= 0) {\n    response = avcodec_receive_packet(video_avcc, output_packet);\n    if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {\n      break;\n    } else if (response < 0) {\n      return -1;\n    }\n\n    output_packet->stream_index = index;\n    output_packet->duration = enc_video_avs->time_base.den / enc_video_avs->time_base.num / dec_video_avs->avg_frame_rate.num * dec_video_avs->avg_frame_rate.den;\n\n    av_packet_rescale_ts(output_packet, dec_video_avs->time_base, enc_video_avs->time_base);\n    response = av_interleaved_write_frame(avfc, output_packet);\n  }\n  av_packet_unref(output_packet);\n  av_packet_free(&output_packet);\n  return 0;\n}\n\n```\n\nWe converted the media stream from `h264` to `h265`, as expected the `h265` version of the media file is smaller than the `h264` however the [created program](/3_transcoding.c) is capable of:\n\n```c\n\n  /*\n   * H264 -> H265\n   * Audio -> remuxed (untouched)\n   * MP4 - MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx265\";\n  sp.codec_priv_key = \"x265-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * Audio -> remuxed (untouched)\n   * MP4 - MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * Audio -> remuxed (untouched)\n   * MP4 - fragmented MP4\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 1;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n  sp.muxer_opt_key = \"movflags\";\n  sp.muxer_opt_value = 
\"frag_keyframe+empty_moov+delay_moov+default_base_moof\";\n\n  /*\n   * H264 -> H264 (fixed gop)\n   * Audio -> AAC\n   * MP4 - MPEG-TS\n   */\n  StreamingParams sp = {0};\n  sp.copy_audio = 0;\n  sp.copy_video = 0;\n  sp.video_codec = \"libx264\";\n  sp.codec_priv_key = \"x264-params\";\n  sp.codec_priv_value = \"keyint=60:min-keyint=60:scenecut=0:force-cfr=1\";\n  sp.audio_codec = \"aac\";\n  sp.output_extension = \".ts\";\n\n  /* WIP :P  -> it's not playing on VLC, the final bit rate is huge\n   * H264 -> VP9\n   * Audio -> Vorbis\n   * MP4 - WebM\n   */\n  //StreamingParams sp = {0};\n  //sp.copy_audio = 0;\n  //sp.copy_video = 0;\n  //sp.video_codec = \"libvpx-vp9\";\n  //sp.audio_codec = \"libvorbis\";\n  //sp.output_extension = \".webm\";\n\n```\n\n> Now, to be honest, this was [harder than I thought](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54) it'd be and I had to dig into the [FFmpeg command line source code](https://github.com/leandromoreira/ffmpeg-libav-tutorial/pull/54#issuecomment-570746749) and test it a lot and I think I'm missing something because I had to enforce `force-cfr` for the `h264` to work and I'm still seeing some warning messages like `warning messages (forced frame type (5) at 80 was changed to frame type (3))`.\n"
  },
  {
    "path": "build/.gitignore",
    "content": "# Ignore everything in this directory\n*\n# Except this file\n!.gitignore\n"
  },
  {
    "path": "fetch_bbb_video.sh",
    "content": "#!/bin/bash\n#  the link doesn't work anymore\n# wget -O bunny_1080p_60fps.mp4 http://distribution.bbb3d.renderfarming.net/video/mp4/bbb_sunflower_1080p_60fps_normal.mp4\n# ffmpeg -y -i bunny_1080p_60fps.mp4 -ss 00:01:24 -t 00:00:10 small_bunny_1080p_60fps.mp4\n\necho \"the small_bunny_1080p_60fps.mp4 is already provided\"\n"
  },
  {
    "path": "video_debugging.c",
    "content": "#include <libavcodec/avcodec.h>\n#include <libavformat/avformat.h>\n#include <libavutil/timestamp.h>\n#include <stdio.h>\n#include <stdarg.h>\n#include <stdlib.h>\n#include <libavutil/opt.h>\n#include <string.h>\n#include <inttypes.h>\n#include \"video_debugging.h\"\n\nvoid logging(const char *fmt, ...)\n{\n  va_list args;\n  fprintf( stderr, \"LOG: \" );\n  va_start( args, fmt );\n  vfprintf( stderr, fmt, args );\n  va_end( args );\n  fprintf( stderr, \"\\n\" );\n}\n\nvoid log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)\n{\n    AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;\n\n    logging(\"pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\",\n           av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),\n           av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),\n           av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),\n           pkt->stream_index);\n}\n\nvoid print_timing(char *name, AVFormatContext *avf, AVCodecContext *avc, AVStream *avs) {\n  logging(\"=================================================\");\n  logging(\"%s\", name);\n\n  logging(\"\\tAVFormatContext\");\n  if (avf != NULL) {\n    logging(\"\\t\\tstart_time=%d duration=%d bit_rate=%d start_time_realtime=%d\", avf->start_time, avf->duration, avf->bit_rate, avf->start_time_realtime);\n  } else {\n    logging(\"\\t\\t->NULL\");\n  }\n\n  logging(\"\\tAVCodecContext\");\n  if (avc != NULL) {\n    logging(\"\\t\\tbit_rate=%d ticks_per_frame=%d width=%d height=%d gop_size=%d keyint_min=%d sample_rate=%d profile=%d level=%d \",\n        avc->bit_rate, avc->ticks_per_frame, avc->width, avc->height, avc->gop_size, avc->keyint_min, avc->sample_rate, avc->profile, avc->level);\n    logging(\"\\t\\tavc->time_base=num/den %d/%d\", avc->time_base.num, avc->time_base.den);\n    logging(\"\\t\\tavc->framerate=num/den %d/%d\", avc->framerate.num, avc->framerate.den);\n    
logging(\"\\t\\tavc->pkt_timebase=num/den %d/%d\", avc->pkt_timebase.num, avc->pkt_timebase.den);\n  } else {\n    logging(\"\\t\\t->NULL\");\n  }\n\n  logging(\"\\tAVStream\");\n  if (avs != NULL) {\n    logging(\"\\t\\tindex=%d start_time=%d duration=%d \", avs->index, avs->start_time, avs->duration);\n    logging(\"\\t\\tavs->time_base=num/den %d/%d\", avs->time_base.num, avs->time_base.den);\n    logging(\"\\t\\tavs->sample_aspect_ratio=num/den %d/%d\", avs->sample_aspect_ratio.num, avs->sample_aspect_ratio.den);\n    logging(\"\\t\\tavs->avg_frame_rate=num/den %d/%d\", avs->avg_frame_rate.num, avs->avg_frame_rate.den);\n    logging(\"\\t\\tavs->r_frame_rate=num/den %d/%d\", avs->r_frame_rate.num, avs->r_frame_rate.den);\n  } else {\n    logging(\"\\t\\t->NULL\");\n  }\n\n  logging(\"=================================================\");\n}\n"
  },
  {
    "path": "video_debugging.h",
    "content": "#include <libavcodec/avcodec.h>\n#include <libavformat/avformat.h>\n#include <libavutil/timestamp.h>\n#include <stdio.h>\n#include <stdarg.h>\n#include <stdlib.h>\n#include <libavutil/opt.h>\n#include <string.h>\n#include <inttypes.h>\n\nvoid logging(const char *fmt, ...);\nvoid log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt);\nvoid print_timing(char *name, AVFormatContext *avf, AVCodecContext *avc, AVStream *avs);\n"
  }
]