Abstract
FFmpeg is commonly used for video stream decoding and encoding in projects that require video stream processing. This article briefly introduces the decoding process, and provides a simple reproduction implemented in C++ to help deepen familiarity with the video stream decoding workflow.
Main Process
Includes
#include "decode.h"

// NOTE(review): the original include names were stripped by extraction;
// reconstructed from usage (the trailing </unistd.h></stdio.h> residue
// confirms stdio.h and unistd.h) — verify against the original source.
#include <stdio.h>
#include <unistd.h>

#include <mutex>
#include <queue>
#include <string>
1. Register FFmpeg related components
FFMPEG 3.x version:
// Register global structs such as decoders and encoders to their
// respective global object linked lists (FFmpeg 3.x only; removed in 4.x).
av_register_all();
// Register all hardware devices; this step is required for hardware
// decoding, and is not needed if only CPU decoding is used.
avdevice_register_all();
// Initialize network libraries and libraries related to network
// encryption protocols, needed for decoding network streams.
avformat_network_init();
FFMPEG 4.x version:
In FFmpeg 4.x, av_register_all() is deprecated: muxers, demuxers and codecs are registered automatically at load time, so users no longer need to call this API. All other registration steps are identical to FFmpeg 3.x.
2. Demux, open the video stream and obtain the relevant context
// Allocate memory for the context AVFormatContext *pFormatCtx = avformat_alloc_context();// Video file path or network stream URL
const char *url = streamInfo.url.c_str();// Video encapsulation format; passing a null pointer will enable automatic recognition
AVInputFormat *inputFmt = NULL;// Dictionary containing private options for the video context and demuxer; unmatched entries will be stored in this parameter
AVDictionary *options = NULL;// Perform demuxing, get the video context and handle exceptions
if ((ret = avformat_open_input(&pFormatCtx, url, inputFmt, &options)) < 0) {
ifError = true;
fprintf(stderr, “[libdecode] Cannot open input file ‘%s’\n”, streamInfo.url);
if (ifmt_ctx != NULL) {
avformat_close_input(&pFormatCtx);
ifmt_ctx = NULL;
}
sleep(SLEEPTIME);
continue;
}
// Free dictionary memory
av_dict_free(&options);
3. Find the video stream
// Read a few packets to discover stream properties (codec, size, ...).
ret = avformat_find_stream_info(pFormatCtx, nullptr);
if (ret < 0) {
    // Actually print the error code (the original left it off).
    printf("avformat_find_stream_info failed, ret: %d\n", ret);
}
// Locate the index of the video stream among all streams.
if ((ret = d.findVideoStreamIndex()) < 0) {
    printf("findVideoStreamIndex failed, ret: %d\n", ret);
}
4. Get and open the decoder
// Get a pointer to the codec context of the video stream video = pFormatCtx->streams[videoStream];// Declare the decoder
AVCodec* pCodec = nullptr;// Determine the decoder based on the video stream type; NVIDIA hardware decoding is used as an example here
switch (video->codecpar->codec_id) {
case AV_CODEC_ID_H264:
pCodec = avcodec_find_decoder_by_name(“h264_cuvid”);
break;
case AV_CODEC_ID_HEVC:
pCodec = avcodec_find_decoder_by_name(“hevc_cuvid”);
break;
default:
pCodec = avcodec_find_decoder(video->codecpar->codec_id);
break;
}
if (pCodec == nullptr) {
printf(“Unsupported codec!”);
}// Copy the video stream information to the decoder
pCodecCtx = avcodec_alloc_context3(pCodec);
if (avcodec_parameters_to_context(pCodecCtx, video->codecpar) != 0) {
printf(“Couldn’t copy codec context!”);
}
AVDictionary *decoder_opts = NULL;// Set the hardware device ID
av_dict_set_int(&decoder_opts, “device_id”, devId, 0);
// Open the decoder
if (avcodec_open2(pCodecCtx, pCodec, &decoder_opts) < 0) {
printf(“Could not open codec!”);
}
5. Declare AVPacket and AVFrame, allocate memory and buffers
// Declare AVPacket, which contains information about the video stream: presentation timestamp (pts), decoding timestamp (dts), data duration, index of the media stream it belongs to, etc. AVPacket packet; av_init_packet(&packet);// Declare AVFrame
AVFrame *pFrame = av_frame_alloc();
AVFrame *pFrameBGR = av_frame_alloc();// Determine the buffer size required for decoding and allocate the buffer
imgWidth = pCodecCtx->width;
imgHeight = pCodecCtx->height;
imgSize = av_image_get_buffer_size(AV_PIX_FMT_BGR24, pCodecCtx->width, pCodecCtx->height, 1);
buffer = (uint8_t*)av_malloc(imgSize * sizeof(uint8_t));// Allocate a portion of the buffer to pFrame
av_image_fill_arrays(pFrame->data,pFrame->linesize, d.buffer, AV_PIX_FMT_RGB24, d.imgWidth, d.imgHeight, 1);
// Initialize the SWS context for subsequent image processing and formatting
sws_ctx = sws_getContext(pCodecCtx->width,pCodecCtx->height, pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height, AV_PIX_FMT_RGB24, SWS_FAST_BILINEAR, nullptr, nullptr, nullptr);
6. Decoding
FRAME res; res.result = NONE; res.relativeTimestamp = 0;Image queue that stores decoded data
std::queue *frameQueue = NULL;
std::mutex *lock = NUllwhile (runFlag == RUN) {
// Get one frame of data from the video stream
int ret = av_read_frame(d.pFormatCtx, &packet);
if (ret < 0) {
printf(" av_read_frame ret eof!");
}
if (packet.stream_index != d.videoStream) {
av_packet_unref(&packet);
continue;
}// Send the packet to be decoded ret = avcodec_send_packet(pCodecCtx, &packet); if (packet.stream_index != videoStream) { av_packet_unref(&packet); continue; } // Get decoded data and perform image resizing if (ret = avcodec_receive_frame(pCodecCtx, pFrame) == 0){ sws_scale(d.sws_ctx, (const uint8_t* const*),pFrame->data, pFrame->linesize, 0, imgHeight, pFrameBGR->data, pFrameBGR->linesize); // Get data in cv::Mat format cv::Mat bgr24(imgHeight, imgWidth, CV_8UC3, pFrameBGR->data); res.frame = bgr24.clone(); res.result = SUCCESS; res.relativeTimestamp = pFrame->pts * av_q2d(video->time_base) * 1000; // Add the data to the image queue lock.lock(); if (frameQueue.size() >= MAX_QUEUE_SIZE) { frameQueue.pop(); } frameQueue.push(res); lock.unlock(); } av_packet_unref(&packet);
}
// Free memory
// Free frames — the converted frame was declared as pFrameBGR above,
// not pFrameRGB as the original wrote here.
av_frame_free(&d.pFrameBGR);
av_frame_free(&d.pFrame);
// Release the pixel buffer attached via av_image_fill_arrays()
// (leaked in the original).
av_free(d.buffer);
sws_freeContext(d.sws_ctx);
avcodec_close(d.pCodecCtx);
avcodec_free_context(&d.pCodecCtx);
avformat_close_input(&d.pFormatCtx);
Get image from the queue
// Pop the oldest decoded frame off the shared queue.
//
// Returns a FRAME whose .result is ERROR when the decode thread hit a
// fatal error, NONE when no frame is currently buffered, or the
// dequeued frame (with .result set by the decode loop) otherwise.
// Thread-safe with respect to the decode loop via the member mutex.
FRAME Decode::Read() {
    FRAME res;
    // RAII guard instead of manual lock()/unlock(): the mutex is
    // released on every path, even if FRAME's copy assignment throws.
    std::lock_guard<std::mutex> guard(lock);
    // printf("Queue size: %d\n", frameQueue.size());
    if (ifError) {
        res.result = ERROR;
    } else if (frameQueue.empty()) {
        res.result = NONE;
    } else {
        res = frameQueue.front();
        frameQueue.pop();
    }
    return res;
}
Header File
#include #include #include #include #include #include #include #include #include #include "libyuv.h" #include "timestamp.h" // #include "opencv2/imgproc/imgproc.hpp" extern "C" { #include #include #include #include #include #include #include #include #include }#define SLEEPTIME 60
#define END 1
#define SUCCESS 0
#define NONE -1
#define ERROR -2#define RUN 1
#define STOP 0#define MAX_QUEUE_SIZE 3
#define RTSP 1
#define RTMP 2
#define ONVIF 3
#define GB 4
#define FILE 5typedef struct Struct_STREAMINFO {
int protocol;
std::string url;
std::string ip;
int port;
std::string username;
std::string password;
std::string format;
} STREAMINFO;typedef struct Struct_FRAME {
int result;
cv::Mat frame;
int relativeTimestamp; // Millisecond timestamp of the current frame relative to the initial frame
} FRAME;typedef void (*HW_CALLBACK)(unsigned char *buf, int xsize, int ysize, void *userdef);
class Decode {
public:
Decode(STREAMINFO inputStreamInfo, std::string decoder, std::string pixel_transform);
~Decode();void Init(); FRAME Read(); void Release();private:
STREAMINFO streamInfo;
std::string decoder_type;
std::string pixel_transform_type;
bool ifError;
int video_stream_idx;
int got_picture;
int runFlag;
std::queue frameQueue;
std::mutex lock;AVDictionary *options; AVInputFormat *inputFmt; AVFormatContext *ifmt_ctx; AVCodecContext *pCodecCtx; AVCodec *decoder; AVFrame *pFrame; AVFrame *FrameBGR; AVPacket *pPacket; struct SwsContext *img_convert_ctx;
};
</unistd.h></stdio.h>
This is a discussion topic separated from the original topic at https://juejin.cn/post/7369132465020190746