博文解决问题:

1,此篇博文旨在解决使用nvjpeg库把GpuMat编码成jpg的问题。我的项目是使用opencv硬解码出GpuMat之后,把GpuMat编码成jpg,再转成base64进行数据传输

2,解决jpg图片解码成mat对象

运行环境:

1,cuda+cudnn+opencv(cuda)

环境搭建请参考刘老师的:

OpenCV4.7.0、FFmpeg5.1 Nvidia GPU视频硬解码_opengl硬解码-CSDN博客

1,参考链接:

1)官方cuda的api链接:

nvJPEG

2)官方github编码解码的demo参考链接:

CUDALibrarySamples/nvJPEG/Image-Resize/imageResize.cpp at master · NVIDIA/CUDALibrarySamples · GitHub

3)opencv的GpuMat对象的参考链接:

OpenCV: cv::cuda::GpuMat 类参考 - OpenCV 计算机视觉库

2,编码流程

在编码流程上cpu和gpu并无本质不同,二者的区别在于:gpu可以直接使用GpuMat的显存,不用像cpu那样先用cudaMalloc申请显存、再用cudaMemcpy拷贝数据。仅此一点不同。

1)GpuMat =>填充=>nvjpegImage_t

cpu:

gpu:

这里有一个坑请注意:

 编码的时候存在参数

 nvjpegInputFormat_t 这个枚举定义如下

BGR和BGRI是有区别的类似yuv420和yuv420sp的区别 

BGR的内存排布是BGR三个通道分开排序 如 3 h w这样

BGRI的内存排布是BGR三个通道在一起排序 如 h w 3这样(我要是讲的不懂请自行百度)

由于opencv是hw3这种内存排列我们用NVJPEG_INPUT_BGRI这个参数

在这里还有一个坑

cpu和gpu的mat对象在内存排布上存在差异:cpu的跨度step等于3倍宽度;而gpu的跨度存在padding(对齐填充),目的是加速运算,因此step不一定等于3倍宽度。这里的三倍指的是BGR 3个通道。

从打印可以看出cpu跨度等于3倍的宽度 ,而gpu是大于的。我们在填充nvjpegImage_t需要注意。

2)=>设置参数

3)=>编码jpg=>nvjpegEncodeImage

4)=>获取编码结果=>nvjpegEncodeRetrieveBitstream


不理解没关系代码说话,实际跑跑就可以

#include <opencv2/opencv.hpp>
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <algorithm>

#include <string.h>  // strcmpi
#include <dirent.h>  
#include <sys/stat.h>
#include <sys/types.h>

#include <cuda_runtime_api.h>
#include <nvjpeg.h>


// Evaluates a CUDA runtime call once. If the returned cudaError_t is not
// cudaSuccess, prints the numeric error code together with the file and line
// to std::cout and terminates the process with exit(1).
#define CHECK_CUDA(call)                                                        \
    {                                                                           \
        cudaError_t _e = (call);                                                \
        if (_e != cudaSuccess)                                                  \
        {                                                                       \
            std::cout << "CUDA Runtime failure: '#" << _e << "' at " <<  __FILE__ << ":" << __LINE__ << std::endl;\
            exit(1);                                                            \
        }                                                                       \
    }

// Evaluates an nvJPEG call once. If the returned nvjpegStatus_t is not
// NVJPEG_STATUS_SUCCESS, prints the numeric status together with the file and
// line to std::cout and terminates the process with exit(1).
#define CHECK_NVJPEG(call)                                                      \
    {                                                                           \
        nvjpegStatus_t _e = (call);                                             \
        if (_e != NVJPEG_STATUS_SUCCESS)                                        \
        {                                                                       \
            std::cout << "NVJPEG failure: '#" << _e << "' at " <<  __FILE__ << ":" << __LINE__ << std::endl;\
            exit(1);                                                            \
        }                                                                       \
    }

// *****************************************************************************
// reading input directory to file list
// -----------------------------------------------------------------------------
/**
 * Collects input files from a path.
 *
 * If sInputPath names a regular file it is appended to filelist as-is. If it
 * names a directory, every regular file below it (recursively) is appended.
 * Symbolic links inside directories are not followed.
 *
 * @param sInputPath  file or directory to scan; a trailing '/' is optional.
 * @param filelist    receives the discovered paths (existing content is kept).
 * @return 0 on success, 1 if the path does not exist, is neither a regular
 *         file nor a directory, or the directory cannot be opened.
 */
int readInput(const std::string &sInputPath, std::vector<std::string> &filelist)
{
    struct stat s;
    if (stat(sInputPath.c_str(), &s) != 0)
    {
        std::cout << "Cannot find input path " << sInputPath << std::endl;
        return 1;
    }

    // S_ISREG/S_ISDIR compare the whole file-type field. A plain bit test
    // against S_IFREG/S_IFDIR also matches sockets and block devices.
    if (S_ISREG(s.st_mode))
    {
        filelist.push_back(sInputPath);
        return 0;
    }

    if (!S_ISDIR(s.st_mode))
    {
        std::cout << "Cannot open input: " << sInputPath << std::endl;
        return 1;
    }

    DIR *dir_handle = opendir(sInputPath.c_str());
    if (!dir_handle)
    {
        std::cout << "Cannot open input directory: " << sInputPath << std::endl;
        return 1;
    }

    // Make sure directory and entry name are separated by exactly one '/'.
    std::string sBase = sInputPath;
    if (sBase.empty() || sBase[sBase.size() - 1] != '/')
    {
        sBase += '/';
    }

    struct dirent *dir;
    while ((dir = readdir(dir_handle)) != NULL)
    {
        const std::string sName = dir->d_name;
        if (sName == "." || sName == "..")
        {
            continue;
        }

        const std::string sPath = sBase + sName;
        bool bIsFile = (dir->d_type == DT_REG);
        bool bIsDir = (dir->d_type == DT_DIR);

        // Some file systems do not fill in d_type; ask lstat() instead.
        if (dir->d_type == DT_UNKNOWN)
        {
            struct stat entry;
            if (lstat(sPath.c_str(), &entry) == 0)
            {
                bIsFile = S_ISREG(entry.st_mode);
                bIsDir = S_ISDIR(entry.st_mode);
            }
        }

        if (bIsFile)
        {
            filelist.push_back(sPath);
        }
        else if (bIsDir)
        {
            // Best effort: an unreadable sub-directory is reported by the
            // recursive call but does not abort the scan of its siblings.
            readInput(sPath, filelist);
        }
    }
    closedir(dir_handle);

    return 0;
}


using namespace cv;
using namespace std;

// File-scope nvJPEG objects. They are created in main() and used by
// decodeResizeEncodeOneImage() for encoding.
nvjpegHandle_t nvjpeg_handle;               // library handle (nvjpegCreateSimple)
nvjpegJpegStream_t nvjpeg_jpeg_stream;      // bitstream object; created in main(), not used by the encode path
nvjpegJpegState_t nvjpeg_decoder_state;     // decoder state; created in main(), not used by the encode path
nvjpegEncoderParams_t nvjpeg_encode_params; // quality / sampling-factor settings for the encoder
nvjpegEncoderState_t nvjpeg_encoder_state;  // encoder state passed to nvjpegEncodeImage / RetrieveBitstream

int decodeResizeEncodeOneImage(std::string sImagePath, std::string sOutputPath, double &time, int resize_quality)
{
    nvjpegInputFormat_t iformat = NVJPEG_INPUT_BGRI;

    // timing for resize
    time = 0.;
    float resize_time = 0.;
    cudaEvent_t start, stop;
    CHECK_CUDA(cudaEventCreate(&start));
    CHECK_CUDA(cudaEventCreate(&stop));

    cv::Mat m_frame = imread(sImagePath);

    cv::cuda::GpuMat gpu_frame;
    gpu_frame.upload(m_frame);

    //uchar* gpu_data = gpu_frame.ptr<uchar>();

    int width = m_frame.cols;  // 宽度
    int height = m_frame.rows; // 高度

    int width_gpu = gpu_frame.cols;  // 宽度
    int height_gpu = gpu_frame.rows; // 高度

    cout << "CPU_W: " << width << " CPU_H:" << height << " cpu_step: " << m_frame.step << endl;
    cout << "GPU_W: " << width_gpu << " GPU_H:" << height_gpu << " GPU_step: " << gpu_frame.step << endl;


    nvjpegImage_t imgResize;

    //cpu需要申请内存 GPU不需要
    unsigned char * pResizeBuffer = NULL;
    cudaError_t eCopy1 = cudaMalloc(&pResizeBuffer,  gpu_frame.step * height);
    // CPU
    // CHECK_CUDA(cudaMemcpy(pResizeBuffer, m_frame.data, m_frame.total() * m_frame.elemSize(), cudaMemcpyHostToDevice));
    // imgResize.channel[0] = pResizeBuffer;
    // imgResize.pitch[0] = (unsigned int)m_frame.step;

    // GPU
    imgResize.channel[0] = gpu_frame.data;
    imgResize.pitch[0] = (unsigned int)gpu_frame.step;

    // nvJPEG encoder parameter setting
    CHECK_NVJPEG(nvjpegEncoderParamsSetQuality(nvjpeg_encode_params, resize_quality, NULL));

    // CHECK_NVJPEG(nvjpegEncoderParamsSetOptimizedHuffman(nvjpeg_encode_params, 1, NULL));

    CHECK_NVJPEG(nvjpegEncoderParamsSetSamplingFactors(nvjpeg_encode_params, NVJPEG_CSS_444, NULL));

    // Timing start
    CHECK_CUDA(cudaEventRecord(start, 0));

    // encoding the resize data
    CHECK_NVJPEG(nvjpegEncodeImage(nvjpeg_handle,
        nvjpeg_encoder_state,
        nvjpeg_encode_params,
        &imgResize,
        iformat,
        width,
        height,
        NULL));

    // retrive the encoded bitstream for file writing
    std::vector<unsigned char> obuffer;
    size_t length;
    CHECK_NVJPEG(nvjpegEncodeRetrieveBitstream(
        nvjpeg_handle,
        nvjpeg_encoder_state,
        NULL,
        &length,
        NULL));

    obuffer.resize(length);

    CHECK_NVJPEG(nvjpegEncodeRetrieveBitstream(
        nvjpeg_handle,
        nvjpeg_encoder_state,
        obuffer.data(),
        &length,
        NULL));

    // Timing stop
    CHECK_CUDA(cudaEventRecord(stop, 0));
    CHECK_CUDA(cudaEventSynchronize(stop));

    // file writing
    // std::cout << "Resize-width: " << dstSize.width << " Resize-height: " << dstSize.height << std::endl;
    std::string output_filename = sOutputPath + "/" + "1" + ".jpg";
    char directory[120];
    char mkdir_cmd[256];
    std::string folder = sOutputPath;
    output_filename = folder + "/"+ "1" +".jpg";
#if !defined(_WIN32)
    sprintf(directory, "%s", folder.c_str());
    sprintf(mkdir_cmd, "mkdir -p %s 2> /dev/null", directory);
#else
    sprintf(directory, "%s", folder.c_str());
    sprintf(mkdir_cmd, "mkdir %s 2> nul", directory);
#endif

    int ret = system(mkdir_cmd);

    std::cout << "Writing JPEG file: " << output_filename << std::endl;
    std::ofstream outputFile(output_filename.c_str(), std::ios::out | std::ios::binary);
    outputFile.write(reinterpret_cast<const char *>(obuffer.data()), static_cast<int>(length));
    
    // Free memory
    // CHECK_CUDA(cudaFree(pBuffer));
    CHECK_CUDA(cudaFree(pResizeBuffer));

    // get timing
    CHECK_CUDA(cudaEventElapsedTime(&resize_time, start, stop));
    time = (double)resize_time;
    return EXIT_SUCCESS;
}


// *****************************************************************************
// main image resize function
// -----------------------------------------------------------------------------
int main(int argc, const char *argv[])
{
    CHECK_NVJPEG(nvjpegCreateSimple(&nvjpeg_handle));
    CHECK_NVJPEG(nvjpegJpegStateCreate(nvjpeg_handle, &nvjpeg_decoder_state));

    // create bitstream object
    CHECK_NVJPEG(nvjpegJpegStreamCreate(nvjpeg_handle, &nvjpeg_jpeg_stream));
    CHECK_NVJPEG(nvjpegEncoderStateCreate(nvjpeg_handle, &nvjpeg_encoder_state, NULL));
    CHECK_NVJPEG(nvjpegEncoderParamsCreate(nvjpeg_handle, &nvjpeg_encode_params, NULL));

    std::string sInputPath("../input_images/img1.jpg");
    std::string sOutputPath("4");
    int resize_quality = 85;

    int error_code = 1;

    double total_time = 0., decode_time = 0.;
    int total_images = 0;

    std::vector<std::string> inputFiles;
    if (readInput(sInputPath, inputFiles))
    {
        return error_code;
    }
    for (unsigned int i = 0; i < inputFiles.size(); i++)
    {
        std::string &sFileName = inputFiles[i];
        std::cout << "Processing file: " << sFileName << std::endl;

        int image_error_code = decodeResizeEncodeOneImage(sFileName, sOutputPath, decode_time, resize_quality);

        if (image_error_code)
        {
            std::cerr << "Error processing file: " << sFileName << std::endl;
            return image_error_code;
        }
        else
        {
            total_images++;
            total_time += decode_time;
        }
    }

    std::cout << "------------------------------------------------------------- " << std::endl;
    std::cout << "Total images resized: " << total_images << std::endl;
    std::cout << "Total time spent on resizing: " << total_time << " (ms)" << std::endl;
    std::cout << "Avg time/image: " << total_time/total_images << " (ms)" << std::endl;
    std::cout << "------------------------------------------------------------- " << std::endl;

    CHECK_NVJPEG(nvjpegEncoderParamsDestroy(nvjpeg_encode_params));
    CHECK_NVJPEG(nvjpegEncoderStateDestroy(nvjpeg_encoder_state));
    CHECK_NVJPEG(nvjpegJpegStateDestroy(nvjpeg_decoder_state));
    CHECK_NVJPEG(nvjpegDestroy(nvjpeg_handle));
    
    return 0;
}

CMakeLists.txt

# 
# Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
# 
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
# 
# ---[ Check cmake version.
CMAKE_MINIMUM_REQUIRED(VERSION 3.10.0 FATAL_ERROR)

# Always emit debug information.
add_definitions(-g)

INCLUDE(GNUInstallDirs)

# ---[ Project specification.
SET(PROJECT_NAME imageResize)
PROJECT(${PROJECT_NAME} LANGUAGES CUDA CXX)

# OpenCV is required: the source uses cv::Mat / cv::cuda::GpuMat.
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})

if(NOT DEFINED CMAKE_CUDA_STANDARD)
    set(CMAKE_CUDA_STANDARD 11)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)    
endif()

include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) 
link_directories(${CMAKE_CUDA_INCLUDE_DIRS})

# Locate the CUDA libraries in the directories the CUDA compiler links against.
if (UNIX)
    find_library(CUDART_LIBRARY cudart ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
    find_library(NVJPEG_LIBRARY nvjpeg ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
    find_library(NPPIG_LIBRARY nppig ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
    find_library(NPPC_LIBRARY nppc ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
    find_library(CULIBOS culibos ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
endif (UNIX)

# On Windows the library paths are hard-coded to a CUDA 10.1 installation.
if (MSVC OR WIN32 OR MSYS)
    MESSAGE( STATUS "CMAKE_CUDA_COMPILER:         " ${CMAKE_CUDA_COMPILER} )
    set(CUDA_LIBRARY_PATH "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/lib/x64")
    set(CUDART_LIBRARY ${CUDA_LIBRARY_PATH}/cudart.lib)
    set(NVJPEG_LIBRARY ${CUDA_LIBRARY_PATH}/nvjpeg.lib)
    set(NPPIG_LIBRARY ${CUDA_LIBRARY_PATH}/nppig.lib)
    set(NPPC_LIBRARY ${CUDA_LIBRARY_PATH}/nppc.lib)
endif()

# ---[ Use the default installation path if not set.
IF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
    SET(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR} CACHE PATH "" FORCE)
ENDIF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)

# ---[ Build type
IF(NOT CMAKE_BUILD_TYPE) 
    SET(CMAKE_BUILD_TYPE Develop)
ENDIF(NOT CMAKE_BUILD_TYPE)

SET(EXAMPLES_DESCRIPTOR_SOURCES "imageResize.cpp")
ADD_EXECUTABLE(${PROJECT_NAME} ${EXAMPLES_DESCRIPTOR_SOURCES})
# Compile the .cpp source with the CUDA compiler.
SET_SOURCE_FILES_PROPERTIES(${EXAMPLES_DESCRIPTOR_SOURCES} PROPERTIES LANGUAGE CUDA)
TARGET_COMPILE_FEATURES(${PROJECT_NAME} PUBLIC cxx_std_11)
SET_TARGET_PROPERTIES(${PROJECT_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
# NOTE(review): the property name below is misspelled (the CMake property is
# CUDA_SEPARABLE_COMPILATION). It is kept as-is because correcting it would
# switch on separable compilation and change the build; confirm whether that
# is wanted before renaming.
SET_TARGET_PROPERTIES(${PROJECT_NAME} PROPERTIES CUDA_SEPERABLE_COMPILATION ON)
TARGET_INCLUDE_DIRECTORIES(${PROJECT_NAME} PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
if (UNIX)
    TARGET_LINK_LIBRARIES(${PROJECT_NAME} PUBLIC ${CUDART_LIBRARY} ${NVJPEG_LIBRARY} ${NPPIG_LIBRARY} ${NPPC_LIBRARY} ${CULIBOS} ${OpenCV_LIBS})
endif (UNIX)

# The source uses OpenCV on every platform, so OpenCV must be linked here too.
if(MSVC OR WIN32 OR MSYS)
    TARGET_LINK_LIBRARIES(${PROJECT_NAME} PUBLIC ${CUDART_LIBRARY} ${NVJPEG_LIBRARY} ${NPPIG_LIBRARY} ${NPPC_LIBRARY} ${OpenCV_LIBS})
endif(MSVC OR WIN32 OR MSYS)

if(APPLE)
  # We need to add the path to the driver (libcuda.dylib) as an rpath, 
  # so that the static cuda runtime can find it at runtime.
  set_property(TARGET ${PROJECT_NAME} 
               PROPERTY
               BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
endif(APPLE)

INSTALL(TARGETS ${PROJECT_NAME} DESTINATION bin)

3,解码流程

解码和编码流程大体相近,只是反过来而已,这里不再赘述,请直接参考代码

#include <chrono>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include "opencv2/opencv.hpp"
#include <cuda_runtime_api.h>
#include <nvjpeg.h>


// Evaluates a CUDA runtime call once. If the returned cudaError_t is not
// cudaSuccess, prints the numeric error code together with the file and line
// to std::cout and terminates the process with exit(1).
#define CHECK_CUDA(call)                                                        \
    {                                                                           \
        cudaError_t _e = (call);                                                \
        if (_e != cudaSuccess)                                                  \
        {                                                                       \
            std::cout << "CUDA Runtime failure: '#" << _e << "' at " <<  __FILE__ << ":" << __LINE__ << std::endl;\
            exit(1);                                                            \
        }                                                                       \
    }

// Evaluates an nvJPEG call once. If the returned nvjpegStatus_t is not
// NVJPEG_STATUS_SUCCESS, prints the numeric status together with the file and
// line to std::cout and terminates the process with exit(1).
#define CHECK_NVJPEG(call)                                                      \
    {                                                                           \
        nvjpegStatus_t _e = (call);                                             \
        if (_e != NVJPEG_STATUS_SUCCESS)                                        \
        {                                                                       \
            std::cout << "NVJPEG failure: '#" << _e << "' at " <<  __FILE__ << ":" << __LINE__ << std::endl;\
            exit(1);                                                            \
        }                                                                       \
    }

int main()
{
	printf("he!");
	// step1 ceate nvJpeg handle
	/*
	struct nvjpegHandle;
	typedef struct nvjpegHandle* nvjpegHandle_t;
	两者通用 ,上述宏定义是官方文档
	*/
	nvjpegHandle* nvjpegHandle = nullptr;
	CHECK_NVJPEG(nvjpegCreateSimple(&nvjpegHandle));

	// step2 Create JPEG state with the helper function 
	nvjpegJpegState* nvjpegJpegState = nullptr;
	CHECK_NVJPEG(nvjpegJpegStateCreate(nvjpegHandle, &nvjpegJpegState));

	std::string strFileName = "/root/yanbo/test_video/decode/gpu/jpg/CUDALibrarySamples-master/nvJPEG/decode/1.jpg";
	// Read an image from disk.
	std::ifstream input(strFileName.c_str(),
		std::ios::in | std::ios::binary | std::ios::ate);
	if (!(input.is_open())) {
		std::cerr << "Cannot open image: " << strFileName
			<< ", removing it from image list" << std::endl;
	}
	// Get the size
	std::streamsize file_size = input.tellg();
	input.seekg(0, std::ios::beg);
	char* pchData = (char*)malloc(file_size);
	if (1)
	{
		if (!input.read(pchData, file_size)) {
			std::cerr << "Cannot read from file: " << strFileName
				<< ", removing it from image list" << std::endl;

		}

	}

	int channels[NVJPEG_MAX_COMPONENT] = { 0 };
	int height[NVJPEG_MAX_COMPONENT] = { 0 };
	int width[NVJPEG_MAX_COMPONENT] = { 0 };
	nvjpegChromaSubsampling_t subsampling;
	CHECK_NVJPEG(nvjpegGetImageInfo(nvjpegHandle, (const unsigned char*)pchData, file_size, channels, &subsampling, width, height));
	cudaStream_t stream;
	printf("hello");
	CHECK_CUDA(cudaStreamCreate(&stream));

	nvjpegImage_t dstImage;
	memset(&dstImage, 0, sizeof(nvjpegImage_t));
	// realloc output buffer if required
	int mul = 3;
	
	for (int c = 0; c < 1; c++) {
		int aw = mul * width[0];
		int ah = height[0];
		int sz = aw * ah;
		dstImage.pitch[c] = aw;
		//if (sz > dstImage.pitch[c]) {
		/*	if (dstImage.channel[c]) {
				CHECK_CUDA(cudaFree(dstImage.channel[c]));
			}*/
			CHECK_CUDA(cudaMalloc((void**)&dstImage.channel[c], sz));
			//dstImage.pitch[c] = sz;
		//}
	}
	if (1)
	{
		auto t_start = std::chrono::high_resolution_clock::now();
		nvjpegOutputFormat_t outFormat = nvjpegOutputFormat_t::NVJPEG_OUTPUT_BGRI;
		nvjpegDecode(nvjpegHandle, nvjpegJpegState, (const unsigned char*)pchData, file_size,
			outFormat, &dstImage, stream);
		/*CHECK_NVJPEG(nvjpegDecode(nvjpegHandle, nvjpegJpegState, (const unsigned char*)pchData, file_size,
			outFormat, &dstImage, stream));*/
		auto t_end = std::chrono::high_resolution_clock::now();
		auto ms = std::chrono::duration<float, std::milli>(t_end - t_start).count();
		std::cout << "[ " << " " << " ] " << ms << " ms." << std::endl;
	}

	//CHECK_CUDA(cudaStreamSynchronize(stream));
	int sz = height[0] * width[0] * sizeof(unsigned char);
	unsigned char* pvB = (unsigned char*)malloc(sz *3);
	/*unsigned char* pvG = (unsigned char*)malloc(sz);
	unsigned char* pvR = (unsigned char*)malloc(sz);*/
	CHECK_CUDA(cudaMemcpy(pvB, dstImage.channel[0], sz*3, cudaMemcpyDeviceToHost));
	//CHECK_CUDA(cudaMemcpy(pvG, dstImage.channel[1], sz, cudaMemcpyDeviceToHost));
	//CHECK_CUDA(cudaMemcpy(pvR, dstImage.channel[2], sz, cudaMemcpyDeviceToHost));
	cv::Mat B = cv::Mat(cv::Size(width[0], height[0]), CV_8UC3, pvB);
	//cv::Mat G = cv::Mat(cv::Size(width, height), CV_8UC1, pvG);
	//cv::Mat R = cv::Mat(cv::Size(width, height), CV_8UC1, pvR);
	cv::imwrite("./rgb_2_debug.bmp", B);
	//cv::imwrite("H:/temp/g.bmp", G);
	//cv::imwrite("H:/temp/b.bmp", B);

	if (pvB)
	{
		free(pvB);
		pvB = nullptr;
	}

	printf("hello world!");
	return 0;
}

CMakeLists.txt

# Build script for the nvJPEG decode example (main.cpp).
cmake_minimum_required(VERSION 3.10)
project(nvjpeg_example)

# Both OpenCV and the CUDA toolkit must be found, otherwise configuration fails.
find_package(OpenCV REQUIRED)
find_package(CUDA REQUIRED)

# Header and library search paths reported by the two packages.
include_directories(${OpenCV_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS})
link_directories(${CUDA_LIBRARY_DIRS})

# Link against OpenCV plus the nvjpeg and cudart libraries by name.
add_executable(nvjpeg_example main.cpp)
target_link_libraries(nvjpeg_example ${OpenCV_LIBS} nvjpeg cudart)

 代码我是调过的问题不大

4,视频编码jpg

上述操作仅限于图片编码jpg和解码jpg;若输入是视频,则需要略作修改

这里存在一个坑是:

  cv::cuda::GpuMat gpu_frame;
    reader->nextFrame(gpu_frame);

 这里视频解码器解码出来的gpu mat对象是4通道(channel 4),而我们上述的代码处理的是3通道(channel 3),需要增加如下转换:

cv::cuda::cvtColor(gpu_frame, gpu_frame, cv::COLOR_BGRA2BGR);

完整代码如下:

主函数:

#include "encodejpg.h"

#include <opencv2/opencv.hpp>
#include <opencv2/cudacodec.hpp>
#include <opencv2/cudaimgproc.hpp>
using namespace cv;
using namespace std;


int main(int argc, const char *argv[])
{
    encodejpg encoder;
    std::vector<unsigned char> obuffer;

    // std::string sInputPath("../2.jpg");
    // std::string sOutputPath("4");
    int resize_quality = 95;

    // cv::Mat m_frame = imread(sInputPath);
    // std::vector<uchar> vec = (m_frame.isContinuous() ? m_frame : m_frame.clone()).reshape(1, 1); // data copy here
    // cout << "mat: " << vec.size() << endl;

    // std::vector<uchar> encodedImg;
    // // 设置 JPEG 编码参数(质量为95)
    // std::vector<int> params = {cv::IMWRITE_JPEG_QUALITY, 95};
    // // 将图像编码为 JPG 格式
    // bool success = cv::imencode(".jpg", m_frame, encodedImg, params);
    // if (!success) {
    //     std::cerr << "图像编码失败!" << std::endl;
    //     return -1;
    // }
    // cout << "cpu: " << encodedImg.size() << endl;

    const std::string filename = "/root/yanbo/test_video/decode/gpu/11932615_3840_2160_30fps.mp4";
    cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(filename);
    cv::cuda::GpuMat gpu_frame;
    reader->nextFrame(gpu_frame);

    cv::Mat cpu_frame;
    gpu_frame.download(cpu_frame);
    std::cout<< cpu_frame.channels() << std::endl;
    std::cout<< "cpu elemSize: " << cpu_frame.elemSize() << std::endl;
    // std::cout<< "elemSize1: " << gpu_frame.elemSize1() << std::endl;
    std::cout<< "cpu step: " << cpu_frame.step << std::endl;


    std::cout<< gpu_frame.channels() << std::endl;
    std::cout<< gpu_frame.depth() << std::endl;
    std::cout<< "elemSize: " << gpu_frame.elemSize() << std::endl;
    std::cout<< "elemSize1: " << gpu_frame.elemSize1() << std::endl;
    std::cout<< "step1: " << gpu_frame.step1() << std::endl;
    std::cout<< "step: " << gpu_frame.step << std::endl;
    std::cout<< "size: " << gpu_frame.size() << std::endl;

    cv::cuda::cvtColor(gpu_frame, gpu_frame, cv::COLOR_BGRA2BGR);
    // gpu_frame.upload(m_frame);
    auto start = std::chrono::high_resolution_clock::now();


    encoder.EncodeOneImageFromGpu(gpu_frame, resize_quality, obuffer);
    // cv:: Mat frame;
    // gpu_frame.download(frame);
    // encoder.EncodeOneImageFromCpu(frame, resize_quality, obuffer);

    auto end = std::chrono::high_resolution_clock::now();   // 记录结束时间
    auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - start); // 计算毫秒
    std::cout << "Elapsed time: " << elapsed.count() << " ms" << std::endl;


    cout << "gpu: " << obuffer.size() << endl;

    std::ofstream outputFile("1.jpg", std::ios::out | std::ios::binary);
    outputFile.write(reinterpret_cast<const char *>(obuffer.data()), static_cast<int>(obuffer.size()));
    

    cv::Mat decodedImg = cv::imdecode(obuffer, cv::IMREAD_COLOR);
    if (decodedImg.empty()) {
        std::cerr << "图像解码失败!" << std::endl;
        return -1;
    }
    imwrite("2.jpg", decodedImg);


    

    return 0;
}

封装的接口类:

#include "encodejpg.h"

// Creates every nvJPEG object the encoder needs. Any failure terminates the
// process through CHECK_NVJPEG.
encodejpg::encodejpg()
{
    // The library handle comes first; all other objects are created from it.
    CHECK_NVJPEG(nvjpegCreateSimple(&m_nvjpeg_handle));

    // Decoder-side objects: JPEG state and bitstream object.
    CHECK_NVJPEG(nvjpegJpegStateCreate(m_nvjpeg_handle, &m_nvjpeg_decoder_state));
    CHECK_NVJPEG(nvjpegJpegStreamCreate(m_nvjpeg_handle, &m_nvjpeg_jpeg_stream));

    // Encoder-side objects: state and parameters, created on the default stream.
    CHECK_NVJPEG(nvjpegEncoderStateCreate(m_nvjpeg_handle, &m_nvjpeg_encoder_state, nullptr));
    CHECK_NVJPEG(nvjpegEncoderParamsCreate(m_nvjpeg_handle, &m_nvjpeg_encode_params, nullptr));
}

// Releases every nvJPEG object created by the constructor, ending with the
// library handle. Any failure terminates the process through CHECK_NVJPEG.
encodejpg::~encodejpg()
{
    CHECK_NVJPEG(nvjpegEncoderParamsDestroy(m_nvjpeg_encode_params));
    CHECK_NVJPEG(nvjpegEncoderStateDestroy(m_nvjpeg_encoder_state));
    // The bitstream object is created in the constructor and was previously
    // never released.
    CHECK_NVJPEG(nvjpegJpegStreamDestroy(m_nvjpeg_jpeg_stream));
    CHECK_NVJPEG(nvjpegJpegStateDestroy(m_nvjpeg_decoder_state));
    CHECK_NVJPEG(nvjpegDestroy(m_nvjpeg_handle));
}

/**
 * Encodes a host-memory BGR image to JPEG with nvJPEG.
 *
 * The pixels are copied into a temporary device buffer, encoded with 4:4:4
 * sampling and the bitstream is stored in obuffer.
 *
 * @param cpu_frame       8-bit 3-channel (CV_8UC3) image; may be a
 *                        non-continuous ROI.
 * @param encode_quality  JPEG quality passed to nvjpegEncoderParamsSetQuality.
 * @param obuffer         receives the encoded JPEG bytes (resized to fit).
 * @return true on success, false if cpu_frame is empty or not CV_8UC3.
 *         CUDA/nvJPEG failures terminate the process through the CHECK_* macros.
 */
bool encodejpg::EncodeOneImageFromCpu(cv::Mat &cpu_frame, int encode_quality, std::vector<unsigned char> &obuffer)
{
    if (cpu_frame.empty() || cpu_frame.type() != CV_8UC3)
    {
        std::cerr << "EncodeOneImageFromCpu: expected a non-empty CV_8UC3 image" << std::endl;
        return false;
    }

    const int width = cpu_frame.cols;
    const int height = cpu_frame.rows;
    // Bytes of real pixel data per row; the host step may be larger than this.
    const size_t row_bytes = static_cast<size_t>(width) * cpu_frame.elemSize();

    // Host images need a device copy; the device buffer is tightly packed.
    unsigned char *device_pixels = NULL;
    CHECK_CUDA(cudaMalloc((void**)&device_pixels, row_bytes * static_cast<size_t>(height)));
    // A pitched copy is correct for both continuous and non-continuous Mats,
    // whereas a flat copy of total()*elemSize() bytes is only right for the former.
    CHECK_CUDA(cudaMemcpy2D(device_pixels, row_bytes,
        cpu_frame.data, cpu_frame.step,
        row_bytes, static_cast<size_t>(height),
        cudaMemcpyHostToDevice));

    // Interleaved BGR input uses channel[0] only; the rest stays zeroed.
    nvjpegImage_t imgEncode = {};
    imgEncode.channel[0] = device_pixels;
    imgEncode.pitch[0] = (unsigned int)row_bytes;

    // nvJPEG encoder parameter setting
    CHECK_NVJPEG(nvjpegEncoderParamsSetQuality(m_nvjpeg_encode_params, encode_quality, NULL));
    CHECK_NVJPEG(nvjpegEncoderParamsSetSamplingFactors(m_nvjpeg_encode_params, NVJPEG_CSS_444, NULL));

    CHECK_NVJPEG(nvjpegEncodeImage(m_nvjpeg_handle,
        m_nvjpeg_encoder_state,
        m_nvjpeg_encode_params,
        &imgEncode,
        m_iformat,
        width,
        height,
        NULL));

    // First call with a NULL buffer only queries the bitstream length.
    size_t length = 0;
    CHECK_NVJPEG(nvjpegEncodeRetrieveBitstream(
        m_nvjpeg_handle,
        m_nvjpeg_encoder_state,
        NULL,
        &length,
        NULL));

    obuffer.resize(length);
    CHECK_NVJPEG(nvjpegEncodeRetrieveBitstream(
        m_nvjpeg_handle,
        m_nvjpeg_encoder_state,
        obuffer.data(),
        &length,
        NULL));

    // Free memory
    CHECK_CUDA(cudaFree(device_pixels));
    return true;
}

/**
 * Encodes a device-memory BGR image to JPEG with nvJPEG, reading the pixels
 * directly from the GpuMat (no extra allocation or copy).
 *
 * @param gpu_frame       8-bit 3-channel (CV_8UC3) image in device memory.
 *                        4-channel frames must be converted to BGR by the
 *                        caller first.
 * @param encode_quality  JPEG quality passed to nvjpegEncoderParamsSetQuality.
 * @param obuffer         receives the encoded JPEG bytes (resized to fit).
 * @return true on success, false if gpu_frame is empty or not CV_8UC3.
 *         nvJPEG failures terminate the process through CHECK_NVJPEG.
 */
bool encodejpg::EncodeOneImageFromGpu(cv::cuda::GpuMat &gpu_frame, int encode_quality, std::vector<unsigned char> &obuffer)
{
    if (gpu_frame.empty() || gpu_frame.type() != CV_8UC3)
    {
        std::cerr << "EncodeOneImageFromGpu: expected a non-empty CV_8UC3 image" << std::endl;
        return false;
    }

    const int width_gpu = gpu_frame.cols;
    const int height_gpu = gpu_frame.rows;

    std::cout << "GPU_W: " << width_gpu << " GPU_H:" << height_gpu << " GPU_step: " << gpu_frame.step << std::endl;

    // Interleaved BGR input uses channel[0] only; the rest stays zeroed.
    // The pitch must be the GpuMat's own step, not 3 * width.
    nvjpegImage_t imgEncode = {};
    imgEncode.channel[0] = gpu_frame.data;
    imgEncode.pitch[0] = (unsigned int)gpu_frame.step;

    // nvJPEG encoder parameter setting
    CHECK_NVJPEG(nvjpegEncoderParamsSetQuality(m_nvjpeg_encode_params, encode_quality, NULL));
    CHECK_NVJPEG(nvjpegEncoderParamsSetSamplingFactors(m_nvjpeg_encode_params, NVJPEG_CSS_444, NULL));

    CHECK_NVJPEG(nvjpegEncodeImage(m_nvjpeg_handle,
        m_nvjpeg_encoder_state,
        m_nvjpeg_encode_params,
        &imgEncode,
        m_iformat,
        width_gpu,
        height_gpu,
        NULL));

    // First call with a NULL buffer only queries the bitstream length.
    size_t length = 0;
    CHECK_NVJPEG(nvjpegEncodeRetrieveBitstream(
        m_nvjpeg_handle,
        m_nvjpeg_encoder_state,
        NULL,
        &length,
        NULL));

    obuffer.resize(length);
    CHECK_NVJPEG(nvjpegEncodeRetrieveBitstream(
        m_nvjpeg_handle,
        m_nvjpeg_encoder_state,
        obuffer.data(),
        &length,
        NULL));
    return true;
}
#pragma once

#include <opencv2/opencv.hpp>
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <algorithm>

#include <string.h>  // strcmpi
#include <dirent.h>  
#include <sys/stat.h>
#include <sys/types.h>

#include <cuda_runtime_api.h>
#include <nvjpeg.h>

// Evaluates a CUDA runtime call once. If the returned cudaError_t is not
// cudaSuccess, prints the numeric error code together with the file and line
// to std::cout and terminates the process with exit(1).
#define CHECK_CUDA(call)                                                        \
    {                                                                           \
        cudaError_t _e = (call);                                                \
        if (_e != cudaSuccess)                                                  \
        {                                                                       \
            std::cout << "CUDA Runtime failure: '#" << _e << "' at " <<  __FILE__ << ":" << __LINE__ << std::endl;\
            exit(1);                                                            \
        }                                                                       \
    }

// Evaluates an nvJPEG call once. If the returned nvjpegStatus_t is not
// NVJPEG_STATUS_SUCCESS, prints the numeric status together with the file and
// line to std::cout and terminates the process with exit(1).
#define CHECK_NVJPEG(call)                                                      \
    {                                                                           \
        nvjpegStatus_t _e = (call);                                             \
        if (_e != NVJPEG_STATUS_SUCCESS)                                        \
        {                                                                       \
            std::cout << "NVJPEG failure: '#" << _e << "' at " <<  __FILE__ << ":" << __LINE__ << std::endl;\
            exit(1);                                                            \
        }                                                                       \
    }

/**
 * Thin wrapper around the nvJPEG encoder that turns BGR images held in host
 * memory (cv::Mat) or device memory (cv::cuda::GpuMat) into JPEG bitstreams.
 *
 * The constructor creates the nvJPEG objects and the destructor destroys
 * them. Because the class owns these raw handles, copying is disabled: a copy
 * would destroy the same handles twice.
 */
class encodejpg
{

public:
    encodejpg();
    ~encodejpg();

    encodejpg(const encodejpg &) = delete;
    encodejpg &operator=(const encodejpg &) = delete;

    /// Encodes a host-memory image; the JPEG bytes are stored in obuffer.
    bool EncodeOneImageFromCpu(cv::Mat &cpu_frame, int encode_quality, std::vector<unsigned char> &obuffer);
    /// Encodes a device-memory image; the JPEG bytes are stored in obuffer.
    bool EncodeOneImageFromGpu(cv::cuda::GpuMat &gpu_frame, int encode_quality, std::vector<unsigned char> &obuffer);

private:
    // nvJPEG objects; null until the constructor has created them.
    nvjpegHandle_t           m_nvjpeg_handle = nullptr;
    nvjpegJpegStream_t       m_nvjpeg_jpeg_stream = nullptr;
    nvjpegJpegState_t        m_nvjpeg_decoder_state = nullptr;
    nvjpegEncoderParams_t    m_nvjpeg_encode_params = nullptr;
    nvjpegEncoderState_t     m_nvjpeg_encoder_state = nullptr;

    // Input layout handed to nvjpegEncodeImage: interleaved BGR.
    nvjpegInputFormat_t      m_iformat = NVJPEG_INPUT_BGRI;
};


感谢:

在这里特别感谢这个博文的博主看了他的博文少走不少弯路:

CUDA库之nvjpeg(一):入门介绍-CSDN博客

Logo

火山引擎开发者社区是火山引擎打造的AI技术生态平台,聚焦Agent与大模型开发,提供豆包系列模型(图像/视频/视觉)、智能分析与会话工具,并配套评测集、动手实验室及行业案例库。社区通过技术沙龙、挑战赛等活动促进开发者成长,新用户可领50万Tokens权益,助力构建智能应用。

更多推荐