CUDA nvjpeg库编码jpeg图像
本文介绍了基于GPU加速的JPEG编解码库nvJPEG及其性能测试。该库通过CUDA技术实现高性能JPEG编解码,特别适用于深度学习和大规模图像处理。实验对比了nvJPEG与OpenCV在AMD Ryzen7处理器和RTX5060显卡上的编码性能,测试采用4096x4096分辨率的三通道图像。文中详细给出了Visual Studio环境配置指南和完整的C++实现代码,包括错误处理、内存管理等关键环节。
·
1、介绍
nvJPEG:一个GPU加速的JPEG编解码库。
nvJPEG 库为深度学习和超大规模多媒体应用中常用的图像格式提供了高性能、GPU 加速的 JPEG 解码功能。该库提供单张和批量 JPEG 解码能力,可高效利用可用的 GPU 资源以实现最佳性能,并为用户提供了管理解码所需内存分配的灵活性。
nvJPEG库的编码函数可对用户的图像数据执行GPU加速压缩,生成JPEG码流。用户可以提供多种格式和色彩空间的输入数据,并通过参数控制编码过程。编码功能将使用用户提供的内存分配器来分配临时缓冲区。
官方文档:1. Introduction — nvJPEG 13.0 documentation
2、编码速度测试(图像压缩测试)
分别采用 OpenCV 的编码函数 imencode 与 nvJPEG 库进行编码,并对比两者的耗时。
测试数据格式:3通道4096x4096
配置:
CPU -- AMD Ryzen AI 7 H 350
GPU -- RTX5060
测试结果:

3、VisualStudio环境配置与代码
3.1 环境配置


3.2 代码
#include <fstream>
#include <vector>
#include <iostream>
#include <cstdint>
#include <opencv2/opencv.hpp>
#include <nvjpeg.h>
#include <cuda_runtime.h>
#include<chrono>
#if defined _DEBUG
#pragma comment(lib, "nvjpeg.lib")
#pragma comment(lib, "cudart.lib")
#pragma comment(lib, "opencv_world4100d.lib")
#else
#pragma comment(lib, "nvjpeg.lib")
#pragma comment(lib, "cudart.lib")
#pragma comment(lib, "opencv_world4100.lib")
#endif
// ----------------------------------------------------------------------
// CUDA runtime error check (for cudaMalloc, cudaMemcpy, cudaStream*, ...).
// Evaluates `status` exactly once. If the result is anything other than
// cudaSuccess, the CUDA error string plus the call site (file:line) is
// written to stderr and the process exits with EXIT_FAILURE.
// ----------------------------------------------------------------------
#define checkCudaErrors(status)                                              \
    do {                                                                     \
        const cudaError_t cuda_rc_ = (status);                               \
        if (cuda_rc_ == cudaSuccess) {                                       \
            break;                                                           \
        }                                                                    \
        std::cerr << "CUDA error: " << cudaGetErrorString(cuda_rc_)          \
                  << " at " << __FILE__ << ":" << __LINE__ << std::endl;     \
        exit(EXIT_FAILURE);                                                  \
    } while (0)
// ----------------------------------------------------------------------
// nvJPEG error check (for nvjpegCreateSimple, nvjpegEncodeImage, ...).
// Evaluates `status` exactly once. If the result is anything other than
// NVJPEG_STATUS_SUCCESS, the numeric nvjpegStatus_t code plus the call
// site (file:line) is written to stderr and the process exits with
// EXIT_FAILURE. The numeric code can be looked up in the nvjpegStatus_t
// enum in nvjpeg.h.
// ----------------------------------------------------------------------
#define checkNvjpegErrors(status)                                            \
    do {                                                                     \
        const nvjpegStatus_t nvjpeg_rc_ = (status);                          \
        if (nvjpeg_rc_ != NVJPEG_STATUS_SUCCESS) {                           \
            std::cerr << "NVJPEG error: code "                               \
                      << static_cast<int>(nvjpeg_rc_)                        \
                      << " at " << __FILE__ << ":" << __LINE__ << std::endl; \
            exit(EXIT_FAILURE);                                              \
        }                                                                    \
    } while (0)
// ======================
// JPEG 编码器类(基于 nvJPEG)
// ======================
class JpegNvEncoder {
public:
JpegNvEncoder() {
// 创建 nvJPEG 句柄
checkNvjpegErrors(nvjpegCreateSimple(&nv_handle_));
// 创建编码状态
checkNvjpegErrors(nvjpegEncoderStateCreate(nv_handle_, &nv_enc_state_, nullptr));
// 创建编码参数
checkNvjpegErrors(nvjpegEncoderParamsCreate(nv_handle_, &nv_enc_params_, nullptr));
// 创建 CUDA 流
checkCudaErrors(cudaStreamCreate(&stream_));
}
~JpegNvEncoder() {
// 释放编码参数
if (nv_enc_params_) {
checkNvjpegErrors(nvjpegEncoderParamsDestroy(nv_enc_params_));
nv_enc_params_ = nullptr;
}
// 释放编码状态
if (nv_enc_state_) {
checkNvjpegErrors(nvjpegEncoderStateDestroy(nv_enc_state_));
nv_enc_state_ = nullptr;
}
// 释放 nvJPEG 句柄
if (nv_handle_) {
checkNvjpegErrors(nvjpegDestroy(nv_handle_));
nv_handle_ = nullptr;
}
// 销毁 CUDA 流
if (stream_) {
checkCudaErrors(cudaStreamDestroy(stream_));
stream_ = nullptr;
}
// 释放设备上的图像通道内存
for (int i = 0; i < 3; ++i) {
if (nv_image_.channel[i]) {
checkCudaErrors(cudaFree(nv_image_.channel[i]));
nv_image_.channel[i] = nullptr;
}
}
}
// 编码函数:输入 OpenCV Mat (BGR), 输出 JPEG 文件
void encode(const cv::Mat& img_mat, const std::string& output_filename = "encode.jpg") {
if (img_mat.empty()) {
std::cerr << "[ERROR] Input image is empty!" << std::endl;
return;
}
if (img_mat.channels() != 3) {
std::cerr << "[ERROR] Only 3-channel (BGR) images are supported." << std::endl;
return;
}
int width = img_mat.cols;
int height = img_mat.rows;
int channel_size = width * height;
// 分离 BGR 通道
std::vector<cv::Mat> bgr_channels;
cv::split(img_mat, bgr_channels);
// 设置 nvjpeg 图像结构
nv_image_.pitch[0] = width; // R / B
nv_image_.pitch[1] = width; // G
nv_image_.pitch[2] = width; // B
nv_image_.pitch[3] = 0; // No Alpha
for (int i = 0; i < 3; ++i) {
checkCudaErrors(cudaMalloc(&nv_image_.channel[i], channel_size));
checkCudaErrors(cudaMemcpy(nv_image_.channel[i], bgr_channels[i].data, channel_size, cudaMemcpyHostToDevice));
}
nv_image_.channel[3] = nullptr; // No alpha
// 设置编码参数
checkNvjpegErrors(nvjpegEncoderParamsSetSamplingFactors(nv_enc_params_, NVJPEG_CSS_420, nullptr));
checkNvjpegErrors(nvjpegEncoderParamsSetQuality(nv_enc_params_, 70, nullptr));
checkNvjpegErrors(nvjpegEncoderParamsSetOptimizedHuffman(nv_enc_params_, 1, nullptr));
checkNvjpegErrors(nvjpegEncoderParamsSetEncoding(nv_enc_params_, nvjpegJpegEncoding_t::NVJPEG_ENCODING_PROGRESSIVE_DCT_HUFFMAN, nullptr));
// 执行编码
checkNvjpegErrors(nvjpegEncodeImage(
nv_handle_,
nv_enc_state_,
nv_enc_params_,
&nv_image_,
NVJPEG_INPUT_BGR, // 输入为 BGR
width,
height,
stream_
));
checkCudaErrors(cudaStreamSynchronize(stream_));
// 获取编码后的 JPEG 大小
size_t length = 0;
checkNvjpegErrors(nvjpegEncodeRetrieveBitstream(
nv_handle_,
nv_enc_state_,
nullptr,
&length,
stream_
));
checkCudaErrors(cudaStreamSynchronize(stream_));
//std::cout << "[INFO] Encoded JPEG size: " << length << " bytes" << std::endl;
// 获取实际的 JPEG 数据
std::vector<unsigned char> jpeg_data(length);
checkNvjpegErrors(nvjpegEncodeRetrieveBitstream(
nv_handle_,
nv_enc_state_,
jpeg_data.data(),
&length,
0 // 同步模式
));
// 写入到输出文件
std::ofstream out_file(output_filename, std::ios::out | std::ios::binary);
if (!out_file) {
std::cerr << "[ERROR] Cannot open output file: " << output_filename << std::endl;
return;
}
out_file.write(reinterpret_cast<const char*>(jpeg_data.data()), length);
out_file.close();
//std::cout << "[INFO] JPEG saved to: " << output_filename << std::endl;
}
private:
nvjpegHandle_t nv_handle_ = nullptr;
nvjpegEncoderState_t nv_enc_state_ = nullptr;
nvjpegEncoderParams_t nv_enc_params_ = nullptr;
cudaStream_t stream_ = nullptr;
// nvJPEG 图像数据结构
nvjpegImage_t nv_image_{};
};
// JPEG encoding with OpenCV (CPU reference for the nvJPEG encoder).
//
// img_mat          input image; must be non-empty and have 3 channels.
// output_filename  path of the JPEG file to write.
//
// Encodes the image in memory with cv::imencode at JPEG quality 70 and
// writes the resulting buffer to `output_filename`. Invalid input, an
// encoding failure or an unwritable output file is reported on stderr and
// the function returns without writing.
void cvencode(cv::Mat img_mat, std::string output_filename = "encode.jpg")
{
    if (img_mat.empty()) {
        std::cerr << "[ERROR] Input image is empty!" << std::endl;
        return;
    }
    if (img_mat.channels() != 3) {
        std::cerr << "[ERROR] Only 3-channel (BGR) images are supported." << std::endl;
        return;
    }

    // JPEG quality 70.
    const std::vector<int> params = { cv::IMWRITE_JPEG_QUALITY, 70 };

    // Encode the image as JPEG into an in-memory buffer.
    std::vector<uchar> buffer;
    const bool success = cv::imencode(".jpg", img_mat, buffer, params);
    if (!success) {
        std::cerr << "错误:图像编码失败!" << std::endl;
        return;
    }

    // Persist the encoded bytes to disk.
    std::ofstream ofs(output_filename, std::ios::binary);
    if (!ofs) {
        std::cerr << "[ERROR] Cannot open output file: " << output_filename << std::endl;
        return;
    }
    ofs.write(reinterpret_cast<const char*>(buffer.data()),
              static_cast<std::streamsize>(buffer.size()));
    ofs.close();
}
// ======================
// Entry point: encoding benchmark.
//
// Loads one image, encodes it `testCount` times with the nvJPEG encoder and
// then `testCount` times with OpenCV, and prints the average wall-clock
// time per run for each. The measured time covers the full encode call,
// including writing each output file to disk.
// Returns 0 on success, -1 if the input image cannot be loaded.
// ======================
int main() {
    // Input image (any format cv::imread supports); the file must exist.
    const std::string input_path = "D:/2/test.bmp";
    cv::Mat img = cv::imread(input_path);
    if (img.empty()) {
        std::cerr << "[ERROR] Could not load image '" << input_path << "'. Please check the path." << std::endl;
        return -1;
    }
    std::cout << "[INFO] Image loaded. Channels: " << img.channels() << ", Size: " << img.cols << "x" << img.rows << std::endl;

    // Number of encode runs per backend; used for the loops and the averages.
    const int testCount = 100;

    // nvJPEG: construct the encoder once, then time the encode loop.
    JpegNvEncoder encoder;
    auto t_start = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < testCount; i++)
    {
        std::string name = "D:/2/output_encode_" + std::to_string(i) + ".jpg";
        encoder.encode(img, name);
    }
    auto t_end = std::chrono::high_resolution_clock::now();
    double elapsed = std::chrono::duration<double, std::milli>(t_end - t_start).count();
    std::cout << "[INFO] NVJPEG Encoding Time for " << testCount << " runs avg cost: " << elapsed / testCount << " ms" << std::endl;

    // OpenCV: same loop with the CPU encoder.
    t_start = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < testCount; i++)
    {
        std::string name = "D:/2/cv_output_encode_" + std::to_string(i) + ".jpg";
        cvencode(img, name);
    }
    t_end = std::chrono::high_resolution_clock::now();
    elapsed = std::chrono::duration<double, std::milli>(t_end - t_start).count();
    std::cout << "[INFO] OpenCV Encoding Time for " << testCount << " runs avg cost: " << elapsed / testCount << " ms" << std::endl;
    return 0;
}
4、参考文档
更多推荐

所有评论(0)