#pragma omp parallel sections 并行加速学习
#pragma omp parallel sections 是 OpenMP 中用于实现任务并行的重要指令,特别适合执行多个独立任务的情况。
·
概要
#pragma omp parallel sections 是 OpenMP 中用于实现任务并行的重要指令,特别适合执行多个独立任务的情况。
基本语法
// Syntax template: a `parallel sections` region holding any number of `section` blocks.
#pragma omp parallel sections [clause[[,] clause] ...]
{
#pragma omp section
{
// code block 1
}
#pragma omp section
{
// code block 2
}
#pragma omp section
{
// code block 3
}
// ... more `section` blocks may follow
}
例子
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <random>
#include <string>
#include <thread>
#include <vector>
#include <omp.h>
/// Simulated heavy task 1: "matrix" arithmetic.
///
/// Rewrites a 10000-element buffer 100000 times with a mix of random and
/// trigonometric values, sleeps for 300 ms, then reports completion on
/// std::cout. The computed values are discarded; the function only exists to
/// consume time for the timing comparison.
///
/// @param task_id Identifier echoed in the completion message.
void simulate_heavy_task1(int task_id) {
    std::vector<double> matrix(10000);
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis(0.0, 1.0);

    // Simulate a costly numeric workload.
    for (int i = 0; i < 100000; ++i) {
        for (auto& val : matrix) {
            val = dis(gen) * std::sin(val) + std::cos(val);
        }
    }

    // Simulate additional latency.
    std::this_thread::sleep_for(std::chrono::milliseconds(300));

    // This function is invoked from concurrent OpenMP sections. The message is
    // built from several chained insertions, so serialize it to keep the line
    // from being interleaved with output written by other threads.
#pragma omp critical(console_output)
    {
        std::cout << "Task " << task_id << " completed (Matrix operations)" << std::endl;
    }
}
/// Simulated heavy task 2: "image processing".
///
/// Makes 50000 passes over a 2048 * 2048 buffer of ints, perturbing every
/// element with a random value in [0, 255], then sleeps for 400 ms and reports
/// completion on std::cout. The buffer contents are discarded afterwards.
///
/// @param task_id Identifier echoed in the completion message.
void simulate_heavy_task2(int task_id) {
    std::vector<int> image(2048 * 2048);
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<> dis(0, 255);

    // Simulate per-pixel image processing.
    for (int i = 0; i < 50000; ++i) {
        for (auto& pixel : image) {
            pixel = (pixel + dis(gen)) % 256;
            pixel = pixel * 2 - 128;
        }
    }

    // Simulate additional latency.
    std::this_thread::sleep_for(std::chrono::milliseconds(400));

    // This function is invoked from concurrent OpenMP sections. The message is
    // built from several chained insertions, so serialize it to keep the line
    // from being interleaved with output written by other threads.
#pragma omp critical(console_output)
    {
        std::cout << "Task " << task_id << " completed (Image processing)" << std::endl;
    }
}
/// Simulated heavy task 3: "data processing".
///
/// Makes 20000 passes over a 500000-element float buffer, applying exp/log
/// arithmetic scaled by a random factor in [-1, 1), then sleeps for 350 ms and
/// reports completion on std::cout. Although the original label mentioned file
/// I/O, no file is read or written here; the work is purely in memory and the
/// results are discarded.
///
/// @param task_id Identifier echoed in the completion message.
void simulate_heavy_task3(int task_id) {
    std::vector<float> data(500000);
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis(-1.0, 1.0);

    // Simulate numeric data processing.
    for (int i = 0; i < 20000; ++i) {
        for (auto& val : data) {
            val = std::exp(val) * dis(gen);
            val = std::log(std::abs(val) + 1.0);
        }
    }

    // Simulate additional latency.
    std::this_thread::sleep_for(std::chrono::milliseconds(350));

    // This function is invoked from concurrent OpenMP sections. The message is
    // built from several chained insertions, so serialize it to keep the line
    // from being interleaved with output written by other threads.
#pragma omp critical(console_output)
    {
        std::cout << "Task " << task_id << " completed (Data processing)" << std::endl;
    }
}
/// Simulated heavy task 4: "string processing".
///
/// Starts from a 100000-character string of 'A' and, 100000 times, advances
/// every character by one (wrapping at 128) and reverses the string. It then
/// sleeps for 250 ms and reports completion on std::cout. The resulting text
/// is discarded.
///
/// @param task_id Identifier echoed in the completion message.
void simulate_heavy_task4(int task_id) {
    std::string text(100000, 'A');

    // Simulate character-level string processing.
    for (int i = 0; i < 100000; ++i) {
        for (char& c : text) {
            // The arithmetic is done in int; the result is always in [0, 127],
            // so the narrowing back to char is lossless.
            c = static_cast<char>((c + 1) % 128);
        }
        std::reverse(text.begin(), text.end());
    }

    // Simulate additional latency.
    std::this_thread::sleep_for(std::chrono::milliseconds(250));

    // This function is invoked from concurrent OpenMP sections. The message is
    // built from several chained insertions, so serialize it to keep the line
    // from being interleaved with output written by other threads.
#pragma omp critical(console_output)
    {
        std::cout << "Task " << task_id << " completed (String processing)" << std::endl;
    }
}
/// Version 1: sequential baseline.
///
/// Runs the four simulated tasks one after another on the calling thread and
/// prints the total elapsed time in milliseconds.
void sequential_execution() {
    std::cout << "\n=== 顺序执行 ===" << std::endl;

    // steady_clock is monotonic, which is what interval timing needs;
    // high_resolution_clock is allowed to be an alias of the adjustable
    // system clock.
    const auto start = std::chrono::steady_clock::now();

    simulate_heavy_task1(1);
    simulate_heavy_task2(2);
    simulate_heavy_task3(3);
    simulate_heavy_task4(4);

    const auto end = std::chrono::steady_clock::now();
    const auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
    std::cout << "顺序执行总时间: " << duration.count() << " ms" << std::endl;
}
/// Version 2: one OpenMP section per task.
///
/// Prints the value of omp_get_max_threads(), then runs the four simulated
/// tasks inside a `parallel sections` region (one `section` each) and prints
/// the total elapsed time in milliseconds. Each section announces which
/// thread number picked it up before starting its task.
void parallel_sections_execution() {
    std::cout << "\n=== 使用 OpenMP parallel sections ===" << std::endl;
    std::cout << "可用线程数: " << omp_get_max_threads() << std::endl;

    // steady_clock is monotonic, which is what interval timing needs;
    // high_resolution_clock is allowed to be an alias of the adjustable
    // system clock.
    const auto start = std::chrono::steady_clock::now();

#pragma omp parallel sections
    {
#pragma omp section
        {
            const int thread_id = omp_get_thread_num();
            // Sections run concurrently; serialize the chained insertions so
            // lines from different threads are not interleaved. The task call
            // stays outside the critical region so the tasks still overlap.
#pragma omp critical(console_output)
            {
                std::cout << "Thread " << thread_id << " executing task 1" << std::endl;
            }
            simulate_heavy_task1(1);
        }
#pragma omp section
        {
            const int thread_id = omp_get_thread_num();
#pragma omp critical(console_output)
            {
                std::cout << "Thread " << thread_id << " executing task 2" << std::endl;
            }
            simulate_heavy_task2(2);
        }
#pragma omp section
        {
            const int thread_id = omp_get_thread_num();
#pragma omp critical(console_output)
            {
                std::cout << "Thread " << thread_id << " executing task 3" << std::endl;
            }
            simulate_heavy_task3(3);
        }
#pragma omp section
        {
            const int thread_id = omp_get_thread_num();
#pragma omp critical(console_output)
            {
                std::cout << "Thread " << thread_id << " executing task 4" << std::endl;
            }
            simulate_heavy_task4(4);
        }
    }

    const auto end = std::chrono::steady_clock::now();
    const auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
    std::cout << "并行执行总时间: " << duration.count() << " ms" << std::endl;
}
/// Version 3: `parallel sections` with an explicit `num_threads(2)` clause.
///
/// The four simulated tasks are grouped into two sections: the first runs
/// tasks 1 and 2 back to back, the second runs tasks 3 and 4 back to back.
/// Prints the total elapsed time in milliseconds.
void parallel_sections_with_numthreads() {
    std::cout << "\n=== 使用 2 个线程 (指定 num_threads) ===" << std::endl;

    // steady_clock is monotonic, which is what interval timing needs;
    // high_resolution_clock is allowed to be an alias of the adjustable
    // system clock.
    const auto start = std::chrono::steady_clock::now();

#pragma omp parallel sections num_threads(2)
    {
#pragma omp section
        {
            const int thread_id = omp_get_thread_num();
            // Sections run concurrently; serialize the chained insertions so
            // lines from different threads are not interleaved. The task calls
            // stay outside the critical regions so the sections still overlap.
#pragma omp critical(console_output)
            {
                std::cout << "Thread " << thread_id << " executing task 1" << std::endl;
            }
            simulate_heavy_task1(1);
#pragma omp critical(console_output)
            {
                std::cout << "Thread " << thread_id << " executing task 2" << std::endl;
            }
            simulate_heavy_task2(2);
        }
#pragma omp section
        {
            const int thread_id = omp_get_thread_num();
#pragma omp critical(console_output)
            {
                std::cout << "Thread " << thread_id << " executing task 3" << std::endl;
            }
            simulate_heavy_task3(3);
#pragma omp critical(console_output)
            {
                std::cout << "Thread " << thread_id << " executing task 4" << std::endl;
            }
            simulate_heavy_task4(4);
        }
    }

    const auto end = std::chrono::steady_clock::now();
    const auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
    std::cout << "2线程并行执行总时间: " << duration.count() << " ms" << std::endl;
}
/// Program entry point.
///
/// Prints the processor count and maximum thread count reported by OpenMP,
/// runs the sequential, four-section and two-thread variants in that order,
/// and finishes with a short block of explanatory notes.
///
/// @return Always 0.
int main() {
    std::cout << "========== OpenMP parallel sections 性能比较 ==========" << std::endl;
    std::cout << "系统信息:" << std::endl;
    std::cout << "- CPU核心数: " << omp_get_num_procs() << std::endl;
    std::cout << "- 最大线程数: " << omp_get_max_threads() << std::endl;

    // No srand() call here: nothing in the program uses rand(). Every task
    // seeds its own std::mt19937 from std::random_device.

    // Run the three timing variants.
    sequential_execution();
    parallel_sections_execution();
    parallel_sections_with_numthreads();

    // Explanatory notes on the expected speed-up.
    std::cout << "\n========== 性能分析 ==========" << std::endl;
    std::cout << "理论最佳情况:" << std::endl;
    std::cout << "- 如果4个任务完全独立且耗时相等" << std::endl;
    std::cout << "- 在4核CPU上,并行执行时间 ≈ 最慢任务的时间" << std::endl;
    std::cout << "- 加速比 ≈ 任务数 / (1 + 并行开销)" << std::endl;
    return 0;
}
火山引擎开发者社区是火山引擎打造的AI技术生态平台,聚焦Agent与大模型开发,提供豆包系列模型(图像/视频/视觉)、智能分析与会话工具,并配套评测集、动手实验室及行业案例库。社区通过技术沙龙、挑战赛等活动促进开发者成长,新用户可领50万Tokens权益,助力构建智能应用。
更多推荐
所有评论(0)