文章目录

概要

#pragma omp parallel sections 是 OpenMP 中用于实现任务并行的组合指令(相当于把 parallel 与 sections 合并书写):其中每个 section 代码块只会被线程组中的某一个线程执行一次,因此特别适合同时执行多个相互独立的任务。

基本语法

// Syntax template (not compilable as-is): the bracketed part marks where
// optional clauses go, e.g. num_threads(n).
// Per the OpenMP specification, each `section` block below is executed once
// by one thread of the team created for the enclosing parallel region.
#pragma omp parallel sections [clause[[,] clause] ...]
{
    #pragma omp section
    {
        // code block 1
    }
    
    #pragma omp section
    {
        // code block 2
    }
    
    #pragma omp section
    {
        // code block 3
    }
    
    // ... more sections may follow
}

例子

#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <random>
#include <string>
#include <thread>
#include <vector>

#include <omp.h>

// 模拟耗时任务1:矩阵运算
/**
 * @brief Simulated heavy task 1: "matrix" number crunching.
 *
 * Makes @p iterations passes over a 10000-element buffer, replacing every
 * element v with rand(0,1) * sin(v) + cos(v), then sleeps for @p pause and
 * prints a completion line to std::cout.
 *
 * @param task_id    Identifier echoed in the completion message.
 * @param iterations Number of passes over the buffer. Defaults to the
 *                   original fixed workload (100000); a non-positive value
 *                   skips the computation.
 * @param pause      Extra delay added after the computation. Defaults to the
 *                   original 300 ms.
 */
void simulate_heavy_task1(int task_id, int iterations = 100000,
                          std::chrono::milliseconds pause = std::chrono::milliseconds(300)) {
    std::vector<double> matrix(10000);
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis(0.0, 1.0);

    // Simulate an expensive numeric kernel.
    for (int pass = 0; pass < iterations; ++pass) {
        for (auto& val : matrix) {
            val = dis(gen) * std::sin(val) + std::cos(val);
        }
    }

    // Simulate additional latency on top of the computation.
    std::this_thread::sleep_for(pause);

    // The message is built from several << calls; when this function runs in
    // concurrent OpenMP sections those pieces could interleave with output
    // from other threads. The named critical section keeps the line intact
    // (the pragma is simply ignored when OpenMP is disabled).
    #pragma omp critical(console_output)
    {
        std::cout << "Task " << task_id << " completed (Matrix operations)" << std::endl;
    }
}

// 模拟耗时任务2:图像处理
/**
 * @brief Simulated heavy task 2: "image processing".
 *
 * Makes @p iterations passes over a 2048x2048 int buffer. Each pixel p is
 * first replaced by (p + rand[0,255]) % 256 and then by p * 2 - 128. The
 * function then sleeps for @p pause and prints a completion line to
 * std::cout.
 *
 * @param task_id    Identifier echoed in the completion message.
 * @param iterations Number of passes over the buffer. Defaults to the
 *                   original fixed workload (50000); a non-positive value
 *                   skips the computation.
 * @param pause      Extra delay added after the computation. Defaults to the
 *                   original 400 ms.
 */
void simulate_heavy_task2(int task_id, int iterations = 50000,
                          std::chrono::milliseconds pause = std::chrono::milliseconds(400)) {
    std::vector<int> image(2048 * 2048);
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<> dis(0, 255);

    // Simulate a per-pixel image filter.
    for (int pass = 0; pass < iterations; ++pass) {
        for (auto& pixel : image) {
            pixel = (pixel + dis(gen)) % 256;
            pixel = pixel * 2 - 128;
        }
    }

    // Simulate additional latency on top of the computation.
    std::this_thread::sleep_for(pause);

    // Serialize the multi-part stream insertion so that lines printed by
    // concurrently running OpenMP sections do not interleave (the pragma is
    // ignored when OpenMP is disabled).
    #pragma omp critical(console_output)
    {
        std::cout << "Task " << task_id << " completed (Image processing)" << std::endl;
    }
}

// 模拟耗时任务3:文件I/O和数据处理
/**
 * @brief Simulated heavy task 3: "data processing".
 *
 * Makes @p iterations passes over a 500000-element float buffer. Each value
 * v is first replaced by exp(v) * rand[-1,1) and then by log(|v| + 1). The
 * function then sleeps for @p pause and prints a completion line to
 * std::cout.
 *
 * @param task_id    Identifier echoed in the completion message.
 * @param iterations Number of passes over the buffer. Defaults to the
 *                   original fixed workload (20000); a non-positive value
 *                   skips the computation.
 * @param pause      Extra delay added after the computation. Defaults to the
 *                   original 350 ms.
 */
void simulate_heavy_task3(int task_id, int iterations = 20000,
                          std::chrono::milliseconds pause = std::chrono::milliseconds(350)) {
    std::vector<float> data(500000);
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis(-1.0, 1.0);

    // Simulate a numeric data-processing pass.
    for (int pass = 0; pass < iterations; ++pass) {
        for (auto& val : data) {
            val = std::exp(val) * dis(gen);
            val = std::log(std::abs(val) + 1.0);
        }
    }

    // Simulate additional latency on top of the computation.
    std::this_thread::sleep_for(pause);

    // Serialize the multi-part stream insertion so that lines printed by
    // concurrently running OpenMP sections do not interleave (the pragma is
    // ignored when OpenMP is disabled).
    #pragma omp critical(console_output)
    {
        std::cout << "Task " << task_id << " completed (Data processing)" << std::endl;
    }
}

// 模拟耗时任务4:字符串处理
/**
 * @brief Simulated heavy task 4: "string processing".
 *
 * Makes @p iterations passes over a 100000-character string: every character
 * c becomes (c + 1) % 128 and the whole string is then reversed. The
 * function then sleeps for @p pause and prints a completion line to
 * std::cout.
 *
 * @param task_id    Identifier echoed in the completion message.
 * @param iterations Number of passes over the string. Defaults to the
 *                   original fixed workload (100000); a non-positive value
 *                   skips the computation.
 * @param pause      Extra delay added after the computation. Defaults to the
 *                   original 250 ms.
 */
void simulate_heavy_task4(int task_id, int iterations = 100000,
                          std::chrono::milliseconds pause = std::chrono::milliseconds(250)) {
    std::string text(100000, 'A');

    // Simulate character-level string manipulation.
    for (int pass = 0; pass < iterations; ++pass) {
        for (char& c : text) {
            c = (c + 1) % 128;
        }
        std::reverse(text.begin(), text.end());
    }

    // Simulate additional latency on top of the computation.
    std::this_thread::sleep_for(pause);

    // Serialize the multi-part stream insertion so that lines printed by
    // concurrently running OpenMP sections do not interleave (the pragma is
    // ignored when OpenMP is disabled).
    #pragma omp critical(console_output)
    {
        std::cout << "Task " << task_id << " completed (String processing)" << std::endl;
    }
}

// 版本1:顺序执行
/**
 * @brief Version 1: run the four simulated tasks one after another.
 *
 * Prints a header, runs tasks 1-4 sequentially on the calling thread and
 * prints the elapsed wall time in milliseconds. This is the baseline the
 * parallel variants are compared against.
 */
void sequential_execution() {
    std::cout << "\n=== 顺序执行 ===" << std::endl;

    // steady_clock is monotonic, so the measured interval cannot be skewed by
    // system clock adjustments (high_resolution_clock gives no such guarantee).
    const auto start = std::chrono::steady_clock::now();

    simulate_heavy_task1(1);
    simulate_heavy_task2(2);
    simulate_heavy_task3(3);
    simulate_heavy_task4(4);

    const auto end = std::chrono::steady_clock::now();
    const auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);

    std::cout << "顺序执行总时间: " << duration.count() << " ms" << std::endl;
}

// 版本2:使用 parallel sections
/**
 * @brief Version 2: run the four simulated tasks as four OpenMP sections.
 *
 * Prints a header and the value of omp_get_max_threads(), runs each task in
 * its own `section` of a `parallel sections` region, and prints the elapsed
 * wall time in milliseconds. Before starting its task, each section reports
 * which OpenMP thread picked it up.
 */
void parallel_sections_execution() {
    std::cout << "\n=== 使用 OpenMP parallel sections ===" << std::endl;
    std::cout << "可用线程数: " << omp_get_max_threads() << std::endl;

    // Reports which thread runs which task. The line is assembled from
    // several << calls, so it is emitted inside a named critical section;
    // otherwise fragments from different threads could interleave.
    const auto announce = [](int task_no) {
        const int thread_id = omp_get_thread_num();
        #pragma omp critical(console_output)
        {
            std::cout << "Thread " << thread_id << " executing task " << task_no << std::endl;
        }
    };

    // steady_clock is monotonic, which is what interval timing needs.
    const auto start = std::chrono::steady_clock::now();

    #pragma omp parallel sections
    {
        #pragma omp section
        {
            announce(1);
            simulate_heavy_task1(1);
        }

        #pragma omp section
        {
            announce(2);
            simulate_heavy_task2(2);
        }

        #pragma omp section
        {
            announce(3);
            simulate_heavy_task3(3);
        }

        #pragma omp section
        {
            announce(4);
            simulate_heavy_task4(4);
        }
    }

    const auto end = std::chrono::steady_clock::now();
    const auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);

    std::cout << "并行执行总时间: " << duration.count() << " ms" << std::endl;
}

// 版本3:指定线程数
/**
 * @brief Version 3: run the four simulated tasks in two OpenMP sections,
 *        requesting two threads via num_threads(2).
 *
 * The first section runs tasks 1 and 2 back to back, the second runs tasks 3
 * and 4. Each section reports which OpenMP thread executes each task. The
 * elapsed wall time is printed in milliseconds.
 */
void parallel_sections_with_numthreads() {
    std::cout << "\n=== 使用 2 个线程 (指定 num_threads) ===" << std::endl;

    // Reports which thread runs which task. The line is assembled from
    // several << calls, so it is emitted inside a named critical section;
    // otherwise fragments from the two threads could interleave.
    const auto announce = [](int thread_id, int task_no) {
        #pragma omp critical(console_output)
        {
            std::cout << "Thread " << thread_id << " executing task " << task_no << std::endl;
        }
    };

    // steady_clock is monotonic, which is what interval timing needs.
    const auto start = std::chrono::steady_clock::now();

    #pragma omp parallel sections num_threads(2)
    {
        #pragma omp section
        {
            const int thread_id = omp_get_thread_num();
            announce(thread_id, 1);
            simulate_heavy_task1(1);
            announce(thread_id, 2);
            simulate_heavy_task2(2);
        }

        #pragma omp section
        {
            const int thread_id = omp_get_thread_num();
            announce(thread_id, 3);
            simulate_heavy_task3(3);
            announce(thread_id, 4);
            simulate_heavy_task4(4);
        }
    }

    const auto end = std::chrono::steady_clock::now();
    const auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);

    std::cout << "2线程并行执行总时间: " << duration.count() << " ms" << std::endl;
}

// 版本4:模拟 VTK 应用场景(原文此处未给出实现代码)

/**
 * @brief Entry point: prints OpenMP system information, runs the sequential
 *        and the two parallel-sections variants, then prints a short note on
 *        the theoretical best-case speedup.
 *
 * @return 0 on completion.
 */
int main() {
    std::cout << "========== OpenMP parallel sections 性能比较 ==========" << std::endl;
    std::cout << "系统信息:" << std::endl;
    std::cout << "- CPU核心数: " << omp_get_num_procs() << std::endl;
    std::cout << "- 最大线程数: " << omp_get_max_threads() << std::endl;

    // No srand() here: nothing in this program calls rand(); each simulated
    // task seeds its own std::mt19937 from std::random_device.

    // Run the three variants back to back so their timings can be compared.
    sequential_execution();
    parallel_sections_execution();
    parallel_sections_with_numthreads();

    // Static explanatory text about the ideal case.
    std::cout << "\n========== 性能分析 ==========" << std::endl;
    std::cout << "理论最佳情况:" << std::endl;
    std::cout << "- 如果4个任务完全独立且耗时相等" << std::endl;
    std::cout << "- 在4核CPU上,并行执行时间 ≈ 最慢任务的时间" << std::endl;
    std::cout << "- 加速比 ≈ 任务数 / (1 + 并行开销)" << std::endl;

    return 0;
}
Logo

火山引擎开发者社区是火山引擎打造的AI技术生态平台,聚焦Agent与大模型开发,提供豆包系列模型(图像/视频/视觉)、智能分析与会话工具,并配套评测集、动手实验室及行业案例库。社区通过技术沙龙、挑战赛等活动促进开发者成长,新用户可领50万Tokens权益,助力构建智能应用。

更多推荐