1、用ms-swift启动模型

2、用python实现http调用

3、用并发实现同时调用

import requests
class Qwen:
    """Connection settings for the Qwen2.5-72B-Instruct model served on localhost."""

    # Model identifier sent in the "model" field of the request body.
    name = "qwen2_5-72b-instruct"
    # Chat-completions endpoint of the local server.
    # NOTE(review): identical to Llama.base_url -- presumably only one model is
    # served on port 8001 at a time; confirm.
    base_url = "http://127.0.0.1:8001/v1/chat/completions"
    # Value placed after "Bearer " in the Authorization header; "EMPTY" looks
    # like a placeholder for a server that does not check keys -- confirm.
    api_key = "EMPTY"

class Llama:
    """Connection settings for the Llama3-70B-Instruct model served on localhost."""

    # Model identifier sent in the "model" field of the request body.
    name = "llama3-70b-instruct"
    # Chat-completions endpoint of the local server.
    # NOTE(review): identical to Qwen.base_url -- presumably only one model is
    # served on port 8001 at a time; confirm.
    base_url = "http://127.0.0.1:8001/v1/chat/completions"
    # Value placed after "Bearer " in the Authorization header; "EMPTY" looks
    # like a placeholder for a server that does not check keys -- confirm.
    api_key = "EMPTY"

def chat_with_gpt(api_key, messages, model="gpt-3.5-turbo", temperature=0.3, stream=False, url="https://api.openai.com/v1/chat/completions", timeout=None):
    """
    Send one chat-completion request to an OpenAI-compatible HTTP endpoint.

    Args:
        api_key (str): Token sent as ``Authorization: Bearer <api_key>``.
        messages (list): Conversation in the form
            ``[{"role": "user", "content": "your message"}]``.
        model (str): Model name placed in the request body.
            Defaults to "gpt-3.5-turbo".
        temperature (float): Sampling temperature placed in the request body.
            Defaults to 0.3.
        stream (bool): Whether to request streamed output. Defaults to False.
        url (str): Endpoint to POST to.
            Defaults to "https://api.openai.com/v1/chat/completions".
        timeout (float | tuple | None): Forwarded to ``requests.post``.
            The default ``None`` keeps the previous behaviour of waiting
            indefinitely; pass a number of seconds to bound the wait.

    Returns:
        If ``stream`` is falsy, the parsed JSON body (``response.json()``).
        If ``stream`` is truthy, a generator yielding each non-empty response
        line decoded as UTF-8.

    Note:
        The HTTP status code is not checked here; an error response whose body
        is JSON is returned to the caller like any other body.
    """
    import json

    # Request headers.
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    # Request body.
    data = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "stream": stream,
    }

    # Debug output. Print a redacted copy of the headers so the bearer token
    # is never written to stdout or captured logs.
    printable_headers = {**headers, "Authorization": "Bearer ***"}
    print('header', json.dumps(printable_headers, ensure_ascii=False))
    print('data', json.dumps(data, ensure_ascii=False))

    # stream=True defers downloading the body so it can be consumed line by line.
    response = requests.post(
        url,
        headers=headers,
        json=data,
        stream=bool(stream),
        timeout=timeout,
    )
    if stream:
        return _handle_stream_response(response)
    return response.json()

def _handle_stream_response(response):
    """
    处理流式响应的生成器函数。

    参数:
        response: requests.Response 对象。

    返回:
        生成器,逐步生成响应片段。
    """
    for chunk in response.iter_lines():
        if chunk:
            decoded_chunk = chunk.decode("utf-8")
            yield decoded_chunk
# Single-turn sample conversation. In the visible code it is referenced only
# by the disabled sequential timing runs below.
message = [{"role": "user", "content": "你是谁"}]
import time
# Disabled sequential baseline: send one blocking request to Qwen, then one to
# Llama, printing the elapsed seconds for each call.
# t1  = time.time()
# r1 = chat_with_gpt(Qwen.api_key, message, model=Qwen.name,url=Qwen.base_url)
# t2 = time.time()
# print(t2-t1)
#
# t1  = time.time()
# r2 = chat_with_gpt(Llama.api_key, message, model=Llama.name,url=Llama.base_url)
# t2 = time.time()
# print(t2-t1)
# print(r2)
#print(r2)

def chat_with_model(model_class, message):
    """
    Send one conversation to a model and time the round trip.

    Intended as the unit of work submitted to a thread pool.

    Args:
        model_class: Settings holder exposing ``api_key``, ``name`` and
            ``base_url`` attributes (e.g. ``Qwen`` or ``Llama``).
        message (list): Conversation in the form
            ``[{"role": "user", "content": "your message"}]``.

    Returns:
        tuple: ``(response, seconds)`` where ``response`` is whatever
        ``chat_with_gpt`` returned and ``seconds`` is the elapsed time rounded
        to 4 decimal places.
    """
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    started = time.perf_counter()
    response = chat_with_gpt(
        model_class.api_key,
        message,
        model=model_class.name,
        url=model_class.base_url,
    )
    elapsed = time.perf_counter() - started
    print(f"Time taken for {model_class.name}: {elapsed} seconds")
    return response, round(elapsed, 4)

import concurrent.futures

# Ten identical single-turn conversations, one per concurrent request.
messages = [[{"role": "user", "content": "你是谁"}] for _ in range(10)]

# Models to benchmark.
models = [Llama]
# Per-request elapsed seconds, appended in completion order.
durations = []

# Fan every (model, conversation) pair out to a thread pool and handle each
# result as soon as it completes.
with concurrent.futures.ThreadPoolExecutor() as executor:
    # Map each future back to the inputs it was submitted with.
    futures = {}
    for model in models:
        for message in messages:
            task = executor.submit(chat_with_model, model, message)
            futures[task] = (model, message)

    # Collect results in completion order, not submission order.
    for future in concurrent.futures.as_completed(futures):
        model, message = futures[future]
        try:
            response, duration = future.result()
            durations.append(duration)
            print(f"Response from {model.name} with message {message}: {response}")
        except Exception as e:
            print(f"Error occurred with {model.name} and message {message}: {e}")

# Durations in seconds noted from earlier runs (presumably the output of the
# print below for each model).
# llama
# [1.487, 5.2435, 6.6109, 7.9721, 9.3312, 10.737, 15.1935, 16.5562, 17.9155, 19.2751]
#
# qwen
# [8.5853, 18.9962, 29.1533, 37.4593, 45.8992, 54.3415, 62.7779, 72.2762, 82.2203, 92.1694]

print(durations)

Logo

中国智能体开发者社区,聚焦智能体与大模型开发,提供前沿资讯、实用工具链、开源项目及行业案例。通过技术沙龙、开发者大赛等活动,促进经验交流与协作,助力开发者快速构建创新智能应用。

更多推荐