Tutorial: Fine-Tuning a Large Language Model with Unsloth

Step 1: Install unsloth and related packages

## Option 1
pip install unsloth

## Option 2 (latest version from GitHub)
pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

# bitsandbytes and unsloth_zoo are pulled in automatically as dependencies of unsloth,
# so a separate `pip install bitsandbytes unsloth_zoo` is usually not needed
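
After installation, a quick sanity check such as the following (a minimal sketch, not part of the original tutorial) confirms that unsloth imports cleanly and that a CUDA GPU is visible:

import torch
from unsloth import FastLanguageModel  # raises ImportError if unsloth is not installed correctly

print("torch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())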

Step 2: Download the DeepSeek-R1 model

pip install modelscope

# Option 1: download DeepSeek-R1-Distill-Qwen-7B with the modelscope CLI
modelscope download --model unsloth/DeepSeek-R1-Distill-Qwen-7B --local_dir ./models

# Option 2: download the model manually and place it in a directory of your choice,
# here models/DeepSeek-R1-Distill-Qwen-7B
./models/DeepSeek-R1-Distill-Qwen-7B
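
The same download can also be scripted from Python instead of the CLI. This is a minimal sketch assuming a recent modelscope release whose snapshot_download supports the local_dir argument (older versions only offer cache_dir):

from modelscope import snapshot_download

# Download the distilled 7B model into ./models/DeepSeek-R1-Distill-Qwen-7B
model_dir = snapshot_download(
    "unsloth/DeepSeek-R1-Distill-Qwen-7B",
    local_dir="./models/DeepSeek-R1-Distill-Qwen-7B",
)
print("Model downloaded to:", model_dir)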

Step 3: Write the fine-tuning code

from unsloth import FastLanguageModel
import torch
import os
import multiprocessing
os.environ["CC"] = "cl"  # use MSVC's cl.exe as the C compiler (Windows)

max_seq_length = 1024  # maximum sequence length used for training and inference
dtype = None           # None lets Unsloth auto-detect (float16 on older GPUs, bfloat16 on Ampere+)
load_in_4bit = True    # load the base model in 4-bit to reduce GPU memory usage

from transformers import BitsAndBytesConfig

def formatting_prompts_func(examples):
    # Build the training text for each example by filling the prompt template with
    # the question, the chain of thought, and the final answer.
    # train_prompt_style and EOS_TOKEN are globals defined in the __main__ block below.
    inputs = examples["Question"]
    cots = examples["Complex_CoT"]
    outputs = examples["Response"]
    texts = []
    for input, cot, output in zip(inputs, cots, outputs):
        text = train_prompt_style.format(input, cot, output) + EOS_TOKEN
        texts.append(text)
    return {
        "text": texts,
    }


if __name__ == '__main__':
    # Add multiprocessing support (relevant on Windows)
    multiprocessing.freeze_support()

    # 4-bit NF4 quantization config (consistent with load_in_4bit=True above);
    # llm_int8_enable_fp32_cpu_offload keeps layers that don't fit on the GPU on the CPU in fp32
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        llm_int8_enable_fp32_cpu_offload=True,
    )

    # Path to the locally downloaded model from step 2; adjust if you downloaded
    # a different size (e.g. DeepSeek-R1-Distill-Qwen-7B)
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "models/DeepSeek-R1-Distill-Qwen-1.5B",
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
        quantization_config = quantization_config,
        device_map = "auto",
    )

    # Manually set the padding token (important: many causal LMs ship without one)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token  # reuse the end-of-sequence token as padding
        model.config.pad_token_id = tokenizer.pad_token_id

    prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to solve the problem.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment.
Please answer the following medical question:

### Question:
{}

### Response: <think>{}"""


    # Test question (in Chinese): a patient with acute appendicitis, 5 days after onset,
    # with slightly reduced abdominal pain but persistent fever and a tender mass in the
    # right lower abdomen; should surgery be performed?
    question = "一个患有急性阑尾炎的病人已经发病5天,腹痛稍有减轻但仍然发热,在体检时发现右下腹有压痛的包块, 请根据患者的情况判断是否需要进行手术治疗"

    FastLanguageModel.for_inference(model)
    inputs = tokenizer([prompt_style.format(question,"")],return_tensors="pt").to("cuda")


    # Generate the answer
    outputs = model.generate(
        input_ids = inputs.input_ids,
        attention_mask = inputs.attention_mask,
        max_new_tokens = 1200,
        use_cache = True,
    )
    

    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    print(response[0].split("### Response:")[1])
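    # The answer printed above is the untuned base model's output; keep it as a
    # baseline to compare against the output generated again after training below.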
        


    train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to solve the problem.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment.
Please answer the following medical question:

### Question:
{}

### Response: 
<think>
{}
</think>
{}"""

    EOS_TOKEN = tokenizer.eos_token
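    # EOS_TOKEN is appended to every training example in formatting_prompts_func
    # so the fine-tuned model learns where a response ends and stops generating.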

    from datasets import load_dataset
    dataset = load_dataset("./data",'en', split="train[0:500]",trust_remote_code=True)
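    # "./data" is expected to hold a local copy of a medical reasoning SFT dataset
    # with "Question", "Complex_CoT" and "Response" columns; the 'en' config is used
    # and only the first 500 training examples are loaded.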
    print(dataset.column_names)

    dataset = dataset.map(formatting_prompts_func, batched=True)
    print(dataset["text"][0])  # inspect one formatted training sample


    # Wrap the base model with LoRA adapters; only these low-rank adapter weights
    # are trained, while the base weights stay frozen.
    model = FastLanguageModel.get_peft_model(
        model,
        r=16,           # LoRA rank
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        lora_alpha=16,  # LoRA scaling factor
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",  # Unsloth's memory-efficient checkpointing
        random_state=3407,
        use_rslora=False,
        loftq_config=None,
    )


    from trl import SFTTrainer
    from transformers import TrainingArguments
    from unsloth import is_bf16_supported
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=max_seq_length,
        dataset_num_proc=2,
        packing=False,
        args=TrainingArguments(
            per_device_train_batch_size=1,
            gradient_accumulation_steps=2,  # effective batch size = 1 * 2
            warmup_steps=5,
            max_steps=60,                   # short demo run; increase for real fine-tuning
            learning_rate=2e-4,
            fp16=not is_bf16_supported(),
            bf16=is_bf16_supported(),
            logging_steps=1,
            optim="adamw_8bit",
            weight_decay=0.01,
            lr_scheduler_type="linear",
            seed=3407,
            output_dir="./output",
            report_to="none",
        ),
    )

    trainer_stat = trainer.train()


    ## Test the effect of fine-tuning
    FastLanguageModel.for_inference(model)
    inputs = tokenizer([prompt_style.format(question,"")],return_tensors="pt").to("cuda")


    # Generate the answer with the fine-tuned model
    outputs = model.generate(
        input_ids = inputs.input_ids,
        attention_mask = inputs.attention_mask,
        max_new_tokens = 1200,
        use_cache = True,
    )

    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    print(response[0].split("### Response:")[1])
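
The script above only trains and evaluates in memory; nothing is persisted. To keep the result, the LoRA adapters can be saved at the end of the __main__ block. A minimal sketch, assuming the standard PEFT/Unsloth save methods; the directory names are placeholders, not taken from the tutorial:

    # Save the trained LoRA adapters and the tokenizer
    model.save_pretrained("lora_model")
    tokenizer.save_pretrained("lora_model")

    # Optionally merge the adapters into the base weights for standalone deployment
    # (save_pretrained_merged is Unsloth's helper; skip this if the adapters are enough)
    # model.save_pretrained_merged("merged_model", tokenizer, save_method="merged_16bit")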

Running the script may fail with the following error: ImportError: DLL load failed while importing libtriton: A dynamic link library (DLL) initialization routine failed.
For a solution, refer to this article.
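
This error typically appears on Windows, where official Triton wheels are not published. One commonly reported workaround (an assumption here, not necessarily the fix described in the referenced article) is to install a community Windows build of Triton and rerun the script:

pip install triton-windows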
