Unsloth Large-Model Fine-Tuning Tutorial
Step 1: Install unsloth and related packages
## Option 1: stable release
pip install unsloth
## Option 2: latest version from GitHub
pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git
# pip install bitsandbytes unsloth_zoo  # normally installed automatically as unsloth dependencies
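Before downloading any model, a quick sanity check (a minimal sketch, not part of the original steps) confirms the install works and a CUDA GPU is visible:
import unsloth  # fails fast here if the install is broken
import torch

# Expect a recent PyTorch version and True for CUDA availability
print(torch.__version__, torch.cuda.is_available())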
Step 2: Download the DeepSeek-R1 model
pip install modelscope
# Option 1: download DeepSeek-R1:7B from the command line
modelscope download --model unsloth/DeepSeek-R1-Distill-Qwen-7B --local_dir ./models
# Option 2: download manually, then place the files in a directory of your choice; here they go under models/DeepSeek-R1-Distill-Qwen-7B
./models/DeepSeek-R1-Distill-Qwen-7B
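The download can also be scripted with ModelScope's Python API. A minimal sketch; the local_dir argument assumes a reasonably recent modelscope release, so verify it against your installed version:
from modelscope import snapshot_download

# Fetch the distilled 7B model into the same directory the tutorial uses
snapshot_download(
    "unsloth/DeepSeek-R1-Distill-Qwen-7B",
    local_dir="./models/DeepSeek-R1-Distill-Qwen-7B",
)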
Step 3: Write the fine-tuning code
from unsloth import FastLanguageModel
import torch
import os
import multiprocessing

os.environ["CC"] = "cl"  # use the MSVC compiler on Windows

max_seq_length = 1024
dtype = None           # None = auto-detect (float16 or bfloat16)
load_in_4bit = True    # load the base model with 4-bit quantization
from transformers import BitsAndBytesConfig
# Turn each (Question, Complex_CoT, Response) triple into one training string.
# train_prompt_style and EOS_TOKEN are module-level globals assigned in the
# __main__ block below, before dataset.map() calls this function.
def formatting_prompts_func(examples):
    inputs = examples["Question"]
    cots = examples["Complex_CoT"]
    outputs = examples["Response"]
    texts = []
    for input, cot, output in zip(inputs, cots, outputs):
        text = train_prompt_style.format(input, cot, output) + EOS_TOKEN
        texts.append(text)
    return {
        "text": texts
    }
if __name__ == '__main__':
    # Multiprocessing support (needed for frozen executables on Windows)
    multiprocessing.freeze_support()

    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,  # must be 4-bit to match the bnb_4bit_* options below and load_in_4bit above
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        llm_int8_enable_fp32_cpu_offload=True
    )
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "models/DeepSeek-R1-Distill-Qwen-7B",  # the local path from Step 2
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
        quantization_config = quantization_config,
        device_map = "auto"
    )

    # Set the padding token manually (a key step)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token  # use the EOS token as the padding token
        model.config.pad_token_id = tokenizer.pad_token_id
    prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thought to solve the problem.
### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment.
Please answer the following medical question:
### Question:
{}
### Response: <think>{}"""
    question = ("A patient with acute appendicitis has been ill for 5 days; the abdominal pain has eased slightly, "
                "but the fever persists. On physical examination, a tender mass is found in the right lower quadrant. "
                "Based on this presentation, decide whether surgical treatment is required.")

    # Baseline: ask the question before fine-tuning
    FastLanguageModel.for_inference(model)
    inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

    # Generate the answer
    outputs = model.generate(
        input_ids = inputs.input_ids,
        attention_mask = inputs.attention_mask,
        max_new_tokens = 1200,
        use_cache = True,
    )
    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    print(response[0].split("### Response:")[1])
    train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thought to solve the problem.
### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment.
Please answer the following medical question:
### Question:
{}
### Response:
<think>
{}
</think>
{}"""
    EOS_TOKEN = tokenizer.eos_token  # appended to every sample so the model learns to stop

    from datasets import load_dataset
    dataset = load_dataset("./data", 'en', split="train[0:500]", trust_remote_code=True)
    print(dataset.column_names)
    dataset = dataset.map(formatting_prompts_func, batched=True)
    print(dataset["text"][0])  # inspect one formatted training sample
    model = FastLanguageModel.get_peft_model(
        model,
        r=16,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        lora_alpha=16,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
        use_rslora=False,
        loftq_config=None,
    )
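    # Optional sanity check: get_peft_model returns a PEFT-wrapped model, so it
    # should expose print_trainable_parameters(); only the LoRA adapters (a tiny
    # fraction of all weights) should be trainable.
    model.print_trainable_parameters()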
    from trl import SFTTrainer
    from transformers import TrainingArguments
    from unsloth import is_bf16_supported

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=max_seq_length,
        dataset_num_proc=2,
        packing=False,
        args=TrainingArguments(
            per_device_train_batch_size=1,
            gradient_accumulation_steps=2,
            warmup_steps=5,
            max_steps=60,
            learning_rate=2e-4,
            fp16=not is_bf16_supported(),
            bf16=is_bf16_supported(),
            logging_steps=1,
            optim="adamw_8bit",
            weight_decay=0.01,
            lr_scheduler_type="linear",
            seed=3407,
            output_dir="./output",
            report_to="none",
        ),
    )
    trainer_stat = trainer.train()
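    # Persist the fine-tuned LoRA adapters and tokenizer so the run is not lost.
    # This saves only the small adapter weights, not a merged base model; the
    # output path is arbitrary.
    model.save_pretrained("output/lora_model")
    tokenizer.save_pretrained("output/lora_model")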
    ## Test the fine-tuned model
    FastLanguageModel.for_inference(model)
    inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

    # Generate the answer
    outputs = model.generate(
        input_ids = inputs.input_ids,
        attention_mask = inputs.attention_mask,
        max_new_tokens = 1200,
        use_cache = True,
    )
    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    print(response[0].split("### Response:")[1])
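    # Optional: unsloth also ships helpers for exporting merged weights, e.g.
    # save_pretrained_merged; the exact signature below is an assumption to
    # verify against your installed unsloth version before uncommenting.
    # model.save_pretrained_merged("output/merged_model", tokenizer, save_method="merged_16bit")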
Running the script on Windows may fail with the following error: ImportError: DLL load failed while importing libtriton: A dynamic link library (DLL) initialization routine failed. This means Triton's native DLL could not be loaded; for a fix, refer to this article.