基本环境配置
# Basic environment setup: install uv, create the project directory,
# and activate a Python 3.11 virtual environment.
curl -LsSf https://astral.sh/uv/install.sh | sh   # pipe into sh so the installer actually runs
mkdir gpt-oss-fintune
cd gpt-oss-fintune                                # fixed typo: was "gpt-oos-fintune", which doesn't exist
uv init --python=3.11
uv venv
source .venv/bin/activate
安装必要的包
# Install the training stack with uv.
# NOTE: the Triton kernels live under the "triton-lang" GitHub org —
# the original "tritonlang" URL does not resolve.
uv pip install \
  "torch>=2.8.0" "triton>=3.4.0" numpy ipykernel torchvision bitsandbytes \
  "transformers>=4.55.3" \
  "unsloth_zoo[base] @ git+https://github.com/unslothai/unsloth-zoo" \
  "unsloth[base] @ git+https://github.com/unslothai/unsloth" \
  git+https://github.com/triton-lang/triton.git@05b2c186c1b6c9a08375389d5efe9cb4c401c075#subdirectory=python/triton_kernels
代码分析
代码在jupyter中运行,逐代码块分析
# Route all Hugging Face Hub downloads through the hf-mirror.com mirror
# (useful where huggingface.co is slow or unreachable). Must be set
# before any transformers/unsloth import touches the Hub.
import os
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
模型导入,导入4-bit模型
from unsloth import FastLanguageModel
import torch

# Loader configuration: modest context window, dtype auto-detected.
max_seq_length = 1024
dtype = None

# Reference list of pre-quantized 4-bit checkpoints Unsloth publishes
# (faster download, no OOM on load). More at https://huggingface.co/unsloth
fourbit_models = [
    "unsloth/gpt-oss-20b-unsloth-bnb-4bit",   # 20B, bitsandbytes 4-bit
    "unsloth/gpt-oss-120b-unsloth-bnb-4bit",
    "unsloth/gpt-oss-20b",                    # 20B, MXFP4 format
    "unsloth/gpt-oss-120b",
]

# Download/load the base model and its tokenizer, quantized to 4-bit.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/gpt-oss-20b",
    dtype=dtype,                   # None -> auto detection
    max_seq_length=max_seq_length, # choose any for long context
    load_in_4bit=True,             # 4-bit quantization to reduce memory
    full_finetuning=False,         # LoRA fine-tuning, not full fine-tuning
    # token="hf_...",              # needed only for gated models
)
添加lora层
# Attach LoRA adapters to the attention and MLP projection layers.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]
model = FastLanguageModel.get_peft_model(
    model,
    r=8,                                   # LoRA rank
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",  # Unsloth's memory-saving checkpointing
    random_state=3407,
    use_rslora=False,                      # rank-stabilized LoRA not used here
    loftq_config=None,                     # LoftQ not used here
)
打印一下可训练参数,结果为Trainable parameters: 3981312 / 11045084736 (0.04%)
def print_trainable_params_percentage(model):
    """Print and return the share of trainable parameters in *model*.

    Iterates ``model.parameters()`` twice: once for the total element
    count, once for the elements with ``requires_grad`` set (here, the
    LoRA adapter weights).

    Parameters
    ----------
    model : any object exposing ``parameters()`` yielding tensors
        (e.g. a ``torch.nn.Module``).

    Returns
    -------
    float
        Trainable percentage in [0, 100]; 0.0 for a parameterless model
        (guards against division by zero).
    """
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    percent = 100 * trainable_params / total_params if total_params else 0.0
    print(f"Trainable parameters: {trainable_params} / {total_params} ({percent:.2f}%)")
    return percent
print_trainable_params_percentage(model)  # expect ~0.04%: only the LoRA adapter weights train
测试
# Quick smoke test: generate a short answer before any training happens.
from transformers import TextStreamer

chat = [
    {"role": "user", "content": "Solve x^5 + 3x^4 - 10 = 3."},
]
prompt = tokenizer.apply_chat_template(
    chat,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
    reasoning_effort="low",  # supported levels: low / medium / high
).to(model.device)

# Stream tokens to stdout as they are produced; the return value is discarded.
_ = model.generate(**prompt, max_new_tokens=64, streamer=TextStreamer(tokenizer))
数据准备
首先加载数据集,并增加额外的text列,用于训练模型
def formatting_prompts_func(examples):
    """Batched ``datasets.map`` callback: render each conversation to text.

    Parameters
    ----------
    examples : dict of lists
        A batch with a ``"messages"`` column holding chat-format
        conversations (lists of role/content dicts).

    Returns
    -------
    dict
        A new ``"text"`` column: each conversation serialized with the
        model's chat template. ``tokenize=False`` keeps strings;
        ``add_generation_prompt=False`` because these are complete
        training examples, not prompts awaiting a reply.

    NOTE(review): relies on the module-level ``tokenizer`` created when
    the model was loaded. (The stray ``pass`` after ``return`` in the
    original was dead code and has been removed.)
    """
    convos = examples["messages"]
    texts = [
        tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False)
        for convo in convos
    ]
    return {"text": texts}
from datasets import load_dataset
from unsloth.chat_templates import standardize_sharegpt

# Pull the multilingual reasoning dataset, normalize it to the ShareGPT
# "messages" schema, then add the rendered "text" column for training.
raw_dataset = load_dataset("HuggingFaceH4/Multilingual-Thinking", split="train")
dataset = standardize_sharegpt(raw_dataset)
dataset = dataset.map(formatting_prompts_func, batched=True)

# Peek at the first example to sanity-check the formatting.
dataset[0]
训练模型
from trl import SFTConfig, SFTTrainer

# Training hyper-parameters: micro-batch of 1 with 4 accumulation steps
# (effective batch size 4), capped at 30 optimizer steps — a demo run.
training_args = SFTConfig(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    # num_train_epochs=1,          # set this for one full training run
    max_steps=30,
    learning_rate=2e-4,
    logging_steps=1,
    optim="adamw_8bit",            # 8-bit optimizer states save memory
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
    report_to="none",              # switch for WandB etc.
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    args=training_args,
)

trainer_stats = trainer.train()
模型保存
model.save_pretrained("fintune_model")  # NOTE(review): for a PEFT-wrapped model this presumably saves just the LoRA adapter, not the full weights — verify before deployment
总结
模型加载 –> peft加载 –> 数据集准备 –> trainer 准备–> 开始训练
整体来看,这套流程相当完整且简单。
文章参考:
https://mp.weixin.qq.com/s/XtWTPWPK7mdedVTtfL2X0w
https://www.9gpu.com/help-id-87.html
博客地址: qwrdxer.github.io
欢迎交流: qq1944270374
转载请注明来源,欢迎对文章中的引用来源进行考证,欢迎指出任何有错误或不够清晰的表达。可以在下面评论区评论,也可以邮件至 1944270374@qq.com