unsloth/OpenMathReasoning-mini
Viewer • Updated • 19.3k • 3.87k • 64
from transformers import AutoModelForCausalLM, AutoTokenizer,TextStreamer
# Minimal chat-style inference script: load the checkpoint, render a
# system + user conversation with the model's chat template, and stream the
# generated reply to stdout.

# Hugging Face Hub id of the checkpoint to load.
model_name = "wesjos/GRPO-SFT-Qwen3-4B-Base-math"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # let the library decide where to place the weights
)

# Runtime prompt strings (kept verbatim; they are sent to the model as-is).
system_prompt = "你是一个友善并且擅长解决问题的助手,请根据我的提问,给出专业的回答。"
prompt = "帮我写一个python的强化学习玩2048的代码"
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": prompt},
]

# Prints decoded text to stdout while `generate` is still running.
streamer = TextStreamer(tokenizer)

# Render the conversation to a single prompt string. `tokenize=False` returns
# text rather than ids; `add_generation_prompt=True` ends the prompt with the
# marker that opens the assistant's turn.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# The chat template has already inserted every special token the model
# expects, so the tokenizer must not add its own on top
# (`add_special_tokens=False`); otherwise tokenizers that prepend BOS/EOS
# would produce duplicated special tokens in the prompt.
model_inputs = tokenizer(
    [text],
    return_tensors="pt",
    add_special_tokens=False,
).to(model.device)

# Generate up to 4096 new tokens; the text is emitted through `streamer` as
# it is produced, and the full token ids are also kept in `outputs`.
outputs = model.generate(
    **model_inputs,
    max_new_tokens=4096,
    streamer=streamer,
)
Base model: Qwen/Qwen3-4B-Base