Spaces:
Sleeping
Sleeping
from datasets import load_dataset
from transformers import AutoTokenizer

# Each checkpoint ships its own chat template, so the same message list
# is rendered differently per model — no manual prompt formatting needed.
mistral_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
qwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat")
smol_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M-Instruct")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

# Render the conversation with each model's own template.
# tokenize=False returns the formatted prompt string instead of token ids.
mistral_chat = mistral_tokenizer.apply_chat_template(messages, tokenize=False)
qwen_chat = qwen_tokenizer.apply_chat_template(messages, tokenize=False)
smol_chat = smol_tokenizer.apply_chat_template(messages, tokenize=False)

# NOTE(review): loads the full smoltalk dataset from the Hub — requires network access.
dataset = load_dataset("HuggingFaceTB/smoltalk")
def convert_to_chatml(example):
    """Convert a single input/output record to ChatML-style messages.

    Args:
        example: Mapping with "input" (user turn) and "output"
            (assistant turn) string fields.

    Returns:
        A dict with a "messages" key: a two-element list of role/content
        dicts in the order user, assistant — the format expected by
        ``apply_chat_template``.

    Raises:
        KeyError: If ``example`` lacks an "input" or "output" field.
    """
    return {
        "messages": [
            {"role": "user", "content": example["input"]},
            {"role": "assistant", "content": example["output"]},
        ]
    }