diff --git a/demo.py b/demo.py
index 4fb1b73..1c6ee73 100644
--- a/demo.py
+++ b/demo.py
@@ -5,7 +5,7 @@
 from peft import LoraConfig
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from trl import SFTTrainer, SFTConfig
-
+from datasets import Dataset
 from dataset import SFTDataCollator, SFTDataset
 from utils.constants import model2template
 
@@ -53,7 +53,6 @@ def train_lora(
         optim="paged_adamw_8bit",
         remove_unused_columns=False,
         num_train_epochs=training_args.num_train_epochs,
-        max_seq_length=context_length,
     )
     tokenizer = AutoTokenizer.from_pretrained(
         model_id,
@@ -74,10 +73,12 @@ def train_lora(
         template=model2template[model_id],
     )
 
+    dataset_dict = [dataset[i] for i in range(len(dataset))]
+    hf_dataset = Dataset.from_list(dataset_dict)
     # Define trainer
     trainer = SFTTrainer(
         model=model,
-        train_dataset=dataset,
+        train_dataset=hf_dataset,
         args=training_args,
         peft_config=lora_config,
         data_collator=SFTDataCollator(tokenizer, max_seq_length=context_length),
@@ -108,7 +109,7 @@ def train_lora(
     )
 
     # Set model ID and context length
-    model_id = "Qwen/Qwen1.5-0.5B"
+    model_id = "Qwen/Qwen3.5-2B"
     context_length = 2048
 
     # Start LoRA fine-tuning
diff --git a/requirements.txt b/requirements.txt
index 4b610f6..41df922 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,8 @@
+pyyaml
 torch>=1.13.1
-transformers>=4.43.0,<=4.45.0
-peft>=0.10.0,<=0.13.2
-loguru
-trl>=0.9.3,<=0.9.6
+transformers==5.3.0
+peft==0.18.1
+loguru>=0.6.0
+huggingface-hub==1.5.0
+trl>=0.20.0,<=0.29.1
 bitsandbytes
-pyyaml
diff --git a/utils/constants.py b/utils/constants.py
index 572c9ad..bc0d24b 100644
--- a/utils/constants.py
+++ b/utils/constants.py
@@ -9,19 +9,37 @@
 }
 
 model2template = {
-    "Qwen/Qwen2.5-0.5B-Instruct": qwen_template,
-    "Qwen/Qwen2.5-1.5B-Instruct": qwen_template,
-    "Qwen/Qwen2.5-7B-Instruct": qwen_template,
+    "Qwen/Qwen3.5-0.8B": qwen_template,
+    "Qwen/Qwen3.5-0.8B-Base": qwen_template,
+    "Qwen/Qwen3.5-2B": qwen_template,
+    "Qwen/Qwen3.5-2B-Base": qwen_template,
+    "Qwen/Qwen3.5-4B": qwen_template,
+    "Qwen/Qwen3.5-4B-Base": qwen_template,
+    "Qwen/Qwen3.5-9B": qwen_template,
+    "Qwen/Qwen3.5-9B-Base": qwen_template,
+    "Qwen/Qwen3.5-27B": qwen_template,
 }
 
 model2size = {
-    "Qwen/Qwen2.5-0.5B-Instruct": 494_000_000,
-    "Qwen/Qwen2.5-1.5B-Instruct": 1_540_000_000,
-    "Qwen/Qwen2.5-7B-Instruct": 7_620_000_000,
+    "Qwen/Qwen3.5-0.8B": 853_000_000,
+    "Qwen/Qwen3.5-0.8B-Base": 853_000_000,
+    "Qwen/Qwen3.5-2B": 2_213_000_000,
+    "Qwen/Qwen3.5-2B-Base": 2_213_000_000,
+    "Qwen/Qwen3.5-4B": 4_539_000_000,
+    "Qwen/Qwen3.5-4B-Base": 4_539_000_000,
+    "Qwen/Qwen3.5-9B": 8_392_000_000,
+    "Qwen/Qwen3.5-9B-Base": 8_392_000_000,
+    "Qwen/Qwen3.5-27B": 26_085_000_000,
 }
 
 model2base_model = {
-    "Qwen/Qwen2.5-0.5B-Instruct": "qwen1.5",
-    "Qwen/Qwen2.5-1.5B-Instruct": "qwen1.5",
-    "Qwen/Qwen2.5-7B-Instruct": "qwen1.5",
+    "Qwen/Qwen3.5-0.8B": "qwen3.5",
+    "Qwen/Qwen3.5-0.8B-Base": "qwen3.5",
+    "Qwen/Qwen3.5-2B": "qwen3.5",
+    "Qwen/Qwen3.5-2B-Base": "qwen3.5",
+    "Qwen/Qwen3.5-4B": "qwen3.5",
+    "Qwen/Qwen3.5-4B-Base": "qwen3.5",
+    "Qwen/Qwen3.5-9B": "qwen3.5",
+    "Qwen/Qwen3.5-9B-Base": "qwen3.5",
+    "Qwen/Qwen3.5-27B": "qwen3.5",
 }