mirror of
https://github.com/bentoml/OpenLLM.git
synced 2026-05-19 05:57:39 -04:00
fix(yapf): align weird new lines break [generated] [skip ci] (#284)
fix(yapf): align weird new lines break Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
@@ -61,16 +61,13 @@ model, tokenizer = openllm.AutoLLM.for_model("falcon",
|
||||
quantize="int4",
|
||||
bnb_4bit_quant_type="nf4",
|
||||
bnb_4bit_compute_dtype=torch.float16,
|
||||
ensure_available=True).prepare_for_training(adapter_type="lora",
|
||||
lora_alpha=16,
|
||||
lora_dropout=0.1,
|
||||
r=16,
|
||||
bias="none",
|
||||
target_modules=[
|
||||
"query_key_value", "dense",
|
||||
"dense_h_to_4h",
|
||||
"dense_4h_to_h"
|
||||
])
|
||||
ensure_available=True).prepare_for_training(
|
||||
adapter_type="lora",
|
||||
lora_alpha=16,
|
||||
lora_dropout=0.1,
|
||||
r=16,
|
||||
bias="none",
|
||||
target_modules=["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h"])
|
||||
model.config.use_cache = False
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
|
||||
@@ -81,9 +78,8 @@ trainer = SFTTrainer(model=model,
|
||||
dataset_text_field="text",
|
||||
max_seq_length=model_args.max_sequence_length,
|
||||
tokenizer=tokenizer,
|
||||
args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir),
|
||||
**dataclasses.asdict(training_args)),
|
||||
)
|
||||
args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)),
|
||||
)
|
||||
|
||||
# upcast layernorm in float32 for more stable training
|
||||
for name, module in trainer.model.named_modules():
|
||||
|
||||
@@ -78,10 +78,7 @@ def chunk(sample, chunk_length=2048):
|
||||
batch_chunk_length = (batch_total_length // chunk_length) * chunk_length
|
||||
|
||||
# Split by chunks of max_len.
|
||||
result = {
|
||||
k: [t[i:i + chunk_length] for i in range(0, batch_chunk_length, chunk_length)]
|
||||
for k, t in concatenated_examples.items()
|
||||
}
|
||||
result = {k: [t[i:i + chunk_length] for i in range(0, batch_chunk_length, chunk_length)] for k, t in concatenated_examples.items()}
|
||||
# add remainder to global variable for next batch
|
||||
remainder = {k: concatenated_examples[k][batch_chunk_length:] for k in concatenated_examples.keys()}
|
||||
# prepare labels
|
||||
@@ -101,8 +98,7 @@ def prepare_datasets(tokenizer, dataset_name=DATASET_NAME):
|
||||
print("Sample from dolly-v2 ds:", dataset[randint(0, len(dataset))]["text"])
|
||||
|
||||
# tokenize and chunk dataset
|
||||
lm_dataset = dataset.map(lambda sample: tokenizer(sample["text"]),
|
||||
batched=True,
|
||||
lm_dataset = dataset.map(lambda sample: tokenizer(sample["text"]), batched=True,
|
||||
remove_columns=list(dataset.features)).map(partial(chunk, chunk_length=2048), batched=True)
|
||||
|
||||
# Print total number of samples
|
||||
@@ -113,7 +109,7 @@ def prepare_for_int4_training(model_id: str,
|
||||
model_version: str | None = None,
|
||||
gradient_checkpointing: bool = True,
|
||||
bf16: bool = True,
|
||||
) -> tuple[peft.PeftModel, transformers.LlamaTokenizerFast]:
|
||||
) -> tuple[peft.PeftModel, transformers.LlamaTokenizerFast]:
|
||||
from peft.tuners.lora import LoraLayer
|
||||
|
||||
llm = openllm.AutoLLM.for_model("llama",
|
||||
@@ -124,16 +120,14 @@ def prepare_for_int4_training(model_id: str,
|
||||
bnb_4bit_compute_dtype=torch.bfloat16,
|
||||
use_cache=not gradient_checkpointing,
|
||||
device_map="auto",
|
||||
)
|
||||
)
|
||||
print("Model summary:", llm.model)
|
||||
|
||||
# get lora target modules
|
||||
modules = find_all_linear_names(llm.model)
|
||||
print(f"Found {len(modules)} modules to quantize: {modules}")
|
||||
|
||||
model, tokenizer = llm.prepare_for_training(adapter_type="lora",
|
||||
use_gradient_checkpointing=gradient_checkpointing,
|
||||
target_modules=modules)
|
||||
model, tokenizer = llm.prepare_for_training(adapter_type="lora", use_gradient_checkpointing=gradient_checkpointing, target_modules=modules)
|
||||
|
||||
# pre-process the model by upcasting the layer norms in float 32 for
|
||||
for name, module in model.named_modules():
|
||||
@@ -189,7 +183,7 @@ def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
|
||||
model, tokenizer = prepare_for_int4_training(model_args.model_id,
|
||||
gradient_checkpointing=training_args.gradient_checkpointing,
|
||||
bf16=training_args.bf16,
|
||||
)
|
||||
)
|
||||
datasets = prepare_datasets(tokenizer)
|
||||
|
||||
trainer = transformers.Trainer(model=model,
|
||||
@@ -197,7 +191,7 @@ def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
|
||||
**dataclasses.asdict(training_args)),
|
||||
train_dataset=datasets,
|
||||
data_collator=transformers.default_data_collator,
|
||||
)
|
||||
)
|
||||
|
||||
trainer.train()
|
||||
|
||||
@@ -212,14 +206,10 @@ def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
|
||||
del model, trainer
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
model = peft.AutoPeftModelForCausalLM.from_pretrained(training_args.output_dir,
|
||||
low_cpu_mem_usage=True,
|
||||
torch_dtype=torch.float16)
|
||||
model = peft.AutoPeftModelForCausalLM.from_pretrained(training_args.output_dir, low_cpu_mem_usage=True, torch_dtype=torch.float16)
|
||||
# merge lora with base weights and save
|
||||
model = model.merge_and_unload()
|
||||
model.save_pretrained(os.path.join(os.getcwd(), "outputs", "merged_llama_lora"),
|
||||
safe_serialization=True,
|
||||
max_shard_size="2GB")
|
||||
model.save_pretrained(os.path.join(os.getcwd(), "outputs", "merged_llama_lora"), safe_serialization=True, max_shard_size="2GB")
|
||||
else:
|
||||
trainer.model.save_pretrained(os.path.join(training_args.output_dir, "lora"))
|
||||
|
||||
|
||||
@@ -26,14 +26,12 @@ if t.TYPE_CHECKING:
|
||||
|
||||
DEFAULT_MODEL_ID = "facebook/opt-6.7b"
|
||||
|
||||
def load_trainer(model: PeftModel, tokenizer: transformers.GPT2TokenizerFast, dataset_dict: t.Any,
|
||||
training_args: TrainingArguments):
|
||||
def load_trainer(model: PeftModel, tokenizer: transformers.GPT2TokenizerFast, dataset_dict: t.Any, training_args: TrainingArguments):
|
||||
return transformers.Trainer(model=model,
|
||||
train_dataset=dataset_dict["train"],
|
||||
args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir),
|
||||
**dataclasses.asdict(training_args)),
|
||||
args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)),
|
||||
data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
|
||||
)
|
||||
)
|
||||
|
||||
@dataclasses.dataclass
|
||||
class TrainingArguments:
|
||||
@@ -58,16 +56,13 @@ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
|
||||
else:
|
||||
model_args, training_args = t.cast(t.Tuple[ModelArguments, TrainingArguments], parser.parse_args_into_dataclasses())
|
||||
|
||||
model, tokenizer = openllm.AutoLLM.for_model("opt",
|
||||
model_id=model_args.model_id,
|
||||
quantize="int8",
|
||||
ensure_available=True).prepare_for_training(
|
||||
adapter_type="lora",
|
||||
r=16,
|
||||
lora_alpha=32,
|
||||
target_modules=["q_proj", "v_proj"],
|
||||
lora_dropout=0.05,
|
||||
bias="none")
|
||||
model, tokenizer = openllm.AutoLLM.for_model("opt", model_id=model_args.model_id, quantize="int8",
|
||||
ensure_available=True).prepare_for_training(adapter_type="lora",
|
||||
r=16,
|
||||
lora_alpha=32,
|
||||
target_modules=["q_proj", "v_proj"],
|
||||
lora_dropout=0.05,
|
||||
bias="none")
|
||||
|
||||
# ft on english_quotes
|
||||
data = load_dataset("Abirate/english_quotes")
|
||||
|
||||
Reference in New Issue
Block a user