fix(yapf): align weird new lines break [generated] [skip ci] (#284)

fix(yapf): align weird new lines break

Signed-off-by: aarnphm-ec2-dev <29749331+aarnphm@users.noreply.github.com>
This commit is contained in:
Aaron Pham
2023-09-01 05:34:22 -04:00
committed by GitHub
parent 3e45530abd
commit b7af7765d4
91 changed files with 811 additions and 1678 deletions

View File

@@ -61,16 +61,13 @@ model, tokenizer = openllm.AutoLLM.for_model("falcon",
quantize="int4",
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.float16,
ensure_available=True).prepare_for_training(adapter_type="lora",
lora_alpha=16,
lora_dropout=0.1,
r=16,
bias="none",
target_modules=[
"query_key_value", "dense",
"dense_h_to_4h",
"dense_4h_to_h"
])
ensure_available=True).prepare_for_training(
adapter_type="lora",
lora_alpha=16,
lora_dropout=0.1,
r=16,
bias="none",
target_modules=["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h"])
model.config.use_cache = False
tokenizer.pad_token = tokenizer.eos_token
@@ -81,9 +78,8 @@ trainer = SFTTrainer(model=model,
dataset_text_field="text",
max_seq_length=model_args.max_sequence_length,
tokenizer=tokenizer,
args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir),
**dataclasses.asdict(training_args)),
)
args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)),
)
# upcast layernorm in float32 for more stable training
for name, module in trainer.model.named_modules():

View File

@@ -78,10 +78,7 @@ def chunk(sample, chunk_length=2048):
batch_chunk_length = (batch_total_length // chunk_length) * chunk_length
# Split by chunks of max_len.
result = {
k: [t[i:i + chunk_length] for i in range(0, batch_chunk_length, chunk_length)]
for k, t in concatenated_examples.items()
}
result = {k: [t[i:i + chunk_length] for i in range(0, batch_chunk_length, chunk_length)] for k, t in concatenated_examples.items()}
# add remainder to global variable for next batch
remainder = {k: concatenated_examples[k][batch_chunk_length:] for k in concatenated_examples.keys()}
# prepare labels
@@ -101,8 +98,7 @@ def prepare_datasets(tokenizer, dataset_name=DATASET_NAME):
print("Sample from dolly-v2 ds:", dataset[randint(0, len(dataset))]["text"])
# tokenize and chunk dataset
lm_dataset = dataset.map(lambda sample: tokenizer(sample["text"]),
batched=True,
lm_dataset = dataset.map(lambda sample: tokenizer(sample["text"]), batched=True,
remove_columns=list(dataset.features)).map(partial(chunk, chunk_length=2048), batched=True)
# Print total number of samples
@@ -113,7 +109,7 @@ def prepare_for_int4_training(model_id: str,
model_version: str | None = None,
gradient_checkpointing: bool = True,
bf16: bool = True,
) -> tuple[peft.PeftModel, transformers.LlamaTokenizerFast]:
) -> tuple[peft.PeftModel, transformers.LlamaTokenizerFast]:
from peft.tuners.lora import LoraLayer
llm = openllm.AutoLLM.for_model("llama",
@@ -124,16 +120,14 @@ def prepare_for_int4_training(model_id: str,
bnb_4bit_compute_dtype=torch.bfloat16,
use_cache=not gradient_checkpointing,
device_map="auto",
)
)
print("Model summary:", llm.model)
# get lora target modules
modules = find_all_linear_names(llm.model)
print(f"Found {len(modules)} modules to quantize: {modules}")
model, tokenizer = llm.prepare_for_training(adapter_type="lora",
use_gradient_checkpointing=gradient_checkpointing,
target_modules=modules)
model, tokenizer = llm.prepare_for_training(adapter_type="lora", use_gradient_checkpointing=gradient_checkpointing, target_modules=modules)
# pre-process the model by upcasting the layer norms to float32 for more stable training
for name, module in model.named_modules():
@@ -189,7 +183,7 @@ def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
model, tokenizer = prepare_for_int4_training(model_args.model_id,
gradient_checkpointing=training_args.gradient_checkpointing,
bf16=training_args.bf16,
)
)
datasets = prepare_datasets(tokenizer)
trainer = transformers.Trainer(model=model,
@@ -197,7 +191,7 @@ def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
**dataclasses.asdict(training_args)),
train_dataset=datasets,
data_collator=transformers.default_data_collator,
)
)
trainer.train()
@@ -212,14 +206,10 @@ def train_loop(model_args: ModelArguments, training_args: TrainingArguments):
del model, trainer
torch.cuda.empty_cache()
model = peft.AutoPeftModelForCausalLM.from_pretrained(training_args.output_dir,
low_cpu_mem_usage=True,
torch_dtype=torch.float16)
model = peft.AutoPeftModelForCausalLM.from_pretrained(training_args.output_dir, low_cpu_mem_usage=True, torch_dtype=torch.float16)
# merge lora with base weights and save
model = model.merge_and_unload()
model.save_pretrained(os.path.join(os.getcwd(), "outputs", "merged_llama_lora"),
safe_serialization=True,
max_shard_size="2GB")
model.save_pretrained(os.path.join(os.getcwd(), "outputs", "merged_llama_lora"), safe_serialization=True, max_shard_size="2GB")
else:
trainer.model.save_pretrained(os.path.join(training_args.output_dir, "lora"))

View File

@@ -26,14 +26,12 @@ if t.TYPE_CHECKING:
DEFAULT_MODEL_ID = "facebook/opt-6.7b"
def load_trainer(model: PeftModel, tokenizer: transformers.GPT2TokenizerFast, dataset_dict: t.Any,
training_args: TrainingArguments):
def load_trainer(model: PeftModel, tokenizer: transformers.GPT2TokenizerFast, dataset_dict: t.Any, training_args: TrainingArguments):
return transformers.Trainer(model=model,
train_dataset=dataset_dict["train"],
args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir),
**dataclasses.asdict(training_args)),
args=dataclasses.replace(transformers.TrainingArguments(training_args.output_dir), **dataclasses.asdict(training_args)),
data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
)
@dataclasses.dataclass
class TrainingArguments:
@@ -58,16 +56,13 @@ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
else:
model_args, training_args = t.cast(t.Tuple[ModelArguments, TrainingArguments], parser.parse_args_into_dataclasses())
model, tokenizer = openllm.AutoLLM.for_model("opt",
model_id=model_args.model_id,
quantize="int8",
ensure_available=True).prepare_for_training(
adapter_type="lora",
r=16,
lora_alpha=32,
target_modules=["q_proj", "v_proj"],
lora_dropout=0.05,
bias="none")
model, tokenizer = openllm.AutoLLM.for_model("opt", model_id=model_args.model_id, quantize="int8",
ensure_available=True).prepare_for_training(adapter_type="lora",
r=16,
lora_alpha=32,
target_modules=["q_proj", "v_proj"],
lora_dropout=0.05,
bias="none")
# fine-tune on the english_quotes dataset
data = load_dataset("Abirate/english_quotes")