llama-3 bug fixes #429

Merged 61 commits on May 7, 2024. Changes shown below are from all commits.

Commits
64a7d27 Fix prompt (danielhanchen, Apr 19, 2024)
495f1da Merge branch 'main' into nightly (danielhanchen, Apr 20, 2024)
656ab22 Update chat_templates.py (danielhanchen, Apr 20, 2024)
c4f2f54 fix_untrained_tokens (danielhanchen, Apr 20, 2024)
87b4bb9 Update llama.py (danielhanchen, Apr 21, 2024)
abd192f add tokens (danielhanchen, Apr 21, 2024)
868351b Update _utils.py (danielhanchen, Apr 21, 2024)
f29a3e7 Update tokenizer_utils.py (danielhanchen, Apr 21, 2024)
2573474 Update llama.py (danielhanchen, Apr 21, 2024)
bfb32a3 Update llama.py (danielhanchen, Apr 21, 2024)
40a6d00 Update llama.py (danielhanchen, Apr 21, 2024)
140a0b0 Update llama.py (danielhanchen, Apr 21, 2024)
88435a8 pad_token (danielhanchen, Apr 21, 2024)
24790e2 Update chat_templates.py (danielhanchen, Apr 21, 2024)
1464f7d Update chat_templates.py (danielhanchen, Apr 21, 2024)
df069c5 tokenizer (danielhanchen, Apr 21, 2024)
eb00fb7 Update save.py (danielhanchen, Apr 21, 2024)
805f890 Update chat_templates.py (danielhanchen, Apr 21, 2024)
80be6ff Update chat_templates.py (danielhanchen, Apr 21, 2024)
92723ba Merge branch 'main' into nightly (danielhanchen, Apr 22, 2024)
2e62a69 patch tokenizer padding (danielhanchen, Apr 22, 2024)
b0678d6 Update tokenizer_utils.py (danielhanchen, Apr 22, 2024)
f85ef9c Update save.py (danielhanchen, Apr 23, 2024)
d2f10a0 Fix: loading models with resized vocabulary (#377) (oKatanaaa, Apr 24, 2024)
f5fa654 GGUF fix (danielhanchen, Apr 28, 2024)
8325e05 Readme (#390) (danielhanchen, Apr 28, 2024)
13b1ae6 Update README.md (danielhanchen, Apr 28, 2024)
5069a7d Delete .gitignore (danielhanchen, Apr 28, 2024)
1ba3379 Merge branch 'main' into nightly (danielhanchen, Apr 29, 2024)
7c9c3f5 Phi-3 (danielhanchen, Apr 29, 2024)
7b696ee Update README.md (danielhanchen, Apr 29, 2024)
48334f7 Update README.md (danielhanchen, Apr 29, 2024)
3665c0b Update README.md (danielhanchen, Apr 29, 2024)
0f9e073 Update README.md (danielhanchen, Apr 29, 2024)
eb135d8 Update README.md (danielhanchen, Apr 29, 2024)
56e2674 Update README.md (danielhanchen, Apr 29, 2024)
b091a0b Update README.md (danielhanchen, Apr 29, 2024)
18533ab Update README.md (danielhanchen, Apr 29, 2024)
3e84338 Update README.md (danielhanchen, Apr 29, 2024)
d8feef5 Update README.md (danielhanchen, Apr 29, 2024)
392c034 Update README.md (danielhanchen, Apr 29, 2024)
df6fb52 Update README.md (danielhanchen, Apr 29, 2024)
99ed47a Update README.md (danielhanchen, Apr 29, 2024)
7fae556 Update README.md (danielhanchen, Apr 29, 2024)
000d050 Update README.md (danielhanchen, Apr 29, 2024)
27f88f0 Update README.md (danielhanchen, Apr 29, 2024)
affbba1 Update README.md (danielhanchen, Apr 29, 2024)
14f104a Update README.md (danielhanchen, Apr 29, 2024)
e040d18 Fix reserved tokens (danielhanchen, May 4, 2024)
fb10081 Merge branch 'main' into nightly (danielhanchen, May 4, 2024)
f53944a Update save.py (danielhanchen, May 4, 2024)
70b41d1 Update tokenizer_utils.py (danielhanchen, May 4, 2024)
1b1b931 Update tokenizer_utils.py (danielhanchen, May 4, 2024)
61edc3c Update tokenizer_utils.py (danielhanchen, May 4, 2024)
73df3ee Update tokenizer_utils.py (danielhanchen, May 4, 2024)
15d7898 Update tokenizer_utils.py (danielhanchen, May 4, 2024)
84418a9 Merge branch 'main' into nightly (danielhanchen, May 5, 2024)
76ed0a4 Update chat_templates.py (danielhanchen, May 6, 2024)
dfec8dd Update save.py (danielhanchen, May 7, 2024)
73af5d1 Update _utils.py (danielhanchen, May 7, 2024)
9c7d9a7 Update chat_templates.py (danielhanchen, May 7, 2024)

22 changes: 22 additions & 0 deletions unsloth/chat_templates.py
@@ -266,6 +266,20 @@
 CHAT_TEMPLATES["llama-3"] = (llama3_template, llama3_template_eos_token,)
 
 
+# Phi-3
+phi3_template = \
+    "{{ bos_token }}"\
+    "{% for message in messages %}"\
+        "{% if (message['role'] == 'user') %}"\
+            "{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}"\
+        "{% elif (message['role'] == 'assistant') %}"\
+            "{{message['content'] + '<|end|>' + '\n'}}"\
+        "{% endif %}"\
+    "{% endfor %}"
+phi3_template_eos_token = "<|end|>"
+CHAT_TEMPLATES["phi-3"] = (phi3_template, phi3_template_eos_token,)
+
+
 def get_chat_template(
     tokenizer,
     chat_template = "chatml",
@@ -595,4 +609,12 @@ def test_chat_templates():
     correct_tokenizer.chat_template = template
     our_prompt = correct_tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True)
     assert(correct_prompt == our_prompt)
+
+    # Phi-3
+    template = phi3_template
+    correct_tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
+    correct_prompt = correct_tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True)
+    correct_tokenizer.chat_template = template
+    our_prompt = correct_tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True)
+    assert(correct_prompt == our_prompt)
 pass
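For reference, the new "phi-3" entry renders conversations into Phi-3's <|user|> / <|assistant|> / <|end|> turn markers, which is exactly what the test above compares against the stock tokenizer_config.json template. A minimal sketch of exercising it through get_chat_template (message contents are illustrative, not from this PR):

```python
from transformers import AutoTokenizer
from unsloth.chat_templates import get_chat_template

# Attach the new "phi-3" template to a Phi-3 tokenizer.
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
tokenizer = get_chat_template(tokenizer, chat_template = "phi-3")

messages = [
    {"role": "user",      "content": "What is 2 + 2?"},
    {"role": "assistant", "content": "2 + 2 = 4."},
    {"role": "user",      "content": "Now double it."},
]
prompt = tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True)
print(prompt)
# Expected shape per the template above:
# <s><|user|>
# What is 2 + 2?<|end|>
# <|assistant|>
# 2 + 2 = 4.<|end|>
# <|user|>
# Now double it.<|end|>
# <|assistant|>
```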
64 changes: 50 additions & 14 deletions unsloth/models/_utils.py
@@ -144,24 +144,60 @@ def make_inputs_require_grad(module, input, output):
 
 
 def patch_tokenizer(model, tokenizer):
+    """
+        Phi3's pad_token isn't set. We set it to <|placeholder...
+        Llama-3 is <|reserved...
+        Llama-2 is <unk>
+        Check if pad_token is not the same as eos_token otherwise the loss will ignore it!!
+        Fixes https://github.com/unslothai/unsloth/issues/5
+    """
+    possible_reserved_tokens = ("<|reserved", "<|placeholder",)
+
     if model is not None:
         model.config.update({"unsloth_version" : __version__})
-    if not hasattr(tokenizer, "pad_token") or tokenizer.pad_token is None:
-        # Fixes https://github.com/unslothai/unsloth/issues/5
-        if hasattr(tokenizer, "unk_token") and tokenizer.unk_token is not None:
-            tokenizer.add_special_tokens({"pad_token" : tokenizer.unk_token})
-            tokenizer.pad_token = tokenizer.unk_token
-        else:
-            name = model.config._name_or_path if model is not None else "Model"
-            logger.warning_once(
-                f"{name} does not have a padding or unknown token!\n"\
-                f"Will use the EOS token of id {tokenizer.eos_token_id} as padding."
-            )
-            assert(hasattr(tokenizer, "eos_token"))
-            tokenizer.add_special_tokens({"pad_token" : tokenizer.eos_token})
-            tokenizer.pad_token = tokenizer.eos_token
+
+    bad_pad_token = False
+    if hasattr(tokenizer, "pad_token") and tokenizer.pad_token is not None:
+        # Check if pad_token is not the same as eos_token otherwise the loss will ignore it!!
+        bad_pad_token = tokenizer.eos_token == tokenizer.pad_token
+    elif hasattr(tokenizer, "pad_token") and tokenizer.pad_token is None:
+        bad_pad_token = True
+    else:
+        bad_pad_token = False
+    pass
+
+    if bad_pad_token:
+        # Find a better pad token
+        added_tokens = [str(x) for x in tokenizer.added_tokens_decoder.values()]
+        possible_pad_token = None
+        for added_token in added_tokens[::-1]:
+            if added_token.startswith(possible_reserved_tokens):
+                possible_pad_token = added_token
+                break
+            pass
+        pass
+        if possible_pad_token is None:
+            # Try unk_token
+            possible_pad_token = tokenizer.unk_token
+        pass
+        if possible_pad_token is None:
+            # Failure!!
+            raise RuntimeError(
+                "Unsloth: Tokenizer's pad_token cannot be = eos_token, and we couldn't find a\n"\
+                "replacement of either <|reserved... or <|placeholder..."
+            )
+        pass
+
+        name = model.config._name_or_path if model is not None else "Model"
+        logger.warning_once(
+            f"{name} does not have a padding token! Will use pad_token = {possible_pad_token}."
+        )
+
+        # Edit pad_token
+        tokenizer.add_special_tokens({"pad_token" : possible_pad_token})
+        tokenizer.pad_token = possible_pad_token
         if model is not None:
-            config = model.config.update({"pad_token_id" : tokenizer.eos_token_id})
+            config = model.config.update({"pad_token_id" : tokenizer.pad_token_id})
+        pass
+    pass
     return model, tokenizer
 pass
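The selection order matters: reusing eos_token as pad_token means collators that mask pad labels also mask genuine EOS labels, so the model never learns to stop, which is the failure this hunk guards against. A standalone sketch of the same search, for inspecting a tokenizer in isolation (the model name is illustrative, not part of this diff):

```python
from transformers import AutoTokenizer

def find_replacement_pad_token(tokenizer):
    # Mirror the diff's search: walk added special tokens from the end and
    # take the first reserved/placeholder token; fall back to unk_token.
    possible_reserved_tokens = ("<|reserved", "<|placeholder",)
    added_tokens = [str(x) for x in tokenizer.added_tokens_decoder.values()]
    for added_token in added_tokens[::-1]:
        if added_token.startswith(possible_reserved_tokens):
            return added_token
    return tokenizer.unk_token  # None here means the caller must raise

# Per the docstring above: Llama-3 ships <|reserved_special_token_N|> entries,
# Llama-2 falls back to <unk>, Phi-3 has <|placeholder...|> tokens.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
print(find_replacement_pad_token(tokenizer))
```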
35 changes: 31 additions & 4 deletions unsloth/save.py
@@ -18,6 +18,7 @@
 from typing import Optional, Callable, Union, List
 import torch
 import os
+import shutil
 import pickle
 import gc
 from transformers.models.llama.modeling_llama import logger
@@ -87,6 +88,24 @@ def print_quantization_methods():
 pass
 
 
+def check_if_sentencepiece_model(model, temporary_location = "_unsloth_sentencepiece_temp"):
+    if not hasattr(model, "_saved_temp_tokenizer"): return False
+
+    temp_tokenizer = model._saved_temp_tokenizer
+    sentencepiece_model = False
+    file_location = f"{temporary_location}/{temp_tokenizer.name_or_path}"
+    if not os.path.exists(file_location):
+        os.makedirs(file_location)
+    pass
+    temp_tokenizer.save_pretrained(file_location)
+    if os.path.isfile(f"{file_location}/tokenizer.model"):
+        sentencepiece_model = True
+    pass
+    shutil.rmtree(file_location)
+    return sentencepiece_model
+pass
+
+
 def _free_cached_model(model):
     from huggingface_hub import scan_cache_dir
     cached_repos = list(scan_cache_dir().repos)
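The new helper answers one question: does the tokenizer ship a SentencePiece tokenizer.model file? It round-trips the model's cached tokenizer to disk and checks for that file, since BPE-based tokenizers such as Llama-3's do not emit one. A hedged standalone sketch of the same probe that works from a model name instead of a model object (names and the temp directory are illustrative):

```python
import os, shutil
from transformers import AutoTokenizer

def is_sentencepiece_tokenizer(name_or_path, temporary_location = "_sp_probe"):
    # Save the tokenizer, then look for the SentencePiece model file.
    tokenizer = AutoTokenizer.from_pretrained(name_or_path)
    file_location = f"{temporary_location}/{name_or_path}"
    os.makedirs(file_location, exist_ok = True)
    tokenizer.save_pretrained(file_location)
    found = os.path.isfile(f"{file_location}/tokenizer.model")
    shutil.rmtree(temporary_location)
    return found

print(is_sentencepiece_tokenizer("mistralai/Mistral-7B-v0.1"))   # True  (SentencePiece)
print(is_sentencepiece_tokenizer("meta-llama/Meta-Llama-3-8B"))  # False (BPE)
```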
@@ -840,6 +859,7 @@ def _fix_gemma_gguf():
 
 def save_to_gguf(
     model_type : str,
+    is_sentencepiece : bool = False,
     model_directory : str = "unsloth_finetuned_model",
     quantization_method : str = "fast_quantized",
     first_conversion : str = "f16",
@@ -856,7 +876,8 @@
 
     # Careful convert.py is only for Llama / Mistral based archs
     use_fast_convert = False
-    if   model_type == "llama":   use_fast_convert = True
+    if not is_sentencepiece:      use_fast_convert = False # Llama-3
+    elif model_type == "llama":   use_fast_convert = True
     elif model_type == "mistral": use_fast_convert = True
     pass
     logger.warning_once(f"Unsloth: Converting {model_type} model. Can use fast conversion = {use_fast_convert}.")
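This is the hunk that actually reroutes Llama-3: the fast convert.py path assumes a SentencePiece vocabulary, so a BPE Llama-3 checkpoint must take the convert-hf-to-gguf.py path even though its model_type is still "llama". A condensed sketch of the resulting dispatch (the helper name is hypothetical; script paths match those used elsewhere in this file):

```python
def pick_converter(model_type, is_sentencepiece):
    # Fast path only for SentencePiece-based Llama / Mistral architectures.
    use_fast_convert = False
    if not is_sentencepiece:      use_fast_convert = False  # Llama-3 (BPE)
    elif model_type == "llama":   use_fast_convert = True
    elif model_type == "mistral": use_fast_convert = True
    return "llama.cpp/convert.py" if use_fast_convert else "llama.cpp/convert-hf-to-gguf.py"

print(pick_converter("llama", is_sentencepiece = False))  # llama.cpp/convert-hf-to-gguf.py
print(pick_converter("llama", is_sentencepiece = True))   # llama.cpp/convert.py
```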
@@ -951,7 +972,7 @@
             f"--outtype {first_conversion} --concurrency {n_cpus}"
     else:
         # Need to fix convert-hf-to-gguf.py for some models!
-        _fix_gemma_gguf()
+        # _fix_gemma_gguf()
 
         command = f"python llama.cpp/convert-hf-to-gguf.py {model_directory} "\
             f"--outfile {final_location} "\
@@ -1353,7 +1374,10 @@ def unsloth_save_pretrained_gguf(
     gc.collect()
 
     model_type = self.config.model_type
-    file_location = save_to_gguf(model_type, new_save_directory, quantization_method, first_conversion, makefile)
+    is_sentencepiece_model = check_if_sentencepiece_model(self)
+    file_location = save_to_gguf(model_type, is_sentencepiece_model,
+        new_save_directory, quantization_method, first_conversion, makefile,
+    )
 
     if push_to_hub:
         print("Unsloth: Uploading GGUF to Huggingface Hub...")
@@ -1473,7 +1497,10 @@ def unsloth_push_to_hub_gguf(
     gc.collect()
 
     model_type = self.config.model_type
-    file_location = save_to_gguf(model_type, new_save_directory, quantization_method, first_conversion, makefile)
+    is_sentencepiece_model = check_if_sentencepiece_model(self)
+    file_location = save_to_gguf(model_type, is_sentencepiece_model,
+        new_save_directory, quantization_method, first_conversion, makefile,
+    )
 
     print("Unsloth: Uploading GGUF to Huggingface Hub...")
     username = upload_to_huggingface(
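End to end, nothing changes for callers; the SentencePiece probe runs inside the save path. A hedged sketch of the typical entry point (model name and argument values are illustrative; see unsloth's README for the supported quantization methods):

```python
from unsloth import FastLanguageModel

# Load a (fine-tuned) model, then export to GGUF. With a Llama-3 tokenizer,
# the sentencepiece check now routes conversion through convert-hf-to-gguf.py.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = 2048,
    load_in_4bit = True,
)
model.save_pretrained_gguf("gguf_model", tokenizer, quantization_method = "q4_k_m")
```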