Add GGML saving option to Unsloth for easier Ollama model creation and testing. #345

Open · wants to merge 23 commits into base: main
Commits (23)
43014fc  Add save to llama.cpp GGML to save.py. (mahiatlinux, Apr 14, 2024)
96b8c78  Fix conversion command and path of convert to GGML function. (mahiatlinux, Apr 14, 2024)
129d692  Add autosaving lora to the GGML function (mahiatlinux, Apr 14, 2024)
b9c6207  Create lora save function for conversion to GGML (mahiatlinux, Apr 14, 2024)
a83218c  Test fix #2 for saving lora (mahiatlinux, Apr 14, 2024)
ecb83de  Test fix #3 to save the lora adapters to convert to GGML (mahiatlinux, Apr 14, 2024)
534303d  Remove unwanted tokenizer saving for conversion to ggml and added a fe… (mahiatlinux, Apr 14, 2024)
786c010  Needed tokenizer for saving, added it back, also made it more unsloth… (mahiatlinux, Apr 14, 2024)
8ccf015  Positional arguments didn't work out, so reverted to older version of… (mahiatlinux, Apr 14, 2024)
c337e97  Test fix 1 for arch (mahiatlinux, Apr 14, 2024)
57c9d0d  Test fix 2 new Mistral error. (mahiatlinux, Apr 14, 2024)
8430b32  Test fix 3 (mahiatlinux, Apr 14, 2024)
da8bdca  Revert to old version for testing. (mahiatlinux, Apr 14, 2024)
522b641  Upload issue test fix 1 (mahiatlinux, Apr 14, 2024)
e268f18  Fix 2 uploading ggml (mahiatlinux, Apr 14, 2024)
1f8c2c5  Positional args added. (mahiatlinux, Apr 14, 2024)
a8f6bb4  Temporarily remove positional args (mahiatlinux, Apr 14, 2024)
0102ce9  Fix upload again!!! (mahiatlinux, Apr 14, 2024)
b031111  Add print statements and fix link (mahiatlinux, Apr 15, 2024)
ba526d1  Make the calling name better (mahiatlinux, Apr 15, 2024)
2b276b6  Create local saving for GGML (mahiatlinux, Apr 15, 2024)
947a91d  Add choosing directory to save local GGML. (mahiatlinux, Apr 15, 2024)
6c4ab40  Fix lil variable error in the save_to_custom_dir func (mahiatlinux, Apr 15, 2024)
unsloth/save.py (140 additions, 4 deletions)
@@ -1463,6 +1463,140 @@ def unsloth_push_to_hub_gguf(
print(f"Saved GGUF to https://huggingface.co/{link}")
pass

# Helper: save the LoRA adapters to a custom directory
def save_lora_to_custom_dir(model, tokenizer, save_directory):
    # Create the custom directory if it doesn't exist
    os.makedirs(save_directory, exist_ok=True)

    # Call the unsloth_save_model function with the custom directory
    unsloth_save_model(
        model,
        tokenizer,
        save_directory=save_directory,
        save_method="lora",
        push_to_hub=False,
    )

# Convert the saved LoRA adapters to GGML and push the result to the Hugging Face Hub
def unsloth_convert_lora_to_ggml_and_push_to_hub(
    self,
    tokenizer,
    repo_id: str,
    use_temp_dir: Optional[bool] = None,
    commit_message: Optional[str] = "Converted LoRA to GGML with Unsloth",
    private: Optional[bool] = None,
    token: Union[bool, str, None] = None,
    create_pr: bool = False,
    revision: Optional[str] = None,
    commit_description: str = "Convert LoRA to GGML format using Unsloth",
    temporary_location: str = "_unsloth_temporary_saved_buffers",
    maximum_memory_usage: float = 0.85,
):
    # Ensure llama.cpp is available; clone and build it if it is not
    if not os.path.exists("llama.cpp"):
        if IS_KAGGLE_ENVIRONMENT:
            python_install = install_python_non_blocking(["protobuf"])
            python_install.wait()
            install_llama_cpp_blocking(use_cuda=False)
            makefile = None
        else:
            git_clone = install_llama_cpp_clone_non_blocking()
            python_install = install_python_non_blocking(["protobuf"])
            git_clone.wait()
            makefile = install_llama_cpp_make_non_blocking()
            python_install.wait()
    else:
        makefile = None

    for _ in range(3):
        gc.collect()

    lora_directory_push = "lora-to-ggml-push"
    save_lora_to_custom_dir(self, tokenizer, lora_directory_push)

    model_type = self.config.model_type
    output_file = os.path.join(lora_directory_push, "ggml-adapter-model.bin")

    print(f"Unsloth: Converting auto-saved LoRA adapters at {lora_directory_push} to GGML format.")
    print(f"The output file will be {output_file}")

    command = f"python3 llama.cpp/convert-lora-to-ggml.py {lora_directory_push} {output_file} llama"
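    # NOTE: the converter's architecture argument is hard-coded to "llama";
    # model_type above is captured but not currently used.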

    try:
        # Merge stderr into stdout so reading one stream cannot deadlock on the other
        with subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=1, universal_newlines=True) as sp:
            for line in sp.stdout:
                print(line, end="", flush=True)
            sp.wait()
            if sp.returncode != 0:
                raise subprocess.CalledProcessError(sp.returncode, command)
    except subprocess.CalledProcessError as e:
        print(f"Error: Conversion failed with return code {e.returncode}")
        return

    print(f"Unsloth: Conversion completed! Output file: {output_file}")

    print("Unsloth: Uploading GGML file to Hugging Face Hub...")
    username = upload_to_huggingface(
        self, repo_id, token,
        "GGML converted LoRA", "ggml", output_file, None, private,
    )
    link = repo_id.lstrip("/")
    print("Unsloth: Done.")
    print(f"Converted LoRA to GGML and uploaded to https://huggingface.co/{link}")
    print("\nThis GGML making function was made by Maheswar. Ping him @Maheswar on the Unsloth Discord or on HuggingFace (@mahiatlinux) if you like this!")

# Convert the saved LoRA adapters to GGML and save them locally
def unsloth_convert_lora_to_ggml_and_save_locally(
    self,
    save_directory: str,  # Destination folder for the GGML adapter
    tokenizer,
    temporary_location: str = "_unsloth_temporary_saved_buffers",
    maximum_memory_usage: float = 0.85,
):
    # Ensure llama.cpp is available; clone and build it if it is not
    if not os.path.exists("llama.cpp"):
        if IS_KAGGLE_ENVIRONMENT:
            python_install = install_python_non_blocking(["protobuf"])
            python_install.wait()
            install_llama_cpp_blocking(use_cuda=False)
            makefile = None
        else:
            git_clone = install_llama_cpp_clone_non_blocking()
            python_install = install_python_non_blocking(["protobuf"])
            git_clone.wait()
            makefile = install_llama_cpp_make_non_blocking()
            python_install.wait()
    else:
        makefile = None

    for _ in range(3):
        gc.collect()

    # Use the provided save_directory for local saving
    save_lora_to_custom_dir(self, tokenizer, save_directory)

    model_type = self.config.model_type
    output_file = os.path.join(save_directory, "ggml-adapter-model.bin")

    print(f"Unsloth: Converting auto-saved LoRA adapters at {save_directory} to GGML format.")
    print(f"The output file will be {output_file}")

    command = f"python3 llama.cpp/convert-lora-to-ggml.py {save_directory} {output_file} llama"

    try:
        # Merge stderr into stdout so reading one stream cannot deadlock on the other
        with subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=1, universal_newlines=True) as sp:
            for line in sp.stdout:
                print(line, end="", flush=True)
            sp.wait()
            if sp.returncode != 0:
                raise subprocess.CalledProcessError(sp.returncode, command)
    except subprocess.CalledProcessError as e:
        print(f"Error: Conversion failed with return code {e.returncode}")
        return
    print("Unsloth: Done.")
    print(f"Unsloth: Conversion completed! Output file: {output_file}")
    print("\nThis GGML making function was made by Maheswar. Ping him @Maheswar on the Unsloth Discord or on HuggingFace (@mahiatlinux) if you like this!")

def patch_saving_functions(model):
    import inspect

@@ -1556,10 +1690,12 @@ def patch_saving_functions(model):
    # Add saving methods to top level model
    if hasattr(model, "config"):
        # Counteract tokenizers
        model.push_to_hub_merged     = types.MethodType(unsloth_push_to_hub_merged,     model)
        model.save_pretrained_merged = types.MethodType(unsloth_save_pretrained_merged, model)
        model.push_to_hub_gguf       = types.MethodType(unsloth_push_to_hub_gguf,       model)
        model.save_pretrained_gguf   = types.MethodType(unsloth_save_pretrained_gguf,   model)
        model.push_to_hub_ggml       = types.MethodType(unsloth_convert_lora_to_ggml_and_push_to_hub,   model)
        model.save_pretrained_ggml   = types.MethodType(unsloth_convert_lora_to_ggml_and_save_locally,  model)
    pass
    return model
pass
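
For reviewers, a minimal usage sketch of the two methods this PR attaches, assuming a standard Unsloth fine-tuning setup; the model name, output directory, repo id, and token below are placeholders, and the training step is elided:

from unsloth import FastLanguageModel

# Load a model and attach/train LoRA adapters as usual (placeholder model name)
model, tokenizer = FastLanguageModel.from_pretrained("unsloth/mistral-7b-bnb-4bit")
# ... fine-tuning elided ...

# Convert the LoRA adapters to GGML and save ggml-adapter-model.bin locally
model.save_pretrained_ggml("ggml_adapter", tokenizer)

# Or convert and upload the GGML adapter to the Hugging Face Hub
model.push_to_hub_ggml(tokenizer, "your-username/your-lora-ggml", token="hf_...")

The resulting ggml-adapter-model.bin can then be wired into an Ollama Modelfile, which is the model-creation and testing workflow the PR title describes.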