
Dev #460

Open · wants to merge 9 commits into base: main
4 changes: 4 additions & 0 deletions .gitignore
@@ -1,3 +1,7 @@
.DS_Store
__pycache__
.ipynb_checkpoints
*.json
*.md
*.bin
*.model
@@ -0,0 +1,49 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.

import fire
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer


def main(push_to_hub: bool = True):
    base_model = "codellama/CodeLlama-13b-Instruct-hf"
    peft_model = "/home/ubuntu/llama-recipes-fork/llama-recipes/src/llama_recipes/models/codellama-8bit-json-mkt-research-24-03-07_epoch_8"
    tokenizer_path = "/home/ubuntu/llama-recipes-fork/llama-recipes/src/llama_recipes/models/codellama-8bit-json-mkt-research-24-03-07_tokenizer"
    output_dir = "HelixAI/codellama-8bit-json-mkt-research-24-03-07"

    # Load the base model in fp16, sharding across available devices.
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        load_in_8bit=False,
        torch_dtype=torch.float16,
        device_map="auto",
        offload_folder="tmp",
    )

    tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

    # Attach the fine-tuned LoRA adapter weights to the base model.
    model = PeftModel.from_pretrained(
        model,
        peft_model,
        torch_dtype=torch.float16,
        device_map="auto",
        offload_folder="tmp",
    )

    # Fold the adapter weights into the base model so it can be served standalone.
    model = model.merge_and_unload()

    if push_to_hub:
        print("Saving to hub ...")
        model.push_to_hub(output_dir, use_temp_dir=True, create_pr=True)
        tokenizer.push_to_hub(output_dir, use_temp_dir=True, create_pr=True)
    else:
        model.save_pretrained(output_dir)
        tokenizer.save_pretrained(output_dir)
        print(f"Model saved to {output_dir}")


if __name__ == "__main__":
    fire.Fire(main)
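
The script exposes `main` through python-fire, so the Hub push can be toggled from the command line. A minimal invocation sketch (the script's file path is not captured in this diff; `merge_lora.py` is a placeholder name):

```
python merge_lora.py                      # default: merge, then push model + tokenizer as a Hub PR
python merge_lora.py --push_to_hub=False  # merge and save to the local output_dir instead
```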
21 changes: 21 additions & 0 deletions inferencing/Dockerfile
@@ -0,0 +1,21 @@
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
LABEL description="Contextual Conversation LLM for HelixAI Assistant"
WORKDIR /app

# System packages and Python tooling in a single layer.
RUN apt update && apt install -y python-is-python3 python3-distutils python3-pip vim
COPY pyproject.toml pyproject.toml
RUN pip install poetry
# pyproject.toml declares readme = "README.md", so the file must exist for `poetry install`.
RUN touch README.md
RUN poetry install
# NOTE: consider passing the token as a build secret rather than hardcoding it.
RUN poetry run huggingface-cli login --token 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
RUN mkdir /models
ENV LD_LIBRARY_PATH=/usr/local/cuda-12.2/targets/x86_64-linux/lib:$LD_LIBRARY_PATH
# Workaround: ctranslate2 builds linked against cuBLAS 11 look for libcublas.so.11;
# alias the CUDA 12 library so the shared-object lookup succeeds.
RUN cp /usr/local/cuda-12.2/targets/x86_64-linux/lib/libcublas.so.12 /usr/local/cuda-12.2/targets/x86_64-linux/lib/libcublas.so.11
ENV llama2_model_name=ct2_codellama-8bit-json-mkt-research-24-03-07
RUN poetry run huggingface-cli download HelixAI/$llama2_model_name --local-dir models/merged-codellama-ct2
COPY app.py app.py
COPY hl_mr_prompt.yaml hl_mr_prompt.yaml
CMD poetry run gunicorn app:app --workers 1 --worker-class uvicorn.workers.UvicornWorker --timeout=120 --bind 0.0.0.0:8082
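
The image downloads a pre-converted CTranslate2 copy of the merged model (`ct2_…`); the conversion step itself is not part of this diff. A minimal sketch of how such a model is typically produced with the `ctranslate2` converter API — the quantization mode and copied files are assumptions, not taken from this PR:

```python
from ctranslate2.converters import TransformersConverter

# Convert the merged HF checkpoint to CTranslate2 format (assumed settings).
converter = TransformersConverter(
    "HelixAI/codellama-8bit-json-mkt-research-24-03-07",
    copy_files=["tokenizer.model"],  # app.py loads tokenizer.model from the model dir
)
converter.convert("ct2_codellama-8bit-json-mkt-research-24-03-07", quantization="int8")
```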
79 changes: 79 additions & 0 deletions inferencing/app.py
@@ -0,0 +1,79 @@

import os

import ctranslate2
import sentencepiece as spm
import yaml
from fastapi import FastAPI
from pydantic import BaseModel

CUDA_VISIBLE_DEVICES = [0]
MAX_BATCH_SIZE = 30
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in CUDA_VISIBLE_DEVICES)

app = FastAPI()

# Load the CTranslate2 model and its SentencePiece tokenizer once at startup.
model_dir = "models/merged-codellama-ct2"
generator = ctranslate2.Generator(model_dir, device="cuda")
sp = spm.SentencePieceProcessor(os.path.join(model_dir, "tokenizer.model"))

# Raw completions are accumulated in memory for inspection.
model_raw_outputs = []


@app.get("/healthcheck")
async def health_check():
    return {"message": "Ok", "status": "Green"}


class UserData(BaseModel):
    inputs: str
    parameters: dict


@app.post("/generate")
async def generate(user_data: UserData):
    query = user_data.inputs
    parameters = user_data.parameters

    # The system prompt is reloaded on every request, so it can be edited
    # without restarting the server.
    with open(os.path.join(os.path.dirname(__file__), "hl_mr_prompt.yaml"), "r") as file:
        yaml_data = yaml.safe_load(file)
    prompt_template = yaml_data["prompt"].strip()

    # Llama 2 instruction format: <s>[INST] ... [/INST]
    B_INST, E_INST = "[INST]", "[/INST]"
    bos_token = "<s>"
    prompt = f"{bos_token}{B_INST} {prompt_template.format(user_text=query).strip()} {E_INST}"

    prompts = [prompt]
    prompt_tokens = sp.encode(prompts, out_type=str)
    gen_results = generator.generate_batch(
        prompt_tokens,
        max_batch_size=MAX_BATCH_SIZE,
        sampling_temperature=parameters["temperature"],
        # sampling_topk=1,
        # sampling_topp=0.5,
        max_length=parameters["max_new_tokens"],
        include_prompt_in_result=False,
    )

    # Decode the first (and only) hypothesis of each batch entry.
    result = ""
    for gen_result in gen_results:
        output_ids = list(gen_result.sequences_ids[0])
        result = sp.decode(output_ids)
        model_raw_outputs.append(result)

    return result
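
A minimal client call against this endpoint (a sketch; it assumes the container's port 8082 is reachable locally and uses illustrative parameter values — both `temperature` and `max_new_tokens` are required by the handler above):

```python
import requests

resp = requests.post(
    "http://localhost:8082/generate",
    json={
        "inputs": "Which buyout funds are currently fundraising?",
        "parameters": {"temperature": 0.1, "max_new_tokens": 128},
    },
)
print(resp.json())  # the generated JSON string
```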
70 changes: 70 additions & 0 deletions inferencing/hl_mr_prompt.yaml
@@ -0,0 +1,70 @@
prompt: |
  <<SYS>>Your job is to generate JSON to answer questions in the private markets space.
  The JSON should include the Python function to call and the params to pass to the function. The functions execute the params on dataframes to provide results.
  The template of the JSON:
  {{
    "function": "<function_name>",
    "params": {{
      "filter_conditions": [],
      "sort_conditions": [],
      "aggregate_conditions": [],
      "time": {{}}
    }}
  }}
  Available functions:
  - compare_fund_performance
  - get_focus_by_attribute
  - get_focus_by_performance
  - get_fund_info
  - get_funds_by_attribute
  - get_funds_by_family_performance
  - get_funds_by_manager_performance
  - get_funds_by_performance
  - get_funds_with_upcoming_closes
  - get_geographies_by_performance
  - get_managers_by_attribute
  - get_managers_by_performance
  - get_managers_by_status_performance
  - get_previous_vintages
  - get_styles_by_attribute
  - get_styles_by_performance
  - get_unique_investment_focuses
  - get_vintages_by_performance
  - miscellaneous

  Description of columns used in conditions:
  manager_name: str
  style: str # Investment strategy or asset class. Unique style values possible = ['Buyout', 'Credit', 'Infrastructure', 'Real Estate', 'Venture Capital', 'Fund of Funds', 'Secondaries', 'Growth Equity', 'Co-Investment', 'Natural Resources']
  focus: str # Primary investment area or industry of focus. Unique focus values possible = ['Small Cap', 'Senior Debt', 'Value Add', 'Balanced', 'Buyout', 'Distressed', 'Mid Cap', 'Seed/Early Stage', 'Secondaries', 'Special Situations', 'Opportunistic', 'Venture Capital', 'Mezzanine', 'Core', 'Multi Manager', 'Single Manager', 'Large Cap', 'Late Stage', 'Turnaround', 'Real Estate', 'Multi Focus', 'Fund Interests', 'Energy', 'Expansion Stage', 'Lending & Leasing', 'Credit', 'Direct Interests', 'Agriculture', 'Growth Equity', 'Timber', 'Mining', 'Royalty', 'Infrastructure', 'Natural Resources']
  geography: str # Regions or countries invested in
  gics: str # GICS sector of the fund
  fund_status: str # Fundraising status of the fund. Unique values possible = ['Fundraising', 'Out of Market', 'Projected']
  vintage_year: str # Year the fund was established
  size: str # Size of the fund
  net_irr: float # Performance of the fund
  irr_local: float # Performance of the fund within the given investment style
  targeted_tvpi: float
  targeted_net_irr: float
  management_fee: float # Fees of the fund in percentage
  carried_interest: float # Carried interest in percentage
  gp_commitment: float # GP commitment in percentage
  investment_period: int # Investment duration of the fund
  formal_esg_policy: str # Whether the fund has a formal ESG policy. 'Yes' or 'No'
  impact_focus: str # 'Yes' or 'No'
  net_tvpi: float
  targeted_irr_local: float
  tvpi_local: float

  Note: You may only return JSON statements.<</SYS>>
  Human: {user_text}
  ---
  JSON:
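
For illustration, one plausible completion for "Which buyout funds are currently fundraising?" — the exact shape of `filter_conditions` entries is not defined anywhere in this diff, so the field layout below is an assumption:

```python
# Hypothetical target output; the filter-condition schema is assumed, not sourced from this PR.
expected_json = {
    "function": "get_funds_by_attribute",
    "params": {
        "filter_conditions": [
            {"column": "style", "operator": "==", "value": "Buyout"},
            {"column": "fund_status", "operator": "==", "value": "Fundraising"},
        ],
        "sort_conditions": [],
        "aggregate_conditions": [],
        "time": {},
    },
}
```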
79 changes: 79 additions & 0 deletions inferencing/pyproject.toml
@@ -0,0 +1,79 @@
[tool.poetry]
name = "ai-chatbot"
version = "0.1.0"
description = ""
authors = ["Your Name <you@example.com>"]
readme = "README.md"
packages = []

[tool.poetry.dependencies]
python = ">=3.10,<3.12"
fastapi = "*"
uvicorn = "*"
sqlalchemy = "*"
python-multipart = "*"
passlib = "*"
bcrypt = "*"
pyjwt = "*"
alembic = "*"
sentry-sdk = {extras = ["fastapi"], version = "*"}
pandas = "*"
tqdm = "*"
spacy = "*"
numpy = "*"
openai = "0.28.1"
pandasql = "*"
openpyxl = "*"
python-decouple = "^3.8"
boto3 = "^1.26.153"
redis = {extras = ["hiredis"], version = "^4.5.5"}
grpcio-tools = "^1.56.0"
pytest = "^7.4.0"
langchain = "^0.0.312"
prompt-toolkit = "^3.0.39"
pinecone-client = "^2.2.4"
httpx = "^0.25.0"
pytest-html = "^4.0.2"
fakeredis = "^2.20.0"
py = "^1.11.0"
tiktoken = "^0.5.1"
datasets = "*"
sentence-transformers = "2.3.1"
accelerate = "*"
scikit-learn = "1.2.2"
pydantic = {extras = ["dotenv"], version = "1.10.13"}
rapidfuzz = "^3.4.0"
scipy = "1.10.0"
torch = "2.0.0"
pypdf = "^3.17.0"
motor = "^3.3.2"
cohere = "^4.47"
sentencepiece = "^0.1.97"
tokenizers = "^0.15.0"
huggingface-hub = "^0.19.3"
transformers = "^4.37.1"
ctranslate2 = "*"
flask = "*"
gunicorn = "*"

[tool.poetry.group.quality.dependencies]
black = "23.7.0"
isort = "5.12.0"
mypy = "1.5.1"
ruff = "0.0.285"

[tool.ruff]
# Allow lines to be as long as 120 characters.
line-length = 120

[tool.ruff.extend-per-file-ignores]
"chatbot/chat/chat_models/chain_templates/*" = ["E501"]
"chatbot/chat/chat_models/chains/*" = ["E501"]
"chatbot/chat/chat_models/llm/*.yaml" = ["E501"]
"chatbot/chat/intents/classifier/classifier_base_prompt.py" = ["E501"]
"chatbot/chat/intents/descriptive/utils.py" = ["E501"]
"chatbot/chat/intents/quantitative/tests/*" = ["E501"]

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"