
Dev #460

Open · wants to merge 9 commits into base: main
4 changes: 4 additions & 0 deletions .gitignore
@@ -1,3 +1,7 @@
.DS_Store
__pycache__
.ipynb_checkpoints
*.json
*.md
*.bin
*.model
@@ -0,0 +1,49 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.

import fire
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer


def main(push_to_hub: bool = True):
    base_model = "codellama/CodeLlama-13b-Instruct-hf"
    peft_model = "/home/ubuntu/llama-recipes-fork/llama-recipes/src/llama_recipes/models/codellama-8bit-json-mkt-research-24-03-07_epoch_8"
    tokenizer_path = "/home/ubuntu/llama-recipes-fork/llama-recipes/src/llama_recipes/models/codellama-8bit-json-mkt-research-24-03-07_tokenizer"
    output_dir = "HelixAI/codellama-8bit-json-mkt-research-24-03-07"

    # Load the base model in fp16, sharding across available devices.
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        load_in_8bit=False,
        torch_dtype=torch.float16,
        device_map="auto",
        offload_folder="tmp",
    )

    tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

    # Attach the fine-tuned LoRA adapter weights to the base model.
    model = PeftModel.from_pretrained(
        model,
        peft_model,
        torch_dtype=torch.float16,
        device_map="auto",
        offload_folder="tmp",
    )

    # Fold the adapter weights into the base model so it can be served standalone.
    model = model.merge_and_unload()

    if push_to_hub:
        print("Saving to hub ...")
        model.push_to_hub(output_dir, use_temp_dir=True, create_pr=True)
        tokenizer.push_to_hub(output_dir, use_temp_dir=True, create_pr=True)
    else:
        model.save_pretrained(output_dir)
        tokenizer.save_pretrained(output_dir)
        print(f"Model saved to {output_dir}")


if __name__ == "__main__":
    fire.Fire(main)
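
The script exposes `main` through python-fire, so the Hub push can be toggled from the command line. A minimal invocation sketch (the script's file path is not captured in this diff; `merge_lora.py` is a placeholder name):

```
python merge_lora.py                      # default: merge, then push model + tokenizer as a Hub PR
python merge_lora.py --push_to_hub=False  # merge and save to the local output_dir instead
```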
21 changes: 21 additions & 0 deletions inferencing/Dockerfile
@@ -0,0 +1,21 @@
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
LABEL description="Contextual Conversation LLM for HelixAI Assistant"
WORKDIR /app

# System packages and Python tooling in a single layer.
RUN apt update && apt install -y python-is-python3 python3-distutils python3-pip vim
COPY pyproject.toml pyproject.toml
RUN pip install poetry
# pyproject.toml declares readme = "README.md", so the file must exist for `poetry install`.
RUN touch README.md
RUN poetry install
# NOTE: consider passing the token as a build secret rather than hardcoding it.
RUN poetry run huggingface-cli login --token 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
RUN mkdir /models
ENV LD_LIBRARY_PATH=/usr/local/cuda-12.2/targets/x86_64-linux/lib:$LD_LIBRARY_PATH
# Workaround: ctranslate2 builds linked against cuBLAS 11 look for libcublas.so.11;
# alias the CUDA 12 library so the shared-object lookup succeeds.
RUN cp /usr/local/cuda-12.2/targets/x86_64-linux/lib/libcublas.so.12 /usr/local/cuda-12.2/targets/x86_64-linux/lib/libcublas.so.11
ENV llama2_model_name=ct2_codellama-8bit-json-mkt-research-24-03-07
RUN poetry run huggingface-cli download HelixAI/$llama2_model_name --local-dir models/merged-codellama-ct2
COPY app.py app.py
COPY hl_mr_prompt.yaml hl_mr_prompt.yaml
CMD poetry run gunicorn app:app --workers 1 --worker-class uvicorn.workers.UvicornWorker --timeout=120 --bind 0.0.0.0:8082
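
The image downloads a pre-converted CTranslate2 copy of the merged model (`ct2_…`); the conversion step itself is not part of this diff. A minimal sketch of how such a model is typically produced with the `ctranslate2` converter API — the quantization mode and copied files are assumptions, not taken from this PR:

```python
from ctranslate2.converters import TransformersConverter

# Convert the merged HF checkpoint to CTranslate2 format (assumed settings).
converter = TransformersConverter(
    "HelixAI/codellama-8bit-json-mkt-research-24-03-07",
    copy_files=["tokenizer.model"],  # app.py loads tokenizer.model from the model dir
)
converter.convert("ct2_codellama-8bit-json-mkt-research-24-03-07", quantization="int8")
```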
79 changes: 79 additions & 0 deletions inferencing/app.py
@@ -0,0 +1,79 @@

import os

import ctranslate2
import sentencepiece as spm
import yaml
from fastapi import FastAPI
from pydantic import BaseModel

CUDA_VISIBLE_DEVICES = [0]
MAX_BATCH_SIZE = 30
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in CUDA_VISIBLE_DEVICES)

app = FastAPI()

# Load the CTranslate2 model and its SentencePiece tokenizer once at startup.
model_dir = "models/merged-codellama-ct2"
generator = ctranslate2.Generator(model_dir, device="cuda")
sp = spm.SentencePieceProcessor(os.path.join(model_dir, "tokenizer.model"))

# Raw completions are accumulated in memory for inspection.
model_raw_outputs = []


@app.get("/healthcheck")
async def health_check():
    return {"message": "Ok", "status": "Green"}


class UserData(BaseModel):
    inputs: str
    parameters: dict


@app.post("/generate")
async def generate(user_data: UserData):
    query = user_data.inputs
    parameters = user_data.parameters

    # The system prompt is reloaded on every request, so it can be edited
    # without restarting the server.
    with open(os.path.join(os.path.dirname(__file__), "hl_mr_prompt.yaml"), "r") as file:
        yaml_data = yaml.safe_load(file)
    prompt_template = yaml_data["prompt"].strip()

    # Llama 2 instruction format: <s>[INST] ... [/INST]
    B_INST, E_INST = "[INST]", "[/INST]"
    bos_token = "<s>"
    prompt = f"{bos_token}{B_INST} {prompt_template.format(user_text=query).strip()} {E_INST}"

    prompts = [prompt]
    prompt_tokens = sp.encode(prompts, out_type=str)
    gen_results = generator.generate_batch(
        prompt_tokens,
        max_batch_size=MAX_BATCH_SIZE,
        sampling_temperature=parameters["temperature"],
        # sampling_topk=1,
        # sampling_topp=0.5,
        max_length=parameters["max_new_tokens"],
        include_prompt_in_result=False,
    )

    # Decode the first (and only) hypothesis of each batch entry.
    result = ""
    for gen_result in gen_results:
        output_ids = list(gen_result.sequences_ids[0])
        result = sp.decode(output_ids)
        model_raw_outputs.append(result)

    return result
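
A minimal client call against this endpoint (a sketch; it assumes the container's port 8082 is reachable locally and uses illustrative parameter values — both `temperature` and `max_new_tokens` are required by the handler above):

```python
import requests

resp = requests.post(
    "http://localhost:8082/generate",
    json={
        "inputs": "Which buyout funds are currently fundraising?",
        "parameters": {"temperature": 0.1, "max_new_tokens": 128},
    },
)
print(resp.json())  # the generated JSON string
```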
70 changes: 70 additions & 0 deletions inferencing/hl_mr_prompt.yaml
@@ -0,0 +1,70 @@
prompt: |
  <<SYS>>Your job is to generate JSON to answer questions in the private markets space.
  The JSON should include the Python function to call and the params to pass to the function. The functions execute the params on dataframes to provide results.
  The template of the JSON:
  {{
    "function": "<function_name>",
    "params": {{
      "filter_conditions": [],
      "sort_conditions": [],
      "aggregate_conditions": [],
      "time": {{}}
    }}
  }}
  Available functions:
  - compare_fund_performance
  - get_focus_by_attribute
  - get_focus_by_performance
  - get_fund_info
  - get_funds_by_attribute
  - get_funds_by_family_performance
  - get_funds_by_manager_performance
  - get_funds_by_performance
  - get_funds_with_upcoming_closes
  - get_geographies_by_performance
  - get_managers_by_attribute
  - get_managers_by_performance
  - get_managers_by_status_performance
  - get_previous_vintages
  - get_styles_by_attribute
  - get_styles_by_performance
  - get_unique_investment_focuses
  - get_vintages_by_performance
  - miscellaneous

  Description of columns used in conditions:
  manager_name: str
  style: str # Investment strategy or asset class. Unique style values possible = ['Buyout', 'Credit', 'Infrastructure', 'Real Estate', 'Venture Capital', 'Fund of Funds', 'Secondaries', 'Growth Equity', 'Co-Investment', 'Natural Resources']
  focus: str # Primary investment area or industry of focus. Unique focus values possible = ['Small Cap', 'Senior Debt', 'Value Add', 'Balanced', 'Buyout', 'Distressed', 'Mid Cap', 'Seed/Early Stage', 'Secondaries', 'Special Situations', 'Opportunistic', 'Venture Capital', 'Mezzanine', 'Core', 'Multi Manager', 'Single Manager', 'Large Cap', 'Late Stage', 'Turnaround', 'Real Estate', 'Multi Focus', 'Fund Interests', 'Energy', 'Expansion Stage', 'Lending & Leasing', 'Credit', 'Direct Interests', 'Agriculture', 'Growth Equity', 'Timber', 'Mining', 'Royalty', 'Infrastructure', 'Natural Resources']
  geography: str # Regions or countries invested in
  gics: str # GICS sector of the fund
  fund_status: str # Fundraising status of the fund. Unique values possible = ['Fundraising', 'Out of Market', 'Projected']
  vintage_year: str # Year the fund was established
  size: str # Size of the fund
  net_irr: float # Performance of the fund
  irr_local: float # Performance of the fund within the given investment style
  targeted_tvpi: float
  targeted_net_irr: float
  management_fee: float # Fees of the fund in percentage
  carried_interest: float # Carried interest in percentage
  gp_commitment: float # GP commitment in percentage
  investment_period: int # Investment duration of the fund
  formal_esg_policy: str # Whether the fund has a formal ESG policy. 'Yes' or 'No'
  impact_focus: str # 'Yes' or 'No'
  net_tvpi: float
  targeted_irr_local: float
  tvpi_local: float

  Note: You may only return JSON statements.<</SYS>>
  Human: {user_text}
  ---
  JSON:
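
For illustration, one plausible completion for "Which buyout funds are currently fundraising?" — the exact shape of `filter_conditions` entries is not defined anywhere in this diff, so the field layout below is an assumption:

```python
# Hypothetical target output; the filter-condition schema is assumed, not sourced from this PR.
expected_json = {
    "function": "get_funds_by_attribute",
    "params": {
        "filter_conditions": [
            {"column": "style", "operator": "==", "value": "Buyout"},
            {"column": "fund_status", "operator": "==", "value": "Fundraising"},
        ],
        "sort_conditions": [],
        "aggregate_conditions": [],
        "time": {},
    },
}
```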
79 changes: 79 additions & 0 deletions inferencing/pyproject.toml
@@ -0,0 +1,79 @@
[tool.poetry]
name = "ai-chatbot"
version = "0.1.0"
description = ""
authors = ["Your Name <you@example.com>"]
readme = "README.md"
packages = []

[tool.poetry.dependencies]
python = ">=3.10,<3.12"
fastapi = "*"
uvicorn = "*"
sqlalchemy = "*"
python-multipart = "*"
passlib = "*"
bcrypt = "*"
pyjwt = "*"
alembic = "*"
sentry-sdk = {extras = ["fastapi"], version = "*"}
pandas = "*"
tqdm = "*"
spacy = "*"
numpy = "*"
openai = "0.28.1"
pandasql = "*"
openpyxl = "*"
python-decouple = "^3.8"
boto3 = "^1.26.153"
redis = {extras = ["hiredis"], version = "^4.5.5"}
grpcio-tools = "^1.56.0"
pytest = "^7.4.0"
langchain = "^0.0.312"
prompt-toolkit = "^3.0.39"
pinecone-client = "^2.2.4"
httpx = "^0.25.0"
pytest-html = "^4.0.2"
fakeredis = "^2.20.0"
py = "^1.11.0"
tiktoken = "^0.5.1"
datasets = "*"
sentence-transformers = "2.3.1"
accelerate = "*"
scikit-learn = "1.2.2"
pydantic = {extras = ["dotenv"], version = "1.10.13"}
rapidfuzz = "^3.4.0"
scipy = "1.10.0"
torch = "2.0.0"
pypdf = "^3.17.0"
motor = "^3.3.2"
cohere = "^4.47"
sentencepiece = "^0.1.97"
tokenizers = "^0.15.0"
huggingface-hub = "^0.19.3"
transformers = "^4.37.1"
ctranslate2 = "*"
flask = "*"
gunicorn = "*"

[tool.poetry.group.quality.dependencies]
black = "23.7.0"
isort = "5.12.0"
mypy = "1.5.1"
ruff = "0.0.285"

[tool.ruff]
# Allow lines to be as long as 120 characters.
line-length = 120

[tool.ruff.extend-per-file-ignores]
"chatbot/chat/chat_models/chain_templates/*" = ["E501"]
"chatbot/chat/chat_models/chains/*" = ["E501"]
"chatbot/chat/chat_models/llm/*.yaml" = ["E501"]
"chatbot/chat/intents/classifier/classifier_base_prompt.py" = ["E501"]
"chatbot/chat/intents/descriptive/utils.py" = ["E501"]
"chatbot/chat/intents/quantitative/tests/*" = ["E501"]

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"