Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Created an annotation using instructor #631

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 7 additions & 0 deletions examples/annotation/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Introduction

This showcases a simple streamlit module which can be used to do data annotation for files in a specific table.

Make sure to install the dependencies first with `uv pip install -r requirements.txt`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The command to install dependencies seems to contain a typo. It should be pip install -r requirements.txt instead of uv pip install -r requirements.txt.

Suggested change
Make sure to install the dependencies first with `uv pip install -r requirements.txt`
Make sure to install the dependencies first with `pip install -r requirements.txt`


To populate the table, run `main.py`. This should generate ~20 different todos, insert them into the table, and mark them as unannotated. Once you've done so, you can start the annotation UI with the command `streamlit run annotate.py`.
72 changes: 72 additions & 0 deletions examples/annotation/annotate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import streamlit as st
import sqlite3


def fetch_unannotated_todos():
    """Return every todo that has not been annotated yet.

    Reads the ``todos`` table of ``tutorial.db`` (resolved against the
    current working directory) and keeps the rows whose ``annotated`` flag
    is false.

    Returns:
        A list with one dict per matching row, each holding the keys
        ``title``, ``description``, ``annotated`` and ``id``.

    Raises:
        sqlite3.OperationalError: If the ``todos`` table does not exist.
    """
    # ``with sqlite3.connect(...)`` only scopes a transaction; it does not
    # close the connection. Close it explicitly so repeated calls do not
    # accumulate open database handles.
    con = sqlite3.connect("tutorial.db")
    try:
        rows = con.execute(
            "SELECT title, description, annotated,id FROM todos WHERE annotated = FALSE"
        ).fetchall()
    finally:
        con.close()

    # ``todo_id`` rather than ``id`` so the builtin is not shadowed.
    return [
        {
            "title": title,
            "description": description,
            "annotated": annotated,
            "id": todo_id,
        }
        for title, description, annotated, todo_id in rows
    ]


def _select_todo(todo_id):
    """Store ``todo_id`` in session state as the currently selected todo."""
    st.session_state.curr_selected_todo = todo_id


def display_todos(todos):
    """Render the unannotated todos, each with its own "Select" button.

    Args:
        todos: Iterable of dicts that provide at least the keys ``id`` and
            ``title``.

    Clicking a button records that todo's id in
    ``st.session_state.curr_selected_todo``.
    """
    st.write("### Unannotated Todos")
    for todo in todos:
        st.write(f'({todo["id"]}) {todo["title"]}')
        # Use an ``on_click`` callback instead of ``if st.button(...)``:
        # callbacks run before the script body is re-executed, so code that
        # reads the selection earlier in the script already sees the new
        # value on the rerun triggered by this click. Assigning after the
        # button returns True would leave the page one interaction behind.
        # The id is passed via ``args`` so each button binds its own todo.
        st.button(
            f"Select {todo['id']}",
            on_click=_select_todo,
            args=(todo["id"],),
        )


# Page heading.
st.title("Todo Annotation")

# Seed the selection slot on the first run of a session so that later code
# can read it without checking for its existence.
if "curr_selected_todo" not in st.session_state:
    st.session_state["curr_selected_todo"] = None


def render_selected_todo():
    """Render an edit form for the currently selected todo.

    Reads the selected id from ``st.session_state.curr_selected_todo``,
    loads that row from the ``todos`` table in ``tutorial.db`` and shows
    its original prompt together with editable title and description
    inputs. Pressing "Update" writes the edited values back and marks the
    row as annotated.

    Writes an informational message instead when nothing is selected or
    the selected id no longer exists in the table.
    """
    selected_id = st.session_state.curr_selected_todo
    if selected_id is None:
        st.write("No todo selected.")
        return

    # ``with sqlite3.connect(...)`` does not close the connection, so the
    # handle is closed explicitly here.
    con = sqlite3.connect("tutorial.db")
    try:
        todo_data = con.execute(
            "SELECT original_prompt,title, description FROM todos WHERE id = ?",
            (selected_id,),
        ).fetchone()
    finally:
        con.close()

    if not todo_data:
        st.write("Selected todo not found.")
        return

    original_prompt, title, description = todo_data
    # The column is nullable; concatenating ``None`` would raise TypeError.
    st.write("Original Prompt: " + (original_prompt or ""))
    new_title = st.text_input("Title", value=title)
    new_description = st.text_area("Description", value=description)

    if not st.button("Update"):
        return

    con = sqlite3.connect("tutorial.db")
    try:
        # ``with con`` commits on success and rolls back on an exception.
        with con:
            con.execute(
                "UPDATE todos SET title = ?, description = ?, annotated = ? WHERE id = ?",
                (new_title, new_description, True, selected_id),
            )
    finally:
        con.close()
    st.success("Todo updated successfully!")


# Draw the editor for the current selection first, then the list of todos
# that still need annotating (or a notice when none are left).
render_selected_todo()
unannotated_todos = fetch_unannotated_todos()
if not unannotated_todos:
    st.write("No unannotated todos found.")
else:
    display_todos(unannotated_todos)
70 changes: 70 additions & 0 deletions examples/annotation/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import instructor
from typing import List
from openai import AsyncOpenAI
from asyncio import run
from tqdm.asyncio import tqdm_asyncio as asyncio
from pydantic import BaseModel, Field
import sqlite3


client = instructor.from_openai(AsyncOpenAI())


class TodoItem(BaseModel):
    """
    This is a schema that represents an actionable item which the user needs to consider
    """

    # NOTE(review): the class docstring and the field descriptions are
    # presumably part of the schema handed to the model via
    # ``response_model`` — keep their wording stable unless a prompt change
    # is intended.

    # Short headline of the todo (required).
    title: str = Field(description="This is a title for the todo item")
    # Plan of action for carrying out the todo (required).
    description: str = Field(
        description="This is a description that explains a plan of action for the todo"
    )


async def extract_todo(user_query: str):
    """Extract todo items from a single free-text query.

    Sends ``user_query`` to the chat-completions endpoint of the
    module-level ``client`` with ``List[TodoItem]`` as the response model.

    Args:
        user_query: The text to extract todo items from.

    Returns:
        A list of ``(item, user_query)`` tuples, one per extracted item, so
        each item stays paired with the prompt it came from.
    """
    system_message = {
        "role": "system",
        "content": "You are a world class system that excels at extracting todo items from a user query",
    }
    user_message = {"role": "user", "content": user_query}

    extracted = await client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
        response_model=List[TodoItem],
    )

    paired = []
    for item in extracted:
        paired.append((item, user_query))
    return paired


async def process_todos(items):
    """Run ``extract_todo`` over every query concurrently and flatten the results.

    Args:
        items: Iterable of query strings.

    Returns:
        A single flat list containing the entries of every per-query result,
        in the order ``gather`` returns them.
    """
    # ``asyncio`` in this module is the ``tqdm_asyncio`` alias imported at
    # the top of the file, not the standard-library package.
    per_query = await asyncio.gather(*(extract_todo(query) for query in items))

    flattened = []
    for pairs in per_query:
        flattened.extend(pairs)
    return flattened


if __name__ == "__main__":
    # Sample free-text inputs; each one is expected to yield several todos.
    data = [
        "This week I need to finalize the project report, schedule a meeting with the team, prepare the presentation slides, submit the budget review, and send the client update emails.",
        "Next week I must organize the department outing, update the project timeline, review the new intern applications, and coordinate the quarterly webinars.",
        "Tomorrow I should finalize the contract details, call the supplier for an update, draft the monthly newsletter, and check the inventory status.",
        "By the end of this month, I need to complete the performance reviews, plan the training sessions, archive old project files, and renew the software licenses.",
        "This Friday I have to prepare the weekly sales report, confirm the client appointments, oversee the network upgrade, and document the audit findings.",
    ]

    # Use a single connection for the whole script and always close it.
    # ``with sqlite3.connect(...)`` alone would leave the handle open.
    con = sqlite3.connect("tutorial.db")
    try:
        # Create the table before calling the API so the database is usable
        # by the annotation UI even if extraction fails.
        con.execute(
            "CREATE TABLE IF NOT EXISTS todos(id INTEGER PRIMARY KEY AUTOINCREMENT, annotated BOOLEAN DEFAULT FALSE, title TEXT, description TEXT, original_prompt TEXT)"
        )

        # ``process_todos`` yields (TodoItem, original_query) tuples, not
        # bare TodoItem objects.
        todos = run(process_todos(data))

        # ``with con`` commits on success and rolls back on an exception, so
        # the rows are inserted all-or-nothing. ``annotated`` is left to its
        # column default (FALSE).
        with con:
            con.executemany(
                "INSERT INTO todos (title, description,original_prompt) VALUES (?, ?,?)",
                [
                    (todo.title, todo.description, original_query)
                    for todo, original_query in todos
                ],
            )
    finally:
        con.close()
5 changes: 5 additions & 0 deletions examples/annotation/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
openai==1.23.6
instructor==1.2.3
pydantic==2.7.0
typer==0.12.3
streamlit==1.33.0