Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat - DRAFT - Support plain text #2827

Open
wants to merge 4 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions core/src/types/inference/inferenceEntity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ export enum ChatCompletionMessageContentType {
Text = 'text',
Image = 'image_url',
Doc = 'doc_url',
TextDoc = 'plain_doc_url',
}

export type ChatCompletionMessageContentText = {
Expand Down
1 change: 1 addition & 0 deletions core/src/types/message/messageEntity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ export enum ContentType {
Text = 'text',
Image = 'image',
Pdf = 'pdf',
PlainText = 'plainText',
}

/**
Expand Down
8 changes: 7 additions & 1 deletion extensions/assistant-extension/src/node/retrieval.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'
import { formatDocumentsAsString } from 'langchain/util/document'
import { PDFLoader } from 'langchain/document_loaders/fs/pdf'
import { TextLoader } from 'langchain/document_loaders/fs/text'

import { HNSWLib } from 'langchain/vectorstores/hnswlib'

Expand Down Expand Up @@ -50,9 +51,14 @@ export class Retrieval {
filePath: string,
memoryPath: string
): Promise<any> => {
const loader = new PDFLoader(filePath, {
var loader
if (filePath.endsWith(".pdf") ) {
loader = new PDFLoader(filePath, {
splitPages: true,
})
} else {
loader = new TextLoader(filePath)
}
if (!this.embeddingModel) return Promise.reject()
const doc = await loader.load()
const docs = await this.textSplitter!.splitDocuments(doc)
Expand Down
71 changes: 50 additions & 21 deletions extensions/conversational-extension/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,30 +113,49 @@ export default class JSONConversationalExtension extends ConversationalExtension
])
if (!(await fs.existsSync(threadDirPath))) await fs.mkdir(threadDirPath)

if (message.content[0]?.type === 'image') {
const filesPath = await joinPath([threadDirPath, 'files'])
if (!(await fs.existsSync(filesPath))) await fs.mkdir(filesPath)

const imagePath = await joinPath([filesPath, `${message.id}.png`])
const base64 = message.content[0].text.annotations[0]
await this.storeImage(base64, imagePath)
if ((await fs.existsSync(imagePath)) && message.content?.length) {
// Use file path instead of blob
message.content[0].text.annotations[0] = `threads/${message.thread_id}/files/${message.id}.png`
switch (message.content[0]?.type){
case 'image': {
const filesPath = await joinPath([threadDirPath, 'files'])
if (!(await fs.existsSync(filesPath))) await fs.mkdir(filesPath)

const imagePath = await joinPath([filesPath, `${message.id}.png`])
const base64 = message.content[0].text.annotations[0]
await this.storeImage(base64, imagePath)
if ((await fs.existsSync(imagePath)) && message.content?.length) {
// Use file path instead of blob
message.content[0].text.annotations[0] = `threads/${message.thread_id}/files/${message.id}.png`
}
break
}

case 'pdf': {
const filesPath = await joinPath([threadDirPath, 'files'])
if (!(await fs.existsSync(filesPath))) await fs.mkdir(filesPath)

const filePath = await joinPath([filesPath, `${message.id}.pdf`])
const blob = message.content[0].text.annotations[0]
await this.storePdf(blob, filePath)

if ((await fs.existsSync(filePath)) && message.content?.length) {
// Use file path instead of blob
message.content[0].text.annotations[0] = `threads/${message.thread_id}/files/${message.id}.pdf`
}
break
}
}

if (message.content[0]?.type === 'pdf') {
const filesPath = await joinPath([threadDirPath, 'files'])
if (!(await fs.existsSync(filesPath))) await fs.mkdir(filesPath)
case 'plainText': {
const filesPath = await joinPath([threadDirPath, 'files'])
if (!(await fs.existsSync(filesPath))) await fs.mkdir(filesPath)

const filePath = await joinPath([filesPath, `${message.id}.pdf`])
const blob = message.content[0].text.annotations[0]
await this.storeFile(blob, filePath)
const filePath = await joinPath([filesPath, `${message.id}.txt`])
const blob = message.content[0].text.annotations[0]
await this.storePlainFile(blob, filePath)

if ((await fs.existsSync(filePath)) && message.content?.length) {
// Use file path instead of blob
message.content[0].text.annotations[0] = `threads/${message.thread_id}/files/${message.id}.pdf`
if ((await fs.existsSync(filePath)) && message.content?.length) {
// Use file path instead of blob
message.content[0].text.annotations[0] = `threads/${message.thread_id}/files/${message.id}.txt`
}
break
}
}
await fs.appendFileSync(threadMessagePath, JSON.stringify(message) + '\n')
Expand All @@ -156,7 +175,7 @@ export default class JSONConversationalExtension extends ConversationalExtension
}
}

async storeFile(base64: string, filePath: string): Promise<void> {
async storePdf(base64: string, filePath: string): Promise<void> {
const base64Data = base64.replace(/^data:application\/pdf;base64,/, '')
try {
await fs.writeBlob(filePath, base64Data)
Expand All @@ -165,6 +184,16 @@ export default class JSONConversationalExtension extends ConversationalExtension
}
}

/**
 * Writes a plain-text attachment to `filePath`.
 *
 * Any leading data-URL header (`data:<mediatype>[;params];base64,`) is removed
 * before the remaining base64 payload is handed to `fs.writeBlob`. The header
 * is matched generically rather than only as `text/plain`, because data URLs
 * for text files can carry another media type (e.g. `text/markdown`) or extra
 * parameters such as `;charset=utf-8`; with a `text/plain`-only match those
 * headers would be passed through to `fs.writeBlob` unchanged.
 *
 * Write failures are logged with `console.error` and are not rethrown.
 *
 * @param base64 - Base64 payload, optionally prefixed with a data-URL header.
 * @param filePath - Destination path passed to `fs.writeBlob`.
 */
async storePlainFile(base64: string, filePath: string): Promise<void> {
  // `:`, `;` and `,` are outside the base64 alphabet, so this pattern can only
  // match a real data-URL header, never the start of a bare payload.
  const base64Data = base64.replace(/^data:[^,]*;base64,/, '')
  try {
    await fs.writeBlob(filePath, base64Data)
  } catch (err) {
    console.error(err)
  }
}

async writeMessages(
threadId: string,
messages: ThreadMessage[]
Expand Down
2 changes: 1 addition & 1 deletion web/containers/Providers/Jotai.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ export default function JotaiWrapper({ children }: Props) {
return <Provider>{children}</Provider>
}

export type FileType = 'image' | 'pdf'
export type FileType = 'image' | 'pdf' | 'plain/text'

export type FileInfo = {
file: File
Expand Down
65 changes: 62 additions & 3 deletions web/screens/Chat/ChatInput/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,16 @@ const ChatInput: React.FC = () => {
* It is to be used to display the extension file name of the selected file.
* @param event - The change event object.
*/
const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
const handlePDFFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
  // Take the first entry of the input's file list; when nothing was picked,
  // the upload state is left as it is.
  const { files } = event.target
  const picked = files?.[0]
  if (picked) {
    // The upload state is replaced by a single entry tagged as a PDF.
    setFileUpload([{ file: picked, type: 'pdf' }])
  }
}
const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
  // Only the first selected file is used; an empty selection is ignored.
  const selectedFile = event.target.files?.[0]
  if (selectedFile) {
    // The upload state is replaced by a single entry tagged 'plain/text'.
    setFileUpload([{ file: selectedFile, type: 'plain/text' }])
  }
}

const handleImageChange = (event: React.ChangeEvent<HTMLInputElement>) => {
const file = event.target.files?.[0]
Expand Down Expand Up @@ -262,7 +267,53 @@ const ChatInput: React.FC = () => {
}}
>
<FileTextIcon size={16} />
<span className="font-medium">Document</span>
<span className="font-medium">PDF Document</span>
</li>
</TooltipTrigger>
<TooltipPortal>
{(!activeThread?.assistants[0].tools ||
!activeThread?.assistants[0].tools[0]?.enabled ||
activeThread?.assistants[0].model.settings.text_model ===
false) && (
<TooltipContent side="top" className="max-w-[154px] px-3">
{activeThread?.assistants[0].model.settings.text_model ===
false ? (
<span>
This model does not support text-based retrieval.
</span>
) : (
<span>
Turn on Retrieval in Assistant Settings to use this
feature.
</span>
)}
<TooltipArrow />
</TooltipContent>
)}
</TooltipPortal>
</Tooltip>
<Tooltip>
<TooltipTrigger asChild>
<li
className={twMerge(
'flex w-full cursor-pointer items-center space-x-2 px-4 py-2 text-muted-foreground hover:bg-secondary',
activeThread?.assistants[0].model.settings.text_model ===
false
? 'cursor-not-allowed opacity-50'
: 'cursor-pointer'
)}
onClick={() => {
if (
activeThread?.assistants[0].model.settings
.text_model !== false
) {
fileInputRef.current?.click()
setShowAttacmentMenus(false)
}
}}
>
<FileTextIcon size={16} />
<span className="font-medium">Plain Text Document</span>
</li>
</TooltipTrigger>
<TooltipPortal>
Expand Down Expand Up @@ -305,9 +356,17 @@ const ChatInput: React.FC = () => {
className="hidden"
ref={fileInputRef}
value=""
onChange={handleFileChange}
onChange={handlePDFFileChange}
accept="application/pdf"
/>
<input
type="file"
className="hidden"
ref={fileInputRef}
value=""
onChange={handleFileChange}
accept="plain/text"
/>

{messages[messages.length - 1]?.status !== MessageStatus.Pending &&
!isGeneratingResponse &&
Expand Down
3 changes: 2 additions & 1 deletion web/screens/Chat/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
}
: {
'application/pdf': ['.pdf'],
'plain/text': ['.txt', ".md"],

Check failure on line 81 in web/screens/Chat/index.tsx

View workflow job for this annotation

GitHub Actions / test-on-macos-pr-target

Replace `".md"` with `'.md'`

Check failure on line 81 in web/screens/Chat/index.tsx

View workflow job for this annotation

GitHub Actions / test-on-ubuntu-pr-target

Replace `".md"` with `'.md'`

Check failure on line 81 in web/screens/Chat/index.tsx

View workflow job for this annotation

GitHub Actions / test-on-windows-pr-target

Replace `".md"` with `'.md'`
}

const { getRootProps, isDragReject } = useDropzone({
Expand Down Expand Up @@ -118,7 +119,7 @@
)
return
const imageType = files[0]?.type.includes('image')
setFileUpload([{ file: files[0], type: imageType ? 'image' : 'pdf' }])
setFileUpload([{ file: files[0], type: imageType ? 'image' : 'pdf' }]) // TODO: Fix for plain text
setDragOver(false)
},
onDropRejected: (e) => {
Expand Down
26 changes: 24 additions & 2 deletions web/utils/messageRequestBuilder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,12 @@
base64Blob: string | undefined,
fileContentType: FileType
) {
if (base64Blob && fileContentType === 'pdf')
if (base64Blob && fileContentType === 'pdf'){

Check failure on line 49 in web/utils/messageRequestBuilder.ts

View workflow job for this annotation

GitHub Actions / test-on-macos-pr-target

Insert `·`

Check failure on line 49 in web/utils/messageRequestBuilder.ts

View workflow job for this annotation

GitHub Actions / test-on-ubuntu-pr-target

Insert `·`

Check failure on line 49 in web/utils/messageRequestBuilder.ts

View workflow job for this annotation

GitHub Actions / test-on-windows-pr-target

Insert `·`
return this.addDocMessage(message)
else if (base64Blob && fileContentType === 'image') {
} else if (base64Blob && fileContentType === 'image') {
return this.addImageMessage(message, base64Blob)
} else if (base64Blob && fileContentType === 'plain/text') {
return this.addTextDocMessage(message)

Check failure on line 54 in web/utils/messageRequestBuilder.ts

View workflow job for this annotation

GitHub Actions / test-on-macos-pr-target

Delete `··`

Check failure on line 54 in web/utils/messageRequestBuilder.ts

View workflow job for this annotation

GitHub Actions / test-on-ubuntu-pr-target

Delete `··`

Check failure on line 54 in web/utils/messageRequestBuilder.ts

View workflow job for this annotation

GitHub Actions / test-on-windows-pr-target

Delete `··`
}
this.messages = [
...this.messages,
Expand Down Expand Up @@ -94,6 +96,26 @@
this.messages = [...this.messages, message]
return this
}
/**
 * Appends a user message made of two content parts: the prompt as text, and a
 * `TextDoc` part whose URL is `threads/<thread id>/files/<message id>.txt`.
 * Chainable: returns `this`.
 *
 * @param prompt - Text placed in the message's text part.
 * @returns This builder instance.
 */
addTextDocMessage(prompt: string) {
  // Relative location of the text file associated with this message.
  const docUrl = `threads/${this.thread.id}/files/${this.msgId}.txt`

  const userMessage: ChatCompletionMessage = {
    role: ChatCompletionRole.User,
    content: [
      {
        type: ChatCompletionMessageContentType.Text,
        text: prompt,
      } as ChatCompletionMessageContentText,
      {
        type: ChatCompletionMessageContentType.TextDoc,
        doc_url: { url: docUrl },
      },
    ] as ChatCompletionMessageContent,
  }

  // A new array is assigned rather than pushing onto the existing one.
  this.messages = [...this.messages, userMessage]
  return this
}

// Chainable
addImageMessage(prompt: string, base64: string) {
Expand Down
12 changes: 12 additions & 0 deletions web/utils/threadMessageBuilder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,18 @@ export class ThreadMessageBuilder {
})
}

if (base64 && fileUpload[0]?.type === 'plain/text') {
this.content.push({
type: ContentType.PlainText,
text: {
value: prompt,
annotations: [base64],
name: fileUpload[0].file.name,
size: fileUpload[0].file.size,
},
})
}

if (prompt && !base64) {
this.content.push({
type: ContentType.Text,
Expand Down