Skip to content

Commit

Permalink
chore: replace nitro by cortex-cpp (#2912)
Browse files Browse the repository at this point in the history
  • Loading branch information
louis-jan committed May 16, 2024
1 parent 2182599 commit 537ef20
Show file tree
Hide file tree
Showing 16 changed files with 96 additions and 71 deletions.
6 changes: 3 additions & 3 deletions core/src/node/api/restful/helper/consts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ export const SUPPORTED_MODEL_FORMAT = '.gguf'
// The URL for the Nitro subprocess
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
// The URL for the Nitro subprocess to load a model
export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
// The URL for the Nitro subprocess to validate a model
export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`

// The URL for the Nitro subprocess to kill itself
export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`

export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url
export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
26 changes: 15 additions & 11 deletions core/src/node/api/restful/helper/startStopModel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,12 @@ const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSe
}

const spawnNitroProcess = async (): Promise<void> => {
log(`[SERVER]::Debug: Spawning Nitro subprocess...`)
log(`[SERVER]::Debug: Spawning cortex subprocess...`)

let binaryFolder = join(
getJanExtensionsPath(),
'@janhq',
'inference-nitro-extension',
'inference-cortex-extension',
'dist',
'bin'
)
Expand All @@ -160,7 +160,7 @@ const spawnNitroProcess = async (): Promise<void> => {
const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
// Execute the binary
log(
`[SERVER]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
`[SERVER]::Debug: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
)
subprocess = spawn(
executableOptions.executablePath,
Expand All @@ -184,12 +184,12 @@ const spawnNitroProcess = async (): Promise<void> => {
})

subprocess.on('close', (code: any) => {
log(`[SERVER]::Debug: Nitro exited with code: ${code}`)
log(`[SERVER]::Debug: cortex exited with code: ${code}`)
subprocess = undefined
})

tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
log(`[SERVER]::Debug: Nitro is ready`)
log(`[SERVER]::Debug: cortex is ready`)
})
}

Expand All @@ -203,13 +203,13 @@ const executableNitroFile = (): NitroExecutableOptions => {
let binaryFolder = join(
getJanExtensionsPath(),
'@janhq',
'inference-nitro-extension',
'inference-cortex-extension',
'dist',
'bin'
)

let cudaVisibleDevices = ''
let binaryName = 'nitro'
let binaryName = 'cortex-cpp'
/**
* The binary folder is different for each platform.
*/
Expand All @@ -228,12 +228,16 @@ const executableNitroFile = (): NitroExecutableOptions => {
}
cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
}
binaryName = 'nitro.exe'
binaryName = 'cortex-cpp.exe'
} else if (process.platform === 'darwin') {
/**
 * For MacOS: mac-arm64 for Apple Silicon, mac-amd64 for Intel
 */
binaryFolder = join(binaryFolder, 'mac-universal')
if(process.arch === 'arm64') {
binaryFolder = join(binaryFolder, 'mac-arm64')
} else {
binaryFolder = join(binaryFolder, 'mac-amd64')
}
} else {
/**
* For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
Expand Down Expand Up @@ -300,7 +304,7 @@ const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> =>
retryDelay: 500,
})
.then((res: any) => {
log(`[SERVER]::Debug: Load model success with response ${JSON.stringify(res)}`)
log(`[SERVER]::Debug: Load model request with response ${JSON.stringify(res)}`)
return Promise.resolve(res)
})
.catch((err: any) => {
Expand All @@ -327,7 +331,7 @@ export const stopModel = async (_modelId: string) => {
})
}, 5000)
const tcpPortUsed = require('tcp-port-used')
log(`[SERVER]::Debug: Request to kill Nitro`)
log(`[SERVER]::Debug: Request to kill cortex`)

fetch(NITRO_HTTP_KILL_URL, {
method: 'DELETE',
Expand Down
2 changes: 1 addition & 1 deletion core/src/node/helper/resource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { log } from './logger'

export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
const cpu = await physicalCpuCount()
log(`[NITRO]::CPU information - ${cpu}`)
log(`[CORTEX]::CPU information - ${cpu}`)

return {
numCpuPhysicalCore: cpu,
Expand Down
3 changes: 2 additions & 1 deletion extensions/assistant-extension/src/node/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@ export function toolRetrievalUpdateTextSplitter(
}
export async function toolRetrievalIngestNewDocument(
file: string,
model: string,
engine: string
) {
const filePath = path.join(getJanDataFolderPath(), normalizeFilePath(file))
const threadPath = path.dirname(filePath.replace('files', ''))
retrieval.updateEmbeddingEngine(engine)
retrieval.updateEmbeddingEngine(model, engine)
return retrieval
.ingestAgentKnowledge(filePath, `${threadPath}/memory`)
.catch((err) => {
Expand Down
6 changes: 3 additions & 3 deletions extensions/assistant-extension/src/node/retrieval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ export class Retrieval {
})
}

public updateEmbeddingEngine(engine: string): void {
public updateEmbeddingEngine(model: string, engine: string): void {
// Engine settings are not compatible with the current embedding model params
// Switch case manually for now
if (engine === 'nitro') {
this.embeddingModel = new OpenAIEmbeddings(
{ openAIApiKey: 'nitro-embedding' },
{ openAIApiKey: 'nitro-embedding', model },
// TODO: Raw settings
{ basePath: 'http://127.0.0.1:3928/v1' }
{ basePath: 'http://127.0.0.1:3928/v1' },
)
} else {
// Fallback to OpenAI Settings
Expand Down
1 change: 1 addition & 0 deletions extensions/assistant-extension/src/tools/retrieval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export class RetrievalTool extends InferenceTool {
NODE,
'toolRetrievalIngestNewDocument',
docFile,
data.model?.id,
data.model?.engine
)
} else {
Expand Down
2 changes: 2 additions & 0 deletions extensions/inference-nitro-extension/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
bin
!version.txt
2 changes: 1 addition & 1 deletion extensions/inference-nitro-extension/bin/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.3.22
0.4.4
4 changes: 2 additions & 2 deletions extensions/inference-nitro-extension/download.bat
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
@echo off
set /p NITRO_VERSION=<./bin/version.txt
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan
set /p CORTEX_VERSION=<./bin/version.txt
.\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2-cuda-11-7.tar.gz -e --strip 1 -o ./bin/win-cuda-11-7 && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-avx2.tar.gz -e --strip 1 -o ./bin/win-cpu && .\node_modules\.bin\download https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/win-vulkan
10 changes: 5 additions & 5 deletions extensions/inference-nitro-extension/package.json
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
{
"name": "@janhq/inference-nitro-extension",
"productName": "Nitro Inference Engine",
"name": "@janhq/inference-cortex-extension",
"productName": "Cortex Inference Engine",
"version": "1.0.7",
"description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"test": "jest",
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/nitro",
"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-universal.tar.gz -o ./bin/ && mkdir -p ./bin/mac-universal && tar -zxvf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz --strip-components=1 -C ./bin/mac-universal && rm -rf ./bin/nitro-${NITRO_VERSION}-mac-universal.tar.gz && chmod +x ./bin/mac-universal/nitro",
"downloadnitro:linux": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-avx2.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64-vulkan.tar.gz -e --strip 1 -o ./bin/linux-vulkan && chmod +x ./bin/linux-vulkan/cortex-cpp",
"downloadnitro:darwin": "CORTEX_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-arm64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz --strip-components=1 -C ./bin/mac-arm64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz && chmod +x ./bin/mac-arm64/cortex-cpp && download https://github.com/janhq/cortex/releases/download/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz -o ./bin/ && mkdir -p ./bin/mac-amd64 && tar -zxvf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz --strip-components=1 -C ./bin/mac-amd64 && rm -rf ./bin/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz && chmod +x ./bin/mac-amd64/cortex-cpp",
"downloadnitro:win32": "download.bat",
"downloadnitro": "run-script-os",
"build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
Expand Down
2 changes: 1 addition & 1 deletion extensions/inference-nitro-extension/rollup.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ export default [
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
INFERENCE_URL: JSON.stringify(
process.env.INFERENCE_URL ||
'http://127.0.0.1:3928/inferences/llamacpp/chat_completion'
'http://127.0.0.1:3928/inferences/server/chat_completion'
),
TROUBLESHOOTING_URL: JSON.stringify(
'https://jan.ai/guides/troubleshooting'
Expand Down
4 changes: 2 additions & 2 deletions extensions/inference-nitro-extension/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
const executableFolderPath = await joinPath([
janDataFolderPath,
'engines',
this.name ?? 'nitro',
this.name ?? 'cortex-cpp',
this.version ?? '1.0.0',
])

Expand Down Expand Up @@ -179,7 +179,7 @@ export default class JanInferenceNitroExtension extends LocalOAIEngine {
const executableFolderPath = await joinPath([
janDataFolderPath,
'engines',
this.name ?? 'nitro',
this.name ?? 'cortex-cpp',
this.version ?? '1.0.0',
])

Expand Down
27 changes: 20 additions & 7 deletions extensions/inference-nitro-extension/src/node/execute.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,22 @@ describe('test executable nitro file', () => {
Object.defineProperty(process, 'platform', {
value: 'darwin',
})
Object.defineProperty(process, 'arch', {
value: 'arm64',
})
expect(executableNitroFile(testSettings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`mac-arm64${sep}cortex-cpp`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
)
Object.defineProperty(process, 'arch', {
value: 'x64',
})
expect(executableNitroFile(testSettings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`mac-universal${sep}nitro`),
executablePath: expect.stringContaining(`mac-amd64${sep}cortex-cpp`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
Expand All @@ -56,7 +69,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`win-cpu${sep}nitro.exe`),
executablePath: expect.stringContaining(`win-cpu${sep}cortex-cpp.exe`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
Expand Down Expand Up @@ -89,7 +102,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`win-cuda-11-7${sep}nitro.exe`),
executablePath: expect.stringContaining(`win-cuda-11-7${sep}cortex-cpp.exe`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
Expand Down Expand Up @@ -122,7 +135,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`win-cuda-12-0${sep}nitro.exe`),
executablePath: expect.stringContaining(`win-cuda-12-0${sep}cortex-cpp.exe`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
Expand All @@ -139,7 +152,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`linux-cpu${sep}nitro`),
executablePath: expect.stringContaining(`linux-cpu${sep}cortex-cpp`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
Expand Down Expand Up @@ -172,7 +185,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`linux-cuda-11-7${sep}nitro`),
executablePath: expect.stringContaining(`linux-cuda-11-7${sep}cortex-cpp`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
Expand Down Expand Up @@ -205,7 +218,7 @@ describe('test executable nitro file', () => {
}
expect(executableNitroFile(settings)).toEqual(
expect.objectContaining({
executablePath: expect.stringContaining(`linux-cuda-12-0${sep}nitro`),
executablePath: expect.stringContaining(`linux-cuda-12-0${sep}cortex-cpp`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
Expand Down
6 changes: 3 additions & 3 deletions extensions/inference-nitro-extension/src/node/execute.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { GpuSetting, SystemInformation } from '@janhq/core'
import { GpuSetting } from '@janhq/core'
import * as path from 'path'

export interface NitroExecutableOptions {
Expand All @@ -24,7 +24,7 @@ const os = (): string => {
return process.platform === 'win32'
? 'win'
: process.platform === 'darwin'
? 'mac-universal'
? process.arch === 'arm64' ? 'mac-arm64' : 'mac-amd64'
: 'linux'
}

Expand Down Expand Up @@ -52,7 +52,7 @@ export const executableNitroFile = (
.join('-')
let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `nitro${extension()}`
let binaryName = `cortex-cpp${extension()}`

return {
executablePath: path.join(__dirname, '..', 'bin', binaryFolder, binaryName),
Expand Down

0 comments on commit 537ef20

Please sign in to comment.