Skip to content

Commit

Permalink
Release/v0.4.9 (#2421)
Browse files Browse the repository at this point in the history
* fix: turn off experimental settings should also turn off quick ask (#2411)

* fix: app glitches 1s generating response before starting model (#2412)

* fix: disable experimental feature should also disable vulkan (#2414)

* fix: model load stuck on windows when can't get CPU core count (#2413)

Signed-off-by: James <james@jan.ai>
Co-authored-by: James <james@jan.ai>

* feat: TensorRT-LLM engine update support (#2415)

* fix: engine update

* chore: add remove prepopulated models

Signed-off-by: James <james@jan.ai>

* update tinyjensen url

Signed-off-by: James <james@jan.ai>

* update llamacorn

Signed-off-by: James <james@jan.ai>

* update Mistral 7B Instruct v0.1 int4

Signed-off-by: James <james@jan.ai>

* update tensorrt

Signed-off-by: James <james@jan.ai>

* update

Signed-off-by: hiro <hiro@jan.ai>

* update

Signed-off-by: James <james@jan.ai>

* prettier

Signed-off-by: James <james@jan.ai>

* update mistral config

Signed-off-by: James <james@jan.ai>

* fix some lint

Signed-off-by: James <james@jan.ai>

---------

Signed-off-by: James <james@jan.ai>
Signed-off-by: hiro <hiro@jan.ai>
Co-authored-by: James <james@jan.ai>
Co-authored-by: hiro <hiro@jan.ai>

* Tensorrt LLM disable turing support (#2418)

Co-authored-by: Hien To <tominhhien97@gmail.com>

* chore: add prompt template tensorrtllm (#2375)

* chore: add prompt template tensorrtllm

* Add Prompt template for mistral and correct model metadata

---------

Co-authored-by: Hien To <tominhhien97@gmail.com>

* fix: correct tensorrt mistral model.json (#2419)

---------

Signed-off-by: James <james@jan.ai>
Signed-off-by: hiro <hiro@jan.ai>
Co-authored-by: Louis <louis@jan.ai>
Co-authored-by: James <james@jan.ai>
Co-authored-by: hiro <hiro@jan.ai>
Co-authored-by: hiento09 <136591877+hiento09@users.noreply.github.com>
Co-authored-by: Hien To <tominhhien97@gmail.com>
  • Loading branch information
6 people committed Mar 19, 2024
1 parent c81a33f commit 3a3bceb
Show file tree
Hide file tree
Showing 21 changed files with 328 additions and 71 deletions.
1 change: 1 addition & 0 deletions .github/workflows/jan-electron-linter-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ on:
branches:
- main
- dev
- release/**
paths:
- "electron/**"
- .github/workflows/jan-electron-linter-and-test.yml
Expand Down
2 changes: 1 addition & 1 deletion core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
},
"devDependencies": {
"@types/jest": "^29.5.12",
"@types/node": "^12.0.2",
"@types/node": "^20.11.4",
"eslint": "8.57.0",
"eslint-plugin-jest": "^27.9.0",
"jest": "^29.7.0",
Expand Down
1 change: 1 addition & 0 deletions core/src/api/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ export enum FileManagerRoute {
fileStat = 'fileStat',
writeBlob = 'writeBlob',
mkdir = 'mkdir',
rm = 'rm',
}

export type ApiFunction = (...args: any[]) => any
Expand Down
8 changes: 8 additions & 0 deletions core/src/extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ export interface Compatibility {
const ALL_INSTALLATION_STATE = [
'NotRequired', // not required.
'Installed', // require and installed. Good to go.
'Updatable', // require and installed but need to be updated.
'NotInstalled', // require to be installed.
'Corrupted', // require but corrupted. Need to redownload.
] as const
Expand Down Expand Up @@ -59,6 +60,13 @@ export abstract class BaseExtension implements ExtensionType {
return undefined
}

/**
* Determine if the extension is updatable.
*/
updatable(): boolean {
return false
}

/**
* Determine if the prerequisites for the extension are installed.
*
Expand Down
4 changes: 4 additions & 0 deletions core/src/fs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ const mkdir = (...args: any[]) => global.core.api?.mkdir(...args)
*/
const rmdirSync = (...args: any[]) =>
global.core.api?.rmdirSync(...args, { recursive: true, force: true })

const rm = (path: string) => global.core.api?.rm(path)

/**
* Deletes a file from the local file system.
* @param {string} path - The path of the file to delete.
Expand Down Expand Up @@ -96,6 +99,7 @@ export const fs = {
mkdirSync,
mkdir,
rmdirSync,
rm,
unlinkSync,
appendFileSync,
copyFileSync,
Expand Down
12 changes: 12 additions & 0 deletions core/src/node/api/processors/fsExt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,4 +100,16 @@ export class FSExt implements Processor {
})
})
}

rmdir(path: string): Promise<void> {
return new Promise((resolve, reject) => {
fs.rm(path, { recursive: true }, (err) => {
if (err) {
reject(err)
} else {
resolve()
}
})
})
}
}
48 changes: 28 additions & 20 deletions core/src/node/helper/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,26 +82,34 @@ export const getJanExtensionsPath = (): string => {
*/
export const physicalCpuCount = async (): Promise<number> => {
const platform = os.platform()
if (platform === 'linux') {
const output = await exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
return parseInt(output.trim(), 10)
} else if (platform === 'darwin') {
const output = await exec('sysctl -n hw.physicalcpu_max')
return parseInt(output.trim(), 10)
} else if (platform === 'win32') {
const output = await exec('WMIC CPU Get NumberOfCores')
return output
.split(os.EOL)
.map((line: string) => parseInt(line))
.filter((value: number) => !isNaN(value))
.reduce((sum: number, number: number) => sum + number, 1)
} else {
const cores = os.cpus().filter((cpu: any, index: number) => {
const hasHyperthreading = cpu.model.includes('Intel')
const isOdd = index % 2 === 1
return !hasHyperthreading || isOdd
})
return cores.length
try {
if (platform === 'linux') {
const output = await exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
return parseInt(output.trim(), 10)
} else if (platform === 'darwin') {
const output = await exec('sysctl -n hw.physicalcpu_max')
return parseInt(output.trim(), 10)
} else if (platform === 'win32') {
const output = await exec('WMIC CPU Get NumberOfCores')
return output
.split(os.EOL)
.map((line: string) => parseInt(line))
.filter((value: number) => !isNaN(value))
.reduce((sum: number, number: number) => sum + number, 1)
} else {
const cores = os.cpus().filter((cpu: any, index: number) => {
const hasHyperthreading = cpu.model.includes('Intel')
const isOdd = index % 2 === 1
return !hasHyperthreading || isOdd
})
return cores.length
}
} catch (err) {
console.warn('Failed to get physical CPU count', err)
// Divide by 2 to get rid of hyper threading
const coreCount = Math.ceil(os.cpus().length / 2)
console.debug('Using node API to get physical CPU count:', coreCount)
return coreCount
}
}

Expand Down
2 changes: 1 addition & 1 deletion core/src/node/helper/resource.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { SystemResourceInfo } from '../../types'
import { physicalCpuCount } from './config'
import { log, logServer } from './log'
import { log } from './log'

export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
const cpu = await physicalCpuCount()
Expand Down
2 changes: 1 addition & 1 deletion extensions/model-extension/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ export default class JanModelExtension extends ModelExtension {
private static readonly _tensorRtEngineFormat = '.engine'
private static readonly _configDirName = 'config'
private static readonly _defaultModelFileName = 'default-model.json'
private static readonly _supportedGpuArch = ['turing', 'ampere', 'ada']
private static readonly _supportedGpuArch = ['ampere', 'ada']

/**
* Called when the extension is loaded.
Expand Down
3 changes: 1 addition & 2 deletions extensions/monitoring-extension/src/node/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,7 @@ const updateNvidiaDriverInfo = async () =>
const getGpuArch = (gpuName: string): string => {
if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'

if (gpuName.includes('20')) return 'turing'
else if (gpuName.includes('30')) return 'ampere'
if (gpuName.includes('30')) return 'ampere'
else if (gpuName.includes('40')) return 'ada'
else return 'unknown'
}
Expand Down
88 changes: 74 additions & 14 deletions extensions/tensorrt-llm-extension/models.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,31 @@
"sources": [
{
"filename": "config.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/config.json"
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/config.json"
},
{
"filename": "rank0.engine",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/rank0.engine"
"filename": "mistral_float16_tp1_rank0.engine",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/mistral_float16_tp1_rank0.engine"
},
{
"filename": "tokenizer.model",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer.model"
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer.model"
},
{
"filename": "special_tokens_map.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/special_tokens_map.json"
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/special_tokens_map.json"
},
{
"filename": "tokenizer.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer.json"
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer.json"
},
{
"filename": "tokenizer_config.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/LlamaCorn-1.1B-Chat-fp16/tokenizer_config.json"
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer_config.json"
},
{
"filename": "model.cache",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/model.cache"
}
],
"id": "llamacorn-1.1b-chat-fp16",
Expand All @@ -50,27 +54,31 @@
"sources": [
{
"filename": "config.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/TinyJensen-1.1B-Chat-fp16/config.json"
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/config.json"
},
{
"filename": "rank0.engine",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/TinyJensen-1.1B-Chat-fp16/rank0.engine"
"filename": "mistral_float16_tp1_rank0.engine",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/mistral_float16_tp1_rank0.engine"
},
{
"filename": "tokenizer.model",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/TinyJensen-1.1B-Chat-fp16/tokenizer.model"
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.model"
},
{
"filename": "special_tokens_map.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json"
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json"
},
{
"filename": "tokenizer.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/TinyJensen-1.1B-Chat-fp16/tokenizer.json"
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.json"
},
{
"filename": "tokenizer_config.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json"
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json"
},
{
"filename": "model.cache",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/model.cache"
}
],
"id": "tinyjensen-1.1b-chat-fp16",
Expand All @@ -92,5 +100,57 @@
"size": 2151000000
},
"engine": "nitro-tensorrt-llm"
},
{
"sources": [
{
"filename": "config.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/config.json"
},
{
"filename": "mistral_float16_tp1_rank0.engine",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/mistral_float16_tp1_rank0.engine"
},
{
"filename": "tokenizer.model",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer.model"
},
{
"filename": "special_tokens_map.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/special_tokens_map.json"
},
{
"filename": "tokenizer.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer.json"
},
{
"filename": "tokenizer_config.json",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer_config.json"
},
{
"filename": "model.cache",
"url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/model.cache"
}
],
"id": "mistral-7b-instruct-int4",
"object": "model",
"name": "Mistral 7B Instruct v0.1 INT4",
"version": "1.0",
"description": "Mistral 7B Instruct v0.1 INT4",
"format": "TensorRT-LLM",
"settings": {
"ctx_len": 2048,
"text_model": false,
"prompt_template": "[INST] {prompt} [/INST]"
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "MistralAI",
"tags": ["TensorRT-LLM", "7B", "Finetuned"],
"size": 3840000000
},
"engine": "nitro-tensorrt-llm"
}
]
2 changes: 1 addition & 1 deletion extensions/tensorrt-llm-extension/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"0.1.0"
]
},
"tensorrtVersion": "0.1.6",
"tensorrtVersion": "0.1.8",
"provider": "nitro-tensorrt-llm",
"scripts": {
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
Expand Down
2 changes: 1 addition & 1 deletion extensions/tensorrt-llm-extension/rollup.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ export default [
DOWNLOAD_RUNNER_URL:
process.platform === 'win32'
? JSON.stringify(
'https://github.com/janhq/nitro-tensorrt-llm/releases/download/windows-v<version>/nitro-windows-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
'https://github.com/janhq/nitro-tensorrt-llm/releases/download/windows-v<version>-tensorrt-llm-v0.7.1/nitro-windows-v<version>-tensorrt-llm-v0.7.1-amd64-all-arch.tar.gz'
)
: JSON.stringify(
'https://github.com/janhq/nitro-tensorrt-llm/releases/download/linux-v<version>/nitro-linux-v<version>-amd64-tensorrt-llm-<gpuarch>.tar.gz'
Expand Down
25 changes: 24 additions & 1 deletion extensions/tensorrt-llm-extension/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
override inferenceUrl = INFERENCE_URL
override nodeModule = NODE

private supportedGpuArch = ['turing', 'ampere', 'ada']
private supportedGpuArch = ['ampere', 'ada']
private supportedPlatform = ['win32', 'linux']
private isUpdateAvailable = false

compatibility() {
return COMPATIBILITY as unknown as Compatibility
Expand All @@ -56,6 +57,8 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
}

override async install(): Promise<void> {
await this.removePopulatedModels()

const info = await systemInformation()
console.debug(
`TensorRTLLMExtension installing pre-requisites... ${JSON.stringify(info)}`
Expand Down Expand Up @@ -141,6 +144,22 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
}

async removePopulatedModels(): Promise<void> {
console.debug(`removePopulatedModels`, JSON.stringify(models))
const janDataFolderPath = await getJanDataFolderPath()
const modelFolderPath = await joinPath([janDataFolderPath, 'models'])

for (const model of models) {
const modelPath = await joinPath([modelFolderPath, model.id])
console.debug(`modelPath: ${modelPath}`)
if (await fs.existsSync(modelPath)) {
console.debug(`Removing model ${modelPath}`)
await fs.rmdirSync(modelPath)
}
}
events.emit(ModelEvent.OnModelsUpdate, {})
}

async onModelInit(model: Model): Promise<void> {
if (model.engine !== this.provider) return

Expand All @@ -156,6 +175,10 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
}
}

updatable() {
return this.isUpdateAvailable
}

override async installationState(): Promise<InstallationState> {
const info = await systemInformation()

Expand Down

0 comments on commit 3a3bceb

Please sign in to comment.