edp1096/my-llama

Go bindings for llama.cpp and webui
[Screenshot: Llama 7B runner on my Windows machine]

This repository provides:

  • A Go binding for the interactive mode of llama.cpp/examples/main
  • examples/runner
    • WebSocket server (a rough client sketch follows this list)
    • Web UI embedded in the Go binary
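
As a rough sketch of what a custom client for the runner's WebSocket server could look like: the endpoint path, port, and message format below are placeholders (check examples/runner for the actual ones), and github.com/gorilla/websocket is just one possible client library; only the connect/send/receive flow is illustrated.

package main

import (
	"fmt"
	"log"

	"github.com/gorilla/websocket" // third-party client library, not part of my-llama
)

func main() {
	// NOTE: the URL and payload shape are assumptions for illustration only;
	// consult examples/runner for the real endpoint and message format.
	conn, _, err := websocket.DefaultDialer.Dial("ws://localhost:8080/ws", nil)
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	// Send a prompt and print whatever the server streams back
	if err := conn.WriteMessage(websocket.TextMessage, []byte("The quick brown fox")); err != nil {
		log.Fatal(err)
	}
	for {
		_, msg, err := conn.ReadMessage()
		if err != nil {
			break // server closed the connection or an error occurred
		}
		fmt.Print(string(msg))
	}
}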

Download the pre-compiled binary and DLLs

Usage

Use as a Go module

See the Go Reference or the my-llama-app repo.
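
To pull the module into your project first (standard Go modules workflow, run inside your module directory):

go get github.com/edp1096/my-llama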

  • Compile - the shared libraries must be placed alongside the built binary
# CPU
go build [-tags cpu]

# GPU/CLBlast
go build -tags clblast

# GPU/CUDA
go build -tags cuda
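  • Minimal example - the program below loads a model, tokenizes a prompt, and samples 16 tokens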
package main // import "minimal"

import (
	"fmt"

	llama "github.com/edp1096/my-llama"
)

func main() {
	modelName := "vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin"
	numPredict := 16

	l, err := llama.New()
	if err != nil {
		panic(err)
	}

	l.LlamaApiInitBackend()
	l.InitGptParams()

	l.SetNumThreads(4)
	l.SetUseMlock(true)
	l.SetNumPredict(numPredict)
	l.SetNumGpuLayers(32)
	l.SetSeed(42)

	l.InitContextParamsFromGptParams()

	err = l.LoadModel(modelName)
	if err != nil {
		panic(err)
	}

	l.AllocateTokens()

	// numPast tracks how many tokens have been evaluated in the context so far
	numPast := 0
	prompt := "The quick brown fox"

	promptTokens, promptNumTokens := l.LlamaApiTokenize(prompt, true)
	fmt.Println("promptTokens:", promptTokens)

	if promptNumTokens < 1 {
		fmt.Println("numToken < 1")
		panic("numToken < 1")
	}

	isOK := l.LlamaApiEval(promptTokens, promptNumTokens, numPast)
	numPast += promptNumTokens

	fmt.Println("n_prompt_token, n_past, isOK:", promptNumTokens, numPast, isOK)
	fmt.Println("numPredict:", numPredict)

	// Sample numPredict tokens: fetch logits, pick the next token, print it,
	// then evaluate it so it becomes part of the context
	for i := 0; i < numPredict; i++ {
		l.LlamaApiGetLogits()
		numVocab := l.LlamaApiNumVocab()

		l.PrepareCandidates(numVocab)
		nextToken := l.LlamaApiSampleToken()
		nextTokenStr := l.LlamaApiTokenToStr(nextToken)

		fmt.Print(nextTokenStr)
		l.LlamaApiEval([]int32{nextToken}, 1, numPast)

		numPast++
	}

	fmt.Println()

	l.LlamaApiFree()
}

/*
$ ./minimal
System Info: AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 |
Model: vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin
llama.cpp: loading model from vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.07 MB
llama_model_load_internal: mem required  = 5407.71 MB (+ 1026.00 MB per state)
...................................................................................................
llama_init_from_file: kv self size  =  256.00 MB
promptTokens: [1 1576 4996 17354 1701 29916]
n_prompt_token, n_past, isOK: 6 6 true
numPredict: 16
 jumps over the lazy dog.
...
 */
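
If several completions are needed in one process, the prompt handling and sampling loop in main above can be factored into a closure over the handle l. The sketch below reuses only calls already shown in the minimal example and assumes the context window is large enough for the combined token count.

// Sketch: replaces the prompt handling and sampling loop inside main above
numPast := 0
generate := func(prompt string, n int) string {
	out := ""

	// Tokenize and evaluate the prompt, advancing the context position
	tokens, numTokens := l.LlamaApiTokenize(prompt, true)
	l.LlamaApiEval(tokens, numTokens, numPast)
	numPast += numTokens

	// Sample n tokens, feeding each one back into the context
	for i := 0; i < n; i++ {
		l.LlamaApiGetLogits()
		l.PrepareCandidates(l.LlamaApiNumVocab())
		next := l.LlamaApiSampleToken()
		out += l.LlamaApiTokenToStr(next)

		l.LlamaApiEval([]int32{next}, 1, numPast)
		numPast++
	}

	return out
}

fmt.Println(generate("The quick brown fox", numPredict))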

Run the runner:

# Just launch
my-llama.exe

# Launch and open the web UI in a browser
my-llama.exe -b
  • If parameters changed in the panel don't seem to take effect, refresh the browser page

Build from source

Requirements

PowerShell scripts

  • Before running the ps1 script files, set ExecutionPolicy to RemoteSigned and unblock the ps1 files
# Check the current policy
Get-ExecutionPolicy

# Set it to RemoteSigned
Set-ExecutionPolicy -Scope CurrentUser RemoteSigned

# Unblock ps1 files
Unblock-File *.ps1

Clone the repository, then build the library

  • Clone
git clone https://github.com/edp1096/my-llama.git
  • Build
# CPU
./build_lib.ps1

# GPU/CLBlast
./build_lib.ps1 clblast

# GPU/CUDA
./build_lib.ps1 cuda
  • Clean temporary files
./clean.ps1

# or

./clean.ps1 all

Then build in the examples folder

  • Build the runner in the examples/runner folder
cd examples/runner

# CPU
go build [-tags cpu]

# GPU/CLBlast
go build -tags clblast

# GPU/CUDA
go build -tags cuda

Linux

  • Build library
# CPU
./build_lib.sh

# GPU/CLBlast
./build_lib.sh clblast

# GPU/CUDA
./build_lib.sh cuda
  • Clean temporary files
./clean.sh
  • Tested with NVIDIA driver 530 and CUDA Toolkit 12.1
    • Ubuntu 20.04, GTX 1080 Ti
    • Ubuntu 20.04, RTX 3090
    • WSL Ubuntu 22.04, RTX 3060 Ti
  • WSL
    • OpenCL is not supported under WSL, so CLBlast does not work
    • If CUDA does not work, set the environment variable export GGML_CUDA_NO_PINNED=1

Source