| author | Grail Finder <wohilas@gmail.com> | 2025-12-12 14:07:10 +0300 |
|---|---|---|
| committer | Grail Finder <wohilas@gmail.com> | 2025-12-12 14:07:10 +0300 |
| commit | 9edda1fecf4d0fc4dc9ad92e008a986b74f50c80 (patch) | |
| tree | bcfe133ce83aced1ff71ac37a7471aa25f888b84 | |
| parent | 2e2e6e9f9cb87ab31fc7dc7e0e196ece884da4bd (diff) | |
Feat: llama.cpp model switch
| -rw-r--r-- | bot.go | 29 |
| -rw-r--r-- | llm.go | 2 |
| -rw-r--r-- | main.go | 17 |
| -rw-r--r-- | models/models.go | 92 |
| -rw-r--r-- | tui.go | 15 |
5 files changed, 105 insertions, 50 deletions
diff --git a/bot.go b/bot.go
@@ -65,6 +65,7 @@ var (
 		"google/gemma-3-27b-it:free",
 		"meta-llama/llama-3.3-70b-instruct:free",
 	}
+	LocalModels = []string{}
 )
 
 // cleanNullMessages removes messages with null or empty content to prevent API issues
@@ -187,7 +188,7 @@ func createClient(connectTimeout time.Duration) *http.Client {
 	}
 }
 
-func fetchLCPModelName() *models.LLMModels {
+func fetchLCPModelName() *models.LCPModels { //nolint
 	resp, err := httpClient.Get(cfg.FetchModelNameAPI)
 	if err != nil {
@@ -199,7 +200,7 @@ func fetchLCPModelName() *models.LLMModels {
 		return nil
 	}
 	defer resp.Body.Close()
-	llmModel := models.LLMModels{}
+	llmModel := models.LCPModels{}
 	if err := json.NewDecoder(resp.Body).Decode(&llmModel); err != nil {
 		logger.Warn("failed to decode resp", "link", cfg.FetchModelNameAPI, "error", err)
 		return nil
@@ -255,6 +256,24 @@ func fetchORModels(free bool) ([]string, error) {
 	return freeModels, nil
 }
 
+func fetchLCPModels() ([]string, error) {
+	resp, err := http.Get(cfg.FetchModelNameAPI)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != 200 {
+		err := fmt.Errorf("failed to fetch or models; status: %s", resp.Status)
+		return nil, err
+	}
+	data := &models.LCPModels{}
+	if err := json.NewDecoder(resp.Body).Decode(data); err != nil {
+		return nil, err
+	}
+	localModels := data.ListModels()
+	return localModels, nil
+}
+
 func sendMsgToLLM(body io.Reader) {
 	choseChunkParser()
@@ -869,6 +888,12 @@ func init() {
 			}
 		}()
 	}
+	go func() {
+		LocalModels, err = fetchLCPModels()
+		if err != nil {
+			logger.Error("failed to fetch llama.cpp models", "error", err)
+		}
+	}()
 	choseChunkParser()
 	httpClient = createClient(time.Second * 15)
 	if cfg.TTS_ENABLED {
diff --git a/llm.go b/llm.go
@@ -157,7 +157,7 @@ func (lcp LCPCompletion) FormMsg(msg, role string, resume bool) (io.Reader, erro
 	logger.Debug("checking prompt for /completion", "tool_use", cfg.ToolUse, "msg", msg, "resume", resume,
 		"prompt", prompt, "multimodal_data_count", len(multimodalData))
-	payload := models.NewLCPReq(prompt, multimodalData, defaultLCPProps, chatBody.MakeStopSlice())
+	payload := models.NewLCPReq(prompt, chatBody.Model, multimodalData, defaultLCPProps, chatBody.MakeStopSlice())
 	data, err := json.Marshal(payload)
 	if err != nil {
 		logger.Error("failed to form a msg", "error", err)
diff --git a/main.go b/main.go
@@ -9,14 +9,15 @@ import (
 )
 
 var (
-	botRespMode         = false
-	editMode            = false
-	roleEditMode        = false
-	injectRole          = true
-	selectedIndex       = int(-1)
-	currentAPIIndex     = 0 // Index to track current API in ApiLinks slice
-	currentORModelIndex = 0 // Index to track current OpenRouter model in ORFreeModels slice
-	shellMode           = false
+	botRespMode            = false
+	editMode               = false
+	roleEditMode           = false
+	injectRole             = true
+	selectedIndex          = int(-1)
+	currentAPIIndex        = 0 // Index to track current API in ApiLinks slice
+	currentORModelIndex    = 0 // Index to track current OpenRouter model in ORFreeModels slice
+	currentLocalModelIndex = 0 // Index to track current llama.cpp model
+	shellMode              = false
 	// indexLine = "F12 to show keys help | bot resp mode: [orange:-:b]%v[-:-:-] (F6) | card's char: [orange:-:b]%s[-:-:-] (ctrl+s) | chat: [orange:-:b]%s[-:-:-] (F1) | toolUseAdviced: [orange:-:b]%v[-:-:-] (ctrl+k) | model: [orange:-:b]%s[-:-:-] (ctrl+l) | skip LLM resp: [orange:-:b]%v[-:-:-] (F10)\nAPI_URL: [orange:-:b]%s[-:-:-] (ctrl+v) | ThinkUse: [orange:-:b]%v[-:-:-] (ctrl+p) | Log Level: [orange:-:b]%v[-:-:-] (ctrl+p) | Recording: [orange:-:b]%v[-:-:-] (ctrl+r) | Writing as: [orange:-:b]%s[-:-:-] (ctrl+q)"
 	indexLineCompletion = "F12 to show keys help | bot resp mode: [orange:-:b]%v[-:-:-] (F6) | card's char: [orange:-:b]%s[-:-:-] (ctrl+s) | chat: [orange:-:b]%s[-:-:-] (F1) | toolUseAdviced: [orange:-:b]%v[-:-:-] (ctrl+k) | model: [orange:-:b]%s[-:-:-] (ctrl+l) | skip LLM resp: [orange:-:b]%v[-:-:-] (F10)\nAPI_URL: [orange:-:b]%s[-:-:-] (ctrl+v) | Insert <think>: [orange:-:b]%v[-:-:-] (ctrl+p) | Log Level: [orange:-:b]%v[-:-:-] (ctrl+p) | Recording: [orange:-:b]%v[-:-:-] (ctrl+r) | Writing as: [orange:-:b]%s[-:-:-] (ctrl+q) | Bot will write as [orange:-:b]%s[-:-:-] (ctrl+x) | role_inject [orange:-:b]%v[-:-:-]"
 	focusSwitcher = map[tview.Primitive]tview.Primitive{}
diff --git a/models/models.go b/models/models.go
index baadc8d..912f72b 100644
--- a/models/models.go
+++ b/models/models.go
@@ -89,10 +89,10 @@ type ImageContentPart struct {
 
 // RoleMsg represents a message with content that can be either a simple string or structured content parts
 type RoleMsg struct {
-	Role         string        `json:"role"`
-	Content      string        `json:"-"`
-	ContentParts []interface{} `json:"-"`
-	ToolCallID   string        `json:"tool_call_id,omitempty"` // For tool response messages
+	Role            string        `json:"role"`
+	Content         string        `json:"-"`
+	ContentParts    []interface{} `json:"-"`
+	ToolCallID      string        `json:"tool_call_id,omitempty"` // For tool response messages
 	hasContentParts bool          // Flag to indicate which content type to marshal
 }
@@ -215,8 +215,8 @@ func (m RoleMsg) ToPrompt() string {
 // NewRoleMsg creates a simple RoleMsg with string content
 func NewRoleMsg(role, content string) RoleMsg {
 	return RoleMsg{
-		Role:    role,
-		Content: content,
+		Role:            role,
+		Content:         content,
 		hasContentParts: false,
 	}
 }
@@ -420,34 +420,35 @@ type OpenAIReq struct {
 
 // ===
 
-type LLMModels struct {
-	Object string `json:"object"`
-	Data   []struct {
-		ID      string `json:"id"`
-		Object  string `json:"object"`
-		Created int    `json:"created"`
-		OwnedBy string `json:"owned_by"`
-		Meta    struct {
-			VocabType int   `json:"vocab_type"`
-			NVocab    int   `json:"n_vocab"`
-			NCtxTrain int   `json:"n_ctx_train"`
-			NEmbd     int   `json:"n_embd"`
-			NParams   int64 `json:"n_params"`
-			Size      int64 `json:"size"`
-		} `json:"meta"`
-	} `json:"data"`
-}
+// type LLMModels struct {
+// 	Object string `json:"object"`
+// 	Data   []struct {
+// 		ID      string `json:"id"`
+// 		Object  string `json:"object"`
+// 		Created int    `json:"created"`
+// 		OwnedBy string `json:"owned_by"`
+// 		Meta    struct {
+// 			VocabType int   `json:"vocab_type"`
+// 			NVocab    int   `json:"n_vocab"`
+// 			NCtxTrain int   `json:"n_ctx_train"`
+// 			NEmbd     int   `json:"n_embd"`
+// 			NParams   int64 `json:"n_params"`
+// 			Size      int64 `json:"size"`
+// 		} `json:"meta"`
+// 	} `json:"data"`
+// }
 
 type LlamaCPPReq struct {
-	Stream bool `json:"stream"`
+	Model  string `json:"model"`
+	Stream bool   `json:"stream"`
 	// For multimodal requests, prompt should be an object with prompt_string and multimodal_data
 	// For regular requests, prompt is a string
-	Prompt        interface{} `json:"prompt"` // Can be string or object with prompt_string and multimodal_data
-	Temperature   float32     `json:"temperature"`
-	DryMultiplier float32     `json:"dry_multiplier"`
-	Stop          []string    `json:"stop"`
-	MinP          float32     `json:"min_p"`
-	NPredict      int32       `json:"n_predict"`
+	Prompt        interface{} `json:"prompt"` // Can be string or object with prompt_string and multimodal_data
+	Temperature   float32     `json:"temperature"`
+	DryMultiplier float32     `json:"dry_multiplier"`
+	Stop          []string    `json:"stop"`
+	MinP          float32     `json:"min_p"`
+	NPredict      int32       `json:"n_predict"`
 	// MaxTokens int `json:"max_tokens"`
 	// DryBase float64 `json:"dry_base"`
 	// DryAllowedLength int `json:"dry_allowed_length"`
@@ -471,12 +472,11 @@
 type PromptObject struct {
 	PromptString   string   `json:"prompt_string"`
 	MultimodalData []string `json:"multimodal_data,omitempty"` // Alternative field name used by some llama.cpp implementations
-	ImageData      []string `json:"image_data,omitempty"`      // For compatibility
+	ImageData      []string `json:"image_data,omitempty"` // For compatibility
 }
 
-func NewLCPReq(prompt string, multimodalData []string, props map[string]float32, stopStrings []string) LlamaCPPReq {
+func NewLCPReq(prompt, model string, multimodalData []string, props map[string]float32, stopStrings []string) LlamaCPPReq {
 	var finalPrompt interface{}
-
 	if len(multimodalData) > 0 {
 		// When multimodal data is present, use the object format as per Python example:
 		// { "prompt": { "prompt_string": "...", "multimodal_data": [...] } }
@@ -489,8 +489,8 @@ func NewLCPReq(prompt string, multimodalData []string, props map[string]float32,
 		// When no multimodal data, use plain string
 		finalPrompt = prompt
 	}
-
 	return LlamaCPPReq{
+		Model:         model,
 		Stream:        true,
 		Prompt:        finalPrompt,
 		Temperature:   props["temperature"],
@@ -505,3 +505,27 @@ type LlamaCPPResp struct {
 	Content string `json:"content"`
 	Stop    bool   `json:"stop"`
 }
+
+type LCPModels struct {
+	Data []struct {
+		ID      string `json:"id"`
+		Object  string `json:"object"`
+		OwnedBy string `json:"owned_by"`
+		Created int    `json:"created"`
+		InCache bool   `json:"in_cache"`
+		Path    string `json:"path"`
+		Status  struct {
+			Value string   `json:"value"`
+			Args  []string `json:"args"`
+		} `json:"status"`
+	} `json:"data"`
+	Object string `json:"object"`
+}
+
+func (lcp *LCPModels) ListModels() []string {
+	resp := []string{}
+	for _, model := range lcp.Data {
+		resp = append(resp, model.ID)
+	}
+	return resp
+}
diff --git a/tui.go b/tui.go
@@ -961,11 +961,16 @@ func init() {
 			}
 			updateStatusLine()
 		} else {
-			// For non-OpenRouter APIs, use the old logic
-			go func() {
-				fetchLCPModelName() // blocks
-				updateStatusLine()
-			}()
+			if len(LocalModels) > 0 {
+				currentLocalModelIndex = (currentLocalModelIndex + 1) % len(LocalModels)
+				chatBody.Model = LocalModels[currentLocalModelIndex]
+			}
+			updateStatusLine()
+			// // For non-OpenRouter APIs, use the old logic
+			// go func() {
+			// 	fetchLCPModelName() // blocks
+			// 	updateStatusLine()
+			// }()
 		}
 		return nil
 	}
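For orientation, a minimal self-contained sketch of the flow this commit adds: decode a models listing of the shape `LCPModels` expects (what `fetchLCPModels` reads from `cfg.FetchModelNameAPI`), then cycle `chatBody.Model` through the returned IDs with a modulo index, the way the model-switch branch in tui.go now does. The JSON payload and model file names below are invented for illustration, and the struct is trimmed to the fields the sketch uses; only the decoding shape and the cycling logic come from the diff above.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// LCPModels mirrors the struct added in models/models.go,
// trimmed to the fields this sketch actually reads.
type LCPModels struct {
	Data []struct {
		ID string `json:"id"`
	} `json:"data"`
	Object string `json:"object"`
}

// ListModels collects the model IDs, as in models/models.go.
func (lcp *LCPModels) ListModels() []string {
	resp := []string{}
	for _, model := range lcp.Data {
		resp = append(resp, model.ID)
	}
	return resp
}

func main() {
	// Hypothetical response body from the llama.cpp models endpoint
	// (cfg.FetchModelNameAPI in the commit); the real payload has more fields.
	raw := `{"object":"list","data":[{"id":"gemma-3-4b-it-q4.gguf"},{"id":"qwen2.5-7b-instruct-q5.gguf"}]}`

	data := &LCPModels{}
	if err := json.Unmarshal([]byte(raw), data); err != nil {
		panic(err)
	}
	// What fetchLCPModels returns into the LocalModels slice.
	localModels := data.ListModels()

	// The model-switch branch cycles through LocalModels with a modulo index.
	currentLocalModelIndex := 0
	for i := 0; i < 3; i++ {
		currentLocalModelIndex = (currentLocalModelIndex + 1) % len(localModels)
		fmt.Println("chatBody.Model ->", localModels[currentLocalModelIndex])
	}
}
```

Running it prints the would-be `chatBody.Model` value for each simulated switch, wrapping back to the first entry after the last one.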
