| author | Grail Finder <wohilas@gmail.com> | 2025-12-12 14:07:10 +0300 |
|---|---|---|
| committer | Grail Finder <wohilas@gmail.com> | 2025-12-12 14:07:10 +0300 |
| commit | 9edda1fecf4d0fc4dc9ad92e008a986b74f50c80 (patch) | |
| tree | bcfe133ce83aced1ff71ac37a7471aa25f888b84 | |
| parent | 2e2e6e9f9cb87ab31fc7dc7e0e196ece884da4bd (diff) | |
Feat: llama.cpp model switch
| -rw-r--r-- | bot.go | 29 |
| -rw-r--r-- | llm.go | 2 |
| -rw-r--r-- | main.go | 17 |
| -rw-r--r-- | models/models.go | 92 |
| -rw-r--r-- | tui.go | 15 |
5 files changed, 105 insertions, 50 deletions
diff --git a/bot.go b/bot.go
@@ -65,6 +65,7 @@ var (
 		"google/gemma-3-27b-it:free",
 		"meta-llama/llama-3.3-70b-instruct:free",
 	}
+	LocalModels = []string{}
 )
 
 // cleanNullMessages removes messages with null or empty content to prevent API issues
@@ -187,7 +188,7 @@ func createClient(connectTimeout time.Duration) *http.Client {
 	}
 }
 
-func fetchLCPModelName() *models.LLMModels {
+func fetchLCPModelName() *models.LCPModels { //nolint
 	resp, err := httpClient.Get(cfg.FetchModelNameAPI)
 	if err != nil {
@@ -199,7 +200,7 @@ func fetchLCPModelName() *models.LLMModels {
 		return nil
 	}
 	defer resp.Body.Close()
-	llmModel := models.LLMModels{}
+	llmModel := models.LCPModels{}
 	if err := json.NewDecoder(resp.Body).Decode(&llmModel); err != nil {
 		logger.Warn("failed to decode resp", "link", cfg.FetchModelNameAPI, "error", err)
 		return nil
@@ -255,6 +256,24 @@ func fetchORModels(free bool) ([]string, error) {
 	return freeModels, nil
 }
 
+func fetchLCPModels() ([]string, error) {
+	resp, err := http.Get(cfg.FetchModelNameAPI)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != 200 {
+		err := fmt.Errorf("failed to fetch or models; status: %s", resp.Status)
+		return nil, err
+	}
+	data := &models.LCPModels{}
+	if err := json.NewDecoder(resp.Body).Decode(data); err != nil {
+		return nil, err
+	}
+	localModels := data.ListModels()
+	return localModels, nil
+}
+
 func sendMsgToLLM(body io.Reader) {
 	choseChunkParser()
@@ -869,6 +888,12 @@ func init() {
 			}
 		}()
 	}
+	go func() {
+		LocalModels, err = fetchLCPModels()
+		if err != nil {
+			logger.Error("failed to fetch llama.cpp models", "error", err)
+		}
+	}()
 	choseChunkParser()
 	httpClient = createClient(time.Second * 15)
 	if cfg.TTS_ENABLED {
diff --git a/llm.go b/llm.go
@@ -157,7 +157,7 @@ func (lcp LCPCompletion) FormMsg(msg, role string, resume bool) (io.Reader, erro
 	logger.Debug("checking prompt for /completion", "tool_use", cfg.ToolUse, "msg", msg, "resume", resume,
 		"prompt", prompt, "multimodal_data_count", len(multimodalData))
-	payload := models.NewLCPReq(prompt, multimodalData, defaultLCPProps, chatBody.MakeStopSlice())
+	payload := models.NewLCPReq(prompt, chatBody.Model, multimodalData, defaultLCPProps, chatBody.MakeStopSlice())
 	data, err := json.Marshal(payload)
 	if err != nil {
 		logger.Error("failed to form a msg", "error", err)
diff --git a/main.go b/main.go
@@ -9,14 +9,15 @@ import (
 )
 
 var (
-	botRespMode         = false
-	editMode            = false
-	roleEditMode        = false
-	injectRole          = true
-	selectedIndex       = int(-1)
-	currentAPIIndex     = 0 // Index to track current API in ApiLinks slice
-	currentORModelIndex = 0 // Index to track current OpenRouter model in ORFreeModels slice
-	shellMode           = false
+	botRespMode            = false
+	editMode               = false
+	roleEditMode           = false
+	injectRole             = true
+	selectedIndex          = int(-1)
+	currentAPIIndex        = 0 // Index to track current API in ApiLinks slice
+	currentORModelIndex    = 0 // Index to track current OpenRouter model in ORFreeModels slice
+	currentLocalModelIndex = 0 // Index to track current llama.cpp model
+	shellMode              = false
 	// indexLine = "F12 to show keys help | bot resp mode: [orange:-:b]%v[-:-:-] (F6) | card's char: [orange:-:b]%s[-:-:-] (ctrl+s) | chat: [orange:-:b]%s[-:-:-] (F1) | toolUseAdviced: [orange:-:b]%v[-:-:-] (ctrl+k) | model: [orange:-:b]%s[-:-:-] (ctrl+l) | skip LLM resp: [orange:-:b]%v[-:-:-] (F10)\nAPI_URL: [orange:-:b]%s[-:-:-] (ctrl+v) | ThinkUse: [orange:-:b]%v[-:-:-] (ctrl+p) | Log Level: [orange:-:b]%v[-:-:-] (ctrl+p) | Recording: [orange:-:b]%v[-:-:-] (ctrl+r) | Writing as: [orange:-:b]%s[-:-:-] (ctrl+q)"
 	indexLineCompletion = "F12 to show keys help | bot resp mode: [orange:-:b]%v[-:-:-] (F6) | card's char: [orange:-:b]%s[-:-:-] (ctrl+s) | chat: [orange:-:b]%s[-:-:-] (F1) | toolUseAdviced: [orange:-:b]%v[-:-:-] (ctrl+k) | model: [orange:-:b]%s[-:-:-] (ctrl+l) | skip LLM resp: [orange:-:b]%v[-:-:-] (F10)\nAPI_URL: [orange:-:b]%s[-:-:-] (ctrl+v) | Insert <think>: [orange:-:b]%v[-:-:-] (ctrl+p) | Log Level: [orange:-:b]%v[-:-:-] (ctrl+p) | Recording: [orange:-:b]%v[-:-:-] (ctrl+r) | Writing as: [orange:-:b]%s[-:-:-] (ctrl+q) | Bot will write as [orange:-:b]%s[-:-:-] (ctrl+x) | role_inject [orange:-:b]%v[-:-:-]"
 	focusSwitcher = map[tview.Primitive]tview.Primitive{}
diff --git a/models/models.go b/models/models.go
index baadc8d..912f72b 100644
--- a/models/models.go
+++ b/models/models.go
@@ -89,10 +89,10 @@ type ImageContentPart struct {
 
 // RoleMsg represents a message with content that can be either a simple string or structured content parts
 type RoleMsg struct {
-	Role         string        `json:"role"`
-	Content      string        `json:"-"`
-	ContentParts []interface{} `json:"-"`
-	ToolCallID   string        `json:"tool_call_id,omitempty"` // For tool response messages
+	Role            string        `json:"role"`
+	Content         string        `json:"-"`
+	ContentParts    []interface{} `json:"-"`
+	ToolCallID      string        `json:"tool_call_id,omitempty"` // For tool response messages
 	hasContentParts bool          // Flag to indicate which content type to marshal
 }
@@ -215,8 +215,8 @@ func (m RoleMsg) ToPrompt() string {
 // NewRoleMsg creates a simple RoleMsg with string content
 func NewRoleMsg(role, content string) RoleMsg {
 	return RoleMsg{
-		Role:    role,
-		Content: content,
+		Role:            role,
+		Content:         content,
 		hasContentParts: false,
 	}
 }
@@ -420,34 +420,35 @@ type OpenAIReq struct {
 
 // ===
 
-type LLMModels struct {
-	Object string `json:"object"`
-	Data   []struct {
-		ID      string `json:"id"`
-		Object  string `json:"object"`
-		Created int    `json:"created"`
-		OwnedBy string `json:"owned_by"`
-		Meta    struct {
-			VocabType int   `json:"vocab_type"`
-			NVocab    int   `json:"n_vocab"`
-			NCtxTrain int   `json:"n_ctx_train"`
-			NEmbd     int   `json:"n_embd"`
-			NParams   int64 `json:"n_params"`
-			Size      int64 `json:"size"`
-		} `json:"meta"`
-	} `json:"data"`
-}
+// type LLMModels struct {
+// 	Object string `json:"object"`
+// 	Data   []struct {
+// 		ID      string `json:"id"`
+// 		Object  string `json:"object"`
+// 		Created int    `json:"created"`
+// 		OwnedBy string `json:"owned_by"`
+// 		Meta    struct {
+// 			VocabType int   `json:"vocab_type"`
+// 			NVocab    int   `json:"n_vocab"`
+// 			NCtxTrain int   `json:"n_ctx_train"`
+// 			NEmbd     int   `json:"n_embd"`
+// 			NParams   int64 `json:"n_params"`
+// 			Size      int64 `json:"size"`
+// 		} `json:"meta"`
+// 	} `json:"data"`
+// }
 
 type LlamaCPPReq struct {
-	Stream bool `json:"stream"`
+	Model  string `json:"model"`
+	Stream bool   `json:"stream"`
 	// For multimodal requests, prompt should be an object with prompt_string and multimodal_data
 	// For regular requests, prompt is a string
-	Prompt        interface{} `json:"prompt"` // Can be string or object with prompt_string and multimodal_data
-	Temperature   float32     `json:"temperature"`
-	DryMultiplier float32     `json:"dry_multiplier"`
-	Stop          []string    `json:"stop"`
-	MinP          float32     `json:"min_p"`
-	NPredict      int32       `json:"n_predict"`
+	Prompt        interface{} `json:"prompt"` // Can be string or object with prompt_string and multimodal_data
+	Temperature   float32     `json:"temperature"`
+	DryMultiplier float32     `json:"dry_multiplier"`
+	Stop          []string    `json:"stop"`
+	MinP          float32     `json:"min_p"`
+	NPredict      int32       `json:"n_predict"`
 	// MaxTokens int `json:"max_tokens"`
 	// DryBase float64 `json:"dry_base"`
 	// DryAllowedLength int `json:"dry_allowed_length"`
@@ -471,12 +472,11 @@
 type PromptObject struct {
 	PromptString   string   `json:"prompt_string"`
 	MultimodalData []string `json:"multimodal_data,omitempty"` // Alternative field name used by some llama.cpp implementations
-	ImageData      []string `json:"image_data,omitempty"`      // For compatibility
+	ImageData      []string `json:"image_data,omitempty"` // For compatibility
 }
 
-func NewLCPReq(prompt string, multimodalData []string, props map[string]float32, stopStrings []string) LlamaCPPReq {
+func NewLCPReq(prompt, model string, multimodalData []string, props map[string]float32, stopStrings []string) LlamaCPPReq {
 	var finalPrompt interface{}
-
 	if len(multimodalData) > 0 {
 		// When multimodal data is present, use the object format as per Python example:
 		// { "prompt": { "prompt_string": "...", "multimodal_data": [...] } }
@@ -489,8 +489,8 @@ func NewLCPReq(prompt string, multimodalData []string, props map[string]float32,
 		// When no multimodal data, use plain string
 		finalPrompt = prompt
 	}
-
 	return LlamaCPPReq{
+		Model:         model,
 		Stream:        true,
 		Prompt:        finalPrompt,
 		Temperature:   props["temperature"],
@@ -505,3 +505,27 @@ type LlamaCPPResp struct {
 	Content string `json:"content"`
 	Stop    bool   `json:"stop"`
 }
+
+type LCPModels struct {
+	Data []struct {
+		ID      string `json:"id"`
+		Object  string `json:"object"`
+		OwnedBy string `json:"owned_by"`
+		Created int    `json:"created"`
+		InCache bool   `json:"in_cache"`
+		Path    string `json:"path"`
+		Status  struct {
+			Value string   `json:"value"`
+			Args  []string `json:"args"`
+		} `json:"status"`
+	} `json:"data"`
+	Object string `json:"object"`
+}
+
+func (lcp *LCPModels) ListModels() []string {
+	resp := []string{}
+	for _, model := range lcp.Data {
+		resp = append(resp, model.ID)
+	}
+	return resp
+}
diff --git a/tui.go b/tui.go
@@ -961,11 +961,16 @@ func init() {
 			}
 			updateStatusLine()
 		} else {
-			// For non-OpenRouter APIs, use the old logic
-			go func() {
-				fetchLCPModelName() // blocks
-				updateStatusLine()
-			}()
+			if len(LocalModels) > 0 {
+				currentLocalModelIndex = (currentLocalModelIndex + 1) % len(LocalModels)
+				chatBody.Model = LocalModels[currentLocalModelIndex]
+			}
+			updateStatusLine()
+			// // For non-OpenRouter APIs, use the old logic
+			// go func() {
+			// 	fetchLCPModelName() // blocks
+			// 	updateStatusLine()
+			// }()
 		}
 		return nil
 	}
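For orientation, a minimal self-contained sketch of the flow this commit adds: decode a models listing of the shape `LCPModels` expects (what `fetchLCPModels` reads from `cfg.FetchModelNameAPI`), then cycle `chatBody.Model` through the returned IDs with a modulo index, the way the model-switch branch in tui.go now does. The JSON payload and model file names below are invented for illustration, and the struct is trimmed to the fields the sketch uses; only the decoding shape and the cycling logic come from the diff above.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// LCPModels mirrors the struct added in models/models.go,
// trimmed to the fields this sketch actually reads.
type LCPModels struct {
	Data []struct {
		ID string `json:"id"`
	} `json:"data"`
	Object string `json:"object"`
}

// ListModels collects the model IDs, as in models/models.go.
func (lcp *LCPModels) ListModels() []string {
	resp := []string{}
	for _, model := range lcp.Data {
		resp = append(resp, model.ID)
	}
	return resp
}

func main() {
	// Hypothetical response body from the llama.cpp models endpoint
	// (cfg.FetchModelNameAPI in the commit); the real payload has more fields.
	raw := `{"object":"list","data":[{"id":"gemma-3-4b-it-q4.gguf"},{"id":"qwen2.5-7b-instruct-q5.gguf"}]}`

	data := &LCPModels{}
	if err := json.Unmarshal([]byte(raw), data); err != nil {
		panic(err)
	}
	// What fetchLCPModels returns into the LocalModels slice.
	localModels := data.ListModels()

	// The model-switch branch cycles through LocalModels with a modulo index.
	currentLocalModelIndex := 0
	for i := 0; i < 3; i++ {
		currentLocalModelIndex = (currentLocalModelIndex + 1) % len(localModels)
		fmt.Println("chatBody.Model ->", localModels[currentLocalModelIndex])
	}
}
```

Running it prints the would-be `chatBody.Model` value for each simulated switch, wrapping back to the first entry after the last one.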
