diff options
| author | Grail Finder <wohilas@gmail.com> | 2026-03-02 11:25:20 +0300 |
|---|---|---|
| committer | Grail Finder <wohilas@gmail.com> | 2026-03-02 11:25:20 +0300 |
| commit | 4f6000a43ad2ee019a799c5b5154af9f853e2b83 (patch) | |
| tree | c0a74081e043aeb1e479c8540789050be468c4d9 | |
| parent | 9ba46b40cc7fb058ff506ff554ba19e7337448d9 (diff) | |
Enha: check if model has vision before giving it vision tools
| -rw-r--r-- | bot.go | 28 | ||||
| -rw-r--r-- | models/models.go | 14 | ||||
| -rw-r--r-- | models/openrouter.go | 13 | ||||
| -rw-r--r-- | popups.go | 2 | ||||
| -rw-r--r-- | tools.go | 41 |
5 files changed, 91 insertions, 7 deletions
```diff
@@ -433,6 +433,33 @@ func isModelLoaded(modelID string) (bool, error) {
 	return false, nil
 }
 
+func ModelHasVision(api, modelID string) bool {
+	switch {
+	case strings.Contains(api, "deepseek"):
+		return false
+	case strings.Contains(api, "openrouter"):
+		resp, err := http.Get("https://openrouter.ai/api/v1/models")
+		if err != nil {
+			logger.Warn("failed to fetch OR models for vision check", "error", err)
+			return false
+		}
+		defer resp.Body.Close()
+		orm := &models.ORModels{}
+		if err := json.NewDecoder(resp.Body).Decode(orm); err != nil {
+			logger.Warn("failed to decode OR models for vision check", "error", err)
+			return false
+		}
+		return orm.HasVision(modelID)
+	default:
+		models, err := fetchLCPModelsWithStatus()
+		if err != nil {
+			logger.Warn("failed to fetch LCP models for vision check", "error", err)
+			return false
+		}
+		return models.HasVision(modelID)
+	}
+}
+
 // monitorModelLoad starts a goroutine that periodically checks if the specified model is loaded.
 func monitorModelLoad(modelID string) {
 	go func() {
@@ -1381,6 +1408,7 @@ func updateModelLists() {
 				chatBody.Model = m
 				cachedModelColor = "green"
 				updateStatusLine()
+				UpdateToolCapabilities()
 				app.Draw()
 				return
 			}
diff --git a/models/models.go b/models/models.go
index 973eb3d..97d0272 100644
--- a/models/models.go
+++ b/models/models.go
@@ -608,6 +608,20 @@ func (lcp *LCPModels) ListModels() []string {
 	return resp
 }
 
+func (lcp *LCPModels) HasVision(modelID string) bool {
+	for _, m := range lcp.Data {
+		if m.ID == modelID {
+			args := m.Status.Args
+			for i := 0; i < len(args)-1; i++ {
+				if args[i] == "--mmproj" {
+					return true
+				}
+			}
+		}
+	}
+	return false
+}
+
 type ResponseStats struct {
 	Tokens   int
 	Duration float64
diff --git a/models/openrouter.go b/models/openrouter.go
index 62709a1..2dd49cc 100644
--- a/models/openrouter.go
+++ b/models/openrouter.go
@@ -172,3 +172,16 @@ func (orm *ORModels) ListModels(free bool) []string {
 	}
 	return resp
 }
+
+func (orm *ORModels) HasVision(modelID string) bool {
+	for i := range orm.Data {
+		if orm.Data[i].ID == modelID {
+			for _, mod := range orm.Data[i].Architecture.InputModalities {
+				if mod == "image" {
+					return true
+				}
+			}
+		}
+	}
+	return false
+}
@@ -143,6 +143,7 @@ func showAPILinkSelectionPopup() {
 	apiListWidget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) {
 		// Update the API in config
 		cfg.CurrentAPI = mainText
+		UpdateToolCapabilities()
 		// Update model list based on new API
 		// Helper function to get model list for a given API (same as in props_table.go)
 		getModelListForAPI := func(api string) []string {
@@ -162,6 +163,7 @@ func showAPILinkSelectionPopup() {
 		if len(newModelList) > 0 && !slices.Contains(newModelList, chatBody.Model) {
 			chatBody.Model = strings.TrimPrefix(newModelList[0], models.LoadedMark)
 			cfg.CurrentModel = chatBody.Model
+			UpdateToolCapabilities()
 		}
 		pages.RemovePage("apiLinkSelectionPopup")
 		app.SetFocus(textArea)
@@ -202,6 +202,7 @@ var (
 	windowToolsAvailable bool
 	xdotoolPath          string
 	maimPath             string
+	modelHasVision       bool
 )
 
 func init() {
@@ -233,6 +234,29 @@ func checkWindowTools() {
 	}
 }
 
+func UpdateToolCapabilities() {
+	if !cfg.ToolUse {
+		return
+	}
+	modelHasVision = false
+	if cfg == nil || cfg.CurrentAPI == "" {
+		logger.Warn("cannot determine model capabilities: cfg or CurrentAPI is nil")
+		registerWindowTools()
+		return
+	}
+	prevHasVision := modelHasVision
+	modelHasVision = ModelHasVision(cfg.CurrentAPI, cfg.CurrentModel)
+	if modelHasVision {
+		logger.Info("model has vision support", "model", cfg.CurrentModel, "api", cfg.CurrentAPI)
+	} else {
+		logger.Info("model does not have vision support", "model", cfg.CurrentModel, "api", cfg.CurrentAPI)
+		if windowToolsAvailable && !prevHasVision && modelHasVision == false {
+			notifyUser("window tools", "Window capture-and-view unavailable: model lacks vision support")
+		}
+	}
+	registerWindowTools()
+}
+
 // getWebAgentClient returns a singleton AgentClient for web agents.
 func getWebAgentClient() *agent.AgentClient {
 	webAgentClientOnce.Do(func() {
@@ -1344,9 +1368,8 @@ func registerWindowTools() {
 	if windowToolsAvailable {
 		fnMap["list_windows"] = listWindows
 		fnMap["capture_window"] = captureWindow
-		fnMap["capture_window_and_view"] = captureWindowAndView
-		baseTools = append(baseTools,
-			models.Tool{
+		windowTools := []models.Tool{
+			{
 				Type: "function",
 				Function: models.ToolFunc{
 					Name: "list_windows",
@@ -1358,7 +1381,7 @@ func registerWindowTools() {
 					},
 				},
 			},
-			models.Tool{
+			{
 				Type: "function",
 				Function: models.ToolFunc{
 					Name: "capture_window",
@@ -1375,7 +1398,10 @@ func registerWindowTools() {
 					},
 				},
 			},
-			models.Tool{
+		}
+		if modelHasVision {
+			fnMap["capture_window_and_view"] = captureWindowAndView
+			windowTools = append(windowTools, models.Tool{
 				Type: "function",
 				Function: models.ToolFunc{
 					Name: "capture_window_and_view",
@@ -1391,8 +1417,9 @@ func registerWindowTools() {
 						},
 					},
 				},
-			},
-		)
+			})
+		}
+		baseTools = append(baseTools, windowTools...)
 		toolSysMsg += windowToolSysMsg
 	}
 }
```
