summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Grail Finder <wohilas@gmail.com> 2026-03-02 11:25:20 +0300
committer Grail Finder <wohilas@gmail.com> 2026-03-02 11:25:20 +0300
commit 4f6000a43ad2ee019a799c5b5154af9f853e2b83 (patch)
tree c0a74081e043aeb1e479c8540789050be468c4d9
parent 9ba46b40cc7fb058ff506ff554ba19e7337448d9 (diff)
Enha: check if model has vision before giving it vision tools
-rw-r--r-- bot.go 28
-rw-r--r-- models/models.go 14
-rw-r--r-- models/openrouter.go 13
-rw-r--r-- popups.go 2
-rw-r--r-- tools.go 41
5 files changed, 91 insertions, 7 deletions
diff --git a/bot.go b/bot.go
index 661ade0..98dd27f 100644
--- a/bot.go
+++ b/bot.go
@@ -433,6 +433,33 @@ func isModelLoaded(modelID string) (bool, error) {
return false, nil
}
+// ModelHasVision reports whether modelID, served through api, accepts image input.
+//
+// The backend is selected by substring match on api:
+//   - "deepseek": always false; no request is made.
+//   - "openrouter": the OpenRouter model list is downloaded and queried.
+//   - anything else: the list returned by fetchLCPModelsWithStatus is queried.
+//
+// Every failure (transport error, non-200 status, undecodable body) is logged
+// as a warning and reported as false.
+func ModelHasVision(api, modelID string) bool {
+	switch {
+	case strings.Contains(api, "deepseek"):
+		return false
+	case strings.Contains(api, "openrouter"):
+		// NOTE(review): http.Get goes through http.DefaultClient, which has no
+		// timeout; consider a client with a Timeout so a stalled request
+		// cannot block the caller indefinitely.
+		resp, err := http.Get("https://openrouter.ai/api/v1/models")
+		if err != nil {
+			logger.Warn("failed to fetch OR models for vision check", "error", err)
+			return false
+		}
+		defer resp.Body.Close()
+		// An error response may still be valid JSON; without this check it
+		// would decode into an empty list and the failure would go unlogged.
+		if resp.StatusCode != http.StatusOK {
+			logger.Warn("failed to fetch OR models for vision check", "status", resp.Status)
+			return false
+		}
+		orm := &models.ORModels{}
+		if err := json.NewDecoder(resp.Body).Decode(orm); err != nil {
+			logger.Warn("failed to decode OR models for vision check", "error", err)
+			return false
+		}
+		return orm.HasVision(modelID)
+	default:
+		// Named lcpModels so the imported models package is not shadowed.
+		lcpModels, err := fetchLCPModelsWithStatus()
+		if err != nil {
+			logger.Warn("failed to fetch LCP models for vision check", "error", err)
+			return false
+		}
+		return lcpModels.HasVision(modelID)
+	}
+}
+
// monitorModelLoad starts a goroutine that periodically checks if the specified model is loaded.
func monitorModelLoad(modelID string) {
go func() {
@@ -1381,6 +1408,7 @@ func updateModelLists() {
chatBody.Model = m
cachedModelColor = "green"
updateStatusLine()
+ UpdateToolCapabilities()
app.Draw()
return
}
diff --git a/models/models.go b/models/models.go
index 973eb3d..97d0272 100644
--- a/models/models.go
+++ b/models/models.go
@@ -608,6 +608,20 @@ func (lcp *LCPModels) ListModels() []string {
return resp
}
+// HasVision reports whether the model identified by modelID has a "--mmproj"
+// flag among its status arguments. It returns false when no entry matches
+// modelID or when no matching entry carries the flag.
+func (lcp *LCPModels) HasVision(modelID string) bool {
+	for _, model := range lcp.Data {
+		if model.ID != modelID {
+			continue
+		}
+		launchArgs := model.Status.Args
+		for pos, arg := range launchArgs {
+			// The final argument is never examined; presumably because
+			// "--mmproj" must be followed by a value -- TODO confirm.
+			if pos == len(launchArgs)-1 {
+				break
+			}
+			if arg == "--mmproj" {
+				return true
+			}
+		}
+	}
+	return false
+}
+
type ResponseStats struct {
Tokens int
Duration float64
diff --git a/models/openrouter.go b/models/openrouter.go
index 62709a1..2dd49cc 100644
--- a/models/openrouter.go
+++ b/models/openrouter.go
@@ -172,3 +172,16 @@ func (orm *ORModels) ListModels(free bool) []string {
}
return resp
}
+
+// HasVision reports whether the model identified by modelID lists "image"
+// among its architecture input modalities. It returns false when no entry
+// matches modelID or when no matching entry lists that modality.
+func (orm *ORModels) HasVision(modelID string) bool {
+	for idx := range orm.Data {
+		if orm.Data[idx].ID != modelID {
+			continue
+		}
+		modalities := orm.Data[idx].Architecture.InputModalities
+		for j := 0; j < len(modalities); j++ {
+			if modalities[j] == "image" {
+				return true
+			}
+		}
+	}
+	return false
+}
diff --git a/popups.go b/popups.go
index 471a8d9..9998daa 100644
--- a/popups.go
+++ b/popups.go
@@ -143,6 +143,7 @@ func showAPILinkSelectionPopup() {
apiListWidget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) {
// Update the API in config
cfg.CurrentAPI = mainText
+ UpdateToolCapabilities()
// Update model list based on new API
// Helper function to get model list for a given API (same as in props_table.go)
getModelListForAPI := func(api string) []string {
@@ -162,6 +163,7 @@ func showAPILinkSelectionPopup() {
if len(newModelList) > 0 && !slices.Contains(newModelList, chatBody.Model) {
chatBody.Model = strings.TrimPrefix(newModelList[0], models.LoadedMark)
cfg.CurrentModel = chatBody.Model
+ UpdateToolCapabilities()
}
pages.RemovePage("apiLinkSelectionPopup")
app.SetFocus(textArea)
diff --git a/tools.go b/tools.go
index 494711e..fc76933 100644
--- a/tools.go
+++ b/tools.go
@@ -202,6 +202,7 @@ var (
windowToolsAvailable bool
xdotoolPath string
maimPath string
+ modelHasVision bool
)
func init() {
@@ -233,6 +234,29 @@ func checkWindowTools() {
}
}
+// UpdateToolCapabilities refreshes modelHasVision for the currently selected
+// API and model, then calls registerWindowTools so the registered window
+// tools reflect the new value.
+//
+// It does nothing when cfg.ToolUse is false. When cfg is nil or no API is
+// selected, the model is treated as having no vision support. When the model
+// lacks vision and window tools are available, the user is notified that
+// capture-and-view is unavailable.
+func UpdateToolCapabilities() {
+	// Guard the dereference: a nil cfg must reach the warning path below
+	// instead of panicking on cfg.ToolUse.
+	if cfg != nil && !cfg.ToolUse {
+		return
+	}
+	modelHasVision = false
+	if cfg == nil || cfg.CurrentAPI == "" {
+		logger.Warn("cannot determine model capabilities: cfg or CurrentAPI is nil")
+		registerWindowTools()
+		return
+	}
+	modelHasVision = ModelHasVision(cfg.CurrentAPI, cfg.CurrentModel)
+	if modelHasVision {
+		logger.Info("model has vision support", "model", cfg.CurrentModel, "api", cfg.CurrentAPI)
+	} else {
+		logger.Info("model does not have vision support", "model", cfg.CurrentModel, "api", cfg.CurrentAPI)
+		// NOTE(review): this fires on every call for a non-vision model, not
+		// only on a vision -> no-vision transition; confirm that is intended.
+		if windowToolsAvailable {
+			notifyUser("window tools", "Window capture-and-view unavailable: model lacks vision support")
+		}
+	}
+	// NOTE(review): registerWindowTools appears to append to baseTools and
+	// toolSysMsg on each call; verify repeated calls do not accumulate
+	// duplicate tool entries.
+	registerWindowTools()
+}
+
// getWebAgentClient returns a singleton AgentClient for web agents.
func getWebAgentClient() *agent.AgentClient {
webAgentClientOnce.Do(func() {
@@ -1344,9 +1368,8 @@ func registerWindowTools() {
if windowToolsAvailable {
fnMap["list_windows"] = listWindows
fnMap["capture_window"] = captureWindow
- fnMap["capture_window_and_view"] = captureWindowAndView
- baseTools = append(baseTools,
- models.Tool{
+ windowTools := []models.Tool{
+ {
Type: "function",
Function: models.ToolFunc{
Name: "list_windows",
@@ -1358,7 +1381,7 @@ func registerWindowTools() {
},
},
},
- models.Tool{
+ {
Type: "function",
Function: models.ToolFunc{
Name: "capture_window",
@@ -1375,7 +1398,10 @@ func registerWindowTools() {
},
},
},
- models.Tool{
+ }
+ if modelHasVision {
+ fnMap["capture_window_and_view"] = captureWindowAndView
+ windowTools = append(windowTools, models.Tool{
Type: "function",
Function: models.ToolFunc{
Name: "capture_window_and_view",
@@ -1391,8 +1417,9 @@ func registerWindowTools() {
},
},
},
- },
- )
+ })
+ }
+ baseTools = append(baseTools, windowTools...)
toolSysMsg += windowToolSysMsg
}
}