Feat: read img tool for chat endpoint

author: Grail Finder <wohilas@gmail.com> 2026-03-02 07:12:28 +0300
committer: Grail Finder <wohilas@gmail.com> 2026-03-02 07:12:28 +0300
commit: caac1d397ad8e21c22219708c070e5e6608b7859 (patch)
tree: 503e677925292e8d4b763de8a14c5c6b90db3bdf /bot.go
parent: 742f1ca838f97cf7deaae624d93f307632863460 (diff)
1 files changed, 50 insertions, 7 deletions
diff --git a/bot.go b/bot.go
index bf3a239..b3ae41e 100644
--- a/bot.go
+++ b/bot.go
@@ -1174,17 +1174,60 @@ func findCall(msg, toolCall string) bool {
 	toolRunningMode = false
 	toolMsg := string(resp)
 	logger.Info("llm used a tool call", "tool_name", fc.Name, "too_args", fc.Args, "id", fc.ID, "tool_resp", toolMsg)
-	fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
-		"\n\n", len(chatBody.Messages), cfg.ToolRole, toolMsg)
 	// Create tool response message with the proper tool_call_id
 	// Mark shell commands as always visible
 	isShellCommand := fc.Name == "execute_command"
-	toolResponseMsg := models.RoleMsg{
-		Role:           cfg.ToolRole,
-		Content:        toolMsg,
-		ToolCallID:     lastToolCall.ID,
-		IsShellCommand: isShellCommand,
+
+	// Check if response is multimodal content (image)
+	var toolResponseMsg models.RoleMsg
+	if strings.HasPrefix(strings.TrimSpace(toolMsg), `{"type":"multimodal_content"`) {
+		// Parse multimodal content response
+		multimodalResp := models.MultimodalToolResp{}
+		if err := json.Unmarshal([]byte(toolMsg), &multimodalResp); err == nil && multimodalResp.Type == "multimodal_content" {
+			// Create RoleMsg with ContentParts
+			var contentParts []any
+			for _, part := range multimodalResp.Parts {
+				partType, ok := part["type"]
+				if !ok {
+					continue
+				}
+				if partType == "text" {
+					contentParts = append(contentParts, models.TextContentPart{Type: "text", Text: part["text"]})
+				} else if partType == "image_url" {
+					contentParts = append(contentParts, models.ImageContentPart{
+						Type: "image_url",
+						ImageURL: struct {
+							URL string `json:"url"`
+						}{URL: part["url"]},
+					})
+				}
+			}
+			toolResponseMsg = models.RoleMsg{
+				Role:            cfg.ToolRole,
+				ContentParts:    contentParts,
+				HasContentParts: true,
+				ToolCallID:      lastToolCall.ID,
+				IsShellCommand:  isShellCommand,
+			}
+		} else {
+			// Fall back to plain text content when the multimodal payload cannot be parsed
+			toolResponseMsg = models.RoleMsg{
+				Role:           cfg.ToolRole,
+				Content:        toolMsg,
+				ToolCallID:     lastToolCall.ID,
+				IsShellCommand: isShellCommand,
+			}
+		}
+	} else {
+		toolResponseMsg = models.RoleMsg{
+			Role:           cfg.ToolRole,
+			Content:        toolMsg,
+			ToolCallID:     lastToolCall.ID,
+			IsShellCommand: isShellCommand,
+		}
 	}
+	fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
+		"\n\n", len(chatBody.Messages), cfg.ToolRole, toolResponseMsg.GetText())
 	chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
 	logger.Debug("findCall: added actual tool response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "tool_call_id", toolResponseMsg.ToolCallID, "message_count_after_add", len(chatBody.Messages))
 	// Clear the stored tool call ID after using it
author	Grail Finder <wohilas@gmail.com>	2026-03-02 07:12:28 +0300
committer	Grail Finder <wohilas@gmail.com>	2026-03-02 07:12:28 +0300
commit	caac1d397ad8e21c22219708c070e5e6608b7859 (patch)
tree	503e677925292e8d4b763de8a14c5c6b90db3bdf /bot.go
parent	742f1ca838f97cf7deaae624d93f307632863460 (diff)