diff options
| author | Grail Finder <wohilas@gmail.com> | 2026-03-02 07:12:28 +0300 |
|---|---|---|
| committer | Grail Finder <wohilas@gmail.com> | 2026-03-02 07:12:28 +0300 |
| commit | caac1d397ad8e21c22219708c070e5e6608b7859 (patch) | |
| tree | 503e677925292e8d4b763de8a14c5c6b90db3bdf | |
| parent | 742f1ca838f97cf7deaae624d93f307632863460 (diff) | |
Feat: read img tool for chat endpoint
| -rw-r--r-- | bot.go | 57 | ||||
| -rw-r--r-- | models/consts.go | 3 | ||||
| -rw-r--r-- | models/models.go | 8 | ||||
| -rw-r--r-- | tools.go | 56 |
4 files changed, 113 insertions, 11 deletions
```diff
@@ -1174,17 +1174,60 @@ func findCall(msg, toolCall string) bool {
 	toolRunningMode = false
 	toolMsg := string(resp)
 	logger.Info("llm used a tool call", "tool_name", fc.Name, "too_args", fc.Args, "id", fc.ID, "tool_resp", toolMsg)
-	fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
-		"\n\n", len(chatBody.Messages), cfg.ToolRole, toolMsg)
 	// Create tool response message with the proper tool_call_id
 	// Mark shell commands as always visible
 	isShellCommand := fc.Name == "execute_command"
-	toolResponseMsg := models.RoleMsg{
-		Role:           cfg.ToolRole,
-		Content:        toolMsg,
-		ToolCallID:     lastToolCall.ID,
-		IsShellCommand: isShellCommand,
+
+	// Check if response is multimodal content (image)
+	var toolResponseMsg models.RoleMsg
+	if strings.HasPrefix(strings.TrimSpace(toolMsg), `{"type":"multimodal_content"`) {
+		// Parse multimodal content response
+		multimodalResp := models.MultimodalToolResp{}
+		if err := json.Unmarshal([]byte(toolMsg), &multimodalResp); err == nil && multimodalResp.Type == "multimodal_content" {
+			// Create RoleMsg with ContentParts
+			var contentParts []any
+			for _, part := range multimodalResp.Parts {
+				partType, ok := part["type"]
+				if !ok {
+					continue
+				}
+				if partType == "text" {
+					contentParts = append(contentParts, models.TextContentPart{Type: "text", Text: part["text"]})
+				} else if partType == "image_url" {
+					contentParts = append(contentParts, models.ImageContentPart{
+						Type: "image_url",
+						ImageURL: struct {
+							URL string `json:"url"`
+						}{URL: part["url"]},
+					})
+				}
+			}
+			toolResponseMsg = models.RoleMsg{
+				Role:            cfg.ToolRole,
+				ContentParts:    contentParts,
+				HasContentParts: true,
+				ToolCallID:      lastToolCall.ID,
+				IsShellCommand:  isShellCommand,
+			}
+		} else {
+			// Fallback to regular content
+			toolResponseMsg = models.RoleMsg{
+				Role:           cfg.ToolRole,
+				Content:        toolMsg,
+				ToolCallID:     lastToolCall.ID,
+				IsShellCommand: isShellCommand,
+			}
+		}
+	} else {
+		toolResponseMsg = models.RoleMsg{
+			Role:           cfg.ToolRole,
+			Content:        toolMsg,
+			ToolCallID:     lastToolCall.ID,
+			IsShellCommand: isShellCommand,
+		}
 	}
+	fmt.Fprintf(textView, "%s[-:-:b](%d) <%s>: [-:-:-]\n%s\n",
+		"\n\n", len(chatBody.Messages), cfg.ToolRole, toolResponseMsg.GetText())
 	chatBody.Messages = append(chatBody.Messages, toolResponseMsg)
 	logger.Debug("findCall: added actual tool response", "role", toolResponseMsg.Role, "content_len", len(toolResponseMsg.Content), "tool_call_id", toolResponseMsg.ToolCallID, "message_count_after_add", len(chatBody.Messages))
 	// Clear the stored tool call ID after using it
diff --git a/models/consts.go b/models/consts.go
index 4f61435..8b4002b 100644
--- a/models/consts.go
+++ b/models/consts.go
@@ -1,7 +1,8 @@
 package models
 
 const (
-	LoadedMark = "(loaded) "
+	LoadedMark        = "(loaded) "
+	ToolRespMultyType = "multimodel_content"
 )
 
 type APIType int
diff --git a/models/models.go b/models/models.go
index a35f16c..973eb3d 100644
--- a/models/models.go
+++ b/models/models.go
@@ -391,7 +391,6 @@ func CreateImageURLFromPath(imagePath string) (string, error) {
 	if err != nil {
 		return "", err
 	}
-
 	// Determine the image format based on file extension
 	var mimeType string
 	switch {
@@ -408,10 +407,8 @@ func CreateImageURLFromPath(imagePath string) (string, error) {
 	default:
 		mimeType = "image/jpeg" // default
 	}
-
 	// Encode to base64
 	encoded := base64.StdEncoding.EncodeToString(data)
-
 	// Create data URL
 	return fmt.Sprintf("data:%s;base64,%s", mimeType, encoded), nil
 }
@@ -623,3 +620,8 @@ type ChatRoundReq struct {
 	Regen bool
 	Resume bool
 }
+
+type MultimodalToolResp struct {
+	Type  string              `json:"type"`
+	Parts []map[string]string `json:"parts"`
+}
@@ -469,6 +469,43 @@ func fileRead(args map[string]string) []byte {
 	return jsonResult
 }
 
+func fileReadImage(args map[string]string) []byte {
+	path, ok := args["path"]
+	if !ok || path == "" {
+		msg := "path not provided to file_read_image tool"
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	path = resolvePath(path)
+	dataURL, err := models.CreateImageURLFromPath(path)
+	if err != nil {
+		msg := "failed to read image; error: " + err.Error()
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	// result := map[string]any{
+	// "type": "multimodal_content",
+	// "parts": []map[string]string{
+	// {"type": "text", "text": "Image at " + path},
+	// {"type": "image_url", "url": dataURL},
+	// },
+	// }
+	result := models.MultimodalToolResp{
+		Type: "multimodal_content",
+		Parts: []map[string]string{
+			{"type": "text", "text": "Image at " + path},
+			{"type": "image_url", "url": dataURL},
+		},
+	}
+	jsonResult, err := json.Marshal(result)
+	if err != nil {
+		msg := "failed to marshal result; error: " + err.Error()
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	return jsonResult
+}
+
 func fileWrite(args map[string]string) []byte {
 	path, ok := args["path"]
 	if !ok || path == "" {
@@ -1101,6 +1138,7 @@ var fnMap = map[string]fnSig{
 	"read_url_raw": readURLRaw,
 	"file_create": fileCreate,
 	"file_read": fileRead,
+	"file_read_image": fileReadImage,
 	"file_write": fileWrite,
 	"file_write_append": fileWriteAppend,
 	"file_edit": fileEdit,
@@ -1327,6 +1365,24 @@ var baseTools = []models.Tool{
 			},
 		},
 	},
+	// file_read_image
+	models.Tool{
+		Type: "function",
+		Function: models.ToolFunc{
+			Name:        "file_read_image",
+			Description: "Read an image file and return it for multimodal LLM viewing. Supports png, jpg, jpeg, gif, webp formats. Use when you need the LLM to see and analyze an image.",
+			Parameters: models.ToolFuncParams{
+				Type:     "object",
+				Required: []string{"path"},
+				Properties: map[string]models.ToolArgProps{
+					"path": models.ToolArgProps{
+						Type:        "string",
+						Description: "path of the image file to read",
+					},
+				},
+			},
+		},
+	},
 	// file_write
 	models.Tool{
 		Type: "function",
```
