From 7d18a9d77eca3b286849ae0a134f9d7277249bef Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Tue, 17 Feb 2026 16:19:33 +0300 Subject: Feat: indicator for a message with an image [image: filename] --- models/models.go | 88 +++++++++++++++++++++++--- models/models_test.go | 167 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 247 insertions(+), 8 deletions(-) create mode 100644 models/models_test.go (limited to 'models') diff --git a/models/models.go b/models/models.go index 9f41803..13c024e 100644 --- a/models/models.go +++ b/models/models.go @@ -5,9 +5,22 @@ import ( "encoding/json" "fmt" "os" + "path/filepath" "strings" ) +var ( + // imageBaseDir is the base directory for displaying image paths. + // If set, image paths will be shown relative to this directory. + imageBaseDir = "" +) + +// SetImageBaseDir sets the base directory for displaying image paths. +// If dir is empty, full paths will be shown. +func SetImageBaseDir(dir string) { + imageBaseDir = dir +} + type FuncCall struct { ID string `json:"id,omitempty"` Name string `json:"name"` @@ -82,6 +95,7 @@ type TextContentPart struct { type ImageContentPart struct { Type string `json:"type"` + Path string `json:"path,omitempty"` // Store original file path ImageURL struct { URL string `json:"url"` } `json:"image_url"` @@ -169,10 +183,11 @@ func (m *RoleMsg) UnmarshalJSON(data []byte) error { func (m *RoleMsg) ToText(i int) string { // Convert content to string representation var contentStr string + var imageIndicators []string if !m.hasContentParts { contentStr = m.Content } else { - // For structured content, just take the text parts + // For structured content, collect text parts and image indicators var textParts []string for _, part := range m.ContentParts { switch p := part.(type) { @@ -181,13 +196,34 @@ func (m *RoleMsg) ToText(i int) string { textParts = append(textParts, p.Text) } case ImageContentPart: - // skip images for text display + // Collect image indicator + displayPath := p.Path + if displayPath == "" { + displayPath = "image" + } else { + displayPath = extractDisplayPath(displayPath) + } + imageIndicators = append(imageIndicators, fmt.Sprintf("[orange::i][image: %s][-:-:-]", displayPath)) case map[string]any: - if partType, exists := p["type"]; exists && partType == "text" { - if textVal, textExists := p["text"]; textExists { - if textStr, isStr := textVal.(string); isStr { - textParts = append(textParts, textStr) + if partType, exists := p["type"]; exists { + if partType == "text" { + if textVal, textExists := p["text"]; textExists { + if textStr, isStr := textVal.(string); isStr { + textParts = append(textParts, textStr) + } + } + } else if partType == "image_url" { + // Handle unmarshaled image content + var displayPath string + if pathVal, pathExists := p["path"]; pathExists { + if pathStr, isStr := pathVal.(string); isStr && pathStr != "" { + displayPath = extractDisplayPath(pathStr) + } } + if displayPath == "" { + displayPath = "image" + } + imageIndicators = append(imageIndicators, fmt.Sprintf("[orange::i][image: %s][-:-:-]", displayPath)) } } } @@ -201,7 +237,17 @@ func (m *RoleMsg) ToText(i int) string { // if !strings.HasPrefix(contentStr, m.Role+":") { icon := fmt.Sprintf("(%d) <%s>: ", i, m.Role) // } - textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, contentStr) + // Build final message with image indicators before text + var finalContent strings.Builder + if len(imageIndicators) > 0 { + // Add each image indicator on its own line + for _, indicator := range imageIndicators { + finalContent.WriteString(indicator) + finalContent.WriteString("\n") + } + } + finalContent.WriteString(contentStr) + textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, finalContent.String()) return strings.ReplaceAll(textMsg, "\n\n", "\n") } @@ -303,7 +349,7 @@ func (m *RoleMsg) AddTextPart(text string) { } // AddImagePart adds an image content part to the message -func (m *RoleMsg) AddImagePart(imageURL string) { +func (m *RoleMsg) AddImagePart(imageURL, imagePath string) { if !m.hasContentParts { // Convert to content parts format if m.Content != "" { @@ -316,6 +362,7 @@ func (m *RoleMsg) AddImagePart(imageURL string) { imagePart := ImageContentPart{ Type: "image_url", + Path: imagePath, // Store the original file path ImageURL: struct { URL string `json:"url"` }{URL: imageURL}, @@ -355,6 +402,31 @@ func CreateImageURLFromPath(imagePath string) (string, error) { return fmt.Sprintf("data:%s;base64,%s", mimeType, encoded), nil } +// extractDisplayPath returns a path suitable for display, potentially relative to imageBaseDir +func extractDisplayPath(p string) string { + if p == "" { + return "" + } + + // If base directory is set, try to make path relative to it + if imageBaseDir != "" { + if rel, err := filepath.Rel(imageBaseDir, p); err == nil { + // Check if relative path doesn't start with ".." (meaning it's within base dir) + // If it starts with "..", we might still want to show it as relative + // but for now we show full path if it goes outside base dir + if !strings.HasPrefix(rel, "..") { + p = rel + } + } + } + + // Truncate long paths to last 60 characters if needed + if len(p) > 60 { + return "..." + p[len(p)-60:] + } + return p +} + type ChatBody struct { Model string `json:"model"` Stream bool `json:"stream"` diff --git a/models/models_test.go b/models/models_test.go new file mode 100644 index 0000000..5f0a4f4 --- /dev/null +++ b/models/models_test.go @@ -0,0 +1,167 @@ +package models + +import ( + "strings" + "testing" +) + +func TestRoleMsgToTextWithImages(t *testing.T) { + tests := []struct { + name string + msg RoleMsg + index int + expected string // substring to check + }{ + { + name: "text and image", + index: 0, + msg: func() RoleMsg { + msg := NewMultimodalMsg("user", []interface{}{}) + msg.AddTextPart("Look at this picture") + msg.AddImagePart("data:image/jpeg;base64,abc123", "/home/user/Pictures/cat.jpg") + return msg + }(), + expected: "[orange::i][image: /home/user/Pictures/cat.jpg][-:-:-]", + }, + { + name: "image only", + index: 1, + msg: func() RoleMsg { + msg := NewMultimodalMsg("user", []interface{}{}) + msg.AddImagePart("data:image/png;base64,xyz789", "/tmp/screenshot_20250217_123456.png") + return msg + }(), + expected: "[orange::i][image: /tmp/screenshot_20250217_123456.png][-:-:-]", + }, + { + name: "long filename truncated", + index: 2, + msg: func() RoleMsg { + msg := NewMultimodalMsg("user", []interface{}{}) + msg.AddTextPart("Check this") + msg.AddImagePart("data:image/jpeg;base64,foo", "/very/long/path/to/a/really_long_filename_that_exceeds_forty_characters.jpg") + return msg + }(), + expected: "[orange::i][image: .../to/a/really_long_filename_that_exceeds_forty_characters.jpg][-:-:-]", + }, + { + name: "multiple images", + index: 3, + msg: func() RoleMsg { + msg := NewMultimodalMsg("user", []interface{}{}) + msg.AddTextPart("Multiple images") + msg.AddImagePart("data:image/jpeg;base64,a", "/path/img1.jpg") + msg.AddImagePart("data:image/png;base64,b", "/path/img2.png") + return msg + }(), + expected: "[orange::i][image: /path/img1.jpg][-:-:-]\n[orange::i][image: /path/img2.png][-:-:-]", + }, + { + name: "old format without path", + index: 4, + msg: RoleMsg{ + Role: "user", + hasContentParts: true, + ContentParts: []interface{}{ + map[string]interface{}{ + "type": "image_url", + "image_url": map[string]interface{}{ + "url": "data:image/jpeg;base64,old", + }, + }, + }, + }, + expected: "[orange::i][image: image][-:-:-]", + }, + { + name: "old format with path", + index: 5, + msg: RoleMsg{ + Role: "user", + hasContentParts: true, + ContentParts: []interface{}{ + map[string]interface{}{ + "type": "image_url", + "path": "/old/path/photo.jpg", + "image_url": map[string]interface{}{ + "url": "data:image/jpeg;base64,old", + }, + }, + }, + }, + expected: "[orange::i][image: /old/path/photo.jpg][-:-:-]", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := tt.msg.ToText(tt.index) + if !strings.Contains(result, tt.expected) { + t.Errorf("ToText() result does not contain expected indicator\ngot: %s\nwant substring: %s", result, tt.expected) + } + // Ensure the indicator appears before text content + if strings.Contains(tt.expected, "cat.jpg") && strings.Contains(result, "Look at this picture") { + indicatorPos := strings.Index(result, "[orange::i][image: /home/user/Pictures/cat.jpg][-:-:-]") + textPos := strings.Index(result, "Look at this picture") + if indicatorPos == -1 || textPos == -1 || indicatorPos >= textPos { + t.Errorf("image indicator should appear before text") + } + } + }) + } +} + +func TestExtractDisplayPath(t *testing.T) { + // Save original base dir + originalBaseDir := imageBaseDir + defer func() { imageBaseDir = originalBaseDir }() + + tests := []struct { + name string + baseDir string + path string + expected string + }{ + { + name: "no base dir shows full path", + baseDir: "", + path: "/home/user/images/cat.jpg", + expected: "/home/user/images/cat.jpg", + }, + { + name: "relative path within base dir", + baseDir: "/home/user", + path: "/home/user/images/cat.jpg", + expected: "images/cat.jpg", + }, + { + name: "path outside base dir shows full path", + baseDir: "/home/user", + path: "/tmp/test.jpg", + expected: "/tmp/test.jpg", + }, + { + name: "same directory", + baseDir: "/home/user/images", + path: "/home/user/images/cat.jpg", + expected: "cat.jpg", + }, + { + name: "long path truncated", + baseDir: "", + path: "/very/long/path/to/a/really_long_filename_that_exceeds_sixty_characters_limit_yes_it_is_very_long.jpg", + expected: "..._that_exceeds_sixty_characters_limit_yes_it_is_very_long.jpg", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + imageBaseDir = tt.baseDir + result := extractDisplayPath(tt.path) + if result != tt.expected { + t.Errorf("extractDisplayPath(%q) with baseDir=%q = %q, want %q", + tt.path, tt.baseDir, result, tt.expected) + } + }) + } +} -- cgit v1.2.3