summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Grail Finder <wohilas@gmail.com>, 2025-11-20 19:13:04 +0300
committer: Grail Finder <wohilas@gmail.com>, 2025-11-20 19:13:04 +0300
commit: c21074a8129007509d45c460a8df56917f721cf0 (patch)
tree: cc4342132371e7ad5d28e754ee5e3363aa7eb1ce
parent: b4f9c5e26a21b6cd938c7a2eb3e8593ab9cfaafe (diff)
Feat: input img
-rw-r--r-- llm.go 54
-rw-r--r-- models/models.go 205
-rw-r--r-- tables.go 70
-rw-r--r-- tui.go 55
4 files changed, 366 insertions, 18 deletions
diff --git a/llm.go b/llm.go
index fb93615..f5cc4d5 100644
--- a/llm.go
+++ b/llm.go
@@ -8,6 +8,32 @@ import (
"strings"
)
+// imageAttachmentPath holds the path of the image queued for the next outgoing
+// message; the empty string means nothing is attached. It is package-level
+// mutable state, written by the Set*/Clear* helpers below and read when the
+// message and the status line are built.
+// NOTE(review): no synchronization is visible here — confirm all reads and writes happen on one goroutine.
+var imageAttachmentPath string // Global variable to track image attachment for next message
+
+// SetImageAttachment records imagePath as the image to attach to the next
+// message sent to the LLM. It only stores the path: the UI refresh call below
+// is commented out, so at present this does the same thing as
+// SetImageAttachmentWithoutUI.
+func SetImageAttachment(imagePath string) {
+ imageAttachmentPath = imagePath
+ // Update the UI to show image is attached (call function from tui.go)
+ // UpdateImageAttachmentStatus(imagePath)
+}
+
+// SetImageAttachmentWithoutUI records imagePath as the image to attach to the
+// next message and never touches the UI (intended for internal use where UI
+// updates might cause hangs, e.g. inside event handlers).
+func SetImageAttachmentWithoutUI(imagePath string) {
+ imageAttachmentPath = imagePath
+}
+
+// ClearImageAttachment drops any pending image attachment by resetting the
+// stored path to the empty string. The UI refresh call below is commented out,
+// so at present no UI update is triggered from here.
+func ClearImageAttachment() {
+ imageAttachmentPath = ""
+ // Update the UI to clear image attachment status (call function from tui.go)
+ // UpdateImageAttachmentStatus("")
+}
+
+// ClearImageAttachmentWithoutUI drops any pending image attachment by
+// resetting the stored path to the empty string, without any UI updates.
+func ClearImageAttachmentWithoutUI() {
+ imageAttachmentPath = ""
+}
+
type ChunkParser interface {
ParseChunk([]byte) (*models.TextChunk, error)
FormMsg(msg, role string, cont bool) (io.Reader, error)
@@ -165,7 +191,33 @@ func (op OpenAIer) ParseChunk(data []byte) (*models.TextChunk, error) {
func (op OpenAIer) FormMsg(msg, role string, resume bool) (io.Reader, error) {
logger.Debug("formmsg openaier", "link", cfg.CurrentAPI)
if msg != "" { // otherwise let the bot continue
- newMsg := models.RoleMsg{Role: role, Content: msg}
+ // Create the message with support for multimodal content
+ var newMsg models.RoleMsg
+
+ // Check if we have an image to add to this message
+ if imageAttachmentPath != "" {
+ // Create a multimodal message with both text and image
+ newMsg = models.NewMultimodalMsg(role, []interface{}{})
+
+ // Add the text content
+ newMsg.AddTextPart(msg)
+
+ // Add the image content
+ imageURL, err := models.CreateImageURLFromPath(imageAttachmentPath)
+ if err != nil {
+ logger.Error("failed to create image URL from path", "error", err, "path", imageAttachmentPath)
+ // If image processing fails, fall back to simple text message
+ newMsg = models.NewRoleMsg(role, msg)
+ imageAttachmentPath = "" // Clear the attachment
+ } else {
+ newMsg.AddImagePart(imageURL)
+ imageAttachmentPath = "" // Clear the attachment after use
+ }
+ } else {
+ // Create a simple text message
+ newMsg = models.NewRoleMsg(role, msg)
+ }
+
chatBody.Messages = append(chatBody.Messages, newMsg)
}
req := models.OpenAIReq{
diff --git a/models/models.go b/models/models.go
index 0a10da1..ea9bf2e 100644
--- a/models/models.go
+++ b/models/models.go
@@ -1,7 +1,10 @@
package models
import (
+ "encoding/base64"
+ "encoding/json"
"fmt"
+ "os"
"strings"
)
@@ -69,23 +72,215 @@ type TextChunk struct {
FuncName string
}
+// TextContentPart is one text element of a structured (multi-part) message
+// body. It serializes to JSON with the keys "type" and "text".
+type TextContentPart struct {
+ Type string `json:"type"`
+ Text string `json:"text"`
+}
+
+// ImageContentPart is one image element of a structured (multi-part) message
+// body. It serializes to JSON with the keys "type" and "image_url", the latter
+// being an object holding the image location under "url".
+type ImageContentPart struct {
+ Type string `json:"type"`
+ ImageURL struct {
+ URL string `json:"url"`
+ } `json:"image_url"`
+}
+
+// RoleMsg represents a message with content that can be either a simple string or structured content parts.
+// Content and ContentParts are both excluded from default JSON encoding (`json:"-"`);
+// the custom MarshalJSON/UnmarshalJSON methods read and write whichever one
+// hasContentParts selects under the single "content" key.
type RoleMsg struct {
- Role string `json:"role"`
- Content string `json:"content"`
+ Role string `json:"role"`
+ // Content is the plain-string body, used when hasContentParts is false.
+ Content string `json:"-"`
+ // ContentParts is the structured body (text and image parts), used when hasContentParts is true.
+ ContentParts []interface{} `json:"-"`
+ hasContentParts bool // Flag to indicate which content type to marshal
+}
+
+// MarshalJSON implements custom JSON marshaling for RoleMsg.
+// It always emits an object with "role" and "content" keys; "content" is the
+// ContentParts array when hasContentParts is set and the Content string
+// otherwise.
+// NOTE(review): with hasContentParts set and a nil ContentParts slice,
+// "content" is encoded as JSON null rather than [].
+func (m RoleMsg) MarshalJSON() ([]byte, error) {
+ if m.hasContentParts {
+ // Use structured content format
+ aux := struct {
+ Role string `json:"role"`
+ Content []interface{} `json:"content"`
+ }{
+ Role: m.Role,
+ Content: m.ContentParts,
+ }
+ return json.Marshal(aux)
+ } else {
+ // Use simple content format
+ aux := struct {
+ Role string `json:"role"`
+ Content string `json:"content"`
+ }{
+ Role: m.Role,
+ Content: m.Content,
+ }
+ return json.Marshal(aux)
+ }
+}
+
+// UnmarshalJSON implements custom JSON unmarshaling for RoleMsg.
+//
+// The "content" field is decoded according to its JSON type: an array
+// (including an empty one) becomes ContentParts and selects the structured
+// form, while a string, null or missing field becomes Content and selects the
+// simple form. Any other content type is reported as an error. The fields of
+// the form that is not selected are reset, so a reused RoleMsg never keeps
+// stale data from an earlier decode.
+func (m *RoleMsg) UnmarshalJSON(data []byte) error {
+	// Keep "content" undecoded so its JSON type can be inspected once, rather
+	// than attempting a full decode for every candidate shape.
+	var raw struct {
+		Role    string          `json:"role"`
+		Content json.RawMessage `json:"content"`
+	}
+	if err := json.Unmarshal(data, &raw); err != nil {
+		return err
+	}
+
+	if len(raw.Content) > 0 && raw.Content[0] == '[' {
+		// An empty array is still the structured form: MarshalJSON emits
+		// "content": [] for a multimodal message without parts, and that
+		// must decode again instead of failing as "array into string".
+		parts := []interface{}{}
+		if err := json.Unmarshal(raw.Content, &parts); err != nil {
+			return err
+		}
+		m.Role = raw.Role
+		m.Content = ""
+		m.ContentParts = parts
+		m.hasContentParts = true
+		return nil
+	}
+
+	var text string
+	if len(raw.Content) > 0 {
+		// JSON null leaves text empty; numbers, objects and booleans fail here.
+		if err := json.Unmarshal(raw.Content, &text); err != nil {
+			return err
+		}
+	}
+	m.Role = raw.Role
+	m.Content = text
+	m.ContentParts = nil
+	m.hasContentParts = false
+	return nil
}
func (m RoleMsg) ToText(i int) string {
icon := fmt.Sprintf("(%d)", i)
+
+ // Convert content to string representation
+ contentStr := ""
+ if !m.hasContentParts {
+ contentStr = m.Content
+ } else {
+ // For structured content, just take the text parts
+ for _, part := range m.ContentParts {
+ if partMap, ok := part.(map[string]interface{}); ok {
+ if partType, exists := partMap["type"]; exists && partType == "text" {
+ if textVal, textExists := partMap["text"]; textExists {
+ if textStr, isStr := textVal.(string); isStr {
+ contentStr += textStr + " "
+ }
+ }
+ }
+ }
+ }
+ }
+
// check if already has role annotation (/completion makes them)
- if !strings.HasPrefix(m.Content, m.Role+":") {
+ if !strings.HasPrefix(contentStr, m.Role+":") {
icon = fmt.Sprintf("(%d) <%s>: ", i, m.Role)
}
- textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, m.Content)
+ textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, contentStr)
return strings.ReplaceAll(textMsg, "\n\n", "\n")
}
func (m RoleMsg) ToPrompt() string {
- return strings.ReplaceAll(fmt.Sprintf("%s:\n%s", m.Role, m.Content), "\n\n", "\n")
+ contentStr := ""
+ if !m.hasContentParts {
+ contentStr = m.Content
+ } else {
+ // For structured content, just take the text parts
+ for _, part := range m.ContentParts {
+ if partMap, ok := part.(map[string]interface{}); ok {
+ if partType, exists := partMap["type"]; exists && partType == "text" {
+ if textVal, textExists := partMap["text"]; textExists {
+ if textStr, isStr := textVal.(string); isStr {
+ contentStr += textStr + " "
+ }
+ }
+ }
+ }
+ }
+ }
+ return strings.ReplaceAll(fmt.Sprintf("%s:\n%s", m.Role, contentStr), "\n\n", "\n")
+}
+
+// NewRoleMsg creates a simple RoleMsg with string content.
+// The result has hasContentParts cleared, so it marshals "content" as a
+// plain JSON string.
+func NewRoleMsg(role, content string) RoleMsg {
+ return RoleMsg{
+ Role: role,
+ Content: content,
+ hasContentParts: false,
+ }
+}
+
+// NewMultimodalMsg creates a RoleMsg with structured content parts (text and images).
+// The contentParts slice is stored as given (not copied) and hasContentParts
+// is set, so the message marshals "content" as a JSON array.
+func NewMultimodalMsg(role string, contentParts []interface{}) RoleMsg {
+ return RoleMsg{
+ Role: role,
+ ContentParts: contentParts,
+ hasContentParts: true,
+ }
+}
+
+// AddTextPart appends a text content part to the message.
+// If the message is still in simple form it is first switched to structured
+// form: a non-empty Content becomes the first text part, and hasContentParts
+// is set. Content itself is left untouched by the conversion.
+func (m *RoleMsg) AddTextPart(text string) {
+ if !m.hasContentParts {
+ // Convert to content parts format
+ if m.Content != "" {
+ m.ContentParts = []interface{}{TextContentPart{Type: "text", Text: m.Content}}
+ } else {
+ m.ContentParts = []interface{}{}
+ }
+ m.hasContentParts = true
+ }
+
+ textPart := TextContentPart{Type: "text", Text: text}
+ m.ContentParts = append(m.ContentParts, textPart)
+}
+
+// AddImagePart appends an image content part (type "image_url") carrying
+// imageURL to the message.
+// If the message is still in simple form it is first switched to structured
+// form: a non-empty Content becomes the first text part, and hasContentParts
+// is set. Content itself is left untouched by the conversion.
+func (m *RoleMsg) AddImagePart(imageURL string) {
+ if !m.hasContentParts {
+ // Convert to content parts format
+ if m.Content != "" {
+ m.ContentParts = []interface{}{TextContentPart{Type: "text", Text: m.Content}}
+ } else {
+ m.ContentParts = []interface{}{}
+ }
+ m.hasContentParts = true
+ }
+
+ // The anonymous struct type must match the ImageURL field of ImageContentPart exactly.
+ imagePart := ImageContentPart{
+ Type: "image_url",
+ ImageURL: struct {
+ URL string `json:"url"`
+ }{URL: imageURL},
+ }
+ m.ContentParts = append(m.ContentParts, imagePart)
+}
+
+// CreateImageURLFromPath creates a data URL from an image file path
+func CreateImageURLFromPath(imagePath string) (string, error) {
+ // Read the image file
+ data, err := os.ReadFile(imagePath)
+ if err != nil {
+ return "", err
+ }
+
+ // Determine the image format based on file extension
+ var mimeType string
+ switch {
+ case strings.HasSuffix(strings.ToLower(imagePath), ".png"):
+ mimeType = "image/png"
+ case strings.HasSuffix(strings.ToLower(imagePath), ".jpg"):
+ fallthrough
+ case strings.HasSuffix(strings.ToLower(imagePath), ".jpeg"):
+ mimeType = "image/jpeg"
+ case strings.HasSuffix(strings.ToLower(imagePath), ".gif"):
+ mimeType = "image/gif"
+ case strings.HasSuffix(strings.ToLower(imagePath), ".webp"):
+ mimeType = "image/webp"
+ default:
+ mimeType = "image/jpeg" // default
+ }
+
+ // Encode to base64
+ encoded := base64.StdEncoding.EncodeToString(data)
+
+ // Create data URL
+ return fmt.Sprintf("data:%s;base64,%s", mimeType, encoded), nil
}
type ChatBody struct {
diff --git a/tables.go b/tables.go
index 8302e91..c359102 100644
--- a/tables.go
+++ b/tables.go
@@ -563,6 +563,18 @@ func makeFilePicker() *tview.Flex {
// Track currently displayed directory (changes as user navigates)
var currentDisplayDir string = startDir
+ // Helper function to check if a file is an image.
+ // The decision is based only on the file name: it returns true when the
+ // lower-cased name ends with one of the listed extensions. The file
+ // contents are never inspected.
+ isImageFile := func(filename string) bool {
+ imageExtensions := []string{".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".svg"}
+ lowerFilename := strings.ToLower(filename)
+ for _, ext := range imageExtensions {
+ if strings.HasSuffix(lowerFilename, ext) {
+ return true
+ }
+ }
+ return false
+ }
+
// Create UI elements
listView := tview.NewList()
listView.SetBorder(true).SetTitle("Files & Directories").SetTitleAlign(tview.AlignLeft)
@@ -584,11 +596,23 @@ func makeFilePicker() *tview.Flex {
loadButton := tview.NewButton("Load")
loadButton.SetSelectedFunc(func() {
if selectedFile != "" {
- // Update the global text area with the selected file path
- textArea.SetText(selectedFile, true)
- app.SetFocus(textArea)
+ // Check if the selected file is an image
+ if isImageFile(selectedFile) {
+ // For image files, set it as an attachment for the next LLM message
+ SetImageAttachment(selectedFile)
+ statusView.SetText("Image attached: " + selectedFile + " (will be sent with next message)")
+ // Close the file picker but don't change the text area
+ pages.RemovePage(filePickerPage)
+ } else {
+ // For non-image files, update the text area with file path
+ textArea.SetText(selectedFile, true)
+ app.SetFocus(textArea)
+ pages.RemovePage(filePickerPage)
+ }
+ } else {
+ // If no file is selected, just close the picker
+ pages.RemovePage(filePickerPage)
}
- pages.RemovePage(filePickerPage)
})
cancelButton := tview.NewButton("Cancel")
@@ -649,6 +673,12 @@ func makeFilePicker() *tview.Flex {
// Add directories and files to the list
for _, file := range files {
name := file.Name()
+
+ // Skip hidden files and directories (those starting with a dot)
+ if strings.HasPrefix(name, ".") {
+ continue
+ }
+
if file.IsDir() {
// Capture the directory name for the closure to avoid loop variable issues
dirName := name
@@ -662,9 +692,19 @@ func makeFilePicker() *tview.Flex {
} else {
// Capture the file name for the closure to avoid loop variable issues
fileName := name
+ fullFilePath := path.Join(dir, fileName)
listView.AddItem(fileName, "(File)", 0, func() {
- selectedFile = path.Join(dir, fileName)
+ selectedFile = fullFilePath
statusView.SetText("Selected: " + selectedFile)
+
+ // Check if the file is an image
+ if isImageFile(fileName) {
+ // For image files, offer to attach to the next LLM message
+ statusView.SetText("Selected image: " + selectedFile + " (Press Load to attach)")
+ } else {
+ // For non-image files, display as before
+ statusView.SetText("Selected: " + selectedFile)
+ }
})
}
}
@@ -769,9 +809,23 @@ func makeFilePicker() *tview.Flex {
filePath := path.Join(currentDisplayDir, itemText)
// Verify it's actually a file (not just lacking a directory suffix)
if info, err := os.Stat(filePath); err == nil && !info.IsDir() {
- textArea.SetText(filePath, true)
- app.SetFocus(textArea)
- pages.RemovePage(filePickerPage)
+ // Check if the file is an image
+ if isImageFile(itemText) {
+ // For image files, set it as an attachment for the next LLM message
+ // Use the version without UI updates to avoid hangs in event handlers
+ logger.Info("setting image", "file", itemText)
+ SetImageAttachmentWithoutUI(filePath)
+ logger.Info("after setting image", "file", itemText)
+ statusView.SetText("Image attached: " + filePath + " (will be sent with next message)")
+ logger.Info("after setting text", "file", itemText)
+ pages.RemovePage(filePickerPage)
+ logger.Info("after update drawn", "file", itemText)
+ } else {
+ // For non-image files, update the text area with file path
+ textArea.SetText(filePath, true)
+ app.SetFocus(textArea)
+ pages.RemovePage(filePickerPage)
+ }
}
return nil
}
diff --git a/tui.go b/tui.go
index c42b8d4..3345968 100644
--- a/tui.go
+++ b/tui.go
@@ -231,10 +231,22 @@ func makeStatusLine() string {
if cfg.WriteNextMsgAsCompletionAgent != "" {
botPersona = cfg.WriteNextMsgAsCompletionAgent
}
+
+ // Add image attachment info to status line
+ var imageInfo string
+ if imageAttachmentPath != "" {
+ // Get just the filename from the path
+ imageName := path.Base(imageAttachmentPath)
+ imageInfo = fmt.Sprintf(" | attached img: [orange:-:b]%s[-:-:-]", imageName)
+ } else {
+ imageInfo = ""
+ }
+
statusLine := fmt.Sprintf(indexLineCompletion, botRespMode, cfg.AssistantRole, activeChatName,
cfg.ToolUse, chatBody.Model, cfg.SkipLLMResp, cfg.CurrentAPI, cfg.ThinkUse, logLevel.Level(),
isRecording, persona, botPersona, injectRole)
- return statusLine
+
+ return statusLine + imageInfo
}
func updateStatusLine() {
@@ -422,7 +434,7 @@ func init() {
})
flex = tview.NewFlex().SetDirection(tview.FlexRow).
AddItem(textView, 0, 40, false).
- AddItem(textArea, 0, 10, true).
+ AddItem(textArea, 0, 10, true). // Restore original height
AddItem(position, 0, 2, false)
editArea = tview.NewTextArea().
SetPlaceholder("Replace msg...")
@@ -801,8 +813,29 @@ func init() {
return nil
}
if event.Key() == tcell.KeyCtrlJ {
- // show image
- loadImage()
+ // show image - check for attached image first, then fall back to agent image
+ if imageAttachmentPath != "" {
+ // Load the attached image
+ file, err := os.Open(imageAttachmentPath)
+ if err != nil {
+ logger.Error("failed to open attached image", "path", imageAttachmentPath, "error", err)
+ // Fall back to showing agent image
+ loadImage()
+ } else {
+ defer file.Close()
+ img, _, err := image.Decode(file)
+ if err != nil {
+ logger.Error("failed to decode attached image", "path", imageAttachmentPath, "error", err)
+ // Fall back to showing agent image
+ loadImage()
+ } else {
+ imgView.SetImage(img)
+ }
+ }
+ } else {
+ // No attached image, show agent image as before
+ loadImage()
+ }
pages.AddPage(imgPage, imgView, true, true)
return nil
}
@@ -977,6 +1010,13 @@ func init() {
colorText()
}
go chatRound(msgText, persona, textView, false, false)
+ // Also clear any image attachment after sending the message
+ go func() {
+ // NOTE(review): nothing here waits. ClearImageAttachment runs as soon as this goroutine is
+ // scheduled, so it races with chatRound reading imageAttachmentPath and can drop the image
+ // before it is sent. OpenAIer.FormMsg already clears the attachment after using it.
+ ClearImageAttachment()
+ }()
return nil
}
if event.Key() == tcell.KeyPgUp || event.Key() == tcell.KeyPgDn {
@@ -990,3 +1030,10 @@ func init() {
return event
})
}
+
+// UpdateImageAttachmentStatus updates the UI to reflect the current image attachment status.
+// The imagePath argument is currently unused: the function only calls
+// updateStatusLine.
+func UpdateImageAttachmentStatus(imagePath string) {
+ // The image attachment status is now shown in the main status line
+ // Just update the status line to reflect the current image attachment
+ updateStatusLine()
+}