diff options
| author | Grail Finder <wohilas@gmail.com> | 2025-11-20 19:13:04 +0300 |
|---|---|---|
| committer | Grail Finder <wohilas@gmail.com> | 2025-11-20 19:13:04 +0300 |
| commit | c21074a8129007509d45c460a8df56917f721cf0 (patch) | |
| tree | cc4342132371e7ad5d28e754ee5e3363aa7eb1ce | |
| parent | b4f9c5e26a21b6cd938c7a2eb3e8593ab9cfaafe (diff) | |
Feat: input img
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llm.go | 54 |
| -rw-r--r-- | models/models.go | 205 |
| -rw-r--r-- | tables.go | 70 |
| -rw-r--r-- | tui.go | 55 |
4 files changed, 366 insertions, 18 deletions
@@ -8,6 +8,32 @@ import ( "strings" ) +var imageAttachmentPath string // Global variable to track image attachment for next message + +// SetImageAttachment sets an image to be attached to the next message sent to the LLM and updates UI +func SetImageAttachment(imagePath string) { + imageAttachmentPath = imagePath + // Update the UI to show image is attached (call function from tui.go) + // UpdateImageAttachmentStatus(imagePath) +} + +// SetImageAttachmentWithoutUI sets an image to be attached without UI updates (for internal use where UI updates might cause hangs) +func SetImageAttachmentWithoutUI(imagePath string) { + imageAttachmentPath = imagePath +} + +// ClearImageAttachment clears any pending image attachment and updates UI +func ClearImageAttachment() { + imageAttachmentPath = "" + // Update the UI to clear image attachment status (call function from tui.go) + // UpdateImageAttachmentStatus("") +} + +// ClearImageAttachmentWithoutUI clears any pending image attachment without UI updates +func ClearImageAttachmentWithoutUI() { + imageAttachmentPath = "" +} + type ChunkParser interface { ParseChunk([]byte) (*models.TextChunk, error) FormMsg(msg, role string, cont bool) (io.Reader, error) @@ -165,7 +191,33 @@ func (op OpenAIer) ParseChunk(data []byte) (*models.TextChunk, error) { func (op OpenAIer) FormMsg(msg, role string, resume bool) (io.Reader, error) { logger.Debug("formmsg openaier", "link", cfg.CurrentAPI) if msg != "" { // otherwise let the bot continue - newMsg := models.RoleMsg{Role: role, Content: msg} + // Create the message with support for multimodal content + var newMsg models.RoleMsg + + // Check if we have an image to add to this message + if imageAttachmentPath != "" { + // Create a multimodal message with both text and image + newMsg = models.NewMultimodalMsg(role, []interface{}{}) + + // Add the text content + newMsg.AddTextPart(msg) + + // Add the image content + imageURL, err := models.CreateImageURLFromPath(imageAttachmentPath) + if err 
!= nil { + logger.Error("failed to create image URL from path", "error", err, "path", imageAttachmentPath) + // If image processing fails, fall back to simple text message + newMsg = models.NewRoleMsg(role, msg) + imageAttachmentPath = "" // Clear the attachment + } else { + newMsg.AddImagePart(imageURL) + imageAttachmentPath = "" // Clear the attachment after use + } + } else { + // Create a simple text message + newMsg = models.NewRoleMsg(role, msg) + } + chatBody.Messages = append(chatBody.Messages, newMsg) } req := models.OpenAIReq{ diff --git a/models/models.go b/models/models.go index 0a10da1..ea9bf2e 100644 --- a/models/models.go +++ b/models/models.go @@ -1,7 +1,10 @@ package models import ( + "encoding/base64" + "encoding/json" "fmt" + "os" "strings" ) @@ -69,23 +72,215 @@ type TextChunk struct { FuncName string } +type TextContentPart struct { + Type string `json:"type"` + Text string `json:"text"` +} + +type ImageContentPart struct { + Type string `json:"type"` + ImageURL struct { + URL string `json:"url"` + } `json:"image_url"` +} + +// RoleMsg represents a message with content that can be either a simple string or structured content parts type RoleMsg struct { - Role string `json:"role"` - Content string `json:"content"` + Role string `json:"role"` + Content string `json:"-"` + ContentParts []interface{} `json:"-"` + hasContentParts bool // Flag to indicate which content type to marshal +} + +// MarshalJSON implements custom JSON marshaling for RoleMsg +func (m RoleMsg) MarshalJSON() ([]byte, error) { + if m.hasContentParts { + // Use structured content format + aux := struct { + Role string `json:"role"` + Content []interface{} `json:"content"` + }{ + Role: m.Role, + Content: m.ContentParts, + } + return json.Marshal(aux) + } else { + // Use simple content format + aux := struct { + Role string `json:"role"` + Content string `json:"content"` + }{ + Role: m.Role, + Content: m.Content, + } + return json.Marshal(aux) + } +} + +// UnmarshalJSON implements 
custom JSON unmarshaling for RoleMsg +func (m *RoleMsg) UnmarshalJSON(data []byte) error { + // First, try to unmarshal as structured content format + var structured struct { + Role string `json:"role"` + Content []interface{} `json:"content"` + } + if err := json.Unmarshal(data, &structured); err == nil && len(structured.Content) > 0 { + m.Role = structured.Role + m.ContentParts = structured.Content + m.hasContentParts = true + return nil + } + + // Otherwise, unmarshal as simple content format + var simple struct { + Role string `json:"role"` + Content string `json:"content"` + } + if err := json.Unmarshal(data, &simple); err != nil { + return err + } + m.Role = simple.Role + m.Content = simple.Content + m.hasContentParts = false + return nil } func (m RoleMsg) ToText(i int) string { icon := fmt.Sprintf("(%d)", i) + + // Convert content to string representation + contentStr := "" + if !m.hasContentParts { + contentStr = m.Content + } else { + // For structured content, just take the text parts + for _, part := range m.ContentParts { + if partMap, ok := part.(map[string]interface{}); ok { + if partType, exists := partMap["type"]; exists && partType == "text" { + if textVal, textExists := partMap["text"]; textExists { + if textStr, isStr := textVal.(string); isStr { + contentStr += textStr + " " + } + } + } + } + } + } + // check if already has role annotation (/completion makes them) - if !strings.HasPrefix(m.Content, m.Role+":") { + if !strings.HasPrefix(contentStr, m.Role+":") { icon = fmt.Sprintf("(%d) <%s>: ", i, m.Role) } - textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, m.Content) + textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, contentStr) return strings.ReplaceAll(textMsg, "\n\n", "\n") } func (m RoleMsg) ToPrompt() string { - return strings.ReplaceAll(fmt.Sprintf("%s:\n%s", m.Role, m.Content), "\n\n", "\n") + contentStr := "" + if !m.hasContentParts { + contentStr = m.Content + } else { + // For structured content, just take the text parts 
+ for _, part := range m.ContentParts { + if partMap, ok := part.(map[string]interface{}); ok { + if partType, exists := partMap["type"]; exists && partType == "text" { + if textVal, textExists := partMap["text"]; textExists { + if textStr, isStr := textVal.(string); isStr { + contentStr += textStr + " " + } + } + } + } + } + } + return strings.ReplaceAll(fmt.Sprintf("%s:\n%s", m.Role, contentStr), "\n\n", "\n") +} + +// NewRoleMsg creates a simple RoleMsg with string content +func NewRoleMsg(role, content string) RoleMsg { + return RoleMsg{ + Role: role, + Content: content, + hasContentParts: false, + } +} + +// NewMultimodalMsg creates a RoleMsg with structured content parts (text and images) +func NewMultimodalMsg(role string, contentParts []interface{}) RoleMsg { + return RoleMsg{ + Role: role, + ContentParts: contentParts, + hasContentParts: true, + } +} + +// AddTextPart adds a text content part to the message +func (m *RoleMsg) AddTextPart(text string) { + if !m.hasContentParts { + // Convert to content parts format + if m.Content != "" { + m.ContentParts = []interface{}{TextContentPart{Type: "text", Text: m.Content}} + } else { + m.ContentParts = []interface{}{} + } + m.hasContentParts = true + } + + textPart := TextContentPart{Type: "text", Text: text} + m.ContentParts = append(m.ContentParts, textPart) +} + +// AddImagePart adds an image content part to the message +func (m *RoleMsg) AddImagePart(imageURL string) { + if !m.hasContentParts { + // Convert to content parts format + if m.Content != "" { + m.ContentParts = []interface{}{TextContentPart{Type: "text", Text: m.Content}} + } else { + m.ContentParts = []interface{}{} + } + m.hasContentParts = true + } + + imagePart := ImageContentPart{ + Type: "image_url", + ImageURL: struct { + URL string `json:"url"` + }{URL: imageURL}, + } + m.ContentParts = append(m.ContentParts, imagePart) +} + +// CreateImageURLFromPath creates a data URL from an image file path +func CreateImageURLFromPath(imagePath string) 
(string, error) { + // Read the image file + data, err := os.ReadFile(imagePath) + if err != nil { + return "", err + } + + // Determine the image format based on file extension + var mimeType string + switch { + case strings.HasSuffix(strings.ToLower(imagePath), ".png"): + mimeType = "image/png" + case strings.HasSuffix(strings.ToLower(imagePath), ".jpg"): + fallthrough + case strings.HasSuffix(strings.ToLower(imagePath), ".jpeg"): + mimeType = "image/jpeg" + case strings.HasSuffix(strings.ToLower(imagePath), ".gif"): + mimeType = "image/gif" + case strings.HasSuffix(strings.ToLower(imagePath), ".webp"): + mimeType = "image/webp" + default: + mimeType = "image/jpeg" // default + } + + // Encode to base64 + encoded := base64.StdEncoding.EncodeToString(data) + + // Create data URL + return fmt.Sprintf("data:%s;base64,%s", mimeType, encoded), nil } type ChatBody struct { @@ -563,6 +563,18 @@ func makeFilePicker() *tview.Flex { // Track currently displayed directory (changes as user navigates) var currentDisplayDir string = startDir + // Helper function to check if a file is an image + isImageFile := func(filename string) bool { + imageExtensions := []string{".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".svg"} + lowerFilename := strings.ToLower(filename) + for _, ext := range imageExtensions { + if strings.HasSuffix(lowerFilename, ext) { + return true + } + } + return false + } + // Create UI elements listView := tview.NewList() listView.SetBorder(true).SetTitle("Files & Directories").SetTitleAlign(tview.AlignLeft) @@ -584,11 +596,23 @@ func makeFilePicker() *tview.Flex { loadButton := tview.NewButton("Load") loadButton.SetSelectedFunc(func() { if selectedFile != "" { - // Update the global text area with the selected file path - textArea.SetText(selectedFile, true) - app.SetFocus(textArea) + // Check if the selected file is an image + if isImageFile(selectedFile) { + // For image files, set it as an attachment for the next LLM message + 
SetImageAttachment(selectedFile) + statusView.SetText("Image attached: " + selectedFile + " (will be sent with next message)") + // Close the file picker but don't change the text area + pages.RemovePage(filePickerPage) + } else { + // For non-image files, update the text area with file path + textArea.SetText(selectedFile, true) + app.SetFocus(textArea) + pages.RemovePage(filePickerPage) + } + } else { + // If no file is selected, just close the picker + pages.RemovePage(filePickerPage) } - pages.RemovePage(filePickerPage) }) cancelButton := tview.NewButton("Cancel") @@ -649,6 +673,12 @@ func makeFilePicker() *tview.Flex { // Add directories and files to the list for _, file := range files { name := file.Name() + + // Skip hidden files and directories (those starting with a dot) + if strings.HasPrefix(name, ".") { + continue + } + if file.IsDir() { // Capture the directory name for the closure to avoid loop variable issues dirName := name @@ -662,9 +692,19 @@ func makeFilePicker() *tview.Flex { } else { // Capture the file name for the closure to avoid loop variable issues fileName := name + fullFilePath := path.Join(dir, fileName) listView.AddItem(fileName, "(File)", 0, func() { - selectedFile = path.Join(dir, fileName) + selectedFile = fullFilePath statusView.SetText("Selected: " + selectedFile) + + // Check if the file is an image + if isImageFile(fileName) { + // For image files, offer to attach to the next LLM message + statusView.SetText("Selected image: " + selectedFile + " (Press Load to attach)") + } else { + // For non-image files, display as before + statusView.SetText("Selected: " + selectedFile) + } }) } } @@ -769,9 +809,23 @@ func makeFilePicker() *tview.Flex { filePath := path.Join(currentDisplayDir, itemText) // Verify it's actually a file (not just lacking a directory suffix) if info, err := os.Stat(filePath); err == nil && !info.IsDir() { - textArea.SetText(filePath, true) - app.SetFocus(textArea) - pages.RemovePage(filePickerPage) + // Check if 
the file is an image + if isImageFile(itemText) { + // For image files, set it as an attachment for the next LLM message + // Use the version without UI updates to avoid hangs in event handlers + logger.Info("setting image", "file", itemText) + SetImageAttachmentWithoutUI(filePath) + logger.Info("after setting image", "file", itemText) + statusView.SetText("Image attached: " + filePath + " (will be sent with next message)") + logger.Info("after setting text", "file", itemText) + pages.RemovePage(filePickerPage) + logger.Info("after update drawn", "file", itemText) + } else { + // For non-image files, update the text area with file path + textArea.SetText(filePath, true) + app.SetFocus(textArea) + pages.RemovePage(filePickerPage) + } } return nil } @@ -231,10 +231,22 @@ func makeStatusLine() string { if cfg.WriteNextMsgAsCompletionAgent != "" { botPersona = cfg.WriteNextMsgAsCompletionAgent } + + // Add image attachment info to status line + var imageInfo string + if imageAttachmentPath != "" { + // Get just the filename from the path + imageName := path.Base(imageAttachmentPath) + imageInfo = fmt.Sprintf(" | attached img: [orange:-:b]%s[-:-:-]", imageName) + } else { + imageInfo = "" + } + statusLine := fmt.Sprintf(indexLineCompletion, botRespMode, cfg.AssistantRole, activeChatName, cfg.ToolUse, chatBody.Model, cfg.SkipLLMResp, cfg.CurrentAPI, cfg.ThinkUse, logLevel.Level(), isRecording, persona, botPersona, injectRole) - return statusLine + + return statusLine + imageInfo } func updateStatusLine() { @@ -422,7 +434,7 @@ func init() { }) flex = tview.NewFlex().SetDirection(tview.FlexRow). AddItem(textView, 0, 40, false). - AddItem(textArea, 0, 10, true). + AddItem(textArea, 0, 10, true). // Restore original height AddItem(position, 0, 2, false) editArea = tview.NewTextArea(). 
SetPlaceholder("Replace msg...") @@ -801,8 +813,29 @@ func init() { return nil } if event.Key() == tcell.KeyCtrlJ { - // show image - loadImage() + // show image - check for attached image first, then fall back to agent image + if imageAttachmentPath != "" { + // Load the attached image + file, err := os.Open(imageAttachmentPath) + if err != nil { + logger.Error("failed to open attached image", "path", imageAttachmentPath, "error", err) + // Fall back to showing agent image + loadImage() + } else { + defer file.Close() + img, _, err := image.Decode(file) + if err != nil { + logger.Error("failed to decode attached image", "path", imageAttachmentPath, "error", err) + // Fall back to showing agent image + loadImage() + } else { + imgView.SetImage(img) + } + } + } else { + // No attached image, show agent image as before + loadImage() + } pages.AddPage(imgPage, imgView, true, true) return nil } @@ -977,6 +1010,13 @@ func init() { colorText() } go chatRound(msgText, persona, textView, false, false) + // Also clear any image attachment after sending the message + go func() { + // Wait a short moment for the message to be processed, then clear the image attachment + // This allows the image to be sent with the current message if it was attached + // But clears it for the next message + ClearImageAttachment() + }() return nil } if event.Key() == tcell.KeyPgUp || event.Key() == tcell.KeyPgDn { @@ -990,3 +1030,10 @@ func init() { return event }) } + +// UpdateImageAttachmentStatus updates the UI to reflect the current image attachment status +func UpdateImageAttachmentStatus(imagePath string) { + // The image attachment status is now shown in the main status line + // Just update the status line to reflect the current image attachment + updateStatusLine() +} |
