From c21074a8129007509d45c460a8df56917f721cf0 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Thu, 20 Nov 2025 19:13:04 +0300 Subject: Feat: input img --- models/models.go | 205 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 200 insertions(+), 5 deletions(-) (limited to 'models/models.go') diff --git a/models/models.go b/models/models.go index 0a10da1..ea9bf2e 100644 --- a/models/models.go +++ b/models/models.go @@ -1,7 +1,10 @@ package models import ( + "encoding/base64" + "encoding/json" "fmt" + "os" "strings" ) @@ -69,23 +72,215 @@ type TextChunk struct { FuncName string } +type TextContentPart struct { + Type string `json:"type"` + Text string `json:"text"` +} + +type ImageContentPart struct { + Type string `json:"type"` + ImageURL struct { + URL string `json:"url"` + } `json:"image_url"` +} + +// RoleMsg represents a message with content that can be either a simple string or structured content parts type RoleMsg struct { - Role string `json:"role"` - Content string `json:"content"` + Role string `json:"role"` + Content string `json:"-"` + ContentParts []interface{} `json:"-"` + hasContentParts bool // Flag to indicate which content type to marshal +} + +// MarshalJSON implements custom JSON marshaling for RoleMsg +func (m RoleMsg) MarshalJSON() ([]byte, error) { + if m.hasContentParts { + // Use structured content format + aux := struct { + Role string `json:"role"` + Content []interface{} `json:"content"` + }{ + Role: m.Role, + Content: m.ContentParts, + } + return json.Marshal(aux) + } else { + // Use simple content format + aux := struct { + Role string `json:"role"` + Content string `json:"content"` + }{ + Role: m.Role, + Content: m.Content, + } + return json.Marshal(aux) + } +} + +// UnmarshalJSON implements custom JSON unmarshaling for RoleMsg +func (m *RoleMsg) UnmarshalJSON(data []byte) error { + // First, try to unmarshal as structured content format + var structured struct { + Role string `json:"role"` + Content []interface{} `json:"content"` + } + if err := json.Unmarshal(data, &structured); err == nil && len(structured.Content) > 0 { + m.Role = structured.Role + m.ContentParts = structured.Content + m.hasContentParts = true + return nil + } + + // Otherwise, unmarshal as simple content format + var simple struct { + Role string `json:"role"` + Content string `json:"content"` + } + if err := json.Unmarshal(data, &simple); err != nil { + return err + } + m.Role = simple.Role + m.Content = simple.Content + m.hasContentParts = false + return nil } func (m RoleMsg) ToText(i int) string { icon := fmt.Sprintf("(%d)", i) + + // Convert content to string representation + contentStr := "" + if !m.hasContentParts { + contentStr = m.Content + } else { + // For structured content, just take the text parts + for _, part := range m.ContentParts { + if partMap, ok := part.(map[string]interface{}); ok { + if partType, exists := partMap["type"]; exists && partType == "text" { + if textVal, textExists := partMap["text"]; textExists { + if textStr, isStr := textVal.(string); isStr { + contentStr += textStr + " " + } + } + } + } + } + } + // check if already has role annotation (/completion makes them) - if !strings.HasPrefix(m.Content, m.Role+":") { + if !strings.HasPrefix(contentStr, m.Role+":") { icon = fmt.Sprintf("(%d) <%s>: ", i, m.Role) } - textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, m.Content) + textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, contentStr) return strings.ReplaceAll(textMsg, "\n\n", "\n") } func (m RoleMsg) ToPrompt() string { - return strings.ReplaceAll(fmt.Sprintf("%s:\n%s", m.Role, m.Content), "\n\n", "\n") + contentStr := "" + if !m.hasContentParts { + contentStr = m.Content + } else { + // For structured content, just take the text parts + for _, part := range m.ContentParts { + if partMap, ok := part.(map[string]interface{}); ok { + if partType, exists := partMap["type"]; exists && partType == "text" { + if textVal, textExists := partMap["text"]; textExists { + if textStr, isStr := textVal.(string); isStr { + contentStr += textStr + " " + } + } + } + } + } + } + return strings.ReplaceAll(fmt.Sprintf("%s:\n%s", m.Role, contentStr), "\n\n", "\n") +} + +// NewRoleMsg creates a simple RoleMsg with string content +func NewRoleMsg(role, content string) RoleMsg { + return RoleMsg{ + Role: role, + Content: content, + hasContentParts: false, + } +} + +// NewMultimodalMsg creates a RoleMsg with structured content parts (text and images) +func NewMultimodalMsg(role string, contentParts []interface{}) RoleMsg { + return RoleMsg{ + Role: role, + ContentParts: contentParts, + hasContentParts: true, + } +} + +// AddTextPart adds a text content part to the message +func (m *RoleMsg) AddTextPart(text string) { + if !m.hasContentParts { + // Convert to content parts format + if m.Content != "" { + m.ContentParts = []interface{}{TextContentPart{Type: "text", Text: m.Content}} + } else { + m.ContentParts = []interface{}{} + } + m.hasContentParts = true + } + + textPart := TextContentPart{Type: "text", Text: text} + m.ContentParts = append(m.ContentParts, textPart) +} + +// AddImagePart adds an image content part to the message +func (m *RoleMsg) AddImagePart(imageURL string) { + if !m.hasContentParts { + // Convert to content parts format + if m.Content != "" { + m.ContentParts = []interface{}{TextContentPart{Type: "text", Text: m.Content}} + } else { + m.ContentParts = []interface{}{} + } + m.hasContentParts = true + } + + imagePart := ImageContentPart{ + Type: "image_url", + ImageURL: struct { + URL string `json:"url"` + }{URL: imageURL}, + } + m.ContentParts = append(m.ContentParts, imagePart) +} + +// CreateImageURLFromPath creates a data URL from an image file path +func CreateImageURLFromPath(imagePath string) (string, error) { + // Read the image file + data, err := os.ReadFile(imagePath) + if err != nil { + return "", err + } + + // Determine the image format based on file extension + var mimeType string + switch { + case strings.HasSuffix(strings.ToLower(imagePath), ".png"): + mimeType = "image/png" + case strings.HasSuffix(strings.ToLower(imagePath), ".jpg"): + fallthrough + case strings.HasSuffix(strings.ToLower(imagePath), ".jpeg"): + mimeType = "image/jpeg" + case strings.HasSuffix(strings.ToLower(imagePath), ".gif"): + mimeType = "image/gif" + case strings.HasSuffix(strings.ToLower(imagePath), ".webp"): + mimeType = "image/webp" + default: + mimeType = "image/jpeg" // default + } + + // Encode to base64 + encoded := base64.StdEncoding.EncodeToString(data) + + // Create data URL + return fmt.Sprintf("data:%s;base64,%s", mimeType, encoded), nil } type ChatBody struct { -- cgit v1.2.3