| author | Grail Finder <wohilas@gmail.com> | 2025-12-09 15:03:21 +0300 |
|---|---|---|
| committer | Grail Finder <wohilas@gmail.com> | 2025-12-09 15:03:21 +0300 |
| commit | e1bac8d0646ab9e6eab976dac0199807cb79a09e | |
| tree | 48605fef19e4c5840620d372577f617adedeae2b /models | |
| parent | 378dceb3f4dbb19554e568489d9773c00af4e114 | |
Feat: image prompt for llama.cpp /completion
Diffstat (limited to 'models')
| -rw-r--r-- | models/models.go | 47 |
1 file changed, 34 insertions, 13 deletions
```diff
diff --git a/models/models.go b/models/models.go
index b4e7113..baadc8d 100644
--- a/models/models.go
+++ b/models/models.go
@@ -440,13 +440,14 @@ type LLMModels struct {
 
 type LlamaCPPReq struct {
 	Stream bool `json:"stream"`
-	// Messages []RoleMsg `json:"messages"`
-	Prompt        string   `json:"prompt"`
-	Temperature   float32  `json:"temperature"`
-	DryMultiplier float32  `json:"dry_multiplier"`
-	Stop          []string `json:"stop"`
-	MinP          float32  `json:"min_p"`
-	NPredict      int32    `json:"n_predict"`
+	// For multimodal requests, prompt should be an object with prompt_string and multimodal_data
+	// For regular requests, prompt is a string
+	Prompt        interface{} `json:"prompt"` // Can be string or object with prompt_string and multimodal_data
+	Temperature   float32     `json:"temperature"`
+	DryMultiplier float32     `json:"dry_multiplier"`
+	Stop          []string    `json:"stop"`
+	MinP          float32     `json:"min_p"`
+	NPredict      int32       `json:"n_predict"`
 	// MaxTokens int `json:"max_tokens"`
 	// DryBase float64 `json:"dry_base"`
 	// DryAllowedLength int `json:"dry_allowed_length"`
@@ -466,17 +467,37 @@ type LlamaCPPReq struct {
 	// Samplers string `json:"samplers"`
 }
 
-func NewLCPReq(prompt string, props map[string]float32, stopStrings []string) LlamaCPPReq {
+type PromptObject struct {
+	PromptString   string   `json:"prompt_string"`
+	MultimodalData []string `json:"multimodal_data,omitempty"`
+	// Alternative field name used by some llama.cpp implementations
+	ImageData []string `json:"image_data,omitempty"` // For compatibility
+}
+
+func NewLCPReq(prompt string, multimodalData []string, props map[string]float32, stopStrings []string) LlamaCPPReq {
+	var finalPrompt interface{}
+
+	if len(multimodalData) > 0 {
+		// When multimodal data is present, use the object format as per Python example:
+		// { "prompt": { "prompt_string": "...", "multimodal_data": [...] } }
+		finalPrompt = PromptObject{
+			PromptString:   prompt,
+			MultimodalData: multimodalData,
+			ImageData:      multimodalData, // Also populate for compatibility with different llama.cpp versions
+		}
+	} else {
+		// When no multimodal data, use plain string
+		finalPrompt = prompt
+	}
+
 	return LlamaCPPReq{
-		Stream: true,
-		Prompt: prompt,
-		// Temperature: 0.8,
-		// DryMultiplier: 0.5,
+		Stream:        true,
+		Prompt:        finalPrompt,
 		Temperature:   props["temperature"],
 		DryMultiplier: props["dry_multiplier"],
+		Stop:          stopStrings,
 		MinP:          props["min_p"],
 		NPredict:      int32(props["n_predict"]),
-		Stop: stopStrings,
 	}
 }
 
```
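For readers following the API change: below is a minimal, self-contained sketch (not part of the commit) of how the new `interface{}` prompt field serializes for plain and multimodal requests. The struct definitions are trimmed copies of those in `models/models.go`, and the `<base64-image>` strings are hypothetical placeholders; whether a given llama.cpp build's `/completion` endpoint reads `multimodal_data`, `image_data`, or both varies, which is why the commit populates both fields.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed copies of the committed types, reduced to the fields
// relevant to prompt serialization so the sketch runs standalone.
type PromptObject struct {
	PromptString   string   `json:"prompt_string"`
	MultimodalData []string `json:"multimodal_data,omitempty"`
	ImageData      []string `json:"image_data,omitempty"`
}

type LlamaCPPReq struct {
	Stream bool        `json:"stream"`
	Prompt interface{} `json:"prompt"`
}

func main() {
	// Text-only request: Prompt marshals as a plain JSON string.
	plain := LlamaCPPReq{Stream: true, Prompt: "describe this scene"}
	b, _ := json.Marshal(plain)
	fmt.Println(string(b))
	// {"stream":true,"prompt":"describe this scene"}

	// Multimodal request: Prompt marshals as an object; the image
	// payload here is a hypothetical base64 placeholder.
	multi := LlamaCPPReq{
		Stream: true,
		Prompt: PromptObject{
			PromptString:   "describe this scene",
			MultimodalData: []string{"<base64-image>"},
			ImageData:      []string{"<base64-image>"},
		},
	}
	b, _ = json.Marshal(multi)
	fmt.Println(string(b))
	// {"stream":true,"prompt":{"prompt_string":"describe this scene",
	//   "multimodal_data":["<base64-image>"],"image_data":["<base64-image>"]}}
}
```

Since `NewLCPReq` gained a `multimodalData` parameter, existing text-only call sites would presumably pass `nil` for it; `len` of a nil slice is 0, so they fall through to the plain-string branch unchanged.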
