summaryrefslogtreecommitdiff
path: root/agent
diff options
context:
space:
mode:
Diffstat (limited to 'agent')
-rw-r--r--agent/agent.go41
-rw-r--r--agent/pw_agent.go119
-rw-r--r--agent/pw_tools.go338
-rw-r--r--agent/request.go227
-rw-r--r--agent/webagent.go33
5 files changed, 758 insertions, 0 deletions
diff --git a/agent/agent.go b/agent/agent.go
new file mode 100644
index 0000000..8a6614f
--- /dev/null
+++ b/agent/agent.go
@@ -0,0 +1,41 @@
+package agent
+
+// I see two types of agents possible:
+// ones that make their own tool calls
+// ones that work only with the tool output
+
+// A: main chat -> agent (handles everything: tool + processing), supports tool chaining
+// B: main chat -> tool -> agent (process tool output)
+
+// AgenterA is the self-driving kind of agent: it gets a task like
+// "go to the webpage, login and take a screenshot (tell me what you see)",
+// makes a plan and executes it, including any tool calls.
+type AgenterA interface {
+ // ProcessTask runs the task and returns the final result as bytes. The
+ // signature has no error result, so a failure has to be reported inside
+ // the returned bytes.
+ ProcessTask(task string) []byte
+}
+
+// AgenterB defines an interface for agents that post-process the output of a
+// tool which has already been executed by someone else.
+type AgenterB interface {
+ // Process takes the original tool arguments and the raw output from the tool,
+ // and returns a cleaned/summarized version suitable for the main LLM context.
+ Process(args map[string]string, rawOutput []byte) []byte
+}
+
+// Package-level agent registries.
+//
+// RegistryB maps a tool name to the AgenterB that processes that tool's output.
+// RegistryA maps an AgenterA to the tool names it was registered with.
+var (
+	RegistryB = make(map[string]AgenterB)
+	RegistryA = make(map[AgenterA][]string)
+)
+
+// RegisterB stores a as the AgenterB for toolName in RegistryB.
+// If an agent already exists for the tool, it will be replaced.
+func RegisterB(toolName string, a AgenterB) {
+ RegistryB[toolName] = a
+}
+
+// RegisterA records in RegistryA the tool names associated with agent a.
+// Registering the same agent again replaces its previous tool list. The
+// slice is stored as passed in, without copying.
+func RegisterA(toolNames []string, a AgenterA) {
+ RegistryA[a] = toolNames
+}
+
+// Get returns the AgenterB registered in RegistryB for the given tool name,
+// or nil if none is registered.
+func Get(toolName string) AgenterB {
+ return RegistryB[toolName]
+}
diff --git a/agent/pw_agent.go b/agent/pw_agent.go
new file mode 100644
index 0000000..787d411
--- /dev/null
+++ b/agent/pw_agent.go
@@ -0,0 +1,119 @@
+package agent
+
+import (
+ "encoding/json"
+ "gf-lt/models"
+ "strings"
+)
+
+// PWAgent is an AgenterA type agent (self-contained, with tool chaining).
+// Its sysprompt explains the tools and how to plan for execution.
+type PWAgent struct {
+ // Embedded client: provides message building, the LLM request and the logger.
+ *AgentClient
+ // System prompt passed to FormFirstMsg by ProcessTask. The embedded
+ // AgentClient has a field with the same name; this one takes precedence
+ // when accessed as a.sysprompt.
+ sysprompt string
+}
+
+// NewPWAgent creates a PWAgent with the given client and system prompt.
+// Tools are not set here; see SetTools.
+func NewPWAgent(client *AgentClient, sysprompt string) *PWAgent {
+ return &PWAgent{AgentClient: client, sysprompt: sysprompt}
+}
+
+// SetTools sets the tools available to the agent. The slice is stored on the
+// embedded AgentClient as passed in, without copying.
+func (a *PWAgent) SetTools(tools []models.Tool) {
+ a.tools = tools
+}
+
+// ProcessTask runs task through an LLM/tool loop and returns the final answer.
+//
+// It starts a fresh conversation (system prompt + task), then repeatedly asks
+// the LLM for a reply. A reply without a tool call is returned as the result.
+// A reply with a tool call has the tool executed and its output sent back as
+// the next message. Failures are logged and reported in the returned bytes,
+// since the AgenterA interface has no error result.
+//
+// NOTE(review): the assistant reply itself is never appended to the chat
+// history here, so setToolCallOnLastMessage annotates whatever message is
+// currently last (the user task or the previous tool result) - confirm this
+// is intended.
+func (a *PWAgent) ProcessTask(task string) []byte {
+	// Upper bound on LLM round trips so a model that keeps requesting tools
+	// cannot keep the loop running forever.
+	const toolCallLimit = 10
+
+	req, err := a.FormFirstMsg(a.sysprompt, task)
+	if err != nil {
+		a.Log().Error("PWAgent failed to process the request", "error", err)
+		return []byte("PWAgent failed to process the request; err: " + err.Error())
+	}
+	for i := 0; i < toolCallLimit; i++ {
+		resp, err := a.LLMRequest(req)
+		if err != nil {
+			a.Log().Error("failed to process the request", "error", err)
+			return []byte("failed to process the request; err: " + err.Error())
+		}
+		execTool, toolCallID, hasToolCall := findToolCall(resp)
+		if !hasToolCall {
+			// No tool requested: this reply is the final answer.
+			return resp
+		}
+
+		a.setToolCallOnLastMessage(resp, toolCallID)
+
+		toolResp := string(execTool())
+		req, err = a.FormMsgWithToolCallID(toolResp, toolCallID)
+		if err != nil {
+			a.Log().Error("failed to form next message", "error", err)
+			return []byte("failed to form next message; err: " + err.Error())
+		}
+	}
+	// The budget is exhausted and the model never produced a final answer.
+	// Say so explicitly instead of handing the caller a silent nil.
+	a.Log().Error("PWAgent tool call limit reached", "limit", toolCallLimit)
+	return []byte("PWAgent tool call limit reached without a final answer")
+}
+
+// setToolCallOnLastMessage annotates the last message of the conversation
+// with the tool call found in resp: ToolCallID is always set, and ToolCall is
+// set when a tool name could be determined. It does nothing when toolCallID
+// is empty or the conversation has no messages.
+//
+// resp may be an OpenAI-style JSON envelope (choices[0].message.tool_calls),
+// a JSON object with a top-level "content" string, or plain text containing a
+// __tool_call__ block.
+func (a *PWAgent) setToolCallOnLastMessage(resp []byte, toolCallID string) {
+	if toolCallID == "" {
+		return
+	}
+	var (
+		name string
+		args map[string]string
+	)
+	var genericResp map[string]interface{}
+	if err := json.Unmarshal(resp, &genericResp); err != nil {
+		// LLMRequest usually returns the extracted text rather than the JSON
+		// envelope, so non-JSON input is treated as plain text, the same way
+		// findToolCall treats it.
+		name = extractToolNameFromText(string(resp))
+	} else {
+		name, args = pwFirstToolCall(genericResp)
+		if name == "" {
+			content, _ := genericResp["content"].(string)
+			name = extractToolNameFromText(content)
+		}
+	}
+	lastIdx := len(a.chatBody.Messages) - 1
+	if lastIdx < 0 {
+		return
+	}
+	last := &a.chatBody.Messages[lastIdx]
+	last.ToolCallID = toolCallID
+	if name == "" {
+		return
+	}
+	// Marshalling a map[string]string cannot fail, so the error is ignored.
+	argsJSON, _ := json.Marshal(args)
+	last.ToolCall = &models.ToolCall{
+		ID:   toolCallID,
+		Name: name,
+		Args: string(argsJSON),
+	}
+}
+
+// pwFirstToolCall returns the function name and arguments of
+// choices[0].message.tool_calls[0] in an OpenAI-style response, or "" and nil
+// when that path is absent or has an unexpected shape.
+func pwFirstToolCall(resp map[string]interface{}) (string, map[string]string) {
+	// A failed comma-ok assertion yields a nil slice/map; len() and indexing
+	// are safe on those, which keeps this lookup flat.
+	choices, _ := resp["choices"].([]interface{})
+	if len(choices) == 0 {
+		return "", nil
+	}
+	firstChoice, _ := choices[0].(map[string]interface{})
+	message, _ := firstChoice["message"].(map[string]interface{})
+	toolCalls, _ := message["tool_calls"].([]interface{})
+	if len(toolCalls) == 0 {
+		return "", nil
+	}
+	tc, _ := toolCalls[0].(map[string]interface{})
+	fn, ok := tc["function"].(map[string]interface{})
+	if !ok {
+		return "", nil
+	}
+	name, _ := fn["name"].(string)
+	argsStr, _ := fn["arguments"].(string)
+	var args map[string]string
+	// Best effort: arguments that do not decode leave args empty or partial.
+	_ = json.Unmarshal([]byte(argsStr), &args)
+	return name, args
+}
+
+// extractToolNameFromText returns the tool name declared in the first
+// __tool_call__ ... __tool_call__ block of text, or "" when there is no such
+// block or its JSON payload cannot be decoded.
+func extractToolNameFromText(text string) string {
+	match := toolCallRE.FindStringSubmatch(text)
+	if match == nil {
+		return ""
+	}
+	// match[1] is the text between the two markers.
+	payload := strings.TrimSpace(match[1])
+	open := strings.Index(payload, "{")
+	closing := strings.LastIndex(payload, "}")
+	if open < 0 || closing <= open {
+		return ""
+	}
+	var call models.FuncCall
+	if json.Unmarshal([]byte(payload[open:closing+1]), &call) != nil {
+		return ""
+	}
+	return call.Name
+}
diff --git a/agent/pw_tools.go b/agent/pw_tools.go
new file mode 100644
index 0000000..d72e0f3
--- /dev/null
+++ b/agent/pw_tools.go
@@ -0,0 +1,338 @@
+package agent
+
+import (
+ "encoding/json"
+ "fmt"
+ "regexp"
+ "strconv"
+ "strings"
+
+ "gf-lt/models"
+)
+
+// ToolFunc is the signature of a tool implementation: it receives the call's
+// arguments as a string map and returns the tool output as bytes.
+type ToolFunc func(map[string]string) []byte
+
+// pwToolMap maps a tool name to its implementation. It is filled through
+// RegisterPWTool and read when a parsed tool call is executed.
+var pwToolMap = make(map[string]ToolFunc)
+
+// RegisterPWTool registers fn as the implementation of the tool called name,
+// replacing any implementation registered earlier under that name.
+func RegisterPWTool(name string, fn ToolFunc) {
+ pwToolMap[name] = fn
+}
+
+// GetPWTools returns the tool definitions declared in pwTools. The
+// package-level slice itself is returned, not a copy, so changes made by the
+// caller are visible to everyone else using it.
+func GetPWTools() []models.Tool {
+ return pwTools
+}
+
+// pwTools lists the tool definitions (name, description and parameter schema)
+// for the pw_* browser automation tools. The implementations are not defined
+// here; they are looked up by name in pwToolMap when a call is executed.
+var pwTools = []models.Tool{
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_start",
+ Description: "Start a Playwright browser instance. Must be called first before any other browser automation. Uses headless mode by default.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{},
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_stop",
+ Description: "Stop the Playwright browser instance. Call when done with browser automation.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{},
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_is_running",
+ Description: "Check if Playwright browser is currently running.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{},
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_navigate",
+ Description: "Navigate to a URL in the browser.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{"url"},
+ Properties: map[string]models.ToolArgProps{
+ "url": {Type: "string", Description: "URL to navigate to"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_click",
+ Description: "Click on an element on the current webpage. Use 'index' for multiple matches (default 0).",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{"selector"},
+ Properties: map[string]models.ToolArgProps{
+ "selector": {Type: "string", Description: "CSS selector for the element"},
+ "index": {Type: "integer", Description: "Index for multiple matches (default 0)"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_fill",
+ Description: "Type text into an input field. Use 'index' for multiple matches (default 0).",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{"selector", "text"},
+ Properties: map[string]models.ToolArgProps{
+ "selector": {Type: "string", Description: "CSS selector for the input element"},
+ "text": {Type: "string", Description: "Text to type into the field"},
+ "index": {Type: "integer", Description: "Index for multiple matches (default 0)"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_extract_text",
+ Description: "Extract text content from the page or specific elements. Use selector 'body' for all page text.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{
+ "selector": {Type: "string", Description: "CSS selector (default 'body' for all page text)"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_screenshot",
+ Description: "Take a screenshot of the page or a specific element. Returns a file path to the image.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{
+ "selector": {Type: "string", Description: "CSS selector for element to screenshot"},
+ "full_page": {Type: "boolean", Description: "Capture full page (default false)"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_screenshot_and_view",
+ Description: "Take a screenshot and return the image for viewing. Use to visually verify page state.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{
+ "selector": {Type: "string", Description: "CSS selector for element to screenshot"},
+ "full_page": {Type: "boolean", Description: "Capture full page (default false)"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_wait_for_selector",
+ Description: "Wait for an element to appear on the page before proceeding.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{"selector"},
+ Properties: map[string]models.ToolArgProps{
+ "selector": {Type: "string", Description: "CSS selector to wait for"},
+ "timeout": {Type: "integer", Description: "Timeout in milliseconds (default 30000)"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_drag",
+ Description: "Drag the mouse from point (x1,y1) to (x2,y2).",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{"x1", "y1", "x2", "y2"},
+ Properties: map[string]models.ToolArgProps{
+ "x1": {Type: "number", Description: "Starting X coordinate"},
+ "y1": {Type: "number", Description: "Starting Y coordinate"},
+ "x2": {Type: "number", Description: "Ending X coordinate"},
+ "y2": {Type: "number", Description: "Ending Y coordinate"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_click_at",
+ Description: "Click at specific X,Y coordinates on the page.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{"x", "y"},
+ Properties: map[string]models.ToolArgProps{
+ "x": {Type: "number", Description: "X coordinate"},
+ "y": {Type: "number", Description: "Y coordinate"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_get_html",
+ Description: "Get the HTML content of the page or a specific element.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{
+ "selector": {Type: "string", Description: "CSS selector (default 'body')"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_get_dom",
+ Description: "Get a structured DOM representation with tag, attributes, text, and children.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{
+ "selector": {Type: "string", Description: "CSS selector (default 'body')"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_search_elements",
+ Description: "Search for elements by text content or CSS selector.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{
+ "text": {Type: "string", Description: "Text content to search for"},
+ "selector": {Type: "string", Description: "CSS selector to search for"},
+ },
+ },
+ },
+ },
+}
+
+// toolCallRE matches the first text-format tool call: a payload wrapped in a
+// pair of __tool_call__ markers, captured (non-greedily) in group 1. The (?s)
+// flag lets '.' match newlines, so a payload whose JSON is spread over
+// several lines is still recognized.
+var toolCallRE = regexp.MustCompile(`(?s)__tool_call__(.+?)__tool_call__`)
+
+// ParsedToolCall holds the parts of a tool call: its ID, the tool name and
+// the string arguments.
+// NOTE(review): nothing in the code visible here uses this type - confirm it
+// is still needed.
+type ParsedToolCall struct {
+ ID string
+ Name string
+ Args map[string]string
+}
+
+// findToolCall looks for a tool call in an LLM response.
+//
+// It returns a function that executes the call, the tool call ID, and whether
+// a call was found. Sources are tried in this order: OpenAI-style
+// choices[0].message.tool_calls, then a __tool_call__ block in
+// choices[0].message.content, choices[0].text, the top-level "content"
+// string, and finally the raw response text. Input that is not a JSON object
+// is treated as plain text.
+func findToolCall(resp []byte) (func() []byte, string, bool) {
+	var envelope map[string]interface{}
+	if json.Unmarshal(resp, &envelope) != nil {
+		return findToolCallFromText(string(resp))
+	}
+	// A failed comma-ok assertion yields a nil slice/map; len() and indexing
+	// are safe on those, so the lookups below need no nesting.
+	choices, _ := envelope["choices"].([]interface{})
+	if len(choices) > 0 {
+		choice, _ := choices[0].(map[string]interface{})
+		message, _ := choice["message"].(map[string]interface{})
+		if calls, _ := message["tool_calls"].([]interface{}); len(calls) > 0 {
+			return parseOpenAIToolCall(calls)
+		}
+		if content, isStr := message["content"].(string); isStr {
+			return findToolCallFromText(content)
+		}
+		if text, isStr := choice["text"].(string); isStr {
+			return findToolCallFromText(text)
+		}
+	}
+	if content, isStr := envelope["content"].(string); isStr {
+		return findToolCallFromText(content)
+	}
+	return findToolCallFromText(string(resp))
+}
+
+// parseOpenAIToolCall turns the first entry of an OpenAI-style tool_calls
+// array into an executable call.
+//
+// It returns a function that runs the tool, the call ID, and true; or
+// (nil, "", false) when toolCalls is empty. Problems with the entry (wrong
+// shape, undecodable arguments, unknown tool) do not panic: the returned
+// function yields a JSON object of the form {"error": "..."} instead, so the
+// problem can be sent back to the model.
+func parseOpenAIToolCall(toolCalls []interface{}) (func() []byte, string, bool) {
+	if len(toolCalls) == 0 {
+		return nil, "", false
+	}
+	tc, ok := toolCalls[0].(map[string]interface{})
+	if !ok {
+		return func() []byte {
+			return openAIToolError("malformed tool call entry")
+		}, "", true
+	}
+	id, _ := tc["id"].(string)
+	function, _ := tc["function"].(map[string]interface{})
+	name, _ := function["name"].(string)
+	argsStr, _ := function["arguments"].(string)
+	args, err := decodeOpenAIToolArgs(argsStr)
+	if err != nil {
+		return func() []byte {
+			return openAIToolError("failed to parse arguments: " + err.Error())
+		}, id, true
+	}
+	return func() []byte {
+		fn, ok := pwToolMap[name]
+		if !ok {
+			return openAIToolError("tool " + name + " not found")
+		}
+		return fn(args)
+	}, id, true
+}
+
+// decodeOpenAIToolArgs decodes a JSON object of tool arguments into the
+// string map tool implementations expect. Empty input means "no arguments".
+// Values that are not JSON strings (numbers, booleans, ...) are kept as their
+// literal JSON text, e.g. 2 becomes "2" and true becomes "true".
+func decodeOpenAIToolArgs(raw string) (map[string]string, error) {
+	if strings.TrimSpace(raw) == "" {
+		return map[string]string{}, nil
+	}
+	var generic map[string]json.RawMessage
+	if err := json.Unmarshal([]byte(raw), &generic); err != nil {
+		return nil, err
+	}
+	args := make(map[string]string, len(generic))
+	for key, value := range generic {
+		var s string
+		if err := json.Unmarshal(value, &s); err == nil {
+			args[key] = s
+			continue
+		}
+		args[key] = string(value)
+	}
+	return args, nil
+}
+
+// openAIToolError builds a {"error": "..."} payload. The message goes through
+// json.Marshal so quotes and control characters cannot break the JSON.
+func openAIToolError(msg string) []byte {
+	// Marshalling a string cannot fail.
+	quoted, _ := json.Marshal(msg)
+	return []byte(fmt.Sprintf(`{"error": %s}`, quoted))
+}
+
+// findToolCallFromText looks for a text-format tool call: a JSON object
+// wrapped in __tool_call__ markers.
+//
+// It returns (nil, "", false) when text contains no marker pair. Otherwise it
+// returns a function that runs the tool, the call ID (generated when the
+// payload carries none) and true. A payload that cannot be decoded still
+// counts as a tool call: the returned function then yields a JSON object of
+// the form {"error": "..."} and the ID is empty.
+func findToolCallFromText(text string) (func() []byte, string, bool) {
+	jsStr := toolCallRE.FindString(text)
+	if jsStr == "" {
+		return nil, "", false
+	}
+	jsStr = strings.TrimSpace(jsStr)
+	jsStr = strings.TrimPrefix(jsStr, "__tool_call__")
+	jsStr = strings.TrimSuffix(jsStr, "__tool_call__")
+	jsStr = strings.TrimSpace(jsStr)
+	// Keep only the outermost {...} so stray text around the JSON is ignored.
+	start := strings.Index(jsStr, "{")
+	end := strings.LastIndex(jsStr, "}")
+	if start == -1 || end == -1 || end <= start {
+		return func() []byte {
+			return []byte(`{"error": "no valid JSON found in tool call"}`)
+		}, "", true
+	}
+	jsStr = jsStr[start : end+1]
+	var fc models.FuncCall
+	if err := json.Unmarshal([]byte(jsStr), &fc); err != nil {
+		// The message goes through json.Marshal (which cannot fail for a
+		// string) so the payload stays valid JSON whatever the error says.
+		quoted, _ := json.Marshal("failed to parse tool call: " + err.Error())
+		payload := fmt.Sprintf(`{"error": %s}`, quoted)
+		return func() []byte {
+			return []byte(payload)
+		}, "", true
+	}
+	if fc.ID == "" {
+		fc.ID = "call_" + generateToolCallID()
+	}
+	return func() []byte {
+		fn, ok := pwToolMap[fc.Name]
+		if !ok {
+			quoted, _ := json.Marshal("tool " + fc.Name + " not found")
+			return []byte(fmt.Sprintf(`{"error": %s}`, quoted))
+		}
+		return fn(fc.Args)
+	}, fc.ID, true
+}
+
+// toolCallSeq counts the IDs handed out by generateToolCallID.
+var toolCallSeq int
+
+// generateToolCallID returns a new decimal ID on every call, so tool calls
+// within one process do not share an ID.
+// NOTE(review): the counter is not synchronized; add locking if agents start
+// parsing tool calls from several goroutines.
+func generateToolCallID() string {
+	toolCallSeq++
+	return strconv.Itoa(toolCallSeq)
+}
diff --git a/agent/request.go b/agent/request.go
new file mode 100644
index 0000000..095dc8e
--- /dev/null
+++ b/agent/request.go
@@ -0,0 +1,227 @@
+package agent
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "gf-lt/config"
+ "gf-lt/models"
+ "io"
+ "log/slog"
+ "net/http"
+ "strings"
+)
+
+// httpClient is the HTTP client shared by all agent LLM requests.
+// NOTE(review): no Timeout is set, so a request to an unresponsive endpoint
+// can block indefinitely - consider a timeout or a per-request context.
+var httpClient = &http.Client{}
+
+// defaultProps holds the sampling parameters buildRequest passes to the
+// request constructors for the completion and OpenRouter endpoints.
+var defaultProps = map[string]float32{
+ "temperature": 0.8,
+ "dry_multiplier": 0.0,
+ "min_p": 0.05,
+ "n_predict": -1.0,
+}
+
+// detectAPI classifies an endpoint URL by substring matching.
+//
+// isChat is true when the URL contains "/chat/completions"; isCompletion is
+// true when it contains "/completion" and is not a chat URL. isDeepSeek and
+// isOpenRouter report whether the URL mentions the respective provider host.
+func detectAPI(api string) (isCompletion, isChat, isDeepSeek, isOpenRouter bool) {
+	chat := strings.Contains(api, "/chat/completions")
+	completion := !chat && strings.Contains(api, "/completion")
+	deepSeek := strings.Contains(api, "deepseek.com")
+	openRouter := strings.Contains(api, "openrouter.ai")
+	return completion, chat, deepSeek, openRouter
+}
+
+// AgentClient builds LLM requests for an agent and sends them to the
+// configured endpoint. It keeps the conversation state between calls.
+type AgentClient struct {
+ // Configuration; the code here reads CurrentAPI, CurrentModel and ReasoningEffort.
+ cfg *config.Config
+ // Returns the bearer token placed in the Authorization header.
+ getToken func() string
+ log *slog.Logger
+ // Current conversation; created by FormFirstMsg and extended by FormMsg /
+ // FormMsgWithToolCallID.
+ chatBody *models.ChatBody
+ // System prompt of the current conversation, set by FormFirstMsg.
+ sysprompt string
+ // lastToolCallID string
+ // Tool definitions included in the request by buildRequest's default chat branch.
+ tools []models.Tool
+}
+
+// NewAgentClient creates an AgentClient from the configuration, the logger
+// and gt, the function used to obtain the bearer token. The conversation and
+// the tools start out unset.
+func NewAgentClient(cfg *config.Config, log *slog.Logger, gt func() string) *AgentClient {
+ return &AgentClient{
+ cfg: cfg,
+ getToken: gt,
+ log: log,
+ }
+}
+
+// Log returns the logger the client was created with.
+func (ag *AgentClient) Log() *slog.Logger {
+ return ag.log
+}
+
+// FormFirstMsg starts a new conversation made of the system prompt and the
+// user message msg, discarding any previous one, and returns the encoded
+// request body for the configured API. Streaming is disabled and the model
+// comes from the configuration.
+func (ag *AgentClient) FormFirstMsg(sysprompt, msg string) (io.Reader, error) {
+	ag.sysprompt = sysprompt
+	conversation := []models.RoleMsg{
+		{Role: "system", Content: ag.sysprompt},
+		{Role: "user", Content: msg},
+	}
+	ag.chatBody = &models.ChatBody{
+		Messages: conversation,
+		Stream:   false,
+		Model:    ag.cfg.CurrentModel,
+	}
+	payload, err := ag.buildRequest()
+	if err != nil {
+		return nil, err
+	}
+	return bytes.NewReader(payload), nil
+}
+
+// FormMsg appends msg to the conversation as a "tool" message without a tool
+// call ID and returns the encoded request body for the whole conversation.
+func (ag *AgentClient) FormMsg(msg string) (io.Reader, error) {
+	// Same message as FormMsgWithToolCallID builds, with the ID left empty.
+	return ag.FormMsgWithToolCallID(msg, "")
+}
+
+// FormMsgWithToolCallID appends msg to the conversation as a "tool" message
+// carrying toolCallID and returns the encoded request body for the whole
+// conversation.
+//
+// It returns an error when no conversation has been started with
+// FormFirstMsg, or when the request cannot be encoded.
+func (ag *AgentClient) FormMsgWithToolCallID(msg, toolCallID string) (io.Reader, error) {
+	if ag.chatBody == nil {
+		// Without this guard the append below dereferences a nil pointer.
+		return nil, fmt.Errorf("no conversation started: call FormFirstMsg first")
+	}
+	m := models.RoleMsg{
+		Role:       "tool",
+		Content:    msg,
+		ToolCallID: toolCallID,
+	}
+	ag.chatBody.Messages = append(ag.chatBody.Messages, m)
+	b, err := ag.buildRequest()
+	if err != nil {
+		return nil, err
+	}
+	return bytes.NewReader(b), nil
+}
+
+// buildRequest creates the appropriate LLM request based on the current API endpoint.
+// The endpoint kind comes from detectAPI(ag.cfg.CurrentAPI). Completion
+// endpoints get the conversation flattened into a single prompt; chat
+// endpoints get the chat body. The result is the JSON-encoded request.
+// NOTE(review): ag.tools is only included in the default (OpenAI format)
+// chat branch; the DeepSeek and OpenRouter requests built here carry no tools.
+func (ag *AgentClient) buildRequest() ([]byte, error) {
+ isCompletion, isChat, isDeepSeek, isOpenRouter := detectAPI(ag.cfg.CurrentAPI)
+ ag.log.Debug("agent building request", "api", ag.cfg.CurrentAPI, "isCompletion", isCompletion, "isChat", isChat, "isDeepSeek", isDeepSeek, "isOpenRouter", isOpenRouter)
+ // Build prompt for completion endpoints
+ if isCompletion {
+ // One ToPrompt() rendering per message, newline separated.
+ var sb strings.Builder
+ for i := range ag.chatBody.Messages {
+ sb.WriteString(ag.chatBody.Messages[i].ToPrompt())
+ sb.WriteString("\n")
+ }
+ prompt := strings.TrimSpace(sb.String())
+ switch {
+ case isDeepSeek:
+ // DeepSeek completion
+ req := models.NewDSCompletionReq(prompt, ag.chatBody.Model, defaultProps["temperature"], []string{})
+ req.Stream = false // Agents don't need streaming
+ return json.Marshal(req)
+ case isOpenRouter:
+ // OpenRouter completion
+ req := models.NewOpenRouterCompletionReq(ag.chatBody.Model, prompt, defaultProps, []string{})
+ req.Stream = false // Agents don't need streaming
+ return json.Marshal(req)
+ default:
+ // Assume llama.cpp completion
+ req := models.NewLCPReq(prompt, ag.chatBody.Model, nil, defaultProps, []string{})
+ req.Stream = false // Agents don't need streaming
+ return json.Marshal(req)
+ }
+ }
+ // Everything that is not a completion endpoint is treated as a chat endpoint.
+ switch {
+ case isDeepSeek:
+ // DeepSeek chat
+ req := models.NewDSChatReq(*ag.chatBody)
+ return json.Marshal(req)
+ case isOpenRouter:
+ // OpenRouter chat
+ // NOTE(review): the original comment said agents don't use reasoning by
+ // default, yet the configured ReasoningEffort is passed through - confirm.
+ req := models.NewOpenRouterChatReq(*ag.chatBody, defaultProps, ag.cfg.ReasoningEffort)
+ return json.Marshal(req)
+ default:
+ // Assume llama.cpp chat (OpenAI format)
+ req := models.OpenAIReq{
+ ChatBody: ag.chatBody,
+ Tools: ag.tools,
+ }
+ return json.Marshal(req)
+ }
+}
+
+// LLMRequest POSTs body to the configured API endpoint and returns the text
+// extracted from the reply (see extractTextFromResponse).
+//
+// On a transport or read failure it returns a nil slice and the error. On an
+// HTTP status of 400 or above it returns the raw response body together with
+// an error that carries the status code and the start of the body.
+func (ag *AgentClient) LLMRequest(body io.Reader) ([]byte, error) {
+	// Buffer the payload so it can be both logged and sent.
+	bodyBytes, err := io.ReadAll(body)
+	if err != nil {
+		ag.log.Error("failed to read request body", "error", err)
+		return nil, err
+	}
+	req, err := http.NewRequest(http.MethodPost, ag.cfg.CurrentAPI, bytes.NewReader(bodyBytes))
+	if err != nil {
+		ag.log.Error("failed to create request", "error", err)
+		return nil, err
+	}
+	req.Header.Add("Accept", "application/json")
+	req.Header.Add("Content-Type", "application/json")
+	if ag.getToken != nil {
+		req.Header.Add("Authorization", "Bearer "+ag.getToken())
+	}
+	// Accept-Encoding is deliberately left unset. net/http requests gzip on
+	// its own and decompresses transparently, but only when the caller did
+	// not set the header; setting it by hand would hand back raw gzip bytes.
+	ag.log.Debug("agent LLM request", "url", ag.cfg.CurrentAPI, "body_preview", string(bodyBytes[:min(len(bodyBytes), 500)]))
+	resp, err := httpClient.Do(req)
+	if err != nil {
+		ag.log.Error("llamacpp api request failed", "error", err, "url", ag.cfg.CurrentAPI)
+		return nil, err
+	}
+	defer resp.Body.Close()
+	responseBytes, err := io.ReadAll(resp.Body)
+	if err != nil {
+		ag.log.Error("failed to read response", "error", err)
+		return nil, err
+	}
+	if resp.StatusCode >= 400 {
+		ag.log.Error("agent LLM request failed", "status", resp.StatusCode, "response", string(responseBytes[:min(len(responseBytes), 1000)]))
+		return responseBytes, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(responseBytes[:min(len(responseBytes), 200)]))
+	}
+	// Parse response and extract text content.
+	text, err := extractTextFromResponse(responseBytes)
+	if err != nil {
+		ag.log.Error("failed to extract text from response", "error", err, "response_preview", string(responseBytes[:min(len(responseBytes), 500)]))
+		// Return raw response as fallback.
+		return responseBytes, nil
+	}
+	return []byte(text), nil
+}
+
+// extractTextFromResponse parses common LLM response formats and extracts the
+// text content.
+//
+// Lookup order: choices[0].message.content, choices[0].text,
+// choices[0].delta.content, then the top-level "content" string. Input that
+// is not a JSON object is returned unchanged; a JSON object in none of these
+// formats is returned re-indented. The error result is always nil.
+func extractTextFromResponse(data []byte) (string, error) {
+	var parsed map[string]any
+	if json.Unmarshal(data, &parsed) != nil {
+		// Not a JSON object: hand the input back as text.
+		return string(data), nil
+	}
+	// A failed comma-ok assertion yields a nil slice/map; len() and indexing
+	// are safe on those, so the lookups below need no nesting.
+	choices, _ := parsed["choices"].([]any)
+	if len(choices) > 0 {
+		choice, _ := choices[0].(map[string]any)
+		// Chat completion format.
+		message, _ := choice["message"].(map[string]any)
+		if content, isStr := message["content"].(string); isStr {
+			return content, nil
+		}
+		// Plain completion format.
+		if text, isStr := choice["text"].(string); isStr {
+			return text, nil
+		}
+		// Streaming delta format (not expected with stream: false).
+		delta, _ := choice["delta"].(map[string]any)
+		if content, isStr := delta["content"].(string); isStr {
+			return content, nil
+		}
+	}
+	// llama.cpp completion format.
+	if content, isStr := parsed["content"].(string); isStr {
+		return content, nil
+	}
+	// Unknown format: return the JSON re-indented.
+	pretty, err := json.MarshalIndent(parsed, "", " ")
+	if err != nil {
+		return string(data), nil
+	}
+	return string(pretty), nil
+}
diff --git a/agent/webagent.go b/agent/webagent.go
new file mode 100644
index 0000000..e8ca3a2
--- /dev/null
+++ b/agent/webagent.go
@@ -0,0 +1,33 @@
+package agent
+
+import (
+ "fmt"
+)
+
+// WebAgentB is an AgenterB type agent: it sends a tool's arguments and raw
+// output to the LLM in a single request and returns the LLM's reply.
+type WebAgentB struct {
+ // Embedded client: provides message building, the LLM request and the logger.
+ *AgentClient
+ // System prompt used for each Process call.
+ sysprompt string
+}
+
+// NewWebAgentB creates a WebAgentB with the given client and system prompt.
+func NewWebAgentB(client *AgentClient, sysprompt string) *WebAgentB {
+ return &WebAgentB{AgentClient: client, sysprompt: sysprompt}
+}
+
+// Process asks the LLM to process a tool result. It starts a fresh
+// conversation from the agent's system prompt and a user message holding the
+// tool arguments and the raw output, sends one request, and returns the
+// reply. Failures are logged and reported in the returned bytes.
+func (a *WebAgentB) Process(args map[string]string, rawOutput []byte) []byte {
+	prompt := fmt.Sprintf("request:\n%+v\ntool response:\n%v", args, string(rawOutput))
+	req, err := a.FormFirstMsg(a.sysprompt, prompt)
+	if err != nil {
+		a.Log().Error("failed to process the request", "error", err)
+		return []byte("failed to process the request; err: " + err.Error())
+	}
+	answer, err := a.LLMRequest(req)
+	if err != nil {
+		a.Log().Error("failed to process the request", "error", err)
+		return []byte("failed to process the request; err: " + err.Error())
+	}
+	return answer
+}