summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--agent/agent.go12
-rw-r--r--agent/pw_agent.go126
-rw-r--r--agent/pw_tools.go349
-rw-r--r--agent/request.go145
-rw-r--r--agent/webagent.go3
-rw-r--r--config.example.toml2
-rw-r--r--storage/memory.go11
-rw-r--r--tools.go1506
-rw-r--r--tools/chain.go283
-rw-r--r--tools/fs.go753
-rw-r--r--tools_playwright.go77
11 files changed, 2286 insertions, 981 deletions
diff --git a/agent/agent.go b/agent/agent.go
index 8824ecb..8a6614f 100644
--- a/agent/agent.go
+++ b/agent/agent.go
@@ -4,11 +4,12 @@ package agent
// ones who do their own tools calls
// ones that works only with the output
-// A: main chat -> agent (handles everything: tool + processing)
+// A: main chat -> agent (handles everything: tool + processing), supports tool chaining
// B: main chat -> tool -> agent (process tool output)
-// AgenterA gets a task "find out weather in london"
-// proceeds to make tool calls on its own
+// AgenterA gets a task like "go to the webpage, log in and take a screenshot (tell me what you see)",
+// makes a plan and executes it,
+// then returns the final result or an error message.
type AgenterA interface {
ProcessTask(task string) []byte
}
@@ -38,8 +39,3 @@ func RegisterA(toolNames []string, a AgenterA) {
func Get(toolName string) AgenterB {
return RegistryB[toolName]
}
-
-// Register is a convenience wrapper for RegisterB.
-func Register(toolName string, a AgenterB) {
- RegisterB(toolName, a)
-}
diff --git a/agent/pw_agent.go b/agent/pw_agent.go
new file mode 100644
index 0000000..2807331
--- /dev/null
+++ b/agent/pw_agent.go
@@ -0,0 +1,126 @@
+package agent
+
+import (
+ "encoding/json"
+ "gf-lt/models"
+ "strings"
+)
+
+// PWAgent is an AgenterA-type agent: it is handed a task and drives its own
+// chain of tool calls (see ProcessTask).
+// sysprompt is the system prompt sent with the first message; it is expected
+// to explain the available tools and how to plan their execution.
+type PWAgent struct {
+ *AgentClient // embedded client: request building, LLM calls, logging
+ sysprompt string
+}
+
+// NewPWAgent builds a PWAgent that talks to the LLM through client and opens
+// every task with sysprompt as the system message.
+func NewPWAgent(client *AgentClient, sysprompt string) *PWAgent {
+	pw := new(PWAgent)
+	pw.AgentClient = client
+	pw.sysprompt = sysprompt
+	return pw
+}
+
+// SetTools sets the tools available to the agent.
+// The slice is stored as-is on the embedded AgentClient (no copy is made).
+func (a *PWAgent) SetTools(tools []models.Tool) {
+ a.tools = tools
+}
+
+// ProcessTask runs task through the LLM: it sends the system prompt and the
+// task, then loops, executing each tool call the model asks for and feeding
+// the tool output back, until the model answers without a tool call.
+//
+// It returns the final model response, or a human-readable error message when
+// a request cannot be built or sent, or when the model is still requesting
+// tools after toolCallLimit rounds.
+func (a *PWAgent) ProcessTask(task string) []byte {
+	req, err := a.FormFirstMsg(a.sysprompt, task)
+	if err != nil {
+		a.Log().Error("PWAgent failed to process the request", "error", err)
+		return []byte("PWAgent failed to process the request; err: " + err.Error())
+	}
+	// Upper bound on LLM round trips so a model that keeps calling tools
+	// cannot loop forever.
+	const toolCallLimit = 10
+	for i := 0; i < toolCallLimit; i++ {
+		resp, err := a.LLMRequest(req)
+		if err != nil {
+			a.Log().Error("failed to process the request", "error", err)
+			return []byte("failed to process the request; err: " + err.Error())
+		}
+		execTool, toolCallID, hasToolCall := findToolCall(resp)
+		if !hasToolCall {
+			// No tool requested: this is the final answer.
+			return resp
+		}
+
+		a.setToolCallOnLastMessage(resp, toolCallID)
+
+		toolResp := string(execTool())
+		req, err = a.FormMsgWithToolCallID(toolResp, toolCallID)
+		if err != nil {
+			a.Log().Error("failed to form next message", "error", err)
+			return []byte("failed to form next message; err: " + err.Error())
+		}
+	}
+	// The limit was reached while the model was still calling tools. Report it
+	// explicitly instead of returning nil, which the caller cannot tell apart
+	// from an empty answer.
+	a.Log().Warn("PWAgent reached the tool call limit", "limit", toolCallLimit)
+	return []byte("PWAgent stopped: tool call limit reached before a final answer was produced")
+}
+
+// setToolCallOnLastMessage records the tool call found in resp on the last
+// message of the conversation: it always sets ToolCallID and, when a tool name
+// can be determined, attaches a models.ToolCall with the call's name and
+// arguments.
+//
+// It does nothing when toolCallID is empty, when resp is not a JSON object, or
+// when the conversation has no messages.
+//
+// NOTE(review): as far as this file shows, ProcessTask does not append the
+// assistant reply to chatBody, so the "last message" is the previous user/tool
+// message — confirm this is intended.
+func (a *PWAgent) setToolCallOnLastMessage(resp []byte, toolCallID string) {
+	if toolCallID == "" {
+		return
+	}
+
+	var genericResp map[string]interface{}
+	if err := json.Unmarshal(resp, &genericResp); err != nil {
+		return
+	}
+
+	name, rawArgs := firstToolCallFunction(genericResp)
+	if name == "" {
+		// No structured tool call: look for a text-embedded one.
+		content, _ := genericResp["content"].(string)
+		name = extractToolNameFromText(content)
+	}
+
+	lastIdx := len(a.chatBody.Messages) - 1
+	if lastIdx < 0 {
+		return
+	}
+	last := &a.chatBody.Messages[lastIdx]
+	last.ToolCallID = toolCallID
+	if name == "" {
+		return
+	}
+
+	// Keep the arguments exactly as the model sent them. Decoding them into a
+	// map[string]string and re-encoding would silently drop every non-string
+	// argument (index, timeout, full_page, coordinates, ...).
+	argsJSON := "null"
+	var obj map[string]json.RawMessage
+	if err := json.Unmarshal([]byte(rawArgs), &obj); err == nil && obj != nil {
+		argsJSON = rawArgs
+	}
+	last.ToolCall = &models.ToolCall{
+		ID:   toolCallID,
+		Name: name,
+		Args: argsJSON,
+	}
+}
+
+// firstToolCallFunction returns the function name and the raw JSON arguments
+// string of choices[0].message.tool_calls[0] in an OpenAI-style response, or
+// empty strings when any part of that path is missing or has another type.
+func firstToolCallFunction(resp map[string]interface{}) (string, string) {
+	choices, _ := resp["choices"].([]interface{})
+	if len(choices) == 0 {
+		return "", ""
+	}
+	// Failed assertions yield nil maps; reading from a nil map is safe.
+	choice, _ := choices[0].(map[string]interface{})
+	message, _ := choice["message"].(map[string]interface{})
+	toolCalls, _ := message["tool_calls"].([]interface{})
+	if len(toolCalls) == 0 {
+		return "", ""
+	}
+	tc, _ := toolCalls[0].(map[string]interface{})
+	fn, _ := tc["function"].(map[string]interface{})
+	name, _ := fn["name"].(string)
+	args, _ := fn["arguments"].(string)
+	return name, args
+}
+
+// extractToolNameFromText looks for a __tool_call__ ... __tool_call__ section
+// in text, decodes the JSON object inside it and returns its tool name.
+// It returns "" when there is no such section, no JSON object in it, or the
+// object cannot be decoded.
+func extractToolNameFromText(text string) string {
+	match := toolCallRE.FindStringSubmatch(text)
+	if match == nil {
+		return ""
+	}
+	// match[1] is the text between the two markers.
+	payload := strings.TrimSpace(match[1])
+
+	open, closing := strings.Index(payload, "{"), strings.LastIndex(payload, "}")
+	if open < 0 || closing <= open {
+		return ""
+	}
+
+	var call models.FuncCall
+	if json.Unmarshal([]byte(payload[open:closing+1]), &call) != nil {
+		return ""
+	}
+	return call.Name
+}
diff --git a/agent/pw_tools.go b/agent/pw_tools.go
new file mode 100644
index 0000000..19fd130
--- /dev/null
+++ b/agent/pw_tools.go
@@ -0,0 +1,349 @@
+package agent
+
+import (
+	"encoding/json"
+	"fmt"
+	"regexp"
+	"strings"
+	"sync/atomic"
+
+	"gf-lt/models"
+)
+
+// ToolFunc is the signature of a Playwright tool implementation: it receives
+// the call arguments as string key/value pairs and returns the tool output.
+type ToolFunc func(map[string]string) []byte
+
+// pwToolMap maps a tool name to its implementation. It is filled by
+// RegisterPWTool and looked up when a parsed tool call is executed.
+var pwToolMap = make(map[string]ToolFunc)
+
+// RegisterPWTool makes fn the implementation of the tool called name,
+// replacing any implementation registered under that name before.
+func RegisterPWTool(name string, fn ToolFunc) {
+ pwToolMap[name] = fn
+}
+
+// GetPWTools returns the Playwright tool definitions.
+// The returned slice is the package-level pwTools slice itself, not a copy.
+func GetPWTools() []models.Tool {
+ return pwTools
+}
+
+// pwTools is the static list of Playwright tool definitions (name,
+// description and parameter schema) returned by GetPWTools.
+// NOTE(review): several parameters are declared as integer/number/boolean,
+// while ToolFunc receives map[string]string — confirm that argument decoding
+// accepts non-string JSON values.
+var pwTools = []models.Tool{
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_start",
+ Description: "Start a Playwright browser instance. Must be called first before any other browser automation. Uses headless mode by default.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{},
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_stop",
+ Description: "Stop the Playwright browser instance. Call when done with browser automation.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{},
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_is_running",
+ Description: "Check if Playwright browser is currently running.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{},
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_navigate",
+ Description: "Navigate to a URL in the browser.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{"url"},
+ Properties: map[string]models.ToolArgProps{
+ "url": {Type: "string", Description: "URL to navigate to"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_click",
+ Description: "Click on an element on the current webpage. Use 'index' for multiple matches (default 0).",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{"selector"},
+ Properties: map[string]models.ToolArgProps{
+ "selector": {Type: "string", Description: "CSS selector for the element"},
+ "index": {Type: "integer", Description: "Index for multiple matches (default 0)"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_fill",
+ Description: "Type text into an input field. Use 'index' for multiple matches (default 0).",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{"selector", "text"},
+ Properties: map[string]models.ToolArgProps{
+ "selector": {Type: "string", Description: "CSS selector for the input element"},
+ "text": {Type: "string", Description: "Text to type into the field"},
+ "index": {Type: "integer", Description: "Index for multiple matches (default 0)"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_extract_text",
+ Description: "Extract text content from the page or specific elements. Use selector 'body' for all page text.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{
+ "selector": {Type: "string", Description: "CSS selector (default 'body' for all page text)"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_screenshot",
+ Description: "Take a screenshot of the page or a specific element. Returns a file path to the image.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{
+ "selector": {Type: "string", Description: "CSS selector for element to screenshot"},
+ "full_page": {Type: "boolean", Description: "Capture full page (default false)"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_screenshot_and_view",
+ Description: "Take a screenshot and return the image for viewing. Use to visually verify page state.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{
+ "selector": {Type: "string", Description: "CSS selector for element to screenshot"},
+ "full_page": {Type: "boolean", Description: "Capture full page (default false)"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_wait_for_selector",
+ Description: "Wait for an element to appear on the page before proceeding.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{"selector"},
+ Properties: map[string]models.ToolArgProps{
+ "selector": {Type: "string", Description: "CSS selector to wait for"},
+ "timeout": {Type: "integer", Description: "Timeout in milliseconds (default 30000)"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_drag",
+ Description: "Drag the mouse from point (x1,y1) to (x2,y2).",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{"x1", "y1", "x2", "y2"},
+ Properties: map[string]models.ToolArgProps{
+ "x1": {Type: "number", Description: "Starting X coordinate"},
+ "y1": {Type: "number", Description: "Starting Y coordinate"},
+ "x2": {Type: "number", Description: "Ending X coordinate"},
+ "y2": {Type: "number", Description: "Ending Y coordinate"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_click_at",
+ Description: "Click at specific X,Y coordinates on the page.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{"x", "y"},
+ Properties: map[string]models.ToolArgProps{
+ "x": {Type: "number", Description: "X coordinate"},
+ "y": {Type: "number", Description: "Y coordinate"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_get_html",
+ Description: "Get the HTML content of the page or a specific element.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{
+ "selector": {Type: "string", Description: "CSS selector (default 'body')"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_get_dom",
+ Description: "Get a structured DOM representation with tag, attributes, text, and children.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{
+ "selector": {Type: "string", Description: "CSS selector (default 'body')"},
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_search_elements",
+ Description: "Search for elements by text content or CSS selector.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{
+ "text": {Type: "string", Description: "Text content to search for"},
+ "selector": {Type: "string", Description: "CSS selector to search for"},
+ },
+ },
+ },
+ },
+}
+
+// toolCallRE matches a text-embedded tool call: the shortest run of text
+// enclosed by two __tool_call__ markers. The (?s) flag lets "." match
+// newlines, so pretty-printed (multi-line) JSON between the markers is found.
+var toolCallRE = regexp.MustCompile(`(?s)__tool_call__(.+?)__tool_call__`)
+
+// ParsedToolCall holds the pieces of a decoded tool call: its ID, the tool
+// name and the string arguments.
+// NOTE(review): not referenced in the visible part of this diff — confirm it
+// is used elsewhere.
+type ParsedToolCall struct {
+ ID string
+ Name string
+ Args map[string]string
+}
+
+// findToolCall inspects an LLM response and reports whether it contains a
+// tool call. It returns a closure that executes the call, the tool call ID,
+// and true when a call was found.
+//
+// Lookup order: OpenAI-style choices[0].message.tool_calls, then a
+// text-embedded call in choices[0].message.content, choices[0].text or the
+// top-level content field, and finally the raw response treated as text
+// (also used when resp is not a JSON object).
+func findToolCall(resp []byte) (func() []byte, string, bool) {
+	var payload map[string]interface{}
+	if json.Unmarshal(resp, &payload) != nil {
+		return findToolCallFromText(string(resp))
+	}
+
+	choices, _ := payload["choices"].([]interface{})
+	if len(choices) > 0 {
+		if choice, isObj := choices[0].(map[string]interface{}); isObj {
+			if msg, hasMsg := choice["message"].(map[string]interface{}); hasMsg {
+				if calls, _ := msg["tool_calls"].([]interface{}); len(calls) > 0 {
+					return parseOpenAIToolCall(calls)
+				}
+				if body, isStr := msg["content"].(string); isStr {
+					return findToolCallFromText(body)
+				}
+			}
+			if raw, isStr := choice["text"].(string); isStr {
+				return findToolCallFromText(raw)
+			}
+		}
+	}
+
+	if body, isStr := payload["content"].(string); isStr {
+		return findToolCallFromText(body)
+	}
+	return findToolCallFromText(string(resp))
+}
+
+// parseOpenAIToolCall turns the first entry of an OpenAI-style tool_calls
+// array into an executable closure. It returns the closure, the tool call ID
+// and true; for an empty array it returns (nil, "", false).
+//
+// Problems with the call itself (malformed entry, undecodable arguments,
+// unknown tool) are not returned as Go errors: the closure yields a JSON
+// error payload so the caller can feed it back to the model as tool output.
+// Only the first tool call is handled; additional entries are ignored.
+func parseOpenAIToolCall(toolCalls []interface{}) (func() []byte, string, bool) {
+	if len(toolCalls) == 0 {
+		return nil, "", false
+	}
+
+	// A checked assertion: a malformed entry must not panic the agent.
+	tc, ok := toolCalls[0].(map[string]interface{})
+	if !ok {
+		return func() []byte {
+			return []byte(`{"error": "malformed tool call"}`)
+		}, "", true
+	}
+	id, _ := tc["id"].(string)
+	function, _ := tc["function"].(map[string]interface{})
+	name, _ := function["name"].(string)
+	argsStr, _ := function["arguments"].(string)
+
+	args, err := decodeToolArgs(argsStr)
+	if err != nil {
+		return func() []byte {
+			return []byte(fmt.Sprintf(`{"error": "failed to parse arguments: %v"}`, err))
+		}, id, true
+	}
+
+	return func() []byte {
+		fn, ok := pwToolMap[name]
+		if !ok {
+			return []byte(fmt.Sprintf(`{"error": "tool %s not found"}`, name))
+		}
+		return fn(args)
+	}, id, true
+}
+
+// decodeToolArgs decodes a JSON object of tool arguments into the string map
+// expected by ToolFunc. String values are taken as-is; any other JSON value
+// (number, boolean, array, object) is kept as its JSON text, so {"index": 1}
+// becomes {"index": "1"}. A null value becomes "". An empty or blank argsStr
+// yields an empty map, because tools without parameters may be called with no
+// arguments at all.
+func decodeToolArgs(argsStr string) (map[string]string, error) {
+	args := make(map[string]string)
+	if strings.TrimSpace(argsStr) == "" {
+		return args, nil
+	}
+	var raw map[string]json.RawMessage
+	if err := json.Unmarshal([]byte(argsStr), &raw); err != nil {
+		return nil, err
+	}
+	for key, val := range raw {
+		var s string
+		if err := json.Unmarshal(val, &s); err == nil {
+			args[key] = s
+			continue
+		}
+		args[key] = strings.TrimSpace(string(val))
+	}
+	return args, nil
+}
+
+// findToolCallFromText looks for a text-embedded tool call of the form
+// __tool_call__ {json} __tool_call__ in text. It returns a closure that
+// executes the call, the tool call ID and true; when text contains no such
+// section it returns (nil, "", false).
+//
+// A section that holds no decodable JSON object still counts as a tool call:
+// the closure then yields a JSON error payload (with an empty ID) so the
+// model can be told what went wrong. Calls without an ID get a generated one.
+func findToolCallFromText(text string) (func() []byte, string, bool) {
+	jsStr := toolCallRE.FindString(text)
+	if jsStr == "" {
+		return nil, "", false
+	}
+
+	// Drop the surrounding markers, then any whitespace around the payload.
+	jsStr = strings.TrimSpace(jsStr)
+	jsStr = strings.TrimPrefix(jsStr, "__tool_call__")
+	jsStr = strings.TrimSuffix(jsStr, "__tool_call__")
+	jsStr = strings.TrimSpace(jsStr)
+
+	// Keep only the outermost {...} so stray text around the object is ignored.
+	start := strings.Index(jsStr, "{")
+	end := strings.LastIndex(jsStr, "}")
+	if start == -1 || end == -1 || end <= start {
+		return func() []byte {
+			return []byte(`{"error": "no valid JSON found in tool call"}`)
+		}, "", true
+	}
+
+	jsStr = jsStr[start : end+1]
+
+	var fc models.FuncCall
+	if err := json.Unmarshal([]byte(jsStr), &fc); err != nil {
+		return func() []byte {
+			// The closing quote after %v keeps the payload well-formed JSON.
+			return []byte(fmt.Sprintf(`{"error": "failed to parse tool call: %v"}`, err))
+		}, "", true
+	}
+
+	if fc.ID == "" {
+		fc.ID = "call_" + generateToolCallID()
+	}
+
+	return func() []byte {
+		fn, ok := pwToolMap[fc.Name]
+		if !ok {
+			return []byte(fmt.Sprintf(`{"error": "tool %s not found"}`, fc.Name))
+		}
+		return fn(fc.Args)
+	}, fc.ID, true
+}
+
+// toolCallSeq counts the IDs handed out by generateToolCallID.
+var toolCallSeq atomic.Uint64
+
+// generateToolCallID returns a decimal ID that is different on every call
+// within the process. Deriving the ID from the number of registered tools, as
+// before, produced the same value for every tool call of a conversation.
+func generateToolCallID() string {
+	return fmt.Sprintf("%d", toolCallSeq.Add(1))
+}
diff --git a/agent/request.go b/agent/request.go
index f42b06e..754f16e 100644
--- a/agent/request.go
+++ b/agent/request.go
@@ -30,12 +30,16 @@ func detectAPI(api string) (isCompletion, isChat, isDeepSeek, isOpenRouter bool)
}
+// AgentClient builds LLM requests for agents and keeps the state of the
+// current agent conversation (chatBody) between calls.
type AgentClient struct {
- cfg *config.Config
- getToken func() string
- log slog.Logger
+ cfg *config.Config
+ getToken func() string
+ log *slog.Logger // returned by Log()
+ chatBody *models.ChatBody // conversation so far; (re)created by FormFirstMsg, appended to by FormMsg / FormMsgWithToolCallID
+ sysprompt string // system prompt of the current conversation, set by FormFirstMsg
+ lastToolCallID string // NOTE(review): not referenced in the visible part of this diff — confirm it is used
+ tools []models.Tool // tool definitions sent in the OpenAI-format chat request
}
-func NewAgentClient(cfg *config.Config, log slog.Logger, gt func() string) *AgentClient {
+func NewAgentClient(cfg *config.Config, log *slog.Logger, gt func() string) *AgentClient {
return &AgentClient{
cfg: cfg,
getToken: gt,
@@ -44,93 +48,99 @@ func NewAgentClient(cfg *config.Config, log slog.Logger, gt func() string) *Agen
}
+// Log returns the client's logger.
func (ag *AgentClient) Log() *slog.Logger {
- return &ag.log
+ return ag.log
}
-func (ag *AgentClient) FormMsg(sysprompt, msg string) (io.Reader, error) {
- b, err := ag.buildRequest(sysprompt, msg)
+// FormFirstMsg starts a new conversation: it stores sysprompt, replaces
+// chatBody with a fresh system+user message pair (non-streaming, model taken
+// from cfg.CurrentModel) and returns the encoded request body.
+// Any conversation previously held by this client is discarded.
+func (ag *AgentClient) FormFirstMsg(sysprompt, msg string) (io.Reader, error) {
+ ag.sysprompt = sysprompt
+ ag.chatBody = &models.ChatBody{
+ Messages: []models.RoleMsg{
+ {Role: "system", Content: ag.sysprompt},
+ {Role: "user", Content: msg},
+ },
+ Stream: false,
+ Model: ag.cfg.CurrentModel,
+ }
+ b, err := ag.buildRequest()
if err != nil {
return nil, err
}
return bytes.NewReader(b), nil
}
-// buildRequest creates the appropriate LLM request based on the current API endpoint.
-func (ag *AgentClient) buildRequest(sysprompt, msg string) ([]byte, error) {
- api := ag.cfg.CurrentAPI
- model := ag.cfg.CurrentModel
- messages := []models.RoleMsg{
- {Role: "system", Content: sysprompt},
- {Role: "user", Content: msg},
+// FormMsg appends msg to the conversation as a "tool" message without a tool
+// call ID and returns the encoded request body for the next LLM call.
+// It is shorthand for FormMsgWithToolCallID(msg, ""), so both entry points
+// share one implementation.
+func (ag *AgentClient) FormMsg(msg string) (io.Reader, error) {
+	return ag.FormMsgWithToolCallID(msg, "")
+}
- // Determine API type
- isCompletion, isChat, isDeepSeek, isOpenRouter := detectAPI(api)
- ag.log.Debug("agent building request", "api", api, "isCompletion", isCompletion, "isChat", isChat, "isDeepSeek", isDeepSeek, "isOpenRouter", isOpenRouter)
+// FormMsgWithToolCallID appends msg to the conversation as a "tool" message
+// answering the tool call toolCallID and returns the encoded request body for
+// the next LLM call.
+//
+// It returns an error when no conversation has been started with FormFirstMsg
+// (instead of dereferencing a nil chatBody) or when the request cannot be
+// built.
+func (ag *AgentClient) FormMsgWithToolCallID(msg, toolCallID string) (io.Reader, error) {
+	if ag.chatBody == nil {
+		return nil, fmt.Errorf("no agent conversation started: call FormFirstMsg first")
+	}
+	ag.chatBody.Messages = append(ag.chatBody.Messages, models.RoleMsg{
+		Role:       "tool",
+		Content:    msg,
+		ToolCallID: toolCallID,
+	})
+	b, err := ag.buildRequest()
+	if err != nil {
+		return nil, err
+	}
+	return bytes.NewReader(b), nil
+}
+// buildRequest creates the appropriate LLM request based on the current API endpoint.
+func (ag *AgentClient) buildRequest() ([]byte, error) {
+ isCompletion, isChat, isDeepSeek, isOpenRouter := detectAPI(ag.cfg.CurrentAPI)
+ ag.log.Debug("agent building request", "api", ag.cfg.CurrentAPI, "isCompletion", isCompletion, "isChat", isChat, "isDeepSeek", isDeepSeek, "isOpenRouter", isOpenRouter)
// Build prompt for completion endpoints
if isCompletion {
var sb strings.Builder
- for i := range messages {
- sb.WriteString(messages[i].ToPrompt())
+ for i := range ag.chatBody.Messages {
+ sb.WriteString(ag.chatBody.Messages[i].ToPrompt())
sb.WriteString("\n")
}
prompt := strings.TrimSpace(sb.String())
-
switch {
case isDeepSeek:
// DeepSeek completion
- req := models.NewDSCompletionReq(prompt, model, defaultProps["temperature"], []string{})
+ req := models.NewDSCompletionReq(prompt, ag.chatBody.Model, defaultProps["temperature"], []string{})
req.Stream = false // Agents don't need streaming
return json.Marshal(req)
case isOpenRouter:
// OpenRouter completion
- req := models.NewOpenRouterCompletionReq(model, prompt, defaultProps, []string{})
+ req := models.NewOpenRouterCompletionReq(ag.chatBody.Model, prompt, defaultProps, []string{})
req.Stream = false // Agents don't need streaming
return json.Marshal(req)
default:
// Assume llama.cpp completion
- req := models.NewLCPReq(prompt, model, nil, defaultProps, []string{})
+ req := models.NewLCPReq(prompt, ag.chatBody.Model, nil, defaultProps, []string{})
req.Stream = false // Agents don't need streaming
return json.Marshal(req)
}
}
-
- // Chat completions endpoints
- if isChat || !isCompletion {
- chatBody := &models.ChatBody{
- Model: model,
- Stream: false, // Agents don't need streaming
- Messages: messages,
- }
-
- switch {
- case isDeepSeek:
- // DeepSeek chat
- req := models.NewDSChatReq(*chatBody)
- return json.Marshal(req)
- case isOpenRouter:
- // OpenRouter chat - agents don't use reasoning by default
- req := models.NewOpenRouterChatReq(*chatBody, defaultProps, "")
- return json.Marshal(req)
- default:
- // Assume llama.cpp chat (OpenAI format)
- req := models.OpenAIReq{
- ChatBody: chatBody,
- Tools: nil,
- }
- return json.Marshal(req)
+ switch {
+ case isDeepSeek:
+ // DeepSeek chat
+ req := models.NewDSChatReq(*ag.chatBody)
+ return json.Marshal(req)
+ case isOpenRouter:
+ // OpenRouter chat - reasoning effort is taken from the config (ReasoningEffort)
+ req := models.NewOpenRouterChatReq(*ag.chatBody, defaultProps, ag.cfg.ReasoningEffort)
+ return json.Marshal(req)
+ default:
+ // Assume llama.cpp chat (OpenAI format)
+ req := models.OpenAIReq{
+ ChatBody: ag.chatBody,
+ Tools: ag.tools,
}
+ return json.Marshal(req)
}
-
- // Fallback (should not reach here)
- ag.log.Warn("unknown API, using default chat completions format", "api", api)
- chatBody := &models.ChatBody{
- Model: model,
- Stream: false, // Agents don't need streaming
- Messages: messages,
- }
- return json.Marshal(chatBody)
}
func (ag *AgentClient) LLMRequest(body io.Reader) ([]byte, error) {
@@ -165,7 +175,6 @@ func (ag *AgentClient) LLMRequest(body io.Reader) ([]byte, error) {
ag.log.Error("agent LLM request failed", "status", resp.StatusCode, "response", string(responseBytes[:min(len(responseBytes), 1000)]))
return responseBytes, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(responseBytes[:min(len(responseBytes), 200)]))
}
-
// Parse response and extract text content
text, err := extractTextFromResponse(responseBytes)
if err != nil {
@@ -179,17 +188,16 @@ func (ag *AgentClient) LLMRequest(body io.Reader) ([]byte, error) {
// extractTextFromResponse parses common LLM response formats and extracts the text content.
func extractTextFromResponse(data []byte) (string, error) {
// Try to parse as generic JSON first
- var genericResp map[string]interface{}
+ var genericResp map[string]any
if err := json.Unmarshal(data, &genericResp); err != nil {
// Not JSON, return as string
return string(data), nil
}
-
// Check for OpenAI chat completion format
- if choices, ok := genericResp["choices"].([]interface{}); ok && len(choices) > 0 {
- if firstChoice, ok := choices[0].(map[string]interface{}); ok {
+ if choices, ok := genericResp["choices"].([]any); ok && len(choices) > 0 {
+ if firstChoice, ok := choices[0].(map[string]any); ok {
// Chat completion: choices[0].message.content
- if message, ok := firstChoice["message"].(map[string]interface{}); ok {
+ if message, ok := firstChoice["message"].(map[string]any); ok {
if content, ok := message["content"].(string); ok {
return content, nil
}
@@ -199,19 +207,17 @@ func extractTextFromResponse(data []byte) (string, error) {
return text, nil
}
// Delta format for streaming (should not happen with stream: false)
- if delta, ok := firstChoice["delta"].(map[string]interface{}); ok {
+ if delta, ok := firstChoice["delta"].(map[string]any); ok {
if content, ok := delta["content"].(string); ok {
return content, nil
}
}
}
}
-
// Check for llama.cpp completion format
if content, ok := genericResp["content"].(string); ok {
return content, nil
}
-
// Unknown format, return pretty-printed JSON
prettyJSON, err := json.MarshalIndent(genericResp, "", " ")
if err != nil {
@@ -219,10 +225,3 @@ func extractTextFromResponse(data []byte) (string, error) {
}
return string(prettyJSON), nil
}
-
-func min(a, b int) int {
- if a < b {
- return a
- }
- return b
-}
diff --git a/agent/webagent.go b/agent/webagent.go
index ff6cd86..e8ca3a2 100644
--- a/agent/webagent.go
+++ b/agent/webagent.go
@@ -17,7 +17,8 @@ func NewWebAgentB(client *AgentClient, sysprompt string) *WebAgentB {
// Process applies the formatting function to raw output
func (a *WebAgentB) Process(args map[string]string, rawOutput []byte) []byte {
- msg, err := a.FormMsg(a.sysprompt,
+ msg, err := a.FormFirstMsg(
+ a.sysprompt,
fmt.Sprintf("request:\n%+v\ntool response:\n%v", args, string(rawOutput)))
if err != nil {
a.Log().Error("failed to process the request", "error", err)
diff --git a/config.example.toml b/config.example.toml
index 665fed6..8e45734 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -61,4 +61,4 @@ StripThinkingFromAPI = true # Strip <think> blocks from messages before sending
ReasoningEffort = "medium"
# playwright tools
PlaywrightEnabled = false
-PlaywrightDebug = false
+PlaywrightDebug = false # when true, opens the browser in GUI mode (headless=false)
diff --git a/storage/memory.go b/storage/memory.go
index 406182f..71e3566 100644
--- a/storage/memory.go
+++ b/storage/memory.go
@@ -6,6 +6,7 @@ type Memories interface {
Memorise(m *models.Memory) (*models.Memory, error)
Recall(agent, topic string) (string, error)
RecallTopics(agent string) ([]string, error)
+ Forget(agent, topic string) error
}
func (p ProviderSQL) Memorise(m *models.Memory) (*models.Memory, error) {
@@ -52,3 +53,13 @@ func (p ProviderSQL) RecallTopics(agent string) ([]string, error) {
}
return topics, nil
}
+
+// Forget deletes the memory stored for agent under topic.
+// A failed delete is logged and its error returned.
+func (p ProviderSQL) Forget(agent, topic string) error {
+	const query = "DELETE FROM memories WHERE agent = $1 AND topic = $2"
+	if _, err := p.db.Exec(query, agent, topic); err != nil {
+		p.logger.Error("failed to delete memory", "query", query, "error", err)
+		return err
+	}
+	return nil
+}
diff --git a/tools.go b/tools.go
index 41b0b9b..494dc24 100644
--- a/tools.go
+++ b/tools.go
@@ -5,7 +5,10 @@ import (
"encoding/json"
"fmt"
"gf-lt/agent"
+ "gf-lt/config"
"gf-lt/models"
+ "gf-lt/storage"
+ "gf-lt/tools"
"io"
"os"
"os/exec"
@@ -40,94 +43,34 @@ Your current tools:
<tools>
[
{
-"name":"recall",
-"args": ["topic"],
-"when_to_use": "when asked about topic that user previously asked to memorise"
-},
-{
-"name":"memorise",
-"args": ["topic", "data"],
-"when_to_use": "when asked to memorise information under a topic"
-},
-{
-"name":"recall_topics",
-"args": [],
-"when_to_use": "to see what topics are saved in memory"
+"name":"run",
+"args": ["command"],
+"when_to_use": "main tool: run shell, memory, git, todo. Use run \"help\" for all commands, run \"help <cmd>\" for specific help. Examples: run \"ls -la\", run \"help\", run \"help memory\", run \"git status\", run \"memory store foo bar\""
},
{
"name":"websearch",
"args": ["query", "limit"],
-"when_to_use": "when asked to search the web for information; returns clean summary without html,css and other web elements; limit is optional (default 3)"
+"when_to_use": "search the web for information"
},
{
"name":"rag_search",
"args": ["query", "limit"],
-"when_to_use": "when asked to search the local document database for information; performs query refinement, semantic search, reranking, and synthesis; returns clean summary with sources; limit is optional (default 3)"
+"when_to_use": "search local document database"
},
{
"name":"read_url",
"args": ["url"],
-"when_to_use": "when asked to get content for specific webpage or url; returns clean summary without html,css and other web elements"
+"when_to_use": "get content from a webpage"
},
{
"name":"read_url_raw",
"args": ["url"],
-"when_to_use": "when asked to get content for specific webpage or url; returns raw data as is without processing"
-},
-{
-"name":"file_create",
-"args": ["path", "content"],
-"when_to_use": "when there is a need to create a new file with optional content"
-},
-{
-"name":"file_read",
-"args": ["path"],
-"when_to_use": "when you need to read the content of a file"
-},
-{
-"name":"file_read_image",
-"args": ["path"],
-"when_to_use": "when you need to read or view an image file"
-},
-{
-"name":"file_write",
-"args": ["path", "content"],
-"when_to_use": "when needed to overwrite content to a file"
-},
-{
-"name":"file_write_append",
-"args": ["path", "content"],
-"when_to_use": "when you need append content to a file; use sed to edit content"
-},
-{
-"name":"file_edit",
-"args": ["path", "oldString", "newString", "lineNumber"],
-"when_to_use": "when you need to make targeted changes to a specific section of a file without rewriting the entire file; lineNumber is optional - if provided, only edits that specific line; if not provided, replaces all occurrences of oldString"
-},
-{
-"name":"file_delete",
-"args": ["path"],
-"when_to_use": "when asked to delete a file"
-},
-{
-"name":"file_move",
-"args": ["src", "dst"],
-"when_to_use": "when you need to move a file from source to destination"
-},
-{
-"name":"file_copy",
-"args": ["src", "dst"],
-"when_to_use": "copy a file from source to destination"
-},
-{
-"name":"file_list",
-"args": ["path"],
-"when_to_use": "list files in a directory; path is optional (default: current directory)"
+"when_to_use": "get raw content from a webpage"
},
{
-"name":"execute_command",
-"args": ["command", "args"],
-"when_to_use": "execute a system command; args is optional; allowed commands: grep, sed, awk, find, cat, head, tail, sort, uniq, wc, ls, echo, cut, tr, cp, mv, rm, mkdir, rmdir, pwd, df, free, ps, top, du, whoami, date, uname, go"
+"name":"browser_agent",
+"args": ["task"],
+"when_to_use": "autonomous browser automation for complex tasks"
}
]
</tools>
@@ -210,6 +153,10 @@ var (
func initTools() {
sysMap[basicCard.ID] = basicCard
roleToID["assistant"] = basicCard.ID
+ // Initialize fs root directory
+ tools.SetFSRoot(cfg.FilePickerDir)
+ // Initialize memory store
+ tools.SetMemoryStore(&memoryAdapter{store: store, cfg: cfg}, cfg.AssistantRole)
sa, err := searcher.NewWebSurfer(searcher.SearcherTypeScraper, "")
if err != nil {
if logger != nil {
@@ -258,7 +205,7 @@ func updateToolCapabilities() {
if cfg == nil || cfg.CurrentAPI == "" {
logger.Warn("cannot determine model capabilities: cfg or CurrentAPI is nil")
registerWindowTools()
- registerPlaywrightTools()
+ // fnMap["browser_agent"] = runBrowserAgent
return
}
prevHasVision := modelHasVision
@@ -272,31 +219,19 @@ func updateToolCapabilities() {
}
}
registerWindowTools()
- registerPlaywrightTools()
+ // fnMap["browser_agent"] = runBrowserAgent
}
// getWebAgentClient returns a singleton AgentClient for web agents.
func getWebAgentClient() *agent.AgentClient {
webAgentClientOnce.Do(func() {
- if cfg == nil {
- if logger != nil {
- logger.Warn("web agent client unavailable: config not initialized")
- }
- return
- }
- if logger == nil {
- if logger != nil {
- logger.Warn("web agent client unavailable: logger not initialized")
- }
- return
- }
getToken := func() string {
if chunkParser == nil {
return ""
}
return chunkParser.GetToken()
}
- webAgentClient = agent.NewAgentClient(cfg, *logger, getToken)
+ webAgentClient = agent.NewAgentClient(cfg, logger, getToken)
})
return webAgentClient
}
@@ -306,13 +241,13 @@ func registerWebAgents() {
webAgentsOnce.Do(func() {
client := getWebAgentClient()
// Register rag_search agent
- agent.Register("rag_search", agent.NewWebAgentB(client, ragSearchSysPrompt))
+ agent.RegisterB("rag_search", agent.NewWebAgentB(client, ragSearchSysPrompt))
// Register websearch agent
- agent.Register("websearch", agent.NewWebAgentB(client, webSearchSysPrompt))
+ agent.RegisterB("websearch", agent.NewWebAgentB(client, webSearchSysPrompt))
// Register read_url agent
- agent.Register("read_url", agent.NewWebAgentB(client, readURLSysPrompt))
+ agent.RegisterB("read_url", agent.NewWebAgentB(client, readURLSysPrompt))
// Register summarize_chat agent
- agent.Register("summarize_chat", agent.NewWebAgentB(client, summarySysPrompt))
+ agent.RegisterB("summarize_chat", agent.NewWebAgentB(client, summarySysPrompt))
})
}
@@ -461,352 +396,6 @@ func readURLRaw(args map[string]string) []byte {
return []byte(fmt.Sprintf("%+v", resp))
}
-/*
-consider cases:
-- append mode (treat it like a journal appendix)
-- replace mode (new info/mind invalidates old ones)
-also:
-- some writing can be done without consideration of previous data;
-- others do;
-*/
-func memorise(args map[string]string) []byte {
- agent := cfg.AssistantRole
- if len(args) < 2 {
- msg := "not enough args to call memorise tool; need topic and data to remember"
- logger.Error(msg)
- return []byte(msg)
- }
- memory := &models.Memory{
- Agent: agent,
- Topic: args["topic"],
- Mind: args["data"],
- UpdatedAt: time.Now(),
- CreatedAt: time.Now(),
- }
- if _, err := store.Memorise(memory); err != nil {
- logger.Error("failed to save memory", "err", err, "memoory", memory)
- return []byte("failed to save info")
- }
- msg := "info saved under the topic:" + args["topic"]
- return []byte(msg)
-}
-
-func recall(args map[string]string) []byte {
- agent := cfg.AssistantRole
- if len(args) < 1 {
- logger.Warn("not enough args to call recall tool")
- return nil
- }
- mind, err := store.Recall(agent, args["topic"])
- if err != nil {
- msg := fmt.Sprintf("failed to recall; error: %v; args: %v", err, args)
- logger.Error(msg)
- return []byte(msg)
- }
- answer := fmt.Sprintf("under the topic: %s is stored:\n%s", args["topic"], mind)
- return []byte(answer)
-}
-
-func recallTopics(args map[string]string) []byte {
- agent := cfg.AssistantRole
- topics, err := store.RecallTopics(agent)
- if err != nil {
- logger.Error("failed to use tool", "error", err, "args", args)
- return nil
- }
- joinedS := strings.Join(topics, ";")
- return []byte(joinedS)
-}
-
-// File Manipulation Tools
-func fileCreate(args map[string]string) []byte {
- path, ok := args["path"]
- if !ok || path == "" {
- msg := "path not provided to file_create tool"
- logger.Error(msg)
- return []byte(msg)
- }
- path = resolvePath(path)
- content, ok := args["content"]
- if !ok {
- content = ""
- }
- if err := writeStringToFile(path, content); err != nil {
- msg := "failed to create file; error: " + err.Error()
- logger.Error(msg)
- return []byte(msg)
- }
- msg := "file created successfully at " + path
- return []byte(msg)
-}
-
-func fileRead(args map[string]string) []byte {
- path, ok := args["path"]
- if !ok || path == "" {
- msg := "path not provided to file_read tool"
- logger.Error(msg)
- return []byte(msg)
- }
- path = resolvePath(path)
- content, err := readStringFromFile(path)
- if err != nil {
- msg := "failed to read file; error: " + err.Error()
- logger.Error(msg)
- return []byte(msg)
- }
- result := map[string]string{
- "content": content,
- "path": path,
- }
- jsonResult, err := json.Marshal(result)
- if err != nil {
- msg := "failed to marshal result; error: " + err.Error()
- logger.Error(msg)
- return []byte(msg)
- }
- return jsonResult
-}
-
-func fileReadImage(args map[string]string) []byte {
- path, ok := args["path"]
- if !ok || path == "" {
- msg := "path not provided to file_read_image tool"
- logger.Error(msg)
- return []byte(msg)
- }
- path = resolvePath(path)
- dataURL, err := models.CreateImageURLFromPath(path)
- if err != nil {
- msg := "failed to read image; error: " + err.Error()
- logger.Error(msg)
- return []byte(msg)
- }
- // result := map[string]any{
- // "type": "multimodal_content",
- // "parts": []map[string]string{
- // {"type": "text", "text": "Image at " + path},
- // {"type": "image_url", "url": dataURL},
- // },
- // }
- result := models.MultimodalToolResp{
- Type: "multimodal_content",
- Parts: []map[string]string{
- {"type": "text", "text": "Image at " + path},
- {"type": "image_url", "url": dataURL},
- },
- }
- jsonResult, err := json.Marshal(result)
- if err != nil {
- msg := "failed to marshal result; error: " + err.Error()
- logger.Error(msg)
- return []byte(msg)
- }
- return jsonResult
-}
-
-func fileWrite(args map[string]string) []byte {
- path, ok := args["path"]
- if !ok || path == "" {
- msg := "path not provided to file_write tool"
- logger.Error(msg)
- return []byte(msg)
- }
- path = resolvePath(path)
- content, ok := args["content"]
- if !ok {
- content = ""
- }
- if err := writeStringToFile(path, content); err != nil {
- msg := "failed to write to file; error: " + err.Error()
- logger.Error(msg)
- return []byte(msg)
- }
- msg := "file written successfully at " + path
- return []byte(msg)
-}
-
-func fileWriteAppend(args map[string]string) []byte {
- path, ok := args["path"]
- if !ok || path == "" {
- msg := "path not provided to file_write_append tool"
- logger.Error(msg)
- return []byte(msg)
- }
- path = resolvePath(path)
- content, ok := args["content"]
- if !ok {
- content = ""
- }
- if err := appendStringToFile(path, content); err != nil {
- msg := "failed to append to file; error: " + err.Error()
- logger.Error(msg)
- return []byte(msg)
- }
- msg := "file written successfully at " + path
- return []byte(msg)
-}
-
-func fileEdit(args map[string]string) []byte {
- path, ok := args["path"]
- if !ok || path == "" {
- msg := "path not provided to file_edit tool"
- logger.Error(msg)
- return []byte(msg)
- }
- path = resolvePath(path)
- oldString, ok := args["oldString"]
- if !ok || oldString == "" {
- msg := "oldString not provided to file_edit tool"
- logger.Error(msg)
- return []byte(msg)
- }
- newString, ok := args["newString"]
- if !ok {
- newString = ""
- }
- lineNumberStr, hasLineNumber := args["lineNumber"]
- // Read file content
- content, err := os.ReadFile(path)
- if err != nil {
- msg := "failed to read file: " + err.Error()
- logger.Error(msg)
- return []byte(msg)
- }
- fileContent := string(content)
- var replacementCount int
- if hasLineNumber && lineNumberStr != "" {
- // Line-number based edit
- lineNum, err := strconv.Atoi(lineNumberStr)
- if err != nil {
- msg := "invalid lineNumber: must be a valid integer"
- logger.Error(msg)
- return []byte(msg)
- }
- lines := strings.Split(fileContent, "\n")
- if lineNum < 1 || lineNum > len(lines) {
- msg := fmt.Sprintf("lineNumber %d out of range (file has %d lines)", lineNum, len(lines))
- logger.Error(msg)
- return []byte(msg)
- }
- // Find oldString in the specific line
- targetLine := lines[lineNum-1]
- if !strings.Contains(targetLine, oldString) {
- msg := fmt.Sprintf("oldString not found on line %d", lineNum)
- logger.Error(msg)
- return []byte(msg)
- }
- lines[lineNum-1] = strings.Replace(targetLine, oldString, newString, 1)
- replacementCount = 1
- fileContent = strings.Join(lines, "\n")
- } else {
- // Replace all occurrences
- if !strings.Contains(fileContent, oldString) {
- msg := "oldString not found in file"
- logger.Error(msg)
- return []byte(msg)
- }
- fileContent = strings.ReplaceAll(fileContent, oldString, newString)
- replacementCount = strings.Count(fileContent, newString)
- }
- if err := os.WriteFile(path, []byte(fileContent), 0644); err != nil {
- msg := "failed to write file: " + err.Error()
- logger.Error(msg)
- return []byte(msg)
- }
- msg := fmt.Sprintf("file edited successfully at %s (%d replacement(s))", path, replacementCount)
- return []byte(msg)
-}
-
-func fileDelete(args map[string]string) []byte {
- path, ok := args["path"]
- if !ok || path == "" {
- msg := "path not provided to file_delete tool"
- logger.Error(msg)
- return []byte(msg)
- }
- path = resolvePath(path)
- if err := removeFile(path); err != nil {
- msg := "failed to delete file; error: " + err.Error()
- logger.Error(msg)
- return []byte(msg)
- }
- msg := "file deleted successfully at " + path
- return []byte(msg)
-}
-
-func fileMove(args map[string]string) []byte {
- src, ok := args["src"]
- if !ok || src == "" {
- msg := "source path not provided to file_move tool"
- logger.Error(msg)
- return []byte(msg)
- }
- src = resolvePath(src)
- dst, ok := args["dst"]
- if !ok || dst == "" {
- msg := "destination path not provided to file_move tool"
- logger.Error(msg)
- return []byte(msg)
- }
- dst = resolvePath(dst)
- if err := moveFile(src, dst); err != nil {
- msg := "failed to move file; error: " + err.Error()
- logger.Error(msg)
- return []byte(msg)
- }
- msg := fmt.Sprintf("file moved successfully from %s to %s", src, dst)
- return []byte(msg)
-}
-
-func fileCopy(args map[string]string) []byte {
- src, ok := args["src"]
- if !ok || src == "" {
- msg := "source path not provided to file_copy tool"
- logger.Error(msg)
- return []byte(msg)
- }
- src = resolvePath(src)
- dst, ok := args["dst"]
- if !ok || dst == "" {
- msg := "destination path not provided to file_copy tool"
- logger.Error(msg)
- return []byte(msg)
- }
- dst = resolvePath(dst)
- if err := copyFile(src, dst); err != nil {
- msg := "failed to copy file; error: " + err.Error()
- logger.Error(msg)
- return []byte(msg)
- }
- msg := fmt.Sprintf("file copied successfully from %s to %s", src, dst)
- return []byte(msg)
-}
-
-func fileList(args map[string]string) []byte {
- path, ok := args["path"]
- if !ok || path == "" {
- path = "." // default to current directory
- }
- path = resolvePath(path)
- files, err := listDirectory(path)
- if err != nil {
- msg := "failed to list directory; error: " + err.Error()
- logger.Error(msg)
- return []byte(msg)
- }
- result := map[string]interface{}{
- "directory": path,
- "files": files,
- }
- jsonResult, err := json.Marshal(result)
- if err != nil {
- msg := "failed to marshal result; error: " + err.Error()
- logger.Error(msg)
- return []byte(msg)
- }
- return jsonResult
-}
-
// Helper functions for file operations
func resolvePath(p string) string {
if filepath.IsAbs(p) {
@@ -890,50 +479,466 @@ func listDirectory(path string) ([]string, error) {
return files, nil
}
-// Command Execution Tool
-func executeCommand(args map[string]string) []byte {
+// runCmd is the handler behind the unified "run" tool - the single entry point
+// for help, memory, todo, window and browser commands as well as plain shell.
+//
+// It reads args["command"], splits it on whitespace and dispatches on the first
+// word. A word with no dedicated case sends the whole, unsplit command string to
+// tools.ExecChain. The reply (or error text) is returned as bytes.
+func runCmd(args map[string]string) []byte {
commandStr := args["command"]
if commandStr == "" {
- msg := "command not provided to execute_command tool"
+ msg := "command not provided to run tool"
logger.Error(msg)
return []byte(msg)
}
- // Handle commands passed as single string with spaces (e.g., "go run main.go" or "cd /tmp")
- // Split into base command and arguments
+
+ // Parse the command - first word is subcommand
parts := strings.Fields(commandStr)
if len(parts) == 0 {
- msg := "command not provided to execute_command tool"
- logger.Error(msg)
- return []byte(msg)
+ // Reached when the command consists of whitespace only.
+ return []byte("[error] empty command")
+ }
+
+ subcmd := parts[0]
+ rest := parts[1:]
+
+ // Route to appropriate handler
+ switch subcmd {
+ case "help":
+ // help - show all commands
+ // help <cmd> - show help for specific command
+ return []byte(getHelp(rest))
+ case "memory":
+ // memory store <topic> <data> | memory get <topic> | memory list | memory forget <topic>
+ // NOTE(review): "store" is always prepended here, so "memory get foo" reaches
+ // FsMemory as ["store", "get", "foo"] - confirm that is what FsMemory expects.
+ return []byte(tools.FsMemory(append([]string{"store"}, rest...), ""))
+ case "todo":
+ // todo create|read|update|delete - route to existing todo handlers
+ return []byte(handleTodoSubcommand(rest, args))
+ case "window", "windows":
+ // window list - list all windows
+ // The window/capture handlers below receive the original args map; rest is
+ // not forwarded. NOTE(review): confirm they can find the window name there.
+ return listWindows(args)
+ case "capture", "screenshot":
+ // capture <window-name> - capture a window
+ return captureWindow(args)
+ case "capture_and_view", "screenshot_and_view":
+ // capture and view screenshot
+ return captureWindowAndView(args)
+ case "browser":
+ // browser <action> [args...] - Playwright browser automation
+ return runBrowserCommand(rest, args)
+ default:
+ // Everything else: shell with pipe/chaining support
+ result := tools.ExecChain(commandStr)
+ return []byte(result)
}
- command := parts[0]
- cmdArgs := parts[1:]
- if !isCommandAllowed(command, cmdArgs...) {
- msg := fmt.Sprintf("command '%s' is not allowed", command)
- logger.Error(msg)
- return []byte(msg)
+}
+
+// runBrowserCommand routes "browser <action> [args...]" to the Playwright handlers.
+//
+// args holds the words that followed "browser": args[0] is the action and the
+// remaining words are its operands. originalArgs is the tool-call map; it is
+// forwarded untouched to the handlers that take no operands (start, stop,
+// running, dom). With no action the usage text is returned, and an unrecognised
+// action yields an "unknown browser action" message.
+func runBrowserCommand(args []string, originalArgs map[string]string) []byte {
+	if len(args) == 0 {
+		return []byte(`usage: browser <action> [args...]
+Actions:
+ start - start browser
+ stop - stop browser
+ running - check if browser is running
+ go <url> - navigate to URL
+ click <selector> - click element
+ fill <selector> <text> - fill input
+ text [selector] - extract text
+ html [selector] - get HTML
+ dom - get DOM
+ screenshot [path] - take screenshot
+ screenshot_and_view - take and view screenshot
+ wait <selector> - wait for element
+ drag <from> <to> - drag element`)
+	}
+
+	action, rest := args[0], args[1:]
+
+	// first is the leading operand, or "" when the action was given none.
+	first := ""
+	if len(rest) > 0 {
+		first = rest[0]
+	}
+
+	switch action {
+	case "start":
+		return pwStart(originalArgs)
+	case "stop":
+		return pwStop(originalArgs)
+	case "running":
+		return pwIsRunning(originalArgs)
+	case "go", "navigate", "open":
+		// browser go <url>
+		if first == "" {
+			return []byte("usage: browser go <url>")
+		}
+		return pwNavigate(map[string]string{"url": first})
+	case "click":
+		// browser click <selector> [index]; index defaults to "0".
+		index := "0"
+		if len(rest) > 1 {
+			index = rest[1]
+		}
+		if first == "" {
+			return []byte("usage: browser click <selector> [index]")
+		}
+		return pwClick(map[string]string{"selector": first, "index": index})
+	case "fill":
+		// browser fill <selector> <text>; every word after the selector is the text.
+		if len(rest) < 2 {
+			return []byte("usage: browser fill <selector> <text>")
+		}
+		return pwFill(map[string]string{"selector": rest[0], "text": strings.Join(rest[1:], " ")})
+	case "text":
+		// browser text [selector]
+		return pwExtractText(map[string]string{"selector": first})
+	case "html":
+		// browser html [selector]
+		return pwGetHTML(map[string]string{"selector": first})
+	case "dom":
+		return pwGetDOM(originalArgs)
+	case "screenshot":
+		// browser screenshot [path]
+		return pwScreenshot(map[string]string{"path": first})
+	case "screenshot_and_view":
+		// browser screenshot_and_view [path]
+		return pwScreenshotAndView(map[string]string{"path": first})
+	case "wait":
+		// browser wait <selector>
+		if first == "" {
+			return []byte("usage: browser wait <selector>")
+		}
+		return pwWaitForSelector(map[string]string{"selector": first})
+	case "drag":
+		// browser drag <x1> <y1> <x2> <y2> OR browser drag <from_selector> <to_selector>
+		if len(rest) < 2 {
+			return []byte("usage: browser drag <x1> <y1> <x2> <y2> OR browser drag <from_selector> <to_selector>")
+		}
+		// A numeric first operand selects coordinate mode. ParseFloat accepts
+		// everything Atoi does, so one parse covers integers and decimals.
+		if _, err := strconv.ParseFloat(first, 64); err == nil {
+			// Coordinates: browser drag 100 200 300 400
+			if len(rest) < 4 {
+				return []byte("usage: browser drag <x1> <y1> <x2> <y2>")
+			}
+			return pwDrag(map[string]string{
+				"x1": rest[0], "y1": rest[1],
+				"x2": rest[2], "y2": rest[3],
+			})
+		}
+		// Selectors: browser drag #item #container
+		// pwDrag works on coordinates, so selector pairs go to pwDragBySelector.
+		return pwDragBySelector(map[string]string{
+			"fromSelector": rest[0],
+			"toSelector":   rest[1],
+		})
+	default:
+		return []byte(fmt.Sprintf("unknown browser action: %s", action))
+	}
- // Special handling for cd command - update FilePickerDir
- if command == "cd" {
- return handleCdCommand(cmdArgs)
+}
+
+// getHelp returns the overview of all run commands when args is empty, otherwise the help text for the command named by args[0].
+func getHelp(args []string) string {
+ if len(args) == 0 {
+ // General help - show all commands
+ return `Available commands:
+ help <cmd> - show help for a command (use: help memory, help git, etc.)
+
+ # File operations
+ ls [path] - list files in directory
+ cat <file> - read file content
+ see <file> - view image file
+ write <file> - write content to file
+ stat <file> - get file info
+ rm <file> - delete file
+ cp <src> <dst> - copy file
+ mv <src> <dst> - move/rename file
+ mkdir <dir> - create directory
+ pwd - print working directory
+ cd <dir> - change directory
+ sed 's/old/new/[g]' [file] - text replacement
+
+ # Text processing
+ echo <args> - echo back input
+ time - show current time
+ grep <pattern> - filter lines (supports -i, -v, -c)
+ head [n] - show first n lines
+ tail [n] - show last n lines
+ wc [-l|-w|-c] - count lines/words/chars
+ sort [-r|-n] - sort lines
+ uniq [-c] - remove duplicates
+
+ # Git (read-only)
+ git <cmd> - git commands (status, log, diff, show, branch, etc.)
+
+ # Go
+ go <cmd> - go commands (run, build, test, mod, etc.)
+
+ # Memory
+ memory store <topic> <data> - save to memory
+ memory get <topic> - retrieve from memory
+ memory list - list all topics
+ memory forget <topic> - delete from memory
+
+ # Todo
+ todo create <task> - create a todo
+ todo read - list all todos
+ todo update <id> <status> - update todo (pending/in_progress/completed)
+ todo delete <id> - delete a todo
+
+ # Window (requires xdotool + maim)
+ window - list available windows
+ capture <name> - capture a window screenshot
+ capture_and_view <name> - capture and view screenshot
+
+ # Browser (requires Playwright)
+ browser start - start browser
+ browser stop - stop browser
+ browser running - check if running
+ browser go <url> - navigate to URL
+ browser click <sel> - click element
+ browser fill <sel> <txt> - fill input
+ browser text [sel] - extract text
+ browser html [sel] - get HTML
+ browser screenshot - take screenshot
+ browser wait <sel> - wait for element
+ browser drag <x1> <y1> <x2> <y2> - drag by coordinates
+ browser drag <sel1> <sel2> - drag by selectors (center points)
+
+ # System
+ <any shell command> - run shell command directly
+
+Use: run "command" to execute.`
+ }
+
+ // Specific command help
+ cmd := args[0]
+ switch cmd {
+ case "ls":
+ return `ls [directory]
+ List files in a directory.
+ Examples:
+ run "ls"
+ run "ls /home/user"
+ run "ls -la" (via shell)`
+ case "cat":
+ return `cat <file>
+ Read file content.
+ Examples:
+ run "cat readme.md"
+ run "cat -b image.png" (base64 output)`
+ case "see":
+ return `see <image-file>
+ View an image file for multimodal analysis.
+ Supports: png, jpg, jpeg, gif, webp, svg
+ Example:
+ run "see screenshot.png"`
+ case "write":
+ return `write <file> [content]
+ Write content to a file.
+ Examples:
+ run "write notes.txt hello world"
+ run "write data.json" (with stdin)`
+ case "memory":
+ return `memory <subcommand> [args]
+ Manage memory storage.
+ Subcommands:
+ store <topic> <data> - save data to a topic
+ get <topic> - retrieve data from a topic
+ list - list all topics
+ forget <topic> - delete a topic
+ Examples:
+ run "memory store foo bar"
+ run "memory get foo"
+ run "memory list"`
+ case "todo":
+ return `todo <subcommand> [args]
+ Manage todo list.
+ Subcommands:
+ create <task> - create a new todo
+ read [id] - list all todos or read specific one
+ update <id> <status> - update status (pending/in_progress/completed)
+ delete <id> - delete a todo
+ Examples:
+ run "todo create fix bug"
+ run "todo read"
+ run "todo update 1 completed"`
+ case "git":
+ return `git <subcommand>
+ Read-only git commands.
+ Allowed: status, log, diff, show, branch, reflog, rev-parse, shortlog, describe, rev-list
+ Examples:
+ run "git status"
+ run "git log --oneline -5"
+ run "git diff HEAD~1"`
+ case "grep":
+ return `grep <pattern> [options]
+ Filter lines matching a pattern.
+ Options:
+ -i ignore case
+ -v invert match
+ -c count matches
+ Example:
+ run "grep error" (from stdin)
+ run "grep -i warning log.txt"`
+ case "cd":
+ return `cd <directory>
+ Change working directory.
+ Example:
+ run "cd /tmp"
+ run "cd .."`
+ case "pwd":
+ return `pwd
+ Print working directory.
+ Example:
+ run "pwd"`
+ case "sed":
+ return `sed 's/old/new/[g]' [file]
+ Stream editor for text replacement.
+ Options:
+ -i in-place editing
+ -g global replacement (replace all)
+ Examples:
+ run "sed 's/foo/bar/' file.txt"
+ run "sed 's/foo/bar/g' file.txt" (global)
+ run "sed -i 's/foo/bar/' file.txt" (in-place)
+ run "cat file.txt | sed 's/foo/bar/'" (pipe from stdin)`
+ case "go":
+ return `go <command>
+ Go toolchain commands.
+ Allowed: run, build, test, mod, get, install, clean, fmt, vet, etc.
+ Examples:
+ run "go run main.go"
+ run "go build ./..."
+ run "go test ./..."
+ run "go mod tidy"
+ run "go get github.com/package"`
+ case "window", "windows":
+ return `window
+ List available windows.
+ Requires: xdotool and maim
+ Example:
+ run "window"`
+ case "capture", "screenshot":
+ return `capture <window-name-or-id>
+ Capture a screenshot of a window.
+ Requires: xdotool and maim
+ Examples:
+ run "capture Firefox"
+ run "capture 0x12345678"
+ run "capture_and_view Firefox"`
+ case "capture_and_view":
+ return `capture_and_view <window-name-or-id>
+ Capture a window and return for viewing.
+ Requires: xdotool and maim
+ Examples:
+ run "capture_and_view Firefox"`
+ case "browser":
+ return `browser <action> [args]
+ Playwright browser automation.
+ Requires: Playwright browser server running
+ Actions:
+ start - start browser
+ stop - stop browser
+ running - check if browser is running
+ go <url> - navigate to URL
+ click <selector> - click element (use index for multiple: click #btn 1)
+ fill <selector> <text> - fill input field
+ text [selector] - extract text (from element or whole page)
+ html [selector] - get HTML (from element or whole page)
+ screenshot [path] - take screenshot
+ wait <selector> - wait for element to appear
+ drag <from> <to> - drag element to another element
+ Examples:
+ run "browser start"
+ run "browser go https://example.com"
+ run "browser click #submit-button"
+ run "browser fill #search-input hello"
+ run "browser text"
+ run "browser screenshot"
+ run "browser drag 100 200 300 400"
+ run "browser drag #item1 #container2"`
+ default:
+ return fmt.Sprintf("No help available for: %s. Use: run \"help\" for all commands.", cmd)
}
- // Execute with timeout for safety
- ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
- defer cancel()
- cmd := exec.CommandContext(ctx, command, cmdArgs...)
- cmd.Dir = cfg.FilePickerDir
- output, err := cmd.CombinedOutput()
- if err != nil {
- msg := fmt.Sprintf("command '%s' failed; error: %v; output: %s", command, err, string(output))
+}
+
+// handleTodoSubcommand routes "todo <subcommand> [args...]" to the todo handlers.
+//
+// args holds the words that followed "todo"; originalArgs is the tool-call map
+// and is consulted only as a fallback source for the task text of "create".
+// Missing operands produce a usage message instead of a handler call, and an
+// unrecognised subcommand yields an "unknown todo subcommand" message.
+func handleTodoSubcommand(args []string, originalArgs map[string]string) []byte {
+	if len(args) == 0 {
+		return []byte("usage: todo create|read|update|delete")
+	}
+
+	subcmd := args[0]
+
+	switch subcmd {
+	case "create":
+		// The task is everything after "create"; fall back to the "task" key
+		// of the original tool call when no words were given.
+		task := strings.Join(args[1:], " ")
+		if task == "" {
+			task = originalArgs["task"]
+		}
+		if task == "" {
+			return []byte("usage: todo create <task>")
+		}
+		return todoCreate(map[string]string{"task": task})
+
+	case "read":
+		// The id is optional; an empty id is passed through to todoRead.
+		id := ""
+		if len(args) > 1 {
+			id = args[1]
+		}
+		return todoRead(map[string]string{"id": id})
+
+	case "update":
+		// Both <id> and <status> are required. Checking for three words keeps
+		// "todo update <id>" from indexing past the end of args.
+		if len(args) < 3 {
+			return []byte("usage: todo update <id> <status>")
+		}
+		return todoUpdate(map[string]string{"id": args[1], "status": args[2]})
+
+	case "delete":
+		if len(args) < 2 {
+			return []byte("usage: todo delete <id>")
+		}
+		return todoDelete(map[string]string{"id": args[1]})
+
+	default:
+		return []byte(fmt.Sprintf("unknown todo subcommand: %s", subcmd))
+	}
+}
+
+// executeCommand is the command execution tool with pipe/chaining support.
+// It reads args["command"] and returns whatever tools.ExecChain produces for it;
+// an empty command is logged and the error text is returned instead.
+func executeCommand(args map[string]string) []byte {
+ commandStr := args["command"]
+ if commandStr == "" {
+ msg := "command not provided to execute_command tool"
logger.Error(msg)
return []byte(msg)
}
- // Check if output is empty and return success message
- if len(output) == 0 {
- successMsg := fmt.Sprintf("command '%s' executed successfully and exited with code 0", commandStr)
- return []byte(successMsg)
- }
- return output
+
+ // Use chain execution for pipe/chaining support; the command string is passed unsplit.
+ result := tools.ExecChain(commandStr)
+ return []byte(result)
}
// handleCdCommand handles the cd command to update FilePickerDir
@@ -1155,65 +1160,6 @@ func todoDelete(args map[string]string) []byte {
return jsonResult
}
-var gitReadSubcommands = map[string]bool{
- "status": true,
- "log": true,
- "diff": true,
- "show": true,
- "branch": true,
- "reflog": true,
- "rev-parse": true,
- "shortlog": true,
- "describe": true,
-}
-
-func isCommandAllowed(command string, args ...string) bool {
- allowedCommands := map[string]bool{
- "cd": true,
- "grep": true,
- "sed": true,
- "awk": true,
- "find": true,
- "cat": true,
- "head": true,
- "tail": true,
- "sort": true,
- "uniq": true,
- "wc": true,
- "ls": true,
- "echo": true,
- "cut": true,
- "tr": true,
- "cp": true,
- "mv": true,
- "rm": true,
- "mkdir": true,
- "rmdir": true,
- "pwd": true,
- "df": true,
- "free": true,
- "ps": true,
- "top": true,
- "du": true,
- "whoami": true,
- "date": true,
- "uname": true,
- "git": true,
- "go": true,
- }
- // Allow all go subcommands (go run, go mod tidy, go test, etc.)
- if strings.HasPrefix(command, "go ") && allowedCommands["go"] {
- return true
- }
- if command == "git" && len(args) > 0 {
- return gitReadSubcommands[args[0]]
- }
- if !allowedCommands[command] {
- return false
- }
- return true
-}
-
func summarizeChat(args map[string]string) []byte {
if len(chatBody.Messages) == 0 {
return []byte("No chat history to summarize.")
@@ -1361,31 +1307,65 @@ func captureWindowAndView(args map[string]string) []byte {
type fnSig func(map[string]string) []byte
+// FS Command Handlers - Unix-style file operations
+
+// argsToSlice flattens a tool-call argument map into the positional slice used
+// by the tools package. Only the keys path, src, dst, dir and file are read, in
+// that order, and empty or missing values are skipped. The result is nil when
+// none of them is set.
+func argsToSlice(args map[string]string) []string {
+	positional := [...]string{"path", "src", "dst", "dir", "file"}
+	var out []string
+	for i := range positional {
+		value := args[positional[i]]
+		if value == "" {
+			// A missing key reads as "", so absent and empty are treated alike.
+			continue
+		}
+		out = append(out, value)
+	}
+	return out
+}
+
+// cmdMemory is the "memory" tool handler: it forwards the positional values
+// picked out by argsToSlice to tools.FsMemory with empty stdin.
+// NOTE(review): argsToSlice only reads path/src/dst/dir/file, so any other
+// argument keys of a memory call are dropped here - confirm the intended keys.
+func cmdMemory(args map[string]string) []byte {
+ return []byte(tools.FsMemory(argsToSlice(args), ""))
+}
+
+// memoryAdapter exposes a storage.Memories store through plain string
+// arguments. cfg is carried along but not read by any method below.
+// NOTE(review): presumably this satisfies the memory-store interface expected
+// by the tools package - confirm against its definition.
+type memoryAdapter struct {
+	store storage.Memories
+	cfg   *config.Config
+}
+
+// Memorise saves data under topic for agent and returns the topic reported
+// back by the store, or the store's error.
+func (m *memoryAdapter) Memorise(agent, topic, data string) (string, error) {
+	// Take one timestamp: two separate time.Now() calls would stamp UpdatedAt
+	// first, leaving CreatedAt slightly later than UpdatedAt.
+	now := time.Now()
+	saved, err := m.store.Memorise(&models.Memory{
+		Agent:     agent,
+		Topic:     topic,
+		Mind:      data,
+		UpdatedAt: now,
+		CreatedAt: now,
+	})
+	if err != nil {
+		return "", err
+	}
+	return saved.Topic, nil
+}
+
+// Recall returns what the store holds for agent under topic.
+func (m *memoryAdapter) Recall(agent, topic string) (string, error) {
+	return m.store.Recall(agent, topic)
+}
+
+// RecallTopics returns the topics the store holds for agent.
+func (m *memoryAdapter) RecallTopics(agent string) ([]string, error) {
+	return m.store.RecallTopics(agent)
+}
+
+// Forget asks the store to drop topic for agent.
+func (m *memoryAdapter) Forget(agent, topic string) error {
+	return m.store.Forget(agent, topic)
+}
+
var fnMap = map[string]fnSig{
- "recall": recall,
- "recall_topics": recallTopics,
- "memorise": memorise,
- "rag_search": ragsearch,
- "websearch": websearch,
- "websearch_raw": websearchRaw,
- "read_url": readURL,
- "read_url_raw": readURLRaw,
- "file_create": fileCreate,
- "file_read": fileRead,
- "file_read_image": fileReadImage,
- "file_write": fileWrite,
- "file_write_append": fileWriteAppend,
- "file_edit": fileEdit,
- "file_delete": fileDelete,
- "file_move": fileMove,
- "file_copy": fileCopy,
- "file_list": fileList,
- "execute_command": executeCommand,
- "todo_create": todoCreate,
- "todo_read": todoRead,
- "todo_update": todoUpdate,
- "todo_delete": todoDelete,
- "summarize_chat": summarizeChat,
+ "memory": cmdMemory,
+ "rag_search": ragsearch,
+ "websearch": websearch,
+ "websearch_raw": websearchRaw,
+ "read_url": readURL,
+ "read_url_raw": readURLRaw,
+ // Unified run command
+ "run": runCmd,
+ "summarize_chat": summarizeChat,
}
func removeWindowToolsFromBaseTools() {
@@ -1503,6 +1483,48 @@ func registerWindowTools() {
}
}
+// browserAgentSysPrompt is the system prompt given to the PWAgent created in
+// runBrowserAgent. It lists the pw_* tools and the expected workflow, and tells
+// the agent to leave the browser running when the task is done.
+var browserAgentSysPrompt = `You are an autonomous browser automation agent. Your goal is to complete the user's task by intelligently using browser automation tools.
+
+Important: The browser may already be running from a previous task! Always check pw_is_running first before starting a new browser.
+
+Available tools:
+- pw_start: Start browser (only if not already running)
+- pw_stop: Stop browser (only when you're truly done and browser is no longer needed)
+- pw_is_running: Check if browser is running
+- pw_navigate: Go to a URL
+- pw_click: Click an element by CSS selector
+- pw_fill: Type text into an input
+- pw_extract_text: Get text from page/element
+- pw_screenshot: Take a screenshot (returns file path)
+- pw_screenshot_and_view: Take screenshot with image for viewing
+- pw_wait_for_selector: Wait for element to appear
+- pw_drag: Drag mouse from one point to another
+- pw_click_at: Click at X,Y coordinates
+- pw_get_html: Get HTML content
+- pw_get_dom: Get structured DOM tree
+- pw_search_elements: Search for elements by text or selector
+
+Workflow:
+1. First, check if browser is already running (pw_is_running)
+2. Only start browser if not already running (pw_start)
+3. Navigate to required pages (pw_navigate)
+4. Interact with elements as needed (click, fill, etc.)
+5. Extract information or take screenshots as requested
+6. IMPORTANT: Do NOT stop the browser when done! Leave it running so the user can continue interacting with the page in subsequent requests.
+
+Always provide clear feedback about what you're doing and what you found.`
+
+// runBrowserAgent is the browser_agent tool handler. It requires a non-empty
+// args["task"], builds a PWAgent on the shared web agent client with
+// browserAgentSysPrompt and the registered PW tools, and returns the result of
+// processing the task. A missing task yields a JSON error object.
+func runBrowserAgent(args map[string]string) []byte {
+	// A missing key reads as "", so one emptiness check covers both cases.
+	task := args["task"]
+	if task == "" {
+		return []byte(`{"error": "task argument is required"}`)
+	}
+	pwAgent := agent.NewPWAgent(getWebAgentClient(), browserAgentSysPrompt)
+	pwAgent.SetTools(agent.GetPWTools())
+	return pwAgent.ProcessTask(task)
+}
+
func registerPlaywrightTools() {
removePlaywrightToolsFromBaseTools()
if cfg != nil && cfg.PlaywrightEnabled {
@@ -1788,6 +1810,39 @@ func registerPlaywrightTools() {
}
baseTools = append(baseTools, playwrightTools...)
toolSysMsg += browserToolSysMsg
+ agent.RegisterPWTool("pw_start", pwStart)
+ agent.RegisterPWTool("pw_stop", pwStop)
+ agent.RegisterPWTool("pw_is_running", pwIsRunning)
+ agent.RegisterPWTool("pw_navigate", pwNavigate)
+ agent.RegisterPWTool("pw_click", pwClick)
+ agent.RegisterPWTool("pw_click_at", pwClickAt)
+ agent.RegisterPWTool("pw_fill", pwFill)
+ agent.RegisterPWTool("pw_extract_text", pwExtractText)
+ agent.RegisterPWTool("pw_screenshot", pwScreenshot)
+ agent.RegisterPWTool("pw_screenshot_and_view", pwScreenshotAndView)
+ agent.RegisterPWTool("pw_wait_for_selector", pwWaitForSelector)
+ agent.RegisterPWTool("pw_drag", pwDrag)
+ agent.RegisterPWTool("pw_get_html", pwGetHTML)
+ agent.RegisterPWTool("pw_get_dom", pwGetDOM)
+ agent.RegisterPWTool("pw_search_elements", pwSearchElements)
+ browserAgentTool := []models.Tool{
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "browser_agent",
+ Description: "Autonomous browser automation agent. Use for complex multi-step browser tasks like 'go to website, login, and take screenshot'. The agent will plan and execute steps automatically using browser tools.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{"task"},
+ Properties: map[string]models.ToolArgProps{
+ "task": {Type: "string", Description: "The task to accomplish, e.g., 'go to github.com and take a screenshot of the homepage'"},
+ },
+ },
+ },
+ },
+ }
+ baseTools = append(baseTools, browserAgentTool...)
+ fnMap["browser_agent"] = runBrowserAgent
}
}
@@ -1909,364 +1964,19 @@ var baseTools = []models.Tool{
},
},
},
- // memorise
+ // run - unified command
models.Tool{
Type: "function",
Function: models.ToolFunc{
- Name: "memorise",
- Description: "Save topic-data in key-value cache. Use when asked to remember something/keep in mind.",
- Parameters: models.ToolFuncParams{
- Type: "object",
- Required: []string{"topic", "data"},
- Properties: map[string]models.ToolArgProps{
- "topic": models.ToolArgProps{
- Type: "string",
- Description: "topic is the key under which data is saved",
- },
- "data": models.ToolArgProps{
- Type: "string",
- Description: "data is the value that is saved under the topic-key",
- },
- },
- },
- },
- },
- // recall
- models.Tool{
- Type: "function",
- Function: models.ToolFunc{
- Name: "recall",
- Description: "Recall topic-data from key-value cache. Use when precise info about the topic is needed.",
- Parameters: models.ToolFuncParams{
- Type: "object",
- Required: []string{"topic"},
- Properties: map[string]models.ToolArgProps{
- "topic": models.ToolArgProps{
- Type: "string",
- Description: "topic is the key to recall data from",
- },
- },
- },
- },
- },
- // recall_topics
- models.Tool{
- Type: "function",
- Function: models.ToolFunc{
- Name: "recall_topics",
- Description: "Recall all topics from key-value cache. Use when need to know what topics are currently stored in memory.",
- Parameters: models.ToolFuncParams{
- Type: "object",
- Required: []string{},
- Properties: map[string]models.ToolArgProps{},
- },
- },
- },
- // file_create
- models.Tool{
- Type: "function",
- Function: models.ToolFunc{
- Name: "file_create",
- Description: "Create a new file with specified content. Use when you need to create a new file.",
- Parameters: models.ToolFuncParams{
- Type: "object",
- Required: []string{"path"},
- Properties: map[string]models.ToolArgProps{
- "path": models.ToolArgProps{
- Type: "string",
- Description: "path where the file should be created",
- },
- "content": models.ToolArgProps{
- Type: "string",
- Description: "content to write to the file (optional, defaults to empty string)",
- },
- },
- },
- },
- },
- // file_read
- models.Tool{
- Type: "function",
- Function: models.ToolFunc{
- Name: "file_read",
- Description: "Read the content of a file. Use when you need to see the content of a file.",
- Parameters: models.ToolFuncParams{
- Type: "object",
- Required: []string{"path"},
- Properties: map[string]models.ToolArgProps{
- "path": models.ToolArgProps{
- Type: "string",
- Description: "path of the file to read",
- },
- },
- },
- },
- },
- // file_read_image
- models.Tool{
- Type: "function",
- Function: models.ToolFunc{
- Name: "file_read_image",
- Description: "Read an image file and return it for multimodal LLM viewing. Supports png, jpg, jpeg, gif, webp formats. Use when you need the LLM to see and analyze an image.",
- Parameters: models.ToolFuncParams{
- Type: "object",
- Required: []string{"path"},
- Properties: map[string]models.ToolArgProps{
- "path": models.ToolArgProps{
- Type: "string",
- Description: "path of the image file to read",
- },
- },
- },
- },
- },
- // file_write
- models.Tool{
- Type: "function",
- Function: models.ToolFunc{
- Name: "file_write",
- Description: "Write content to a file. Will overwrite any content present.",
- Parameters: models.ToolFuncParams{
- Type: "object",
- Required: []string{"path", "content"},
- Properties: map[string]models.ToolArgProps{
- "path": models.ToolArgProps{
- Type: "string",
- Description: "path of the file to write to",
- },
- "content": models.ToolArgProps{
- Type: "string",
- Description: "content to write to the file",
- },
- },
- },
- },
- },
- // file_write_append
- models.Tool{
- Type: "function",
- Function: models.ToolFunc{
- Name: "file_write_append",
- Description: "Append content to a file.",
- Parameters: models.ToolFuncParams{
- Type: "object",
- Required: []string{"path", "content"},
- Properties: map[string]models.ToolArgProps{
- "path": models.ToolArgProps{
- Type: "string",
- Description: "path of the file to write to",
- },
- "content": models.ToolArgProps{
- Type: "string",
- Description: "content to write to the file",
- },
- },
- },
- },
- },
- // file_edit
- models.Tool{
- Type: "function",
- Function: models.ToolFunc{
- Name: "file_edit",
- Description: "Edit a specific section of a file by replacing oldString with newString. Use for targeted changes without rewriting the entire file.",
- Parameters: models.ToolFuncParams{
- Type: "object",
- Required: []string{"path", "oldString", "newString"},
- Properties: map[string]models.ToolArgProps{
- "path": models.ToolArgProps{
- Type: "string",
- Description: "path of the file to edit",
- },
- "oldString": models.ToolArgProps{
- Type: "string",
- Description: "the exact string to find and replace",
- },
- "newString": models.ToolArgProps{
- Type: "string",
- Description: "the string to replace oldString with",
- },
- "lineNumber": models.ToolArgProps{
- Type: "string",
- Description: "optional line number (1-indexed) to edit - if provided, only that line is edited",
- },
- },
- },
- },
- },
- // file_delete
- models.Tool{
- Type: "function",
- Function: models.ToolFunc{
- Name: "file_delete",
- Description: "Delete a file. Use when you need to remove a file.",
- Parameters: models.ToolFuncParams{
- Type: "object",
- Required: []string{"path"},
- Properties: map[string]models.ToolArgProps{
- "path": models.ToolArgProps{
- Type: "string",
- Description: "path of the file to delete",
- },
- },
- },
- },
- },
- // file_move
- models.Tool{
- Type: "function",
- Function: models.ToolFunc{
- Name: "file_move",
- Description: "Move a file from one location to another. Use when you need to relocate a file.",
- Parameters: models.ToolFuncParams{
- Type: "object",
- Required: []string{"src", "dst"},
- Properties: map[string]models.ToolArgProps{
- "src": models.ToolArgProps{
- Type: "string",
- Description: "source path of the file to move",
- },
- "dst": models.ToolArgProps{
- Type: "string",
- Description: "destination path where the file should be moved",
- },
- },
- },
- },
- },
- // file_copy
- models.Tool{
- Type: "function",
- Function: models.ToolFunc{
- Name: "file_copy",
- Description: "Copy a file from one location to another. Use when you need to duplicate a file.",
- Parameters: models.ToolFuncParams{
- Type: "object",
- Required: []string{"src", "dst"},
- Properties: map[string]models.ToolArgProps{
- "src": models.ToolArgProps{
- Type: "string",
- Description: "source path of the file to copy",
- },
- "dst": models.ToolArgProps{
- Type: "string",
- Description: "destination path where the file should be copied",
- },
- },
- },
- },
- },
- // file_list
- models.Tool{
- Type: "function",
- Function: models.ToolFunc{
- Name: "file_list",
- Description: "List files and directories in a directory. Use when you need to see what files are in a directory.",
- Parameters: models.ToolFuncParams{
- Type: "object",
- Required: []string{},
- Properties: map[string]models.ToolArgProps{
- "path": models.ToolArgProps{
- Type: "string",
- Description: "path of the directory to list (optional, defaults to current directory)",
- },
- },
- },
- },
- },
- // execute_command
- models.Tool{
- Type: "function",
- Function: models.ToolFunc{
- Name: "execute_command",
- Description: "Execute a shell command safely. Use when you need to run system commands like cd grep sed awk find cat head tail sort uniq wc ls echo cut tr cp mv rm mkdir rmdir pwd df free ps top du whoami date uname go git. Git is allowed for read-only operations: status, log, diff, show, branch, reflog, rev-parse, shortlog, describe. Use 'cd /path' to change working directory.",
+ Name: "run",
+ Description: "Execute commands: shell, git, memory, todo. Usage: run \"<command>\". Examples: run \"ls -la\", run \"git status\", run \"memory store foo bar\", run \"memory get foo\", run \"todo create task\", run \"help\", run \"help memory\"",
Parameters: models.ToolFuncParams{
Type: "object",
Required: []string{"command"},
Properties: map[string]models.ToolArgProps{
"command": models.ToolArgProps{
Type: "string",
- Description: "command to execute with arguments (e.g., 'go run main.go', 'ls -la /tmp', 'cd /home/user'). Use a single string; arguments should be space-separated after the command.",
- },
- },
- },
- },
- },
- // todo_create
- models.Tool{
- Type: "function",
- Function: models.ToolFunc{
- Name: "todo_create",
- Description: "Create a new todo item with a task. Returns the created todo with its ID.",
- Parameters: models.ToolFuncParams{
- Type: "object",
- Required: []string{"task"},
- Properties: map[string]models.ToolArgProps{
- "task": models.ToolArgProps{
- Type: "string",
- Description: "the task description to add to the todo list",
- },
- },
- },
- },
- },
- // todo_read
- models.Tool{
- Type: "function",
- Function: models.ToolFunc{
- Name: "todo_read",
- Description: "Read todo items. Without ID returns all todos, with ID returns specific todo.",
- Parameters: models.ToolFuncParams{
- Type: "object",
- Required: []string{},
- Properties: map[string]models.ToolArgProps{
- "id": models.ToolArgProps{
- Type: "string",
- Description: "optional id of the specific todo item to read",
- },
- },
- },
- },
- },
- // todo_update
- models.Tool{
- Type: "function",
- Function: models.ToolFunc{
- Name: "todo_update",
- Description: "Update a todo item by ID with new task or status. Status must be one of: pending, in_progress, completed.",
- Parameters: models.ToolFuncParams{
- Type: "object",
- Required: []string{"id"},
- Properties: map[string]models.ToolArgProps{
- "id": models.ToolArgProps{
- Type: "string",
- Description: "id of the todo item to update",
- },
- "task": models.ToolArgProps{
- Type: "string",
- Description: "new task description (optional)",
- },
- "status": models.ToolArgProps{
- Type: "string",
- Description: "new status: pending, in_progress, or completed (optional)",
- },
- },
- },
- },
- },
- // todo_delete
- models.Tool{
- Type: "function",
- Function: models.ToolFunc{
- Name: "todo_delete",
- Description: "Delete a todo item by ID. Returns success message.",
- Parameters: models.ToolFuncParams{
- Type: "object",
- Required: []string{"id"},
- Properties: map[string]models.ToolArgProps{
- "id": models.ToolArgProps{
- Type: "string",
- Description: "id of the todo item to delete",
+ Description: "command to execute. Use: run \"help\" for all commands, run \"help <cmd>\" for specific help. Examples: ls, cat, grep, git status, memory store, todo create, etc.",
},
},
},
diff --git a/tools/chain.go b/tools/chain.go
new file mode 100644
index 0000000..73ab6cd
--- /dev/null
+++ b/tools/chain.go
@@ -0,0 +1,283 @@
+package tools
+
+import (
+ "fmt"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strings"
+)
+
+// Operator identifies the connective that joins a command to the one after it.
+type Operator int
+
+// Chain operators recognised by ParseChain. The numeric values follow the
+// declaration order, starting at zero.
+const (
+	OpNone Operator = 0 // no operator follows (last command of a chain)
+	OpAnd  Operator = 1 // &&
+	OpOr   Operator = 2 // ||
+	OpSeq  Operator = 3 // ;
+	OpPipe Operator = 4 // |
+)
+
+// Segment is one command of a chain plus the operator that comes after it.
+type Segment struct {
+	Raw string   // command text
+	Op  Operator // operator AFTER this segment
+}
+
+// ParseChain breaks a command line into Segments at the operators &&, ||, ;
+// and |. Operator characters inside single- or double-quoted text are left
+// alone; an unterminated quote extends to the end of the input. Each segment's
+// text is whitespace-trimmed, and a trailing empty segment is not emitted.
+func ParseChain(input string) []Segment {
+	var (
+		out []Segment
+		buf strings.Builder
+	)
+	chars := []rune(input)
+	total := len(chars)
+
+	// emit closes the segment accumulated so far and tags it with op.
+	emit := func(op Operator) {
+		out = append(out, Segment{Raw: strings.TrimSpace(buf.String()), Op: op})
+		buf.Reset()
+	}
+
+	for pos := 0; pos < total; pos++ {
+		c := chars[pos]
+		doubled := pos+1 < total && chars[pos+1] == c
+
+		switch {
+		case c == '\'' || c == '"':
+			// copy the quoted run verbatim, quotes included
+			buf.WriteRune(c)
+			for pos++; pos < total && chars[pos] != c; pos++ {
+				buf.WriteRune(chars[pos])
+			}
+			if pos < total {
+				buf.WriteRune(chars[pos])
+			}
+		case c == '&' && doubled:
+			emit(OpAnd)
+			pos++ // skip second &
+		case c == ';':
+			emit(OpSeq)
+		case c == '|' && doubled:
+			emit(OpOr)
+			pos++ // skip second |
+		case c == '|':
+			emit(OpPipe)
+		default:
+			buf.WriteRune(c)
+		}
+	}
+
+	if tail := strings.TrimSpace(buf.String()); tail != "" {
+		out = append(out, Segment{Raw: tail, Op: OpNone})
+	}
+	return out
+}
+
+// ExecChain runs a command line that may contain the operators &&, ||, ; and |.
+//
+// Each segment is executed through execSingle. The output of a segment that is
+// followed by | becomes the stdin of the next segment instead of being
+// reported; every other non-empty output is collected and the results are
+// joined with newlines. A segment after && runs only if the last executed
+// command succeeded, a segment after || only if it failed, and a pipeline
+// stage is skipped when the stage feeding it was skipped. A failing command
+// that produced no output is reported as an "[error] ..." line so the failure
+// is visible to the caller.
+func ExecChain(command string) string {
+	segments := ParseChain(command)
+	if len(segments) == 0 {
+		return "[error] empty command"
+	}
+
+	var (
+		collected   []string
+		lastOutput  string
+		lastErr     error
+		prevSkipped bool
+	)
+
+	for i, seg := range segments {
+		skip, piped := false, false
+		if i > 0 {
+			switch segments[i-1].Op {
+			case OpAnd:
+				skip = lastErr != nil
+			case OpOr:
+				skip = lastErr == nil
+			case OpPipe:
+				piped = true
+				// never feed a stage from a command that did not run
+				skip = prevSkipped
+			}
+		}
+		prevSkipped = skip
+		if skip {
+			continue
+		}
+
+		segStdin := ""
+		if piped {
+			segStdin = lastOutput
+		}
+		lastOutput, lastErr = execSingle(seg.Raw, segStdin)
+
+		// output of a piped segment flows to the next command only
+		if seg.Op == OpPipe && i < len(segments)-1 {
+			continue
+		}
+		switch {
+		case lastOutput != "":
+			collected = append(collected, lastOutput)
+		case lastErr != nil:
+			collected = append(collected, fmt.Sprintf("[error] %s: %v", seg.Raw, lastErr))
+		}
+	}
+
+	return strings.Join(collected, "\n")
+}
+
+// execSingle runs one command that has already been split off from any chain
+// operators.
+//
+// The command is tokenized; built-ins handled by execBuiltin take precedence.
+// Anything else is run as an external program inside fsRootDir (the directory
+// the cd/pwd built-ins operate on), with stdin attached when non-empty.
+// It returns the combined stdout/stderr and the execution error, if any.
+func execSingle(command, stdin string) (string, error) {
+	parts := tokenize(command)
+	if len(parts) == 0 {
+		return "", fmt.Errorf("empty command")
+	}
+	name, args := parts[0], parts[1:]
+
+	// execBuiltin answers "" both for "not a built-in" and for empty output;
+	// in either case fall through to the external program.
+	if result := execBuiltin(name, args, stdin); result != "" {
+		return result, nil
+	}
+
+	cmd := exec.Command(name, args...)
+	// Honour the directory selected with cd; an empty Dir makes os/exec use
+	// the process working directory, which is the previous behaviour.
+	cmd.Dir = fsRootDir
+	if stdin != "" {
+		cmd.Stdin = strings.NewReader(stdin)
+	}
+	output, err := cmd.CombinedOutput()
+	return string(output), err
+}
+
+// tokenize splits a command string on spaces and tabs into arguments.
+//
+// Text inside single or double quotes is kept together and the quotes are
+// removed; a quoted run adjacent to other characters joins the same token.
+// An empty quoted string ("" or '') yields an empty argument rather than
+// being dropped. An unterminated quote extends to the end of the input.
+func tokenize(input string) []string {
+	var tokens []string
+	var current strings.Builder
+	var quoteChar rune
+	inQuote := false
+	started := false // a token is in progress, even if it is still empty
+
+	for _, ch := range input {
+		switch {
+		case inQuote:
+			if ch == quoteChar {
+				inQuote = false
+			} else {
+				current.WriteRune(ch)
+			}
+		case ch == '\'' || ch == '"':
+			inQuote, quoteChar, started = true, ch, true
+		case ch == ' ' || ch == '\t':
+			if started {
+				tokens = append(tokens, current.String())
+				current.Reset()
+				started = false
+			}
+		default:
+			current.WriteRune(ch)
+			started = true
+		}
+	}
+
+	if started {
+		tokens = append(tokens, current.String())
+	}
+	return tokens
+}
+
+// execBuiltin runs name as a built-in command when one exists.
+//
+// Supported built-ins: echo, time, cat, pwd, cd and go. The return value is
+// the command output; failures are reported in-band as "[error] ..." text.
+// An empty string means either that name is not a built-in or that the
+// built-in produced no output; the caller cannot tell these apart.
+func execBuiltin(name string, args []string, stdin string) string {
+	switch name {
+	case "echo":
+		if stdin != "" {
+			return stdin
+		}
+		return strings.Join(args, " ")
+	case "time":
+		// report the current time, not the layout string itself
+		return FsTime(args, stdin)
+	case "cat":
+		if len(args) == 0 {
+			return stdin
+		}
+		// relative paths follow the directory selected with cd
+		path := args[0]
+		if !filepath.IsAbs(path) {
+			path = filepath.Join(fsRootDir, path)
+		}
+		data, err := os.ReadFile(path)
+		if err != nil {
+			return fmt.Sprintf("[error] cat: %v", err)
+		}
+		return string(data)
+	case "pwd":
+		return fsRootDir
+	case "cd":
+		if len(args) == 0 {
+			return "[error] usage: cd <dir>"
+		}
+		dir := args[0]
+		// Resolve relative to fsRootDir
+		abs := dir
+		if !filepath.IsAbs(dir) {
+			abs = filepath.Join(fsRootDir, dir)
+		}
+		abs = filepath.Clean(abs)
+		info, err := os.Stat(abs)
+		if err != nil {
+			return fmt.Sprintf("[error] cd: %v", err)
+		}
+		if !info.IsDir() {
+			return fmt.Sprintf("[error] cd: not a directory: %s", dir)
+		}
+		fsRootDir = abs
+		return fmt.Sprintf("Changed directory to: %s", fsRootDir)
+	case "go":
+		// Allow all go subcommands
+		if len(args) == 0 {
+			return "[error] usage: go <subcommand> [options]"
+		}
+		cmd := exec.Command("go", args...)
+		cmd.Dir = fsRootDir
+		output, err := cmd.CombinedOutput()
+		if err != nil {
+			return fmt.Sprintf("[error] go %s: %v\n%s", args[0], err, string(output))
+		}
+		return string(output)
+	}
+	return ""
+}
diff --git a/tools/fs.go b/tools/fs.go
new file mode 100644
index 0000000..9fc09bb
--- /dev/null
+++ b/tools/fs.go
@@ -0,0 +1,753 @@
+package tools
+
+import (
+ "encoding/base64"
+ "fmt"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "sort"
+ "strconv"
+ "strings"
+ "time"
+)
+
+// fsRootDir is the directory resolvePath confines paths to. FsCd and SetFSCwd
+// reassign it, so it also acts as the tools' current working directory.
+var fsRootDir string
+// memoryStore backs the "memory" command; FsMemory reports an error while it
+// is nil.
+var memoryStore MemoryStore
+// agentRole is passed as the agent argument on every memoryStore call.
+var agentRole string
+
+// MemoryStore is the storage backend used by FsMemory. Every method takes the
+// agent name first; topic is the key under which data is kept.
+type MemoryStore interface {
+ Memorise(agent, topic, data string) (string, error)
+ Recall(agent, topic string) (string, error)
+ RecallTopics(agent string) ([]string, error)
+ Forget(agent, topic string) error
+}
+
+// SetMemoryStore installs the backend used by FsMemory together with the
+// agent role that is passed to it on every call.
+func SetMemoryStore(store MemoryStore, role string) {
+	memoryStore, agentRole = store, role
+}
+
+// SetFSRoot sets the fs root directory to dir as given; the value is not
+// validated or made absolute here (SetFSCwd does both).
+func SetFSRoot(dir string) {
+ fsRootDir = dir
+}
+
+// GetFSRoot returns the current fs root directory ("" if none has been set).
+func GetFSRoot() string {
+ return fsRootDir
+}
+
+// SetFSCwd makes dir the fs root after converting it to an absolute path.
+// It returns an error, leaving the root unchanged, when dir cannot be made
+// absolute, cannot be stat'ed, or is not a directory.
+func SetFSCwd(dir string) error {
+	target, err := filepath.Abs(dir)
+	if err != nil {
+		return err
+	}
+
+	switch st, statErr := os.Stat(target); {
+	case statErr != nil:
+		return statErr
+	case !st.IsDir():
+		return fmt.Errorf("not a directory: %s", dir)
+	}
+
+	fsRootDir = target
+	return nil
+}
+
+// resolvePath turns rel into a cleaned path and verifies that it lies inside
+// the fs root.
+//
+// A relative rel is joined onto fsRootDir; an absolute rel is used as is.
+// The check is lexical (symlinks are not resolved). It returns an error when
+// the root is unset or the result would fall outside the root.
+func resolvePath(rel string) (string, error) {
+	if fsRootDir == "" {
+		return "", fmt.Errorf("fs root not set")
+	}
+	// Clean the root so "/" and roots with a trailing separator compare
+	// correctly.
+	root := filepath.Clean(fsRootDir)
+
+	abs := rel
+	if !filepath.IsAbs(abs) {
+		abs = filepath.Join(root, abs)
+	}
+	abs = filepath.Clean(abs)
+
+	// The path is inside the root exactly when the relative path from the
+	// root to it does not start by climbing out with "..".
+	within, err := filepath.Rel(root, abs)
+	if err != nil || within == ".." || strings.HasPrefix(within, ".."+string(os.PathSeparator)) {
+		return "", fmt.Errorf("path escapes fs root: %s", rel)
+	}
+	return abs, nil
+}
+
+// humanSize formats a byte count as "<n>B", "<x.y>KB" or "<x.y>MB" using
+// 1024-based units.
+func humanSize(n int64) string {
+	const (
+		kib = int64(1) << 10
+		mib = int64(1) << 20
+	)
+	if n >= mib {
+		return fmt.Sprintf("%.1fMB", float64(n)/float64(mib))
+	}
+	if n >= kib {
+		return fmt.Sprintf("%.1fKB", float64(n)/float64(kib))
+	}
+	return fmt.Sprintf("%dB", n)
+}
+
+// IsImageFile reports whether path has a recognised image extension
+// (png, jpg, jpeg, gif, webp or svg), compared case-insensitively.
+func IsImageFile(path string) bool {
+	switch strings.ToLower(filepath.Ext(path)) {
+	case ".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg":
+		return true
+	}
+	return false
+}
+
+// FsLs lists the directory named by args[0] (the fs root when no argument is
+// given). Each entry is printed as "d - name/" for directories or
+// "f <size> name" for files; stdin is ignored.
+func FsLs(args []string, stdin string) string {
+	var target string
+	if len(args) != 0 {
+		target = args[0]
+	}
+
+	dirPath, err := resolvePath(target)
+	if err != nil {
+		return fmt.Sprintf("[error] %v", err)
+	}
+	entries, err := os.ReadDir(dirPath)
+	if err != nil {
+		return fmt.Sprintf("[error] ls: %v", err)
+	}
+
+	var listing strings.Builder
+	for _, entry := range entries {
+		// a failed Info lookup is shown with size "?" instead of aborting
+		info, _ := entry.Info()
+		switch {
+		case entry.IsDir():
+			fmt.Fprintf(&listing, "d %-8s %s/\n", "-", entry.Name())
+		case info != nil:
+			fmt.Fprintf(&listing, "f %-8s %s\n", humanSize(info.Size()), entry.Name())
+		default:
+			fmt.Fprintf(&listing, "f %-8s %s\n", "?", entry.Name())
+		}
+	}
+
+	if listing.Len() == 0 {
+		return "(empty directory)"
+	}
+	return strings.TrimRight(listing.String(), "\n")
+}
+
+// FsCat returns the content of the file named by the first non-flag argument.
+// With -b/--base64 the content is base64-encoded, and for image files a
+// markdown image link to the file is appended. stdin is ignored.
+func FsCat(args []string, stdin string) string {
+	var (
+		encode bool
+		target string
+	)
+	for _, arg := range args {
+		switch {
+		case arg == "-b" || arg == "--base64":
+			encode = true
+		case target == "":
+			target = arg
+		}
+	}
+	if target == "" {
+		return "[error] usage: cat <path>"
+	}
+
+	full, err := resolvePath(target)
+	if err != nil {
+		return fmt.Sprintf("[error] %v", err)
+	}
+	raw, err := os.ReadFile(full)
+	if err != nil {
+		return fmt.Sprintf("[error] cat: %v", err)
+	}
+
+	if !encode {
+		return string(raw)
+	}
+	encoded := base64.StdEncoding.EncodeToString(raw)
+	if IsImageFile(target) {
+		encoded += fmt.Sprintf("\n![image](file://%s)", full)
+	}
+	return encoded
+}
+
+// FsSee describes the image file args[0]: its name, size and a markdown image
+// link to its resolved path. The file must exist and have an image extension.
+func FsSee(args []string, stdin string) string {
+	if len(args) == 0 {
+		return "[error] usage: see <image-path>"
+	}
+	name := args[0]
+
+	full, err := resolvePath(name)
+	if err != nil {
+		return fmt.Sprintf("[error] %v", err)
+	}
+	st, err := os.Stat(full)
+	switch {
+	case err != nil:
+		return fmt.Sprintf("[error] see: %v", err)
+	case !IsImageFile(name):
+		return fmt.Sprintf("[error] not an image file: %s (use cat to read text files)", name)
+	}
+
+	return fmt.Sprintf("Image: %s (%s)\n![image](file://%s)", name, humanSize(st.Size()), full)
+}
+
+// FsWrite writes a file, creating parent directories as needed.
+//
+// The first non-flag argument is the path; remaining arguments, joined with
+// spaces, are the content. Without content arguments stdin is written. With
+// -b/--base64 the source (stdin first, then the arguments) is base64-decoded
+// before writing. For image paths a markdown image link is appended to the
+// result message.
+func FsWrite(args []string, stdin string) string {
+	var (
+		decode bool
+		target string
+		pieces []string
+	)
+	for _, arg := range args {
+		switch {
+		case arg == "-b" || arg == "--base64":
+			decode = true
+		case target == "":
+			target = arg
+		default:
+			pieces = append(pieces, arg)
+		}
+	}
+	if target == "" {
+		return "[error] usage: write <path> [content] or pipe stdin"
+	}
+
+	full, err := resolvePath(target)
+	if err != nil {
+		return fmt.Sprintf("[error] %v", err)
+	}
+	if mkErr := os.MkdirAll(filepath.Dir(full), 0o755); mkErr != nil {
+		return fmt.Sprintf("[error] mkdir: %v", mkErr)
+	}
+
+	inline := strings.Join(pieces, " ")
+	var payload []byte
+	switch {
+	case decode:
+		// stdin wins over inline arguments in base64 mode
+		encoded := stdin
+		if encoded == "" && len(pieces) > 0 {
+			encoded = inline
+		}
+		decoded, decErr := base64.StdEncoding.DecodeString(strings.TrimSpace(encoded))
+		if decErr != nil {
+			return fmt.Sprintf("[error] base64 decode: %v", decErr)
+		}
+		payload = decoded
+	case len(pieces) > 0:
+		payload = []byte(inline)
+	default:
+		payload = []byte(stdin)
+	}
+
+	if wrErr := os.WriteFile(full, payload, 0o644); wrErr != nil {
+		return fmt.Sprintf("[error] write: %v", wrErr)
+	}
+
+	msg := fmt.Sprintf("Written %s → %s", humanSize(int64(len(payload))), target)
+	if IsImageFile(target) {
+		msg += fmt.Sprintf("\n![image](file://%s)", full)
+	}
+	return msg
+}
+
+// FsStat reports name, size, MIME type and modification time of args[0].
+// The MIME type is derived from the extension for image files and is
+// "application/octet-stream" otherwise; directories get an extra Kind line.
+func FsStat(args []string, stdin string) string {
+	if len(args) == 0 {
+		return "[error] usage: stat <path>"
+	}
+	name := args[0]
+
+	full, err := resolvePath(name)
+	if err != nil {
+		return fmt.Sprintf("[error] %v", err)
+	}
+	st, err := os.Stat(full)
+	if err != nil {
+		return fmt.Sprintf("[error] stat: %v", err)
+	}
+
+	imageTypes := map[string]string{
+		".png":  "image/png",
+		".jpg":  "image/jpeg",
+		".jpeg": "image/jpeg",
+		".gif":  "image/gif",
+		".webp": "image/webp",
+		".svg":  "image/svg+xml",
+	}
+	mime := "application/octet-stream"
+	if IsImageFile(name) {
+		if known, ok := imageTypes[strings.ToLower(filepath.Ext(name))]; ok {
+			mime = known
+		}
+	}
+
+	var report strings.Builder
+	fmt.Fprintf(&report, "File: %s\n", name)
+	fmt.Fprintf(&report, "Size: %s (%d bytes)\n", humanSize(st.Size()), st.Size())
+	fmt.Fprintf(&report, "Type: %s\n", mime)
+	fmt.Fprintf(&report, "Modified: %s\n", st.ModTime().Format(time.RFC3339))
+	if st.IsDir() {
+		fmt.Fprintf(&report, "Kind: directory\n")
+	}
+	return strings.TrimRight(report.String(), "\n")
+}
+
+// FsRm removes the file or directory tree named by args[0].
+//
+// The path must resolve inside the fs root and must not be the root itself:
+// "rm ." or "rm ''" would otherwise delete the entire working tree.
+// Removing a path that does not exist is not an error (os.RemoveAll).
+func FsRm(args []string, stdin string) string {
+	if len(args) == 0 {
+		return "[error] usage: rm <path>"
+	}
+
+	abs, err := resolvePath(args[0])
+	if err != nil {
+		return fmt.Sprintf("[error] %v", err)
+	}
+	if abs == filepath.Clean(fsRootDir) {
+		return "[error] rm: refusing to remove the fs root"
+	}
+
+	if err := os.RemoveAll(abs); err != nil {
+		return fmt.Sprintf("[error] rm: %v", err)
+	}
+	return fmt.Sprintf("Removed %s", args[0])
+}
+
+// FsCp copies the file args[0] to args[1], creating the destination's parent
+// directories. The whole file is read into memory and written with mode 0644.
+func FsCp(args []string, stdin string) string {
+	if len(args) < 2 {
+		return "[error] usage: cp <src> <dst>"
+	}
+	from, to := args[0], args[1]
+
+	fromAbs, err := resolvePath(from)
+	if err != nil {
+		return fmt.Sprintf("[error] %v", err)
+	}
+	toAbs, err := resolvePath(to)
+	if err != nil {
+		return fmt.Sprintf("[error] %v", err)
+	}
+
+	contents, err := os.ReadFile(fromAbs)
+	if err != nil {
+		return fmt.Sprintf("[error] cp read: %v", err)
+	}
+	if err = os.MkdirAll(filepath.Dir(toAbs), 0o755); err != nil {
+		return fmt.Sprintf("[error] cp mkdir: %v", err)
+	}
+	if err = os.WriteFile(toAbs, contents, 0o644); err != nil {
+		return fmt.Sprintf("[error] cp write: %v", err)
+	}
+
+	return fmt.Sprintf("Copied %s → %s (%s)", from, to, humanSize(int64(len(contents))))
+}
+
+// FsMv renames args[0] to args[1], creating the destination's parent
+// directories first. Both paths must resolve inside the fs root.
+func FsMv(args []string, stdin string) string {
+	if len(args) < 2 {
+		return "[error] usage: mv <src> <dst>"
+	}
+	from, to := args[0], args[1]
+
+	fromAbs, err := resolvePath(from)
+	if err != nil {
+		return fmt.Sprintf("[error] %v", err)
+	}
+	toAbs, err := resolvePath(to)
+	if err != nil {
+		return fmt.Sprintf("[error] %v", err)
+	}
+
+	if err = os.MkdirAll(filepath.Dir(toAbs), 0o755); err != nil {
+		return fmt.Sprintf("[error] mv mkdir: %v", err)
+	}
+	if err = os.Rename(fromAbs, toAbs); err != nil {
+		return fmt.Sprintf("[error] mv: %v", err)
+	}
+	return fmt.Sprintf("Moved %s → %s", from, to)
+}
+
+// FsMkdir creates the directory args[0] together with any missing parents.
+func FsMkdir(args []string, stdin string) string {
+	if len(args) == 0 {
+		return "[error] usage: mkdir <dir>"
+	}
+	name := args[0]
+
+	full, err := resolvePath(name)
+	if err != nil {
+		return fmt.Sprintf("[error] %v", err)
+	}
+	if mkErr := os.MkdirAll(full, 0o755); mkErr != nil {
+		return fmt.Sprintf("[error] mkdir: %v", mkErr)
+	}
+	return fmt.Sprintf("Created %s", name)
+}
+
+// Text processing commands
+
+// FsEcho returns stdin when it is non-empty; otherwise it returns the
+// arguments joined with single spaces.
+func FsEcho(args []string, stdin string) string {
+	if stdin == "" {
+		return strings.Join(args, " ")
+	}
+	return stdin
+}
+
+// FsTime returns the current local time as "YYYY-MM-DD HH:MM:SS ZONE".
+// Both parameters are ignored.
+func FsTime(args []string, stdin string) string {
+	const layout = "2006-01-02 15:04:05 MST"
+	now := time.Now()
+	return now.Format(layout)
+}
+
+// FsGrep returns the lines that contain pattern as a plain substring (no
+// regular expressions).
+//
+// Flags: -i ignore case, -v invert the match, -c print only the number of
+// matching lines. The first non-flag argument is the pattern; an optional
+// second one names a file that is read when nothing is piped in on stdin.
+// A single trailing newline of the input is not counted as an extra line.
+func FsGrep(args []string, stdin string) string {
+	if len(args) == 0 {
+		return "[error] usage: grep [-i] [-v] [-c] <pattern> [file]"
+	}
+
+	var (
+		ignoreCase, invert, countOnly bool
+		positional                    []string
+	)
+	for _, a := range args {
+		switch a {
+		case "-i":
+			ignoreCase = true
+		case "-v":
+			invert = true
+		case "-c":
+			countOnly = true
+		default:
+			positional = append(positional, a)
+		}
+	}
+	if len(positional) == 0 || positional[0] == "" {
+		return "[error] pattern required"
+	}
+	// The pattern is the FIRST positional argument, so "grep foo file.txt"
+	// no longer searches for "file.txt".
+	pattern := positional[0]
+	if ignoreCase {
+		pattern = strings.ToLower(pattern)
+	}
+
+	input := stdin
+	if input == "" && len(positional) > 1 {
+		abs, err := resolvePath(positional[1])
+		if err != nil {
+			return fmt.Sprintf("[error] grep: %v", err)
+		}
+		data, err := os.ReadFile(abs)
+		if err != nil {
+			return fmt.Sprintf("[error] grep: %v", err)
+		}
+		input = string(data)
+	}
+
+	var lines []string
+	if input != "" {
+		lines = strings.Split(strings.TrimSuffix(input, "\n"), "\n")
+	}
+
+	var matched []string
+	for _, line := range lines {
+		haystack := line
+		if ignoreCase {
+			haystack = strings.ToLower(line)
+		}
+		// keep the line when "contains pattern" differs from "invert"
+		if strings.Contains(haystack, pattern) != invert {
+			matched = append(matched, line)
+		}
+	}
+
+	if countOnly {
+		return strconv.Itoa(len(matched))
+	}
+	return strings.Join(matched, "\n")
+}
+
+// FsHead returns the first n lines of stdin (default 10). n is taken from
+// "-n <count>" or from a bare numeric argument; other dash-prefixed arguments
+// are ignored. A non-positive n returns the input unchanged.
+func FsHead(args []string, stdin string) string {
+	limit := 10
+	for idx, arg := range args {
+		switch {
+		case arg == "-n" && idx+1 < len(args):
+			if v, err := strconv.Atoi(args[idx+1]); err == nil {
+				limit = v
+			}
+		case strings.HasPrefix(arg, "-"):
+			// unrecognised flag: nothing to do
+		default:
+			if v, err := strconv.Atoi(arg); err == nil {
+				limit = v
+			}
+		}
+	}
+
+	rows := strings.Split(stdin, "\n")
+	if limit <= 0 || len(rows) <= limit {
+		return strings.Join(rows, "\n")
+	}
+	return strings.Join(rows[:limit], "\n")
+}
+
+// FsTail returns the last n lines of stdin (default 10). n is taken from
+// "-n <count>" or from a bare numeric argument; other dash-prefixed arguments
+// are ignored. A non-positive n returns every line.
+//
+// A single trailing newline is stripped before splitting so that
+// newline-terminated input does not count an empty final line; without this
+// "tail -n 1" on "a\nb\n" would return "".
+func FsTail(args []string, stdin string) string {
+	n := 10
+	for i, a := range args {
+		switch {
+		case a == "-n" && i+1 < len(args):
+			if parsed, err := strconv.Atoi(args[i+1]); err == nil {
+				n = parsed
+			}
+		case strings.HasPrefix(a, "-"):
+			// unrecognised flag: ignore
+		default:
+			if parsed, err := strconv.Atoi(a); err == nil {
+				n = parsed
+			}
+		}
+	}
+
+	lines := strings.Split(strings.TrimSuffix(stdin, "\n"), "\n")
+	if n > 0 && len(lines) > n {
+		lines = lines[len(lines)-n:]
+	}
+	return strings.Join(lines, "\n")
+}
+
+// FsWc counts lines, whitespace-separated words and bytes of stdin.
+//
+// With -l, -w or -c as the first argument only that count is returned;
+// otherwise a "<l> lines, <w> words, <c> chars" summary. The "chars" figure
+// is the byte length. A line is a newline-terminated run plus a final
+// unterminated one, so "" has 0 lines and both "a" and "a\n" have 1.
+func FsWc(args []string, stdin string) string {
+	lines := strings.Count(stdin, "\n")
+	if stdin != "" && !strings.HasSuffix(stdin, "\n") {
+		lines++ // last line without a terminating newline
+	}
+	words := len(strings.Fields(stdin))
+	chars := len(stdin)
+
+	if len(args) > 0 {
+		switch args[0] {
+		case "-l":
+			return strconv.Itoa(lines)
+		case "-w":
+			return strconv.Itoa(words)
+		case "-c":
+			return strconv.Itoa(chars)
+		}
+	}
+	return fmt.Sprintf("%d lines, %d words, %d chars", lines, words, chars)
+}
+
+// FsSort sorts the lines of stdin. -r reverses the order; -n compares lines
+// as integers (lines that do not parse count as 0). The default is a
+// byte-wise string comparison.
+func FsSort(args []string, stdin string) string {
+	rows := strings.Split(stdin, "\n")
+
+	var descending, byNumber bool
+	for _, flag := range args {
+		switch flag {
+		case "-r":
+			descending = true
+		case "-n":
+			byNumber = true
+		}
+	}
+
+	sort.Slice(rows, func(a, b int) bool {
+		// descending order is ascending order with the operands swapped
+		x, y := a, b
+		if descending {
+			x, y = b, a
+		}
+		if byNumber {
+			nx, _ := strconv.Atoi(rows[x])
+			ny, _ := strconv.Atoi(rows[y])
+			return nx < ny
+		}
+		return rows[x] < rows[y]
+	})
+	return strings.Join(rows, "\n")
+}
+
+// FsUniq collapses runs of identical adjacent lines of stdin into one line.
+// With -c each output line is prefixed by the length of its run.
+func FsUniq(args []string, stdin string) string {
+	rows := strings.Split(stdin, "\n")
+
+	withCount := false
+	for _, flag := range args {
+		if flag == "-c" {
+			withCount = true
+		}
+	}
+
+	// render formats one collapsed run.
+	render := func(line string, n int) string {
+		if withCount {
+			return fmt.Sprintf("%d %s", n, line)
+		}
+		return line
+	}
+
+	var out []string
+	for start := 0; start < len(rows); {
+		// advance end past every line equal to the one at start
+		end := start + 1
+		for end < len(rows) && rows[end] == rows[start] {
+			end++
+		}
+		out = append(out, render(rows[start], end-start))
+		start = end
+	}
+	return strings.Join(out, "\n")
+}
+
+// allowedGitSubcommands is the allow-list consulted by FsGit: only these git
+// subcommands may be run. FsGit's error message repeats this list by hand, so
+// the two must be kept in sync.
+// NOTE(review): the list appears intended to be read-only, but only the
+// subcommand name is checked, not its options (e.g. "branch -D") — confirm
+// whether option filtering is needed.
+var allowedGitSubcommands = map[string]bool{
+ "status": true,
+ "log": true,
+ "diff": true,
+ "show": true,
+ "branch": true,
+ "reflog": true,
+ "rev-parse": true,
+ "shortlog": true,
+ "describe": true,
+ "rev-list": true,
+}
+
+// FsGit runs "git <args...>" inside the fs root, provided args[0] is listed
+// in allowedGitSubcommands. All remaining arguments are handed to git
+// unchanged. It returns the combined output, or an "[error] ..." message
+// that includes the output when git fails.
+func FsGit(args []string, stdin string) string {
+	if len(args) == 0 {
+		return "[error] usage: git <subcommand> [options]"
+	}
+
+	sub := args[0]
+	if permitted := allowedGitSubcommands[sub]; !permitted {
+		return fmt.Sprintf("[error] git: '%s' is not an allowed git command. Allowed: status, log, diff, show, branch, reflog, rev-parse, shortlog, describe, rev-list", sub)
+	}
+
+	workDir, err := resolvePath(".")
+	if err != nil {
+		return fmt.Sprintf("[error] git: %v", err)
+	}
+
+	git := exec.Command("git", args...)
+	git.Dir = workDir
+	out, runErr := git.CombinedOutput()
+	if runErr == nil {
+		return string(out)
+	}
+	return fmt.Sprintf("[error] git %s: %v\n%s", sub, runErr, string(out))
+}
+
+// FsPwd returns the current fs root directory; both parameters are ignored.
+func FsPwd(args []string, stdin string) string {
+ return fsRootDir
+}
+
+// FsCd makes args[0] the new fs root. The target must resolve inside the
+// current root and be an existing directory; on failure the root is left
+// unchanged and an "[error] cd: ..." message is returned.
+func FsCd(args []string, stdin string) string {
+	if len(args) == 0 {
+		return "[error] usage: cd <dir>"
+	}
+	target := args[0]
+
+	full, err := resolvePath(target)
+	if err != nil {
+		return fmt.Sprintf("[error] cd: %v", err)
+	}
+
+	switch st, statErr := os.Stat(full); {
+	case statErr != nil:
+		return fmt.Sprintf("[error] cd: %v", statErr)
+	case !st.IsDir():
+		return fmt.Sprintf("[error] cd: not a directory: %s", target)
+	}
+
+	fsRootDir = full
+	return fmt.Sprintf("Changed directory to: %s", fsRootDir)
+}
+
+// FsSed performs a literal (non-regex) substitution in the style of
+// sed 's/old/new/[g]'.
+//
+// The expression is the first argument of the form s<delim>old<delim>new,
+// optionally followed by <delim> and flags; <delim> is the character right
+// after "s" and must be ASCII punctuation other than '.', '_' and '-', so
+// file names such as "settings.txt" are not mistaken for an expression.
+// Without the g flag only the first occurrence in the whole input is
+// replaced. Input is stdin when non-empty, otherwise the file named by the
+// first other non-flag argument. With -i/--in-place and a file argument the
+// result is written back to the file instead of being returned.
+func FsSed(args []string, stdin string) string {
+	const usage = "[error] usage: sed 's/old/new/[g]' [file]"
+	if len(args) == 0 {
+		return usage
+	}
+
+	// isExpr reports whether a looks like s<delim>... with a usable delimiter.
+	isExpr := func(a string) bool {
+		if len(a) < 2 || a[0] != 's' {
+			return false
+		}
+		c := a[1]
+		switch {
+		case c >= 0x80, c <= ' ':
+			return false
+		case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9':
+			return false
+		case c == '.', c == '_', c == '-':
+			return false
+		}
+		return true
+	}
+
+	inPlace := false
+	var filePath, pattern string
+	for _, a := range args {
+		switch {
+		case a == "-i" || a == "--in-place":
+			inPlace = true
+		case pattern == "" && isExpr(a):
+			pattern = a
+		case filePath == "" && !strings.HasPrefix(a, "-"):
+			filePath = a
+		}
+	}
+	if pattern == "" {
+		return usage
+	}
+
+	// Split AFTER the leading "s<delim>" so that fields are old, new, flags.
+	fields := strings.Split(pattern[2:], pattern[1:2])
+	if len(fields) < 2 || fields[0] == "" {
+		return "[error] invalid sed pattern. Use: s/old/new/[g]"
+	}
+	oldStr, newStr := fields[0], fields[1]
+	global := len(fields) >= 3 && strings.Contains(fields[2], "g")
+
+	var content string
+	switch {
+	case stdin != "":
+		content = stdin
+	case filePath != "":
+		abs, err := resolvePath(filePath)
+		if err != nil {
+			return fmt.Sprintf("[error] sed: %v", err)
+		}
+		data, err := os.ReadFile(abs)
+		if err != nil {
+			return fmt.Sprintf("[error] sed: %v", err)
+		}
+		content = string(data)
+	default:
+		return "[error] sed: no input (use file path or pipe from stdin)"
+	}
+
+	if global {
+		content = strings.ReplaceAll(content, oldStr, newStr)
+	} else {
+		content = strings.Replace(content, oldStr, newStr, 1)
+	}
+
+	if inPlace && filePath != "" {
+		abs, err := resolvePath(filePath)
+		if err != nil {
+			return fmt.Sprintf("[error] sed: %v", err)
+		}
+		if err := os.WriteFile(abs, []byte(content), 0644); err != nil {
+			return fmt.Sprintf("[error] sed: %v", err)
+		}
+		return fmt.Sprintf("Modified %s", filePath)
+	}
+
+	return content
+}
+
+// FsMemory implements the "memory" command on top of memoryStore.
+//
+// Subcommands:
+//
+//	store <topic> <data...>  save data (or stdin when no data argument is given)
+//	get <topic>              return the data stored under topic
+//	list | topics            list all stored topics
+//	forget | delete <topic>  remove a topic
+//
+// All operations are performed for agentRole. Errors are returned in-band as
+// "[error] ..." strings.
+func FsMemory(args []string, stdin string) string {
+	if len(args) == 0 {
+		return "[error] usage: memory store <topic> <data> | memory get <topic> | memory list | memory forget <topic>"
+	}
+
+	if memoryStore == nil {
+		return "[error] memory store not initialized"
+	}
+
+	switch args[0] {
+	case "store":
+		// A topic is always required; data may come from stdin instead.
+		// Checking len(args) < 2 first prevents indexing args[1] out of
+		// range when only stdin is supplied.
+		if len(args) < 2 || (len(args) < 3 && stdin == "") {
+			return "[error] usage: memory store <topic> <data>"
+		}
+		topic := args[1]
+		data := stdin
+		if len(args) >= 3 {
+			data = strings.Join(args[2:], " ")
+		}
+		if _, err := memoryStore.Memorise(agentRole, topic, data); err != nil {
+			return fmt.Sprintf("[error] failed to store: %v", err)
+		}
+		return fmt.Sprintf("Stored under topic: %s", topic)
+
+	case "get":
+		if len(args) < 2 {
+			return "[error] usage: memory get <topic>"
+		}
+		topic := args[1]
+		data, err := memoryStore.Recall(agentRole, topic)
+		if err != nil {
+			return fmt.Sprintf("[error] failed to recall: %v", err)
+		}
+		return fmt.Sprintf("Topic: %s\n%s", topic, data)
+
+	case "list", "topics":
+		topics, err := memoryStore.RecallTopics(agentRole)
+		if err != nil {
+			return fmt.Sprintf("[error] failed to list topics: %v", err)
+		}
+		if len(topics) == 0 {
+			return "No topics stored."
+		}
+		return "Topics: " + strings.Join(topics, ", ")
+
+	case "forget", "delete":
+		if len(args) < 2 {
+			return "[error] usage: memory forget <topic>"
+		}
+		topic := args[1]
+		if err := memoryStore.Forget(agentRole, topic); err != nil {
+			return fmt.Sprintf("[error] failed to forget: %v", err)
+		}
+		return fmt.Sprintf("Deleted topic: %s", topic)
+
+	default:
+		return fmt.Sprintf("[error] unknown subcommand: %s. Use: store, get, list, topics, forget, delete", args[0])
+	}
+}
diff --git a/tools_playwright.go b/tools_playwright.go
index 3555469..786b170 100644
--- a/tools_playwright.go
+++ b/tools_playwright.go
@@ -455,6 +455,83 @@ func pwDrag(args map[string]string) []byte {
return []byte(fmt.Sprintf(`{"success": true, "message": "Dragged from (%s,%s) to (%s,%s)"}`, x1, y1, x2, y2))
}
+// pwNumber converts a numeric value produced by page.Evaluate into a float64.
+// It reports false when v is not one of the handled numeric types.
+func pwNumber(v interface{}) (float64, bool) {
+	switch n := v.(type) {
+	case float64:
+		return n, true
+	case float32:
+		return float64(n), true
+	case int:
+		return float64(n), true
+	case int32:
+		return float64(n), true
+	case int64:
+		return float64(n), true
+	}
+	return 0, false
+}
+
+// pwElementCenter evaluates a script in the current page that returns the
+// center of the first element matching selector, computed from its
+// getBoundingClientRect. found is false when the script yields no usable
+// {x, y} object (for example when nothing matches and it returns null).
+func pwElementCenter(selector string) (x, y float64, found bool, err error) {
+	// %q embeds the selector as a quoted, escaped string literal.
+	js := fmt.Sprintf(`
+		function getCenter(selector) {
+			const el = document.querySelector(selector);
+			if (!el) return null;
+			const rect = el.getBoundingClientRect();
+			return { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 };
+		}
+		getCenter(%q)
+	`, selector)
+
+	res, err := page.Evaluate(js)
+	if err != nil {
+		return 0, 0, false, err
+	}
+	center, ok := res.(map[string]interface{})
+	if !ok {
+		return 0, 0, false, nil
+	}
+	// Checked conversion instead of a bare .(float64) assertion, which would
+	// panic if a coordinate were delivered as an integer type.
+	x, okX := pwNumber(center["x"])
+	y, okY := pwNumber(center["y"])
+	return x, y, okX && okY, nil
+}
+
+// pwDragBySelector drags with the mouse from the center of the element
+// matching args["fromSelector"] to the center of the element matching
+// args["toSelector"]: move, button down, move, button up.
+//
+// It returns a JSON object: {"success": true, "message": ...} when every
+// step succeeds, otherwise {"error": ...} naming the missing argument, the
+// selector that matched nothing, or the mouse step that failed.
+func pwDragBySelector(args map[string]string) []byte {
+	fromSelector, ok := args["fromSelector"]
+	if !ok || fromSelector == "" {
+		return []byte(`{"error": "fromSelector not provided"}`)
+	}
+	toSelector, ok := args["toSelector"]
+	if !ok || toSelector == "" {
+		return []byte(`{"error": "toSelector not provided"}`)
+	}
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+
+	// fail renders an error response. The message is emitted with %q so that
+	// quotes and backslashes in selectors or driver errors are escaped rather
+	// than breaking the surrounding JSON string.
+	fail := func(format string, a ...interface{}) []byte {
+		return []byte(fmt.Sprintf(`{"error": %q}`, fmt.Sprintf(format, a...)))
+	}
+
+	fromX, fromY, found, err := pwElementCenter(fromSelector)
+	if err != nil {
+		return fail("failed to get from element: %s", err.Error())
+	}
+	if !found {
+		return fail("from selector '%s' not found", fromSelector)
+	}
+
+	toX, toY, found, err := pwElementCenter(toSelector)
+	if err != nil {
+		return fail("failed to get to element: %s", err.Error())
+	}
+	if !found {
+		return fail("to selector '%s' not found", toSelector)
+	}
+
+	// Perform the drag using the resolved coordinates.
+	mouse := page.Mouse()
+	if err := mouse.Move(fromX, fromY); err != nil {
+		return fail("failed to move mouse: %s", err.Error())
+	}
+	if err := mouse.Down(); err != nil {
+		return fail("failed to mouse down: %s", err.Error())
+	}
+	if err := mouse.Move(toX, toY); err != nil {
+		// Best effort: release the button so a failed drag does not leave it
+		// held down; the move error is the one worth reporting.
+		_ = mouse.Up()
+		return fail("failed to move mouse: %s", err.Error())
+	}
+	if err := mouse.Up(); err != nil {
+		return fail("failed to mouse up: %s", err.Error())
+	}
+
+	msg := fmt.Sprintf("Dragged from %s (%.0f,%.0f) to %s (%.0f,%.0f)", fromSelector, fromX, fromY, toSelector, toX, toY)
+	return []byte(fmt.Sprintf(`{"success": true, "message": %q}`, msg))
+}
+
func pwClickAt(args map[string]string) []byte {
x, ok := args["x"]
if !ok {