diff options
Diffstat (limited to 'tools.go')
| -rw-r--r-- | tools.go | 1506 |
1 files changed, 608 insertions, 898 deletions
@@ -5,7 +5,10 @@ import ( "encoding/json" "fmt" "gf-lt/agent" + "gf-lt/config" "gf-lt/models" + "gf-lt/storage" + "gf-lt/tools" "io" "os" "os/exec" @@ -40,94 +43,34 @@ Your current tools: <tools> [ { -"name":"recall", -"args": ["topic"], -"when_to_use": "when asked about topic that user previously asked to memorise" -}, -{ -"name":"memorise", -"args": ["topic", "data"], -"when_to_use": "when asked to memorise information under a topic" -}, -{ -"name":"recall_topics", -"args": [], -"when_to_use": "to see what topics are saved in memory" +"name":"run", +"args": ["command"], +"when_to_use": "main tool: run shell, memory, git, todo. Use run \"help\" for all commands, run \"help <cmd>\" for specific help. Examples: run \"ls -la\", run \"help\", run \"help memory\", run \"git status\", run \"memory store foo bar\"" }, { "name":"websearch", "args": ["query", "limit"], -"when_to_use": "when asked to search the web for information; returns clean summary without html,css and other web elements; limit is optional (default 3)" +"when_to_use": "search the web for information" }, { "name":"rag_search", "args": ["query", "limit"], -"when_to_use": "when asked to search the local document database for information; performs query refinement, semantic search, reranking, and synthesis; returns clean summary with sources; limit is optional (default 3)" +"when_to_use": "search local document database" }, { "name":"read_url", "args": ["url"], -"when_to_use": "when asked to get content for specific webpage or url; returns clean summary without html,css and other web elements" +"when_to_use": "get content from a webpage" }, { "name":"read_url_raw", "args": ["url"], -"when_to_use": "when asked to get content for specific webpage or url; returns raw data as is without processing" -}, -{ -"name":"file_create", -"args": ["path", "content"], -"when_to_use": "when there is a need to create a new file with optional content" -}, -{ -"name":"file_read", -"args": ["path"], -"when_to_use": "when you need to read the content of a file" -}, -{ -"name":"file_read_image", -"args": ["path"], -"when_to_use": "when you need to read or view an image file" -}, -{ -"name":"file_write", -"args": ["path", "content"], -"when_to_use": "when needed to overwrite content to a file" -}, -{ -"name":"file_write_append", -"args": ["path", "content"], -"when_to_use": "when you need append content to a file; use sed to edit content" -}, -{ -"name":"file_edit", -"args": ["path", "oldString", "newString", "lineNumber"], -"when_to_use": "when you need to make targeted changes to a specific section of a file without rewriting the entire file; lineNumber is optional - if provided, only edits that specific line; if not provided, replaces all occurrences of oldString" -}, -{ -"name":"file_delete", -"args": ["path"], -"when_to_use": "when asked to delete a file" -}, -{ -"name":"file_move", -"args": ["src", "dst"], -"when_to_use": "when you need to move a file from source to destination" -}, -{ -"name":"file_copy", -"args": ["src", "dst"], -"when_to_use": "copy a file from source to destination" -}, -{ -"name":"file_list", -"args": ["path"], -"when_to_use": "list files in a directory; path is optional (default: current directory)" +"when_to_use": "get raw content from a webpage" }, { -"name":"execute_command", -"args": ["command", "args"], -"when_to_use": "execute a system command; args is optional; allowed commands: grep, sed, awk, find, cat, head, tail, sort, uniq, wc, ls, echo, cut, tr, cp, mv, rm, mkdir, rmdir, pwd, df, free, ps, top, du, whoami, date, uname, go" +"name":"browser_agent", +"args": ["task"], +"when_to_use": "autonomous browser automation for complex tasks" } ] </tools> @@ -210,6 +153,10 @@ var ( func initTools() { sysMap[basicCard.ID] = basicCard roleToID["assistant"] = basicCard.ID + // Initialize fs root directory + tools.SetFSRoot(cfg.FilePickerDir) + // Initialize memory store + tools.SetMemoryStore(&memoryAdapter{store: store, cfg: cfg}, cfg.AssistantRole) sa, err := searcher.NewWebSurfer(searcher.SearcherTypeScraper, "") if err != nil { if logger != nil { @@ -258,7 +205,7 @@ func updateToolCapabilities() { if cfg == nil || cfg.CurrentAPI == "" { logger.Warn("cannot determine model capabilities: cfg or CurrentAPI is nil") registerWindowTools() - registerPlaywrightTools() + // fnMap["browser_agent"] = runBrowserAgent return } prevHasVision := modelHasVision @@ -272,31 +219,19 @@ func updateToolCapabilities() { } } registerWindowTools() - registerPlaywrightTools() + // fnMap["browser_agent"] = runBrowserAgent } // getWebAgentClient returns a singleton AgentClient for web agents. func getWebAgentClient() *agent.AgentClient { webAgentClientOnce.Do(func() { - if cfg == nil { - if logger != nil { - logger.Warn("web agent client unavailable: config not initialized") - } - return - } - if logger == nil { - if logger != nil { - logger.Warn("web agent client unavailable: logger not initialized") - } - return - } getToken := func() string { if chunkParser == nil { return "" } return chunkParser.GetToken() } - webAgentClient = agent.NewAgentClient(cfg, *logger, getToken) + webAgentClient = agent.NewAgentClient(cfg, logger, getToken) }) return webAgentClient } @@ -306,13 +241,13 @@ func registerWebAgents() { webAgentsOnce.Do(func() { client := getWebAgentClient() // Register rag_search agent - agent.Register("rag_search", agent.NewWebAgentB(client, ragSearchSysPrompt)) + agent.RegisterB("rag_search", agent.NewWebAgentB(client, ragSearchSysPrompt)) // Register websearch agent - agent.Register("websearch", agent.NewWebAgentB(client, webSearchSysPrompt)) + agent.RegisterB("websearch", agent.NewWebAgentB(client, webSearchSysPrompt)) // Register read_url agent - agent.Register("read_url", agent.NewWebAgentB(client, readURLSysPrompt)) + agent.RegisterB("read_url", agent.NewWebAgentB(client, readURLSysPrompt)) // Register summarize_chat agent - agent.Register("summarize_chat", agent.NewWebAgentB(client, summarySysPrompt)) + agent.RegisterB("summarize_chat", agent.NewWebAgentB(client, summarySysPrompt)) }) } @@ -461,352 +396,6 @@ func readURLRaw(args map[string]string) []byte { return []byte(fmt.Sprintf("%+v", resp)) } -/* -consider cases: -- append mode (treat it like a journal appendix) -- replace mode (new info/mind invalidates old ones) -also: -- some writing can be done without consideration of previous data; -- others do; -*/ -func memorise(args map[string]string) []byte { - agent := cfg.AssistantRole - if len(args) < 2 { - msg := "not enough args to call memorise tool; need topic and data to remember" - logger.Error(msg) - return []byte(msg) - } - memory := &models.Memory{ - Agent: agent, - Topic: args["topic"], - Mind: args["data"], - UpdatedAt: time.Now(), - CreatedAt: time.Now(), - } - if _, err := store.Memorise(memory); err != nil { - logger.Error("failed to save memory", "err", err, "memoory", memory) - return []byte("failed to save info") - } - msg := "info saved under the topic:" + args["topic"] - return []byte(msg) -} - -func recall(args map[string]string) []byte { - agent := cfg.AssistantRole - if len(args) < 1 { - logger.Warn("not enough args to call recall tool") - return nil - } - mind, err := store.Recall(agent, args["topic"]) - if err != nil { - msg := fmt.Sprintf("failed to recall; error: %v; args: %v", err, args) - logger.Error(msg) - return []byte(msg) - } - answer := fmt.Sprintf("under the topic: %s is stored:\n%s", args["topic"], mind) - return []byte(answer) -} - -func recallTopics(args map[string]string) []byte { - agent := cfg.AssistantRole - topics, err := store.RecallTopics(agent) - if err != nil { - logger.Error("failed to use tool", "error", err, "args", args) - return nil - } - joinedS := strings.Join(topics, ";") - return []byte(joinedS) -} - -// File Manipulation Tools -func fileCreate(args map[string]string) []byte { - path, ok := args["path"] - if !ok || path == "" { - msg := "path not provided to file_create tool" - logger.Error(msg) - return []byte(msg) - } - path = resolvePath(path) - content, ok := args["content"] - if !ok { - content = "" - } - if err := writeStringToFile(path, content); err != nil { - msg := "failed to create file; error: " + err.Error() - logger.Error(msg) - return []byte(msg) - } - msg := "file created successfully at " + path - return []byte(msg) -} - -func fileRead(args map[string]string) []byte { - path, ok := args["path"] - if !ok || path == "" { - msg := "path not provided to file_read tool" - logger.Error(msg) - return []byte(msg) - } - path = resolvePath(path) - content, err := readStringFromFile(path) - if err != nil { - msg := "failed to read file; error: " + err.Error() - logger.Error(msg) - return []byte(msg) - } - result := map[string]string{ - "content": content, - "path": path, - } - jsonResult, err := json.Marshal(result) - if err != nil { - msg := "failed to marshal result; error: " + err.Error() - logger.Error(msg) - return []byte(msg) - } - return jsonResult -} - -func fileReadImage(args map[string]string) []byte { - path, ok := args["path"] - if !ok || path == "" { - msg := "path not provided to file_read_image tool" - logger.Error(msg) - return []byte(msg) - } - path = resolvePath(path) - dataURL, err := models.CreateImageURLFromPath(path) - if err != nil { - msg := "failed to read image; error: " + err.Error() - logger.Error(msg) - return []byte(msg) - } - // result := map[string]any{ - // "type": "multimodal_content", - // "parts": []map[string]string{ - // {"type": "text", "text": "Image at " + path}, - // {"type": "image_url", "url": dataURL}, - // }, - // } - result := models.MultimodalToolResp{ - Type: "multimodal_content", - Parts: []map[string]string{ - {"type": "text", "text": "Image at " + path}, - {"type": "image_url", "url": dataURL}, - }, - } - jsonResult, err := json.Marshal(result) - if err != nil { - msg := "failed to marshal result; error: " + err.Error() - logger.Error(msg) - return []byte(msg) - } - return jsonResult -} - -func fileWrite(args map[string]string) []byte { - path, ok := args["path"] - if !ok || path == "" { - msg := "path not provided to file_write tool" - logger.Error(msg) - return []byte(msg) - } - path = resolvePath(path) - content, ok := args["content"] - if !ok { - content = "" - } - if err := writeStringToFile(path, content); err != nil { - msg := "failed to write to file; error: " + err.Error() - logger.Error(msg) - return []byte(msg) - } - msg := "file written successfully at " + path - return []byte(msg) -} - -func fileWriteAppend(args map[string]string) []byte { - path, ok := args["path"] - if !ok || path == "" { - msg := "path not provided to file_write_append tool" - logger.Error(msg) - return []byte(msg) - } - path = resolvePath(path) - content, ok := args["content"] - if !ok { - content = "" - } - if err := appendStringToFile(path, content); err != nil { - msg := "failed to append to file; error: " + err.Error() - logger.Error(msg) - return []byte(msg) - } - msg := "file written successfully at " + path - return []byte(msg) -} - -func fileEdit(args map[string]string) []byte { - path, ok := args["path"] - if !ok || path == "" { - msg := "path not provided to file_edit tool" - logger.Error(msg) - return []byte(msg) - } - path = resolvePath(path) - oldString, ok := args["oldString"] - if !ok || oldString == "" { - msg := "oldString not provided to file_edit tool" - logger.Error(msg) - return []byte(msg) - } - newString, ok := args["newString"] - if !ok { - newString = "" - } - lineNumberStr, hasLineNumber := args["lineNumber"] - // Read file content - content, err := os.ReadFile(path) - if err != nil { - msg := "failed to read file: " + err.Error() - logger.Error(msg) - return []byte(msg) - } - fileContent := string(content) - var replacementCount int - if hasLineNumber && lineNumberStr != "" { - // Line-number based edit - lineNum, err := strconv.Atoi(lineNumberStr) - if err != nil { - msg := "invalid lineNumber: must be a valid integer" - logger.Error(msg) - return []byte(msg) - } - lines := strings.Split(fileContent, "\n") - if lineNum < 1 || lineNum > len(lines) { - msg := fmt.Sprintf("lineNumber %d out of range (file has %d lines)", lineNum, len(lines)) - logger.Error(msg) - return []byte(msg) - } - // Find oldString in the specific line - targetLine := lines[lineNum-1] - if !strings.Contains(targetLine, oldString) { - msg := fmt.Sprintf("oldString not found on line %d", lineNum) - logger.Error(msg) - return []byte(msg) - } - lines[lineNum-1] = strings.Replace(targetLine, oldString, newString, 1) - replacementCount = 1 - fileContent = strings.Join(lines, "\n") - } else { - // Replace all occurrences - if !strings.Contains(fileContent, oldString) { - msg := "oldString not found in file" - logger.Error(msg) - return []byte(msg) - } - fileContent = strings.ReplaceAll(fileContent, oldString, newString) - replacementCount = strings.Count(fileContent, newString) - } - if err := os.WriteFile(path, []byte(fileContent), 0644); err != nil { - msg := "failed to write file: " + err.Error() - logger.Error(msg) - return []byte(msg) - } - msg := fmt.Sprintf("file edited successfully at %s (%d replacement(s))", path, replacementCount) - return []byte(msg) -} - -func fileDelete(args map[string]string) []byte { - path, ok := args["path"] - if !ok || path == "" { - msg := "path not provided to file_delete tool" - logger.Error(msg) - return []byte(msg) - } - path = resolvePath(path) - if err := removeFile(path); err != nil { - msg := "failed to delete file; error: " + err.Error() - logger.Error(msg) - return []byte(msg) - } - msg := "file deleted successfully at " + path - return []byte(msg) -} - -func fileMove(args map[string]string) []byte { - src, ok := args["src"] - if !ok || src == "" { - msg := "source path not provided to file_move tool" - logger.Error(msg) - return []byte(msg) - } - src = resolvePath(src) - dst, ok := args["dst"] - if !ok || dst == "" { - msg := "destination path not provided to file_move tool" - logger.Error(msg) - return []byte(msg) - } - dst = resolvePath(dst) - if err := moveFile(src, dst); err != nil { - msg := "failed to move file; error: " + err.Error() - logger.Error(msg) - return []byte(msg) - } - msg := fmt.Sprintf("file moved successfully from %s to %s", src, dst) - return []byte(msg) -} - -func fileCopy(args map[string]string) []byte { - src, ok := args["src"] - if !ok || src == "" { - msg := "source path not provided to file_copy tool" - logger.Error(msg) - return []byte(msg) - } - src = resolvePath(src) - dst, ok := args["dst"] - if !ok || dst == "" { - msg := "destination path not provided to file_copy tool" - logger.Error(msg) - return []byte(msg) - } - dst = resolvePath(dst) - if err := copyFile(src, dst); err != nil { - msg := "failed to copy file; error: " + err.Error() - logger.Error(msg) - return []byte(msg) - } - msg := fmt.Sprintf("file copied successfully from %s to %s", src, dst) - return []byte(msg) -} - -func fileList(args map[string]string) []byte { - path, ok := args["path"] - if !ok || path == "" { - path = "." // default to current directory - } - path = resolvePath(path) - files, err := listDirectory(path) - if err != nil { - msg := "failed to list directory; error: " + err.Error() - logger.Error(msg) - return []byte(msg) - } - result := map[string]interface{}{ - "directory": path, - "files": files, - } - jsonResult, err := json.Marshal(result) - if err != nil { - msg := "failed to marshal result; error: " + err.Error() - logger.Error(msg) - return []byte(msg) - } - return jsonResult -} - // Helper functions for file operations func resolvePath(p string) string { if filepath.IsAbs(p) { @@ -890,50 +479,466 @@ func listDirectory(path string) ([]string, error) { return files, nil } -// Command Execution Tool -func executeCommand(args map[string]string) []byte { +// Unified run command - single entry point for shell, memory, and todo +func runCmd(args map[string]string) []byte { commandStr := args["command"] if commandStr == "" { - msg := "command not provided to execute_command tool" + msg := "command not provided to run tool" logger.Error(msg) return []byte(msg) } - // Handle commands passed as single string with spaces (e.g., "go run main.go" or "cd /tmp") - // Split into base command and arguments + + // Parse the command - first word is subcommand parts := strings.Fields(commandStr) if len(parts) == 0 { - msg := "command not provided to execute_command tool" - logger.Error(msg) - return []byte(msg) + return []byte("[error] empty command") + } + + subcmd := parts[0] + rest := parts[1:] + + // Route to appropriate handler + switch subcmd { + case "help": + // help - show all commands + // help <cmd> - show help for specific command + return []byte(getHelp(rest)) + case "memory": + // memory store <topic> <data> | memory get <topic> | memory list | memory forget <topic> + return []byte(tools.FsMemory(append([]string{"store"}, rest...), "")) + case "todo": + // todo create|read|update|delete - route to existing todo handlers + return []byte(handleTodoSubcommand(rest, args)) + case "window", "windows": + // window list - list all windows + return listWindows(args) + case "capture", "screenshot": + // capture <window-name> - capture a window + return captureWindow(args) + case "capture_and_view", "screenshot_and_view": + // capture and view screenshot + return captureWindowAndView(args) + case "browser": + // browser <action> [args...] - Playwright browser automation + return runBrowserCommand(rest, args) + default: + // Everything else: shell with pipe/chaining support + result := tools.ExecChain(commandStr) + return []byte(result) } - command := parts[0] - cmdArgs := parts[1:] - if !isCommandAllowed(command, cmdArgs...) { - msg := fmt.Sprintf("command '%s' is not allowed", command) - logger.Error(msg) - return []byte(msg) +} + +// runBrowserCommand routes browser subcommands to Playwright handlers +func runBrowserCommand(args []string, originalArgs map[string]string) []byte { + if len(args) == 0 { + return []byte(`usage: browser <action> [args...] +Actions: + start - start browser + stop - stop browser + running - check if browser is running + go <url> - navigate to URL + click <selector> - click element + fill <selector> <text> - fill input + text [selector] - extract text + html [selector] - get HTML + dom - get DOM + screenshot [path] - take screenshot + screenshot_and_view - take and view screenshot + wait <selector> - wait for element + drag <from> <to> - drag element`) + } + + action := args[0] + rest := args[1:] + + switch action { + case "start": + return pwStart(originalArgs) + case "stop": + return pwStop(originalArgs) + case "running": + return pwIsRunning(originalArgs) + case "go", "navigate", "open": + // browser go <url> + url := "" + if len(rest) > 0 { + url = rest[0] + } + if url == "" { + return []byte("usage: browser go <url>") + } + return pwNavigate(map[string]string{"url": url}) + case "click": + // browser click <selector> [index] + selector := "" + index := "0" + if len(rest) > 0 { + selector = rest[0] + } + if len(rest) > 1 { + index = rest[1] + } + if selector == "" { + return []byte("usage: browser click <selector> [index]") + } + return pwClick(map[string]string{"selector": selector, "index": index}) + case "fill": + // browser fill <selector> <text> + if len(rest) < 2 { + return []byte("usage: browser fill <selector> <text>") + } + return pwFill(map[string]string{"selector": rest[0], "text": strings.Join(rest[1:], " ")}) + case "text": + // browser text [selector] + selector := "" + if len(rest) > 0 { + selector = rest[0] + } + return pwExtractText(map[string]string{"selector": selector}) + case "html": + // browser html [selector] + selector := "" + if len(rest) > 0 { + selector = rest[0] + } + return pwGetHTML(map[string]string{"selector": selector}) + case "dom": + return pwGetDOM(originalArgs) + case "screenshot": + // browser screenshot [path] + path := "" + if len(rest) > 0 { + path = rest[0] + } + return pwScreenshot(map[string]string{"path": path}) + case "screenshot_and_view": + // browser screenshot_and_view [path] + path := "" + if len(rest) > 0 { + path = rest[0] + } + return pwScreenshotAndView(map[string]string{"path": path}) + case "wait": + // browser wait <selector> + selector := "" + if len(rest) > 0 { + selector = rest[0] + } + if selector == "" { + return []byte("usage: browser wait <selector>") + } + return pwWaitForSelector(map[string]string{"selector": selector}) + case "drag": + // browser drag <x1> <y1> <x2> <y2> OR browser drag <from_selector> <to_selector> + if len(rest) < 4 && len(rest) < 2 { + return []byte("usage: browser drag <x1> <y1> <x2> <y2> OR browser drag <from_selector> <to_selector>") + } + // Check if first arg is a number (coordinates) or selector + _, err := strconv.Atoi(rest[0]) + _, err2 := strconv.ParseFloat(rest[0], 64) + if err == nil || err2 == nil { + // Coordinates: browser drag 100 200 300 400 + if len(rest) < 4 { + return []byte("usage: browser drag <x1> <y1> <x2> <y2>") + } + return pwDrag(map[string]string{ + "x1": rest[0], "y1": rest[1], + "x2": rest[2], "y2": rest[3], + }) + } + // Selectors: browser drag #item #container + // pwDrag needs coordinates, so we need to get element positions first + // This requires a different approach - use JavaScript to get centers + return pwDragBySelector(map[string]string{ + "fromSelector": rest[0], + "toSelector": rest[1], + }) + default: + return []byte(fmt.Sprintf("unknown browser action: %s", action)) } - // Special handling for cd command - update FilePickerDir - if command == "cd" { - return handleCdCommand(cmdArgs) +} + +// getHelp returns help text for commands +func getHelp(args []string) string { + if len(args) == 0 { + // General help - show all commands + return `Available commands: + help <cmd> - show help for a command (use: help memory, help git, etc.) + + # File operations + ls [path] - list files in directory + cat <file> - read file content + see <file> - view image file + write <file> - write content to file + stat <file> - get file info + rm <file> - delete file + cp <src> <dst> - copy file + mv <src> <dst> - move/rename file + mkdir <dir> - create directory + pwd - print working directory + cd <dir> - change directory + sed 's/old/new/[g]' [file] - text replacement + + # Text processing + echo <args> - echo back input + time - show current time + grep <pattern> - filter lines (supports -i, -v, -c) + head [n] - show first n lines + tail [n] - show last n lines + wc [-l|-w|-c] - count lines/words/chars + sort [-r|-n] - sort lines + uniq [-c] - remove duplicates + + # Git (read-only) + git <cmd> - git commands (status, log, diff, show, branch, etc.) + + # Go + go <cmd> - go commands (run, build, test, mod, etc.) + + # Memory + memory store <topic> <data> - save to memory + memory get <topic> - retrieve from memory + memory list - list all topics + memory forget <topic> - delete from memory + + # Todo + todo create <task> - create a todo + todo read - list all todos + todo update <id> <status> - update todo (pending/in_progress/completed) + todo delete <id> - delete a todo + + # Window (requires xdotool + maim) + window - list available windows + capture <name> - capture a window screenshot + capture_and_view <name> - capture and view screenshot + + # Browser (requires Playwright) + browser start - start browser + browser stop - stop browser + browser running - check if running + browser go <url> - navigate to URL + browser click <sel> - click element + browser fill <sel> <txt> - fill input + browser text [sel] - extract text + browser html [sel] - get HTML + browser screenshot - take screenshot + browser wait <sel> - wait for element + browser drag <x1> <y1> <x2> <y2> - drag by coordinates + browser drag <sel1> <sel2> - drag by selectors (center points) + + # System + <any shell command> - run shell command directly + +Use: run "command" to execute.` + } + + // Specific command help + cmd := args[0] + switch cmd { + case "ls": + return `ls [directory] + List files in a directory. + Examples: + run "ls" + run "ls /home/user" + run "ls -la" (via shell)` + case "cat": + return `cat <file> + Read file content. + Examples: + run "cat readme.md" + run "cat -b image.png" (base64 output)` + case "see": + return `see <image-file> + View an image file for multimodal analysis. + Supports: png, jpg, jpeg, gif, webp, svg + Example: + run "see screenshot.png"` + case "write": + return `write <file> [content] + Write content to a file. + Examples: + run "write notes.txt hello world" + run "write data.json" (with stdin)` + case "memory": + return `memory <subcommand> [args] + Manage memory storage. + Subcommands: + store <topic> <data> - save data to a topic + get <topic> - retrieve data from a topic + list - list all topics + forget <topic> - delete a topic + Examples: + run "memory store foo bar" + run "memory get foo" + run "memory list"` + case "todo": + return `todo <subcommand> [args] + Manage todo list. + Subcommands: + create <task> - create a new todo + read [id] - list all todos or read specific one + update <id> <status> - update status (pending/in_progress/completed) + delete <id> - delete a todo + Examples: + run "todo create fix bug" + run "todo read" + run "todo update 1 completed"` + case "git": + return `git <subcommand> + Read-only git commands. + Allowed: status, log, diff, show, branch, reflog, rev-parse, shortlog, describe, rev-list + Examples: + run "git status" + run "git log --oneline -5" + run "git diff HEAD~1"` + case "grep": + return `grep <pattern> [options] + Filter lines matching a pattern. + Options: + -i ignore case + -v invert match + -c count matches + Example: + run "grep error" (from stdin) + run "grep -i warning log.txt"` + case "cd": + return `cd <directory> + Change working directory. + Example: + run "cd /tmp" + run "cd .."` + case "pwd": + return `pwd + Print working directory. + Example: + run "pwd"` + case "sed": + return `sed 's/old/new/[g]' [file] + Stream editor for text replacement. + Options: + -i in-place editing + -g global replacement (replace all) + Examples: + run "sed 's/foo/bar/' file.txt" + run "sed 's/foo/bar/g' file.txt" (global) + run "sed -i 's/foo/bar/' file.txt" (in-place) + run "cat file.txt | sed 's/foo/bar/'" (pipe from stdin)` + case "go": + return `go <command> + Go toolchain commands. + Allowed: run, build, test, mod, get, install, clean, fmt, vet, etc. + Examples: + run "go run main.go" + run "go build ./..." + run "go test ./..." + run "go mod tidy" + run "go get github.com/package"` + case "window", "windows": + return `window + List available windows. + Requires: xdotool and maim + Example: + run "window"` + case "capture", "screenshot": + return `capture <window-name-or-id> + Capture a screenshot of a window. + Requires: xdotool and maim + Examples: + run "capture Firefox" + run "capture 0x12345678" + run "capture_and_view Firefox"` + case "capture_and_view": + return `capture_and_view <window-name-or-id> + Capture a window and return for viewing. + Requires: xdotool and maim + Examples: + run "capture_and_view Firefox"` + case "browser": + return `browser <action> [args] + Playwright browser automation. + Requires: Playwright browser server running + Actions: + start - start browser + stop - stop browser + running - check if browser is running + go <url> - navigate to URL + click <selector> - click element (use index for multiple: click #btn 1) + fill <selector> <text> - fill input field + text [selector] - extract text (from element or whole page) + html [selector] - get HTML (from element or whole page) + screenshot [path] - take screenshot + wait <selector> - wait for element to appear + drag <from> <to> - drag element to another element + Examples: + run "browser start" + run "browser go https://example.com" + run "browser click #submit-button" + run "browser fill #search-input hello" + run "browser text" + run "browser screenshot" + run "browser drag 100 200 300 400" + run "browser drag #item1 #container2"` + default: + return fmt.Sprintf("No help available for: %s. Use: run \"help\" for all commands.", cmd) } - // Execute with timeout for safety - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer cancel() - cmd := exec.CommandContext(ctx, command, cmdArgs...) - cmd.Dir = cfg.FilePickerDir - output, err := cmd.CombinedOutput() - if err != nil { - msg := fmt.Sprintf("command '%s' failed; error: %v; output: %s", command, err, string(output)) +} + +// handleTodoSubcommand routes todo subcommands to existing handlers +func handleTodoSubcommand(args []string, originalArgs map[string]string) []byte { + if len(args) == 0 { + return []byte("usage: todo create|read|update|delete") + } + + subcmd := args[0] + + switch subcmd { + case "create": + task := strings.Join(args[1:], " ") + if task == "" { + task = originalArgs["task"] + } + if task == "" { + return []byte("usage: todo create <task>") + } + return todoCreate(map[string]string{"task": task}) + + case "read": + id := "" + if len(args) > 1 { + id = args[1] + } + return todoRead(map[string]string{"id": id}) + + case "update": + if len(args) < 2 { + return []byte("usage: todo update <id> <status>") + } + return todoUpdate(map[string]string{"id": args[1], "status": args[2]}) + + case "delete": + if len(args) < 2 { + return []byte("usage: todo delete <id>") + } + return todoDelete(map[string]string{"id": args[1]}) + + default: + return []byte(fmt.Sprintf("unknown todo subcommand: %s", subcmd)) + } +} + +// Command Execution Tool with pipe/chaining support +func executeCommand(args map[string]string) []byte { + commandStr := args["command"] + if commandStr == "" { + msg := "command not provided to execute_command tool" logger.Error(msg) return []byte(msg) } - // Check if output is empty and return success message - if len(output) == 0 { - successMsg := fmt.Sprintf("command '%s' executed successfully and exited with code 0", commandStr) - return []byte(successMsg) - } - return output + + // Use chain execution for pipe/chaining support + result := tools.ExecChain(commandStr) + return []byte(result) } // handleCdCommand handles the cd command to update FilePickerDir @@ -1155,65 +1160,6 @@ func todoDelete(args map[string]string) []byte { return jsonResult } -var gitReadSubcommands = map[string]bool{ - "status": true, - "log": true, - "diff": true, - "show": true, - "branch": true, - "reflog": true, - "rev-parse": true, - "shortlog": true, - "describe": true, -} - -func isCommandAllowed(command string, args ...string) bool { - allowedCommands := map[string]bool{ - "cd": true, - "grep": true, - "sed": true, - "awk": true, - "find": true, - "cat": true, - "head": true, - "tail": true, - "sort": true, - "uniq": true, - "wc": true, - "ls": true, - "echo": true, - "cut": true, - "tr": true, - "cp": true, - "mv": true, - "rm": true, - "mkdir": true, - "rmdir": true, - "pwd": true, - "df": true, - "free": true, - "ps": true, - "top": true, - "du": true, - "whoami": true, - "date": true, - "uname": true, - "git": true, - "go": true, - } - // Allow all go subcommands (go run, go mod tidy, go test, etc.) - if strings.HasPrefix(command, "go ") && allowedCommands["go"] { - return true - } - if command == "git" && len(args) > 0 { - return gitReadSubcommands[args[0]] - } - if !allowedCommands[command] { - return false - } - return true -} - func summarizeChat(args map[string]string) []byte { if len(chatBody.Messages) == 0 { return []byte("No chat history to summarize.") @@ -1361,31 +1307,65 @@ func captureWindowAndView(args map[string]string) []byte { type fnSig func(map[string]string) []byte +// FS Command Handlers - Unix-style file operations +// Convert map[string]string to []string for tools package +func argsToSlice(args map[string]string) []string { + var result []string + // Common positional args in order + for _, key := range []string{"path", "src", "dst", "dir", "file"} { + if v, ok := args[key]; ok && v != "" { + result = append(result, v) + } + } + return result +} + +func cmdMemory(args map[string]string) []byte { + return []byte(tools.FsMemory(argsToSlice(args), "")) +} + +type memoryAdapter struct { + store storage.Memories + cfg *config.Config +} + +func (m *memoryAdapter) Memorise(agent, topic, data string) (string, error) { + mem := &models.Memory{ + Agent: agent, + Topic: topic, + Mind: data, + UpdatedAt: time.Now(), + CreatedAt: time.Now(), + } + result, err := m.store.Memorise(mem) + if err != nil { + return "", err + } + return result.Topic, nil +} + +func (m *memoryAdapter) Recall(agent, topic string) (string, error) { + return m.store.Recall(agent, topic) +} + +func (m *memoryAdapter) RecallTopics(agent string) ([]string, error) { + return m.store.RecallTopics(agent) +} + +func (m *memoryAdapter) Forget(agent, topic string) error { + return m.store.Forget(agent, topic) +} + var fnMap = map[string]fnSig{ - "recall": recall, - "recall_topics": recallTopics, - "memorise": memorise, - "rag_search": ragsearch, - "websearch": websearch, - "websearch_raw": websearchRaw, - "read_url": readURL, - "read_url_raw": readURLRaw, - "file_create": fileCreate, - "file_read": fileRead, - "file_read_image": fileReadImage, - "file_write": fileWrite, - "file_write_append": fileWriteAppend, - "file_edit": fileEdit, - "file_delete": fileDelete, - "file_move": fileMove, - "file_copy": fileCopy, - "file_list": fileList, - "execute_command": executeCommand, - "todo_create": todoCreate, - "todo_read": todoRead, - "todo_update": todoUpdate, - "todo_delete": todoDelete, - "summarize_chat": summarizeChat, + "memory": cmdMemory, + "rag_search": ragsearch, + "websearch": websearch, + "websearch_raw": websearchRaw, + "read_url": readURL, + "read_url_raw": readURLRaw, + // Unified run command + "run": runCmd, + "summarize_chat": summarizeChat, } func removeWindowToolsFromBaseTools() { @@ -1503,6 +1483,48 @@ func registerWindowTools() { } } +var browserAgentSysPrompt = `You are an autonomous browser automation agent. Your goal is to complete the user's task by intelligently using browser automation tools. + +Important: The browser may already be running from a previous task! Always check pw_is_running first before starting a new browser. + +Available tools: +- pw_start: Start browser (only if not already running) +- pw_stop: Stop browser (only when you're truly done and browser is no longer needed) +- pw_is_running: Check if browser is running +- pw_navigate: Go to a URL +- pw_click: Click an element by CSS selector +- pw_fill: Type text into an input +- pw_extract_text: Get text from page/element +- pw_screenshot: Take a screenshot (returns file path) +- pw_screenshot_and_view: Take screenshot with image for viewing +- pw_wait_for_selector: Wait for element to appear +- pw_drag: Drag mouse from one point to another +- pw_click_at: Click at X,Y coordinates +- pw_get_html: Get HTML content +- pw_get_dom: Get structured DOM tree +- pw_search_elements: Search for elements by text or selector + +Workflow: +1. First, check if browser is already running (pw_is_running) +2. Only start browser if not already running (pw_start) +3. Navigate to required pages (pw_navigate) +4. Interact with elements as needed (click, fill, etc.) +5. Extract information or take screenshots as requested +6. IMPORTANT: Do NOT stop the browser when done! Leave it running so the user can continue interacting with the page in subsequent requests. + +Always provide clear feedback about what you're doing and what you found.` + +func runBrowserAgent(args map[string]string) []byte { + task, ok := args["task"] + if !ok || task == "" { + return []byte(`{"error": "task argument is required"}`) + } + client := getWebAgentClient() + pwAgent := agent.NewPWAgent(client, browserAgentSysPrompt) + pwAgent.SetTools(agent.GetPWTools()) + return pwAgent.ProcessTask(task) +} + func registerPlaywrightTools() { removePlaywrightToolsFromBaseTools() if cfg != nil && cfg.PlaywrightEnabled { @@ -1788,6 +1810,39 @@ func registerPlaywrightTools() { } baseTools = append(baseTools, playwrightTools...) toolSysMsg += browserToolSysMsg + agent.RegisterPWTool("pw_start", pwStart) + agent.RegisterPWTool("pw_stop", pwStop) + agent.RegisterPWTool("pw_is_running", pwIsRunning) + agent.RegisterPWTool("pw_navigate", pwNavigate) + agent.RegisterPWTool("pw_click", pwClick) + agent.RegisterPWTool("pw_click_at", pwClickAt) + agent.RegisterPWTool("pw_fill", pwFill) + agent.RegisterPWTool("pw_extract_text", pwExtractText) + agent.RegisterPWTool("pw_screenshot", pwScreenshot) + agent.RegisterPWTool("pw_screenshot_and_view", pwScreenshotAndView) + agent.RegisterPWTool("pw_wait_for_selector", pwWaitForSelector) + agent.RegisterPWTool("pw_drag", pwDrag) + agent.RegisterPWTool("pw_get_html", pwGetHTML) + agent.RegisterPWTool("pw_get_dom", pwGetDOM) + agent.RegisterPWTool("pw_search_elements", pwSearchElements) + browserAgentTool := []models.Tool{ + { + Type: "function", + Function: models.ToolFunc{ + Name: "browser_agent", + Description: "Autonomous browser automation agent. Use for complex multi-step browser tasks like 'go to website, login, and take screenshot'. The agent will plan and execute steps automatically using browser tools.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"task"}, + Properties: map[string]models.ToolArgProps{ + "task": {Type: "string", Description: "The task to accomplish, e.g., 'go to github.com and take a screenshot of the homepage'"}, + }, + }, + }, + }, + } + baseTools = append(baseTools, browserAgentTool...) + fnMap["browser_agent"] = runBrowserAgent } } @@ -1909,364 +1964,19 @@ var baseTools = []models.Tool{ }, }, }, - // memorise + // run - unified command models.Tool{ Type: "function", Function: models.ToolFunc{ - Name: "memorise", - Description: "Save topic-data in key-value cache. Use when asked to remember something/keep in mind.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"topic", "data"}, - Properties: map[string]models.ToolArgProps{ - "topic": models.ToolArgProps{ - Type: "string", - Description: "topic is the key under which data is saved", - }, - "data": models.ToolArgProps{ - Type: "string", - Description: "data is the value that is saved under the topic-key", - }, - }, - }, - }, - }, - // recall - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "recall", - Description: "Recall topic-data from key-value cache. Use when precise info about the topic is needed.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"topic"}, - Properties: map[string]models.ToolArgProps{ - "topic": models.ToolArgProps{ - Type: "string", - Description: "topic is the key to recall data from", - }, - }, - }, - }, - }, - // recall_topics - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "recall_topics", - Description: "Recall all topics from key-value cache. Use when need to know what topics are currently stored in memory.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{}, - }, - }, - }, - // file_create - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "file_create", - Description: "Create a new file with specified content. Use when you need to create a new file.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"path"}, - Properties: map[string]models.ToolArgProps{ - "path": models.ToolArgProps{ - Type: "string", - Description: "path where the file should be created", - }, - "content": models.ToolArgProps{ - Type: "string", - Description: "content to write to the file (optional, defaults to empty string)", - }, - }, - }, - }, - }, - // file_read - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "file_read", - Description: "Read the content of a file. Use when you need to see the content of a file.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"path"}, - Properties: map[string]models.ToolArgProps{ - "path": models.ToolArgProps{ - Type: "string", - Description: "path of the file to read", - }, - }, - }, - }, - }, - // file_read_image - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "file_read_image", - Description: "Read an image file and return it for multimodal LLM viewing. Supports png, jpg, jpeg, gif, webp formats. Use when you need the LLM to see and analyze an image.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"path"}, - Properties: map[string]models.ToolArgProps{ - "path": models.ToolArgProps{ - Type: "string", - Description: "path of the image file to read", - }, - }, - }, - }, - }, - // file_write - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "file_write", - Description: "Write content to a file. Will overwrite any content present.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"path", "content"}, - Properties: map[string]models.ToolArgProps{ - "path": models.ToolArgProps{ - Type: "string", - Description: "path of the file to write to", - }, - "content": models.ToolArgProps{ - Type: "string", - Description: "content to write to the file", - }, - }, - }, - }, - }, - // file_write_append - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "file_write_append", - Description: "Append content to a file.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"path", "content"}, - Properties: map[string]models.ToolArgProps{ - "path": models.ToolArgProps{ - Type: "string", - Description: "path of the file to write to", - }, - "content": models.ToolArgProps{ - Type: "string", - Description: "content to write to the file", - }, - }, - }, - }, - }, - // file_edit - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "file_edit", - Description: "Edit a specific section of a file by replacing oldString with newString. Use for targeted changes without rewriting the entire file.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"path", "oldString", "newString"}, - Properties: map[string]models.ToolArgProps{ - "path": models.ToolArgProps{ - Type: "string", - Description: "path of the file to edit", - }, - "oldString": models.ToolArgProps{ - Type: "string", - Description: "the exact string to find and replace", - }, - "newString": models.ToolArgProps{ - Type: "string", - Description: "the string to replace oldString with", - }, - "lineNumber": models.ToolArgProps{ - Type: "string", - Description: "optional line number (1-indexed) to edit - if provided, only that line is edited", - }, - }, - }, - }, - }, - // file_delete - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "file_delete", - Description: "Delete a file. Use when you need to remove a file.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"path"}, - Properties: map[string]models.ToolArgProps{ - "path": models.ToolArgProps{ - Type: "string", - Description: "path of the file to delete", - }, - }, - }, - }, - }, - // file_move - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "file_move", - Description: "Move a file from one location to another. Use when you need to relocate a file.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"src", "dst"}, - Properties: map[string]models.ToolArgProps{ - "src": models.ToolArgProps{ - Type: "string", - Description: "source path of the file to move", - }, - "dst": models.ToolArgProps{ - Type: "string", - Description: "destination path where the file should be moved", - }, - }, - }, - }, - }, - // file_copy - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "file_copy", - Description: "Copy a file from one location to another. Use when you need to duplicate a file.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"src", "dst"}, - Properties: map[string]models.ToolArgProps{ - "src": models.ToolArgProps{ - Type: "string", - Description: "source path of the file to copy", - }, - "dst": models.ToolArgProps{ - Type: "string", - Description: "destination path where the file should be copied", - }, - }, - }, - }, - }, - // file_list - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "file_list", - Description: "List files and directories in a directory. Use when you need to see what files are in a directory.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{ - "path": models.ToolArgProps{ - Type: "string", - Description: "path of the directory to list (optional, defaults to current directory)", - }, - }, - }, - }, - }, - // execute_command - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "execute_command", - Description: "Execute a shell command safely. Use when you need to run system commands like cd grep sed awk find cat head tail sort uniq wc ls echo cut tr cp mv rm mkdir rmdir pwd df free ps top du whoami date uname go git. Git is allowed for read-only operations: status, log, diff, show, branch, reflog, rev-parse, shortlog, describe. Use 'cd /path' to change working directory.", + Name: "run", + Description: "Execute commands: shell, git, memory, todo. Usage: run \"<command>\". Examples: run \"ls -la\", run \"git status\", run \"memory store foo bar\", run \"memory get foo\", run \"todo create task\", run \"help\", run \"help memory\"", Parameters: models.ToolFuncParams{ Type: "object", Required: []string{"command"}, Properties: map[string]models.ToolArgProps{ "command": models.ToolArgProps{ Type: "string", - Description: "command to execute with arguments (e.g., 'go run main.go', 'ls -la /tmp', 'cd /home/user'). Use a single string; arguments should be space-separated after the command.", - }, - }, - }, - }, - }, - // todo_create - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "todo_create", - Description: "Create a new todo item with a task. Returns the created todo with its ID.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"task"}, - Properties: map[string]models.ToolArgProps{ - "task": models.ToolArgProps{ - Type: "string", - Description: "the task description to add to the todo list", - }, - }, - }, - }, - }, - // todo_read - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "todo_read", - Description: "Read todo items. Without ID returns all todos, with ID returns specific todo.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{ - "id": models.ToolArgProps{ - Type: "string", - Description: "optional id of the specific todo item to read", - }, - }, - }, - }, - }, - // todo_update - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "todo_update", - Description: "Update a todo item by ID with new task or status. Status must be one of: pending, in_progress, completed.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"id"}, - Properties: map[string]models.ToolArgProps{ - "id": models.ToolArgProps{ - Type: "string", - Description: "id of the todo item to update", - }, - "task": models.ToolArgProps{ - Type: "string", - Description: "new task description (optional)", - }, - "status": models.ToolArgProps{ - Type: "string", - Description: "new status: pending, in_progress, or completed (optional)", - }, - }, - }, - }, - }, - // todo_delete - models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "todo_delete", - Description: "Delete a todo item by ID. Returns success message.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"id"}, - Properties: map[string]models.ToolArgProps{ - "id": models.ToolArgProps{ - Type: "string", - Description: "id of the todo item to delete", + Description: "command to execute. Use: run \"help\" for all commands, run \"help <cmd>\" for specific help. Examples: ls, cat, grep, git status, memory store, todo create, etc.", }, }, }, |
