diff options
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/chain.go | 42 | ||||
| -rw-r--r-- | tools/fs.go | 82 | ||||
| -rw-r--r-- | tools/pw.go | 83 | ||||
| -rw-r--r-- | tools/tools.go | 1201 |
4 files changed, 558 insertions, 850 deletions
diff --git a/tools/chain.go b/tools/chain.go index 4afb7e5..b94d2f8 100644 --- a/tools/chain.go +++ b/tools/chain.go @@ -32,10 +32,8 @@ func ParseChain(input string) []Segment { var current strings.Builder runes := []rune(input) n := len(runes) - for i := 0; i < n; i++ { ch := runes[i] - // handle quotes if ch == '\'' || ch == '"' { quote := ch @@ -50,7 +48,6 @@ func ParseChain(input string) []Segment { } continue } - // && if ch == '&' && i+1 < n && runes[i+1] == '&' { segments = append(segments, Segment{ @@ -61,7 +58,6 @@ func ParseChain(input string) []Segment { i++ // skip second & continue } - // ; if ch == ';' { segments = append(segments, Segment{ @@ -71,7 +67,6 @@ func ParseChain(input string) []Segment { current.Reset() continue } - // || if ch == '|' && i+1 < n && runes[i+1] == '|' { segments = append(segments, Segment{ @@ -82,7 +77,6 @@ func ParseChain(input string) []Segment { i++ // skip second | continue } - // | (single pipe) if ch == '|' { segments = append(segments, Segment{ @@ -92,16 +86,13 @@ func ParseChain(input string) []Segment { current.Reset() continue } - current.WriteRune(ch) } - // last segment last := strings.TrimSpace(current.String()) if last != "" { segments = append(segments, Segment{Raw: last, Op: OpNone}) } - return segments } @@ -112,12 +103,10 @@ func ExecChain(command string) string { if len(segments) == 0 { return "[error] empty command" } - var collected []string var lastOutput string var lastErr error pipeInput := "" - for i, seg := range segments { if i > 0 { prevOp := segments[i-1].Op @@ -130,7 +119,6 @@ func ExecChain(command string) string { continue } } - // determine stdin for this segment segStdin := "" if i == 0 { @@ -138,9 +126,7 @@ func ExecChain(command string) string { } else if segments[i-1].Op == OpPipe { segStdin = lastOutput } - lastOutput, lastErr = execSingle(seg.Raw, segStdin) - // pipe: output flows to next command's stdin // && or ;: collect output if i < len(segments)-1 && seg.Op == OpPipe { @@ -150,7 +136,6 @@ func ExecChain(command string) string { collected = append(collected, lastOutput) } } - return strings.Join(collected, "\n") } @@ -160,15 +145,12 @@ func execSingle(command, stdin string) (string, error) { if len(parts) == 0 { return "", fmt.Errorf("empty command") } - name := parts[0] args := parts[1:] - // Check if it's a built-in Go command if result := execBuiltin(name, args, stdin); result != "" { return result, nil } - // Otherwise execute as system command cmd := exec.Command(name, args...) if stdin != "" { @@ -187,7 +169,6 @@ func tokenize(input string) []string { var current strings.Builder inQuote := false var quoteChar rune - for _, ch := range input { if inQuote { if ch == quoteChar { @@ -197,13 +178,11 @@ func tokenize(input string) []string { } continue } - if ch == '\'' || ch == '"' { inQuote = true quoteChar = ch continue } - if ch == ' ' || ch == '\t' { if current.Len() > 0 { tokens = append(tokens, current.String()) @@ -211,14 +190,11 @@ func tokenize(input string) []string { } continue } - current.WriteRune(ch) } - if current.Len() > 0 { tokens = append(tokens, current.String()) } - return tokens } @@ -242,7 +218,7 @@ func execBuiltin(name string, args []string, stdin string) string { path := args[0] abs := path if !filepath.IsAbs(path) { - abs = filepath.Join(fsRootDir, path) + abs = filepath.Join(cfg.FilePickerDir, path) } data, err := os.ReadFile(abs) if err != nil { @@ -250,16 +226,16 @@ func execBuiltin(name string, args []string, stdin string) string { } return string(data) case "pwd": - return fsRootDir + return cfg.FilePickerDir case "cd": if len(args) == 0 { return "[error] usage: cd <dir>" } dir := args[0] - // Resolve relative to fsRootDir + // Resolve relative to cfg.FilePickerDir abs := dir if !filepath.IsAbs(dir) { - abs = filepath.Join(fsRootDir, dir) + abs = filepath.Join(cfg.FilePickerDir, dir) } abs = filepath.Clean(abs) info, err := os.Stat(abs) @@ -269,8 +245,8 @@ func execBuiltin(name string, args []string, stdin string) string { if !info.IsDir() { return fmt.Sprintf("[error] cd: not a directory: %s", dir) } - fsRootDir = abs - return fmt.Sprintf("Changed directory to: %s", fsRootDir) + cfg.FilePickerDir = abs + return fmt.Sprintf("Changed directory to: %s", cfg.FilePickerDir) case "mkdir": if len(args) == 0 { return "[error] usage: mkdir [-p] <dir>" @@ -289,7 +265,7 @@ func execBuiltin(name string, args []string, stdin string) string { } abs := dirPath if !filepath.IsAbs(dirPath) { - abs = filepath.Join(fsRootDir, dirPath) + abs = filepath.Join(cfg.FilePickerDir, dirPath) } abs = filepath.Clean(abs) var mkdirFunc func(string, os.FileMode) error @@ -315,7 +291,7 @@ func execBuiltin(name string, args []string, stdin string) string { } abs := dir if !filepath.IsAbs(dir) { - abs = filepath.Join(fsRootDir, dir) + abs = filepath.Join(cfg.FilePickerDir, dir) } entries, err := os.ReadDir(abs) if err != nil { @@ -347,7 +323,7 @@ func execBuiltin(name string, args []string, stdin string) string { return "[error] usage: go <subcommand> [options]" } cmd := exec.Command("go", args...) - cmd.Dir = fsRootDir + cmd.Dir = cfg.FilePickerDir output, err := cmd.CombinedOutput() if err != nil { return fmt.Sprintf("[error] go %s: %v\n%s", args[0], err, string(output)) diff --git a/tools/fs.go b/tools/fs.go index f0368f6..7b33a3c 100644 --- a/tools/fs.go +++ b/tools/fs.go @@ -14,7 +14,6 @@ import ( "time" ) -var fsRootDir string var memoryStore MemoryStore var agentRole string @@ -31,11 +30,11 @@ func SetMemoryStore(store MemoryStore, role string) { } func SetFSRoot(dir string) { - fsRootDir = dir + cfg.FilePickerDir = dir } func GetFSRoot() string { - return fsRootDir + return cfg.FilePickerDir } func SetFSCwd(dir string) error { @@ -50,26 +49,24 @@ func SetFSCwd(dir string) error { if !info.IsDir() { return fmt.Errorf("not a directory: %s", dir) } - fsRootDir = abs + cfg.FilePickerDir = abs return nil } func resolvePath(rel string) (string, error) { - if fsRootDir == "" { + if cfg.FilePickerDir == "" { return "", fmt.Errorf("fs root not set") } - if filepath.IsAbs(rel) { abs := filepath.Clean(rel) - if !strings.HasPrefix(abs, fsRootDir+string(os.PathSeparator)) && abs != fsRootDir { + if !strings.HasPrefix(abs, cfg.FilePickerDir+string(os.PathSeparator)) && abs != cfg.FilePickerDir { return "", fmt.Errorf("path escapes fs root: %s", rel) } return abs, nil } - - abs := filepath.Join(fsRootDir, rel) + abs := filepath.Join(cfg.FilePickerDir, rel) abs = filepath.Clean(abs) - if !strings.HasPrefix(abs, fsRootDir+string(os.PathSeparator)) && abs != fsRootDir { + if !strings.HasPrefix(abs, cfg.FilePickerDir+string(os.PathSeparator)) && abs != cfg.FilePickerDir { return "", fmt.Errorf("path escapes fs root: %s", rel) } return abs, nil @@ -100,12 +97,10 @@ func FsLs(args []string, stdin string) string { if err != nil { return fmt.Sprintf("[error] %v", err) } - entries, err := os.ReadDir(abs) if err != nil { return fmt.Sprintf("[error] ls: %v", err) } - var out strings.Builder for _, e := range entries { info, _ := e.Info() @@ -136,17 +131,14 @@ func FsCat(args []string, stdin string) string { if path == "" { return "[error] usage: cat <path>" } - abs, err := resolvePath(path) if err != nil { return fmt.Sprintf("[error] %v", err) } - data, err := os.ReadFile(abs) if err != nil { return fmt.Sprintf("[error] cat: %v", err) } - if b64 { result := base64.StdEncoding.EncodeToString(data) if IsImageFile(path) { @@ -162,7 +154,6 @@ func FsViewImg(args []string, stdin string) string { return "[error] usage: view_img <image-path>" } path := args[0] - var abs string if filepath.IsAbs(path) { abs = path @@ -173,20 +164,16 @@ func FsViewImg(args []string, stdin string) string { return fmt.Sprintf("[error] %v", err) } } - if _, err := os.Stat(abs); err != nil { return fmt.Sprintf("[error] view_img: %v", err) } - if !IsImageFile(path) { return fmt.Sprintf("[error] not an image file: %s (use cat to read text files)", path) } - dataURL, err := models.CreateImageURLFromPath(abs) if err != nil { return fmt.Sprintf("[error] view_img: %v", err) } - result := models.MultimodalToolResp{ Type: "multimodal_content", Parts: []map[string]string{ @@ -222,16 +209,13 @@ func FsWrite(args []string, stdin string) string { if path == "" { return "[error] usage: write <path> [content] or pipe stdin" } - abs, err := resolvePath(path) if err != nil { return fmt.Sprintf("[error] %v", err) } - if err := os.MkdirAll(filepath.Dir(abs), 0o755); err != nil { return fmt.Sprintf("[error] mkdir: %v", err) } - var data []byte if b64 { src := stdin @@ -251,18 +235,14 @@ func FsWrite(args []string, stdin string) string { data = []byte(stdin) } } - if err := os.WriteFile(abs, data, 0o644); err != nil { return fmt.Sprintf("[error] write: %v", err) } - size := humanSize(int64(len(data))) result := fmt.Sprintf("Written %s → %s", size, path) - if IsImageFile(path) { result += fmt.Sprintf("\n", abs) } - return result } @@ -270,17 +250,14 @@ func FsStat(args []string, stdin string) string { if len(args) == 0 { return "[error] usage: stat <path>" } - abs, err := resolvePath(args[0]) if err != nil { return fmt.Sprintf("[error] %v", err) } - info, err := os.Stat(abs) if err != nil { return fmt.Sprintf("[error] stat: %v", err) } - mime := "application/octet-stream" if IsImageFile(args[0]) { ext := strings.ToLower(filepath.Ext(args[0])) @@ -297,7 +274,6 @@ func FsStat(args []string, stdin string) string { mime = "image/svg+xml" } } - var out strings.Builder fmt.Fprintf(&out, "File: %s\n", args[0]) fmt.Fprintf(&out, "Size: %s (%d bytes)\n", humanSize(info.Size()), info.Size()) @@ -313,12 +289,10 @@ func FsRm(args []string, stdin string) string { if len(args) == 0 { return "[error] usage: rm <path>" } - abs, err := resolvePath(args[0]) if err != nil { return fmt.Sprintf("[error] %v", err) } - if err := os.RemoveAll(abs); err != nil { return fmt.Sprintf("[error] rm: %v", err) } @@ -329,7 +303,6 @@ func FsCp(args []string, stdin string) string { if len(args) < 2 { return "[error] usage: cp <src> <dst>" } - srcAbs, err := resolvePath(args[0]) if err != nil { return fmt.Sprintf("[error] %v", err) @@ -338,16 +311,13 @@ func FsCp(args []string, stdin string) string { if err != nil { return fmt.Sprintf("[error] %v", err) } - data, err := os.ReadFile(srcAbs) if err != nil { return fmt.Sprintf("[error] cp read: %v", err) } - if err := os.MkdirAll(filepath.Dir(dstAbs), 0o755); err != nil { return fmt.Sprintf("[error] cp mkdir: %v", err) } - if err := os.WriteFile(dstAbs, data, 0o644); err != nil { return fmt.Sprintf("[error] cp write: %v", err) } @@ -358,7 +328,6 @@ func FsMv(args []string, stdin string) string { if len(args) < 2 { return "[error] usage: mv <src> <dst>" } - srcAbs, err := resolvePath(args[0]) if err != nil { return fmt.Sprintf("[error] %v", err) @@ -367,11 +336,9 @@ func FsMv(args []string, stdin string) string { if err != nil { return fmt.Sprintf("[error] %v", err) } - if err := os.MkdirAll(filepath.Dir(dstAbs), 0o755); err != nil { return fmt.Sprintf("[error] mv mkdir: %v", err) } - if err := os.Rename(srcAbs, dstAbs); err != nil { return fmt.Sprintf("[error] mv: %v", err) } @@ -382,10 +349,8 @@ func FsMkdir(args []string, stdin string) string { if len(args) == 0 { return "[error] usage: mkdir [-p] <dir>" } - createParents := false var dirPath string - for _, a := range args { if a == "-p" || a == "--parents" { createParents = true @@ -393,27 +358,22 @@ func FsMkdir(args []string, stdin string) string { dirPath = a } } - if dirPath == "" { return "[error] usage: mkdir [-p] <dir>" } - abs, err := resolvePath(dirPath) if err != nil { return fmt.Sprintf("[error] %v", err) } - var mkdirFunc func(string, os.FileMode) error if createParents { mkdirFunc = os.MkdirAll } else { mkdirFunc = os.Mkdir } - if err := mkdirFunc(abs, 0o755); err != nil { return fmt.Sprintf("[error] mkdir: %v", err) } - if createParents { return fmt.Sprintf("Created %s (with parents)", dirPath) } @@ -459,7 +419,6 @@ func FsGrep(args []string, stdin string) string { if ignoreCase { pattern = strings.ToLower(pattern) } - lines := strings.Split(stdin, "\n") var matched []string for _, line := range lines { @@ -549,7 +508,6 @@ func FsSort(args []string, stdin string) string { numeric = true } } - sortFunc := func(i, j int) bool { if numeric { ni, _ := strconv.Atoi(lines[i]) @@ -564,7 +522,6 @@ func FsSort(args []string, stdin string) string { } return lines[i] < lines[j] } - sort.Slice(lines, sortFunc) return strings.Join(lines, "\n") } @@ -577,7 +534,6 @@ func FsUniq(args []string, stdin string) string { showCount = true } } - var result []string var prev string first := true @@ -623,17 +579,14 @@ func FsGit(args []string, stdin string) string { if len(args) == 0 { return "[error] usage: git <subcommand> [options]" } - subcmd := args[0] if !allowedGitSubcommands[subcmd] { return fmt.Sprintf("[error] git: '%s' is not an allowed git command. Allowed: status, log, diff, show, branch, reflog, rev-parse, shortlog, describe, rev-list", subcmd) } - abs, err := resolvePath(".") if err != nil { return fmt.Sprintf("[error] git: %v", err) } - // Pass all args to git (first arg is subcommand, rest are options) cmd := exec.Command("git", args...) cmd.Dir = abs @@ -645,7 +598,7 @@ func FsGit(args []string, stdin string) string { } func FsPwd(args []string, stdin string) string { - return fsRootDir + return cfg.FilePickerDir } func FsCd(args []string, stdin string) string { @@ -664,19 +617,17 @@ func FsCd(args []string, stdin string) string { if !info.IsDir() { return fmt.Sprintf("[error] cd: not a directory: %s", dir) } - fsRootDir = abs - return fmt.Sprintf("Changed directory to: %s", fsRootDir) + cfg.FilePickerDir = abs + return fmt.Sprintf("Changed directory to: %s", cfg.FilePickerDir) } func FsSed(args []string, stdin string) string { if len(args) == 0 { return "[error] usage: sed 's/old/new/[g]' [file]" } - inPlace := false var filePath string var pattern string - for _, a := range args { if a == "-i" || a == "--in-place" { inPlace = true @@ -687,21 +638,17 @@ func FsSed(args []string, stdin string) string { filePath = a } } - if pattern == "" { return "[error] usage: sed 's/old/new/[g]' [file]" } - // Parse pattern: s/old/new/flags parts := strings.Split(pattern[1:], "/") if len(parts) < 2 { return "[error] invalid sed pattern. Use: s/old/new/[g]" } - oldStr := parts[0] newStr := parts[1] global := len(parts) >= 3 && strings.Contains(parts[2], "g") - var content string if filePath != "" && stdin == "" { // Read from file @@ -720,14 +667,12 @@ func FsSed(args []string, stdin string) string { } else { return "[error] sed: no input (use file path or pipe from stdin)" } - // Apply sed replacement if global { content = strings.ReplaceAll(content, oldStr, newStr) } else { content = strings.Replace(content, oldStr, newStr, 1) } - if inPlace && filePath != "" { abs, err := resolvePath(filePath) if err != nil { @@ -738,7 +683,6 @@ func FsSed(args []string, stdin string) string { } return fmt.Sprintf("Modified %s", filePath) } - return content } @@ -746,11 +690,9 @@ func FsMemory(args []string, stdin string) string { if len(args) == 0 { return "[error] usage: memory store <topic> <data> | memory get <topic> | memory list | memory forget <topic>" } - if memoryStore == nil { return "[error] memory store not initialized" } - switch args[0] { case "store": if len(args) < 3 && stdin == "" { @@ -768,7 +710,6 @@ func FsMemory(args []string, stdin string) string { return fmt.Sprintf("[error] failed to store: %v", err) } return fmt.Sprintf("Stored under topic: %s", topic) - case "get": if len(args) < 2 { return "[error] usage: memory get <topic>" @@ -779,7 +720,6 @@ func FsMemory(args []string, stdin string) string { return fmt.Sprintf("[error] failed to recall: %v", err) } return fmt.Sprintf("Topic: %s\n%s", topic, data) - case "list", "topics": topics, err := memoryStore.RecallTopics(agentRole) if err != nil { @@ -789,7 +729,6 @@ func FsMemory(args []string, stdin string) string { return "No topics stored." } return "Topics: " + strings.Join(topics, ", ") - case "forget", "delete": if len(args) < 2 { return "[error] usage: memory forget <topic>" @@ -800,7 +739,6 @@ func FsMemory(args []string, stdin string) string { return fmt.Sprintf("[error] failed to forget: %v", err) } return fmt.Sprintf("Deleted topic: %s", topic) - default: return fmt.Sprintf("[error] unknown subcommand: %s. Use: store, get, list, topics, forget, delete", args[0]) } diff --git a/tools/pw.go b/tools/pw.go index c21e8fe..936da1a 100644 --- a/tools/pw.go +++ b/tools/pw.go @@ -12,87 +12,6 @@ import ( "github.com/playwright-community/playwright-go" ) -var browserToolSysMsg = ` -Additional browser automation tools (Playwright): -[ -{ - "name": "pw_start", - "args": [], - "when_to_use": "start a browser instance before doing any browser automation. Must be called first." -}, -{ - "name": "pw_stop", - "args": [], - "when_to_use": "stop the browser instance when done with automation." -}, -{ - "name": "pw_is_running", - "args": [], - "when_to_use": "check if browser is currently running." -}, -{ - "name": "pw_navigate", - "args": ["url"], - "when_to_use": "open a specific URL in the web browser." -}, -{ - "name": "pw_click", - "args": ["selector", "index"], - "when_to_use": "click on an element on the current webpage. Use 'index' for multiple matches (default 0)." -}, -{ - "name": "pw_fill", - "args": ["selector", "text", "index"], - "when_to_use": "type text into an input field. Use 'index' for multiple matches (default 0)." -}, -{ - "name": "pw_extract_text", - "args": ["selector"], - "when_to_use": "extract text content from the page or specific elements. Use selector 'body' for all page text." -}, -{ - "name": "pw_screenshot", - "args": ["selector", "full_page"], - "when_to_use": "take a screenshot of the page or a specific element. Returns a file path to the image. Use to verify actions or inspect visual state." -}, -{ - "name": "pw_screenshot_and_view", - "args": ["selector", "full_page"], - "when_to_use": "take a screenshot and return the image for viewing. Use to visually verify page state." -}, -{ - "name": "pw_wait_for_selector", - "args": ["selector", "timeout"], - "when_to_use": "wait for an element to appear on the page before proceeding with further actions." -}, -{ - "name": "pw_drag", - "args": ["x1", "y1", "x2", "y2"], - "when_to_use": "drag the mouse from point (x1,y1) to (x2,y2)." -}, -{ - "name": "pw_click_at", - "args": ["x", "y"], - "when_to_use": "click at specific X,Y coordinates on the page. Use when you know the exact position." -}, -{ - "name": "pw_get_html", - "args": ["selector"], - "when_to_use": "get the HTML content of the page or a specific element. Use to understand page structure or extract raw HTML." -}, -{ - "name": "pw_get_dom", - "args": ["selector"], - "when_to_use": "get a structured DOM representation with tag, attributes, text, and children. Use to inspect element hierarchy and properties." -}, -{ - "name": "pw_search_elements", - "args": ["text", "selector"], - "when_to_use": "search for elements by text content or CSS selector. Returns matching elements with their tags, text, and HTML." -} -] -` - var ( pw *playwright.Playwright browser playwright.Browser @@ -532,6 +451,7 @@ func pwDragBySelector(args map[string]string) []byte { return []byte(fmt.Sprintf(`{"success": true, "message": "%s"}`, msg)) } +// nolint:unused func pwClickAt(args map[string]string) []byte { x, ok := args["x"] if !ok { @@ -682,6 +602,7 @@ func pwGetDOM(args map[string]string) []byte { return []byte(fmt.Sprintf(`{"dom": %s}`, string(data))) } +// nolint:unused func pwSearchElements(args map[string]string) []byte { text := args["text"] selector := args["selector"] diff --git a/tools/tools.go b/tools/tools.go index ee20a2d..bf6976f 100644 --- a/tools/tools.go +++ b/tools/tools.go @@ -8,11 +8,9 @@ import ( "gf-lt/config" "gf-lt/models" "gf-lt/storage" - "io" "log/slog" "os" "os/exec" - "path/filepath" "regexp" "strconv" "strings" @@ -25,20 +23,6 @@ import ( ) var ( - ToolCallRE = regexp.MustCompile(`__tool_call__\s*([\s\S]*?)__tool_call__`) - QuotesRE = regexp.MustCompile(`(".*?")`) - StarRE = regexp.MustCompile(`(\*.*?\*)`) - ThinkRE = regexp.MustCompile(`(?s)<think>.*?</think>`) - toolCallRE = ToolCallRE - quotesRE = QuotesRE - starRE = StarRE - thinkRE = ThinkRE - CodeBlockRE = regexp.MustCompile(`(?s)\x60{3}(?:.*?)\n(.*?)\n\s*\x60{3}\s*`) - SingleBacktickRE = regexp.MustCompile(`\x60([^\x60]*)\x60`) - codeBlockRE = CodeBlockRE - singleBacktickRE = SingleBacktickRE - RoleRE = regexp.MustCompile(`^(\w+):`) - SysLabels = []string{"assistant"} RpDefenitionSysMsg = ` For this roleplay immersion is at most importance. Every character thinks and acts based on their personality and setting of the roleplay. @@ -115,32 +99,8 @@ After that you are free to respond to the user. ragSearchSysPrompt = `Synthesize the document search results, extracting key information and presenting a concise answer. Provide sources and document IDs where relevant.` readURLSysPrompt = `Extract and summarize the content from the webpage. Provide key information, main points, and any relevant details.` summarySysPrompt = `Please provide a concise summary of the following conversation. Focus on key points, decisions, and actions. Provide only the summary, no additional commentary.` - webAgentClient *agent.AgentClient - webAgentClientOnce sync.Once - webAgentsOnce sync.Once ) -var windowToolSysMsg = ` -Additional window tools (available only if xdotool and maim are installed): -[ -{ -"name":"list_windows", -"args": [], -"when_to_use": "when asked to list visible windows; returns map of window ID to window name" -}, -{ -"name":"capture_window", -"args": ["window"], -"when_to_use": "when asked to take a screenshot of a specific window; saves to /tmp; window can be ID or name substring; returns file path" -}, -{ -"name":"capture_window_and_view", -"args": ["window"], -"when_to_use": "when asked to take a screenshot of a specific window and show it; saves to /tmp and returns image for viewing; window can be ID or name substring" -} -] -` - var WebSearcher searcher.WebSurfer var ( @@ -156,9 +116,22 @@ type Tools struct { logger *slog.Logger store storage.FullRepo WindowToolsAvailable bool - getTokenFunc func() string - webAgentClient *agent.AgentClient - webAgentClientOnce sync.Once + // getTokenFunc func() string + webAgentClient *agent.AgentClient + webAgentClientOnce sync.Once + webSearchAgent agent.AgenterB +} + +func (t *Tools) initAgentsB() { + t.GetWebAgentClient() + t.webSearchAgent = agent.NewWebAgentB(t.webAgentClient, webSearchSysPrompt) + agent.RegisterB("rag_search", agent.NewWebAgentB(t.webAgentClient, ragSearchSysPrompt)) + // Register websearch agent + agent.RegisterB("websearch", agent.NewWebAgentB(t.webAgentClient, webSearchSysPrompt)) + // Register read_url agent + agent.RegisterB("read_url", agent.NewWebAgentB(t.webAgentClient, readURLSysPrompt)) + // Register summarize_chat agent + agent.RegisterB("summarize_chat", agent.NewWebAgentB(t.webAgentClient, summarySysPrompt)) } func InitTools(cfg *config.Config, logger *slog.Logger, store storage.FullRepo) *Tools { @@ -201,6 +174,7 @@ func InitTools(cfg *config.Config, logger *slog.Logger, store storage.FullRepo) store: store, } t.checkWindowTools() + t.initAgentsB() return t } @@ -224,17 +198,17 @@ func SetTokenFunc(fn func() string) { getTokenFunc = fn } -func getWebAgentClient() *agent.AgentClient { - webAgentClientOnce.Do(func() { +func (t *Tools) GetWebAgentClient() *agent.AgentClient { + t.webAgentClientOnce.Do(func() { getToken := func() string { if getTokenFunc != nil { return getTokenFunc() } return "" } - webAgentClient = agent.NewAgentClient(cfg, logger, getToken) + t.webAgentClient = agent.NewAgentClient(cfg, logger, getToken) }) - return webAgentClient + return t.webAgentClient } func RegisterWindowTools(modelHasVision bool) { @@ -242,27 +216,13 @@ func RegisterWindowTools(modelHasVision bool) { // Window tools registration happens here if needed } -func RegisterPlaywrightTools() { - removePlaywrightToolsFromBaseTools() - if cfg != nil && cfg.PlaywrightEnabled { - // Playwright tools are registered here - } -} - -// webAgentsOnce.Do(func() { -// client := getWebAgentClient() -// // Register rag_search agent -// agent.RegisterB("rag_search", agent.NewWebAgentB(client, ragSearchSysPrompt)) -// // Register websearch agent -// agent.RegisterB("websearch", agent.NewWebAgentB(client, webSearchSysPrompt)) -// // Register read_url agent -// agent.RegisterB("read_url", agent.NewWebAgentB(client, readURLSysPrompt)) -// // Register summarize_chat agent -// agent.RegisterB("summarize_chat", agent.NewWebAgentB(client, summarySysPrompt)) -// }) +// func RegisterPlaywrightTools() { +// removePlaywrightToolsFromBaseTools() +// if cfg != nil && cfg.PlaywrightEnabled { +// // Playwright tools are registered here +// } // } -// web search (depends on extra server) func websearch(args map[string]string) []byte { // make http request return bytes query, ok := args["query"] @@ -407,89 +367,6 @@ func readURLRaw(args map[string]string) []byte { return []byte(fmt.Sprintf("%+v", resp)) } -// // Helper functions for file operations -// func resolvePath(p string) string { -// if filepath.IsAbs(p) { -// return p -// } -// return filepath.Join(cfg.FilePickerDir, p) -// } - -func readStringFromFile(filename string) (string, error) { - data, err := os.ReadFile(filename) - if err != nil { - return "", err - } - return string(data), nil -} - -func writeStringToFile(filename string, data string) error { - return os.WriteFile(filename, []byte(data), 0644) -} - -func appendStringToFile(filename string, data string) error { - file, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) - if err != nil { - return err - } - defer file.Close() - _, err = file.WriteString(data) - return err -} - -func removeFile(filename string) error { - return os.Remove(filename) -} - -func moveFile(src, dst string) error { - // First try with os.Rename (works within same filesystem) - if err := os.Rename(src, dst); err == nil { - return nil - } - // If that fails (e.g., cross-filesystem), copy and delete - return copyAndRemove(src, dst) -} - -func copyFile(src, dst string) error { - srcFile, err := os.Open(src) - if err != nil { - return err - } - defer srcFile.Close() - dstFile, err := os.Create(dst) - if err != nil { - return err - } - defer dstFile.Close() - _, err = io.Copy(dstFile, srcFile) - return err -} - -func copyAndRemove(src, dst string) error { - // Copy the file - if err := copyFile(src, dst); err != nil { - return err - } - // Remove the source file - return os.Remove(src) -} - -func listDirectory(path string) ([]string, error) { - entries, err := os.ReadDir(path) - if err != nil { - return nil, err - } - var files []string - for _, entry := range entries { - if entry.IsDir() { - files = append(files, entry.Name()+"/") // Add "/" to indicate directory - } else { - files = append(files, entry.Name()) - } - } - return files, nil -} - // Unified run command - single entry point for shell, memory, and todo func runCmd(args map[string]string) []byte { commandStr := args["command"] @@ -498,16 +375,13 @@ func runCmd(args map[string]string) []byte { logger.Error(msg) return []byte(msg) } - // Parse the command - first word is subcommand parts := strings.Fields(commandStr) if len(parts) == 0 { return []byte("[error] empty command") } - subcmd := parts[0] rest := parts[1:] - // Route to appropriate handler switch subcmd { case "help": @@ -566,10 +440,8 @@ Actions: wait <selector> - wait for element drag <from> <to> - drag element`) } - action := args[0] rest := args[1:] - switch action { case "start": return pwStart(originalArgs) @@ -916,9 +788,7 @@ func handleTodoSubcommand(args []string, originalArgs map[string]string) []byte if len(args) == 0 { return []byte("usage: todo create|read|update|delete") } - subcmd := args[0] - switch subcmd { case "create": task := strings.Join(args[1:], " ") @@ -929,26 +799,22 @@ func handleTodoSubcommand(args []string, originalArgs map[string]string) []byte return []byte("usage: todo create <task>") } return todoCreate(map[string]string{"task": task}) - case "read": id := "" if len(args) > 1 { id = args[1] } return todoRead(map[string]string{"id": id}) - case "update": if len(args) < 2 { return []byte("usage: todo update <id> <status>") } return todoUpdate(map[string]string{"id": args[1], "status": args[2]}) - case "delete": if len(args) < 2 { return []byte("usage: todo delete <id>") } return todoDelete(map[string]string{"id": args[1]}) - default: return []byte(fmt.Sprintf("unknown todo subcommand: %s", subcmd)) } @@ -962,55 +828,53 @@ func executeCommand(args map[string]string) []byte { logger.Error(msg) return []byte(msg) } - // Use chain execution for pipe/chaining support result := ExecChain(commandStr) return []byte(result) } -// handleCdCommand handles the cd command to update FilePickerDir -func handleCdCommand(args []string) []byte { - var targetDir string - if len(args) == 0 { - // cd with no args goes to home directory - homeDir, err := os.UserHomeDir() - if err != nil { - msg := "cd: cannot determine home directory: " + err.Error() - logger.Error(msg) - return []byte(msg) - } - targetDir = homeDir - } else { - targetDir = args[0] - } - // Resolve relative paths against current FilePickerDir - if !filepath.IsAbs(targetDir) { - targetDir = filepath.Join(cfg.FilePickerDir, targetDir) - } - // Verify the directory exists - info, err := os.Stat(targetDir) - if err != nil { - msg := "cd: " + targetDir + ": " + err.Error() - logger.Error(msg) - return []byte(msg) - } - if !info.IsDir() { - msg := "cd: " + targetDir + ": not a directory" - logger.Error(msg) - return []byte(msg) - } - - // Update FilePickerDir - absDir, err := filepath.Abs(targetDir) - if err != nil { - msg := "cd: failed to resolve path: " + err.Error() - logger.Error(msg) - return []byte(msg) - } - cfg.FilePickerDir = absDir - msg := "FilePickerDir changed to: " + absDir - return []byte(msg) -} +// // handleCdCommand handles the cd command to update FilePickerDir +// func handleCdCommand(args []string) []byte { +// var targetDir string +// if len(args) == 0 { +// // cd with no args goes to home directory +// homeDir, err := os.UserHomeDir() +// if err != nil { +// msg := "cd: cannot determine home directory: " + err.Error() +// logger.Error(msg) +// return []byte(msg) +// } +// targetDir = homeDir +// } else { +// targetDir = args[0] +// } +// // Resolve relative paths against current FilePickerDir +// if !filepath.IsAbs(targetDir) { +// targetDir = filepath.Join(cfg.FilePickerDir, targetDir) +// } +// // Verify the directory exists +// info, err := os.Stat(targetDir) +// if err != nil { +// msg := "cd: " + targetDir + ": " + err.Error() +// logger.Error(msg) +// return []byte(msg) +// } +// if !info.IsDir() { +// msg := "cd: " + targetDir + ": not a directory" +// logger.Error(msg) +// return []byte(msg) +// } +// // Update FilePickerDir +// absDir, err := filepath.Abs(targetDir) +// if err != nil { +// msg := "cd: failed to resolve path: " + err.Error() +// logger.Error(msg) +// return []byte(msg) +// } +// cfg.FilePickerDir = absDir +// msg := "FilePickerDir changed to: " + absDir +// return []byte(msg) +// } // Helper functions for command execution // Todo structure @@ -1405,8 +1269,8 @@ var FnMap = map[string]fnSig{ "view_img": viewImgTool, "help": helpTool, // Unified run command - "run": runCmd, - // "summarize_chat": summarizeChat, + "run": runCmd, + "summarize_chat": summarizeChat, } func removeWindowToolsFromBaseTools() { @@ -1427,476 +1291,485 @@ func removeWindowToolsFromBaseTools() { delete(FnMap, "capture_window_and_view") } -func removePlaywrightToolsFromBaseTools() { - playwrightToolNames := map[string]bool{ - "pw_start": true, - "pw_stop": true, - "pw_is_running": true, - "pw_navigate": true, - "pw_click": true, - "pw_click_at": true, - "pw_fill": true, - "pw_extract_text": true, - "pw_screenshot": true, - "pw_screenshot_and_view": true, - "pw_wait_for_selector": true, - "pw_drag": true, - } - var filtered []models.Tool - for _, tool := range BaseTools { - if !playwrightToolNames[tool.Function.Name] { - filtered = append(filtered, tool) - } +func summarizeChat(args map[string]string) []byte { + data, err := json.Marshal(args) + if err != nil { + return []byte("error: failed to marshal arguments") } - BaseTools = filtered - delete(FnMap, "pw_start") - delete(FnMap, "pw_stop") - delete(FnMap, "pw_is_running") - delete(FnMap, "pw_navigate") - delete(FnMap, "pw_click") - delete(FnMap, "pw_click_at") - delete(FnMap, "pw_fill") - delete(FnMap, "pw_extract_text") - delete(FnMap, "pw_screenshot") - delete(FnMap, "pw_screenshot_and_view") - delete(FnMap, "pw_wait_for_selector") - delete(FnMap, "pw_drag") + return []byte(data) } -func (t *Tools) RegisterWindowTools(modelHasVision bool) { - removeWindowToolsFromBaseTools() - if t.WindowToolsAvailable { - FnMap["list_windows"] = listWindows - FnMap["capture_window"] = captureWindow - windowTools := []models.Tool{ - { - Type: "function", - Function: models.ToolFunc{ - Name: "list_windows", - Description: "List all visible windows with their IDs and names. Returns a map of window ID to window name.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{}, - }, - }, - }, - { - Type: "function", - Function: models.ToolFunc{ - Name: "capture_window", - Description: "Capture a screenshot of a specific window and save it to /tmp. Requires window parameter (window ID or name substring).", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"window"}, - Properties: map[string]models.ToolArgProps{ - "window": models.ToolArgProps{ - Type: "string", - Description: "window ID or window name (partial match)", - }, - }, - }, - }, - }, - } - if modelHasVision { - FnMap["capture_window_and_view"] = captureWindowAndView - windowTools = append(windowTools, models.Tool{ - Type: "function", - Function: models.ToolFunc{ - Name: "capture_window_and_view", - Description: "Capture a screenshot of a specific window, save it to /tmp, and return the image for viewing. Requires window parameter (window ID or name substring).", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"window"}, - Properties: map[string]models.ToolArgProps{ - "window": models.ToolArgProps{ - Type: "string", - Description: "window ID or window name (partial match)", - }, - }, - }, - }, - }) - } - BaseTools = append(BaseTools, windowTools...) - ToolSysMsg += windowToolSysMsg - } -} +// func removePlaywrightToolsFromBaseTools() { +// playwrightToolNames := map[string]bool{ +// "pw_start": true, +// "pw_stop": true, +// "pw_is_running": true, +// "pw_navigate": true, +// "pw_click": true, +// "pw_click_at": true, +// "pw_fill": true, +// "pw_extract_text": true, +// "pw_screenshot": true, +// "pw_screenshot_and_view": true, +// "pw_wait_for_selector": true, +// "pw_drag": true, +// } +// var filtered []models.Tool +// for _, tool := range BaseTools { +// if !playwrightToolNames[tool.Function.Name] { +// filtered = append(filtered, tool) +// } +// } +// BaseTools = filtered +// delete(FnMap, "pw_start") +// delete(FnMap, "pw_stop") +// delete(FnMap, "pw_is_running") +// delete(FnMap, "pw_navigate") +// delete(FnMap, "pw_click") +// delete(FnMap, "pw_click_at") +// delete(FnMap, "pw_fill") +// delete(FnMap, "pw_extract_text") +// delete(FnMap, "pw_screenshot") +// delete(FnMap, "pw_screenshot_and_view") +// delete(FnMap, "pw_wait_for_selector") +// delete(FnMap, "pw_drag") +// } -var browserAgentSysPrompt = `You are an autonomous browser automation agent. Your goal is to complete the user's task by intelligently using browser automation - -Important: The browser may already be running from a previous task! Always check pw_is_running first before starting a new browser. - -Available tools: -- pw_start: Start browser (only if not already running) -- pw_stop: Stop browser (only when you're truly done and browser is no longer needed) -- pw_is_running: Check if browser is running -- pw_navigate: Go to a URL -- pw_click: Click an element by CSS selector -- pw_fill: Type text into an input -- pw_extract_text: Get text from page/element -- pw_screenshot: Take a screenshot (returns file path) -- pw_screenshot_and_view: Take screenshot with image for viewing -- pw_wait_for_selector: Wait for element to appear -- pw_drag: Drag mouse from one point to another -- pw_click_at: Click at X,Y coordinates -- pw_get_html: Get HTML content -- pw_get_dom: Get structured DOM tree -- pw_search_elements: Search for elements by text or selector - -Workflow: -1. First, check if browser is already running (pw_is_running) -2. Only start browser if not already running (pw_start) -3. Navigate to required pages (pw_navigate) -4. Interact with elements as needed (click, fill, etc.) -5. Extract information or take screenshots as requested -6. IMPORTANT: Do NOT stop the browser when done! Leave it running so the user can continue interacting with the page in subsequent requests. - -Always provide clear feedback about what you're doing and what you found.` - -func runBrowserAgent(args map[string]string) []byte { - task, ok := args["task"] - if !ok || task == "" { - return []byte(`{"error": "task argument is required"}`) - } - client := getWebAgentClient() - pwAgent := agent.NewPWAgent(client, browserAgentSysPrompt) - pwAgent.SetTools(agent.GetPWTools()) - return pwAgent.ProcessTask(task) -} +// func (t *Tools) RegisterWindowTools(modelHasVision bool) { +// removeWindowToolsFromBaseTools() +// if t.WindowToolsAvailable { +// FnMap["list_windows"] = listWindows +// FnMap["capture_window"] = captureWindow +// windowTools := []models.Tool{ +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "list_windows", +// Description: "List all visible windows with their IDs and names. Returns a map of window ID to window name.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{}, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "capture_window", +// Description: "Capture a screenshot of a specific window and save it to /tmp. Requires window parameter (window ID or name substring).", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"window"}, +// Properties: map[string]models.ToolArgProps{ +// "window": models.ToolArgProps{ +// Type: "string", +// Description: "window ID or window name (partial match)", +// }, +// }, +// }, +// }, +// }, +// } +// if modelHasVision { +// FnMap["capture_window_and_view"] = captureWindowAndView +// windowTools = append(windowTools, models.Tool{ +// Type: "function", +// Function: models.ToolFunc{ +// Name: "capture_window_and_view", +// Description: "Capture a screenshot of a specific window, save it to /tmp, and return the image for viewing. Requires window parameter (window ID or name substring).", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"window"}, +// Properties: map[string]models.ToolArgProps{ +// "window": models.ToolArgProps{ +// Type: "string", +// Description: "window ID or window name (partial match)", +// }, +// }, +// }, +// }, +// }) +// } +// BaseTools = append(BaseTools, windowTools...) +// ToolSysMsg += windowToolSysMsg +// } +// } -func registerPlaywrightTools() { - removePlaywrightToolsFromBaseTools() - if cfg != nil && cfg.PlaywrightEnabled { - FnMap["pw_start"] = pwStart - FnMap["pw_stop"] = pwStop - FnMap["pw_is_running"] = pwIsRunning - FnMap["pw_navigate"] = pwNavigate - FnMap["pw_click"] = pwClick - FnMap["pw_click_at"] = pwClickAt - FnMap["pw_fill"] = pwFill - FnMap["pw_extract_text"] = pwExtractText - FnMap["pw_screenshot"] = pwScreenshot - FnMap["pw_screenshot_and_view"] = pwScreenshotAndView - FnMap["pw_wait_for_selector"] = pwWaitForSelector - FnMap["pw_drag"] = pwDrag - FnMap["pw_get_html"] = pwGetHTML - FnMap["pw_get_dom"] = pwGetDOM - FnMap["pw_search_elements"] = pwSearchElements - playwrightTools := []models.Tool{ - { - Type: "function", - Function: models.ToolFunc{ - Name: "pw_start", - Description: "Start a Playwright browser instance. Call this first before using other pw_ Uses headless mode by default (set PlaywrightHeadless=false in config for GUI).", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{}, - }, - }, - }, - { - Type: "function", - Function: models.ToolFunc{ - Name: "pw_stop", - Description: "Stop the Playwright browser instance. Call when done with browser automation.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{}, - }, - }, - }, - { - Type: "function", - Function: models.ToolFunc{ - Name: "pw_is_running", - Description: "Check if Playwright browser is currently running.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{}, - }, - }, - }, - { - Type: "function", - Function: models.ToolFunc{ - Name: "pw_navigate", - Description: "Navigate to a URL in the browser.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"url"}, - Properties: map[string]models.ToolArgProps{ - "url": models.ToolArgProps{ - Type: "string", - Description: "URL to navigate to", - }, - }, - }, - }, - }, - { - Type: "function", - Function: models.ToolFunc{ - Name: "pw_click", - Description: "Click on an element using CSS selector. Use 'index' for multiple matches (default 0).", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"selector"}, - Properties: map[string]models.ToolArgProps{ - "selector": models.ToolArgProps{ - Type: "string", - Description: "CSS selector for the element to click", - }, - "index": models.ToolArgProps{ - Type: "string", - Description: "optional index for multiple matches (default 0)", - }, - }, - }, - }, - }, - { - Type: "function", - Function: models.ToolFunc{ - Name: "pw_fill", - Description: "Fill an input field with text using CSS selector.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"selector", "text"}, - Properties: map[string]models.ToolArgProps{ - "selector": models.ToolArgProps{ - Type: "string", - Description: "CSS selector for the input element", - }, - "text": models.ToolArgProps{ - Type: "string", - Description: "text to fill into the input", - }, - "index": models.ToolArgProps{ - Type: "string", - Description: "optional index for multiple matches (default 0)", - }, - }, - }, - }, - }, - { - Type: "function", - Function: models.ToolFunc{ - Name: "pw_extract_text", - Description: "Extract text content from the page or specific elements using CSS selector. Use 'body' for all page text.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"selector"}, - Properties: map[string]models.ToolArgProps{ - "selector": models.ToolArgProps{ - Type: "string", - Description: "CSS selector (use 'body' for all page text)", - }, - }, - }, - }, - }, - { - Type: "function", - Function: models.ToolFunc{ - Name: "pw_screenshot", - Description: "Take a screenshot of the page or a specific element. Returns file path to saved image.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{ - "selector": models.ToolArgProps{ - Type: "string", - Description: "optional CSS selector for element to screenshot", - }, - "full_page": models.ToolArgProps{ - Type: "string", - Description: "optional: 'true' to capture full page (default false)", - }, - }, - }, - }, - }, - { - Type: "function", - Function: models.ToolFunc{ - Name: "pw_screenshot_and_view", - Description: "Take a screenshot and return the image for viewing. Use when model needs to see the screenshot.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{ - "selector": models.ToolArgProps{ - Type: "string", - Description: "optional CSS selector for element to screenshot", - }, - "full_page": models.ToolArgProps{ - Type: "string", - Description: "optional: 'true' to capture full page (default false)", - }, - }, - }, - }, - }, - { - Type: "function", - Function: models.ToolFunc{ - Name: "pw_wait_for_selector", - Description: "Wait for an element to appear on the page.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"selector"}, - Properties: map[string]models.ToolArgProps{ - "selector": models.ToolArgProps{ - Type: "string", - Description: "CSS selector to wait for", - }, - "timeout": models.ToolArgProps{ - Type: "string", - Description: "optional timeout in ms (default 30000)", - }, - }, - }, - }, - }, - { - Type: "function", - Function: models.ToolFunc{ - Name: "pw_drag", - Description: "Drag the mouse from one point to another.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"x1", "y1", "x2", "y2"}, - Properties: map[string]models.ToolArgProps{ - "x1": models.ToolArgProps{ - Type: "string", - Description: "starting X coordinate", - }, - "y1": models.ToolArgProps{ - Type: "string", - Description: "starting Y coordinate", - }, - "x2": models.ToolArgProps{ - Type: "string", - Description: "ending X coordinate", - }, - "y2": models.ToolArgProps{ - Type: "string", - Description: "ending Y coordinate", - }, - }, - }, - }, - }, - { - Type: "function", - Function: models.ToolFunc{ - Name: "pw_get_html", - Description: "Get the HTML content of the page or a specific element.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{ - "selector": models.ToolArgProps{ - Type: "string", - Description: "optional CSS selector (default: body)", - }, - }, - }, - }, - }, - { - Type: "function", - Function: models.ToolFunc{ - Name: "pw_get_dom", - Description: "Get a structured DOM representation of an element with tag, attributes, text, and children.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{ - "selector": models.ToolArgProps{ - Type: "string", - Description: "optional CSS selector (default: body)", - }, - }, - }, - }, - }, - { - Type: "function", - Function: models.ToolFunc{ - Name: "pw_search_elements", - Description: "Search for elements by text content or CSS selector. Returns matching elements with their tags, text, and HTML.", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{}, - Properties: map[string]models.ToolArgProps{ - "text": models.ToolArgProps{ - Type: "string", - Description: "text to search for in elements", - }, - "selector": models.ToolArgProps{ - Type: "string", - Description: "CSS selector to search for", - }, - }, - }, - }, - }, - } - BaseTools = append(BaseTools, playwrightTools...) - ToolSysMsg += browserToolSysMsg - agent.RegisterPWTool("pw_start", pwStart) - agent.RegisterPWTool("pw_stop", pwStop) - agent.RegisterPWTool("pw_is_running", pwIsRunning) - agent.RegisterPWTool("pw_navigate", pwNavigate) - agent.RegisterPWTool("pw_click", pwClick) - agent.RegisterPWTool("pw_click_at", pwClickAt) - agent.RegisterPWTool("pw_fill", pwFill) - agent.RegisterPWTool("pw_extract_text", pwExtractText) - agent.RegisterPWTool("pw_screenshot", pwScreenshot) - agent.RegisterPWTool("pw_screenshot_and_view", pwScreenshotAndView) - agent.RegisterPWTool("pw_wait_for_selector", pwWaitForSelector) - agent.RegisterPWTool("pw_drag", pwDrag) - agent.RegisterPWTool("pw_get_html", pwGetHTML) - agent.RegisterPWTool("pw_get_dom", pwGetDOM) - agent.RegisterPWTool("pw_search_elements", pwSearchElements) - browserAgentTool := []models.Tool{ - { - Type: "function", - Function: models.ToolFunc{ - Name: "browser_agent", - Description: "Autonomous browser automation agent. Use for complex multi-step browser tasks like 'go to website, login, and take screenshot'. The agent will plan and execute steps automatically using browser ", - Parameters: models.ToolFuncParams{ - Type: "object", - Required: []string{"task"}, - Properties: map[string]models.ToolArgProps{ - "task": {Type: "string", Description: "The task to accomplish, e.g., 'go to github.com and take a screenshot of the homepage'"}, - }, - }, - }, - }, - } - BaseTools = append(BaseTools, browserAgentTool...) - FnMap["browser_agent"] = runBrowserAgent - } -} +// for pw agentA +// var browserAgentSysPrompt = `You are an autonomous browser automation agent. Your goal is to complete the user's task by intelligently using browser automation + +// Important: The browser may already be running from a previous task! Always check pw_is_running first before starting a new browser. + +// Available tools: +// - pw_start: Start browser (only if not already running) +// - pw_stop: Stop browser (only when you're truly done and browser is no longer needed) +// - pw_is_running: Check if browser is running +// - pw_navigate: Go to a URL +// - pw_click: Click an element by CSS selector +// - pw_fill: Type text into an input +// - pw_extract_text: Get text from page/element +// - pw_screenshot: Take a screenshot (returns file path) +// - pw_screenshot_and_view: Take screenshot with image for viewing +// - pw_wait_for_selector: Wait for element to appear +// - pw_drag: Drag mouse from one point to another +// - pw_click_at: Click at X,Y coordinates +// - pw_get_html: Get HTML content +// - pw_get_dom: Get structured DOM tree +// - pw_search_elements: Search for elements by text or selector + +// Workflow: +// 1. First, check if browser is already running (pw_is_running) +// 2. Only start browser if not already running (pw_start) +// 3. Navigate to required pages (pw_navigate) +// 4. Interact with elements as needed (click, fill, etc.) +// 5. Extract information or take screenshots as requested +// 6. IMPORTANT: Do NOT stop the browser when done! Leave it running so the user can continue interacting with the page in subsequent requests. + +// Always provide clear feedback about what you're doing and what you found.` + +// func (t *Tools) runBrowserAgent(args map[string]string) []byte { +// task, ok := args["task"] +// if !ok || task == "" { +// return []byte(`{"error": "task argument is required"}`) +// } +// client := t.GetWebAgentClient() +// pwAgent := agent.NewPWAgent(client, browserAgentSysPrompt) +// pwAgent.SetTools(agent.GetPWTools()) +// return pwAgent.ProcessTask(task) +// } + +// func registerPlaywrightTools() { +// removePlaywrightToolsFromBaseTools() +// if cfg != nil && cfg.PlaywrightEnabled { +// FnMap["pw_start"] = pwStart +// FnMap["pw_stop"] = pwStop +// FnMap["pw_is_running"] = pwIsRunning +// FnMap["pw_navigate"] = pwNavigate +// FnMap["pw_click"] = pwClick +// FnMap["pw_click_at"] = pwClickAt +// FnMap["pw_fill"] = pwFill +// FnMap["pw_extract_text"] = pwExtractText +// FnMap["pw_screenshot"] = pwScreenshot +// FnMap["pw_screenshot_and_view"] = pwScreenshotAndView +// FnMap["pw_wait_for_selector"] = pwWaitForSelector +// FnMap["pw_drag"] = pwDrag +// FnMap["pw_get_html"] = pwGetHTML +// FnMap["pw_get_dom"] = pwGetDOM +// FnMap["pw_search_elements"] = pwSearchElements +// playwrightTools := []models.Tool{ +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_start", +// Description: "Start a Playwright browser instance. Call this first before using other pw_ Uses headless mode by default (set PlaywrightHeadless=false in config for GUI).", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{}, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_stop", +// Description: "Stop the Playwright browser instance. Call when done with browser automation.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{}, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_is_running", +// Description: "Check if Playwright browser is currently running.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{}, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_navigate", +// Description: "Navigate to a URL in the browser.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"url"}, +// Properties: map[string]models.ToolArgProps{ +// "url": models.ToolArgProps{ +// Type: "string", +// Description: "URL to navigate to", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_click", +// Description: "Click on an element using CSS selector. Use 'index' for multiple matches (default 0).", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"selector"}, +// Properties: map[string]models.ToolArgProps{ +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "CSS selector for the element to click", +// }, +// "index": models.ToolArgProps{ +// Type: "string", +// Description: "optional index for multiple matches (default 0)", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_fill", +// Description: "Fill an input field with text using CSS selector.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"selector", "text"}, +// Properties: map[string]models.ToolArgProps{ +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "CSS selector for the input element", +// }, +// "text": models.ToolArgProps{ +// Type: "string", +// Description: "text to fill into the input", +// }, +// "index": models.ToolArgProps{ +// Type: "string", +// Description: "optional index for multiple matches (default 0)", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_extract_text", +// Description: "Extract text content from the page or specific elements using CSS selector. Use 'body' for all page text.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"selector"}, +// Properties: map[string]models.ToolArgProps{ +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "CSS selector (use 'body' for all page text)", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_screenshot", +// Description: "Take a screenshot of the page or a specific element. Returns file path to saved image.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{ +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "optional CSS selector for element to screenshot", +// }, +// "full_page": models.ToolArgProps{ +// Type: "string", +// Description: "optional: 'true' to capture full page (default false)", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_screenshot_and_view", +// Description: "Take a screenshot and return the image for viewing. Use when model needs to see the screenshot.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{ +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "optional CSS selector for element to screenshot", +// }, +// "full_page": models.ToolArgProps{ +// Type: "string", +// Description: "optional: 'true' to capture full page (default false)", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_wait_for_selector", +// Description: "Wait for an element to appear on the page.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"selector"}, +// Properties: map[string]models.ToolArgProps{ +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "CSS selector to wait for", +// }, +// "timeout": models.ToolArgProps{ +// Type: "string", +// Description: "optional timeout in ms (default 30000)", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_drag", +// Description: "Drag the mouse from one point to another.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"x1", "y1", "x2", "y2"}, +// Properties: map[string]models.ToolArgProps{ +// "x1": models.ToolArgProps{ +// Type: "string", +// Description: "starting X coordinate", +// }, +// "y1": models.ToolArgProps{ +// Type: "string", +// Description: "starting Y coordinate", +// }, +// "x2": models.ToolArgProps{ +// Type: "string", +// Description: "ending X coordinate", +// }, +// "y2": models.ToolArgProps{ +// Type: "string", +// Description: "ending Y coordinate", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_get_html", +// Description: "Get the HTML content of the page or a specific element.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{ +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "optional CSS selector (default: body)", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_get_dom", +// Description: "Get a structured DOM representation of an element with tag, attributes, text, and children.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{ +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "optional CSS selector (default: body)", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_search_elements", +// Description: "Search for elements by text content or CSS selector. Returns matching elements with their tags, text, and HTML.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{ +// "text": models.ToolArgProps{ +// Type: "string", +// Description: "text to search for in elements", +// }, +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "CSS selector to search for", +// }, +// }, +// }, +// }, +// }, +// } +// BaseTools = append(BaseTools, playwrightTools...) +// ToolSysMsg += browserToolSysMsg +// agent.RegisterPWTool("pw_start", pwStart) +// agent.RegisterPWTool("pw_stop", pwStop) +// agent.RegisterPWTool("pw_is_running", pwIsRunning) +// agent.RegisterPWTool("pw_navigate", pwNavigate) +// agent.RegisterPWTool("pw_click", pwClick) +// agent.RegisterPWTool("pw_click_at", pwClickAt) +// agent.RegisterPWTool("pw_fill", pwFill) +// agent.RegisterPWTool("pw_extract_text", pwExtractText) +// agent.RegisterPWTool("pw_screenshot", pwScreenshot) +// agent.RegisterPWTool("pw_screenshot_and_view", pwScreenshotAndView) +// agent.RegisterPWTool("pw_wait_for_selector", pwWaitForSelector) +// agent.RegisterPWTool("pw_drag", pwDrag) +// agent.RegisterPWTool("pw_get_html", pwGetHTML) +// agent.RegisterPWTool("pw_get_dom", pwGetDOM) +// agent.RegisterPWTool("pw_search_elements", pwSearchElements) +// browserAgentTool := []models.Tool{ +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "browser_agent", +// Description: "Autonomous browser automation agent. Use for complex multi-step browser tasks like 'go to website, login, and take screenshot'. The agent will plan and execute steps automatically using browser ", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"task"}, +// Properties: map[string]models.ToolArgProps{ +// "task": {Type: "string", Description: "The task to accomplish, e.g., 'go to github.com and take a screenshot of the homepage'"}, +// }, +// }, +// }, +// }, +// } +// BaseTools = append(BaseTools, browserAgentTool...) +// FnMap["browser_agent"] = tooler.runBrowserAgent +// } +// } -func CallToolWithAgent(name string, args map[string]string) []byte { +func CallToolWithAgent(name string, args map[string]string) ([]byte, bool) { f, ok := FnMap[name] if !ok { - return []byte(fmt.Sprintf("tool %s not found", name)) + return []byte(fmt.Sprintf("tool %s not found", name)), false } raw := f(args) if a := agent.Get(name); a != nil { - return a.Process(args, raw) + return a.Process(args, raw), true } - return raw + return raw, true } // openai style def |
