diff options
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/chain.go | 416 | ||||
| -rw-r--r-- | tools/fs.go | 755 | ||||
| -rw-r--r-- | tools/pw.go | 645 | ||||
| -rw-r--r-- | tools/tools.go | 1909 |
4 files changed, 3725 insertions, 0 deletions
diff --git a/tools/chain.go b/tools/chain.go new file mode 100644 index 0000000..381cc1a --- /dev/null +++ b/tools/chain.go @@ -0,0 +1,416 @@ +package tools + +import ( + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" +) + +// Operator represents a chain operator between commands. +type Operator int + +const ( + OpNone Operator = iota + OpAnd // && + OpOr // || + OpSeq // ; + OpPipe // | +) + +// Segment is a single command in a chain. +type Segment struct { + Raw string + Op Operator // operator AFTER this segment +} + +// ParseChain splits a command string into segments by &&, ;, and |. +// Respects quoted strings (single and double quotes). +func ParseChain(input string) []Segment { + var segments []Segment + var current strings.Builder + runes := []rune(input) + n := len(runes) + for i := 0; i < n; i++ { + ch := runes[i] + // handle quotes + if ch == '\'' || ch == '"' { + quote := ch + current.WriteRune(ch) + i++ + for i < n && runes[i] != quote { + current.WriteRune(runes[i]) + i++ + } + if i < n { + current.WriteRune(runes[i]) + } + continue + } + // && + if ch == '&' && i+1 < n && runes[i+1] == '&' { + segments = append(segments, Segment{ + Raw: strings.TrimSpace(current.String()), + Op: OpAnd, + }) + current.Reset() + i++ // skip second & + continue + } + // ; + if ch == ';' { + segments = append(segments, Segment{ + Raw: strings.TrimSpace(current.String()), + Op: OpSeq, + }) + current.Reset() + continue + } + // || + if ch == '|' && i+1 < n && runes[i+1] == '|' { + segments = append(segments, Segment{ + Raw: strings.TrimSpace(current.String()), + Op: OpOr, + }) + current.Reset() + i++ // skip second | + continue + } + // | (single pipe) + if ch == '|' { + segments = append(segments, Segment{ + Raw: strings.TrimSpace(current.String()), + Op: OpPipe, + }) + current.Reset() + continue + } + current.WriteRune(ch) + } + // last segment + last := strings.TrimSpace(current.String()) + if last != "" { + segments = append(segments, 
Segment{Raw: last, Op: OpNone}) + } + return segments +} + +// ExecChain executes a command string with pipe/chaining support. +// Returns the combined output of all commands. +func ExecChain(command string) string { + segments := ParseChain(command) + if len(segments) == 0 { + return "[error] empty command" + } + var collected []string + var lastOutput string + var lastErr error + pipeInput := "" + for i, seg := range segments { + if i > 0 { + prevOp := segments[i-1].Op + // && semantics: skip if previous failed + if prevOp == OpAnd && lastErr != nil { + continue + } + // || semantics: skip if previous succeeded + if prevOp == OpOr && lastErr == nil { + continue + } + } + // determine stdin for this segment + segStdin := "" + if i == 0 { + segStdin = pipeInput + } else if segments[i-1].Op == OpPipe { + segStdin = lastOutput + } + lastOutput, lastErr = execSingle(seg.Raw, segStdin) + // pipe: output flows to next command's stdin + // && or ;: collect output + if i < len(segments)-1 && seg.Op == OpPipe { + continue + } + if lastOutput != "" { + collected = append(collected, lastOutput) + } + } + return strings.Join(collected, "\n") +} + +// execSingle executes a single command (with arguments) and returns output and error. +func execSingle(command, stdin string) (string, error) { + parts := tokenize(command) + if len(parts) == 0 { + return "", errors.New("empty command") + } + name := parts[0] + args := parts[1:] + // Check if it's a built-in Go command + if result, isBuiltin := execBuiltin(name, args, stdin); isBuiltin { + return result, nil + } + // Otherwise execute as system command + cmd := exec.Command(name, args...) + if stdin != "" { + cmd.Stdin = strings.NewReader(stdin) + } + output, err := cmd.CombinedOutput() + if err != nil { + return string(output), err + } + return string(output), nil +} + +// tokenize splits a command string by whitespace, respecting quotes. 
// tokenize splits a command string into arguments on spaces and tabs.
//
// Text inside single or double quotes is kept together and the quote
// characters themselves are dropped. Quoted and unquoted runs that touch
// (e.g. a"b c"d) form a single token. An explicitly quoted empty string
// ("" or '') yields an empty token instead of being discarded, so callers
// can pass empty arguments. An unterminated quote runs to the end of input.
func tokenize(input string) []string {
	var (
		tokens    []string
		current   strings.Builder
		inQuote   bool
		quoteChar rune
		pending   bool // a token has been started, even if it is still empty
	)
	flush := func() {
		if !pending {
			return
		}
		tokens = append(tokens, current.String())
		current.Reset()
		pending = false
	}
	for _, ch := range input {
		switch {
		case inQuote:
			if ch == quoteChar {
				inQuote = false
			} else {
				current.WriteRune(ch)
			}
		case ch == '\'' || ch == '"':
			inQuote = true
			quoteChar = ch
			// Opening a quote starts a token even when nothing follows inside it.
			pending = true
		case ch == ' ' || ch == '\t':
			flush()
		default:
			current.WriteRune(ch)
			pending = true
		}
	}
	flush()
	return tokens
}
{ + return "[error] usage: mkdir [-p] <dir>", true + } + createParents := false + var dirPath string + for _, a := range args { + if a == "-p" || a == "--parents" { + createParents = true + } else if dirPath == "" { + dirPath = a + } + } + if dirPath == "" { + return "[error] usage: mkdir [-p] <dir>", true + } + abs := dirPath + if !filepath.IsAbs(dirPath) { + abs = filepath.Join(cfg.FilePickerDir, dirPath) + } + abs = filepath.Clean(abs) + var mkdirFunc func(string, os.FileMode) error + if createParents { + mkdirFunc = os.MkdirAll + } else { + mkdirFunc = os.Mkdir + } + if err := mkdirFunc(abs, 0o755); err != nil { + return fmt.Sprintf("[error] mkdir: %v", err), true + } + if createParents { + return "Created " + dirPath + " (with parents)", true + } + return "Created " + dirPath, true + case "ls": + dir := "." + for _, a := range args { + if !strings.HasPrefix(a, "-") { + dir = a + break + } + } + abs := dir + if !filepath.IsAbs(dir) { + abs = filepath.Join(cfg.FilePickerDir, dir) + } + entries, err := os.ReadDir(abs) + if err != nil { + return fmt.Sprintf("[error] ls: %v", err), true + } + var out strings.Builder + for _, e := range entries { + info, _ := e.Info() + switch { + case e.IsDir(): + fmt.Fprintf(&out, "d %-8s %s/\n", "-", e.Name()) + case info != nil: + size := info.Size() + sizeStr := strconv.FormatInt(size, 10) + if size > 1024 { + sizeStr = fmt.Sprintf("%.1fKB", float64(size)/1024) + } + fmt.Fprintf(&out, "f %-8s %s\n", sizeStr, e.Name()) + default: + fmt.Fprintf(&out, "f %-8s %s\n", "?", e.Name()) + } + } + if out.Len() == 0 { + return "(empty directory)", true + } + return strings.TrimRight(out.String(), "\n"), true + case "go": + // Allow all go subcommands + if len(args) == 0 { + return "[error] usage: go <subcommand> [options]", true + } + cmd := exec.Command("go", args...) 
+ cmd.Dir = cfg.FilePickerDir + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Sprintf("[error] go %s: %v\n%s", args[0], err, string(output)), true + } + return string(output), true + case "cp": + if len(args) < 2 { + return "[error] usage: cp <source> <dest>", true + } + src := args[0] + dst := args[1] + if !filepath.IsAbs(src) { + src = filepath.Join(cfg.FilePickerDir, src) + } + if !filepath.IsAbs(dst) { + dst = filepath.Join(cfg.FilePickerDir, dst) + } + data, err := os.ReadFile(src) + if err != nil { + return fmt.Sprintf("[error] cp: %v", err), true + } + err = os.WriteFile(dst, data, 0644) + if err != nil { + return fmt.Sprintf("[error] cp: %v", err), true + } + return "Copied " + src + " to " + dst, true + case "mv": + if len(args) < 2 { + return "[error] usage: mv <source> <dest>", true + } + src := args[0] + dst := args[1] + if !filepath.IsAbs(src) { + src = filepath.Join(cfg.FilePickerDir, src) + } + if !filepath.IsAbs(dst) { + dst = filepath.Join(cfg.FilePickerDir, dst) + } + err := os.Rename(src, dst) + if err != nil { + return fmt.Sprintf("[error] mv: %v", err), true + } + return "Moved " + src + " to " + dst, true + case "rm": + if len(args) == 0 { + return "[error] usage: rm [-r] <file>", true + } + recursive := false + var target string + for _, a := range args { + if a == "-r" || a == "-rf" || a == "-fr" || a == "-recursive" { + recursive = true + } else if target == "" { + target = a + } + } + if target == "" { + return "[error] usage: rm [-r] <file>", true + } + abs := target + if !filepath.IsAbs(target) { + abs = filepath.Join(cfg.FilePickerDir, target) + } + info, err := os.Stat(abs) + if err != nil { + return fmt.Sprintf("[error] rm: %v", err), true + } + if info.IsDir() { + if recursive { + err = os.RemoveAll(abs) + if err != nil { + return fmt.Sprintf("[error] rm: %v", err), true + } + return "Removed " + abs, true + } + return "[error] rm: is a directory (use -r)", true + } + err = os.Remove(abs) + if err != nil { + 
return fmt.Sprintf("[error] rm: %v", err), true + } + return "Removed " + abs, true + } + return "", false +} diff --git a/tools/fs.go b/tools/fs.go new file mode 100644 index 0000000..fb43084 --- /dev/null +++ b/tools/fs.go @@ -0,0 +1,755 @@ +package tools + +import ( + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "gf-lt/models" + "os" + "os/exec" + "path/filepath" + "sort" + "strconv" + "strings" + "time" +) + +var memoryStore MemoryStore +var agentRole string + +type MemoryStore interface { + Memorise(agent, topic, data string) (string, error) + Recall(agent, topic string) (string, error) + RecallTopics(agent string) ([]string, error) + Forget(agent, topic string) error +} + +func SetMemoryStore(store MemoryStore, role string) { + memoryStore = store + agentRole = role +} + +func SetFSRoot(dir string) { + if cfg == nil { + return + } + cfg.FilePickerDir = dir +} + +func GetFSRoot() string { + return cfg.FilePickerDir +} + +func SetFSCwd(dir string) error { + abs, err := filepath.Abs(dir) + if err != nil { + return err + } + info, err := os.Stat(abs) + if err != nil { + return err + } + if !info.IsDir() { + return fmt.Errorf("not a directory: %s", dir) + } + cfg.FilePickerDir = abs + return nil +} + +func resolvePath(rel string) (string, error) { + if cfg.FilePickerDir == "" { + return "", errors.New("fs root not set") + } + if filepath.IsAbs(rel) { + abs := filepath.Clean(rel) + if !strings.HasPrefix(abs, cfg.FilePickerDir+string(os.PathSeparator)) && abs != cfg.FilePickerDir { + return "", fmt.Errorf("path escapes fs root: %s", rel) + } + return abs, nil + } + abs := filepath.Join(cfg.FilePickerDir, rel) + abs = filepath.Clean(abs) + if !strings.HasPrefix(abs, cfg.FilePickerDir+string(os.PathSeparator)) && abs != cfg.FilePickerDir { + return "", fmt.Errorf("path escapes fs root: %s", rel) + } + return abs, nil +} + +func humanSize(n int64) string { + switch { + case n >= 1<<20: + return fmt.Sprintf("%.1fMB", float64(n)/float64(1<<20)) + case n >= 
// IsImageFile reports whether path carries one of the recognised image
// extensions: .png, .jpg, .jpeg, .gif, .webp or .svg. The comparison ignores
// case and looks only at the extension, never at the file's contents.
func IsImageFile(path string) bool {
	switch strings.ToLower(filepath.Ext(path)) {
	case ".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg":
		return true
	default:
		return false
	}
}
// FsSee forwards its arguments to FsViewImg and returns that result unchanged.
//
// Deprecated: use FsViewImg instead.
func FsSee(args []string, stdin string) string {
	return FsViewImg(args, stdin)
}
fmt.Sprintf("Written %s → %s", size, path) + if IsImageFile(path) { + result += fmt.Sprintf("\n", abs) + } + return result +} + +func FsStat(args []string, stdin string) string { + if len(args) == 0 { + return "[error] usage: stat <path>" + } + abs, err := resolvePath(args[0]) + if err != nil { + return fmt.Sprintf("[error] %v", err) + } + info, err := os.Stat(abs) + if err != nil { + return fmt.Sprintf("[error] stat: %v", err) + } + mime := "application/octet-stream" + if IsImageFile(args[0]) { + ext := strings.ToLower(filepath.Ext(args[0])) + switch ext { + case ".png": + mime = "image/png" + case ".jpg", ".jpeg": + mime = "image/jpeg" + case ".gif": + mime = "image/gif" + case ".webp": + mime = "image/webp" + case ".svg": + mime = "image/svg+xml" + } + } + var out strings.Builder + fmt.Fprintf(&out, "File: %s\n", args[0]) + fmt.Fprintf(&out, "Size: %s (%d bytes)\n", humanSize(info.Size()), info.Size()) + fmt.Fprintf(&out, "Type: %s\n", mime) + fmt.Fprintf(&out, "Modified: %s\n", info.ModTime().Format(time.RFC3339)) + if info.IsDir() { + fmt.Fprintf(&out, "Kind: directory\n") + } + return strings.TrimRight(out.String(), "\n") +} + +func FsRm(args []string, stdin string) string { + if len(args) == 0 { + return "[error] usage: rm <path>" + } + abs, err := resolvePath(args[0]) + if err != nil { + return fmt.Sprintf("[error] %v", err) + } + if err := os.RemoveAll(abs); err != nil { + return fmt.Sprintf("[error] rm: %v", err) + } + return "Removed " + args[0] +} + +func FsCp(args []string, stdin string) string { + if len(args) < 2 { + return "[error] usage: cp <src> <dst>" + } + srcAbs, err := resolvePath(args[0]) + if err != nil { + return fmt.Sprintf("[error] %v", err) + } + dstAbs, err := resolvePath(args[1]) + if err != nil { + return fmt.Sprintf("[error] %v", err) + } + data, err := os.ReadFile(srcAbs) + if err != nil { + return fmt.Sprintf("[error] cp read: %v", err) + } + if err := os.MkdirAll(filepath.Dir(dstAbs), 0o755); err != nil { + return 
// FsEcho returns stdin verbatim when any was piped in; with no piped input
// it returns the arguments joined by single spaces.
func FsEcho(args []string, stdin string) string {
	if stdin == "" {
		return strings.Join(args, " ")
	}
	return stdin
}
// FsTail returns the last n lines of stdin (default 10).
//
// n is taken from "-n N" or from a bare integer argument; other arguments
// starting with "-" are ignored. When n is not positive, or stdin has no more
// than n lines, the input is returned unchanged.
//
// A final newline is treated as the terminator of the last line rather than
// as the start of an extra empty line, and is preserved in the output.
// Without this, `tail -n 1` on newline-terminated text returned an empty
// string.
func FsTail(args []string, stdin string) string {
	n := 10
	for i, a := range args {
		if a == "-n" && i+1 < len(args) {
			if parsed, err := strconv.Atoi(args[i+1]); err == nil {
				n = parsed
			}
		} else if strings.HasPrefix(a, "-") {
			continue
		} else if parsed, err := strconv.Atoi(a); err == nil {
			n = parsed
		}
	}

	terminated := strings.HasSuffix(stdin, "\n")
	lines := strings.Split(strings.TrimSuffix(stdin, "\n"), "\n")
	if n > 0 && len(lines) > n {
		lines = lines[len(lines)-n:]
	}
	out := strings.Join(lines, "\n")
	if terminated {
		out += "\n"
	}
	return out
}
// FsSort sorts the lines of stdin and returns them joined by newlines.
//
// Flags: -r reverses the order, -n compares lines as integers. With -n the
// error from strconv.Atoi is deliberately ignored, so a line that is not an
// integer is ordered by whatever value Atoi returned for it (0 for
// non-numeric text).
//
// A final newline is treated as the terminator of the last line and is kept
// at the end of the output; previously it produced an empty line that sorted
// to the top. The sort is stable, so lines with equal keys keep their input
// order.
func FsSort(args []string, stdin string) string {
	reverse, numeric := false, false
	for _, a := range args {
		switch a {
		case "-r":
			reverse = true
		case "-n":
			numeric = true
		}
	}

	terminated := strings.HasSuffix(stdin, "\n")
	lines := strings.Split(strings.TrimSuffix(stdin, "\n"), "\n")

	sort.SliceStable(lines, func(i, j int) bool {
		if numeric {
			ni, _ := strconv.Atoi(lines[i]) // parse failure: see doc comment
			nj, _ := strconv.Atoi(lines[j])
			if reverse {
				return ni > nj
			}
			return ni < nj
		}
		if reverse {
			return lines[i] > lines[j]
		}
		return lines[i] < lines[j]
	})

	out := strings.Join(lines, "\n")
	if terminated {
		out += "\n"
	}
	return out
}
Allowed: status, log, diff, show, branch, reflog, rev-parse, shortlog, describe, rev-list", subcmd) + } + abs, err := resolvePath(".") + if err != nil { + return fmt.Sprintf("[error] git: %v", err) + } + // Pass all args to git (first arg is subcommand, rest are options) + cmd := exec.Command("git", args...) + cmd.Dir = abs + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Sprintf("[error] git %s: %v\n%s", subcmd, err, string(output)) + } + return string(output) +} + +func FsPwd(args []string, stdin string) string { + return cfg.FilePickerDir +} + +func FsCd(args []string, stdin string) string { + if len(args) == 0 { + return "[error] usage: cd <dir>" + } + dir := args[0] + abs, err := resolvePath(dir) + if err != nil { + return fmt.Sprintf("[error] cd: %v", err) + } + info, err := os.Stat(abs) + if err != nil { + return fmt.Sprintf("[error] cd: %v", err) + } + if !info.IsDir() { + return "[error] cd: not a directory: " + dir + } + cfg.FilePickerDir = abs + return "Changed directory to: " + cfg.FilePickerDir +} + +func FsSed(args []string, stdin string) string { + if len(args) == 0 { + return "[error] usage: sed 's/old/new/[g]' [file]" + } + inPlace := false + var filePath string + var pattern string + for _, a := range args { + switch a { + case "-i", "--in-place": + inPlace = true + default: + if strings.HasPrefix(a, "s") && len(a) > 1 { + pattern = a + } else if filePath == "" && !strings.HasPrefix(a, "-") { + filePath = a + } + } + } + if pattern == "" { + return "[error] usage: sed 's/old/new/[g]' [file]" + } + // Parse pattern: s/old/new/flags + parts := strings.Split(pattern[1:], "/") + if len(parts) < 2 { + return "[error] invalid sed pattern. 
Use: s/old/new/[g]" + } + oldStr := parts[0] + newStr := parts[1] + global := len(parts) >= 3 && strings.Contains(parts[2], "g") + var content string + switch { + case filePath != "" && stdin == "": + abs, err := resolvePath(filePath) + if err != nil { + return fmt.Sprintf("[error] sed: %v", err) + } + data, err := os.ReadFile(abs) + if err != nil { + return fmt.Sprintf("[error] sed: %v", err) + } + content = string(data) + case stdin != "": + content = stdin + default: + return "[error] sed: no input (use file path or pipe from stdin)" + } + // Apply sed replacement + if global { + content = strings.ReplaceAll(content, oldStr, newStr) + } else { + content = strings.Replace(content, oldStr, newStr, 1) + } + if inPlace && filePath != "" { + abs, err := resolvePath(filePath) + if err != nil { + return fmt.Sprintf("[error] sed: %v", err) + } + if err := os.WriteFile(abs, []byte(content), 0644); err != nil { + return fmt.Sprintf("[error] sed: %v", err) + } + return "Modified " + filePath + } + return content +} + +func FsMemory(args []string, stdin string) string { + if len(args) == 0 { + return "[error] usage: memory store <topic> <data> | memory get <topic> | memory list | memory forget <topic>" + } + if memoryStore == nil { + return "[error] memory store not initialized" + } + switch args[0] { + case "store": + if len(args) < 3 && stdin == "" { + return "[error] usage: memory store <topic> <data>" + } + topic := args[1] + var data string + if len(args) >= 3 { + data = strings.Join(args[2:], " ") + } else { + data = stdin + } + _, err := memoryStore.Memorise(agentRole, topic, data) + if err != nil { + return fmt.Sprintf("[error] failed to store: %v", err) + } + return "Stored under topic: " + topic + case "get": + if len(args) < 2 { + return "[error] usage: memory get <topic>" + } + topic := args[1] + data, err := memoryStore.Recall(agentRole, topic) + if err != nil { + return fmt.Sprintf("[error] failed to recall: %v", err) + } + return fmt.Sprintf("Topic: %s\n%s", 
topic, data) + case "list", "topics": + topics, err := memoryStore.RecallTopics(agentRole) + if err != nil { + return fmt.Sprintf("[error] failed to list topics: %v", err) + } + if len(topics) == 0 { + return "No topics stored." + } + return "Topics: " + strings.Join(topics, ", ") + case "forget", "delete": + if len(args) < 2 { + return "[error] usage: memory forget <topic>" + } + topic := args[1] + err := memoryStore.Forget(agentRole, topic) + if err != nil { + return fmt.Sprintf("[error] failed to forget: %v", err) + } + return "Deleted topic: " + topic + default: + return fmt.Sprintf("[error] unknown subcommand: %s. Use: store, get, list, topics, forget, delete", args[0]) + } +} diff --git a/tools/pw.go b/tools/pw.go new file mode 100644 index 0000000..05b1390 --- /dev/null +++ b/tools/pw.go @@ -0,0 +1,645 @@ +package tools + +import ( + "encoding/json" + "fmt" + "gf-lt/models" + "os" + "strconv" + "strings" + "sync" + + "github.com/playwright-community/playwright-go" +) + +var ( + pw *playwright.Playwright + browser playwright.Browser + browserStarted bool + browserStartMu sync.Mutex + page playwright.Page +) + +func PwShutDown() error { + if pw == nil { + return nil + } + pwStop(nil) + return pw.Stop() +} + +func InstallPW() error { + err := playwright.Install(&playwright.RunOptions{Verbose: false}) + if err != nil { + logger.Warn("playwright not available", "error", err) + return err + } + return nil +} + +func CheckPlaywright() error { + var err error + pw, err = playwright.Run() + if err != nil { + logger.Warn("playwright not available", "error", err) + return err + } + return nil +} + +func pwStart(args map[string]string) []byte { + browserStartMu.Lock() + defer browserStartMu.Unlock() + if browserStarted { + return []byte(`{"error": "Browser already started"}`) + } + var err error + browser, err = pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{ + Headless: playwright.Bool(!cfg.PlaywrightDebug), + }) + if err != nil { + return 
[]byte(fmt.Sprintf(`{"error": "failed to launch browser: %s"}`, err.Error())) + } + page, err = browser.NewPage() + if err != nil { + browser.Close() + return []byte(fmt.Sprintf(`{"error": "failed to create page: %s"}`, err.Error())) + } + browserStarted = true + return []byte(`{"success": true, "message": "Browser started"}`) +} + +func pwStop(args map[string]string) []byte { + browserStartMu.Lock() + defer browserStartMu.Unlock() + if !browserStarted { + return []byte(`{"success": true, "message": "Browser was not running"}`) + } + if page != nil { + page.Close() + page = nil + } + if browser != nil { + browser.Close() + browser = nil + } + browserStarted = false + return []byte(`{"success": true, "message": "Browser stopped"}`) +} + +func pwIsRunning(args map[string]string) []byte { + if browserStarted { + return []byte(`{"running": true, "message": "Browser is running"}`) + } + return []byte(`{"running": false, "message": "Browser is not running"}`) +} + +func pwNavigate(args map[string]string) []byte { + url, ok := args["url"] + if !ok || url == "" { + return []byte(`{"error": "url not provided"}`) + } + if !browserStarted || page == nil { + return []byte(`{"error": "Browser not started. Call pw_start first."}`) + } + _, err := page.Goto(url) + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to navigate: %s"}`, err.Error())) + } + title, _ := page.Title() + pageURL := page.URL() + return []byte(fmt.Sprintf(`{"success": true, "title": "%s", "url": "%s"}`, title, pageURL)) +} + +func pwClick(args map[string]string) []byte { + selector, ok := args["selector"] + if !ok || selector == "" { + return []byte(`{"error": "selector not provided"}`) + } + if !browserStarted || page == nil { + return []byte(`{"error": "Browser not started. 
Call pw_start first."}`) + } + index := 0 + if args["index"] != "" { + if i, err := strconv.Atoi(args["index"]); err != nil { + logger.Warn("failed to parse index", "value", args["index"], "error", err) + } else { + index = i + } + } + locator := page.Locator(selector) + count, err := locator.Count() + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error())) + } + if index >= count { + return []byte(fmt.Sprintf(`{"error": "Element not found at index %d (found %d elements)"}`, index, count)) + } + err = locator.Nth(index).Click() + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to click: %s"}`, err.Error())) + } + return []byte(`{"success": true, "message": "Clicked element"}`) +} + +func pwFill(args map[string]string) []byte { + selector, ok := args["selector"] + if !ok || selector == "" { + return []byte(`{"error": "selector not provided"}`) + } + text := args["text"] + if text == "" { + text = "" + } + if !browserStarted || page == nil { + return []byte(`{"error": "Browser not started. 
Call pw_start first."}`) + } + index := 0 + if args["index"] != "" { + if i, err := strconv.Atoi(args["index"]); err != nil { + logger.Warn("failed to parse index", "value", args["index"], "error", err) + } else { + index = i + } + } + locator := page.Locator(selector) + count, err := locator.Count() + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error())) + } + if index >= count { + return []byte(fmt.Sprintf(`{"error": "Element not found at index %d"}`, index)) + } + err = locator.Nth(index).Fill(text) + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to fill: %s"}`, err.Error())) + } + return []byte(`{"success": true, "message": "Filled input"}`) +} + +func pwExtractText(args map[string]string) []byte { + selector := args["selector"] + if selector == "" { + selector = "body" + } + if !browserStarted || page == nil { + return []byte(`{"error": "Browser not started. Call pw_start first."}`) + } + locator := page.Locator(selector) + count, err := locator.Count() + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error())) + } + if count == 0 { + return []byte(`{"error": "No elements found"}`) + } + if selector == "body" { + text, err := page.Locator("body").TextContent() + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to get text: %s"}`, err.Error())) + } + return []byte(fmt.Sprintf(`{"text": "%s"}`, text)) + } + var texts []string + for i := 0; i < count; i++ { + text, err := locator.Nth(i).TextContent() + if err != nil { + continue + } + texts = append(texts, text) + } + return []byte(fmt.Sprintf(`{"text": "%s"}`, joinLines(texts))) +} + +func joinLines(lines []string) string { + var sb strings.Builder + for i, line := range lines { + if i > 0 { + sb.WriteString("\n") + } + sb.WriteString(line) + } + return sb.String() +} + +func pwScreenshot(args map[string]string) []byte { + selector := args["selector"] + fullPage := args["full_page"] == 
"true" + if !browserStarted || page == nil { + return []byte(`{"error": "Browser not started. Call pw_start first."}`) + } + path := fmt.Sprintf("/tmp/pw_screenshot_%d.png", os.Getpid()) + var err error + if selector != "" && selector != "body" { + locator := page.Locator(selector) + _, err = locator.Screenshot(playwright.LocatorScreenshotOptions{ + Path: playwright.String(path), + }) + } else { + _, err = page.Screenshot(playwright.PageScreenshotOptions{ + Path: playwright.String(path), + FullPage: playwright.Bool(fullPage), + }) + } + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to take screenshot: %s"}`, err.Error())) + } + return []byte(fmt.Sprintf(`{"path": "%s"}`, path)) +} + +func pwScreenshotAndView(args map[string]string) []byte { + selector := args["selector"] + fullPage := args["full_page"] == "true" + if !browserStarted || page == nil { + return []byte(`{"error": "Browser not started. Call pw_start first."}`) + } + path := fmt.Sprintf("/tmp/pw_screenshot_%d.png", os.Getpid()) + var err error + if selector != "" && selector != "body" { + locator := page.Locator(selector) + _, err = locator.Screenshot(playwright.LocatorScreenshotOptions{ + Path: playwright.String(path), + }) + } else { + _, err = page.Screenshot(playwright.PageScreenshotOptions{ + Path: playwright.String(path), + FullPage: playwright.Bool(fullPage), + }) + } + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to take screenshot: %s"}`, err.Error())) + } + dataURL, err := models.CreateImageURLFromPath(path) + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to create image URL: %s"}`, err.Error())) + } + resp := models.MultimodalToolResp{ + Type: "multimodal_content", + Parts: []map[string]string{ + {"type": "text", "text": "Screenshot saved: " + path}, + {"type": "image_url", "url": dataURL}, + }, + } + jsonResult, err := json.Marshal(resp) + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to marshal result: %s"}`, err.Error())) 
+ } + return jsonResult +} + +func pwWaitForSelector(args map[string]string) []byte { + selector, ok := args["selector"] + if !ok || selector == "" { + return []byte(`{"error": "selector not provided"}`) + } + if !browserStarted || page == nil { + return []byte(`{"error": "Browser not started. Call pw_start first."}`) + } + timeout := 30000 + if args["timeout"] != "" { + if t, err := strconv.Atoi(args["timeout"]); err != nil { + logger.Warn("failed to parse timeout", "value", args["timeout"], "error", err) + } else { + timeout = t + } + } + locator := page.Locator(selector) + err := locator.WaitFor(playwright.LocatorWaitForOptions{ + Timeout: playwright.Float(float64(timeout)), + }) + if err != nil { + return []byte(fmt.Sprintf(`{"error": "element not found: %s"}`, err.Error())) + } + return []byte(`{"success": true, "message": "Element found"}`) +} + +func pwDrag(args map[string]string) []byte { + x1, ok := args["x1"] + if !ok { + return []byte(`{"error": "x1 not provided"}`) + } + y1, ok := args["y1"] + if !ok { + return []byte(`{"error": "y1 not provided"}`) + } + x2, ok := args["x2"] + if !ok { + return []byte(`{"error": "x2 not provided"}`) + } + y2, ok := args["y2"] + if !ok { + return []byte(`{"error": "y2 not provided"}`) + } + if !browserStarted || page == nil { + return []byte(`{"error": "Browser not started. 
Call pw_start first."}`) + } + var fx1, fy1, fx2, fy2 float64 + if parsedX1, err := strconv.ParseFloat(x1, 64); err != nil { + logger.Warn("failed to parse x1", "value", x1, "error", err) + } else { + fx1 = parsedX1 + } + if parsedY1, err := strconv.ParseFloat(y1, 64); err != nil { + logger.Warn("failed to parse y1", "value", y1, "error", err) + } else { + fy1 = parsedY1 + } + if parsedX2, err := strconv.ParseFloat(x2, 64); err != nil { + logger.Warn("failed to parse x2", "value", x2, "error", err) + } else { + fx2 = parsedX2 + } + if parsedY2, err := strconv.ParseFloat(y2, 64); err != nil { + logger.Warn("failed to parse y2", "value", y2, "error", err) + } else { + fy2 = parsedY2 + } + mouse := page.Mouse() + err := mouse.Move(fx1, fy1) + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to move mouse: %s"}`, err.Error())) + } + err = mouse.Down() + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to mouse down: %s"}`, err.Error())) + } + err = mouse.Move(fx2, fy2) + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to move mouse: %s"}`, err.Error())) + } + err = mouse.Up() + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to mouse up: %s"}`, err.Error())) + } + return []byte(fmt.Sprintf(`{"success": true, "message": "Dragged from (%s,%s) to (%s,%s)"}`, x1, y1, x2, y2)) +} + +func pwDragBySelector(args map[string]string) []byte { + fromSelector, ok := args["fromSelector"] + if !ok || fromSelector == "" { + return []byte(`{"error": "fromSelector not provided"}`) + } + toSelector, ok := args["toSelector"] + if !ok || toSelector == "" { + return []byte(`{"error": "toSelector not provided"}`) + } + if !browserStarted || page == nil { + return []byte(`{"error": "Browser not started. 
Call pw_start first."}`) + } + fromJS := fmt.Sprintf(` + function getCenter(selector) { + const el = document.querySelector(selector); + if (!el) return null; + const rect = el.getBoundingClientRect(); + return { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }; + } + getCenter(%q) + `, fromSelector) + toJS := fmt.Sprintf(` + function getCenter(selector) { + const el = document.querySelector(selector); + if (!el) return null; + const rect = el.getBoundingClientRect(); + return { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }; + } + getCenter(%q) + `, toSelector) + fromResult, err := page.Evaluate(fromJS) + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to get from element: %s"}`, err.Error())) + } + fromMap, ok := fromResult.(map[string]interface{}) + if !ok || fromMap == nil { + return []byte(fmt.Sprintf(`{"error": "from selector '%s' not found"}`, fromSelector)) + } + fromX := fromMap["x"].(float64) + fromY := fromMap["y"].(float64) + toResult, err := page.Evaluate(toJS) + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to get to element: %s"}`, err.Error())) + } + toMap, ok := toResult.(map[string]interface{}) + if !ok || toMap == nil { + return []byte(fmt.Sprintf(`{"error": "to selector '%s' not found"}`, toSelector)) + } + toX := toMap["x"].(float64) + toY := toMap["y"].(float64) + mouse := page.Mouse() + err = mouse.Move(fromX, fromY) + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to move mouse: %s"}`, err.Error())) + } + err = mouse.Down() + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to mouse down: %s"}`, err.Error())) + } + err = mouse.Move(toX, toY) + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to move mouse: %s"}`, err.Error())) + } + err = mouse.Up() + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to mouse up: %s"}`, err.Error())) + } + msg := fmt.Sprintf("Dragged from %s (%.0f,%.0f) to %s (%.0f,%.0f)", fromSelector, 
fromX, fromY, toSelector, toX, toY) + return []byte(fmt.Sprintf(`{"success": true, "message": "%s"}`, msg)) +} + +// nolint:unused +func pwClickAt(args map[string]string) []byte { + x, ok := args["x"] + if !ok { + return []byte(`{"error": "x not provided"}`) + } + y, ok := args["y"] + if !ok { + return []byte(`{"error": "y not provided"}`) + } + if !browserStarted || page == nil { + return []byte(`{"error": "Browser not started. Call pw_start first."}`) + } + fx, err := strconv.ParseFloat(x, 64) + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to parse x: %s"}`, err.Error())) + } + fy, err := strconv.ParseFloat(y, 64) + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to parse y: %s"}`, err.Error())) + } + mouse := page.Mouse() + err = mouse.Click(fx, fy) + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to click: %s"}`, err.Error())) + } + return []byte(fmt.Sprintf(`{"success": true, "message": "Clicked at (%s,%s)"}`, x, y)) +} + +func pwGetHTML(args map[string]string) []byte { + selector := args["selector"] + if selector == "" { + selector = "body" + } + if !browserStarted || page == nil { + return []byte(`{"error": "Browser not started. 
Call pw_start first."}`) + } + locator := page.Locator(selector) + count, err := locator.Count() + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error())) + } + if count == 0 { + return []byte(`{"error": "No elements found"}`) + } + html, err := locator.First().InnerHTML() + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to get HTML: %s"}`, err.Error())) + } + return []byte(fmt.Sprintf(`{"html": %s}`, jsonString(html))) +} + +type DOMElement struct { + Tag string `json:"tag,omitempty"` + Attributes map[string]string `json:"attributes,omitempty"` + Text string `json:"text,omitempty"` + Children []DOMElement `json:"children,omitempty"` + Selector string `json:"selector,omitempty"` + InnerHTML string `json:"innerHTML,omitempty"` +} + +func buildDOMTree(locator playwright.Locator) ([]DOMElement, error) { + var results []DOMElement + count, err := locator.Count() + if err != nil { + return nil, err + } + for i := 0; i < count; i++ { + el := locator.Nth(i) + dom, err := elementToDOM(el) + if err != nil { + continue + } + results = append(results, dom) + } + return results, nil +} + +func elementToDOM(el playwright.Locator) (DOMElement, error) { + dom := DOMElement{} + tag, err := el.Evaluate(`el => el.nodeName`, nil) + if err == nil { + dom.Tag = strings.ToLower(fmt.Sprintf("%v", tag)) + } + attributes := make(map[string]string) + attrs, err := el.Evaluate(`el => { + let attrs = {}; + for (let i = 0; i < el.attributes.length; i++) { + let attr = el.attributes[i]; + attrs[attr.name] = attr.value; + } + return attrs; + }`, nil) + if err == nil { + if amap, ok := attrs.(map[string]any); ok { + for k, v := range amap { + if vs, ok := v.(string); ok { + attributes[k] = vs + } + } + } + } + if len(attributes) > 0 { + dom.Attributes = attributes + } + text, err := el.TextContent() + if err == nil && text != "" { + dom.Text = text + } + innerHTML, err := el.InnerHTML() + if err == nil && innerHTML != "" { + 
dom.InnerHTML = innerHTML + } + childCount, _ := el.Count() + if childCount > 0 { + childrenLocator := el.Locator("*") + children, err := buildDOMTree(childrenLocator) + if err == nil && len(children) > 0 { + dom.Children = children + } + } + return dom, nil +} + +func pwGetDOM(args map[string]string) []byte { + selector := args["selector"] + if selector == "" { + selector = "body" + } + if !browserStarted || page == nil { + return []byte(`{"error": "Browser not started. Call pw_start first."}`) + } + locator := page.Locator(selector) + count, err := locator.Count() + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error())) + } + if count == 0 { + return []byte(`{"error": "No elements found"}`) + } + dom, err := elementToDOM(locator.First()) + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to get DOM: %s"}`, err.Error())) + } + data, err := json.Marshal(dom) + if err != nil { + return []byte(fmt.Sprintf(`{"error": "failed to marshal DOM: %s"}`, err.Error())) + } + return []byte(fmt.Sprintf(`{"dom": %s}`, string(data))) +} + +// nolint:unused +func pwSearchElements(args map[string]string) []byte { + text := args["text"] + selector := args["selector"] + if text == "" && selector == "" { + return []byte(`{"error": "text or selector not provided"}`) + } + if !browserStarted || page == nil { + return []byte(`{"error": "Browser not started. 
// jsonString returns s encoded as a JSON string literal, surrounding double
// quotes included, so it can be spliced directly into a hand-built JSON reply.
func jsonString(s string) string {
	// Encoding a plain string does not fail, so the error is discarded.
	encoded, _ := json.Marshal(s)
	return string(encoded)
}
Use run \"help\" for all commands. Examples: run \"ls -la\", run \"help\", run \"mkdir -p foo/bar\", run \"cat file.txt\", run \"view_img image.png\", run \"git status\", run \"memory store foo bar\", run \"todo create task\", run \"grep pattern file\", run \"cd /path\", run \"pwd\", run \"find . -name *.txt\", run \"file image.png\", run \"head file\", run \"tail file\", run \"wc -l file\", run \"sort file\", run \"uniq file\", run \"sed 's/old/new/' file\", run \"echo text\", run \"go build ./...\", run \"time\", run \"stat file\", run \"cp src dst\", run \"mv src dst\", run \"rm file\"" +}, +{ +"name":"view_img", +"args": ["file"], +"when_to_use": "View an image file and get it displayed in the conversation for visual analysis. Supports: png, jpg, jpeg, gif, webp, svg. Example: view_img /path/to/image.png or view_img image.png" +}, +{ +"name":"websearch", +"args": ["query", "limit"], +"when_to_use": "search the web for information" +}, +{ +"name":"rag_search", +"args": ["query", "limit"], +"when_to_use": "search local document database" +}, +{ +"name":"read_url", +"args": ["url"], +"when_to_use": "get content from a webpage" +}, +{ +"name":"read_url_raw", +"args": ["url"], +"when_to_use": "get raw content from a webpage" +} +] +</tools> +To make a function call return a json object within __tool_call__ tags; +<example_request> +__tool_call__ +{ +"name":"run", +"args": {"command": "ls -la /home"} +} +__tool_call__ +</example_request> +<example_request> +__tool_call__ +{ +"name":"view_img", +"args": {"file": "screenshot.png"} +} +__tool_call__ +</example_request> +Tool call is addressed to the tool agent, avoid sending more info than the tool call itself, while making a call. +When done right, tool call will be delivered to the tool agent. tool agent will respond with the results of the call. +<example_response> +tool: +total 1234 +drwxr-xr-x 2 user user 4096 Jan 1 12:00 . +</example_response> +After that you are free to respond to the user. 
+` + webSearchSysPrompt = `Summarize the web search results, extracting key information and presenting a concise answer. Provide sources and URLs where relevant.` + ragSearchSysPrompt = `Synthesize the document search results, extracting key information and presenting a concise answer. Provide sources and document IDs where relevant.` + readURLSysPrompt = `Extract and summarize the content from the webpage. Provide key information, main points, and any relevant details.` + summarySysPrompt = `Please provide a concise summary of the following conversation. Focus on key points, decisions, and actions. Provide only the summary, no additional commentary.` +) + +var WebSearcher searcher.WebSurfer + +var ( + xdotoolPath string + maimPath string + logger *slog.Logger + cfg *config.Config + getTokenFunc func() string +) + +type Tools struct { + cfg *config.Config + logger *slog.Logger + store storage.FullRepo + WindowToolsAvailable bool + // getTokenFunc func() string + webAgentClient *agent.AgentClient + webAgentClientOnce sync.Once + webSearchAgent agent.AgenterB +} + +func (t *Tools) initAgentsB() { + t.GetWebAgentClient() + t.webSearchAgent = agent.NewWebAgentB(t.webAgentClient, webSearchSysPrompt) + agent.RegisterB("rag_search", agent.NewWebAgentB(t.webAgentClient, ragSearchSysPrompt)) + // Register websearch agent + agent.RegisterB("websearch", agent.NewWebAgentB(t.webAgentClient, webSearchSysPrompt)) + // Register read_url agent + agent.RegisterB("read_url", agent.NewWebAgentB(t.webAgentClient, readURLSysPrompt)) + // Register summarize_chat agent + agent.RegisterB("summarize_chat", agent.NewWebAgentB(t.webAgentClient, summarySysPrompt)) +} + +func InitTools(initCfg *config.Config, logger *slog.Logger, store storage.FullRepo) *Tools { + logger = logger + cfg = initCfg + if initCfg.PlaywrightEnabled { + if err := CheckPlaywright(); err != nil { + // slow, need a faster check if playwright install + if err := InstallPW(); err != nil { + logger.Error("failed to install 
playwright", "error", err) + os.Exit(1) + return nil + } + if err := CheckPlaywright(); err != nil { + logger.Error("failed to run playwright", "error", err) + os.Exit(1) + return nil + } + } + } + // Initialize fs root directory + SetFSRoot(cfg.FilePickerDir) + // Initialize memory store + SetMemoryStore(&memoryAdapter{store: store, cfg: cfg}, cfg.AssistantRole) + sa, err := searcher.NewWebSurfer(searcher.SearcherTypeScraper, "") + if err != nil { + if logger != nil { + logger.Warn("search agent unavailable; web_search tool disabled", "error", err) + } + WebSearcher = nil + } else { + WebSearcher = sa + } + if err := rag.Init(cfg, logger, store); err != nil { + logger.Warn("failed to init rag; rag_search tool will not be available", "error", err) + } + t := &Tools{ + cfg: cfg, + logger: logger, + store: store, + } + t.checkWindowTools() + t.initAgentsB() + return t +} + +func (t *Tools) checkWindowTools() { + xdotoolPath, _ = exec.LookPath("xdotool") + maimPath, _ = exec.LookPath("maim") + t.WindowToolsAvailable = xdotoolPath != "" && maimPath != "" + if t.WindowToolsAvailable { + t.logger.Info("window tools available: xdotool and maim found") + } else { + if xdotoolPath == "" { + t.logger.Warn("xdotool not found, window listing tools will not be available") + } + if maimPath == "" { + t.logger.Warn("maim not found, window capture tools will not be available") + } + } +} + +func SetTokenFunc(fn func() string) { + getTokenFunc = fn +} + +func (t *Tools) GetWebAgentClient() *agent.AgentClient { + t.webAgentClientOnce.Do(func() { + getToken := func() string { + if getTokenFunc != nil { + return getTokenFunc() + } + return "" + } + t.webAgentClient = agent.NewAgentClient(cfg, logger, getToken) + }) + return t.webAgentClient +} + +func RegisterWindowTools(modelHasVision bool) { + removeWindowToolsFromBaseTools() + // Window tools registration happens here if needed +} + +// func RegisterPlaywrightTools() { +// removePlaywrightToolsFromBaseTools() +// if cfg != nil && 
cfg.PlaywrightEnabled { +// // Playwright tools are registered here +// } +// } + +func websearch(args map[string]string) []byte { + // make http request return bytes + query, ok := args["query"] + if !ok || query == "" { + msg := "query not provided to web_search tool" + logger.Error(msg) + return []byte(msg) + } + limitS, ok := args["limit"] + if !ok || limitS == "" { + limitS = "3" + } + limit, err := strconv.Atoi(limitS) + if err != nil || limit == 0 { + logger.Warn("websearch limit; passed bad value; setting to default (3)", + "limit_arg", limitS, "error", err) + limit = 3 + } + resp, err := WebSearcher.Search(context.Background(), query, limit) + if err != nil { + msg := "search tool failed; error: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + data, err := json.Marshal(resp) + if err != nil { + msg := "failed to marshal search result; error: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + return data +} + +// rag search (searches local document database) +func ragsearch(args map[string]string) []byte { + query, ok := args["query"] + if !ok || query == "" { + msg := "query not provided to rag_search tool" + logger.Error(msg) + return []byte(msg) + } + limitS, ok := args["limit"] + if !ok || limitS == "" { + limitS = "10" + } + limit, err := strconv.Atoi(limitS) + if err != nil || limit == 0 { + logger.Warn("ragsearch limit; passed bad value; setting to default (3)", + "limit_arg", limitS, "error", err) + limit = 10 + } + ragInstance := rag.GetInstance() + if ragInstance == nil { + msg := "rag not initialized; rag_search tool is not available" + logger.Error(msg) + return []byte(msg) + } + results, err := ragInstance.Search(query, limit) + if err != nil { + msg := "rag search failed; error: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + data, err := json.Marshal(results) + if err != nil { + msg := "failed to marshal rag search result; error: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + return 
data +} + +// web search raw (returns raw data without processing) +func websearchRaw(args map[string]string) []byte { + // make http request return bytes + query, ok := args["query"] + if !ok || query == "" { + msg := "query not provided to websearch_raw tool" + logger.Error(msg) + return []byte(msg) + } + limitS, ok := args["limit"] + if !ok || limitS == "" { + limitS = "3" + } + limit, err := strconv.Atoi(limitS) + if err != nil || limit == 0 { + logger.Warn("websearch_raw limit; passed bad value; setting to default (3)", + "limit_arg", limitS, "error", err) + limit = 3 + } + resp, err := WebSearcher.Search(context.Background(), query, limit) + if err != nil { + msg := "search tool failed; error: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + // Return raw response without any processing + return []byte(fmt.Sprintf("%+v", resp)) +} + +// retrieves url content (text) +func readURL(args map[string]string) []byte { + // make http request return bytes + link, ok := args["url"] + if !ok || link == "" { + msg := "link not provided to read_url tool" + logger.Error(msg) + return []byte(msg) + } + resp, err := WebSearcher.RetrieveFromLink(context.Background(), link) + if err != nil { + msg := "search tool failed; error: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + data, err := json.Marshal(resp) + if err != nil { + msg := "failed to marshal search result; error: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + return data +} + +// retrieves url content raw (returns raw content without processing) +func readURLRaw(args map[string]string) []byte { + // make http request return bytes + link, ok := args["url"] + if !ok || link == "" { + msg := "link not provided to read_url_raw tool" + logger.Error(msg) + return []byte(msg) + } + resp, err := WebSearcher.RetrieveFromLink(context.Background(), link) + if err != nil { + msg := "search tool failed; error: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + // Return raw 
response without any processing + return []byte(fmt.Sprintf("%+v", resp)) +} + +// Unified run command - single entry point for shell, memory, and todo +func runCmd(args map[string]string) []byte { + commandStr := args["command"] + if commandStr == "" { + msg := "command not provided to run tool" + logger.Error(msg) + return []byte(msg) + } + // Parse the command - first word is subcommand + parts := strings.Fields(commandStr) + if len(parts) == 0 { + return []byte("[error] empty command") + } + subcmd := parts[0] + rest := parts[1:] + // Route to appropriate handler + switch subcmd { + case "help": + // help - show all commands + // help <cmd> - show help for specific command + return []byte(getHelp(rest)) + case "memory": + // memory store <topic> <data> | memory get <topic> | memory list | memory forget <topic> + return []byte(FsMemory(append([]string{"store"}, rest...), "")) + case "todo": + return handleTodoSubcommand(rest, args) + case "window", "windows": + // window list - list all windows + return listWindows(args) + case "capture", "screenshot": + // capture <window-name> - capture a window + return captureWindow(args) + case "capture_and_view", "screenshot_and_view": + // capture and view screenshot + return captureWindowAndView(args) + case "view_img": + // view_img <file> - view image for multimodal + return []byte(FsViewImg(rest, "")) + case "browser": + // browser <action> [args...] - Playwright browser automation + return runBrowserCommand(rest, args) + case "mkdir", "ls", "cat", "pwd", "cd", "cp", "mv", "rm", "sed", "grep", "head", "tail", "wc", "sort", "uniq", "echo", "time", "stat", "go", "find", "file": + // File operations and shell commands - use ExecChain which has whitelist + return executeCommand(args) + case "git": + // git has its own whitelist in FsGit + return []byte(FsGit(rest, "")) + default: + // Unknown subcommand - tell user to run help tool + return []byte("[error] command not allowed. 
Run 'help' tool to see available commands.") + } +} + +// runBrowserCommand routes browser subcommands to Playwright handlers +func runBrowserCommand(args []string, originalArgs map[string]string) []byte { + if len(args) == 0 { + return []byte(`usage: browser <action> [args...] +Actions: + start - start browser + stop - stop browser + running - check if browser is running + go <url> - navigate to URL + click <selector> - click element + fill <selector> <text> - fill input + text [selector] - extract text + html [selector] - get HTML + dom - get DOM + screenshot [path] - take screenshot + screenshot_and_view - take and view screenshot + wait <selector> - wait for element + drag <from> <to> - drag element`) + } + action := args[0] + rest := args[1:] + switch action { + case "start": + return pwStart(originalArgs) + case "stop": + return pwStop(originalArgs) + case "running": + return pwIsRunning(originalArgs) + case "go", "navigate", "open": + // browser go <url> + url := "" + if len(rest) > 0 { + url = rest[0] + } + if url == "" { + return []byte("usage: browser go <url>") + } + return pwNavigate(map[string]string{"url": url}) + case "click": + // browser click <selector> [index] + selector := "" + index := "0" + if len(rest) > 0 { + selector = rest[0] + } + if len(rest) > 1 { + index = rest[1] + } + if selector == "" { + return []byte("usage: browser click <selector> [index]") + } + return pwClick(map[string]string{"selector": selector, "index": index}) + case "fill": + // browser fill <selector> <text> + if len(rest) < 2 { + return []byte("usage: browser fill <selector> <text>") + } + return pwFill(map[string]string{"selector": rest[0], "text": strings.Join(rest[1:], " ")}) + case "text": + // browser text [selector] + selector := "" + if len(rest) > 0 { + selector = rest[0] + } + return pwExtractText(map[string]string{"selector": selector}) + case "html": + // browser html [selector] + selector := "" + if len(rest) > 0 { + selector = rest[0] + } + return 
pwGetHTML(map[string]string{"selector": selector}) + case "dom": + return pwGetDOM(originalArgs) + case "screenshot": + // browser screenshot [path] + path := "" + if len(rest) > 0 { + path = rest[0] + } + return pwScreenshot(map[string]string{"path": path}) + case "screenshot_and_view": + // browser screenshot_and_view [path] + path := "" + if len(rest) > 0 { + path = rest[0] + } + return pwScreenshotAndView(map[string]string{"path": path}) + case "wait": + // browser wait <selector> + selector := "" + if len(rest) > 0 { + selector = rest[0] + } + if selector == "" { + return []byte("usage: browser wait <selector>") + } + return pwWaitForSelector(map[string]string{"selector": selector}) + case "drag": + // browser drag <x1> <y1> <x2> <y2> OR browser drag <from_selector> <to_selector> + if len(rest) < 4 && len(rest) < 2 { + return []byte("usage: browser drag <x1> <y1> <x2> <y2> OR browser drag <from_selector> <to_selector>") + } + // Check if first arg is a number (coordinates) or selector + _, err := strconv.Atoi(rest[0]) + _, err2 := strconv.ParseFloat(rest[0], 64) + if err == nil || err2 == nil { + // Coordinates: browser drag 100 200 300 400 + if len(rest) < 4 { + return []byte("usage: browser drag <x1> <y1> <x2> <y2>") + } + return pwDrag(map[string]string{ + "x1": rest[0], "y1": rest[1], + "x2": rest[2], "y2": rest[3], + }) + } + // Selectors: browser drag #item #container + // pwDrag needs coordinates, so we need to get element positions first + // This requires a different approach - use JavaScript to get centers + return pwDragBySelector(map[string]string{ + "fromSelector": rest[0], + "toSelector": rest[1], + }) + default: + return []byte("unknown browser action: " + action) + } +} + +// getHelp returns help text for commands +func getHelp(args []string) string { + if len(args) == 0 { + // General help - show all commands + return `Available commands: + help <cmd> - show help for a command (use: help memory, help git, etc.) 
+ + # File operations + ls [path] - list files in directory + cat <file> - read file content + view_img <file> - view image file + write <file> - write content to file + stat <file> - get file info + rm <file> - delete file + cp <src> <dst> - copy file + mv <src> <dst> - move/rename file + mkdir [-p] <dir> - create directory (use full path) + pwd - print working directory + cd <dir> - change directory + sed 's/old/new/[g]' [file] - text replacement + + # Text processing + echo <args> - echo back input + time - show current time + grep <pattern> - filter lines (supports -i, -v, -c) + head [n] - show first n lines + tail [n] - show last n lines + wc [-l|-w|-c] - count lines/words/chars + sort [-r|-n] - sort lines + uniq [-c] - remove duplicates + + # Git (read-only) + git <cmd> - git commands (status, log, diff, show, branch, etc.) + + # Go + go <cmd> - go commands (run, build, test, mod, etc.) + + # Memory + memory store <topic> <data> - save to memory + memory get <topic> - retrieve from memory + memory list - list all topics + memory forget <topic> - delete from memory + + # Todo + todo create <task> - create a todo + todo read - list all todos + todo update <id> <status> - update todo (pending/in_progress/completed) + todo delete <id> - delete a todo + + # Window (requires xdotool + maim) + window - list available windows + capture <name> - capture a window screenshot + capture_and_view <name> - capture and view screenshot + + # Browser (requires Playwright) + browser start - start browser + browser stop - stop browser + browser running - check if running + browser go <url> - navigate to URL + browser click <sel> - click element + browser fill <sel> <txt> - fill input + browser text [sel] - extract text + browser html [sel] - get HTML + browser screenshot - take screenshot + browser wait <sel> - wait for element + browser drag <x1> <y1> <x2> <y2> - drag by coordinates + browser drag <sel1> <sel2> - drag by selectors (center points) + + # System + <any shell 
command> - run shell command directly + +Use: run "command" to execute.` + } + + // Specific command help + cmd := args[0] + switch cmd { + case "ls": + return `ls [directory] + List files in a directory. + Examples: + run "ls" + run "ls /home/user" + run "ls -la" (via shell)` + case "cat": + return `cat <file> + Read file content. + Examples: + run "cat readme.md" + run "cat -b image.png" (base64 output)` + case "view_img": + return `view_img <image-file> + View an image file for multimodal analysis. + Supports: png, jpg, jpeg, gif, webp, svg + Example: + run "view_img screenshot.png"` + case "write": + return `write <file> [content] + Write content to a file. + Examples: + run "write notes.txt hello world" + run "write data.json" (with stdin)` + case "memory": + return `memory <subcommand> [args] + Manage memory storage. + Subcommands: + store <topic> <data> - save data to a topic + get <topic> - retrieve data from a topic + list - list all topics + forget <topic> - delete a topic + Examples: + run "memory store foo bar" + run "memory get foo" + run "memory list"` + case "todo": + return `todo <subcommand> [args] + Manage todo list. + Subcommands: + create <task> - create a new todo + read [id] - list all todos or read specific one + update <id> <status> - update status (pending/in_progress/completed) + delete <id> - delete a todo + Examples: + run "todo create fix bug" + run "todo read" + run "todo update 1 completed"` + case "git": + return `git <subcommand> + Read-only git commands. + Allowed: status, log, diff, show, branch, reflog, rev-parse, shortlog, describe, rev-list + Examples: + run "git status" + run "git log --oneline -5" + run "git diff HEAD~1"` + case "grep": + return `grep <pattern> [options] + Filter lines matching a pattern. + Options: + -i ignore case + -v invert match + -c count matches + Example: + run "grep error" (from stdin) + run "grep -i warn log.txt"` + case "cd": + return `cd <directory> + Change working directory. 
+ Example: + run "cd /tmp" + run "cd .."` + case "pwd": + return `pwd + Print working directory. + Example: + run "pwd"` + case "mkdir": + return `mkdir [-p] <directory> + Create a directory (use full path). + Options: + -p, --parents create parent directories as needed + Examples: + run "mkdir /full/path/myfolder" + run "mkdir -p /full/path/to/nested/folder"` + case "sed": + return `sed 's/old/new/[g]' [file] + Stream editor for text replacement. + Options: + -i in-place editing + -g global replacement (replace all) + Examples: + run "sed 's/foo/bar/' file.txt" + run "sed 's/foo/bar/g' file.txt" (global) + run "sed -i 's/foo/bar/' file.txt" (in-place) + run "cat file.txt | sed 's/foo/bar/'" (pipe from stdin)` + case "go": + return `go <command> + Go toolchain commands. + Allowed: run, build, test, mod, get, install, clean, fmt, vet, etc. + Examples: + run "go run main.go" + run "go build ./..." + run "go test ./..." + run "go mod tidy" + run "go get github.com/package"` + case "window", "windows": + return `window + List available windows. + Requires: xdotool and maim + Example: + run "window"` + case "capture", "screenshot": + return `capture <window-name-or-id> + Capture a screenshot of a window. + Requires: xdotool and maim + Examples: + run "capture Firefox" + run "capture 0x12345678" + run "capture_and_view Firefox"` + case "capture_and_view": + return `capture_and_view <window-name-or-id> + Capture a window and return for viewing. + Requires: xdotool and maim + Examples: + run "capture_and_view Firefox"` + case "browser": + return `browser <action> [args] + Playwright browser automation. 
+ Requires: Playwright browser server running + Actions: + start - start browser + stop - stop browser + running - check if browser is running + go <url> - navigate to URL + click <selector> - click element (use index for multiple: click #btn 1) + fill <selector> <text> - fill input field + text [selector] - extract text (from element or whole page) + html [selector] - get HTML (from element or whole page) + screenshot [path] - take screenshot + wait <selector> - wait for element to appear + drag <from> <to> - drag element to another element + Examples: + run "browser start" + run "browser go https://example.com" + run "browser click #submit-button" + run "browser fill #search-input hello" + run "browser text" + run "browser screenshot" + run "browser drag 100 200 300 400" + run "browser drag #item1 #container2"` + default: + return fmt.Sprintf("No help available for: %s. Use: run \"help\" for all commands.", cmd) + } +} + +// handleTodoSubcommand routes todo subcommands to existing handlers +func handleTodoSubcommand(args []string, originalArgs map[string]string) []byte { + if len(args) == 0 { + return []byte("usage: todo create|read|update|delete") + } + subcmd := args[0] + switch subcmd { + case "create": + task := strings.Join(args[1:], " ") + if task == "" { + task = originalArgs["task"] + } + if task == "" { + return []byte("usage: todo create <task>") + } + return todoCreate(map[string]string{"task": task}) + case "read": + id := "" + if len(args) > 1 { + id = args[1] + } + return todoRead(map[string]string{"id": id}) + case "update": + if len(args) < 2 { + return []byte("usage: todo update <id> <status>") + } + return todoUpdate(map[string]string{"id": args[1], "status": args[2]}) + case "delete": + if len(args) < 2 { + return []byte("usage: todo delete <id>") + } + return todoDelete(map[string]string{"id": args[1]}) + default: + return []byte("unknown todo subcommand: " + subcmd) + } +} + +// Command Execution Tool with pipe/chaining support +func 
executeCommand(args map[string]string) []byte { + commandStr := args["command"] + if commandStr == "" { + msg := "command not provided to execute_command tool" + logger.Error(msg) + return []byte(msg) + } + // Use chain execution for pipe/chaining support + result := ExecChain(commandStr) + return []byte(result) +} + +// // handleCdCommand handles the cd command to update FilePickerDir +// func handleCdCommand(args []string) []byte { +// var targetDir string +// if len(args) == 0 { +// // cd with no args goes to home directory +// homeDir, err := os.UserHomeDir() +// if err != nil { +// msg := "cd: cannot determine home directory: " + err.Error() +// logger.Error(msg) +// return []byte(msg) +// } +// targetDir = homeDir +// } else { +// targetDir = args[0] +// } +// // Resolve relative paths against current FilePickerDir +// if !filepath.IsAbs(targetDir) { +// targetDir = filepath.Join(cfg.FilePickerDir, targetDir) +// } +// // Verify the directory exists +// info, err := os.Stat(targetDir) +// if err != nil { +// msg := "cd: " + targetDir + ": " + err.Error() +// logger.Error(msg) +// return []byte(msg) +// } +// if !info.IsDir() { +// msg := "cd: " + targetDir + ": not a directory" +// logger.Error(msg) +// return []byte(msg) +// } +// // Update FilePickerDir +// absDir, err := filepath.Abs(targetDir) +// if err != nil { +// msg := "cd: failed to resolve path: " + err.Error() +// logger.Error(msg) +// return []byte(msg) +// } +// cfg.FilePickerDir = absDir +// msg := "FilePickerDir changed to: " + absDir +// return []byte(msg) +// } + +// Helper functions for command execution +// Todo structure +type TodoItem struct { + ID string `json:"id"` + Task string `json:"task"` + Status string `json:"status"` // "pending", "in_progress", "completed" +} +type TodoList struct { + Items []TodoItem `json:"items"` +} + +func (t TodoList) ToString() string { + sb := strings.Builder{} + for i := range t.Items { + fmt.Fprintf(&sb, "\n[%s] %s. 
%s\n", t.Items[i].Status, t.Items[i].ID, t.Items[i].Task) + } + return sb.String() +} + +// Global todo list storage +var globalTodoList = TodoList{ + Items: []TodoItem{}, +} + +// Todo Management Tools +func todoCreate(args map[string]string) []byte { + task, ok := args["task"] + if !ok || task == "" { + msg := "task not provided to todo_create tool" + logger.Error(msg) + return []byte(msg) + } + // Generate simple ID + id := fmt.Sprintf("todo_%d", len(globalTodoList.Items)+1) + newItem := TodoItem{ + ID: id, + Task: task, + Status: "pending", + } + globalTodoList.Items = append(globalTodoList.Items, newItem) + result := map[string]string{ + "message": "todo created successfully", + "id": id, + "task": task, + "status": "pending", + "todos": globalTodoList.ToString(), + } + jsonResult, err := json.Marshal(result) + if err != nil { + msg := "failed to marshal result; error: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + return jsonResult +} + +func todoRead(args map[string]string) []byte { + // Return all todos if no ID specified + result := map[string]interface{}{ + "todos": globalTodoList.ToString(), + } + jsonResult, err := json.Marshal(result) + if err != nil { + msg := "failed to marshal result; error: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + return jsonResult +} + +func todoUpdate(args map[string]string) []byte { + id, ok := args["id"] + if !ok || id == "" { + msg := "id not provided to todo_update tool" + logger.Error(msg) + return []byte(msg) + } + task, taskOk := args["task"] + status, statusOk := args["status"] + if !taskOk && !statusOk { + msg := "neither task nor status provided to todo_update tool" + logger.Error(msg) + return []byte(msg) + } + // Find and update the todo + for i, item := range globalTodoList.Items { + if item.ID == id { + if taskOk { + globalTodoList.Items[i].Task = task + } + if statusOk { + // Validate status + if status == "pending" || status == "in_progress" || status == "completed" { + 
globalTodoList.Items[i].Status = status + } else { + result := map[string]string{ + "error": "status must be one of: pending, in_progress, completed", + } + jsonResult, err := json.Marshal(result) + if err != nil { + msg := "failed to marshal result; error: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + return jsonResult + } + } + result := map[string]string{ + "message": "todo updated successfully", + "id": id, + "todos": globalTodoList.ToString(), + } + jsonResult, err := json.Marshal(result) + if err != nil { + msg := "failed to marshal result; error: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + return jsonResult + } + } + // ID not found + result := map[string]string{ + "error": fmt.Sprintf("todo with id %s not found", id), + } + jsonResult, err := json.Marshal(result) + if err != nil { + msg := "failed to marshal result; error: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + return jsonResult +} + +func todoDelete(args map[string]string) []byte { + id, ok := args["id"] + if !ok || id == "" { + msg := "id not provided to todo_delete tool" + logger.Error(msg) + return []byte(msg) + } + // Find and remove the todo + for i, item := range globalTodoList.Items { + if item.ID == id { + // Remove item from slice + globalTodoList.Items = append(globalTodoList.Items[:i], globalTodoList.Items[i+1:]...) 
+ result := map[string]string{ + "message": "todo deleted successfully", + "id": id, + "todos": globalTodoList.ToString(), + } + jsonResult, err := json.Marshal(result) + if err != nil { + msg := "failed to marshal result; error: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + return jsonResult + } + } + // ID not found + result := map[string]string{ + "error": fmt.Sprintf("todo with id %s not found", id), + } + jsonResult, err := json.Marshal(result) + if err != nil { + msg := "failed to marshal result; error: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + return jsonResult +} + +func viewImgTool(args map[string]string) []byte { + file, ok := args["file"] + if !ok || file == "" { + msg := "file not provided to view_img tool" + logger.Error(msg) + return []byte(msg) + } + result := FsViewImg([]string{file}, "") + return []byte(result) +} + +func helpTool(args map[string]string) []byte { + command, ok := args["command"] + var rest []string + if ok && command != "" { + parts := strings.Fields(command) + if len(parts) > 1 { + rest = parts[1:] + } + } + return []byte(getHelp(rest)) +} + +// func summarizeChat(args map[string]string) []byte { +// if len(chatBody.Messages) == 0 { +// return []byte("No chat history to summarize.") +// } +// // Format chat history for the agent +// chatText := chatToText(chatBody.Messages, true) // include system and tool messages +// return []byte(chatText) +// } + +func windowIDToHex(decimalID string) string { + id, err := strconv.ParseInt(decimalID, 10, 64) + if err != nil { + return decimalID + } + return fmt.Sprintf("0x%x", id) +} + +func listWindows(args map[string]string) []byte { + cmd := exec.Command(xdotoolPath, "search", "--name", ".") + output, err := cmd.Output() + if err != nil { + msg := "failed to list windows: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + windowIDs := strings.Fields(string(output)) + windows := make(map[string]string) + for _, id := range windowIDs { + id = 
strings.TrimSpace(id) + if id == "" { + continue + } + nameCmd := exec.Command(xdotoolPath, "getwindowname", id) + nameOutput, err := nameCmd.Output() + if err != nil { + continue + } + name := strings.TrimSpace(string(nameOutput)) + windows[id] = name + } + data, err := json.Marshal(windows) + if err != nil { + msg := "failed to marshal window list: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + return data +} + +func captureWindow(args map[string]string) []byte { + window, ok := args["window"] + if !ok || window == "" { + return []byte("window parameter required (window ID or name)") + } + var windowID string + if _, err := strconv.Atoi(window); err == nil { + windowID = window + } else { + cmd := exec.Command(xdotoolPath, "search", "--name", window) + output, err := cmd.Output() + if err != nil || len(strings.Fields(string(output))) == 0 { + return []byte("window not found: " + window) + } + windowID = strings.Fields(string(output))[0] + } + nameCmd := exec.Command(xdotoolPath, "getwindowname", windowID) + nameOutput, _ := nameCmd.Output() + windowName := strings.TrimSpace(string(nameOutput)) + windowName = regexp.MustCompile(`[^a-zA-Z]+`).ReplaceAllString(windowName, "") + if windowName == "" { + windowName = "window" + } + timestamp := time.Now().Unix() + filename := fmt.Sprintf("/tmp/%s_%d.jpg", windowName, timestamp) + cmd := exec.Command(maimPath, "-i", windowIDToHex(windowID), filename) + if err := cmd.Run(); err != nil { + msg := "failed to capture window: " + err.Error() + logger.Error(msg) + return []byte(msg) + } + return []byte("screenshot saved: " + filename) +} + +func captureWindowAndView(args map[string]string) []byte { + window, ok := args["window"] + if !ok || window == "" { + return []byte("window parameter required (window ID or name)") + } + var windowID string + if _, err := strconv.Atoi(window); err == nil { + windowID = window + } else { + cmd := exec.Command(xdotoolPath, "search", "--name", window) + output, err := 
// FS Command Handlers - Unix-style file operations

// argsToSlice flattens a tool-call argument map into a positional slice.
//
// Only the keys "path", "src", "dst", "dir" and "file" are considered, in
// exactly that order; keys that are absent or map to the empty string are
// skipped. The result is nil when none of them carries a value.
func argsToSlice(args map[string]string) []string {
	positional := [...]string{"path", "src", "dst", "dir", "file"}
	var out []string
	for i := 0; i < len(positional); i++ {
		// A missing key reads as "", so one emptiness check covers both cases.
		value := args[positional[i]]
		if value == "" {
			continue
		}
		out = append(out, value)
	}
	return out
}
// summarizeChat returns its argument map encoded as JSON. If encoding fails
// a fixed plain-text error message is returned instead.
func summarizeChat(args map[string]string) []byte {
	if encoded, err := json.Marshal(args); err == nil {
		return encoded
	}
	return []byte("error: failed to marshal arguments")
}
filtered []models.Tool +// for _, tool := range BaseTools { +// if !playwrightToolNames[tool.Function.Name] { +// filtered = append(filtered, tool) +// } +// } +// BaseTools = filtered +// delete(FnMap, "pw_start") +// delete(FnMap, "pw_stop") +// delete(FnMap, "pw_is_running") +// delete(FnMap, "pw_navigate") +// delete(FnMap, "pw_click") +// delete(FnMap, "pw_click_at") +// delete(FnMap, "pw_fill") +// delete(FnMap, "pw_extract_text") +// delete(FnMap, "pw_screenshot") +// delete(FnMap, "pw_screenshot_and_view") +// delete(FnMap, "pw_wait_for_selector") +// delete(FnMap, "pw_drag") +// } + +// func (t *Tools) RegisterWindowTools(modelHasVision bool) { +// removeWindowToolsFromBaseTools() +// if t.WindowToolsAvailable { +// FnMap["list_windows"] = listWindows +// FnMap["capture_window"] = captureWindow +// windowTools := []models.Tool{ +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "list_windows", +// Description: "List all visible windows with their IDs and names. Returns a map of window ID to window name.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{}, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "capture_window", +// Description: "Capture a screenshot of a specific window and save it to /tmp. 
Requires window parameter (window ID or name substring).", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"window"}, +// Properties: map[string]models.ToolArgProps{ +// "window": models.ToolArgProps{ +// Type: "string", +// Description: "window ID or window name (partial match)", +// }, +// }, +// }, +// }, +// }, +// } +// if modelHasVision { +// FnMap["capture_window_and_view"] = captureWindowAndView +// windowTools = append(windowTools, models.Tool{ +// Type: "function", +// Function: models.ToolFunc{ +// Name: "capture_window_and_view", +// Description: "Capture a screenshot of a specific window, save it to /tmp, and return the image for viewing. Requires window parameter (window ID or name substring).", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"window"}, +// Properties: map[string]models.ToolArgProps{ +// "window": models.ToolArgProps{ +// Type: "string", +// Description: "window ID or window name (partial match)", +// }, +// }, +// }, +// }, +// }) +// } +// BaseTools = append(BaseTools, windowTools...) +// ToolSysMsg += windowToolSysMsg +// } +// } + +// for pw agentA +// var browserAgentSysPrompt = `You are an autonomous browser automation agent. Your goal is to complete the user's task by intelligently using browser automation + +// Important: The browser may already be running from a previous task! Always check pw_is_running first before starting a new browser. 
+ +// Available tools: +// - pw_start: Start browser (only if not already running) +// - pw_stop: Stop browser (only when you're truly done and browser is no longer needed) +// - pw_is_running: Check if browser is running +// - pw_navigate: Go to a URL +// - pw_click: Click an element by CSS selector +// - pw_fill: Type text into an input +// - pw_extract_text: Get text from page/element +// - pw_screenshot: Take a screenshot (returns file path) +// - pw_screenshot_and_view: Take screenshot with image for viewing +// - pw_wait_for_selector: Wait for element to appear +// - pw_drag: Drag mouse from one point to another +// - pw_click_at: Click at X,Y coordinates +// - pw_get_html: Get HTML content +// - pw_get_dom: Get structured DOM tree +// - pw_search_elements: Search for elements by text or selector + +// Workflow: +// 1. First, check if browser is already running (pw_is_running) +// 2. Only start browser if not already running (pw_start) +// 3. Navigate to required pages (pw_navigate) +// 4. Interact with elements as needed (click, fill, etc.) +// 5. Extract information or take screenshots as requested +// 6. IMPORTANT: Do NOT stop the browser when done! Leave it running so the user can continue interacting with the page in subsequent requests. 
+ +// Always provide clear feedback about what you're doing and what you found.` + +// func (t *Tools) runBrowserAgent(args map[string]string) []byte { +// task, ok := args["task"] +// if !ok || task == "" { +// return []byte(`{"error": "task argument is required"}`) +// } +// client := t.GetWebAgentClient() +// pwAgent := agent.NewPWAgent(client, browserAgentSysPrompt) +// pwAgent.SetTools(agent.GetPWTools()) +// return pwAgent.ProcessTask(task) +// } + +// func registerPlaywrightTools() { +// removePlaywrightToolsFromBaseTools() +// if cfg != nil && cfg.PlaywrightEnabled { +// FnMap["pw_start"] = pwStart +// FnMap["pw_stop"] = pwStop +// FnMap["pw_is_running"] = pwIsRunning +// FnMap["pw_navigate"] = pwNavigate +// FnMap["pw_click"] = pwClick +// FnMap["pw_click_at"] = pwClickAt +// FnMap["pw_fill"] = pwFill +// FnMap["pw_extract_text"] = pwExtractText +// FnMap["pw_screenshot"] = pwScreenshot +// FnMap["pw_screenshot_and_view"] = pwScreenshotAndView +// FnMap["pw_wait_for_selector"] = pwWaitForSelector +// FnMap["pw_drag"] = pwDrag +// FnMap["pw_get_html"] = pwGetHTML +// FnMap["pw_get_dom"] = pwGetDOM +// FnMap["pw_search_elements"] = pwSearchElements +// playwrightTools := []models.Tool{ +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_start", +// Description: "Start a Playwright browser instance. Call this first before using other pw_ Uses headless mode by default (set PlaywrightHeadless=false in config for GUI).", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{}, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_stop", +// Description: "Stop the Playwright browser instance. 
Call when done with browser automation.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{}, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_is_running", +// Description: "Check if Playwright browser is currently running.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{}, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_navigate", +// Description: "Navigate to a URL in the browser.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"url"}, +// Properties: map[string]models.ToolArgProps{ +// "url": models.ToolArgProps{ +// Type: "string", +// Description: "URL to navigate to", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_click", +// Description: "Click on an element using CSS selector. 
Use 'index' for multiple matches (default 0).", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"selector"}, +// Properties: map[string]models.ToolArgProps{ +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "CSS selector for the element to click", +// }, +// "index": models.ToolArgProps{ +// Type: "string", +// Description: "optional index for multiple matches (default 0)", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_fill", +// Description: "Fill an input field with text using CSS selector.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"selector", "text"}, +// Properties: map[string]models.ToolArgProps{ +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "CSS selector for the input element", +// }, +// "text": models.ToolArgProps{ +// Type: "string", +// Description: "text to fill into the input", +// }, +// "index": models.ToolArgProps{ +// Type: "string", +// Description: "optional index for multiple matches (default 0)", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_extract_text", +// Description: "Extract text content from the page or specific elements using CSS selector. Use 'body' for all page text.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"selector"}, +// Properties: map[string]models.ToolArgProps{ +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "CSS selector (use 'body' for all page text)", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_screenshot", +// Description: "Take a screenshot of the page or a specific element. 
Returns file path to saved image.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{ +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "optional CSS selector for element to screenshot", +// }, +// "full_page": models.ToolArgProps{ +// Type: "string", +// Description: "optional: 'true' to capture full page (default false)", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_screenshot_and_view", +// Description: "Take a screenshot and return the image for viewing. Use when model needs to see the screenshot.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{ +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "optional CSS selector for element to screenshot", +// }, +// "full_page": models.ToolArgProps{ +// Type: "string", +// Description: "optional: 'true' to capture full page (default false)", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_wait_for_selector", +// Description: "Wait for an element to appear on the page.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"selector"}, +// Properties: map[string]models.ToolArgProps{ +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "CSS selector to wait for", +// }, +// "timeout": models.ToolArgProps{ +// Type: "string", +// Description: "optional timeout in ms (default 30000)", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_drag", +// Description: "Drag the mouse from one point to another.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"x1", "y1", "x2", "y2"}, +// Properties: map[string]models.ToolArgProps{ +// "x1": models.ToolArgProps{ 
+// Type: "string", +// Description: "starting X coordinate", +// }, +// "y1": models.ToolArgProps{ +// Type: "string", +// Description: "starting Y coordinate", +// }, +// "x2": models.ToolArgProps{ +// Type: "string", +// Description: "ending X coordinate", +// }, +// "y2": models.ToolArgProps{ +// Type: "string", +// Description: "ending Y coordinate", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_get_html", +// Description: "Get the HTML content of the page or a specific element.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{ +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "optional CSS selector (default: body)", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_get_dom", +// Description: "Get a structured DOM representation of an element with tag, attributes, text, and children.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{ +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "optional CSS selector (default: body)", +// }, +// }, +// }, +// }, +// }, +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "pw_search_elements", +// Description: "Search for elements by text content or CSS selector. Returns matching elements with their tags, text, and HTML.", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{}, +// Properties: map[string]models.ToolArgProps{ +// "text": models.ToolArgProps{ +// Type: "string", +// Description: "text to search for in elements", +// }, +// "selector": models.ToolArgProps{ +// Type: "string", +// Description: "CSS selector to search for", +// }, +// }, +// }, +// }, +// }, +// } +// BaseTools = append(BaseTools, playwrightTools...) 
+// ToolSysMsg += browserToolSysMsg +// agent.RegisterPWTool("pw_start", pwStart) +// agent.RegisterPWTool("pw_stop", pwStop) +// agent.RegisterPWTool("pw_is_running", pwIsRunning) +// agent.RegisterPWTool("pw_navigate", pwNavigate) +// agent.RegisterPWTool("pw_click", pwClick) +// agent.RegisterPWTool("pw_click_at", pwClickAt) +// agent.RegisterPWTool("pw_fill", pwFill) +// agent.RegisterPWTool("pw_extract_text", pwExtractText) +// agent.RegisterPWTool("pw_screenshot", pwScreenshot) +// agent.RegisterPWTool("pw_screenshot_and_view", pwScreenshotAndView) +// agent.RegisterPWTool("pw_wait_for_selector", pwWaitForSelector) +// agent.RegisterPWTool("pw_drag", pwDrag) +// agent.RegisterPWTool("pw_get_html", pwGetHTML) +// agent.RegisterPWTool("pw_get_dom", pwGetDOM) +// agent.RegisterPWTool("pw_search_elements", pwSearchElements) +// browserAgentTool := []models.Tool{ +// { +// Type: "function", +// Function: models.ToolFunc{ +// Name: "browser_agent", +// Description: "Autonomous browser automation agent. Use for complex multi-step browser tasks like 'go to website, login, and take screenshot'. The agent will plan and execute steps automatically using browser ", +// Parameters: models.ToolFuncParams{ +// Type: "object", +// Required: []string{"task"}, +// Properties: map[string]models.ToolArgProps{ +// "task": {Type: "string", Description: "The task to accomplish, e.g., 'go to github.com and take a screenshot of the homepage'"}, +// }, +// }, +// }, +// }, +// } +// BaseTools = append(BaseTools, browserAgentTool...) 
+// FnMap["browser_agent"] = tooler.runBrowserAgent +// } +// } + +func CallToolWithAgent(name string, args map[string]string) ([]byte, bool) { + f, ok := FnMap[name] + if !ok { + return []byte(fmt.Sprintf("tool %s not found", name)), false + } + raw := f(args) + if a := agent.Get(name); a != nil { + return a.Process(args, raw), true + } + return raw, true +} + +// openai style def +var BaseTools = []models.Tool{ + // rag_search + models.Tool{ + Type: "function", + Function: models.ToolFunc{ + Name: "rag_search", + Description: "Search local document database given query, limit of sources (default 3). Performs query refinement, semantic search, reranking, and synthesis.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"query", "limit"}, + Properties: map[string]models.ToolArgProps{ + "query": models.ToolArgProps{ + Type: "string", + Description: "search query", + }, + "limit": models.ToolArgProps{ + Type: "string", + Description: "limit of the document results", + }, + }, + }, + }, + }, + // websearch + models.Tool{ + Type: "function", + Function: models.ToolFunc{ + Name: "websearch", + Description: "Search web given query, limit of sources (default 3).", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"query", "limit"}, + Properties: map[string]models.ToolArgProps{ + "query": models.ToolArgProps{ + Type: "string", + Description: "search query", + }, + "limit": models.ToolArgProps{ + Type: "string", + Description: "limit of the website results", + }, + }, + }, + }, + }, + // read_url + models.Tool{ + Type: "function", + Function: models.ToolFunc{ + Name: "read_url", + Description: "Retrieves text content of given link, providing clean summary without html,css and other web elements.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"url"}, + Properties: map[string]models.ToolArgProps{ + "url": models.ToolArgProps{ + Type: "string", + Description: "link to the webpage to read text 
from", + }, + }, + }, + }, + }, + // websearch_raw + models.Tool{ + Type: "function", + Function: models.ToolFunc{ + Name: "websearch_raw", + Description: "Search web given query, returning raw data as is without processing. Use when you need the raw response data instead of a clean summary.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"query", "limit"}, + Properties: map[string]models.ToolArgProps{ + "query": models.ToolArgProps{ + Type: "string", + Description: "search query", + }, + "limit": models.ToolArgProps{ + Type: "string", + Description: "limit of the website results", + }, + }, + }, + }, + }, + // read_url_raw + models.Tool{ + Type: "function", + Function: models.ToolFunc{ + Name: "read_url_raw", + Description: "Retrieves raw content of given link without processing. Use when you need the raw response data instead of a clean summary.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"url"}, + Properties: map[string]models.ToolArgProps{ + "url": models.ToolArgProps{ + Type: "string", + Description: "link to the webpage to read text from", + }, + }, + }, + }, + }, + // help + models.Tool{ + Type: "function", + Function: models.ToolFunc{ + Name: "help", + Description: "List all available commands. Use this to discover what commands are available when unsure.", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{}, + Properties: map[string]models.ToolArgProps{ + "command": models.ToolArgProps{ + Type: "string", + Description: "optional: get help for specific command (e.g., 'help memory')", + }, + }, + }, + }, + }, + // run - unified command + models.Tool{ + Type: "function", + Function: models.ToolFunc{ + Name: "run", + Description: "Execute commands: shell, git, memory, todo. Usage: run \"<command>\". 
Examples: run \"ls -la\", run \"git status\", run \"memory store foo bar\", run \"memory get foo\", run \"todo create task\", run \"help\", run \"help memory\"", + Parameters: models.ToolFuncParams{ + Type: "object", + Required: []string{"command"}, + Properties: map[string]models.ToolArgProps{ + "command": models.ToolArgProps{ + Type: "string", + Description: "command to execute. Use: run \"help\" for all commands, run \"help <cmd>\" for specific help. Examples: ls, cat, grep, git status, memory store, todo create, etc.", + }, + }, + }, + }, + }, +} |
