summary | refs | log | tree | commit | diff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r-- tools/chain.go 416
-rw-r--r-- tools/fs.go 755
-rw-r--r-- tools/pw.go 645
-rw-r--r-- tools/tools.go 1909
4 files changed, 3725 insertions, 0 deletions
diff --git a/tools/chain.go b/tools/chain.go
new file mode 100644
index 0000000..381cc1a
--- /dev/null
+++ b/tools/chain.go
@@ -0,0 +1,416 @@
+package tools
+
+import (
+ "errors"
+ "fmt"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strconv"
+ "strings"
+)
+
// Operator identifies how one command in a chain is joined to the command
// that follows it.
type Operator int

// Chain operators. The zero value, OpNone, marks a segment with no operator
// after it.
const (
	OpNone Operator = iota // no operator follows the segment
	OpAnd                  // &&
	OpOr                   // ||
	OpSeq                  // ;
	OpPipe                 // |
)
+
+// Segment is a single command in a chain.
+type Segment struct {
+ Raw string
+ Op Operator // operator AFTER this segment
+}
+
+// ParseChain splits a command string into segments by &&, ;, and |.
+// Respects quoted strings (single and double quotes).
+func ParseChain(input string) []Segment {
+ var segments []Segment
+ var current strings.Builder
+ runes := []rune(input)
+ n := len(runes)
+ for i := 0; i < n; i++ {
+ ch := runes[i]
+ // handle quotes
+ if ch == '\'' || ch == '"' {
+ quote := ch
+ current.WriteRune(ch)
+ i++
+ for i < n && runes[i] != quote {
+ current.WriteRune(runes[i])
+ i++
+ }
+ if i < n {
+ current.WriteRune(runes[i])
+ }
+ continue
+ }
+ // &&
+ if ch == '&' && i+1 < n && runes[i+1] == '&' {
+ segments = append(segments, Segment{
+ Raw: strings.TrimSpace(current.String()),
+ Op: OpAnd,
+ })
+ current.Reset()
+ i++ // skip second &
+ continue
+ }
+ // ;
+ if ch == ';' {
+ segments = append(segments, Segment{
+ Raw: strings.TrimSpace(current.String()),
+ Op: OpSeq,
+ })
+ current.Reset()
+ continue
+ }
+ // ||
+ if ch == '|' && i+1 < n && runes[i+1] == '|' {
+ segments = append(segments, Segment{
+ Raw: strings.TrimSpace(current.String()),
+ Op: OpOr,
+ })
+ current.Reset()
+ i++ // skip second |
+ continue
+ }
+ // | (single pipe)
+ if ch == '|' {
+ segments = append(segments, Segment{
+ Raw: strings.TrimSpace(current.String()),
+ Op: OpPipe,
+ })
+ current.Reset()
+ continue
+ }
+ current.WriteRune(ch)
+ }
+ // last segment
+ last := strings.TrimSpace(current.String())
+ if last != "" {
+ segments = append(segments, Segment{Raw: last, Op: OpNone})
+ }
+ return segments
+}
+
+// ExecChain executes a command string with pipe/chaining support.
+// Returns the combined output of all commands.
+func ExecChain(command string) string {
+ segments := ParseChain(command)
+ if len(segments) == 0 {
+ return "[error] empty command"
+ }
+ var collected []string
+ var lastOutput string
+ var lastErr error
+ pipeInput := ""
+ for i, seg := range segments {
+ if i > 0 {
+ prevOp := segments[i-1].Op
+ // && semantics: skip if previous failed
+ if prevOp == OpAnd && lastErr != nil {
+ continue
+ }
+ // || semantics: skip if previous succeeded
+ if prevOp == OpOr && lastErr == nil {
+ continue
+ }
+ }
+ // determine stdin for this segment
+ segStdin := ""
+ if i == 0 {
+ segStdin = pipeInput
+ } else if segments[i-1].Op == OpPipe {
+ segStdin = lastOutput
+ }
+ lastOutput, lastErr = execSingle(seg.Raw, segStdin)
+ // pipe: output flows to next command's stdin
+ // && or ;: collect output
+ if i < len(segments)-1 && seg.Op == OpPipe {
+ continue
+ }
+ if lastOutput != "" {
+ collected = append(collected, lastOutput)
+ }
+ }
+ return strings.Join(collected, "\n")
+}
+
+// execSingle executes a single command (with arguments) and returns output and error.
+func execSingle(command, stdin string) (string, error) {
+ parts := tokenize(command)
+ if len(parts) == 0 {
+ return "", errors.New("empty command")
+ }
+ name := parts[0]
+ args := parts[1:]
+ // Check if it's a built-in Go command
+ if result, isBuiltin := execBuiltin(name, args, stdin); isBuiltin {
+ return result, nil
+ }
+ // Otherwise execute as system command
+ cmd := exec.Command(name, args...)
+ if stdin != "" {
+ cmd.Stdin = strings.NewReader(stdin)
+ }
+ output, err := cmd.CombinedOutput()
+ if err != nil {
+ return string(output), err
+ }
+ return string(output), nil
+}
+
// tokenize splits a command string into arguments on spaces and tabs.
//
// Text inside single or double quotes is kept as part of the current
// argument with the quote characters removed; quoted and unquoted text that
// touch form one argument (a"b c" yields "ab c"). An empty quoted string
// ("" or '') produces an empty argument instead of being dropped. A quote
// that is never closed extends to the end of the input. Backslash escapes
// are not interpreted.
func tokenize(input string) []string {
	var (
		tokens  []string
		current strings.Builder
		quote   rune // active quote character; 0 while outside quotes
		started bool // an argument is in progress, possibly still empty
	)
	// flush emits the argument in progress, if any.
	flush := func() {
		if !started {
			return
		}
		tokens = append(tokens, current.String())
		current.Reset()
		started = false
	}

	for _, ch := range input {
		switch {
		case quote != 0:
			if ch == quote {
				quote = 0
			} else {
				current.WriteRune(ch)
			}
		case ch == '\'' || ch == '"':
			quote = ch
			// Opening a quote starts an argument even if nothing follows,
			// so that "" survives as an empty argument.
			started = true
		case ch == ' ' || ch == '\t':
			flush()
		default:
			current.WriteRune(ch)
			started = true
		}
	}
	flush()
	return tokens
}
+
+// execBuiltin executes a built-in command if it exists.
+// Returns (result, true) if it was a built-in (even if result is empty).
+// Returns ("", false) if it's not a built-in command.
+func execBuiltin(name string, args []string, stdin string) (string, bool) {
+ switch name {
+ case "echo":
+ if stdin != "" {
+ return stdin, true
+ }
+ return strings.Join(args, " "), true
+ case "time":
+ return "2006-01-02 15:04:05 MST", true
+ case "cat":
+ if len(args) == 0 {
+ if stdin != "" {
+ return stdin, true
+ }
+ return "", true
+ }
+ path := args[0]
+ abs := path
+ if !filepath.IsAbs(path) {
+ abs = filepath.Join(cfg.FilePickerDir, path)
+ }
+ data, err := os.ReadFile(abs)
+ if err != nil {
+ return fmt.Sprintf("[error] cat: %v", err), true
+ }
+ return string(data), true
+ case "pwd":
+ return cfg.FilePickerDir, true
+ case "cd":
+ if len(args) == 0 {
+ return "[error] usage: cd <dir>", true
+ }
+ dir := args[0]
+ // Resolve relative to cfg.FilePickerDir
+ abs := dir
+ if !filepath.IsAbs(dir) {
+ abs = filepath.Join(cfg.FilePickerDir, dir)
+ }
+ abs = filepath.Clean(abs)
+ info, err := os.Stat(abs)
+ if err != nil {
+ return fmt.Sprintf("[error] cd: %v", err), true
+ }
+ if !info.IsDir() {
+ return "[error] cd: not a directory: " + dir, true
+ }
+ cfg.FilePickerDir = abs
+ return "Changed directory to: " + cfg.FilePickerDir, true
+ case "mkdir":
+ if len(args) == 0 {
+ return "[error] usage: mkdir [-p] <dir>", true
+ }
+ createParents := false
+ var dirPath string
+ for _, a := range args {
+ if a == "-p" || a == "--parents" {
+ createParents = true
+ } else if dirPath == "" {
+ dirPath = a
+ }
+ }
+ if dirPath == "" {
+ return "[error] usage: mkdir [-p] <dir>", true
+ }
+ abs := dirPath
+ if !filepath.IsAbs(dirPath) {
+ abs = filepath.Join(cfg.FilePickerDir, dirPath)
+ }
+ abs = filepath.Clean(abs)
+ var mkdirFunc func(string, os.FileMode) error
+ if createParents {
+ mkdirFunc = os.MkdirAll
+ } else {
+ mkdirFunc = os.Mkdir
+ }
+ if err := mkdirFunc(abs, 0o755); err != nil {
+ return fmt.Sprintf("[error] mkdir: %v", err), true
+ }
+ if createParents {
+ return "Created " + dirPath + " (with parents)", true
+ }
+ return "Created " + dirPath, true
+ case "ls":
+ dir := "."
+ for _, a := range args {
+ if !strings.HasPrefix(a, "-") {
+ dir = a
+ break
+ }
+ }
+ abs := dir
+ if !filepath.IsAbs(dir) {
+ abs = filepath.Join(cfg.FilePickerDir, dir)
+ }
+ entries, err := os.ReadDir(abs)
+ if err != nil {
+ return fmt.Sprintf("[error] ls: %v", err), true
+ }
+ var out strings.Builder
+ for _, e := range entries {
+ info, _ := e.Info()
+ switch {
+ case e.IsDir():
+ fmt.Fprintf(&out, "d %-8s %s/\n", "-", e.Name())
+ case info != nil:
+ size := info.Size()
+ sizeStr := strconv.FormatInt(size, 10)
+ if size > 1024 {
+ sizeStr = fmt.Sprintf("%.1fKB", float64(size)/1024)
+ }
+ fmt.Fprintf(&out, "f %-8s %s\n", sizeStr, e.Name())
+ default:
+ fmt.Fprintf(&out, "f %-8s %s\n", "?", e.Name())
+ }
+ }
+ if out.Len() == 0 {
+ return "(empty directory)", true
+ }
+ return strings.TrimRight(out.String(), "\n"), true
+ case "go":
+ // Allow all go subcommands
+ if len(args) == 0 {
+ return "[error] usage: go <subcommand> [options]", true
+ }
+ cmd := exec.Command("go", args...)
+ cmd.Dir = cfg.FilePickerDir
+ output, err := cmd.CombinedOutput()
+ if err != nil {
+ return fmt.Sprintf("[error] go %s: %v\n%s", args[0], err, string(output)), true
+ }
+ return string(output), true
+ case "cp":
+ if len(args) < 2 {
+ return "[error] usage: cp <source> <dest>", true
+ }
+ src := args[0]
+ dst := args[1]
+ if !filepath.IsAbs(src) {
+ src = filepath.Join(cfg.FilePickerDir, src)
+ }
+ if !filepath.IsAbs(dst) {
+ dst = filepath.Join(cfg.FilePickerDir, dst)
+ }
+ data, err := os.ReadFile(src)
+ if err != nil {
+ return fmt.Sprintf("[error] cp: %v", err), true
+ }
+ err = os.WriteFile(dst, data, 0644)
+ if err != nil {
+ return fmt.Sprintf("[error] cp: %v", err), true
+ }
+ return "Copied " + src + " to " + dst, true
+ case "mv":
+ if len(args) < 2 {
+ return "[error] usage: mv <source> <dest>", true
+ }
+ src := args[0]
+ dst := args[1]
+ if !filepath.IsAbs(src) {
+ src = filepath.Join(cfg.FilePickerDir, src)
+ }
+ if !filepath.IsAbs(dst) {
+ dst = filepath.Join(cfg.FilePickerDir, dst)
+ }
+ err := os.Rename(src, dst)
+ if err != nil {
+ return fmt.Sprintf("[error] mv: %v", err), true
+ }
+ return "Moved " + src + " to " + dst, true
+ case "rm":
+ if len(args) == 0 {
+ return "[error] usage: rm [-r] <file>", true
+ }
+ recursive := false
+ var target string
+ for _, a := range args {
+ if a == "-r" || a == "-rf" || a == "-fr" || a == "-recursive" {
+ recursive = true
+ } else if target == "" {
+ target = a
+ }
+ }
+ if target == "" {
+ return "[error] usage: rm [-r] <file>", true
+ }
+ abs := target
+ if !filepath.IsAbs(target) {
+ abs = filepath.Join(cfg.FilePickerDir, target)
+ }
+ info, err := os.Stat(abs)
+ if err != nil {
+ return fmt.Sprintf("[error] rm: %v", err), true
+ }
+ if info.IsDir() {
+ if recursive {
+ err = os.RemoveAll(abs)
+ if err != nil {
+ return fmt.Sprintf("[error] rm: %v", err), true
+ }
+ return "Removed " + abs, true
+ }
+ return "[error] rm: is a directory (use -r)", true
+ }
+ err = os.Remove(abs)
+ if err != nil {
+ return fmt.Sprintf("[error] rm: %v", err), true
+ }
+ return "Removed " + abs, true
+ }
+ return "", false
+}
diff --git a/tools/fs.go b/tools/fs.go
new file mode 100644
index 0000000..fb43084
--- /dev/null
+++ b/tools/fs.go
@@ -0,0 +1,755 @@
+package tools
+
+import (
+ "encoding/base64"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "gf-lt/models"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "sort"
+ "strconv"
+ "strings"
+ "time"
+)
+
// Package-level memory configuration, installed by SetMemoryStore and read
// by FsMemory.
var (
	memoryStore MemoryStore // backing store; nil until SetMemoryStore is called
	agentRole   string      // agent identifier passed to every MemoryStore call
)

// MemoryStore is the per-agent, topic-keyed storage used by the memory
// command.
type MemoryStore interface {
	// Memorise saves data under topic for agent.
	Memorise(agent, topic, data string) (string, error)
	// Recall returns what is stored under topic for agent.
	Recall(agent, topic string) (string, error)
	// RecallTopics lists the topics stored for agent.
	RecallTopics(agent string) ([]string, error)
	// Forget removes topic for agent.
	Forget(agent, topic string) error
}
+
+func SetMemoryStore(store MemoryStore, role string) {
+ memoryStore = store
+ agentRole = role
+}
+
+func SetFSRoot(dir string) {
+ if cfg == nil {
+ return
+ }
+ cfg.FilePickerDir = dir
+}
+
+func GetFSRoot() string {
+ return cfg.FilePickerDir
+}
+
+func SetFSCwd(dir string) error {
+ abs, err := filepath.Abs(dir)
+ if err != nil {
+ return err
+ }
+ info, err := os.Stat(abs)
+ if err != nil {
+ return err
+ }
+ if !info.IsDir() {
+ return fmt.Errorf("not a directory: %s", dir)
+ }
+ cfg.FilePickerDir = abs
+ return nil
+}
+
+func resolvePath(rel string) (string, error) {
+ if cfg.FilePickerDir == "" {
+ return "", errors.New("fs root not set")
+ }
+ if filepath.IsAbs(rel) {
+ abs := filepath.Clean(rel)
+ if !strings.HasPrefix(abs, cfg.FilePickerDir+string(os.PathSeparator)) && abs != cfg.FilePickerDir {
+ return "", fmt.Errorf("path escapes fs root: %s", rel)
+ }
+ return abs, nil
+ }
+ abs := filepath.Join(cfg.FilePickerDir, rel)
+ abs = filepath.Clean(abs)
+ if !strings.HasPrefix(abs, cfg.FilePickerDir+string(os.PathSeparator)) && abs != cfg.FilePickerDir {
+ return "", fmt.Errorf("path escapes fs root: %s", rel)
+ }
+ return abs, nil
+}
+
// humanSize formats a byte count for display: whole bytes below 1 KiB
// ("512B"), otherwise one decimal place in KB or MB using 1024-based units
// ("1.5KB", "2.0MB").
func humanSize(n int64) string {
	const (
		kib = 1 << 10
		mib = 1 << 20
	)
	if n >= mib {
		return fmt.Sprintf("%.1fMB", float64(n)/float64(mib))
	}
	if n >= kib {
		return fmt.Sprintf("%.1fKB", float64(n)/float64(kib))
	}
	return fmt.Sprintf("%dB", n)
}
+
// IsImageFile reports whether path has one of the image extensions .png,
// .jpg, .jpeg, .gif, .webp or .svg. The comparison ignores case and looks
// only at the file name, not at the file's content.
func IsImageFile(path string) bool {
	switch strings.ToLower(filepath.Ext(path)) {
	case ".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg":
		return true
	default:
		return false
	}
}
+
+func FsLs(args []string, stdin string) string {
+ dir := ""
+ if len(args) > 0 {
+ dir = args[0]
+ }
+ abs, err := resolvePath(dir)
+ if err != nil {
+ return fmt.Sprintf("[error] %v", err)
+ }
+ entries, err := os.ReadDir(abs)
+ if err != nil {
+ return fmt.Sprintf("[error] ls: %v", err)
+ }
+ var out strings.Builder
+ for _, e := range entries {
+ info, _ := e.Info()
+ switch {
+ case e.IsDir():
+ fmt.Fprintf(&out, "d %-8s %s/\n", "-", e.Name())
+ case info != nil:
+ fmt.Fprintf(&out, "f %-8s %s\n", humanSize(info.Size()), e.Name())
+ default:
+ fmt.Fprintf(&out, "f %-8s %s\n", "?", e.Name())
+ }
+ }
+ if out.Len() == 0 {
+ return "(empty directory)"
+ }
+ return strings.TrimRight(out.String(), "\n")
+}
+
+func FsCat(args []string, stdin string) string {
+ b64 := false
+ var path string
+ for _, a := range args {
+ if a == "-b" || a == "--base64" {
+ b64 = true
+ } else if path == "" {
+ path = a
+ }
+ }
+ if path == "" {
+ return "[error] usage: cat <path>"
+ }
+ abs, err := resolvePath(path)
+ if err != nil {
+ return fmt.Sprintf("[error] %v", err)
+ }
+ data, err := os.ReadFile(abs)
+ if err != nil {
+ return fmt.Sprintf("[error] cat: %v", err)
+ }
+ if b64 {
+ result := base64.StdEncoding.EncodeToString(data)
+ if IsImageFile(path) {
+ result += fmt.Sprintf("\n![image](file://%s)", abs)
+ }
+ return result
+ }
+ return string(data)
+}
+
+func FsViewImg(args []string, stdin string) string {
+ if len(args) == 0 {
+ return "[error] usage: view_img <image-path>"
+ }
+ path := args[0]
+ var abs string
+ if filepath.IsAbs(path) {
+ abs = path
+ } else {
+ var err error
+ abs, err = resolvePath(path)
+ if err != nil {
+ return fmt.Sprintf("[error] %v", err)
+ }
+ }
+ if _, err := os.Stat(abs); err != nil {
+ return fmt.Sprintf("[error] view_img: %v", err)
+ }
+ if !IsImageFile(path) {
+ return fmt.Sprintf("[error] not an image file: %s (use cat to read text files)", path)
+ }
+ dataURL, err := models.CreateImageURLFromPath(abs)
+ if err != nil {
+ return fmt.Sprintf("[error] view_img: %v", err)
+ }
+ result := models.MultimodalToolResp{
+ Type: "multimodal_content",
+ Parts: []map[string]string{
+ {"type": "text", "text": "Image: " + path},
+ {"type": "image_url", "url": dataURL},
+ },
+ }
+ jsonResult, err := json.Marshal(result)
+ if err != nil {
+ return fmt.Sprintf("[error] view_img: %v", err)
+ }
+ return string(jsonResult)
+}
+
+// FsSee is deprecated, use FsViewImg
+func FsSee(args []string, stdin string) string {
+ return FsViewImg(args, stdin)
+}
+
+func FsWrite(args []string, stdin string) string {
+ b64 := false
+ var path string
+ var contentParts []string
+ for _, a := range args {
+ switch a {
+ case "-b", "--base64":
+ b64 = true
+ default:
+ if path == "" {
+ path = a
+ } else {
+ contentParts = append(contentParts, a)
+ }
+ }
+ }
+ if path == "" {
+ return "[error] usage: write <path> [content] or pipe stdin"
+ }
+ abs, err := resolvePath(path)
+ if err != nil {
+ return fmt.Sprintf("[error] %v", err)
+ }
+ if err := os.MkdirAll(filepath.Dir(abs), 0o755); err != nil {
+ return fmt.Sprintf("[error] mkdir: %v", err)
+ }
+ var data []byte
+ if b64 {
+ src := stdin
+ if src == "" && len(contentParts) > 0 {
+ src = strings.Join(contentParts, " ")
+ }
+ src = strings.TrimSpace(src)
+ var err error
+ data, err = base64.StdEncoding.DecodeString(src)
+ if err != nil {
+ return fmt.Sprintf("[error] base64 decode: %v", err)
+ }
+ } else {
+ if len(contentParts) > 0 {
+ data = []byte(strings.Join(contentParts, " "))
+ } else {
+ data = []byte(stdin)
+ }
+ }
+ if err := os.WriteFile(abs, data, 0o644); err != nil {
+ return fmt.Sprintf("[error] write: %v", err)
+ }
+ size := humanSize(int64(len(data)))
+ result := fmt.Sprintf("Written %s → %s", size, path)
+ if IsImageFile(path) {
+ result += fmt.Sprintf("\n![image](file://%s)", abs)
+ }
+ return result
+}
+
+func FsStat(args []string, stdin string) string {
+ if len(args) == 0 {
+ return "[error] usage: stat <path>"
+ }
+ abs, err := resolvePath(args[0])
+ if err != nil {
+ return fmt.Sprintf("[error] %v", err)
+ }
+ info, err := os.Stat(abs)
+ if err != nil {
+ return fmt.Sprintf("[error] stat: %v", err)
+ }
+ mime := "application/octet-stream"
+ if IsImageFile(args[0]) {
+ ext := strings.ToLower(filepath.Ext(args[0]))
+ switch ext {
+ case ".png":
+ mime = "image/png"
+ case ".jpg", ".jpeg":
+ mime = "image/jpeg"
+ case ".gif":
+ mime = "image/gif"
+ case ".webp":
+ mime = "image/webp"
+ case ".svg":
+ mime = "image/svg+xml"
+ }
+ }
+ var out strings.Builder
+ fmt.Fprintf(&out, "File: %s\n", args[0])
+ fmt.Fprintf(&out, "Size: %s (%d bytes)\n", humanSize(info.Size()), info.Size())
+ fmt.Fprintf(&out, "Type: %s\n", mime)
+ fmt.Fprintf(&out, "Modified: %s\n", info.ModTime().Format(time.RFC3339))
+ if info.IsDir() {
+ fmt.Fprintf(&out, "Kind: directory\n")
+ }
+ return strings.TrimRight(out.String(), "\n")
+}
+
+func FsRm(args []string, stdin string) string {
+ if len(args) == 0 {
+ return "[error] usage: rm <path>"
+ }
+ abs, err := resolvePath(args[0])
+ if err != nil {
+ return fmt.Sprintf("[error] %v", err)
+ }
+ if err := os.RemoveAll(abs); err != nil {
+ return fmt.Sprintf("[error] rm: %v", err)
+ }
+ return "Removed " + args[0]
+}
+
+func FsCp(args []string, stdin string) string {
+ if len(args) < 2 {
+ return "[error] usage: cp <src> <dst>"
+ }
+ srcAbs, err := resolvePath(args[0])
+ if err != nil {
+ return fmt.Sprintf("[error] %v", err)
+ }
+ dstAbs, err := resolvePath(args[1])
+ if err != nil {
+ return fmt.Sprintf("[error] %v", err)
+ }
+ data, err := os.ReadFile(srcAbs)
+ if err != nil {
+ return fmt.Sprintf("[error] cp read: %v", err)
+ }
+ if err := os.MkdirAll(filepath.Dir(dstAbs), 0o755); err != nil {
+ return fmt.Sprintf("[error] cp mkdir: %v", err)
+ }
+ if err := os.WriteFile(dstAbs, data, 0o644); err != nil {
+ return fmt.Sprintf("[error] cp write: %v", err)
+ }
+ return fmt.Sprintf("Copied %s → %s (%s)", args[0], args[1], humanSize(int64(len(data))))
+}
+
+func FsMv(args []string, stdin string) string {
+ if len(args) < 2 {
+ return "[error] usage: mv <src> <dst>"
+ }
+ srcAbs, err := resolvePath(args[0])
+ if err != nil {
+ return fmt.Sprintf("[error] %v", err)
+ }
+ dstAbs, err := resolvePath(args[1])
+ if err != nil {
+ return fmt.Sprintf("[error] %v", err)
+ }
+ if err := os.MkdirAll(filepath.Dir(dstAbs), 0o755); err != nil {
+ return fmt.Sprintf("[error] mv mkdir: %v", err)
+ }
+ if err := os.Rename(srcAbs, dstAbs); err != nil {
+ return fmt.Sprintf("[error] mv: %v", err)
+ }
+ return fmt.Sprintf("Moved %s → %s", args[0], args[1])
+}
+
+func FsMkdir(args []string, stdin string) string {
+ if len(args) == 0 {
+ return "[error] usage: mkdir [-p] <dir>"
+ }
+ createParents := false
+ var dirPath string
+ for _, a := range args {
+ if a == "-p" || a == "--parents" {
+ createParents = true
+ } else if dirPath == "" {
+ dirPath = a
+ }
+ }
+ if dirPath == "" {
+ return "[error] usage: mkdir [-p] <dir>"
+ }
+ abs, err := resolvePath(dirPath)
+ if err != nil {
+ return fmt.Sprintf("[error] %v", err)
+ }
+ var mkdirFunc func(string, os.FileMode) error
+ if createParents {
+ mkdirFunc = os.MkdirAll
+ } else {
+ mkdirFunc = os.Mkdir
+ }
+ if err := mkdirFunc(abs, 0o755); err != nil {
+ return fmt.Sprintf("[error] mkdir: %v", err)
+ }
+ if createParents {
+ return "Created " + dirPath + " (with parents)"
+ }
+ return "Created " + dirPath
+}
+
+// Text processing commands
+
// FsEcho returns stdin unchanged when it is non-empty; otherwise it returns
// the arguments joined with single spaces.
func FsEcho(args []string, stdin string) string {
	if stdin == "" {
		return strings.Join(args, " ")
	}
	return stdin
}
+
// FsTime returns the current local time formatted as
// "YYYY-MM-DD HH:MM:SS ZONE". Both parameters are unused.
func FsTime(args []string, stdin string) string {
	const layout = "2006-01-02 15:04:05 MST"
	now := time.Now()
	return now.Format(layout)
}
+
// FsGrep filters the lines of stdin by a plain substring (no regular
// expressions).
//
// Flags: -i ignores case, -v keeps the lines that do NOT match, -c returns
// only the number of selected lines. The first argument that is not one of
// these flags is the pattern; later positional arguments are ignored rather
// than silently replacing the pattern. A single trailing newline in stdin
// terminates the last line instead of adding an empty one, and empty stdin
// has no lines, so -v and -c do not see phantom lines.
//
// Selected lines are returned joined with newlines. Missing arguments or an
// empty pattern yield an "[error] ..." string.
func FsGrep(args []string, stdin string) string {
	if len(args) == 0 {
		return "[error] usage: grep [-i] [-v] [-c] <pattern>"
	}
	var (
		ignoreCase  bool
		invert      bool
		countOnly   bool
		pattern     string
		havePattern bool
	)
	for _, a := range args {
		switch a {
		case "-i":
			ignoreCase = true
		case "-v":
			invert = true
		case "-c":
			countOnly = true
		default:
			if !havePattern {
				pattern, havePattern = a, true
			}
		}
	}
	if pattern == "" {
		return "[error] pattern required"
	}
	if ignoreCase {
		pattern = strings.ToLower(pattern)
	}

	var lines []string
	if stdin != "" {
		lines = strings.Split(strings.TrimSuffix(stdin, "\n"), "\n")
	}
	var matched []string
	for _, line := range lines {
		haystack := line
		if ignoreCase {
			haystack = strings.ToLower(line)
		}
		// Keep the line when "contains pattern" differs from "invert".
		if strings.Contains(haystack, pattern) != invert {
			matched = append(matched, line)
		}
	}
	if countOnly {
		return strconv.Itoa(len(matched))
	}
	return strings.Join(matched, "\n")
}
+
// FsHead returns the first n lines of stdin (default 10).
//
// The count may be given as "-n N", "-nN", "-N" or a bare number; values
// that do not parse are ignored. A count of 0 returns the empty string and a
// negative count disables the limit. A single trailing newline in stdin
// terminates the last line rather than adding an empty one. The result is
// joined with newlines and has no trailing newline.
func FsHead(args []string, stdin string) string {
	n := 10
	for i := 0; i < len(args); i++ {
		a := args[i]
		switch {
		case a == "-n":
			if i+1 < len(args) {
				if v, err := strconv.Atoi(args[i+1]); err == nil {
					n = v
				}
				i++ // the value belongs to -n; do not re-read it as a flag
			}
		case strings.HasPrefix(a, "-n"):
			if v, err := strconv.Atoi(a[2:]); err == nil {
				n = v
			}
		case strings.HasPrefix(a, "-"):
			// "-5" shorthand; other flags are ignored.
			if v, err := strconv.Atoi(a[1:]); err == nil && v >= 0 {
				n = v
			}
		default:
			if v, err := strconv.Atoi(a); err == nil {
				n = v
			}
		}
	}
	if n == 0 || stdin == "" {
		return ""
	}
	lines := strings.Split(strings.TrimSuffix(stdin, "\n"), "\n")
	if n > 0 && len(lines) > n {
		lines = lines[:n]
	}
	return strings.Join(lines, "\n")
}
+
// FsTail returns the last n lines of stdin (default 10).
//
// The count may be given as "-n N", "-nN", "-N" or a bare number; values
// that do not parse are ignored. A count of 0 returns the empty string and a
// negative count disables the limit. A single trailing newline in stdin
// terminates the last line rather than adding an empty one, so
// newline-terminated input yields its real last lines. The result is joined
// with newlines and has no trailing newline.
func FsTail(args []string, stdin string) string {
	n := 10
	for i := 0; i < len(args); i++ {
		a := args[i]
		switch {
		case a == "-n":
			if i+1 < len(args) {
				if v, err := strconv.Atoi(args[i+1]); err == nil {
					n = v
				}
				i++ // the value belongs to -n; do not re-read it as a flag
			}
		case strings.HasPrefix(a, "-n"):
			if v, err := strconv.Atoi(a[2:]); err == nil {
				n = v
			}
		case strings.HasPrefix(a, "-"):
			// "-5" shorthand; other flags are ignored.
			if v, err := strconv.Atoi(a[1:]); err == nil && v >= 0 {
				n = v
			}
		default:
			if v, err := strconv.Atoi(a); err == nil {
				n = v
			}
		}
	}
	if n == 0 || stdin == "" {
		return ""
	}
	lines := strings.Split(strings.TrimSuffix(stdin, "\n"), "\n")
	if n > 0 && len(lines) > n {
		lines = lines[len(lines)-n:]
	}
	return strings.Join(lines, "\n")
}
+
// FsWc counts lines, whitespace-separated words and bytes in stdin.
//
// With -l, -w or -c as the first argument only that number is returned;
// otherwise the result is "<lines> lines, <words> words, <chars> chars".
// Lines are counted by their newline terminators, plus one for a final line
// that lacks a terminator, so "" has 0 lines and both "a\n" and "a" have 1.
// The chars figure is the length of stdin in bytes.
func FsWc(args []string, stdin string) string {
	lines := strings.Count(stdin, "\n")
	if stdin != "" && !strings.HasSuffix(stdin, "\n") {
		lines++ // final line without a terminating newline
	}
	words := len(strings.Fields(stdin))
	chars := len(stdin)
	if len(args) > 0 {
		switch args[0] {
		case "-l":
			return strconv.Itoa(lines)
		case "-w":
			return strconv.Itoa(words)
		case "-c":
			return strconv.Itoa(chars)
		}
	}
	return fmt.Sprintf("%d lines, %d words, %d chars", lines, words, chars)
}
+
// FsSort sorts the lines of stdin.
//
// Flags: -r reverses the order, -n compares lines by their integer value.
// For -n, surrounding whitespace is ignored and a line that is not an
// integer counts as 0. The sort is stable, so lines that compare equal keep
// their input order. A single trailing newline in stdin terminates the last
// line rather than adding an empty one (which would otherwise sort to the
// front). The result is joined with newlines and has no trailing newline;
// empty stdin yields "".
func FsSort(args []string, stdin string) string {
	reverse := false
	numeric := false
	for _, a := range args {
		switch a {
		case "-r":
			reverse = true
		case "-n":
			numeric = true
		}
	}
	if stdin == "" {
		return ""
	}
	lines := strings.Split(strings.TrimSuffix(stdin, "\n"), "\n")

	// less orders two lines in ascending order under the selected mode.
	less := func(a, b string) bool {
		if numeric {
			// Atoi errors are deliberately ignored: non-numeric lines sort as 0.
			na, _ := strconv.Atoi(strings.TrimSpace(a))
			nb, _ := strconv.Atoi(strings.TrimSpace(b))
			return na < nb
		}
		return a < b
	}
	sort.SliceStable(lines, func(i, j int) bool {
		if reverse {
			return less(lines[j], lines[i])
		}
		return less(lines[i], lines[j])
	})
	return strings.Join(lines, "\n")
}
+
// FsUniq collapses runs of identical adjacent lines in stdin into one line.
//
// With -c each output line is prefixed with the length of its run and a
// space. Only adjacent duplicates are merged, so unsorted input may still
// contain repeated lines. A single trailing newline in stdin terminates the
// last line rather than adding an empty one. The result is joined with
// newlines and has no trailing newline; empty stdin yields "".
func FsUniq(args []string, stdin string) string {
	showCount := false
	for _, a := range args {
		if a == "-c" {
			showCount = true
		}
	}
	if stdin == "" {
		return ""
	}
	lines := strings.Split(strings.TrimSuffix(stdin, "\n"), "\n")

	var result []string
	// emit records one finished run.
	emit := func(line string, count int) {
		if showCount {
			result = append(result, fmt.Sprintf("%d %s", count, line))
			return
		}
		result = append(result, line)
	}

	run := 0
	for i, line := range lines {
		if i > 0 && line != lines[i-1] {
			emit(lines[i-1], run)
			run = 0
		}
		run++
	}
	// strings.Split always returns at least one element, so a last run exists.
	emit(lines[len(lines)-1], run)
	return strings.Join(result, "\n")
}
+
// allowedGitSubcommands is the allow-list of git subcommands that FsGit will
// run; any subcommand not present here is rejected.
var allowedGitSubcommands = map[string]bool{
	"status":    true,
	"log":       true,
	"diff":      true,
	"show":      true,
	"branch":    true,
	"reflog":    true,
	"rev-parse": true,
	"shortlog":  true,
	"describe":  true,
	"rev-list":  true,
}
+
+func FsGit(args []string, stdin string) string {
+ if len(args) == 0 {
+ return "[error] usage: git <subcommand> [options]"
+ }
+ subcmd := args[0]
+ if !allowedGitSubcommands[subcmd] {
+ return fmt.Sprintf("[error] git: '%s' is not an allowed git command. Allowed: status, log, diff, show, branch, reflog, rev-parse, shortlog, describe, rev-list", subcmd)
+ }
+ abs, err := resolvePath(".")
+ if err != nil {
+ return fmt.Sprintf("[error] git: %v", err)
+ }
+ // Pass all args to git (first arg is subcommand, rest are options)
+ cmd := exec.Command("git", args...)
+ cmd.Dir = abs
+ output, err := cmd.CombinedOutput()
+ if err != nil {
+ return fmt.Sprintf("[error] git %s: %v\n%s", subcmd, err, string(output))
+ }
+ return string(output)
+}
+
+func FsPwd(args []string, stdin string) string {
+ return cfg.FilePickerDir
+}
+
+func FsCd(args []string, stdin string) string {
+ if len(args) == 0 {
+ return "[error] usage: cd <dir>"
+ }
+ dir := args[0]
+ abs, err := resolvePath(dir)
+ if err != nil {
+ return fmt.Sprintf("[error] cd: %v", err)
+ }
+ info, err := os.Stat(abs)
+ if err != nil {
+ return fmt.Sprintf("[error] cd: %v", err)
+ }
+ if !info.IsDir() {
+ return "[error] cd: not a directory: " + dir
+ }
+ cfg.FilePickerDir = abs
+ return "Changed directory to: " + cfg.FilePickerDir
+}
+
+func FsSed(args []string, stdin string) string {
+ if len(args) == 0 {
+ return "[error] usage: sed 's/old/new/[g]' [file]"
+ }
+ inPlace := false
+ var filePath string
+ var pattern string
+ for _, a := range args {
+ switch a {
+ case "-i", "--in-place":
+ inPlace = true
+ default:
+ if strings.HasPrefix(a, "s") && len(a) > 1 {
+ pattern = a
+ } else if filePath == "" && !strings.HasPrefix(a, "-") {
+ filePath = a
+ }
+ }
+ }
+ if pattern == "" {
+ return "[error] usage: sed 's/old/new/[g]' [file]"
+ }
+ // Parse pattern: s/old/new/flags
+ parts := strings.Split(pattern[1:], "/")
+ if len(parts) < 2 {
+ return "[error] invalid sed pattern. Use: s/old/new/[g]"
+ }
+ oldStr := parts[0]
+ newStr := parts[1]
+ global := len(parts) >= 3 && strings.Contains(parts[2], "g")
+ var content string
+ switch {
+ case filePath != "" && stdin == "":
+ abs, err := resolvePath(filePath)
+ if err != nil {
+ return fmt.Sprintf("[error] sed: %v", err)
+ }
+ data, err := os.ReadFile(abs)
+ if err != nil {
+ return fmt.Sprintf("[error] sed: %v", err)
+ }
+ content = string(data)
+ case stdin != "":
+ content = stdin
+ default:
+ return "[error] sed: no input (use file path or pipe from stdin)"
+ }
+ // Apply sed replacement
+ if global {
+ content = strings.ReplaceAll(content, oldStr, newStr)
+ } else {
+ content = strings.Replace(content, oldStr, newStr, 1)
+ }
+ if inPlace && filePath != "" {
+ abs, err := resolvePath(filePath)
+ if err != nil {
+ return fmt.Sprintf("[error] sed: %v", err)
+ }
+ if err := os.WriteFile(abs, []byte(content), 0644); err != nil {
+ return fmt.Sprintf("[error] sed: %v", err)
+ }
+ return "Modified " + filePath
+ }
+ return content
+}
+
+func FsMemory(args []string, stdin string) string {
+ if len(args) == 0 {
+ return "[error] usage: memory store <topic> <data> | memory get <topic> | memory list | memory forget <topic>"
+ }
+ if memoryStore == nil {
+ return "[error] memory store not initialized"
+ }
+ switch args[0] {
+ case "store":
+ if len(args) < 3 && stdin == "" {
+ return "[error] usage: memory store <topic> <data>"
+ }
+ topic := args[1]
+ var data string
+ if len(args) >= 3 {
+ data = strings.Join(args[2:], " ")
+ } else {
+ data = stdin
+ }
+ _, err := memoryStore.Memorise(agentRole, topic, data)
+ if err != nil {
+ return fmt.Sprintf("[error] failed to store: %v", err)
+ }
+ return "Stored under topic: " + topic
+ case "get":
+ if len(args) < 2 {
+ return "[error] usage: memory get <topic>"
+ }
+ topic := args[1]
+ data, err := memoryStore.Recall(agentRole, topic)
+ if err != nil {
+ return fmt.Sprintf("[error] failed to recall: %v", err)
+ }
+ return fmt.Sprintf("Topic: %s\n%s", topic, data)
+ case "list", "topics":
+ topics, err := memoryStore.RecallTopics(agentRole)
+ if err != nil {
+ return fmt.Sprintf("[error] failed to list topics: %v", err)
+ }
+ if len(topics) == 0 {
+ return "No topics stored."
+ }
+ return "Topics: " + strings.Join(topics, ", ")
+ case "forget", "delete":
+ if len(args) < 2 {
+ return "[error] usage: memory forget <topic>"
+ }
+ topic := args[1]
+ err := memoryStore.Forget(agentRole, topic)
+ if err != nil {
+ return fmt.Sprintf("[error] failed to forget: %v", err)
+ }
+ return "Deleted topic: " + topic
+ default:
+ return fmt.Sprintf("[error] unknown subcommand: %s. Use: store, get, list, topics, forget, delete", args[0])
+ }
+}
diff --git a/tools/pw.go b/tools/pw.go
new file mode 100644
index 0000000..05b1390
--- /dev/null
+++ b/tools/pw.go
@@ -0,0 +1,645 @@
+package tools
+
+import (
+ "encoding/json"
+ "fmt"
+ "gf-lt/models"
+ "os"
+ "strconv"
+ "strings"
+ "sync"
+
+ "github.com/playwright-community/playwright-go"
+)
+
+// Package-level Playwright state shared by the pw* tool handlers.
+var (
+ // pw is the Playwright driver handle; assigned by CheckPlaywright.
+ pw *playwright.Playwright
+ // browser is the browser launched by pwStart; pwStop resets it to nil.
+ browser playwright.Browser
+ // browserStarted is set by pwStart on success and cleared by pwStop.
+ browserStarted bool
+ // browserStartMu is held by pwStart and pwStop while they change this state.
+ browserStartMu sync.Mutex
+ // page is the single page opened by pwStart that the pw* handlers act on.
+ page playwright.Page
+)
+
+// PwShutDown stops the browser (if one is running) and then stops the
+// Playwright driver. It is a no-op returning nil when the driver was never
+// started; otherwise it returns the driver's Stop error.
+func PwShutDown() error {
+	if pw != nil {
+		// The JSON status returned by pwStop is not needed here.
+		pwStop(nil)
+		return pw.Stop()
+	}
+	return nil
+}
+
+// InstallPW runs the Playwright installer with verbose output disabled.
+// A failure is logged as a warning and returned to the caller.
+func InstallPW() error {
+	opts := &playwright.RunOptions{Verbose: false}
+	if installErr := playwright.Install(opts); installErr != nil {
+		logger.Warn("playwright not available", "error", installErr)
+		return installErr
+	}
+	return nil
+}
+
+func CheckPlaywright() error {
+ var err error
+ pw, err = playwright.Run()
+ if err != nil {
+ logger.Warn("playwright not available", "error", err)
+ return err
+ }
+ return nil
+}
+
+func pwStart(args map[string]string) []byte {
+ browserStartMu.Lock()
+ defer browserStartMu.Unlock()
+ if browserStarted {
+ return []byte(`{"error": "Browser already started"}`)
+ }
+ var err error
+ browser, err = pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
+ Headless: playwright.Bool(!cfg.PlaywrightDebug),
+ })
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to launch browser: %s"}`, err.Error()))
+ }
+ page, err = browser.NewPage()
+ if err != nil {
+ browser.Close()
+ return []byte(fmt.Sprintf(`{"error": "failed to create page: %s"}`, err.Error()))
+ }
+ browserStarted = true
+ return []byte(`{"success": true, "message": "Browser started"}`)
+}
+
+// pwStop closes the current page and browser, if any, and marks the browser
+// as stopped. args is unused. It always reports success as JSON, with a
+// message telling whether a browser was actually running. Errors from the
+// Close calls are ignored.
+func pwStop(args map[string]string) []byte {
+	browserStartMu.Lock()
+	defer browserStartMu.Unlock()
+	if browserStarted {
+		if page != nil {
+			page.Close()
+			page = nil
+		}
+		if browser != nil {
+			browser.Close()
+			browser = nil
+		}
+		browserStarted = false
+		return []byte(`{"success": true, "message": "Browser stopped"}`)
+	}
+	return []byte(`{"success": true, "message": "Browser was not running"}`)
+}
+
+// pwIsRunning reports, as a JSON object, whether browserStarted is set.
+// args is unused. The flag is read without taking browserStartMu.
+func pwIsRunning(args map[string]string) []byte {
+	status := `{"running": false, "message": "Browser is not running"}`
+	if browserStarted {
+		status = `{"running": true, "message": "Browser is running"}`
+	}
+	return []byte(status)
+}
+
+// pwNavigate loads args["url"] in the current page.
+//
+// On success it returns {"success": true, "title": ..., "url": ...} with the
+// page title and final URL; otherwise {"error": "..."}. All dynamic values
+// are JSON-escaped, since titles and error text routinely contain quotes.
+func pwNavigate(args map[string]string) []byte {
+	url, ok := args["url"]
+	if !ok || url == "" {
+		return []byte(`{"error": "url not provided"}`)
+	}
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	if _, err := page.Goto(url); err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to navigate: "+err.Error())))
+	}
+	// The title is informational only: navigation already succeeded, so a
+	// failure here just yields an empty title.
+	title, _ := page.Title()
+	pageURL := page.URL()
+	return []byte(fmt.Sprintf(`{"success": true, "title": %s, "url": %s}`, jsonString(title), jsonString(pageURL)))
+}
+
+// pwClick clicks an element matched by args["selector"].
+//
+// args["index"] optionally picks which match to click (default 0); an
+// unparsable index is logged and treated as 0. The result is a JSON object
+// carrying either "success"/"message" or "error"; error text is JSON-escaped.
+func pwClick(args map[string]string) []byte {
+	selector, ok := args["selector"]
+	if !ok || selector == "" {
+		return []byte(`{"error": "selector not provided"}`)
+	}
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	index := 0
+	if raw := args["index"]; raw != "" {
+		if i, err := strconv.Atoi(raw); err != nil {
+			logger.Warn("failed to parse index", "value", raw, "error", err)
+		} else {
+			index = i
+		}
+	}
+	locator := page.Locator(selector)
+	count, err := locator.Count()
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to find elements: "+err.Error())))
+	}
+	if index >= count {
+		return []byte(fmt.Sprintf(`{"error": "Element not found at index %d (found %d elements)"}`, index, count))
+	}
+	if err := locator.Nth(index).Click(); err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to click: "+err.Error())))
+	}
+	return []byte(`{"success": true, "message": "Clicked element"}`)
+}
+
+// pwFill fills an input matched by args["selector"] with args["text"]
+// (an absent text fills the empty string).
+//
+// args["index"] optionally picks which match to fill (default 0); an
+// unparsable index is logged and treated as 0. The result is a JSON object
+// carrying either "success"/"message" or "error"; error text is JSON-escaped.
+func pwFill(args map[string]string) []byte {
+	selector, ok := args["selector"]
+	if !ok || selector == "" {
+		return []byte(`{"error": "selector not provided"}`)
+	}
+	text := args["text"]
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	index := 0
+	if raw := args["index"]; raw != "" {
+		if i, err := strconv.Atoi(raw); err != nil {
+			logger.Warn("failed to parse index", "value", raw, "error", err)
+		} else {
+			index = i
+		}
+	}
+	locator := page.Locator(selector)
+	count, err := locator.Count()
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to find elements: "+err.Error())))
+	}
+	if index >= count {
+		return []byte(fmt.Sprintf(`{"error": "Element not found at index %d"}`, index))
+	}
+	if err := locator.Nth(index).Fill(text); err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to fill: "+err.Error())))
+	}
+	return []byte(`{"success": true, "message": "Filled input"}`)
+}
+
+// pwExtractText returns the text content of the elements matched by
+// args["selector"] (default "body") as {"text": "..."}.
+//
+// For "body" the text of the body locator is returned directly; for any
+// other selector the texts of all matches are joined with newlines, and
+// matches whose text cannot be read are skipped. The text is JSON-escaped:
+// page text normally contains newlines and quotes, which would otherwise
+// make the reply invalid JSON.
+func pwExtractText(args map[string]string) []byte {
+	selector := args["selector"]
+	if selector == "" {
+		selector = "body"
+	}
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	locator := page.Locator(selector)
+	count, err := locator.Count()
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to find elements: "+err.Error())))
+	}
+	if count == 0 {
+		return []byte(`{"error": "No elements found"}`)
+	}
+	if selector == "body" {
+		text, err := page.Locator("body").TextContent()
+		if err != nil {
+			return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to get text: "+err.Error())))
+		}
+		return []byte(fmt.Sprintf(`{"text": %s}`, jsonString(text)))
+	}
+	var texts []string
+	for i := 0; i < count; i++ {
+		text, err := locator.Nth(i).TextContent()
+		if err != nil {
+			// Best-effort: one unreadable match should not hide the rest.
+			continue
+		}
+		texts = append(texts, text)
+	}
+	return []byte(fmt.Sprintf(`{"text": %s}`, jsonString(joinLines(texts))))
+}
+
+func joinLines(lines []string) string {
+ var sb strings.Builder
+ for i, line := range lines {
+ if i > 0 {
+ sb.WriteString("\n")
+ }
+ sb.WriteString(line)
+ }
+ return sb.String()
+}
+
+// pwScreenshot saves a PNG screenshot and returns {"path": "..."}.
+//
+// args["selector"], when set to anything other than "body", captures only
+// that element; otherwise the page is captured, as a full-page shot when
+// args["full_page"] is "true". The file name is derived from the process id,
+// so successive screenshots overwrite each other. Error text and the path
+// are JSON-escaped.
+//
+// NOTE(review): a "path" argument is not read here — confirm whether callers
+// that pass one expect it to be honored.
+func pwScreenshot(args map[string]string) []byte {
+	selector := args["selector"]
+	fullPage := args["full_page"] == "true"
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	path := fmt.Sprintf("/tmp/pw_screenshot_%d.png", os.Getpid())
+	var err error
+	if selector != "" && selector != "body" {
+		_, err = page.Locator(selector).Screenshot(playwright.LocatorScreenshotOptions{
+			Path: playwright.String(path),
+		})
+	} else {
+		_, err = page.Screenshot(playwright.PageScreenshotOptions{
+			Path:     playwright.String(path),
+			FullPage: playwright.Bool(fullPage),
+		})
+	}
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to take screenshot: "+err.Error())))
+	}
+	return []byte(fmt.Sprintf(`{"path": %s}`, jsonString(path)))
+}
+
+// pwScreenshotAndView takes a screenshot like pwScreenshot and returns it as
+// a multimodal tool response: a text part naming the saved file plus an
+// image_url part built from that file by models.CreateImageURLFromPath.
+//
+// args["selector"] and args["full_page"] behave as in pwScreenshot. Failures
+// are returned as {"error": "..."} with JSON-escaped text.
+func pwScreenshotAndView(args map[string]string) []byte {
+	selector := args["selector"]
+	fullPage := args["full_page"] == "true"
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	path := fmt.Sprintf("/tmp/pw_screenshot_%d.png", os.Getpid())
+	var err error
+	if selector != "" && selector != "body" {
+		_, err = page.Locator(selector).Screenshot(playwright.LocatorScreenshotOptions{
+			Path: playwright.String(path),
+		})
+	} else {
+		_, err = page.Screenshot(playwright.PageScreenshotOptions{
+			Path:     playwright.String(path),
+			FullPage: playwright.Bool(fullPage),
+		})
+	}
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to take screenshot: "+err.Error())))
+	}
+	dataURL, err := models.CreateImageURLFromPath(path)
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to create image URL: "+err.Error())))
+	}
+	resp := models.MultimodalToolResp{
+		Type: "multimodal_content",
+		Parts: []map[string]string{
+			{"type": "text", "text": "Screenshot saved: " + path},
+			{"type": "image_url", "url": dataURL},
+		},
+	}
+	jsonResult, err := json.Marshal(resp)
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to marshal result: "+err.Error())))
+	}
+	return jsonResult
+}
+
+// pwWaitForSelector waits for the element matched by args["selector"].
+//
+// args["timeout"] is the wait limit passed to the locator (default 30000);
+// an unparsable value is logged and the default is used. The result is a
+// JSON object with "success"/"message" or a JSON-escaped "error".
+func pwWaitForSelector(args map[string]string) []byte {
+	selector, ok := args["selector"]
+	if !ok || selector == "" {
+		return []byte(`{"error": "selector not provided"}`)
+	}
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	timeout := 30000
+	if raw := args["timeout"]; raw != "" {
+		if t, err := strconv.Atoi(raw); err != nil {
+			logger.Warn("failed to parse timeout", "value", raw, "error", err)
+		} else {
+			timeout = t
+		}
+	}
+	err := page.Locator(selector).WaitFor(playwright.LocatorWaitForOptions{
+		Timeout: playwright.Float(float64(timeout)),
+	})
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("element not found: "+err.Error())))
+	}
+	return []byte(`{"success": true, "message": "Element found"}`)
+}
+
+// pwDrag performs a mouse drag from (x1,y1) to (x2,y2): move, button down,
+// move, button up.
+//
+// All four coordinates are required in args and must parse as floats; a
+// value that does not parse is reported as an error instead of silently
+// dragging from or to 0. The result is a JSON object with
+// "success"/"message" or a JSON-escaped "error".
+func pwDrag(args map[string]string) []byte {
+	names := [4]string{"x1", "y1", "x2", "y2"}
+	var raw [4]string
+	for i, name := range names {
+		v, ok := args[name]
+		if !ok {
+			return []byte(fmt.Sprintf(`{"error": "%s not provided"}`, name))
+		}
+		raw[i] = v
+	}
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	var coords [4]float64
+	for i, s := range raw {
+		f, err := strconv.ParseFloat(s, 64)
+		if err != nil {
+			logger.Warn("failed to parse "+names[i], "value", s, "error", err)
+			return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to parse "+names[i]+": "+err.Error())))
+		}
+		coords[i] = f
+	}
+	mouse := page.Mouse()
+	if err := mouse.Move(coords[0], coords[1]); err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to move mouse: "+err.Error())))
+	}
+	if err := mouse.Down(); err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to mouse down: "+err.Error())))
+	}
+	if err := mouse.Move(coords[2], coords[3]); err != nil {
+		// Best-effort release so a failed drag does not leave the button held.
+		_ = mouse.Up()
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to move mouse: "+err.Error())))
+	}
+	if err := mouse.Up(); err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to mouse up: "+err.Error())))
+	}
+	msg := fmt.Sprintf("Dragged from (%s,%s) to (%s,%s)", raw[0], raw[1], raw[2], raw[3])
+	return []byte(fmt.Sprintf(`{"success": true, "message": %s}`, jsonString(msg)))
+}
+
+func pwDragBySelector(args map[string]string) []byte {
+ fromSelector, ok := args["fromSelector"]
+ if !ok || fromSelector == "" {
+ return []byte(`{"error": "fromSelector not provided"}`)
+ }
+ toSelector, ok := args["toSelector"]
+ if !ok || toSelector == "" {
+ return []byte(`{"error": "toSelector not provided"}`)
+ }
+ if !browserStarted || page == nil {
+ return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+ }
+ fromJS := fmt.Sprintf(`
+ function getCenter(selector) {
+ const el = document.querySelector(selector);
+ if (!el) return null;
+ const rect = el.getBoundingClientRect();
+ return { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 };
+ }
+ getCenter(%q)
+ `, fromSelector)
+ toJS := fmt.Sprintf(`
+ function getCenter(selector) {
+ const el = document.querySelector(selector);
+ if (!el) return null;
+ const rect = el.getBoundingClientRect();
+ return { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 };
+ }
+ getCenter(%q)
+ `, toSelector)
+ fromResult, err := page.Evaluate(fromJS)
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to get from element: %s"}`, err.Error()))
+ }
+ fromMap, ok := fromResult.(map[string]interface{})
+ if !ok || fromMap == nil {
+ return []byte(fmt.Sprintf(`{"error": "from selector '%s' not found"}`, fromSelector))
+ }
+ fromX := fromMap["x"].(float64)
+ fromY := fromMap["y"].(float64)
+ toResult, err := page.Evaluate(toJS)
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to get to element: %s"}`, err.Error()))
+ }
+ toMap, ok := toResult.(map[string]interface{})
+ if !ok || toMap == nil {
+ return []byte(fmt.Sprintf(`{"error": "to selector '%s' not found"}`, toSelector))
+ }
+ toX := toMap["x"].(float64)
+ toY := toMap["y"].(float64)
+ mouse := page.Mouse()
+ err = mouse.Move(fromX, fromY)
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to move mouse: %s"}`, err.Error()))
+ }
+ err = mouse.Down()
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to mouse down: %s"}`, err.Error()))
+ }
+ err = mouse.Move(toX, toY)
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to move mouse: %s"}`, err.Error()))
+ }
+ err = mouse.Up()
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to mouse up: %s"}`, err.Error()))
+ }
+ msg := fmt.Sprintf("Dragged from %s (%.0f,%.0f) to %s (%.0f,%.0f)", fromSelector, fromX, fromY, toSelector, toX, toY)
+ return []byte(fmt.Sprintf(`{"success": true, "message": "%s"}`, msg))
+}
+
+// pwClickAt clicks the mouse at page coordinates args["x"], args["y"].
+//
+// Both values are required and must parse as floats. The result is a JSON
+// object with "success"/"message" or "error". Error text is JSON-escaped:
+// strconv errors quote the offending input, which would otherwise break the
+// reply.
+//
+// nolint:unused
+func pwClickAt(args map[string]string) []byte {
+	x, ok := args["x"]
+	if !ok {
+		return []byte(`{"error": "x not provided"}`)
+	}
+	y, ok := args["y"]
+	if !ok {
+		return []byte(`{"error": "y not provided"}`)
+	}
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	fx, err := strconv.ParseFloat(x, 64)
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to parse x: "+err.Error())))
+	}
+	fy, err := strconv.ParseFloat(y, 64)
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to parse y: "+err.Error())))
+	}
+	if err := page.Mouse().Click(fx, fy); err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to click: "+err.Error())))
+	}
+	msg := fmt.Sprintf("Clicked at (%s,%s)", x, y)
+	return []byte(fmt.Sprintf(`{"success": true, "message": %s}`, jsonString(msg)))
+}
+
+// pwGetHTML returns the inner HTML of the first element matched by
+// args["selector"] (default "body") as {"html": "..."}.
+//
+// Both the HTML and any error text are JSON-escaped.
+func pwGetHTML(args map[string]string) []byte {
+	selector := args["selector"]
+	if selector == "" {
+		selector = "body"
+	}
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	locator := page.Locator(selector)
+	count, err := locator.Count()
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to find elements: "+err.Error())))
+	}
+	if count == 0 {
+		return []byte(`{"error": "No elements found"}`)
+	}
+	html, err := locator.First().InnerHTML()
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to get HTML: "+err.Error())))
+	}
+	return []byte(fmt.Sprintf(`{"html": %s}`, jsonString(html)))
+}
+
+// DOMElement is the JSON shape used to describe one DOM node and its
+// subtree; every field is omitted from the output when empty.
+type DOMElement struct {
+ // Tag is the lower-cased node name.
+ Tag string `json:"tag,omitempty"`
+ // Attributes maps attribute names to their string values.
+ Attributes map[string]string `json:"attributes,omitempty"`
+ // Text is the element's text content.
+ Text string `json:"text,omitempty"`
+ // Children holds the nested elements.
+ Children []DOMElement `json:"children,omitempty"`
+ // Selector is not populated by the code visible here.
+ Selector string `json:"selector,omitempty"`
+ // InnerHTML is the element's inner HTML markup.
+ InnerHTML string `json:"innerHTML,omitempty"`
+}
+
+// buildDOMTree converts every element matched by locator into a DOMElement.
+// It fails only when the match count cannot be determined; elements whose
+// conversion fails are left out of the result.
+func buildDOMTree(locator playwright.Locator) ([]DOMElement, error) {
+	total, err := locator.Count()
+	if err != nil {
+		return nil, err
+	}
+	var nodes []DOMElement
+	for idx := 0; idx < total; idx++ {
+		if node, convErr := elementToDOM(locator.Nth(idx)); convErr == nil {
+			nodes = append(nodes, node)
+		}
+	}
+	return nodes, nil
+}
+
+// elementToDOM describes el as a DOMElement: tag name, attributes, text
+// content, inner HTML and, recursively, its direct child elements.
+//
+// Every lookup is best-effort: a property that cannot be read is simply left
+// empty, so the returned error is always nil.
+func elementToDOM(el playwright.Locator) (DOMElement, error) {
+	dom := DOMElement{}
+	if tag, err := el.Evaluate(`el => el.nodeName`, nil); err == nil {
+		dom.Tag = strings.ToLower(fmt.Sprintf("%v", tag))
+	}
+	attrs, err := el.Evaluate(`el => {
+		let attrs = {};
+		for (let i = 0; i < el.attributes.length; i++) {
+			let attr = el.attributes[i];
+			attrs[attr.name] = attr.value;
+		}
+		return attrs;
+	}`, nil)
+	if err == nil {
+		if amap, ok := attrs.(map[string]any); ok {
+			attributes := make(map[string]string, len(amap))
+			for k, v := range amap {
+				if vs, ok := v.(string); ok {
+					attributes[k] = vs
+				}
+			}
+			if len(attributes) > 0 {
+				dom.Attributes = attributes
+			}
+		}
+	}
+	if text, err := el.TextContent(); err == nil && text != "" {
+		dom.Text = text
+	}
+	if innerHTML, err := el.InnerHTML(); err == nil && innerHTML != "" {
+		dom.InnerHTML = innerHTML
+	}
+	// Restrict the recursion to direct children. A bare "*" matches every
+	// descendant, which lists deeper nodes once per ancestor and makes the
+	// walk blow up on real pages.
+	children, err := buildDOMTree(el.Locator(":scope > *"))
+	if err == nil && len(children) > 0 {
+		dom.Children = children
+	}
+	return dom, nil
+}
+
+// pwGetDOM returns the DOM subtree of the first element matched by
+// args["selector"] (default "body") as {"dom": {...}}, using the DOMElement
+// JSON shape. Error text is JSON-escaped.
+func pwGetDOM(args map[string]string) []byte {
+	selector := args["selector"]
+	if selector == "" {
+		selector = "body"
+	}
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	locator := page.Locator(selector)
+	count, err := locator.Count()
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to find elements: "+err.Error())))
+	}
+	if count == 0 {
+		return []byte(`{"error": "No elements found"}`)
+	}
+	dom, err := elementToDOM(locator.First())
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to get DOM: "+err.Error())))
+	}
+	data, err := json.Marshal(dom)
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to marshal DOM: "+err.Error())))
+	}
+	return []byte(fmt.Sprintf(`{"dom": %s}`, string(data)))
+}
+
+// pwSearchElements lists the elements matching args["text"] (looked up by
+// visible text) or, when text is empty, args["selector"].
+//
+// The result is {"elements": [...]}, each entry carrying the match index,
+// lower-cased tag, text content and inner HTML; properties that cannot be
+// read are left empty. Error text is JSON-escaped.
+//
+// nolint:unused
+func pwSearchElements(args map[string]string) []byte {
+	text := args["text"]
+	selector := args["selector"]
+	if text == "" && selector == "" {
+		return []byte(`{"error": "text or selector not provided"}`)
+	}
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	var locator playwright.Locator
+	if text != "" {
+		locator = page.GetByText(text)
+	} else {
+		locator = page.Locator(selector)
+	}
+	count, err := locator.Count()
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to search elements: "+err.Error())))
+	}
+	if count == 0 {
+		return []byte(`{"elements": []}`)
+	}
+	results := make([]map[string]string, 0, count)
+	for i := 0; i < count; i++ {
+		el := locator.Nth(i)
+		// Per-element lookups are best-effort; failures leave the field empty.
+		tag, _ := el.Evaluate(`el => el.nodeName`, nil)
+		elText, _ := el.TextContent()
+		elHTML, _ := el.InnerHTML()
+		results = append(results, map[string]string{
+			"index": strconv.Itoa(i),
+			"tag":   strings.ToLower(fmt.Sprintf("%v", tag)),
+			"text":  elText,
+			"html":  elHTML,
+		})
+	}
+	data, err := json.Marshal(results)
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to marshal results: "+err.Error())))
+	}
+	return []byte(fmt.Sprintf(`{"elements": %s}`, string(data)))
+}
+
+// jsonString returns s encoded as a JSON string literal, including the
+// surrounding double quotes. The Marshal error is discarded.
+func jsonString(s string) string {
+	encoded, _ := json.Marshal(s)
+	return string(encoded)
+}
diff --git a/tools/tools.go b/tools/tools.go
new file mode 100644
index 0000000..967e5de
--- /dev/null
+++ b/tools/tools.go
@@ -0,0 +1,1909 @@
+package tools
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "gf-lt/agent"
+ "gf-lt/config"
+ "gf-lt/models"
+ "gf-lt/storage"
+ "log/slog"
+ "os"
+ "os/exec"
+ "regexp"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+
+ "gf-lt/rag"
+
+ "github.com/GrailFinder/searchagent/searcher"
+)
+
+// Prompt texts used by the tools package.
+var (
+ // RpDefenitionSysMsg is prompt text laying out the ground rules for roleplay.
+ RpDefenitionSysMsg = `
+For this roleplay immersion is at most importance.
+Every character thinks and acts based on their personality and setting of the roleplay.
+Meta discussions outside of roleplay is allowed if clearly labeled as out of character, for example: (ooc: {msg}) or <ooc>{msg}</ooc>.
+`
+ // ToolSysMsg is prompt text that lists the available tools and shows the
+ // __tool_call__ JSON format for invoking them.
+ ToolSysMsg = `You can do functions call if needed.
+Your current tools:
+<tools>
+[
+{
+"name":"run",
+"args": ["command"],
+"when_to_use": "Main tool for file operations, shell commands, memory, git, and todo. Use run \"help\" for all commands. Examples: run \"ls -la\", run \"help\", run \"mkdir -p foo/bar\", run \"cat file.txt\", run \"view_img image.png\", run \"git status\", run \"memory store foo bar\", run \"todo create task\", run \"grep pattern file\", run \"cd /path\", run \"pwd\", run \"find . -name *.txt\", run \"file image.png\", run \"head file\", run \"tail file\", run \"wc -l file\", run \"sort file\", run \"uniq file\", run \"sed 's/old/new/' file\", run \"echo text\", run \"go build ./...\", run \"time\", run \"stat file\", run \"cp src dst\", run \"mv src dst\", run \"rm file\""
+},
+{
+"name":"view_img",
+"args": ["file"],
+"when_to_use": "View an image file and get it displayed in the conversation for visual analysis. Supports: png, jpg, jpeg, gif, webp, svg. Example: view_img /path/to/image.png or view_img image.png"
+},
+{
+"name":"websearch",
+"args": ["query", "limit"],
+"when_to_use": "search the web for information"
+},
+{
+"name":"rag_search",
+"args": ["query", "limit"],
+"when_to_use": "search local document database"
+},
+{
+"name":"read_url",
+"args": ["url"],
+"when_to_use": "get content from a webpage"
+},
+{
+"name":"read_url_raw",
+"args": ["url"],
+"when_to_use": "get raw content from a webpage"
+}
+]
+</tools>
+To make a function call return a json object within __tool_call__ tags;
+<example_request>
+__tool_call__
+{
+"name":"run",
+"args": {"command": "ls -la /home"}
+}
+__tool_call__
+</example_request>
+<example_request>
+__tool_call__
+{
+"name":"view_img",
+"args": {"file": "screenshot.png"}
+}
+__tool_call__
+</example_request>
+Tool call is addressed to the tool agent, avoid sending more info than the tool call itself, while making a call.
+When done right, tool call will be delivered to the tool agent. tool agent will respond with the results of the call.
+<example_response>
+tool:
+total 1234
+drwxr-xr-x 2 user user 4096 Jan 1 12:00 .
+</example_response>
+After that you are free to respond to the user.
+`
+ // webSearchSysPrompt is the prompt given to the agents created for "websearch".
+ webSearchSysPrompt = `Summarize the web search results, extracting key information and presenting a concise answer. Provide sources and URLs where relevant.`
+ // ragSearchSysPrompt is the prompt given to the agent registered as "rag_search".
+ ragSearchSysPrompt = `Synthesize the document search results, extracting key information and presenting a concise answer. Provide sources and document IDs where relevant.`
+ // readURLSysPrompt is the prompt given to the agent registered as "read_url".
+ readURLSysPrompt = `Extract and summarize the content from the webpage. Provide key information, main points, and any relevant details.`
+ // summarySysPrompt is the prompt given to the agent registered as "summarize_chat".
+ summarySysPrompt = `Please provide a concise summary of the following conversation. Focus on key points, decisions, and actions. Provide only the summary, no additional commentary.`
+)
+
+// WebSearcher is the web search/retrieval backend used by the websearch and
+// read_url handlers. InitTools sets it, and leaves it nil when the searcher
+// cannot be created.
+var WebSearcher searcher.WebSurfer
+
+// Package-level state shared by the tool handlers.
+var (
+ // xdotoolPath is the resolved path of the xdotool binary; empty when not found.
+ xdotoolPath string
+ // maimPath is the resolved path of the maim binary; empty when not found.
+ maimPath string
+ // logger is the logger the package-level tool handlers write to.
+ logger *slog.Logger
+ // cfg is the configuration stored by InitTools.
+ cfg *config.Config
+ // getTokenFunc supplies a token to the web agent client; set via SetTokenFunc.
+ getTokenFunc func() string
+)
+
+// Tools bundles the dependencies and lazily created agent client used by the
+// tool handlers. It is constructed by InitTools.
+type Tools struct {
+ cfg *config.Config
+ logger *slog.Logger
+ store storage.FullRepo
+ // WindowToolsAvailable is true when both xdotool and maim were found.
+ WindowToolsAvailable bool
+ // getTokenFunc func() string
+ // webAgentClient is created once, on first use, by GetWebAgentClient.
+ webAgentClient *agent.AgentClient
+ // webAgentClientOnce guards the one-time creation of webAgentClient.
+ webAgentClientOnce sync.Once
+ // webSearchAgent is the web search agent created in initAgentsB.
+ webSearchAgent agent.AgenterB
+}
+
+// initAgentsB makes sure the web agent client exists, creates the web search
+// agent kept on t, and registers one agent per tool name: rag_search,
+// websearch, read_url and summarize_chat, each with its own system prompt.
+func (t *Tools) initAgentsB() {
+	client := t.GetWebAgentClient()
+	t.webSearchAgent = agent.NewWebAgentB(client, webSearchSysPrompt)
+	// Registration order matches the tool list above.
+	registrations := []struct {
+		name   string
+		prompt string
+	}{
+		{"rag_search", ragSearchSysPrompt},
+		{"websearch", webSearchSysPrompt},
+		{"read_url", readURLSysPrompt},
+		{"summarize_chat", summarySysPrompt},
+	}
+	for _, reg := range registrations {
+		agent.RegisterB(reg.name, agent.NewWebAgentB(client, reg.prompt))
+	}
+}
+
+// InitTools wires up the tools package and returns the Tools handle.
+//
+// It stores the config and logger in the package-level cfg and logger used
+// by the tool handlers, starts Playwright when cfg.PlaywrightEnabled is set
+// (installing it first if needed, and exiting the process if that fails),
+// sets the filesystem root and memory store, creates the web searcher,
+// initializes RAG, probes for the window tools and registers the agents.
+// A missing web searcher or RAG backend is only logged; the matching tools
+// then report themselves as unavailable.
+//
+// A nil initLogger falls back to slog.Default().
+func InitTools(initCfg *config.Config, initLogger *slog.Logger, store storage.FullRepo) *Tools {
+	if initLogger == nil {
+		initLogger = slog.Default()
+	}
+	// The parameter must not be named "logger": that would shadow the
+	// package-level variable and leave it nil for every handler.
+	logger = initLogger
+	cfg = initCfg
+	if initCfg.PlaywrightEnabled {
+		if err := CheckPlaywright(); err != nil {
+			// slow, need a faster check if playwright install
+			if err := InstallPW(); err != nil {
+				logger.Error("failed to install playwright", "error", err)
+				os.Exit(1)
+			}
+			if err := CheckPlaywright(); err != nil {
+				logger.Error("failed to run playwright", "error", err)
+				os.Exit(1)
+			}
+		}
+	}
+	// Initialize fs root directory
+	SetFSRoot(cfg.FilePickerDir)
+	// Initialize memory store
+	SetMemoryStore(&memoryAdapter{store: store, cfg: cfg}, cfg.AssistantRole)
+	sa, err := searcher.NewWebSurfer(searcher.SearcherTypeScraper, "")
+	if err != nil {
+		logger.Warn("search agent unavailable; web_search tool disabled", "error", err)
+		WebSearcher = nil
+	} else {
+		WebSearcher = sa
+	}
+	if err := rag.Init(cfg, logger, store); err != nil {
+		logger.Warn("failed to init rag; rag_search tool will not be available", "error", err)
+	}
+	t := &Tools{
+		cfg:    cfg,
+		logger: logger,
+		store:  store,
+	}
+	t.checkWindowTools()
+	t.initAgentsB()
+	return t
+}
+
+// checkWindowTools looks up xdotool and maim on PATH, records their paths in
+// the package-level variables and sets t.WindowToolsAvailable when both are
+// present. It logs which tools were found or are missing.
+func (t *Tools) checkWindowTools() {
+	// LookPath errors are dropped: an empty path is the "not found" signal.
+	xdotoolPath, _ = exec.LookPath("xdotool")
+	maimPath, _ = exec.LookPath("maim")
+	haveXdotool := xdotoolPath != ""
+	haveMaim := maimPath != ""
+	t.WindowToolsAvailable = haveXdotool && haveMaim
+	if t.WindowToolsAvailable {
+		t.logger.Info("window tools available: xdotool and maim found")
+		return
+	}
+	if !haveXdotool {
+		t.logger.Warn("xdotool not found, window listing tools will not be available")
+	}
+	if !haveMaim {
+		t.logger.Warn("maim not found, window capture tools will not be available")
+	}
+}
+
+// SetTokenFunc installs fn as the package-level token provider that the web
+// agent client calls when it needs a token.
+func SetTokenFunc(fn func() string) {
+ getTokenFunc = fn
+}
+
+// GetWebAgentClient returns the shared agent client, creating it on the
+// first call. The client is given a token callback that defers to
+// getTokenFunc at call time and yields "" while none is installed.
+func (t *Tools) GetWebAgentClient() *agent.AgentClient {
+	t.webAgentClientOnce.Do(func() {
+		t.webAgentClient = agent.NewAgentClient(cfg, logger, func() string {
+			if getTokenFunc == nil {
+				return ""
+			}
+			return getTokenFunc()
+		})
+	})
+	return t.webAgentClient
+}
+
+// RegisterWindowTools currently only calls removeWindowToolsFromBaseTools;
+// modelHasVision is not used yet.
+func RegisterWindowTools(modelHasVision bool) {
+ removeWindowToolsFromBaseTools()
+ // Window tools registration happens here if needed
+}
+
+// func RegisterPlaywrightTools() {
+// removePlaywrightToolsFromBaseTools()
+// if cfg != nil && cfg.PlaywrightEnabled {
+// // Playwright tools are registered here
+// }
+// }
+
+// websearch runs a web search for args["query"] and returns the results as
+// JSON. args["limit"] caps the number of results (default 3; a missing,
+// unparsable or non-positive value falls back to the default).
+//
+// Failures are logged and returned as plain-text messages, including the
+// case where no web searcher is available.
+func websearch(args map[string]string) []byte {
+	query, ok := args["query"]
+	if !ok || query == "" {
+		msg := "query not provided to web_search tool"
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	// InitTools leaves WebSearcher nil when the searcher could not be created.
+	if WebSearcher == nil {
+		msg := "search agent unavailable; web_search tool disabled"
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	limitS, ok := args["limit"]
+	if !ok || limitS == "" {
+		limitS = "3"
+	}
+	limit, err := strconv.Atoi(limitS)
+	if err != nil || limit <= 0 {
+		logger.Warn("websearch limit; passed bad value; setting to default (3)",
+			"limit_arg", limitS, "error", err)
+		limit = 3
+	}
+	resp, err := WebSearcher.Search(context.Background(), query, limit)
+	if err != nil {
+		msg := "search tool failed; error: " + err.Error()
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	data, err := json.Marshal(resp)
+	if err != nil {
+		msg := "failed to marshal search result; error: " + err.Error()
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	return data
+}
+
+// ragsearch searches the local document database for args["query"] and
+// returns the results as JSON. args["limit"] caps the number of results
+// (default 10; a missing, unparsable or non-positive value falls back to the
+// default).
+//
+// Failures are logged and returned as plain-text messages, including the
+// case where RAG was never initialized.
+func ragsearch(args map[string]string) []byte {
+	query, ok := args["query"]
+	if !ok || query == "" {
+		msg := "query not provided to rag_search tool"
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	limitS, ok := args["limit"]
+	if !ok || limitS == "" {
+		limitS = "10"
+	}
+	limit, err := strconv.Atoi(limitS)
+	if err != nil || limit <= 0 {
+		logger.Warn("ragsearch limit; passed bad value; setting to default (10)",
+			"limit_arg", limitS, "error", err)
+		limit = 10
+	}
+	ragInstance := rag.GetInstance()
+	if ragInstance == nil {
+		msg := "rag not initialized; rag_search tool is not available"
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	results, err := ragInstance.Search(query, limit)
+	if err != nil {
+		msg := "rag search failed; error: " + err.Error()
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	data, err := json.Marshal(results)
+	if err != nil {
+		msg := "failed to marshal rag search result; error: " + err.Error()
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	return data
+}
+
+// websearchRaw runs a web search like websearch but returns the response
+// formatted with %+v instead of JSON. args["limit"] caps the number of
+// results (default 3; a missing, unparsable or non-positive value falls back
+// to the default).
+//
+// Failures are logged and returned as plain-text messages, including the
+// case where no web searcher is available.
+func websearchRaw(args map[string]string) []byte {
+	query, ok := args["query"]
+	if !ok || query == "" {
+		msg := "query not provided to websearch_raw tool"
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	// InitTools leaves WebSearcher nil when the searcher could not be created.
+	if WebSearcher == nil {
+		msg := "search agent unavailable; websearch_raw tool disabled"
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	limitS, ok := args["limit"]
+	if !ok || limitS == "" {
+		limitS = "3"
+	}
+	limit, err := strconv.Atoi(limitS)
+	if err != nil || limit <= 0 {
+		logger.Warn("websearch_raw limit; passed bad value; setting to default (3)",
+			"limit_arg", limitS, "error", err)
+		limit = 3
+	}
+	resp, err := WebSearcher.Search(context.Background(), query, limit)
+	if err != nil {
+		msg := "search tool failed; error: " + err.Error()
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	// Return raw response without any processing
+	return []byte(fmt.Sprintf("%+v", resp))
+}
+
+// readURL retrieves the content behind args["url"] and returns it as JSON.
+//
+// Failures are logged and returned as plain-text messages, including the
+// case where no web searcher is available.
+func readURL(args map[string]string) []byte {
+	link, ok := args["url"]
+	if !ok || link == "" {
+		msg := "link not provided to read_url tool"
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	// InitTools leaves WebSearcher nil when the searcher could not be created.
+	if WebSearcher == nil {
+		msg := "search agent unavailable; read_url tool disabled"
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	resp, err := WebSearcher.RetrieveFromLink(context.Background(), link)
+	if err != nil {
+		msg := "search tool failed; error: " + err.Error()
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	data, err := json.Marshal(resp)
+	if err != nil {
+		msg := "failed to marshal search result; error: " + err.Error()
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	return data
+}
+
+// readURLRaw retrieves the content behind args["url"] like readURL but
+// returns the response formatted with %+v instead of JSON.
+//
+// Failures are logged and returned as plain-text messages, including the
+// case where no web searcher is available.
+func readURLRaw(args map[string]string) []byte {
+	link, ok := args["url"]
+	if !ok || link == "" {
+		msg := "link not provided to read_url_raw tool"
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	// InitTools leaves WebSearcher nil when the searcher could not be created.
+	if WebSearcher == nil {
+		msg := "search agent unavailable; read_url_raw tool disabled"
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	resp, err := WebSearcher.RetrieveFromLink(context.Background(), link)
+	if err != nil {
+		msg := "search tool failed; error: " + err.Error()
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	// Return raw response without any processing
+	return []byte(fmt.Sprintf("%+v", resp))
+}
+
+// runCmd is the single entry point behind the "run" tool. It takes the
+// command line from args["command"], splits it on whitespace and dispatches
+// on the first word: help, memory, todo, window listing/capture, view_img,
+// browser automation, git, or one of the allowed file/shell commands.
+//
+// Anything else is rejected with an "[error] command not allowed" message.
+// The result is the handler's output as bytes.
+func runCmd(args map[string]string) []byte {
+	commandStr := args["command"]
+	if commandStr == "" {
+		msg := "command not provided to run tool"
+		logger.Error(msg)
+		return []byte(msg)
+	}
+	// Parse the command - first word is subcommand
+	parts := strings.Fields(commandStr)
+	if len(parts) == 0 {
+		return []byte("[error] empty command")
+	}
+	subcmd := parts[0]
+	rest := parts[1:]
+	// Route to appropriate handler
+	switch subcmd {
+	case "help":
+		// help - show all commands
+		// help <cmd> - show help for specific command
+		return []byte(getHelp(rest))
+	case "memory":
+		// memory store <topic> <data> | memory get <topic> | memory list | memory forget <topic>
+		// rest already starts with the memory subcommand, so it is passed
+		// through unchanged; prepending "store" would turn every call into a
+		// store and make get/list/forget unreachable.
+		return []byte(FsMemory(rest, ""))
+	case "todo":
+		return handleTodoSubcommand(rest, args)
+	case "window", "windows":
+		// window list - list all windows
+		return listWindows(args)
+	case "capture", "screenshot":
+		// capture <window-name> - capture a window
+		return captureWindow(args)
+	case "capture_and_view", "screenshot_and_view":
+		// capture and view screenshot
+		return captureWindowAndView(args)
+	case "view_img":
+		// view_img <file> - view image for multimodal
+		return []byte(FsViewImg(rest, ""))
+	case "browser":
+		// browser <action> [args...] - Playwright browser automation
+		return runBrowserCommand(rest, args)
+	case "mkdir", "ls", "cat", "pwd", "cd", "cp", "mv", "rm", "sed", "grep", "head", "tail", "wc", "sort", "uniq", "echo", "time", "stat", "go", "find", "file":
+		// File operations and shell commands - use ExecChain which has whitelist
+		return executeCommand(args)
+	case "git":
+		// git has its own whitelist in FsGit
+		return []byte(FsGit(rest, ""))
+	default:
+		// Unknown subcommand - tell user to run help tool
+		return []byte("[error] command not allowed. Run 'help' tool to see available commands.")
+	}
+}
+
+// runBrowserCommand routes browser subcommands to Playwright handlers.
+//
+// args holds the words that followed "browser": args[0] is the action and the
+// remaining words are that action's positional arguments. originalArgs is the
+// caller's argument map; it is forwarded unchanged to the handlers that take
+// no positional input (start, stop, running, dom). Every other action builds a
+// fresh argument map from the positional words.
+//
+// With no action a usage summary is returned. An unrecognised action yields
+// "unknown browser action: <action>".
+func runBrowserCommand(args []string, originalArgs map[string]string) []byte {
+ if len(args) == 0 {
+ return []byte(`usage: browser <action> [args...]
+Actions:
+ start - start browser
+ stop - stop browser
+ running - check if browser is running
+ go <url> - navigate to URL
+ click <selector> - click element
+ fill <selector> <text> - fill input
+ text [selector] - extract text
+ html [selector] - get HTML
+ dom - get DOM
+ screenshot [path] - take screenshot
+ screenshot_and_view - take and view screenshot
+ wait <selector> - wait for element
+ drag <from> <to> - drag element`)
+ }
+ action := args[0]
+ rest := args[1:]
+ switch action {
+ case "start":
+ return pwStart(originalArgs)
+ case "stop":
+ return pwStop(originalArgs)
+ case "running":
+ return pwIsRunning(originalArgs)
+ case "go", "navigate", "open":
+ // browser go <url>
+ // Only the first word is used as the URL; an absent or empty URL yields usage.
+ url := ""
+ if len(rest) > 0 {
+ url = rest[0]
+ }
+ if url == "" {
+ return []byte("usage: browser go <url>")
+ }
+ return pwNavigate(map[string]string{"url": url})
+ case "click":
+ // browser click <selector> [index]
+ // index stays "0" unless a second word is supplied.
+ selector := ""
+ index := "0"
+ if len(rest) > 0 {
+ selector = rest[0]
+ }
+ if len(rest) > 1 {
+ index = rest[1]
+ }
+ if selector == "" {
+ return []byte("usage: browser click <selector> [index]")
+ }
+ return pwClick(map[string]string{"selector": selector, "index": index})
+ case "fill":
+ // browser fill <selector> <text>
+ // Every word after the selector is re-joined with single spaces to form the text.
+ if len(rest) < 2 {
+ return []byte("usage: browser fill <selector> <text>")
+ }
+ return pwFill(map[string]string{"selector": rest[0], "text": strings.Join(rest[1:], " ")})
+ case "text":
+ // browser text [selector]
+ // An empty selector is passed through when none is given.
+ selector := ""
+ if len(rest) > 0 {
+ selector = rest[0]
+ }
+ return pwExtractText(map[string]string{"selector": selector})
+ case "html":
+ // browser html [selector]
+ // An empty selector is passed through when none is given.
+ selector := ""
+ if len(rest) > 0 {
+ selector = rest[0]
+ }
+ return pwGetHTML(map[string]string{"selector": selector})
+ case "dom":
+ return pwGetDOM(originalArgs)
+ case "screenshot":
+ // browser screenshot [path]
+ // An empty path is passed through when none is given.
+ path := ""
+ if len(rest) > 0 {
+ path = rest[0]
+ }
+ return pwScreenshot(map[string]string{"path": path})
+ case "screenshot_and_view":
+ // browser screenshot_and_view [path]
+ path := ""
+ if len(rest) > 0 {
+ path = rest[0]
+ }
+ return pwScreenshotAndView(map[string]string{"path": path})
+ case "wait":
+ // browser wait <selector>
+ selector := ""
+ if len(rest) > 0 {
+ selector = rest[0]
+ }
+ if selector == "" {
+ return []byte("usage: browser wait <selector>")
+ }
+ return pwWaitForSelector(map[string]string{"selector": selector})
+ case "drag":
+ // browser drag <x1> <y1> <x2> <y2> OR browser drag <from_selector> <to_selector>
+ // NOTE(review): the combined condition below reduces to len(rest) < 2; the
+ // four-word requirement for the coordinate form is enforced further down.
+ if len(rest) < 4 && len(rest) < 2 {
+ return []byte("usage: browser drag <x1> <y1> <x2> <y2> OR browser drag <from_selector> <to_selector>")
+ }
+ // Check if first arg is a number (coordinates) or selector
+ // Either parse succeeding selects the coordinate form.
+ _, err := strconv.Atoi(rest[0])
+ _, err2 := strconv.ParseFloat(rest[0], 64)
+ if err == nil || err2 == nil {
+ // Coordinates: browser drag 100 200 300 400
+ if len(rest) < 4 {
+ return []byte("usage: browser drag <x1> <y1> <x2> <y2>")
+ }
+ return pwDrag(map[string]string{
+ "x1": rest[0], "y1": rest[1],
+ "x2": rest[2], "y2": rest[3],
+ })
+ }
+ // Selectors: browser drag #item #container
+ // pwDrag needs coordinates, so we need to get element positions first
+ // This requires a different approach - use JavaScript to get centers
+ return pwDragBySelector(map[string]string{
+ "fromSelector": rest[0],
+ "toSelector": rest[1],
+ })
+ default:
+ return []byte("unknown browser action: " + action)
+ }
+}
+
+// getHelp returns help text for commands.
+//
+// With no arguments it returns the general overview that lists every command.
+// Otherwise args[0] selects a per-command help page; any arguments after the
+// first are ignored. A command without a dedicated page produces a
+// "No help available for: <cmd>" message that points back to the general help.
+func getHelp(args []string) string {
+ if len(args) == 0 {
+ // General help - show all commands
+ return `Available commands:
+ help <cmd> - show help for a command (use: help memory, help git, etc.)
+
+ # File operations
+ ls [path] - list files in directory
+ cat <file> - read file content
+ view_img <file> - view image file
+ write <file> - write content to file
+ stat <file> - get file info
+ rm <file> - delete file
+ cp <src> <dst> - copy file
+ mv <src> <dst> - move/rename file
+ mkdir [-p] <dir> - create directory (use full path)
+ pwd - print working directory
+ cd <dir> - change directory
+ sed 's/old/new/[g]' [file] - text replacement
+
+ # Text processing
+ echo <args> - echo back input
+ time - show current time
+ grep <pattern> - filter lines (supports -i, -v, -c)
+ head [n] - show first n lines
+ tail [n] - show last n lines
+ wc [-l|-w|-c] - count lines/words/chars
+ sort [-r|-n] - sort lines
+ uniq [-c] - remove duplicates
+
+ # Git (read-only)
+ git <cmd> - git commands (status, log, diff, show, branch, etc.)
+
+ # Go
+ go <cmd> - go commands (run, build, test, mod, etc.)
+
+ # Memory
+ memory store <topic> <data> - save to memory
+ memory get <topic> - retrieve from memory
+ memory list - list all topics
+ memory forget <topic> - delete from memory
+
+ # Todo
+ todo create <task> - create a todo
+ todo read - list all todos
+ todo update <id> <status> - update todo (pending/in_progress/completed)
+ todo delete <id> - delete a todo
+
+ # Window (requires xdotool + maim)
+ window - list available windows
+ capture <name> - capture a window screenshot
+ capture_and_view <name> - capture and view screenshot
+
+ # Browser (requires Playwright)
+ browser start - start browser
+ browser stop - stop browser
+ browser running - check if running
+ browser go <url> - navigate to URL
+ browser click <sel> - click element
+ browser fill <sel> <txt> - fill input
+ browser text [sel] - extract text
+ browser html [sel] - get HTML
+ browser screenshot - take screenshot
+ browser wait <sel> - wait for element
+ browser drag <x1> <y1> <x2> <y2> - drag by coordinates
+ browser drag <sel1> <sel2> - drag by selectors (center points)
+
+ # System
+ <any shell command> - run shell command directly
+
+Use: run "command" to execute.`
+ }
+
+ // Specific command help
+ // Only the first argument selects the page.
+ cmd := args[0]
+ switch cmd {
+ case "ls":
+ return `ls [directory]
+ List files in a directory.
+ Examples:
+ run "ls"
+ run "ls /home/user"
+ run "ls -la" (via shell)`
+ case "cat":
+ return `cat <file>
+ Read file content.
+ Examples:
+ run "cat readme.md"
+ run "cat -b image.png" (base64 output)`
+ case "view_img":
+ return `view_img <image-file>
+ View an image file for multimodal analysis.
+ Supports: png, jpg, jpeg, gif, webp, svg
+ Example:
+ run "view_img screenshot.png"`
+ case "write":
+ return `write <file> [content]
+ Write content to a file.
+ Examples:
+ run "write notes.txt hello world"
+ run "write data.json" (with stdin)`
+ case "memory":
+ return `memory <subcommand> [args]
+ Manage memory storage.
+ Subcommands:
+ store <topic> <data> - save data to a topic
+ get <topic> - retrieve data from a topic
+ list - list all topics
+ forget <topic> - delete a topic
+ Examples:
+ run "memory store foo bar"
+ run "memory get foo"
+ run "memory list"`
+ case "todo":
+ return `todo <subcommand> [args]
+ Manage todo list.
+ Subcommands:
+ create <task> - create a new todo
+ read [id] - list all todos or read specific one
+ update <id> <status> - update status (pending/in_progress/completed)
+ delete <id> - delete a todo
+ Examples:
+ run "todo create fix bug"
+ run "todo read"
+ run "todo update 1 completed"`
+ case "git":
+ return `git <subcommand>
+ Read-only git commands.
+ Allowed: status, log, diff, show, branch, reflog, rev-parse, shortlog, describe, rev-list
+ Examples:
+ run "git status"
+ run "git log --oneline -5"
+ run "git diff HEAD~1"`
+ case "grep":
+ return `grep <pattern> [options]
+ Filter lines matching a pattern.
+ Options:
+ -i ignore case
+ -v invert match
+ -c count matches
+ Example:
+ run "grep error" (from stdin)
+ run "grep -i warn log.txt"`
+ case "cd":
+ return `cd <directory>
+ Change working directory.
+ Example:
+ run "cd /tmp"
+ run "cd .."`
+ case "pwd":
+ return `pwd
+ Print working directory.
+ Example:
+ run "pwd"`
+ case "mkdir":
+ return `mkdir [-p] <directory>
+ Create a directory (use full path).
+ Options:
+ -p, --parents create parent directories as needed
+ Examples:
+ run "mkdir /full/path/myfolder"
+ run "mkdir -p /full/path/to/nested/folder"`
+ case "sed":
+ return `sed 's/old/new/[g]' [file]
+ Stream editor for text replacement.
+ Options:
+ -i in-place editing
+ -g global replacement (replace all)
+ Examples:
+ run "sed 's/foo/bar/' file.txt"
+ run "sed 's/foo/bar/g' file.txt" (global)
+ run "sed -i 's/foo/bar/' file.txt" (in-place)
+ run "cat file.txt | sed 's/foo/bar/'" (pipe from stdin)`
+ case "go":
+ return `go <command>
+ Go toolchain commands.
+ Allowed: run, build, test, mod, get, install, clean, fmt, vet, etc.
+ Examples:
+ run "go run main.go"
+ run "go build ./..."
+ run "go test ./..."
+ run "go mod tidy"
+ run "go get github.com/package"`
+ case "window", "windows":
+ return `window
+ List available windows.
+ Requires: xdotool and maim
+ Example:
+ run "window"`
+ case "capture", "screenshot":
+ return `capture <window-name-or-id>
+ Capture a screenshot of a window.
+ Requires: xdotool and maim
+ Examples:
+ run "capture Firefox"
+ run "capture 0x12345678"
+ run "capture_and_view Firefox"`
+ case "capture_and_view":
+ return `capture_and_view <window-name-or-id>
+ Capture a window and return for viewing.
+ Requires: xdotool and maim
+ Examples:
+ run "capture_and_view Firefox"`
+ case "browser":
+ return `browser <action> [args]
+ Playwright browser automation.
+ Requires: Playwright browser server running
+ Actions:
+ start - start browser
+ stop - stop browser
+ running - check if browser is running
+ go <url> - navigate to URL
+ click <selector> - click element (use index for multiple: click #btn 1)
+ fill <selector> <text> - fill input field
+ text [selector] - extract text (from element or whole page)
+ html [selector] - get HTML (from element or whole page)
+ screenshot [path] - take screenshot
+ wait <selector> - wait for element to appear
+ drag <from> <to> - drag element to another element
+ Examples:
+ run "browser start"
+ run "browser go https://example.com"
+ run "browser click #submit-button"
+ run "browser fill #search-input hello"
+ run "browser text"
+ run "browser screenshot"
+ run "browser drag 100 200 300 400"
+ run "browser drag #item1 #container2"`
+ default:
+ // No dedicated page for this command.
+ return fmt.Sprintf("No help available for: %s. Use: run \"help\" for all commands.", cmd)
+ }
+}
+
+// handleTodoSubcommand routes todo subcommands to existing handlers
+func handleTodoSubcommand(args []string, originalArgs map[string]string) []byte {
+ if len(args) == 0 {
+ return []byte("usage: todo create|read|update|delete")
+ }
+ subcmd := args[0]
+ switch subcmd {
+ case "create":
+ task := strings.Join(args[1:], " ")
+ if task == "" {
+ task = originalArgs["task"]
+ }
+ if task == "" {
+ return []byte("usage: todo create <task>")
+ }
+ return todoCreate(map[string]string{"task": task})
+ case "read":
+ id := ""
+ if len(args) > 1 {
+ id = args[1]
+ }
+ return todoRead(map[string]string{"id": id})
+ case "update":
+ if len(args) < 2 {
+ return []byte("usage: todo update <id> <status>")
+ }
+ return todoUpdate(map[string]string{"id": args[1], "status": args[2]})
+ case "delete":
+ if len(args) < 2 {
+ return []byte("usage: todo delete <id>")
+ }
+ return todoDelete(map[string]string{"id": args[1]})
+ default:
+ return []byte("unknown todo subcommand: " + subcmd)
+ }
+}
+
+// Command Execution Tool with pipe/chaining support
+func executeCommand(args map[string]string) []byte {
+ commandStr := args["command"]
+ if commandStr == "" {
+ msg := "command not provided to execute_command tool"
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ // Use chain execution for pipe/chaining support
+ result := ExecChain(commandStr)
+ return []byte(result)
+}
+
+// // handleCdCommand handles the cd command to update FilePickerDir
+// func handleCdCommand(args []string) []byte {
+// var targetDir string
+// if len(args) == 0 {
+// // cd with no args goes to home directory
+// homeDir, err := os.UserHomeDir()
+// if err != nil {
+// msg := "cd: cannot determine home directory: " + err.Error()
+// logger.Error(msg)
+// return []byte(msg)
+// }
+// targetDir = homeDir
+// } else {
+// targetDir = args[0]
+// }
+// // Resolve relative paths against current FilePickerDir
+// if !filepath.IsAbs(targetDir) {
+// targetDir = filepath.Join(cfg.FilePickerDir, targetDir)
+// }
+// // Verify the directory exists
+// info, err := os.Stat(targetDir)
+// if err != nil {
+// msg := "cd: " + targetDir + ": " + err.Error()
+// logger.Error(msg)
+// return []byte(msg)
+// }
+// if !info.IsDir() {
+// msg := "cd: " + targetDir + ": not a directory"
+// logger.Error(msg)
+// return []byte(msg)
+// }
+// // Update FilePickerDir
+// absDir, err := filepath.Abs(targetDir)
+// if err != nil {
+// msg := "cd: failed to resolve path: " + err.Error()
+// logger.Error(msg)
+// return []byte(msg)
+// }
+// cfg.FilePickerDir = absDir
+// msg := "FilePickerDir changed to: " + absDir
+// return []byte(msg)
+// }
+
// TodoItem is a single entry of the todo list.
type TodoItem struct {
	ID     string `json:"id"`
	Task   string `json:"task"`
	Status string `json:"status"` // "pending", "in_progress", "completed"
}

// TodoList is an ordered collection of todo items.
type TodoList struct {
	Items []TodoItem `json:"items"`
}

// ToString renders the list as text, one "[status] id. task" entry per item.
// Each entry is preceded and followed by a newline; an empty list renders as
// the empty string.
func (t TodoList) ToString() string {
	var out strings.Builder
	for _, item := range t.Items {
		fmt.Fprintf(&out, "\n[%s] %s. %s\n", item.Status, item.ID, item.Task)
	}
	return out.String()
}

// globalTodoList is the process-wide todo storage. It starts with an empty,
// non-nil item slice.
var globalTodoList = TodoList{Items: []TodoItem{}}
+
+// Todo Management Tools
+func todoCreate(args map[string]string) []byte {
+ task, ok := args["task"]
+ if !ok || task == "" {
+ msg := "task not provided to todo_create tool"
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ // Generate simple ID
+ id := fmt.Sprintf("todo_%d", len(globalTodoList.Items)+1)
+ newItem := TodoItem{
+ ID: id,
+ Task: task,
+ Status: "pending",
+ }
+ globalTodoList.Items = append(globalTodoList.Items, newItem)
+ result := map[string]string{
+ "message": "todo created successfully",
+ "id": id,
+ "task": task,
+ "status": "pending",
+ "todos": globalTodoList.ToString(),
+ }
+ jsonResult, err := json.Marshal(result)
+ if err != nil {
+ msg := "failed to marshal result; error: " + err.Error()
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ return jsonResult
+}
+
+func todoRead(args map[string]string) []byte {
+ // Return all todos if no ID specified
+ result := map[string]interface{}{
+ "todos": globalTodoList.ToString(),
+ }
+ jsonResult, err := json.Marshal(result)
+ if err != nil {
+ msg := "failed to marshal result; error: " + err.Error()
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ return jsonResult
+}
+
+func todoUpdate(args map[string]string) []byte {
+ id, ok := args["id"]
+ if !ok || id == "" {
+ msg := "id not provided to todo_update tool"
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ task, taskOk := args["task"]
+ status, statusOk := args["status"]
+ if !taskOk && !statusOk {
+ msg := "neither task nor status provided to todo_update tool"
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ // Find and update the todo
+ for i, item := range globalTodoList.Items {
+ if item.ID == id {
+ if taskOk {
+ globalTodoList.Items[i].Task = task
+ }
+ if statusOk {
+ // Validate status
+ if status == "pending" || status == "in_progress" || status == "completed" {
+ globalTodoList.Items[i].Status = status
+ } else {
+ result := map[string]string{
+ "error": "status must be one of: pending, in_progress, completed",
+ }
+ jsonResult, err := json.Marshal(result)
+ if err != nil {
+ msg := "failed to marshal result; error: " + err.Error()
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ return jsonResult
+ }
+ }
+ result := map[string]string{
+ "message": "todo updated successfully",
+ "id": id,
+ "todos": globalTodoList.ToString(),
+ }
+ jsonResult, err := json.Marshal(result)
+ if err != nil {
+ msg := "failed to marshal result; error: " + err.Error()
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ return jsonResult
+ }
+ }
+ // ID not found
+ result := map[string]string{
+ "error": fmt.Sprintf("todo with id %s not found", id),
+ }
+ jsonResult, err := json.Marshal(result)
+ if err != nil {
+ msg := "failed to marshal result; error: " + err.Error()
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ return jsonResult
+}
+
+func todoDelete(args map[string]string) []byte {
+ id, ok := args["id"]
+ if !ok || id == "" {
+ msg := "id not provided to todo_delete tool"
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ // Find and remove the todo
+ for i, item := range globalTodoList.Items {
+ if item.ID == id {
+ // Remove item from slice
+ globalTodoList.Items = append(globalTodoList.Items[:i], globalTodoList.Items[i+1:]...)
+ result := map[string]string{
+ "message": "todo deleted successfully",
+ "id": id,
+ "todos": globalTodoList.ToString(),
+ }
+ jsonResult, err := json.Marshal(result)
+ if err != nil {
+ msg := "failed to marshal result; error: " + err.Error()
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ return jsonResult
+ }
+ }
+ // ID not found
+ result := map[string]string{
+ "error": fmt.Sprintf("todo with id %s not found", id),
+ }
+ jsonResult, err := json.Marshal(result)
+ if err != nil {
+ msg := "failed to marshal result; error: " + err.Error()
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ return jsonResult
+}
+
+func viewImgTool(args map[string]string) []byte {
+ file, ok := args["file"]
+ if !ok || file == "" {
+ msg := "file not provided to view_img tool"
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ result := FsViewImg([]string{file}, "")
+ return []byte(result)
+}
+
+func helpTool(args map[string]string) []byte {
+ command, ok := args["command"]
+ var rest []string
+ if ok && command != "" {
+ parts := strings.Fields(command)
+ if len(parts) > 1 {
+ rest = parts[1:]
+ }
+ }
+ return []byte(getHelp(rest))
+}
+
+// func summarizeChat(args map[string]string) []byte {
+// if len(chatBody.Messages) == 0 {
+// return []byte("No chat history to summarize.")
+// }
+// // Format chat history for the agent
+// chatText := chatToText(chatBody.Messages, true) // include system and tool messages
+// return []byte(chatText)
+// }
+
// windowIDToHex converts a base-10 window id into its "0x"-prefixed
// hexadecimal form. Input that does not parse as a base-10 64-bit integer is
// returned unchanged.
func windowIDToHex(decimalID string) string {
	if id, err := strconv.ParseInt(decimalID, 10, 64); err == nil {
		return fmt.Sprintf("0x%x", id)
	}
	return decimalID
}
+
+func listWindows(args map[string]string) []byte {
+ cmd := exec.Command(xdotoolPath, "search", "--name", ".")
+ output, err := cmd.Output()
+ if err != nil {
+ msg := "failed to list windows: " + err.Error()
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ windowIDs := strings.Fields(string(output))
+ windows := make(map[string]string)
+ for _, id := range windowIDs {
+ id = strings.TrimSpace(id)
+ if id == "" {
+ continue
+ }
+ nameCmd := exec.Command(xdotoolPath, "getwindowname", id)
+ nameOutput, err := nameCmd.Output()
+ if err != nil {
+ continue
+ }
+ name := strings.TrimSpace(string(nameOutput))
+ windows[id] = name
+ }
+ data, err := json.Marshal(windows)
+ if err != nil {
+ msg := "failed to marshal window list: " + err.Error()
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ return data
+}
+
+func captureWindow(args map[string]string) []byte {
+ window, ok := args["window"]
+ if !ok || window == "" {
+ return []byte("window parameter required (window ID or name)")
+ }
+ var windowID string
+ if _, err := strconv.Atoi(window); err == nil {
+ windowID = window
+ } else {
+ cmd := exec.Command(xdotoolPath, "search", "--name", window)
+ output, err := cmd.Output()
+ if err != nil || len(strings.Fields(string(output))) == 0 {
+ return []byte("window not found: " + window)
+ }
+ windowID = strings.Fields(string(output))[0]
+ }
+ nameCmd := exec.Command(xdotoolPath, "getwindowname", windowID)
+ nameOutput, _ := nameCmd.Output()
+ windowName := strings.TrimSpace(string(nameOutput))
+ windowName = regexp.MustCompile(`[^a-zA-Z]+`).ReplaceAllString(windowName, "")
+ if windowName == "" {
+ windowName = "window"
+ }
+ timestamp := time.Now().Unix()
+ filename := fmt.Sprintf("/tmp/%s_%d.jpg", windowName, timestamp)
+ cmd := exec.Command(maimPath, "-i", windowIDToHex(windowID), filename)
+ if err := cmd.Run(); err != nil {
+ msg := "failed to capture window: " + err.Error()
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ return []byte("screenshot saved: " + filename)
+}
+
+func captureWindowAndView(args map[string]string) []byte {
+ window, ok := args["window"]
+ if !ok || window == "" {
+ return []byte("window parameter required (window ID or name)")
+ }
+ var windowID string
+ if _, err := strconv.Atoi(window); err == nil {
+ windowID = window
+ } else {
+ cmd := exec.Command(xdotoolPath, "search", "--name", window)
+ output, err := cmd.Output()
+ if err != nil || len(strings.Fields(string(output))) == 0 {
+ return []byte("window not found: " + window)
+ }
+ windowID = strings.Fields(string(output))[0]
+ }
+ nameCmd := exec.Command(xdotoolPath, "getwindowname", windowID)
+ nameOutput, _ := nameCmd.Output()
+ windowName := strings.TrimSpace(string(nameOutput))
+ windowName = regexp.MustCompile(`[^a-zA-Z]+`).ReplaceAllString(windowName, "")
+ if windowName == "" {
+ windowName = "window"
+ }
+ timestamp := time.Now().Unix()
+ filename := fmt.Sprintf("/tmp/%s_%d.jpg", windowName, timestamp)
+ captureCmd := exec.Command(maimPath, "-i", windowIDToHex(windowID), filename)
+ if err := captureCmd.Run(); err != nil {
+ msg := "failed to capture window: " + err.Error()
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ dataURL, err := models.CreateImageURLFromPath(filename)
+ if err != nil {
+ msg := "failed to create image URL: " + err.Error()
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ result := models.MultimodalToolResp{
+ Type: "multimodal_content",
+ Parts: []map[string]string{
+ {"type": "text", "text": "Screenshot saved: " + filename},
+ {"type": "image_url", "url": dataURL},
+ },
+ }
+ jsonResult, err := json.Marshal(result)
+ if err != nil {
+ msg := "failed to marshal result: " + err.Error()
+ logger.Error(msg)
+ return []byte(msg)
+ }
+ return jsonResult
+}
+
// fnSig is the signature shared by the tool handlers stored in FnMap: a handler
// takes its arguments as a string map and returns its result as bytes.
type fnSig func(map[string]string) []byte
+
// FS Command Handlers - Unix-style file operations

// argsToSlice turns a named-argument map into a positional slice. Only the keys
// "path", "src", "dst", "dir" and "file" are considered, in that order; keys
// that are absent or map to the empty string are skipped. The result is nil
// when none of them carries a value.
func argsToSlice(args map[string]string) []string {
	order := [...]string{"path", "src", "dst", "dir", "file"}
	var positional []string
	for i := range order {
		if value := args[order[i]]; value != "" {
			positional = append(positional, value)
		}
	}
	return positional
}
+
+func cmdMemory(args map[string]string) []byte {
+ return []byte(FsMemory(argsToSlice(args), ""))
+}
+
+type memoryAdapter struct {
+ store storage.Memories
+ cfg *config.Config
+}
+
+func (m *memoryAdapter) Memorise(agent, topic, data string) (string, error) {
+ mem := &models.Memory{
+ Agent: agent,
+ Topic: topic,
+ Mind: data,
+ UpdatedAt: time.Now(),
+ CreatedAt: time.Now(),
+ }
+ result, err := m.store.Memorise(mem)
+ if err != nil {
+ return "", err
+ }
+ return result.Topic, nil
+}
+
+func (m *memoryAdapter) Recall(agent, topic string) (string, error) {
+ return m.store.Recall(agent, topic)
+}
+
+func (m *memoryAdapter) RecallTopics(agent string) ([]string, error) {
+ return m.store.RecallTopics(agent)
+}
+
+func (m *memoryAdapter) Forget(agent, topic string) error {
+ return m.store.Forget(agent, topic)
+}
+
+var FnMap = map[string]fnSig{
+ "memory": cmdMemory,
+ "rag_search": ragsearch,
+ "websearch": websearch,
+ "websearch_raw": websearchRaw,
+ "read_url": readURL,
+ "read_url_raw": readURLRaw,
+ "view_img": viewImgTool,
+ "help": helpTool,
+ // Unified run command
+ "run": runCmd,
+ "summarize_chat": summarizeChat,
+}
+
+func removeWindowToolsFromBaseTools() {
+ windowToolNames := map[string]bool{
+ "list_windows": true,
+ "capture_window": true,
+ "capture_window_and_view": true,
+ }
+ var filtered []models.Tool
+ for _, tool := range BaseTools {
+ if !windowToolNames[tool.Function.Name] {
+ filtered = append(filtered, tool)
+ }
+ }
+ BaseTools = filtered
+ delete(FnMap, "list_windows")
+ delete(FnMap, "capture_window")
+ delete(FnMap, "capture_window_and_view")
+}
+
// summarizeChat returns its arguments encoded as JSON. If encoding fails, a
// fixed error message is returned instead.
func summarizeChat(args map[string]string) []byte {
	if encoded, err := json.Marshal(args); err == nil {
		return encoded
	}
	return []byte("error: failed to marshal arguments")
}
+
+// func removePlaywrightToolsFromBaseTools() {
+// playwrightToolNames := map[string]bool{
+// "pw_start": true,
+// "pw_stop": true,
+// "pw_is_running": true,
+// "pw_navigate": true,
+// "pw_click": true,
+// "pw_click_at": true,
+// "pw_fill": true,
+// "pw_extract_text": true,
+// "pw_screenshot": true,
+// "pw_screenshot_and_view": true,
+// "pw_wait_for_selector": true,
+// "pw_drag": true,
+// }
+// var filtered []models.Tool
+// for _, tool := range BaseTools {
+// if !playwrightToolNames[tool.Function.Name] {
+// filtered = append(filtered, tool)
+// }
+// }
+// BaseTools = filtered
+// delete(FnMap, "pw_start")
+// delete(FnMap, "pw_stop")
+// delete(FnMap, "pw_is_running")
+// delete(FnMap, "pw_navigate")
+// delete(FnMap, "pw_click")
+// delete(FnMap, "pw_click_at")
+// delete(FnMap, "pw_fill")
+// delete(FnMap, "pw_extract_text")
+// delete(FnMap, "pw_screenshot")
+// delete(FnMap, "pw_screenshot_and_view")
+// delete(FnMap, "pw_wait_for_selector")
+// delete(FnMap, "pw_drag")
+// }
+
+// func (t *Tools) RegisterWindowTools(modelHasVision bool) {
+// removeWindowToolsFromBaseTools()
+// if t.WindowToolsAvailable {
+// FnMap["list_windows"] = listWindows
+// FnMap["capture_window"] = captureWindow
+// windowTools := []models.Tool{
+// {
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "list_windows",
+// Description: "List all visible windows with their IDs and names. Returns a map of window ID to window name.",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{},
+// Properties: map[string]models.ToolArgProps{},
+// },
+// },
+// },
+// {
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "capture_window",
+// Description: "Capture a screenshot of a specific window and save it to /tmp. Requires window parameter (window ID or name substring).",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{"window"},
+// Properties: map[string]models.ToolArgProps{
+// "window": models.ToolArgProps{
+// Type: "string",
+// Description: "window ID or window name (partial match)",
+// },
+// },
+// },
+// },
+// },
+// }
+// if modelHasVision {
+// FnMap["capture_window_and_view"] = captureWindowAndView
+// windowTools = append(windowTools, models.Tool{
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "capture_window_and_view",
+// Description: "Capture a screenshot of a specific window, save it to /tmp, and return the image for viewing. Requires window parameter (window ID or name substring).",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{"window"},
+// Properties: map[string]models.ToolArgProps{
+// "window": models.ToolArgProps{
+// Type: "string",
+// Description: "window ID or window name (partial match)",
+// },
+// },
+// },
+// },
+// })
+// }
+// BaseTools = append(BaseTools, windowTools...)
+// ToolSysMsg += windowToolSysMsg
+// }
+// }
+
+// for pw agent
+// var browserAgentSysPrompt = `You are an autonomous browser automation agent. Your goal is to complete the user's task by intelligently using browser automation tools.
+
+// Important: The browser may already be running from a previous task! Always check pw_is_running first before starting a new browser.
+
+// Available tools:
+// - pw_start: Start browser (only if not already running)
+// - pw_stop: Stop browser (only when you're truly done and browser is no longer needed)
+// - pw_is_running: Check if browser is running
+// - pw_navigate: Go to a URL
+// - pw_click: Click an element by CSS selector
+// - pw_fill: Type text into an input
+// - pw_extract_text: Get text from page/element
+// - pw_screenshot: Take a screenshot (returns file path)
+// - pw_screenshot_and_view: Take screenshot with image for viewing
+// - pw_wait_for_selector: Wait for element to appear
+// - pw_drag: Drag mouse from one point to another
+// - pw_click_at: Click at X,Y coordinates
+// - pw_get_html: Get HTML content
+// - pw_get_dom: Get structured DOM tree
+// - pw_search_elements: Search for elements by text or selector
+
+// Workflow:
+// 1. First, check if browser is already running (pw_is_running)
+// 2. Only start browser if not already running (pw_start)
+// 3. Navigate to required pages (pw_navigate)
+// 4. Interact with elements as needed (click, fill, etc.)
+// 5. Extract information or take screenshots as requested
+// 6. IMPORTANT: Do NOT stop the browser when done! Leave it running so the user can continue interacting with the page in subsequent requests.
+
+// Always provide clear feedback about what you're doing and what you found.`
+
+// func (t *Tools) runBrowserAgent(args map[string]string) []byte {
+// task, ok := args["task"]
+// if !ok || task == "" {
+// return []byte(`{"error": "task argument is required"}`)
+// }
+// client := t.GetWebAgentClient()
+// pwAgent := agent.NewPWAgent(client, browserAgentSysPrompt)
+// pwAgent.SetTools(agent.GetPWTools())
+// return pwAgent.ProcessTask(task)
+// }
+
+// func registerPlaywrightTools() {
+// removePlaywrightToolsFromBaseTools()
+// if cfg != nil && cfg.PlaywrightEnabled {
+// FnMap["pw_start"] = pwStart
+// FnMap["pw_stop"] = pwStop
+// FnMap["pw_is_running"] = pwIsRunning
+// FnMap["pw_navigate"] = pwNavigate
+// FnMap["pw_click"] = pwClick
+// FnMap["pw_click_at"] = pwClickAt
+// FnMap["pw_fill"] = pwFill
+// FnMap["pw_extract_text"] = pwExtractText
+// FnMap["pw_screenshot"] = pwScreenshot
+// FnMap["pw_screenshot_and_view"] = pwScreenshotAndView
+// FnMap["pw_wait_for_selector"] = pwWaitForSelector
+// FnMap["pw_drag"] = pwDrag
+// FnMap["pw_get_html"] = pwGetHTML
+// FnMap["pw_get_dom"] = pwGetDOM
+// FnMap["pw_search_elements"] = pwSearchElements
+// playwrightTools := []models.Tool{
+// {
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "pw_start",
+// Description: "Start a Playwright browser instance. Call this first before using other pw_ tools. Uses headless mode by default (set PlaywrightHeadless=false in config for GUI).",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{},
+// Properties: map[string]models.ToolArgProps{},
+// },
+// },
+// },
+// {
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "pw_stop",
+// Description: "Stop the Playwright browser instance. Call when done with browser automation.",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{},
+// Properties: map[string]models.ToolArgProps{},
+// },
+// },
+// },
+// {
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "pw_is_running",
+// Description: "Check if Playwright browser is currently running.",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{},
+// Properties: map[string]models.ToolArgProps{},
+// },
+// },
+// },
+// {
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "pw_navigate",
+// Description: "Navigate to a URL in the browser.",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{"url"},
+// Properties: map[string]models.ToolArgProps{
+// "url": models.ToolArgProps{
+// Type: "string",
+// Description: "URL to navigate to",
+// },
+// },
+// },
+// },
+// },
+// {
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "pw_click",
+// Description: "Click on an element using CSS selector. Use 'index' for multiple matches (default 0).",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{"selector"},
+// Properties: map[string]models.ToolArgProps{
+// "selector": models.ToolArgProps{
+// Type: "string",
+// Description: "CSS selector for the element to click",
+// },
+// "index": models.ToolArgProps{
+// Type: "string",
+// Description: "optional index for multiple matches (default 0)",
+// },
+// },
+// },
+// },
+// },
+// {
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "pw_fill",
+// Description: "Fill an input field with text using CSS selector.",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{"selector", "text"},
+// Properties: map[string]models.ToolArgProps{
+// "selector": models.ToolArgProps{
+// Type: "string",
+// Description: "CSS selector for the input element",
+// },
+// "text": models.ToolArgProps{
+// Type: "string",
+// Description: "text to fill into the input",
+// },
+// "index": models.ToolArgProps{
+// Type: "string",
+// Description: "optional index for multiple matches (default 0)",
+// },
+// },
+// },
+// },
+// },
+// {
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "pw_extract_text",
+// Description: "Extract text content from the page or specific elements using CSS selector. Use 'body' for all page text.",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{"selector"},
+// Properties: map[string]models.ToolArgProps{
+// "selector": models.ToolArgProps{
+// Type: "string",
+// Description: "CSS selector (use 'body' for all page text)",
+// },
+// },
+// },
+// },
+// },
+// {
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "pw_screenshot",
+// Description: "Take a screenshot of the page or a specific element. Returns file path to saved image.",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{},
+// Properties: map[string]models.ToolArgProps{
+// "selector": models.ToolArgProps{
+// Type: "string",
+// Description: "optional CSS selector for element to screenshot",
+// },
+// "full_page": models.ToolArgProps{
+// Type: "string",
+// Description: "optional: 'true' to capture full page (default false)",
+// },
+// },
+// },
+// },
+// },
+// {
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "pw_screenshot_and_view",
+// Description: "Take a screenshot and return the image for viewing. Use when model needs to see the screenshot.",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{},
+// Properties: map[string]models.ToolArgProps{
+// "selector": models.ToolArgProps{
+// Type: "string",
+// Description: "optional CSS selector for element to screenshot",
+// },
+// "full_page": models.ToolArgProps{
+// Type: "string",
+// Description: "optional: 'true' to capture full page (default false)",
+// },
+// },
+// },
+// },
+// },
+// {
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "pw_wait_for_selector",
+// Description: "Wait for an element to appear on the page.",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{"selector"},
+// Properties: map[string]models.ToolArgProps{
+// "selector": models.ToolArgProps{
+// Type: "string",
+// Description: "CSS selector to wait for",
+// },
+// "timeout": models.ToolArgProps{
+// Type: "string",
+// Description: "optional timeout in ms (default 30000)",
+// },
+// },
+// },
+// },
+// },
+// {
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "pw_drag",
+// Description: "Drag the mouse from one point to another.",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{"x1", "y1", "x2", "y2"},
+// Properties: map[string]models.ToolArgProps{
+// "x1": models.ToolArgProps{
+// Type: "string",
+// Description: "starting X coordinate",
+// },
+// "y1": models.ToolArgProps{
+// Type: "string",
+// Description: "starting Y coordinate",
+// },
+// "x2": models.ToolArgProps{
+// Type: "string",
+// Description: "ending X coordinate",
+// },
+// "y2": models.ToolArgProps{
+// Type: "string",
+// Description: "ending Y coordinate",
+// },
+// },
+// },
+// },
+// },
+// {
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "pw_get_html",
+// Description: "Get the HTML content of the page or a specific element.",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{},
+// Properties: map[string]models.ToolArgProps{
+// "selector": models.ToolArgProps{
+// Type: "string",
+// Description: "optional CSS selector (default: body)",
+// },
+// },
+// },
+// },
+// },
+// {
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "pw_get_dom",
+// Description: "Get a structured DOM representation of an element with tag, attributes, text, and children.",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{},
+// Properties: map[string]models.ToolArgProps{
+// "selector": models.ToolArgProps{
+// Type: "string",
+// Description: "optional CSS selector (default: body)",
+// },
+// },
+// },
+// },
+// },
+// {
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "pw_search_elements",
+// Description: "Search for elements by text content or CSS selector. Returns matching elements with their tags, text, and HTML.",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{},
+// Properties: map[string]models.ToolArgProps{
+// "text": models.ToolArgProps{
+// Type: "string",
+// Description: "text to search for in elements",
+// },
+// "selector": models.ToolArgProps{
+// Type: "string",
+// Description: "CSS selector to search for",
+// },
+// },
+// },
+// },
+// },
+// }
+// BaseTools = append(BaseTools, playwrightTools...)
+// ToolSysMsg += browserToolSysMsg
+// agent.RegisterPWTool("pw_start", pwStart)
+// agent.RegisterPWTool("pw_stop", pwStop)
+// agent.RegisterPWTool("pw_is_running", pwIsRunning)
+// agent.RegisterPWTool("pw_navigate", pwNavigate)
+// agent.RegisterPWTool("pw_click", pwClick)
+// agent.RegisterPWTool("pw_click_at", pwClickAt)
+// agent.RegisterPWTool("pw_fill", pwFill)
+// agent.RegisterPWTool("pw_extract_text", pwExtractText)
+// agent.RegisterPWTool("pw_screenshot", pwScreenshot)
+// agent.RegisterPWTool("pw_screenshot_and_view", pwScreenshotAndView)
+// agent.RegisterPWTool("pw_wait_for_selector", pwWaitForSelector)
+// agent.RegisterPWTool("pw_drag", pwDrag)
+// agent.RegisterPWTool("pw_get_html", pwGetHTML)
+// agent.RegisterPWTool("pw_get_dom", pwGetDOM)
+// agent.RegisterPWTool("pw_search_elements", pwSearchElements)
+// browserAgentTool := []models.Tool{
+// {
+// Type: "function",
+// Function: models.ToolFunc{
+// Name: "browser_agent",
+// Description: "Autonomous browser automation agent. Use for complex multi-step browser tasks like 'go to website, login, and take screenshot'. The agent will plan and execute steps automatically using browser ",
+// Parameters: models.ToolFuncParams{
+// Type: "object",
+// Required: []string{"task"},
+// Properties: map[string]models.ToolArgProps{
+// "task": {Type: "string", Description: "The task to accomplish, e.g., 'go to github.com and take a screenshot of the homepage'"},
+// },
+// },
+// },
+// },
+// }
+// BaseTools = append(BaseTools, browserAgentTool...)
+// FnMap["browser_agent"] = tooler.runBrowserAgent
+// }
+// }
+
+// CallToolWithAgent runs the tool registered under name in FnMap with the
+// given args. If agent.Get(name) yields a non-nil agent, the tool's raw
+// output is handed to that agent's Process method and the processed bytes
+// are returned instead. The boolean is false only when FnMap has no entry
+// for name; in that case the bytes carry a "tool <name> not found" message.
+func CallToolWithAgent(name string, args map[string]string) ([]byte, bool) {
+	toolFn, registered := FnMap[name]
+	if !registered {
+		msg := fmt.Sprintf("tool %s not found", name)
+		return []byte(msg), false
+	}
+	output := toolFn(args)
+	// No agent for this tool: pass the raw output through untouched.
+	postProcessor := agent.Get(name)
+	if postProcessor == nil {
+		return output, true
+	}
+	return postProcessor.Process(args, output), true
+}
+
+// BaseTools holds the tool definitions written in the OpenAI-style function
+// schema (type "function" plus a name, description and parameter spec).
+// Every argument, including the numeric-looking "limit", is declared with
+// Type "string".
+var BaseTools = []models.Tool{
+	// rag_search: query the local document database.
+	{
+		Type: "function",
+		Function: models.ToolFunc{
+			Name:        "rag_search",
+			Description: "Search local document database given query, limit of sources (default 3). Performs query refinement, semantic search, reranking, and synthesis.",
+			Parameters: models.ToolFuncParams{
+				Type:     "object",
+				Required: []string{"query", "limit"},
+				Properties: map[string]models.ToolArgProps{
+					"query": {
+						Type:        "string",
+						Description: "search query",
+					},
+					"limit": {
+						Type:        "string",
+						Description: "limit of the document results",
+					},
+				},
+			},
+		},
+	},
+	// websearch: web search with processed results.
+	{
+		Type: "function",
+		Function: models.ToolFunc{
+			Name:        "websearch",
+			Description: "Search web given query, limit of sources (default 3).",
+			Parameters: models.ToolFuncParams{
+				Type:     "object",
+				Required: []string{"query", "limit"},
+				Properties: map[string]models.ToolArgProps{
+					"query": {
+						Type:        "string",
+						Description: "search query",
+					},
+					"limit": {
+						Type:        "string",
+						Description: "limit of the website results",
+					},
+				},
+			},
+		},
+	},
+	// read_url: fetch a page and return its cleaned-up text.
+	{
+		Type: "function",
+		Function: models.ToolFunc{
+			Name:        "read_url",
+			Description: "Retrieves text content of given link, providing clean summary without html,css and other web elements.",
+			Parameters: models.ToolFuncParams{
+				Type:     "object",
+				Required: []string{"url"},
+				Properties: map[string]models.ToolArgProps{
+					"url": {
+						Type:        "string",
+						Description: "link to the webpage to read text from",
+					},
+				},
+			},
+		},
+	},
+	// websearch_raw: web search returning unprocessed data.
+	{
+		Type: "function",
+		Function: models.ToolFunc{
+			Name:        "websearch_raw",
+			Description: "Search web given query, returning raw data as is without processing. Use when you need the raw response data instead of a clean summary.",
+			Parameters: models.ToolFuncParams{
+				Type:     "object",
+				Required: []string{"query", "limit"},
+				Properties: map[string]models.ToolArgProps{
+					"query": {
+						Type:        "string",
+						Description: "search query",
+					},
+					"limit": {
+						Type:        "string",
+						Description: "limit of the website results",
+					},
+				},
+			},
+		},
+	},
+	// read_url_raw: fetch a page and return its unprocessed content.
+	{
+		Type: "function",
+		Function: models.ToolFunc{
+			Name:        "read_url_raw",
+			Description: "Retrieves raw content of given link without processing. Use when you need the raw response data instead of a clean summary.",
+			Parameters: models.ToolFuncParams{
+				Type:     "object",
+				Required: []string{"url"},
+				Properties: map[string]models.ToolArgProps{
+					"url": {
+						Type:        "string",
+						Description: "link to the webpage to read text from",
+					},
+				},
+			},
+		},
+	},
+	// help: command discovery; the "command" argument is not required.
+	{
+		Type: "function",
+		Function: models.ToolFunc{
+			Name:        "help",
+			Description: "List all available commands. Use this to discover what commands are available when unsure.",
+			Parameters: models.ToolFuncParams{
+				Type:     "object",
+				Required: []string{},
+				Properties: map[string]models.ToolArgProps{
+					"command": {
+						Type:        "string",
+						Description: "optional: get help for specific command (e.g., 'help memory')",
+					},
+				},
+			},
+		},
+	},
+	// run: single entry point for shell, git, memory and todo commands.
+	{
+		Type: "function",
+		Function: models.ToolFunc{
+			Name:        "run",
+			Description: "Execute commands: shell, git, memory, todo. Usage: run \"<command>\". Examples: run \"ls -la\", run \"git status\", run \"memory store foo bar\", run \"memory get foo\", run \"todo create task\", run \"help\", run \"help memory\"",
+			Parameters: models.ToolFuncParams{
+				Type:     "object",
+				Required: []string{"command"},
+				Properties: map[string]models.ToolArgProps{
+					"command": {
+						Type:        "string",
+						Description: "command to execute. Use: run \"help\" for all commands, run \"help <cmd>\" for specific help. Examples: ls, cat, grep, git status, memory store, todo create, etc.",
+					},
+				},
+			},
+		},
+	},
+}