summaryrefslogtreecommitdiff
path: root/tools_playwright.go
diff options
context:
space:
mode:
Diffstat (limited to 'tools_playwright.go')
-rw-r--r--tools_playwright.go653
1 files changed, 653 insertions, 0 deletions
diff --git a/tools_playwright.go b/tools_playwright.go
new file mode 100644
index 0000000..3555469
--- /dev/null
+++ b/tools_playwright.go
@@ -0,0 +1,653 @@
+package main
+
+import (
+ "encoding/json"
+ "fmt"
+ "gf-lt/models"
+ "os"
+ "strconv"
+ "strings"
+ "sync"
+
+ "github.com/playwright-community/playwright-go"
+)
+
+var browserToolSysMsg = `
+Additional browser automation tools (Playwright):
+[
+{
+ "name": "pw_start",
+ "args": [],
+ "when_to_use": "start a browser instance before doing any browser automation. Must be called first."
+},
+{
+ "name": "pw_stop",
+ "args": [],
+ "when_to_use": "stop the browser instance when done with automation."
+},
+{
+ "name": "pw_is_running",
+ "args": [],
+ "when_to_use": "check if browser is currently running."
+},
+{
+ "name": "pw_navigate",
+ "args": ["url"],
+ "when_to_use": "open a specific URL in the web browser."
+},
+{
+ "name": "pw_click",
+ "args": ["selector", "index"],
+ "when_to_use": "click on an element on the current webpage. Use 'index' for multiple matches (default 0)."
+},
+{
+ "name": "pw_fill",
+ "args": ["selector", "text", "index"],
+ "when_to_use": "type text into an input field. Use 'index' for multiple matches (default 0)."
+},
+{
+ "name": "pw_extract_text",
+ "args": ["selector"],
+ "when_to_use": "extract text content from the page or specific elements. Use selector 'body' for all page text."
+},
+{
+ "name": "pw_screenshot",
+ "args": ["selector", "full_page"],
+ "when_to_use": "take a screenshot of the page or a specific element. Returns a file path to the image. Use to verify actions or inspect visual state."
+},
+{
+ "name": "pw_screenshot_and_view",
+ "args": ["selector", "full_page"],
+ "when_to_use": "take a screenshot and return the image for viewing. Use to visually verify page state."
+},
+{
+ "name": "pw_wait_for_selector",
+ "args": ["selector", "timeout"],
+ "when_to_use": "wait for an element to appear on the page before proceeding with further actions."
+},
+{
+ "name": "pw_drag",
+ "args": ["x1", "y1", "x2", "y2"],
+ "when_to_use": "drag the mouse from point (x1,y1) to (x2,y2)."
+},
+{
+ "name": "pw_click_at",
+ "args": ["x", "y"],
+ "when_to_use": "click at specific X,Y coordinates on the page. Use when you know the exact position."
+},
+{
+ "name": "pw_get_html",
+ "args": ["selector"],
+ "when_to_use": "get the HTML content of the page or a specific element. Use to understand page structure or extract raw HTML."
+},
+{
+ "name": "pw_get_dom",
+ "args": ["selector"],
+ "when_to_use": "get a structured DOM representation with tag, attributes, text, and children. Use to inspect element hierarchy and properties."
+},
+{
+ "name": "pw_search_elements",
+ "args": ["text", "selector"],
+ "when_to_use": "search for elements by text content or CSS selector. Returns matching elements with their tags, text, and HTML."
+}
+]
+`
+
+var (
+ pw *playwright.Playwright
+ browser playwright.Browser
+ browserStarted bool
+ browserStartMu sync.Mutex
+ page playwright.Page
+)
+
+func pwShutDown() error {
+ if pw == nil {
+ return nil
+ }
+ pwStop(nil)
+ return pw.Stop()
+}
+
+func installPW() error {
+ err := playwright.Install(&playwright.RunOptions{Verbose: false})
+ if err != nil {
+ logger.Warn("playwright not available", "error", err)
+ return err
+ }
+ return nil
+}
+
+func checkPlaywright() error {
+ var err error
+ pw, err = playwright.Run()
+ if err != nil {
+ logger.Warn("playwright not available", "error", err)
+ return err
+ }
+ return nil
+}
+
+func pwStart(args map[string]string) []byte {
+ browserStartMu.Lock()
+ defer browserStartMu.Unlock()
+ if browserStarted {
+ return []byte(`{"error": "Browser already started"}`)
+ }
+ var err error
+ browser, err = pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
+ Headless: playwright.Bool(!cfg.PlaywrightDebug),
+ })
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to launch browser: %s"}`, err.Error()))
+ }
+ page, err = browser.NewPage()
+ if err != nil {
+ browser.Close()
+ return []byte(fmt.Sprintf(`{"error": "failed to create page: %s"}`, err.Error()))
+ }
+ browserStarted = true
+ return []byte(`{"success": true, "message": "Browser started"}`)
+}
+
+func pwStop(args map[string]string) []byte {
+ browserStartMu.Lock()
+ defer browserStartMu.Unlock()
+ if !browserStarted {
+ return []byte(`{"success": true, "message": "Browser was not running"}`)
+ }
+ if page != nil {
+ page.Close()
+ page = nil
+ }
+ if browser != nil {
+ browser.Close()
+ browser = nil
+ }
+ browserStarted = false
+ return []byte(`{"success": true, "message": "Browser stopped"}`)
+}
+
+func pwIsRunning(args map[string]string) []byte {
+ if browserStarted {
+ return []byte(`{"running": true, "message": "Browser is running"}`)
+ }
+ return []byte(`{"running": false, "message": "Browser is not running"}`)
+}
+
+func pwNavigate(args map[string]string) []byte {
+ url, ok := args["url"]
+ if !ok || url == "" {
+ return []byte(`{"error": "url not provided"}`)
+ }
+ if !browserStarted || page == nil {
+ return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+ }
+ _, err := page.Goto(url)
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to navigate: %s"}`, err.Error()))
+ }
+ title, _ := page.Title()
+ pageURL := page.URL()
+ return []byte(fmt.Sprintf(`{"success": true, "title": "%s", "url": "%s"}`, title, pageURL))
+}
+
+func pwClick(args map[string]string) []byte {
+ selector, ok := args["selector"]
+ if !ok || selector == "" {
+ return []byte(`{"error": "selector not provided"}`)
+ }
+ if !browserStarted || page == nil {
+ return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+ }
+ index := 0
+ if args["index"] != "" {
+ if i, err := strconv.Atoi(args["index"]); err != nil {
+ logger.Warn("failed to parse index", "value", args["index"], "error", err)
+ } else {
+ index = i
+ }
+ }
+ locator := page.Locator(selector)
+ count, err := locator.Count()
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error()))
+ }
+ if index >= count {
+ return []byte(fmt.Sprintf(`{"error": "Element not found at index %d (found %d elements)"}`, index, count))
+ }
+ err = locator.Nth(index).Click()
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to click: %s"}`, err.Error()))
+ }
+ return []byte(`{"success": true, "message": "Clicked element"}`)
+}
+
+func pwFill(args map[string]string) []byte {
+ selector, ok := args["selector"]
+ if !ok || selector == "" {
+ return []byte(`{"error": "selector not provided"}`)
+ }
+ text := args["text"]
+ if text == "" {
+ text = ""
+ }
+ if !browserStarted || page == nil {
+ return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+ }
+ index := 0
+ if args["index"] != "" {
+ if i, err := strconv.Atoi(args["index"]); err != nil {
+ logger.Warn("failed to parse index", "value", args["index"], "error", err)
+ } else {
+ index = i
+ }
+ }
+ locator := page.Locator(selector)
+ count, err := locator.Count()
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error()))
+ }
+ if index >= count {
+ return []byte(fmt.Sprintf(`{"error": "Element not found at index %d"}`, index))
+ }
+ err = locator.Nth(index).Fill(text)
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to fill: %s"}`, err.Error()))
+ }
+ return []byte(`{"success": true, "message": "Filled input"}`)
+}
+
+func pwExtractText(args map[string]string) []byte {
+ selector := args["selector"]
+ if selector == "" {
+ selector = "body"
+ }
+ if !browserStarted || page == nil {
+ return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+ }
+ locator := page.Locator(selector)
+ count, err := locator.Count()
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error()))
+ }
+ if count == 0 {
+ return []byte(`{"error": "No elements found"}`)
+ }
+ if selector == "body" {
+ text, err := page.Locator("body").TextContent()
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to get text: %s"}`, err.Error()))
+ }
+ return []byte(fmt.Sprintf(`{"text": "%s"}`, text))
+ }
+ var texts []string
+ for i := 0; i < count; i++ {
+ text, err := locator.Nth(i).TextContent()
+ if err != nil {
+ continue
+ }
+ texts = append(texts, text)
+ }
+ return []byte(fmt.Sprintf(`{"text": "%s"}`, joinLines(texts)))
+}
+
+func joinLines(lines []string) string {
+ var sb strings.Builder
+ for i, line := range lines {
+ if i > 0 {
+ sb.WriteString("\n")
+ }
+ sb.WriteString(line)
+ }
+ return sb.String()
+}
+
+func pwScreenshot(args map[string]string) []byte {
+ selector := args["selector"]
+ fullPage := args["full_page"] == "true"
+ if !browserStarted || page == nil {
+ return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+ }
+ path := fmt.Sprintf("/tmp/pw_screenshot_%d.png", os.Getpid())
+ var err error
+ if selector != "" && selector != "body" {
+ locator := page.Locator(selector)
+ _, err = locator.Screenshot(playwright.LocatorScreenshotOptions{
+ Path: playwright.String(path),
+ })
+ } else {
+ _, err = page.Screenshot(playwright.PageScreenshotOptions{
+ Path: playwright.String(path),
+ FullPage: playwright.Bool(fullPage),
+ })
+ }
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to take screenshot: %s"}`, err.Error()))
+ }
+ return []byte(fmt.Sprintf(`{"path": "%s"}`, path))
+}
+
+func pwScreenshotAndView(args map[string]string) []byte {
+ selector := args["selector"]
+ fullPage := args["full_page"] == "true"
+ if !browserStarted || page == nil {
+ return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+ }
+ path := fmt.Sprintf("/tmp/pw_screenshot_%d.png", os.Getpid())
+ var err error
+ if selector != "" && selector != "body" {
+ locator := page.Locator(selector)
+ _, err = locator.Screenshot(playwright.LocatorScreenshotOptions{
+ Path: playwright.String(path),
+ })
+ } else {
+ _, err = page.Screenshot(playwright.PageScreenshotOptions{
+ Path: playwright.String(path),
+ FullPage: playwright.Bool(fullPage),
+ })
+ }
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to take screenshot: %s"}`, err.Error()))
+ }
+ dataURL, err := models.CreateImageURLFromPath(path)
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to create image URL: %s"}`, err.Error()))
+ }
+ resp := models.MultimodalToolResp{
+ Type: "multimodal_content",
+ Parts: []map[string]string{
+ {"type": "text", "text": "Screenshot saved: " + path},
+ {"type": "image_url", "url": dataURL},
+ },
+ }
+ jsonResult, err := json.Marshal(resp)
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to marshal result: %s"}`, err.Error()))
+ }
+ return jsonResult
+}
+
+func pwWaitForSelector(args map[string]string) []byte {
+ selector, ok := args["selector"]
+ if !ok || selector == "" {
+ return []byte(`{"error": "selector not provided"}`)
+ }
+ if !browserStarted || page == nil {
+ return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+ }
+ timeout := 30000
+ if args["timeout"] != "" {
+ if t, err := strconv.Atoi(args["timeout"]); err != nil {
+ logger.Warn("failed to parse timeout", "value", args["timeout"], "error", err)
+ } else {
+ timeout = t
+ }
+ }
+ locator := page.Locator(selector)
+ err := locator.WaitFor(playwright.LocatorWaitForOptions{
+ Timeout: playwright.Float(float64(timeout)),
+ })
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "element not found: %s"}`, err.Error()))
+ }
+ return []byte(`{"success": true, "message": "Element found"}`)
+}
+
+func pwDrag(args map[string]string) []byte {
+ x1, ok := args["x1"]
+ if !ok {
+ return []byte(`{"error": "x1 not provided"}`)
+ }
+ y1, ok := args["y1"]
+ if !ok {
+ return []byte(`{"error": "y1 not provided"}`)
+ }
+ x2, ok := args["x2"]
+ if !ok {
+ return []byte(`{"error": "x2 not provided"}`)
+ }
+ y2, ok := args["y2"]
+ if !ok {
+ return []byte(`{"error": "y2 not provided"}`)
+ }
+ if !browserStarted || page == nil {
+ return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+ }
+ var fx1, fy1, fx2, fy2 float64
+ if parsedX1, err := strconv.ParseFloat(x1, 64); err != nil {
+ logger.Warn("failed to parse x1", "value", x1, "error", err)
+ } else {
+ fx1 = parsedX1
+ }
+ if parsedY1, err := strconv.ParseFloat(y1, 64); err != nil {
+ logger.Warn("failed to parse y1", "value", y1, "error", err)
+ } else {
+ fy1 = parsedY1
+ }
+ if parsedX2, err := strconv.ParseFloat(x2, 64); err != nil {
+ logger.Warn("failed to parse x2", "value", x2, "error", err)
+ } else {
+ fx2 = parsedX2
+ }
+ if parsedY2, err := strconv.ParseFloat(y2, 64); err != nil {
+ logger.Warn("failed to parse y2", "value", y2, "error", err)
+ } else {
+ fy2 = parsedY2
+ }
+ mouse := page.Mouse()
+ err := mouse.Move(fx1, fy1)
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to move mouse: %s"}`, err.Error()))
+ }
+ err = mouse.Down()
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to mouse down: %s"}`, err.Error()))
+ }
+ err = mouse.Move(fx2, fy2)
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to move mouse: %s"}`, err.Error()))
+ }
+ err = mouse.Up()
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to mouse up: %s"}`, err.Error()))
+ }
+ return []byte(fmt.Sprintf(`{"success": true, "message": "Dragged from (%s,%s) to (%s,%s)"}`, x1, y1, x2, y2))
+}
+
+func pwClickAt(args map[string]string) []byte {
+ x, ok := args["x"]
+ if !ok {
+ return []byte(`{"error": "x not provided"}`)
+ }
+ y, ok := args["y"]
+ if !ok {
+ return []byte(`{"error": "y not provided"}`)
+ }
+ if !browserStarted || page == nil {
+ return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+ }
+ fx, err := strconv.ParseFloat(x, 64)
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to parse x: %s"}`, err.Error()))
+ }
+ fy, err := strconv.ParseFloat(y, 64)
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to parse y: %s"}`, err.Error()))
+ }
+ mouse := page.Mouse()
+ err = mouse.Click(fx, fy)
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to click: %s"}`, err.Error()))
+ }
+ return []byte(fmt.Sprintf(`{"success": true, "message": "Clicked at (%s,%s)"}`, x, y))
+}
+
+func pwGetHTML(args map[string]string) []byte {
+ selector := args["selector"]
+ if selector == "" {
+ selector = "body"
+ }
+ if !browserStarted || page == nil {
+ return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+ }
+ locator := page.Locator(selector)
+ count, err := locator.Count()
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error()))
+ }
+ if count == 0 {
+ return []byte(`{"error": "No elements found"}`)
+ }
+ html, err := locator.First().InnerHTML()
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to get HTML: %s"}`, err.Error()))
+ }
+ return []byte(fmt.Sprintf(`{"html": %s}`, jsonString(html)))
+}
+
+type DOMElement struct {
+ Tag string `json:"tag,omitempty"`
+ Attributes map[string]string `json:"attributes,omitempty"`
+ Text string `json:"text,omitempty"`
+ Children []DOMElement `json:"children,omitempty"`
+ Selector string `json:"selector,omitempty"`
+ InnerHTML string `json:"innerHTML,omitempty"`
+}
+
+func buildDOMTree(locator playwright.Locator) ([]DOMElement, error) {
+ var results []DOMElement
+ count, err := locator.Count()
+ if err != nil {
+ return nil, err
+ }
+ for i := 0; i < count; i++ {
+ el := locator.Nth(i)
+ dom, err := elementToDOM(el)
+ if err != nil {
+ continue
+ }
+ results = append(results, dom)
+ }
+ return results, nil
+}
+
+func elementToDOM(el playwright.Locator) (DOMElement, error) {
+ dom := DOMElement{}
+ tag, err := el.Evaluate(`el => el.nodeName`, nil)
+ if err == nil {
+ dom.Tag = strings.ToLower(fmt.Sprintf("%v", tag))
+ }
+ attributes := make(map[string]string)
+ attrs, err := el.Evaluate(`el => {
+ let attrs = {};
+ for (let i = 0; i < el.attributes.length; i++) {
+ let attr = el.attributes[i];
+ attrs[attr.name] = attr.value;
+ }
+ return attrs;
+ }`, nil)
+ if err == nil {
+ if amap, ok := attrs.(map[string]any); ok {
+ for k, v := range amap {
+ if vs, ok := v.(string); ok {
+ attributes[k] = vs
+ }
+ }
+ }
+ }
+ if len(attributes) > 0 {
+ dom.Attributes = attributes
+ }
+ text, err := el.TextContent()
+ if err == nil && text != "" {
+ dom.Text = text
+ }
+ innerHTML, err := el.InnerHTML()
+ if err == nil && innerHTML != "" {
+ dom.InnerHTML = innerHTML
+ }
+ childCount, _ := el.Count()
+ if childCount > 0 {
+ childrenLocator := el.Locator("*")
+ children, err := buildDOMTree(childrenLocator)
+ if err == nil && len(children) > 0 {
+ dom.Children = children
+ }
+ }
+ return dom, nil
+}
+
+func pwGetDOM(args map[string]string) []byte {
+ selector := args["selector"]
+ if selector == "" {
+ selector = "body"
+ }
+ if !browserStarted || page == nil {
+ return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+ }
+ locator := page.Locator(selector)
+ count, err := locator.Count()
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to find elements: %s"}`, err.Error()))
+ }
+ if count == 0 {
+ return []byte(`{"error": "No elements found"}`)
+ }
+ dom, err := elementToDOM(locator.First())
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to get DOM: %s"}`, err.Error()))
+ }
+ data, err := json.Marshal(dom)
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to marshal DOM: %s"}`, err.Error()))
+ }
+ return []byte(fmt.Sprintf(`{"dom": %s}`, string(data)))
+}
+
+func pwSearchElements(args map[string]string) []byte {
+ text := args["text"]
+ selector := args["selector"]
+ if text == "" && selector == "" {
+ return []byte(`{"error": "text or selector not provided"}`)
+ }
+ if !browserStarted || page == nil {
+ return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+ }
+ var locator playwright.Locator
+ if text != "" {
+ locator = page.GetByText(text)
+ } else {
+ locator = page.Locator(selector)
+ }
+ count, err := locator.Count()
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to search elements: %s"}`, err.Error()))
+ }
+ if count == 0 {
+ return []byte(`{"elements": []}`)
+ }
+ var results []map[string]string
+ for i := 0; i < count; i++ {
+ el := locator.Nth(i)
+ tag, _ := el.Evaluate(`el => el.nodeName`, nil)
+ text, _ := el.TextContent()
+ html, _ := el.InnerHTML()
+ results = append(results, map[string]string{
+ "index": strconv.Itoa(i),
+ "tag": strings.ToLower(fmt.Sprintf("%v", tag)),
+ "text": text,
+ "html": html,
+ })
+ }
+ data, err := json.Marshal(results)
+ if err != nil {
+ return []byte(fmt.Sprintf(`{"error": "failed to marshal results: %s"}`, err.Error()))
+ }
+ return []byte(fmt.Sprintf(`{"elements": %s}`, string(data)))
+}
+
+func jsonString(s string) string {
+ b, _ := json.Marshal(s)
+ return string(b)
+}