summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--config.example.toml3
-rw-r--r--config/config.go4
-rw-r--r--tools.go58
-rw-r--r--tools_playwright.go193
4 files changed, 254 insertions, 4 deletions
diff --git a/config.example.toml b/config.example.toml
index 8893345..7df35d9 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -56,3 +56,6 @@ StripThinkingFromAPI = true # Strip <think> blocks from messages before sending
# Valid values: xhigh, high, medium, low, minimal, none (empty or none = disabled)
# Models that support reasoning will include thinking content wrapped in <think> tags
ReasoningEffort = "medium"
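+# Playwright browser tools: NoPlaywright = true disables the pw_* tools; PlaywrightDebug = true launches the browser with a visible (non-headless) window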
+NoPlaywright = false
+PlaywrightDebug = false
diff --git a/config/config.go b/config/config.go
index 217c32b..e812fb8 100644
--- a/config/config.go
+++ b/config/config.go
@@ -71,8 +71,8 @@ type Config struct {
CharSpecificContextTag string `toml:"CharSpecificContextTag"`
AutoTurn bool `toml:"AutoTurn"`
// playwright browser
- NoPlaywright bool `toml:"NoPlaywright"` // when we want to avoid pw tool use
- PlaywrightHeadless bool `toml:"PlaywrightHeadless"`
+ NoPlaywright bool `toml:"NoPlaywright"` // when we want to avoid pw tool use
+ PlaywrightDebug bool `toml:"PlaywrightDebug"` // !headless
}
func LoadConfig(fn string) (*Config, error) {
diff --git a/tools.go b/tools.go
index 87956ef..5683a90 100644
--- a/tools.go
+++ b/tools.go
@@ -1493,6 +1493,9 @@ func registerPlaywrightTools() {
fnMap["pw_screenshot_and_view"] = pwScreenshotAndView
fnMap["pw_wait_for_selector"] = pwWaitForSelector
fnMap["pw_drag"] = pwDrag
+ fnMap["pw_get_html"] = pwGetHTML
+ fnMap["pw_get_dom"] = pwGetDOM
+ fnMap["pw_search_elements"] = pwSearchElements
playwrightTools := []models.Tool{
{
Type: "function",
@@ -1702,6 +1705,61 @@ func registerPlaywrightTools() {
},
},
},
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_get_html",
+ Description: "Get the HTML content of the page or a specific element.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{
+ "selector": models.ToolArgProps{
+ Type: "string",
+ Description: "optional CSS selector (default: body)",
+ },
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_get_dom",
+ Description: "Get a structured DOM representation of an element with tag, attributes, text, and children.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{
+ "selector": models.ToolArgProps{
+ Type: "string",
+ Description: "optional CSS selector (default: body)",
+ },
+ },
+ },
+ },
+ },
+ {
+ Type: "function",
+ Function: models.ToolFunc{
+ Name: "pw_search_elements",
+ Description: "Search for elements by text content or CSS selector. Returns matching elements with their tags, text, and HTML.",
+ Parameters: models.ToolFuncParams{
+ Type: "object",
+ Required: []string{},
+ Properties: map[string]models.ToolArgProps{
+ "text": models.ToolArgProps{
+ Type: "string",
+ Description: "text to search for in elements",
+ },
+ "selector": models.ToolArgProps{
+ Type: "string",
+ Description: "CSS selector to search for",
+ },
+ },
+ },
+ },
+ },
}
baseTools = append(baseTools, playwrightTools...)
toolSysMsg += browserToolSysMsg
diff --git a/tools_playwright.go b/tools_playwright.go
index 74a8e41..cd36b60 100644
--- a/tools_playwright.go
+++ b/tools_playwright.go
@@ -69,6 +69,21 @@ Additional browser automation tools (Playwright):
"name": "pw_drag",
"args": ["x1", "y1", "x2", "y2"],
"when_to_use": "drag the mouse from point (x1,y1) to (x2,y2)"
+},
+{
+ "name": "pw_get_html",
+ "args": ["selector"],
+ "when_to_use": "get the HTML content of the page or a specific element. Use when you need to understand page structure or extract HTML."
+},
+{
+ "name": "pw_get_dom",
+ "args": ["selector"],
+ "when_to_use": "get a structured DOM representation with tag, attributes, text, and children. Use when you need a readable tree view of page elements."
+},
+{
+ "name": "pw_search_elements",
+ "args": ["text", "selector"],
+ "when_to_use": "search for elements by text content or CSS selector. Returns matching elements with their tags, text, and HTML."
}
]
`
@@ -106,10 +121,9 @@ func pwStart(args map[string]string) []byte {
if browserStarted {
return []byte(`{"error": "Browser already started"}`)
}
- headless := cfg == nil || cfg.PlaywrightHeadless
var err error
browser, err = pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
- Headless: playwright.Bool(headless),
+ Headless: playwright.Bool(!cfg.PlaywrightDebug),
})
if err != nil {
return []byte(fmt.Sprintf(`{"error": "failed to launch browser: %s"}`, err.Error()))
@@ -427,3 +441,178 @@ func pwDrag(args map[string]string) []byte {
}
return []byte(fmt.Sprintf(`{"success": true, "message": "Dragged from (%s,%s) to (%s,%s)"}`, x1, y1, x2, y2))
}
+
+// pwGetHTML returns the inner HTML of the first element matching the
+// "selector" argument as a JSON object of the form {"html": "..."}.
+// When "selector" is empty it defaults to "body".
+//
+// Failures are reported as {"error": "..."}. Error text coming from
+// Playwright is JSON-encoded via jsonString so that quotes, backslashes or
+// newlines inside the message cannot produce an invalid JSON response.
+func pwGetHTML(args map[string]string) []byte {
+	selector := args["selector"]
+	if selector == "" {
+		selector = "body"
+	}
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	locator := page.Locator(selector)
+	count, err := locator.Count()
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to find elements: "+err.Error())))
+	}
+	if count == 0 {
+		return []byte(`{"error": "No elements found"}`)
+	}
+	// Only the first match is reported, even if the selector matches several.
+	html, err := locator.First().InnerHTML()
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to get HTML: "+err.Error())))
+	}
+	return []byte(fmt.Sprintf(`{"html": %s}`, jsonString(html)))
+}
+
+// DOMElement is the JSON-serialisable description of a single page element
+// as produced by elementToDOM. Every field is omitted from the JSON output
+// when empty.
+type DOMElement struct {
+ // Tag is the element's node name, lower-cased.
+ Tag string `json:"tag,omitempty"`
+ // Attributes maps attribute names to their string values.
+ Attributes map[string]string `json:"attributes,omitempty"`
+ // Text is the element's text content as reported by the locator.
+ Text string `json:"text,omitempty"`
+ // Children holds the nested elements found below this one.
+ Children []DOMElement `json:"children,omitempty"`
+ // Selector is not populated by the code visible here.
+ // NOTE(review): presumably reserved for a selector identifying the element — confirm.
+ Selector string `json:"selector,omitempty"`
+ // InnerHTML is the element's inner HTML markup.
+ InnerHTML string `json:"innerHTML,omitempty"`
+}
+
+// buildDOMTree converts every element matched by locator into a DOMElement,
+// in match order. It returns an error only when the match count cannot be
+// obtained; elements whose conversion fails are left out of the result.
+func buildDOMTree(locator playwright.Locator) ([]DOMElement, error) {
+	total, err := locator.Count()
+	if err != nil {
+		return nil, err
+	}
+	var nodes []DOMElement
+	for idx := 0; idx < total; idx++ {
+		// Skip (rather than abort on) elements that cannot be converted.
+		if node, convErr := elementToDOM(locator.Nth(idx)); convErr == nil {
+			nodes = append(nodes, node)
+		}
+	}
+	return nodes, nil
+}
+
+// elementToDOM builds a DOMElement describing the element el points at:
+// its lower-cased tag name, its attributes, its text content, its inner HTML
+// and, recursively, its direct child elements.
+//
+// Collection is best-effort: a property whose lookup fails is simply left
+// empty, and the returned error is currently always nil. The error result is
+// kept so callers do not need to change if stricter handling is added.
+func elementToDOM(el playwright.Locator) (DOMElement, error) {
+	dom := DOMElement{}
+
+	tag, err := el.Evaluate(`el => el.nodeName`, nil)
+	if err == nil {
+		dom.Tag = strings.ToLower(fmt.Sprintf("%v", tag))
+	}
+
+	attrs, err := el.Evaluate(`el => {
+		let attrs = {};
+		for (let i = 0; i < el.attributes.length; i++) {
+			let attr = el.attributes[i];
+			attrs[attr.name] = attr.value;
+		}
+		return attrs;
+	}`, nil)
+	if err == nil {
+		if amap, ok := attrs.(map[string]any); ok && len(amap) > 0 {
+			attributes := make(map[string]string, len(amap))
+			for k, v := range amap {
+				if vs, ok := v.(string); ok {
+					attributes[k] = vs
+				}
+			}
+			if len(attributes) > 0 {
+				dom.Attributes = attributes
+			}
+		}
+	}
+
+	if text, err := el.TextContent(); err == nil && text != "" {
+		dom.Text = text
+	}
+
+	if innerHTML, err := el.InnerHTML(); err == nil && innerHTML != "" {
+		dom.InnerHTML = innerHTML
+	}
+
+	// Restrict the lookup to direct children. A bare "*" would match every
+	// descendant, so each nested element would be reported as a direct child
+	// and then expanded again by the recursion, duplicating subtrees.
+	// buildDOMTree performs its own count, so no separate check is needed.
+	children, err := buildDOMTree(el.Locator(":scope > *"))
+	if err == nil && len(children) > 0 {
+		dom.Children = children
+	}
+
+	return dom, nil
+}
+
+// pwGetDOM returns a structured description of the first element matching
+// the "selector" argument as a JSON object of the form {"dom": {...}}.
+// When "selector" is empty it defaults to "body".
+//
+// Failures are reported as {"error": "..."}. Error text is JSON-encoded via
+// jsonString so that quotes or newlines inside a Playwright error message
+// cannot produce an invalid JSON response.
+func pwGetDOM(args map[string]string) []byte {
+	selector := args["selector"]
+	if selector == "" {
+		selector = "body"
+	}
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	locator := page.Locator(selector)
+	count, err := locator.Count()
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to find elements: "+err.Error())))
+	}
+	if count == 0 {
+		return []byte(`{"error": "No elements found"}`)
+	}
+	// Only the first match is described, even if the selector matches several.
+	dom, err := elementToDOM(locator.First())
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to get DOM: "+err.Error())))
+	}
+	data, err := json.Marshal(dom)
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to marshal DOM: "+err.Error())))
+	}
+	return []byte(fmt.Sprintf(`{"dom": %s}`, string(data)))
+}
+
+// pwSearchElements looks up elements on the current page and returns them as
+// a JSON object of the form {"elements": [...]}. Each entry carries the
+// match "index", the lower-cased "tag", the element "text" and its inner
+// "html".
+//
+// Arguments:
+//   - "text": text to search for; when non-empty it takes precedence and
+//     "selector" is ignored.
+//   - "selector": CSS selector, used only when "text" is empty.
+//
+// At least one argument must be provided. Failures are reported as
+// {"error": "..."} with the message JSON-encoded via jsonString so that
+// quotes or newlines in a Playwright error cannot corrupt the response.
+func pwSearchElements(args map[string]string) []byte {
+	text := args["text"]
+	selector := args["selector"]
+	if text == "" && selector == "" {
+		return []byte(`{"error": "text or selector not provided"}`)
+	}
+	if !browserStarted || page == nil {
+		return []byte(`{"error": "Browser not started. Call pw_start first."}`)
+	}
+	var locator playwright.Locator
+	if text != "" {
+		locator = page.GetByText(text)
+	} else {
+		locator = page.Locator(selector)
+	}
+	count, err := locator.Count()
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to search elements: "+err.Error())))
+	}
+	if count == 0 {
+		return []byte(`{"elements": []}`)
+	}
+	results := make([]map[string]string, 0, count)
+	for i := 0; i < count; i++ {
+		el := locator.Nth(i)
+		// Per-element lookups are best-effort: a property that cannot be read
+		// is reported as an empty string instead of failing the whole search.
+		tag := ""
+		if name, err := el.Evaluate(`el => el.nodeName`, nil); err == nil {
+			tag = strings.ToLower(fmt.Sprintf("%v", name))
+		}
+		elText, _ := el.TextContent()
+		html, _ := el.InnerHTML()
+		results = append(results, map[string]string{
+			"index": fmt.Sprintf("%d", i),
+			"tag":   tag,
+			"text":  elText,
+			"html":  html,
+		})
+	}
+	data, err := json.Marshal(results)
+	if err != nil {
+		return []byte(fmt.Sprintf(`{"error": %s}`, jsonString("failed to marshal results: "+err.Error())))
+	}
+	return []byte(fmt.Sprintf(`{"elements": %s}`, string(data)))
+}
+
+// jsonString encodes s as a JSON string literal, including the surrounding
+// double quotes, so it can be spliced directly into a hand-built JSON
+// document. If encoding fails the result is the empty string.
+func jsonString(s string) string {
+	encoded, err := json.Marshal(s)
+	if err != nil {
+		return ""
+	}
+	return string(encoded)
+}