diff --git a/packages/playwright/src/mcp/browser/tools/keyboard.ts b/packages/playwright/src/mcp/browser/tools/keyboard.ts index 58a433dc500ce..cb2ba08dc0e9f 100644 --- a/packages/playwright/src/mcp/browser/tools/keyboard.ts +++ b/packages/playwright/src/mcp/browser/tools/keyboard.ts @@ -32,13 +32,37 @@ const pressKey = defineTabTool({ }, handle: async (tab, params, response) => { - response.setIncludeSnapshot(); response.addCode(`// Press ${params.key}`); response.addCode(`await page.keyboard.press('${params.key}');`); + await tab.page.keyboard.press(params.key); + }, +}); - await tab.waitForCompletion(async () => { - await tab.page.keyboard.press(params.key); - }); +const pressSequentially = defineTabTool({ + capability: 'internal', + + schema: { + name: 'browser_press_sequentially', + title: 'Press sequentially', + description: 'Press text sequentially on the keyboard', + inputSchema: z.object({ + text: z.string().describe('Text to press sequentially'), + submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'), + }), + type: 'input', + }, + + handle: async (tab, params, response) => { + response.addCode(`// Press ${params.text}`); + response.addCode(`await page.keyboard.type('${params.text}');`); + await tab.page.keyboard.type(params.text); + if (params.submit) { + response.addCode(`await page.keyboard.press('Enter');`); + response.setIncludeSnapshot(); + await tab.waitForCompletion(async () => { + await tab.page.keyboard.press('Enter'); + }); + } }, }); @@ -84,4 +108,5 @@ const type = defineTabTool({ export default [ pressKey, type, + pressSequentially, ]; diff --git a/packages/playwright/src/mcp/terminal/commands.ts b/packages/playwright/src/mcp/terminal/commands.ts index 7fb195b842641..c41722f74b5b2 100644 --- a/packages/playwright/src/mcp/terminal/commands.ts +++ b/packages/playwright/src/mcp/terminal/commands.ts @@ -156,7 +156,7 @@ const networkRequests = declareCommand({ }); const pressKey = declareCommand({ - name: 'press-key', + name: 'press', description: 'Press a key on the keyboard', args: z.object({ key: z.string().describe('Name of the key to press or a character to generate, such as `ArrowLeft` or `a`'), @@ -226,15 +226,13 @@ const type = declareCommand({ name: 'type', description: 'Type text into editable element', args: z.object({ - ref: z.string().describe('Exact target element reference from the page snapshot'), text: z.string().describe('Text to type into the element'), }), options: z.object({ submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'), - slowly: z.boolean().optional().describe('Whether to type one character at a time. Useful for triggering key handlers in the page.'), }), - toolName: 'browser_type', - toolParams: ({ ref, text }, { submit, slowly }) => ({ ref, text, submit, slowly }), + toolName: 'browser_press_sequentially', + toolParams: ({ text }, { submit }) => ({ text, submit }), }); const waitFor = declareCommand({ diff --git a/packages/playwright/src/mcp/terminal/help.json b/packages/playwright/src/mcp/terminal/help.json index e064df95dad38..3d1fb8c09f8c4 100644 --- a/packages/playwright/src/mcp/terminal/help.json +++ b/packages/playwright/src/mcp/terminal/help.json @@ -1,5 +1,5 @@ { - "global": "Usage: playwright-cli [options]\nCommands:\n click perform click on a web page\n close close the page\n dblclick perform double click on a web page\n console returns all console messages\n drag perform drag and drop between two elements\n evaluate evaluate javascript expression on page or element\n upload-file upload one or multiple files\n handle-dialog handle a dialog\n hover hover over element on page\n open open url\n go-back go back to the previous page\n network-requests returns all network requests since loading the page\n press-key press a key on the keyboard\n resize resize the browser window\n run-code run playwright code snippet\n select-option select an option in a dropdown\n snapshot capture accessibility snapshot of the current page, this is better than screenshot\n screenshot take a screenshot of the current page. you can't perform actions based on the screenshot, use browser_snapshot for actions.\n type type text into editable element\n wait-for wait for text to appear or disappear or a specified time to pass\n tab close a browser tab\n mouse-click-xy click left mouse button at a given position\n mouse-drag-xy drag left mouse button to a given position\n mouse-move-xy move mouse to a given position\n pdf-save save page as pdf\n start-tracing start trace recording\n stop-tracing stop trace recording", + "global": "Usage: playwright-cli [options]\nCommands:\n click perform click on a web page\n close close the page\n dblclick perform double click on a web page\n console returns all console messages\n drag perform drag and drop between two elements\n evaluate evaluate javascript expression on page or element\n upload-file upload one or multiple files\n handle-dialog handle a dialog\n hover hover over element on page\n open open url\n go-back go back to the previous page\n network-requests returns all network requests since loading the page\n press press a key on the keyboard\n resize resize the browser window\n run-code run playwright code snippet\n select-option select an option in a dropdown\n snapshot capture accessibility snapshot of the current page, this is better than screenshot\n screenshot take a screenshot of the current page. you can't perform actions based on the screenshot, use browser_snapshot for actions.\n type type text into editable element\n wait-for wait for text to appear or disappear or a specified time to pass\n tab close a browser tab\n mouse-click-xy click left mouse button at a given position\n mouse-drag-xy drag left mouse button to a given position\n mouse-move-xy move mouse to a given position\n pdf-save save page as pdf\n start-tracing start trace recording\n stop-tracing stop trace recording", "commands": { "click": "playwright-cli click \n\nPerform click on a web page\n\nArguments:\n \tExact target element reference from the page snapshot\nOptions:\n --button\tbutton to click, defaults to left\n --modifiers\tmodifier keys to press", "close": "playwright-cli close \n\nClose the page\n", @@ -13,13 +13,13 @@ "open": "playwright-cli open \n\nOpen URL\n\nArguments:\n \tThe URL to navigate to\nOptions:\n --headed\trun browser in headed mode", "go-back": "playwright-cli go-back \n\nGo back to the previous page\n", "network-requests": "playwright-cli network-requests \n\nReturns all network requests since loading the page\n\nOptions:\n --includeStatic\twhether to include successful static resources like images, fonts, scripts, etc. defaults to false.", - "press-key": "playwright-cli press-key \n\nPress a key on the keyboard\n\nArguments:\n \tName of the key to press or a character to generate, such as `ArrowLeft` or `a`", + "press": "playwright-cli press \n\nPress a key on the keyboard\n\nArguments:\n \tName of the key to press or a character to generate, such as `ArrowLeft` or `a`", "resize": "playwright-cli resize \n\nResize the browser window\n\nArguments:\n \tWidth of the browser window\n \tHeight of the browser window", "run-code": "playwright-cli run-code \n\nRun Playwright code snippet\n\nArguments:\n \tA JavaScript function containing Playwright code to execute. It will be invoked with a single argument, page, which you can use for any page interaction.", "select-option": "playwright-cli select-option \n\nSelect an option in a dropdown\n\nArguments:\n \tExact target element reference from the page snapshot\n \tArray of values to select in the dropdown. This can be a single value or multiple values.", "snapshot": "playwright-cli snapshot \n\nCapture accessibility snapshot of the current page, this is better than screenshot\n\nOptions:\n --filename\tsave snapshot to markdown file instead of returning it in the response.", "screenshot": "playwright-cli screenshot \n\nTake a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.\n\nArguments:\n \tExact target element reference from the page snapshot.\nOptions:\n --filename\tfile name to save the screenshot to. defaults to `page-{timestamp}.{png|jpeg}` if not specified.\n --fullPage\twhen true, takes a screenshot of the full scrollable page, instead of the currently visible viewport.", - "type": "playwright-cli type \n\nType text into editable element\n\nArguments:\n \tExact target element reference from the page snapshot\n \tText to type into the element\nOptions:\n --submit\twhether to submit entered text (press enter after)\n --slowly\twhether to type one character at a time. useful for triggering key handlers in the page.", + "type": "playwright-cli type \n\nType text into editable element\n\nArguments:\n \tText to type into the element\nOptions:\n --submit\twhether to submit entered text (press enter after)", "wait-for": "playwright-cli wait-for \n\nWait for text to appear or disappear or a specified time to pass\n\nOptions:\n --time\tthe time to wait in seconds\n --text\tthe text to wait for\n --textGone\tthe text to wait for to disappear", "tab": "playwright-cli tab \n\nClose a browser tab\n\nArguments:\n \tAction to perform on tabs, 'list' | 'new' | 'close' | 'select'\n \tTab index. If omitted, current tab is closed.", "mouse-click-xy": "playwright-cli mouse-click-xy \n\nClick left mouse button at a given position\n\nArguments:\n \tX coordinate\n \tY coordinate", diff --git a/tests/mcp/generator.spec.ts b/tests/mcp/generator.spec.ts index 8aab01b679f6a..b5769459ea141 100644 --- a/tests/mcp/generator.spec.ts +++ b/tests/mcp/generator.spec.ts @@ -47,6 +47,7 @@ test('generator tools intent', async ({ startClient }) => { 'browser_navigate_back', 'browser_open', 'browser_press_key', + 'browser_press_sequentially', 'browser_resize', 'browser_run_code', 'browser_select_option',