From 59b0702a214c28bfb7f3c19a9f3ac240d5603259 Mon Sep 17 00:00:00 2001 From: Costamagna Simone Date: Wed, 26 Feb 2025 12:26:50 +0100 Subject: [PATCH 1/2] Add '[1]' to XPath of the first element to prevent error in click() function. --- backend/marking_scripts/marking_buttons_2.js | 2 +- backend/marking_scripts/marking_input.js | 2 +- backend/marking_scripts/marking_links.js | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/marking_scripts/marking_buttons_2.js b/backend/marking_scripts/marking_buttons_2.js index 952371d..ac48d4c 100644 --- a/backend/marking_scripts/marking_buttons_2.js +++ b/backend/marking_scripts/marking_buttons_2.js @@ -60,7 +60,7 @@ function captureInteractiveElements(options = {}) { .filter(e => e.tagName === current.tagName) .indexOf(current) + 1; parts.unshift( - index > 1 + index > 0 ? `${current.tagName.toLowerCase()}[${index}]` : current.tagName.toLowerCase() ); diff --git a/backend/marking_scripts/marking_input.js b/backend/marking_scripts/marking_input.js index 4f9a024..9bd81ee 100644 --- a/backend/marking_scripts/marking_input.js +++ b/backend/marking_scripts/marking_input.js @@ -60,7 +60,7 @@ function captureInteractiveElements(options = {}) { .filter(e => e.tagName === current.tagName) .indexOf(current) + 1; parts.unshift( - index > 1 + index > 0 ? `${current.tagName.toLowerCase()}[${index}]` : current.tagName.toLowerCase() ); diff --git a/backend/marking_scripts/marking_links.js b/backend/marking_scripts/marking_links.js index 08be0fd..edfe054 100644 --- a/backend/marking_scripts/marking_links.js +++ b/backend/marking_scripts/marking_links.js @@ -60,7 +60,7 @@ function captureInteractiveElements(options = {}) { .filter(e => e.tagName === current.tagName) .indexOf(current) + 1; parts.unshift( - index > 1 + index > 0 ? 
`${current.tagName.toLowerCase()}[${index}]` : current.tagName.toLowerCase() ); From b5763ed09dfa5c24a35760b143cde07b7aed5d8d Mon Sep 17 00:00:00 2001 From: Costamagna Simone Date: Wed, 26 Feb 2025 12:28:06 +0100 Subject: [PATCH 2/2] Refactor prompt to replace negative expressions like 'don't' with positive alternatives like 'avoid' --- backend/app/task_agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/app/task_agent.py b/backend/app/task_agent.py index 8042a05..30c3186 100644 --- a/backend/app/task_agent.py +++ b/backend/app/task_agent.py @@ -577,7 +577,7 @@ async def decide_immediate_action(state: AgentState): 5. Get all link elements: This will be the action you take if you decide you need to open a link 6. Go Back: If you decide you need to go back to the previous page, you should respond with "Go Back" 7. Go To Search: If you decide you need to go to a search engine, you should respond with "Go To Search" - - Dont call Go To Search if you are already on google.com as indicated by the current page url or if you have already navigated to google.com in the previous step. + - Avoid calling Go To Search if you are already on google.com as indicated by the current page url or if you have already navigated to google.com in the previous step. 8. Wait: If you decide you need to wait for a page to load, you should respond with "Wait" 9. 
Type in a text editor: If you decide you need to type in a text editor such as a google doc or some similar text editor based on the user input, you should respond with "Type in a text editor" - If you end up at a point where you need to type in a text editor after navigating to the respective text editor url, skip the other steps and directly respond with "Type in a text editor" since, this step has the ability to infer dom element for text editor @@ -956,7 +956,7 @@ async def click(state: AgentState): bbox_x, bbox_y ) await asyncio.sleep(0.5) - except Exception: + except Exception as e: return {"actions_taken": [f"Failed to scroll to element: {str(e)}"]}