Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions browsergym/core/src/browsergym/core/action/highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

from . import utils
from .base import AbstractActionSet
from .functions import ( # check,; uncheck,
from .functions import (
clear,
click,
dblclick,
dblclick, # check,; uncheck,
drag_and_drop,
fill,
focus,
Expand Down Expand Up @@ -246,6 +246,12 @@
goto, # GOTO, SEARCH
send_msg_to_user, # TERMINATE
],
# example action set
"example": [
scroll,
keyboard_type,
mouse_click,
],
}


Expand Down
11 changes: 11 additions & 0 deletions browsergym/example_benchmark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Example benchmark for BrowserGym

This package provides `browsergym.example`, which is an example benchmark for BrowserGym.

## Setup

1. Install the package
```sh
pip install -e browsergym/example_benchmark
```
2. Do any additional setup required by the benchmark.
34 changes: 34 additions & 0 deletions browsergym/example_benchmark/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
[build-system]
requires = ["hatchling", "hatch-requirements-txt"]
build-backend = "hatchling.build"

[project]
name = "browsergym-example"
description = "Example benchmark for BrowserGym"
authors = [
{name = "AUTHOR NAME"},
]
readme = "README.md"
requires-python = ">3.10"
license = {text = "Apache-2.0"}
classifiers = [
"Development Status :: 3 - Alpha",
"Programming Language :: Python :: 3",
"Operating System :: OS Independent",
"Intended Audience :: Science/Research",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"License :: OSI Approved :: Apache Software License",
]
dynamic = ["dependencies", "version"]

[project.urls]
homepage = "https://github.com/ServiceNow/BrowserGym"

[tool.hatch.version]
path = "../core/src/browsergym/core/__init__.py"

[tool.hatch.metadata.hooks.requirements_txt]
files = ["requirements.txt"]

[tool.hatch.build.targets.wheel]
packages = ["src/browsergym"]
4 changes: 4 additions & 0 deletions browsergym/example_benchmark/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
browsergym-core==0.13.3
# any other dependencies for your benchmark
# e.g. torch==1.13.0
# or your own benchmark package as a backend
29 changes: 29 additions & 0 deletions browsergym/example_benchmark/src/browsergym/example/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import os

from browsergym.core.registration import register_task

from . import example_task

# Task classes exposed by this example benchmark.
ALL_TASKS = [example_task.WikipediaTask, example_task.AmazonTask]


# Register every task class under its own task id. Each one is created with a
# fixed seed and a start page read from the START_URL environment variable.
for task in ALL_TASKS:
    register_task(
        task.get_task_id(),
        task,
        task_kwargs=dict(
            seed=0,
            start_url=os.environ.get("START_URL", "https://google.com"),
        ),
    )

# register_task exposes the tasks through gymnasium
# once this package is imported, the tasks can be created with:
# import gymnasium as gym
# import browsergym.example
# env = gym.make("browsergym/example.wikipedia")
# env = gym.make("browsergym/example.amazon")
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import os
from typing import Optional, Tuple

import playwright.sync_api

from browsergym.core.task import AbstractBrowserTask


class ExampleTask(AbstractBrowserTask):
    """
    Example task for browsergym.

    The task asks the agent to navigate to ``goal_url`` and send a message to
    the chat. This base class is not usable on its own: concrete subclasses
    must define the class attributes ``subdomain`` (used to build the task id)
    and ``goal_url`` (the page the agent has to reach).
    """

    # Declared for documentation only; concrete subclasses assign the values.
    subdomain: str
    goal_url: str

    @classmethod
    def get_task_id(cls):
        """Return the task identifier, built as ``example.<subdomain>``."""
        return f"example.{cls.subdomain}"

    def __init__(
        self,
        seed: int,
        start_url: str = "https://google.com",
    ) -> None:
        """
        Args:
            seed: random seed forwarded to the parent task class.
            start_url: page opened by setup() before the episode starts.
        """
        super().__init__(seed)
        self.start_url = start_url
        # Goal text, filled in by setup() and read back by validate().
        self.goal = None

    def setup(self, page: playwright.sync_api.Page) -> tuple[str, dict]:
        """
        Perform any operation to allow the task to start. Return the goal as a string and any information that's relevant.
        """
        page.goto(self.start_url)

        # can also login or perform any other operation
        # to prepare the task

        # Keep the goal on the instance: validate() compares chat messages to it.
        self.goal = f"Go to the {self.goal_url} and send a message to the chat."
        info = {"goal_url": self.goal_url}
        return self.goal, info

    @staticmethod
    def _message_text(message) -> str:
        """Return the text carried by one chat message, or "" if there is none."""
        if isinstance(message, str):
            return message
        # NOTE(review): assumes non-string messages are mappings that store
        # their text under the "message" key -- confirm against the environment.
        if isinstance(message, dict):
            text = message.get("message", "")
            return text if isinstance(text, str) else ""
        return ""

    def validate(self, page, chat_messages) -> tuple[float, bool, str, dict]:
        """Check if the task was completed successfully and return a reward.

        i.e. reward, done, message, info

        Args:
            page: the active playwright page.
            chat_messages: the chat messages.
        Returns:
            reward: float, the reward obtained since last call to validate().
            done: boolean flag, indicates if the task has finished or not (be it success or fail).
            message: string, a new user message for the chat.
            info: dictionary, custom information from the task.
        """
        # wait for the page to load if needed
        page.wait_for_load_state("networkidle")
        # The goal only exists once setup() has run; without it nothing can match.
        if self.goal is not None and page.url == self.goal_url:
            # check if the chat messages contain the goal
            if any(self.goal in self._message_text(message) for message in chat_messages):
                return 1.0, True, "Task completed successfully", {}
        return 0.0, False, "Task not completed", {}

    def cheat(self, page: playwright.sync_api.Page, chat_messages: list[str]) -> None:
        """
        Solve the task using a pre-defined solution (optional).

        Raises:
            NotImplementedError: always, no scripted solution is provided.
        """
        raise NotImplementedError("Cheat method not implemented for this task.")

    def teardown(self) -> None:
        """
        Tear down the task and clean up any resource / data created by the task (optional).
        """
        # typically shut down the browser...
        pass


class WikipediaTask(ExampleTask):
    """
    Example task whose goal page is Wikipedia.

    The goal URL is read from the WIKIPEDIA_URL environment variable when the
    class is defined, and falls back to the English main page.
    """

    subdomain = "wikipedia"
    goal_url = os.getenv("WIKIPEDIA_URL", "https://en.wikipedia.org/wiki/Main_Page")


class AmazonTask(ExampleTask):
    """
    Example task whose goal page is Amazon.

    The goal URL is read from the AMAZON_URL environment variable when the
    class is defined, and falls back to the amazon.com home page.
    """

    subdomain = "amazon"
    goal_url = os.getenv("AMAZON_URL", "https://www.amazon.com/")
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,13 @@
retry_with_force=True,
demo_mode="off",
),
"example": HighLevelActionSetArgs(
subsets=["example"],
multiaction=False,
strict=False,
retry_with_force=True,
demo_mode="off",
),
}

# all benchmarks are callables designed for lazy loading, i.e. `bench = DEFAULT_BENCHMARKS["miniwob_all"]()`
Expand Down Expand Up @@ -260,4 +267,18 @@
),
task_metadata=task_metadata("weblinx"),
),
"example": lambda: Benchmark(
name="example",
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["example"],
is_multi_tab=False,
supports_parallel_seeds=True,
backends=["example"],
env_args_list=make_env_args_list_from_repeat_tasks(
task_list=task_list_from_metadata(metadata=task_metadata("example")),
max_steps=10,
n_repeats=1, # 1 seed per task in the example benchmark
seeds_rng=np.random.RandomState(42),
),
task_metadata=task_metadata("example"),
),
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
task_name,any_criterion
example.wikipedia,True
example.amazon,False
2 changes: 2 additions & 0 deletions browsergym/experiments/src/browsergym/experiments/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,8 @@ def _get_env_name(task_name: str):
import browsergym.assistantbench
elif task_name.startswith("weblinx"):
import weblinx_browsergym

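# NOTE: do not forget to import your own benchmark here, e.g. by adding an `elif task_name.startswith("example"): import browsergym.example` branch above.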

return f"browsergym/{task_name}"

Expand Down
Loading