跳转到主要内容

browseruse_bench.utils.task_utils

Task processing utility functions.

Import

from browseruse_bench.utils import (
    load_tasks,
    load_tasks_with_benchmark_support,
    filter_tasks,
    filter_completed_tasks,
    is_task_completed_by_result_json,
    resolve_tasks_json_path,
    print_task_summary,
)

load_tasks

Load task data.
def load_tasks(
    tasks_json_path: str,
    prompt_fmt: Optional[str] = None
) -> List[Dict[str, Any]]
tasks_json_path
str
必填
Path to tasks JSON file
prompt_fmt
str
默认值:"None"
Optional prompt template, e.g. "{task}\n...{url}...". If provided, a prompt field is added to each task dictionary.
return
list of dict
List of tasks, each containing task_id, task_text, url. Includes prompt if prompt_fmt is provided.

load_tasks_with_benchmark_support

Load tasks with support for different benchmarks (including BrowseComp).
def load_tasks_with_benchmark_support(
    tasks_json_path: Path,
    prompt_fmt: Optional[str] = None
) -> List[Dict[str, Any]]
tasks_json_path
Path
必填
Path to tasks JSON file
prompt_fmt
str
默认值:"None"
Optional prompt template (ignored for BrowseComp, which has its own template)

filter_tasks

Filter tasks based on mode.
def filter_tasks(
    tasks: List[Dict[str, Any]],
    mode: str,
    count: int,
    task_ids: Optional[List[str]],
    task_id: Optional[str] = None
) -> List[Dict[str, Any]]
tasks
list of dict
必填
List of tasks
mode
str
必填
Filter mode:
  • single - Run first task only
  • first_n - Run first N tasks
  • sample_n - Randomly sample N tasks
  • specific - Run tasks with specified IDs
  • by_id - Run single task by ID
  • all - Run all tasks
count
int
必填
Number of tasks for first_n or sample_n mode
task_ids
list of str
默认值:"None"
List of task IDs for specific mode
task_id
str
默认值:"None"
Single task ID for by_id mode

filter_completed_tasks

Filter out completed tasks.
def filter_completed_tasks(
    tasks: List[Dict[str, Any]],
    output_dir: Path,
    check_func: Callable[[str, Path], bool]
) -> Tuple[List[Dict[str, Any]], int]
tasks
list of dict
必填
List of tasks
output_dir
Path
必填
Output directory
check_func
function
必填
Function to check if a task is completed
return
tuple
(List of remaining tasks, number of skipped tasks)

is_task_completed_by_result_json

Check if task is completed (via result.json).
def is_task_completed_by_result_json(
    task_id: str,
    output_dir: Path
) -> bool
task_id
str
必填
Task ID
output_dir
Path
必填
Output directory path
return
bool
True if result.json exists and is not empty

resolve_tasks_json_path

Resolve task JSON file path.
def resolve_tasks_json_path(
    tasks_json_arg: Optional[str],
    default_tasks_json: Path,
    env_var: str = 'TASKS_JSON'
) -> str
tasks_json_arg
str
默认值:"None"
Path passed via command line
default_tasks_json
Path
必填
Default path
env_var
str
默认值:"TASKS_JSON"
Environment variable name

print_task_summary

Print task execution summary.
def print_task_summary(
    total_tasks: int,
    tasks_to_run: int,
    success_count: int,
    failed_count: int,
    output_dir: Path
) -> None
total_tasks
int
必填
Total number of tasks
tasks_to_run
int
必填
Number of tasks run in this session
success_count
int
必填
Number of successful tasks
failed_count
int
必填
Number of failed tasks
output_dir
Path
必填
Output directory path