browseruse_bench.utils.task_utils

Task processing utility functions.

Import

from browseruse_bench.utils import (
    load_tasks,
    load_tasks_with_benchmark_support,
    filter_tasks,
    filter_completed_tasks,
    is_task_completed_by_result_json,
    resolve_tasks_json_path,
    print_task_summary,
)

load_tasks

Load task data.

def load_tasks(
    tasks_json_path: str,
    prompt_fmt: Optional[str] = None
) -> List[Dict[str, Any]]

tasks_json_path

str

Required

Path to tasks JSON file

prompt_fmt

str

Default: None

Optional prompt template, e.g. "{task}\n...{url}...". If provided, a prompt field is added to each task dictionary.

return

list of dict

List of tasks, each containing task_id, task_text, url. Includes prompt if prompt_fmt is provided.

load_tasks_with_benchmark_support

Load tasks with support for different benchmarks (including BrowseComp).

def load_tasks_with_benchmark_support(
    tasks_json_path: Path,
    prompt_fmt: Optional[str] = None
) -> List[Dict[str, Any]]

tasks_json_path

Path

Required

Path to tasks JSON file

prompt_fmt

str

Default: None

Optional prompt template (ignored for BrowseComp which has its own template)

filter_tasks

Filter tasks based on mode.

def filter_tasks(
    tasks: List[Dict[str, Any]],
    mode: str,
    count: int,
    task_ids: Optional[List[str]],
    task_id: Optional[str] = None
) -> List[Dict[str, Any]]

tasks

list of dict

Required

List of tasks

mode

str

Required

Filter mode:

- single - Run first task only
- first_n - Run first N tasks
- sample_n - Randomly sample N tasks
- specific - Run tasks with specified IDs
- by_id - Run single task by ID
- all - Run all tasks

count

int

Required

Number of tasks for first_n or sample_n mode

task_ids

list of str

Default: None (the signature above shows no default value for task_ids, so pass it explicitly)

List of task IDs for specific mode

task_id

str

Default: None

Single task ID for by_id mode

filter_completed_tasks

Filter out completed tasks.

def filter_completed_tasks(
    tasks: List[Dict[str, Any]],
    output_dir: Path,
    check_func: Callable[[str, Path], bool]
) -> Tuple[List[Dict[str, Any]], int]

tasks

list of dict

Required

List of tasks

output_dir

Path

Required

Output directory

check_func

function

Required

Function to check if a task is completed

return

tuple

(List of remaining tasks, number of skipped tasks)

is_task_completed_by_result_json

Check if task is completed (via result.json).

def is_task_completed_by_result_json(
    task_id: str,
    output_dir: Path
) -> bool

task_id

str

Required

Task ID

output_dir

Path

Required

Output directory path

return

bool

True if result.json exists and is not empty

resolve_tasks_json_path

Resolve task JSON file path.

def resolve_tasks_json_path(
    tasks_json_arg: Optional[str],
    default_tasks_json: Path,
    env_var: str = 'TASKS_JSON'
) -> str

tasks_json_arg

str

Default: None

Path passed via command line

default_tasks_json

Path

Required

Default path

env_var

str

Default: 'TASKS_JSON'

Environment variable name

print_task_summary

Print task execution summary.

def print_task_summary(
    total_tasks: int,
    tasks_to_run: int,
    success_count: int,
    failed_count: int,
    output_dir: Path
) -> None

total_tasks

int

Required

Total number of tasks

tasks_to_run

int

Required

Number of tasks run in this session

success_count

int

Required

Number of successful tasks

failed_count

int

Required

Number of failed tasks

output_dir

Path

Required

Output directory path

Overview

Utils

task_utils

browseruse_bench.utils.task_utils

Import

load_tasks

load_tasks_with_benchmark_support

filter_tasks

filter_completed_tasks

is_task_completed_by_result_json

resolve_tasks_json_path

print_task_summary

Overview

Utils

browseruse_bench.utils.task_utils

Import

load_tasks

load_tasks_with_benchmark_support

filter_tasks

filter_completed_tasks

is_task_completed_by_result_json

resolve_tasks_json_path

print_task_summary

browseruse_bench.utils.task_utils

Import

load_tasks

load_tasks_with_benchmark_support

filter_tasks

filter_completed_tasks

is_task_completed_by_result_json

resolve_tasks_json_path

print_task_summary