browseruse_bench.utils.db_utils

PostgreSQL database connection and operation utilities.

Import

```python
from browseruse_bench.utils import (
    get_db_connection,
    db_cursor,
    init_db_schema,
    create_run_record,
    update_run_status,
    insert_task_results,
    insert_eval_metric,
)
```

get_db_connection

Get a PostgreSQL connection.

```python
def get_db_connection(
    database_url: Optional[str] = None
) -> psycopg2.extensions.connection
```

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| database_url | str | None | Optional database URL |

Returns `psycopg2.extensions.connection`: the Psycopg2 connection object.
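
Usage Example

A minimal sketch of acquiring and releasing a connection. The DSN below is a hypothetical example value; when `database_url` is omitted, the library presumably falls back to its configured default.

```python
from browseruse_bench.utils import get_db_connection

# Example DSN is hypothetical; omit the argument to use the default.
conn = get_db_connection("postgresql://user:pass@localhost:5432/bench")
try:
    with conn.cursor() as cur:
        cur.execute("SELECT 1")
        print(cur.fetchone())
    conn.commit()
finally:
    # A plain psycopg2 connection is returned, so close it explicitly.
    conn.close()
```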

db_cursor

Context manager providing an auto-commit/rollback cursor.

```python
@contextmanager
def db_cursor(
    database_url: Optional[str] = None
) -> Generator[psycopg2.extensions.cursor, None, None]
```

Usage Example

```python
from browseruse_bench.utils import db_cursor

with db_cursor() as cur:
    cur.execute("SELECT * FROM benchmark_run LIMIT 10")
    rows = cur.fetchall()
# Auto-commits on success, rolls back on exception
```

init_db_schema

Initialize the database schema (executes scripts/db_schema.sql).

```python
def init_db_schema(database_url: Optional[str] = None) -> None
```

Raises FileNotFoundError if the schema file does not exist.
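
Usage Example

A minimal sketch; wrapping the call surfaces the documented FileNotFoundError when scripts/db_schema.sql is missing.

```python
from browseruse_bench.utils import init_db_schema

try:
    # Executes scripts/db_schema.sql against the default database.
    init_db_schema()
except FileNotFoundError as exc:
    # Raised when the schema file does not exist, per the docs above.
    print(f"Schema file not found: {exc}")
```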

create_run_record

Create a record in the benchmark_run table and return its run_uuid.

```python
def create_run_record(
    agent_name: str,
    benchmark_name: str,
    mode: str,
    count: Optional[int],
    task_ids_json: Optional[str],
    started_at: datetime,
    status: str,
    experiment_path: Optional[str],
    database_url: Optional[str] = None
) -> str
```
| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| agent_name | str | required | Agent name |
| benchmark_name | str | required | Benchmark name |
| mode | str | required | Execution mode |
| count | int | None | Number of tasks |
| task_ids_json | str | None | JSON string of task IDs |
| started_at | datetime | required | Start timestamp |
| status | str | required | Initial status |
| experiment_path | str | None | Path to experiment directory |

Returns `str`: UUID of the created run record.
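
Usage Example

A hedged sketch of creating a run record. The agent/benchmark names, mode, and status strings are placeholder values, not values mandated by the library.

```python
import json
from datetime import datetime, timezone

from browseruse_bench.utils import create_run_record

run_uuid = create_run_record(
    agent_name="my-agent",                  # placeholder name
    benchmark_name="my-benchmark",          # placeholder name
    mode="batch",                           # assumed mode string
    count=2,
    task_ids_json=json.dumps(["task-001", "task-002"]),
    started_at=datetime.now(timezone.utc),
    status="running",                       # assumed initial status value
    experiment_path="experiments/run-001",  # placeholder path
)
print(run_uuid)  # UUID of the new benchmark_run row
```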

update_run_status

Update a benchmark_run record's status, finish time, and error message.

```python
def update_run_status(
    run_uuid: str,
    status: str,
    finished_at: datetime,
    error_message: Optional[str] = None,
    database_url: Optional[str] = None
) -> None
```
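
Usage Example

A sketch of closing out a run on failure; the status string is an assumption, not a documented value, and on success `error_message` would simply be omitted.

```python
from datetime import datetime, timezone

from browseruse_bench.utils import update_run_status

run_uuid = "00000000-0000-0000-0000-000000000000"  # normally returned by create_run_record

update_run_status(
    run_uuid=run_uuid,
    status="failed",                         # assumed status value
    finished_at=datetime.now(timezone.utc),
    error_message="agent timed out",         # optional; omit on success
)
```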

insert_task_results

Batch insert rows into the benchmark_task_result table.

```python
def insert_task_results(
    rows: Iterable[Dict[str, Any]],
    database_url: Optional[str] = None
) -> None
```

Required Fields

| Field | Description |
| --- | --- |
| run_uuid | Run UUID |
| task_id | Task ID |
| benchmark_name | Benchmark name |
| agent_name | Agent name |
| status | Status |

Optional Fields

started_at, finished_at, latency_ms, retries, error_type, raw_log_path, extra
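
Usage Example

A sketch of a batch insert using the required fields listed above plus a few of the optional ones; the concrete values are illustrative only.

```python
from datetime import datetime, timezone

from browseruse_bench.utils import insert_task_results

run_uuid = "00000000-0000-0000-0000-000000000000"  # normally returned by create_run_record

rows = [
    {
        # Required fields
        "run_uuid": run_uuid,
        "task_id": "task-001",
        "benchmark_name": "my-benchmark",  # placeholder name
        "agent_name": "my-agent",          # placeholder name
        "status": "passed",                # assumed status value
        # A few of the optional fields
        "finished_at": datetime.now(timezone.utc),
        "latency_ms": 1250,
        "retries": 0,
    },
]
insert_task_results(rows)
```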

insert_eval_metric

Insert a benchmark_eval_metric record.
```python
def insert_eval_metric(
    row: Dict[str, Any],
    database_url: Optional[str] = None
) -> None
```

Field Description

| Field | Type | Description |
| --- | --- | --- |
| run_uuid | str | Run UUID (required) |
| agent_name | str | Agent name (required) |
| benchmark_name | str | Benchmark name (required) |
| model_name | str | Model name |
| score_threshold | int | Score threshold |
| total_tasks | int | Total tasks |
| completed_tasks | int | Completed tasks |
| passed_tasks | int | Passed tasks |
| avg_score | float | Average score |
| pass_rate | float | Pass rate |
| avg_latency_ms | int | Average latency (ms) |
| extra | dict | Extra data (JSON) |
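
Usage Example

A sketch populating the fields above; only run_uuid, agent_name, and benchmark_name are required, and the names and numbers are illustrative.

```python
from browseruse_bench.utils import insert_eval_metric

run_uuid = "00000000-0000-0000-0000-000000000000"  # normally returned by create_run_record

insert_eval_metric({
    "run_uuid": run_uuid,
    "agent_name": "my-agent",          # placeholder name
    "benchmark_name": "my-benchmark",  # placeholder name
    "model_name": "my-model",          # placeholder name
    "total_tasks": 2,
    "completed_tasks": 2,
    "passed_tasks": 1,
    "avg_score": 0.5,
    "pass_rate": 0.5,
    "avg_latency_ms": 1250,
    "extra": {"notes": "illustrative values"},
})
```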