import asyncio
from runloop_api_client import AsyncRunloop
# Shared module-level client used by main() below.
# Note: we use the AsyncRunloop client so we can easily await long-running operations.
# The API key is automatically loaded from "RUNLOOP_API_KEY".
client = AsyncRunloop()
async def main():
    """Build a scenario around a deliberately failing test, run it, and print its score.

    Steps:
        1. Boot a devbox, write ``tests/test_example.py`` into it, snapshot the
           disk, and shut the setup devbox down again.
        2. Create a scenario whose environment is that snapshot and whose
           scorer runs ``pytest``.
        3. Start a scenario run and wait for its devbox to be running (this is
           where an agent would be invoked).
        4. Score the run, print the score, and shut the run's devbox down.

    Raises:
        RuntimeError: If scoring finished without producing a scoring result.
    """
    # 1. Create a devbox and set up a minimal failing test inside it
    devbox = await client.devboxes.create()
    try:
        # Wait for the devbox to be running before executing anything in it,
        # mirroring what is done for the scenario-run devbox further down.
        await client.devboxes.await_running(devbox.id)

        # Create tests/test_example.py in the devbox. This test will immediately raise,
        # which gives the agent something concrete to fix.
        # `printf '%s\n'` writes each argument on its own line; plain `echo` in bash
        # does not expand "\n", which would leave a one-line, invalid test file.
        await client.devboxes.execute_and_await_completion(
            devbox.id,
            command=(
                "mkdir -p tests && "
                "printf '%s\\n' "
                "'def test_example():' "
                "'    raise Exception(\"intentional failure from test_example\")' "
                "> tests/test_example.py"
            ),
        )

        # Snapshot the devbox after the test file has been created so the scenario
        # environment always contains the failing test.
        snapshot = await client.devboxes.snapshot_disk(
            devbox.id,
            name="my-scenario-baseline",
        )
    finally:
        # The setup devbox is only needed to produce the snapshot; do not leave
        # it running, even if one of the steps above failed.
        await client.devboxes.shutdown(devbox.id)

    # 2. Create the scenario
    scenario = await client.scenarios.create(
        name="My First Scenario",
        input_context={
            "problem_statement": "Fix the failing unit test in tests/test_example.py",
        },
        environment_parameters={
            "snapshot_id": snapshot.id,
        },
        scoring_contract={
            "scoring_function_parameters": [{
                "name": "bash_scorer",
                "scorer": {
                    "type": "bash_script_scorer",
                    "bash_script": "pytest -q && echo 1.0 || echo 0.0",
                },
                "weight": 1.0,
            }],
        },
        reference_output="pytest -q",
    )

    # 3. Start a scenario run and wait for the environment to be ready
    scenario_run = await client.scenarios.start_run(
        scenario_id=scenario.id,
        run_name="my-first-scenario-run",
    )
    try:
        await client.devboxes.await_running(scenario_run.devbox_id)

        # Run your agent here, using the problem statement as context.
        # The statement is read from the scenario created above, which carries
        # the input_context supplied in step 2.
        problem_statement = scenario.input_context.problem_statement
        # my_agent = MyAgent(prompt=problem_statement)
        # my_agent.solve(devbox=scenario_run.devbox_id)

        # 4. Score the run
        # Wait for scoring to finish before reading the result; the score lives
        # on the run's scoring contract result.
        result = await client.scenarios.runs.score_and_await(scenario_run.id)
        contract_result = result.scoring_contract_result
        if contract_result is None:
            raise RuntimeError(
                f"scenario run {scenario_run.id} finished scoring without a result"
            )
        print(contract_result.score)
    finally:
        # Release the run's devbox whether or not the agent/scoring succeeded.
        await client.devboxes.shutdown(scenario_run.devbox_id)
# Only start the scenario run when executed as a script, so importing this
# module does not create devboxes as a side effect.
if __name__ == "__main__":
    asyncio.run(main())