> ## Documentation Index
> Fetch the complete documentation index at: https://docs.runloop.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Start a new BenchmarkRun.

> Start a new BenchmarkRun based on the provided Benchmark.



## OpenAPI

````yaml /openapi-specs/stainless-processed-openapi.json post /v1/benchmarks/start_run
openapi: 3.1.0
info:
  title: RunLoop API
  version: '0.1'
  description: >-
    The RunLoop API spec that allows you to host lambda functions and Devboxes
    to enable scaled, long-running AI workflows.
  contact:
    name: Runloop AI Support
    url: https://runloop.ai
    email: support@runloop.ai
servers:
  - url: https://api.runloop.ai
    description: Runloop API
    variables: {}
security:
  - bearerAuth: []
tags:
  - name: Benchmark
  - name: Blueprint
  - name: Blueprint-Lifecycle
  - name: Blueprint-ObservabilityTools
  - name: Devbox
  - name: Devbox-FileTools
  - name: Devbox-Lifecycle
  - name: Devbox-NetworkTools
  - name: Devbox-ObservabilityTools
  - name: Devbox-PersistenceTools
  - name: Devbox-ShellTools
  - name: Scenario
  - name: ScenarioScorer
  - name: accounts
  - name: agents
  - name: apikeys
  - name: axons
  - name: executions
  - name: gateway-configs
  - name: mcp-configs
  - name: network-policies
  - name: objects
  - name: restricted_keys
  - name: secrets
  - name: streaming
paths:
  /v1/benchmarks/start_run:
    post:
      tags:
        - Benchmark
      summary: Start a new BenchmarkRun.
      description: Start a new BenchmarkRun based on the provided Benchmark.
      operationId: startBenchmarkRun
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/StartBenchmarkRunParameters'
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BenchmarkRunView'
      x-codeSamples:
        - lang: JavaScript
          source: >-
            import Runloop from '@runloop/api-client';


            const client = new Runloop({
              bearerToken: process.env['RUNLOOP_API_KEY'], // This is the default and can be omitted
            });


            const benchmarkRunView = await client.benchmarks.startRun({
            benchmark_id: 'benchmark_id' });


            console.log(benchmarkRunView.id);
        - lang: Python
          source: |-
            import os
            from runloop_api_client import Runloop

            client = Runloop(
                bearer_token=os.environ.get("RUNLOOP_API_KEY"),  # This is the default and can be omitted
            )
            benchmark_run_view = client.benchmarks.start_run(
                benchmark_id="benchmark_id",
            )
            print(benchmark_run_view.id)
components:
  schemas:
    StartBenchmarkRunParameters:
      type: object
      properties:
        benchmark_id:
          type: string
          description: ID of the Benchmark to run.
        run_name:
          description: Display name of the run.
          type:
            - string
            - 'null'
        metadata:
          additionalProperties:
            type: string
          description: >-
            User defined metadata to attach to the benchmark run for
            organization.
          type:
            - object
            - 'null'
        runProfile:
          description: Runtime configuration to use for this benchmark run
          anyOf:
            - $ref: '#/components/schemas/RunProfile'
            - type: 'null'
      required:
        - benchmark_id
    BenchmarkRunView:
      type: object
      description: >-
        A BenchmarkRunView represents a run of a complete set of Scenarios,
        organized under a Benchmark or created by a BenchmarkJob.
      properties:
        id:
          type: string
          description: The ID of the BenchmarkRun.
        benchmark_id:
          description: >-
            The ID of the Benchmark definition. Present if run was created from
            a benchmark definition.
          type:
            - string
            - 'null'
        name:
          description: The name of the BenchmarkRun.
          type:
            - string
            - 'null'
        start_time_ms:
          type: integer
          format: int64
          description: >-
            The time the benchmark run execution started (Unix timestamp
            milliseconds).
        duration_ms:
          format: int64
          description: The duration for the BenchmarkRun to complete.
          type:
            - integer
            - 'null'
        state:
          $ref: '#/components/schemas/BenchmarkRunState'
          description: The state of the BenchmarkRun.
        score:
          format: float
          description: >-
            The final score across the BenchmarkRun, present once completed.
            Calculated as sum of scenario scores / number of scenario runs.
          type:
            - number
            - 'null'
        metadata:
          type: object
          additionalProperties:
            type: string
          description: >-
            User defined metadata to attach to the benchmark run for
            organization.
        purpose:
          description: Purpose of the run.
          type:
            - string
            - 'null'
        environment_variables:
          additionalProperties:
            type: string
          description: Environment variables used to run the benchmark.
          type:
            - object
            - 'null'
        secrets_provided:
          additionalProperties:
            type: string
          description: >-
            User secrets used to run the benchmark. Example: {"DB_PASS":
            "DATABASE_PASSWORD"} would set the environment variable 'DB_PASS' on
            all scenario devboxes to the value of the secret
            'DATABASE_PASSWORD'.
          type:
            - object
            - 'null'
      required:
        - id
        - start_time_ms
        - state
        - metadata
    RunProfile:
      type: object
      properties:
        purpose:
          description: Purpose of the run.
          type:
            - string
            - 'null'
        envVars:
          additionalProperties:
            type: string
          description: >-
            Mapping of Environment Variable to Value. May be shown in devbox
            logging. Example: {"DB_PASS": "DATABASE_PASSWORD_VALUE"} would set
            the environment variable 'DB_PASS' to the value
            'DATABASE_PASSWORD_VALUE'.
          type:
            - object
            - 'null'
        secrets:
          additionalProperties:
            type: string
          description: >-
            Mapping of Environment Variable to User Secret Name. Never shown in
            devbox logging. Example: {"DB_PASS": "DATABASE_PASSWORD"} would set
            the environment variable 'DB_PASS' to the value of the secret
            'DATABASE_PASSWORD'.
          type:
            - object
            - 'null'
        launchParameters:
          description: >-
            Additional runtime LaunchParameters to apply after the devbox
            starts.
          anyOf:
            - $ref: '#/components/schemas/LaunchParameters'
            - type: 'null'
        mounts:
          items:
            $ref: '#/components/schemas/Mount'
          description: A list of mounts to be included in the scenario run.
          type:
            - array
            - 'null'
    BenchmarkRunState:
      type: string
      enum:
        - running
        - canceled
        - completed
        - failed
    LaunchParameters:
      type: object
      description: >-
        LaunchParameters enable you to customize the resources available to your
        Devbox as well as the environment setup that should be completed before
        the Devbox is marked as 'running'.
      properties:
        launch_commands:
          items:
            type: string
          description: >-
            Set of commands to be run at launch time, before the entrypoint
            process is run.
          type:
            - array
            - 'null'
        resource_size_request:
          description: >-
            Preset Devbox resources (vCPU, RAM in GiB, ephemeral disk in GiB).
            If not set, SMALL is used. X_SMALL: 0.5 vCPU, 1 GiB RAM, 4 GiB disk.
            SMALL: 1 vCPU, 2 GiB RAM, 4 GiB disk. MEDIUM: 2 vCPU, 4 GiB RAM, 8
            GiB disk. LARGE: 2 vCPU, 8 GiB RAM, 16 GiB disk. X_LARGE: 4 vCPU, 16
            GiB RAM, 16 GiB disk. XX_LARGE: 8 vCPU, 32 GiB RAM, 16 GiB disk.
            CUSTOM_SIZE: set custom_cpu_cores, custom_gb_memory, and optionally
            custom_disk_size.
          anyOf:
            - $ref: '#/components/schemas/ResourceSize'
            - type: 'null'
        available_ports:
          items:
            type: integer
            format: int32
          description: >-
            [Deprecated] A list of ports to make available on the Devbox. This
            field is ignored.
          type:
            - array
            - 'null'
        keep_alive_time_seconds:
          format: int64
          description: >-
            Time in seconds after which the Devbox will automatically shut down.
            Default is 1 hour. Maximum is 48 hours (172800 seconds).
          type:
            - integer
            - 'null'
        after_idle:
          description: >-
            Configure Devbox lifecycle based on idle activity. If after_idle is
            set, Devbox will ignore keep_alive_time_seconds. If both after_idle
            and lifecycle.after_idle are set, they must have the same value. Use
            lifecycle.after_idle instead.
          anyOf:
            - $ref: '#/components/schemas/IdleConfigurationParameters'
            - type: 'null'
        custom_cpu_cores:
          format: int32
          description: Custom CPU cores. Must be 0.5, 1, or a multiple of 2. Max is 16.
          type:
            - integer
            - 'null'
        custom_gb_memory:
          format: int32
          description: >-
            Custom memory size in GiB. Must be 1 or a multiple of 2. Max is
            64GiB.
          type:
            - integer
            - 'null'
        custom_disk_size:
          format: int32
          description: >-
            Custom disk size in GiB. Must be a multiple of 2. Min is 2GiB, max
            is 64GiB.
          type:
            - integer
            - 'null'
        architecture:
          description: >-
            The target architecture for the Devbox. If unset, defaults to
            x86_64.
          anyOf:
            - $ref: '#/components/schemas/Architecture'
            - type: 'null'
        user_parameters:
          description: >-
            Specify the user for execution on Devbox. If not set, default `user`
            will be used.
          anyOf:
            - $ref: '#/components/schemas/UserParameters'
            - type: 'null'
        required_services:
          items:
            type: string
          description: >-
            A list of ContainerizedService names to be started when a Devbox is
            created. A valid ContainerizedService must be specified in Blueprint
            to be started.
          type:
            - array
            - 'null'
        network_policy_id:
          description: >-
            (Optional) ID of the network policy to apply to Devboxes launched
            with these parameters. When set on a Blueprint's launch parameters,
            Devboxes created from it will inherit this policy unless explicitly
            overridden.
          type:
            - string
            - 'null'
        lifecycle:
          description: >-
            Lifecycle configuration for idle and resume behavior. Configure idle
            policy via lifecycle.after_idle (if both this and the top-level
            after_idle are set, they must match), resume triggers via
            lifecycle.resume_triggers, and optional lifecycle hooks via
            lifecycle.lifecycle_hooks.
          anyOf:
            - $ref: '#/components/schemas/LifecycleConfigurationParameters'
            - type: 'null'
        provisioning_tier:
          description: >-
            (Optional, Alpha) standard is default and flex is lazily provisioned
            and may be pre-empted. This is an alpha feature and its behavior may
            change without notice.
          anyOf:
            - $ref: '#/components/schemas/ProvisioningTier'
            - type: 'null'
    Mount:
      oneOf:
        - $ref: '#/components/schemas/ObjectMount'
        - $ref: '#/components/schemas/AgentMount'
        - $ref: '#/components/schemas/CodeMount'
        - $ref: '#/components/schemas/FileMount'
        - $ref: '#/components/schemas/BrokerMount'
      discriminator:
        propertyName: type
        mapping:
          object_mount:
            $ref: '#/components/schemas/ObjectMount'
          agent_mount:
            $ref: '#/components/schemas/AgentMount'
          code_mount:
            $ref: '#/components/schemas/CodeMount'
          file_mount:
            $ref: '#/components/schemas/FileMount'
          broker_mount:
            $ref: '#/components/schemas/BrokerMount'
    ResourceSize:
      type: string
      enum:
        - X_SMALL
        - SMALL
        - MEDIUM
        - LARGE
        - X_LARGE
        - XX_LARGE
        - CUSTOM_SIZE
      description: >
        The size of the Devbox resources for Runloop to allocate.


        X_SMALL: 0.5 cpu x 1GiB memory x 4GiB disk

        SMALL: 1 cpu x 2GiB memory x 4GiB disk

        MEDIUM: 2 cpu x 4GiB memory x 8GiB disk

        LARGE: 2 cpu x 8GiB memory x 16GiB disk

        X_LARGE: 4 cpu x 16GiB memory x 16GiB disk

        XX_LARGE: 8 cpu x 32GiB memory x 16GiB disk

        CUSTOM_SIZE: To choose a custom size, set this enum and also the
        custom_cpu_cores, custom_gb_memory, and optionally custom_disk_size in
        launch parameters. CPU must be 0.5, 1, or a multiple of 2 (max 16).
        Memory must be 1 or a multiple of 2 (max 64GiB). Disk must be a multiple
        of 2 (min 2GiB, max 64GiB). The cpu:memory ratio must be between 1:2 and
        1:8 inclusive.
      x-enum-descriptions:
        X_SMALL: 0.5 cpu x 1GiB memory x 4GiB disk
        SMALL: 1 cpu x 2GiB memory x 4GiB disk
        MEDIUM: 2 cpu x 4GiB memory x 8GiB disk
        LARGE: 2 cpu x 8GiB memory x 16GiB disk
        X_LARGE: 4 cpu x 16GiB memory x 16GiB disk
        XX_LARGE: 8 cpu x 32GiB memory x 16GiB disk
        CUSTOM_SIZE: >-
          To choose a custom size, set this enum and also the custom_cpu_cores,
          custom_gb_memory, and optionally custom_disk_size in launch
          parameters. CPU must be 0.5, 1, or a multiple of 2 (max 16). Memory
          must be 1 or a multiple of 2 (max 64GiB). Disk must be a multiple of 2
          (min 2GiB, max 64GiB). The cpu:memory ratio must be between 1:2 and
          1:8 inclusive.
    IdleConfigurationParameters:
      type: object
      properties:
        idle_time_seconds:
          type: integer
          format: int32
          description: After idle_time_seconds, on_idle action will be taken.
        on_idle:
          $ref: '#/components/schemas/IdleAction'
          description: Action to take after Devbox becomes idle.
      required:
        - idle_time_seconds
        - on_idle
    Architecture:
      type: string
      enum:
        - x86_64
        - arm64
    UserParameters:
      type: object
      description: Configuration for the Linux user in the Devbox environment.
      properties:
        username:
          type: string
          description: Username for the Linux user.
        uid:
          type: integer
          format: int32
          description: User ID (UID) for the Linux user. Must be a non-negative integer.
      required:
        - username
        - uid
    LifecycleConfigurationParameters:
      type: object
      description: >-
        Lifecycle configuration for Devbox idle and resume behavior. Configure
        idle policy via after_idle, resume triggers via resume_triggers, and
        optional lifecycle hooks via lifecycle_hooks.
      properties:
        after_idle:
          description: >-
            Configure Devbox lifecycle based on idle activity. If both this and
            the top-level after_idle are set, they must have the same value.
            Prefer this field for new integrations.
          anyOf:
            - $ref: '#/components/schemas/IdleConfigurationParameters'
            - type: 'null'
        resume_triggers:
          description: Triggers that can resume a suspended Devbox.
          anyOf:
            - $ref: '#/components/schemas/ResumeTriggers'
            - type: 'null'
        lifecycle_hooks:
          description: >-
            Optional lifecycle hooks. suspend_commands run through the suspend
            path before the Devbox suspends; see launch_commands for work on
            every startup.
          anyOf:
            - $ref: '#/components/schemas/LifecycleHooks'
            - type: 'null'
    ProvisioningTier:
      type: string
      enum:
        - standard
        - flex
    ObjectMount:
      type: object
      properties:
        object_id:
          type: string
          description: The ID of the object to write.
        object_path:
          type: string
          description: >-
            The path to write the object on the Devbox. Use the absolute path
            of the object (e.g., /home/user/object.txt), or of the target
            directory if the object is an archive (e.g.,
            /home/user/archive_dir).
        type:
          type: string
          enum:
            - object_mount
          default: object_mount
      required:
        - object_id
        - object_path
        - type
    AgentMount:
      type: object
      properties:
        agent_id:
          description: The ID of the agent to mount. Either agent_id or agent_name must be set.
          type:
            - string
            - 'null'
        agent_name:
          description: >-
            The name of the agent to mount. Resolves to the most recent agent
            with a matching name if no agent_id is provided. Either agent_id or
            agent_name must be set.
          type:
            - string
            - 'null'
        agent_path:
          description: >-
            Path to mount the agent on the Devbox. Required for git and object
            agents. Use an absolute path (e.g., /home/user/agent).
          type:
            - string
            - 'null'
        auth_token:
          description: >-
            Optional auth token for private repositories. Only used for git
            agents.
          type:
            - string
            - 'null'
        type:
          type: string
          enum:
            - agent_mount
          default: agent_mount
      required:
        - agent_id
        - agent_name
        - type
    CodeMount:
      type: object
      properties:
        repo_name:
          type: string
          description: >-
            The name of the repo to mount. By default, code will be mounted at
            /home/user/{repo_name}.
        repo_owner:
          type: string
          description: The owner of the repo.
        install_command:
          description: Installation command to install and setup repository.
          type:
            - string
            - 'null'
        git_ref:
          description: >-
            Optional git ref (branch or tag) to checkout. Defaults to the
            repository default branch.
          type:
            - string
            - 'null'
        token:
          description: The authentication token necessary to pull repo.
          type:
            - string
            - 'null'
        type:
          type: string
          enum:
            - code_mount
          default: code_mount
      required:
        - repo_name
        - repo_owner
        - type
    FileMount:
      type: object
      properties:
        target:
          type: string
          description: Target path where the file should be mounted.
        content:
          type: string
          description: Content of the file to mount.
        type:
          type: string
          enum:
            - file_mount
          default: file_mount
      required:
        - target
        - content
        - type
    BrokerMount:
      type: object
      properties:
        axon_id:
          type: string
          description: The ID of the axon event stream to mount onto the Devbox.
        protocol:
          description: The protocol used by the broker to deliver events to the agent.
          anyOf:
            - $ref: '#/components/schemas/BrokerMountProtocol'
            - type: 'null'
        agent_binary:
          description: >-
            Binary to launch the agent (e.g., 'opencode'). Used by protocols
            that launch a subprocess (acp, claude_json).
          type:
            - string
            - 'null'
        working_directory:
          description: >-
            Working directory in which to launch the agent binary. Defaults to
            the home directory if not specified.
          type:
            - string
            - 'null'
        launch_args:
          items:
            type: string
          description: >-
            Arguments to pass to the agent command (e.g., ['acp']). Used by
            protocols that launch a subprocess (acp, claude_json).
          type:
            - array
            - 'null'
        type:
          type: string
          enum:
            - broker_mount
          default: broker_mount
      required:
        - axon_id
        - type
    IdleAction:
      type: string
      enum:
        - shutdown
        - suspend
      description: |
        Action to take after Devbox idle timer is triggered.

        shutdown: Shutdown the Devbox.
        suspend: Suspend the Devbox.
      x-enum-descriptions:
        shutdown: Shutdown the Devbox.
        suspend: Suspend the Devbox.
    ResumeTriggers:
      type: object
      description: Triggers that can resume a suspended Devbox.
      properties:
        http:
          description: >-
            When true, HTTP traffic to a suspended Devbox via tunnel will
            trigger a resume.
          type:
            - boolean
            - 'null'
        axon_event:
          description: >-
            When true, axon events targeting a suspended Devbox will trigger a
            resume.
          type:
            - boolean
            - 'null'
    LifecycleHooks:
      type: object
      description: >-
        Lifecycle hooks for Devbox suspend. suspend_commands run sequentially as
        the configured Devbox user before the Devbox suspends; failures are
        logged but do not block suspending. The suspend_deadline_ms budget
        defaults to 30000 ms, may not exceed 60000 ms, and covers broker drain
        plus suspend_commands. If the deadline is exceeded, suspend work is
        abandoned, the timeout is logged, and the Devbox still proceeds to
        suspend. launch_commands still run on every startup, including after
        resume.
      properties:
        suspend_commands:
          items:
            type: string
          description: >-
            Commands to run through the suspend path before the Devbox suspends
            (e.g. cleanup, quiesce daemons).
          type:
            - array
            - 'null'
        suspend_deadline_ms:
          format: int64
          description: >-
            Deadline in milliseconds for broker drain and suspend_commands
            during suspend. Defaults to 30000 ms and may not exceed 60000 ms. If
            exceeded, suspend work is abandoned, the timeout is logged, and the
            Devbox still proceeds to suspend by shutting down vmagent and
            killing the VM.
          type:
            - integer
            - 'null'
    BrokerMountProtocol:
      type: string
      enum:
        - acp
        - claude_json
  securitySchemes:
    bearerAuth:
      scheme: bearer
      type: http

````