-
Notifications
You must be signed in to change notification settings - Fork 335
Add webcompat-triage agent #6170
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
# Multi-stage build for the webcompat-triage agent and its Bugzilla broker.
#   builder -> resolves and installs Python dependencies into /opt/venv
#   base    -> slim runtime layer that only carries the finished venv
#   agent   -> runnable target: triage agent (needs Node.js + Firefox libs)
#   broker  -> runnable target: Bugzilla MCP broker
FROM python:3.12 AS builder

# Pull the uv / uvx binaries out of the published uv image.
# NOTE(review): `:latest` is unpinned, so two builds may use different uv
# versions — consider pinning a tag or digest.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Have uv manage the environment at /opt/venv; the `base` stage copies that
# exact path out of this stage.
ENV UV_PROJECT_ENVIRONMENT=/opt/venv

WORKDIR /app

# Install external deps without building workspace members.
# Only the root metadata files are bind-mounted here, so this layer can be
# reused as long as they are unchanged.
# NOTE(review): `--frozen` here vs `--locked` below looks deliberate (member
# pyproject files are not mounted in this step) — confirm.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=VERSION,target=VERSION \
    uv sync --frozen --no-dev --no-install-workspace --package hackbot-agent-webcompat-triage

# Second pass with the whole build context mounted at /app: installs the
# workspace package itself, non-editable, so the venv does not point back at
# the source tree and can be copied into later stages on its own.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,target=/app,rw \
    uv sync --locked --no-dev --no-editable --package hackbot-agent-webcompat-triage

# Shared runtime layer for both runnable targets.
FROM python:3.12 AS base

COPY --from=builder /opt/venv /opt/venv
WORKDIR /app

ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
# Put the venv first on PATH so `python` in CMD resolves to the venv interpreter.
ENV PATH="/opt/venv/bin:$PATH"

FROM base AS agent

# The Firefox DevTools MCP server is an npm package launched via `npx`, so the
# agent image needs Node.js + npm (the python base ships neither). It also
# needs the shared libraries Firefox requires to run headless; the Firefox
# binary itself is downloaded at agent startup (a fresh Nightly per run) via
# mozdownload/mozinstall, not baked in here.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
    nodejs npm \
    ca-certificates \
    libgtk-3-0 libdbus-glib-1-2 libx11-xcb1 libxtst6 libxt6 \
    libasound2 libpci3 \
    && rm -rf /var/lib/apt/lists/*

# hackbot.toml lives at the agent root (not inside the package), so copy it into
# the working dir; the runtime discovers it there (cwd) at startup.
COPY agents/webcompat-triage/hackbot.toml /app/hackbot.toml

# Run as an unprivileged user; /workspace is the only extra directory handed
# over to it here.
RUN useradd --create-home --shell /bin/bash agent \
    && mkdir -p /workspace \
    && chown agent:agent /workspace

USER agent

CMD ["python", "-m", "hackbot_agents.webcompat_triage"]

FROM base AS broker

# The broker also runs unprivileged.
RUN useradd --create-home --shell /bin/bash broker

USER broker

# Matches the broker's default listening port (BrokerInputs.port = 8765).
EXPOSE 8765

CMD ["python", "-m", "hackbot_agents.webcompat_triage.broker"]
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
# Local two-container setup: the broker holds the Bugzilla credentials and the
# agent talks to it over the compose network.
services:
  webcompat-triage-broker:
    build:
      # NOTE(review): presumably the repository root, two levels above this
      # file — the dockerfile path below is relative to it.
      context: ../..
      dockerfile: agents/webcompat-triage/Dockerfile
      target: broker
    environment:
      # Interpolated from the host environment. Unlike ANTHROPIC_API_KEY below
      # there is no `:?` guard, so an unset variable becomes an empty string.
      BUGZILLA_API_URL: ${BUGZILLA_API_URL}
      BUGZILLA_API_KEY: ${BUGZILLA_API_KEY}
    # Reachable by other services on the compose network only; not published
    # to the host.
    expose:
      - "8765"

  webcompat-triage-agent:
    build:
      context: ../..
      dockerfile: agents/webcompat-triage/Dockerfile
      target: agent
    environment:
      # Bare names are passed through from the host environment when set.
      - RUN_ID
      - BUG_DATA
      - BUG_ID
      # The broker is addressed by its compose service name.
      - BUGZILLA_MCP_URL=http://webcompat-triage-broker:8765/mcp
      # `:?` aborts `docker compose` when the key is unset or empty.
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?error}
      # No uploader locally: summary/logs/attachments are written under
      # /artifacts/<run_id>, bind-mounted to the host's ~/hackbot/artifacts.
      - ARTIFACTS_DIR=/artifacts
    volumes:
      - ${HOME}/hackbot/artifacts:/artifacts
    depends_on:
      # Only waits for the broker container to start, not for it to be ready.
      webcompat-triage-broker:
        condition: service_started
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| # webcompat-triage needs no platform prep: no [source] checkout, no [firefox] build. | ||
| # Subject comes from the request (bug_data / bug_id); the DevTools MCP drives a | ||
| # Firefox Nightly that the agent downloads at startup (it is not baked into the image). |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,43 @@ | ||
| from hackbot_runtime import HackbotContext, run_async | ||
| from pydantic_settings import BaseSettings, SettingsConfigDict | ||
|
|
||
| from .agent import WebcompatTriageResult, run_webcompat_triage | ||
| from .firefox_install import install_firefox_nightly | ||
|
|
||
|
|
||
class AgentInputs(BaseSettings):
    # Per-run inputs for the triage agent, declared as a pydantic-settings model.
    # NOTE(review): presumably filled from environment variables such as
    # BUGZILLA_MCP_URL / BUG_DATA / BUG_ID — confirm against the deployment.

    # URL of the Bugzilla MCP endpoint; required (no default).
    bugzilla_mcp_url: str
    # Inline report text. When set it is used as the triage subject.
    # NOTE(review), raised by a project member on the PR: "I think we should
    # consider making this explicitly a JSON object rather than just a string
    # (although maybe in the end it doesn't make much difference?)."
    bug_data: str | None = None
    # Bugzilla bug number, used when no inline report is given.
    bug_id: int | None = None
    # Optional overrides forwarded unchanged to run_webcompat_triage().
    model: str | None = None
    max_turns: int | None = None
    effort: str | None = None

    # Unknown settings are ignored instead of failing validation.
    model_config = SettingsConfigDict(extra="ignore")
|
|
||
|
|
||
async def main(ctx: HackbotContext) -> WebcompatTriageResult:
    """Run one triage: load the inputs, install Firefox Nightly, start the agent.

    Args:
        ctx: Runtime context; only its ``log_path`` is used here.

    Returns:
        Whatever ``run_webcompat_triage`` returns for this run.
    """
    settings = AgentInputs()

    # Every run gets a freshly installed Nightly so the reproduction happens
    # against a current build; the agent drives the binary the installer
    # reports back.
    nightly_binary = install_firefox_nightly()

    bugzilla_server = {"type": "http", "url": settings.bugzilla_mcp_url}
    triage_result = await run_webcompat_triage(
        bugzilla_mcp_server=bugzilla_server,
        bug_data=settings.bug_data,
        bug_id=settings.bug_id,
        model=settings.model,
        max_turns=settings.max_turns,
        effort=settings.effort,
        firefox_path=str(nightly_binary),
        log=ctx.log_path,
        verbose=True,
    )
    return triage_result


if __name__ == "__main__":
    run_async(main)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,152 @@ | ||
| """Firefox web-compatibility triage agent. | ||
|
|
||
| Drives an agent that reproduces a broken-site report in Firefox | ||
| using the Firefox DevTools MCP. The report is supplied either inline as | ||
| ``bug_data`` text or as a Bugzilla ``bug_id`` (fetched through the Bugzilla broker). | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import sys | ||
| from pathlib import Path | ||
|
|
||
| from claude_agent_sdk import ( | ||
| ClaudeAgentOptions, | ||
| ClaudeSDKClient, | ||
| McpServerConfig, | ||
| ResultMessage, | ||
| ) | ||
| from hackbot_runtime import AgentError, HackbotAgentResult | ||
| from hackbot_runtime.claude import Reporter | ||
|
|
||
| from .config import BUGZILLA_READ_TOOLS, DEVTOOLS_TOOLS | ||
| from .devtools_mcp import build_devtools_server | ||
| from .result import ( | ||
| RESULT_SERVER_NAME, | ||
| SUBMIT_RESULT_TOOL, | ||
| ResultCollector, | ||
| TriageResult, | ||
| build_result_server, | ||
| ) | ||
|
|
||
| HERE = Path(__file__).resolve().parent | ||
|
|
||
|
|
||
# Outcome of one triage run: the HackbotAgentResult base fields plus the
# agent's structured findings.
class WebcompatTriageResult(HackbotAgentResult):
    # Structured result collected from the agent's submit_result call;
    # defaults to None.
    result: TriageResult | None = None
|
|
||
|
|
||
def load_system_prompt() -> str:
    """Return the agent's system prompt from ``prompts/system.md``.

    The file is looked up next to this module (``HERE``) and decoded as UTF-8
    explicitly, so the prompt text does not depend on the locale of the
    container or host the agent runs in.

    Raises:
        OSError: If the prompt file is missing or unreadable.
    """
    return (HERE / "prompts" / "system.md").read_text(encoding="utf-8")
|
|
||
|
|
||
def build_user_prompt(bug_data: str | None, bug_id: int | None) -> str:
    """Compose the first user message for the triage agent.

    Inline ``bug_data`` takes precedence: when it is non-empty the report text
    is embedded in the prompt. Otherwise the prompt tells the agent to fetch
    bug ``bug_id`` through the Bugzilla MCP tools.

    Raises:
        AgentError: If ``bug_data`` is empty/None and ``bug_id`` is None.
    """
    if not bug_data and bug_id is None:
        raise AgentError("neither bug_data nor bug_id was provided")

    if bug_data:
        sections = [
            "Here is the web-compatibility report to work on:",
            bug_data,
            "Follow your task procedure.",
        ]
        return "\n\n".join(sections)

    fetch_instruction = (
        "Fetch it using the Bugzilla MCP tools, then follow your task procedure."
    )
    return (
        f"The web-compatibility report to work on is Bugzilla bug {bug_id}. "
        + fetch_instruction
    )
|
|
||
|
|
||
_SUBJECT_LABEL_MAX_CHARS = 80


def _subject_label(bug_data: str | None, bug_id: int | None) -> str:
    """Return a short, single-line name for the triage subject, for error messages."""
    if not bug_data:
        return f"bug {bug_id}"
    # Collapse newlines and whitespace runs so the label stays on one line.
    flattened = " ".join(bug_data.split())
    if not flattened:
        return "inline report"
    if len(flattened) > _SUBJECT_LABEL_MAX_CHARS:
        return flattened[: _SUBJECT_LABEL_MAX_CHARS - 3] + "..."
    return flattened


async def run_webcompat_triage(
    *,
    bugzilla_mcp_server: McpServerConfig,
    bug_data: str | None = None,
    bug_id: int | None = None,
    model: str | None = None,
    max_turns: int | None = None,
    effort: str | None = None,
    firefox_path: str | None = None,
    verbose: bool = False,
    log: Path | None = None,
) -> WebcompatTriageResult:
    """Reproduce a web-compat issue and return the agent's findings.

    Args:
        bugzilla_mcp_server: MCP server config for Bugzilla; only attached to
            the agent when ``bug_id`` is given.
        bug_data: Inline report text; takes precedence over ``bug_id`` when
            the prompt is built.
        bug_id: Bugzilla bug number to fetch when no inline report is given.
        model: Model name forwarded to the SDK options.
        max_turns: Turn limit forwarded to the SDK options.
        effort: Forwarded to the SDK options only when truthy.
        firefox_path: Firefox binary for the DevTools MCP server; ``None`` is
            passed through when empty.
        verbose: Forwarded to the ``Reporter``.
        log: Log file path forwarded to the ``Reporter``.

    Returns a :class:`WebcompatTriageResult` on success; raises
    :class:`AgentError` if neither ``bug_data`` nor ``bug_id`` is given, if the
    agent ends in an error, or if it finishes without calling ``submit_result``.
    """
    # Build the prompt first: it raises AgentError when there is nothing to
    # triage, so a bad request fails before any server config is built or a
    # misleading "triaging bug None" line is logged.
    user_prompt = build_user_prompt(bug_data, bug_id)

    subject = bug_data if bug_data else f"bug {bug_id}"
    # Error messages use a short label so the actual failure is not buried
    # under a multi-paragraph inline report.
    label = _subject_label(bug_data, bug_id)
    print(f"[webcompat-triage] triaging {subject}", file=sys.stderr)

    devtools_server = build_devtools_server(
        firefox_path=Path(firefox_path) if firefox_path else None,
        headless=True,
        enable_script=True,
    )

    # Structured-result MCP server (in-process): the agent calls submit_result
    # once at the end, giving a predictable JSON result instead of free text.
    result_collector = ResultCollector()
    result_server = build_result_server(result_collector)

    # Only wire up Bugzilla when a bug_id is given; with inline bug_data alone
    # there is nothing to read, so the bugzilla MCP is not attached.
    # NOTE(review): when both bug_data and bug_id are passed, the prompt uses
    # bug_data but Bugzilla is still wired up — confirm that is intended.
    mcp_servers: dict[str, McpServerConfig] = {
        "firefox-devtools": devtools_server,
        RESULT_SERVER_NAME: result_server,
    }
    bugzilla_tools: list[str] = []
    if bug_id is not None:
        mcp_servers["bugzilla"] = bugzilla_mcp_server
        bugzilla_tools = BUGZILLA_READ_TOOLS

    system_prompt = load_system_prompt()

    options = ClaudeAgentOptions(
        system_prompt=system_prompt,
        mcp_servers=mcp_servers,
        permission_mode="bypassPermissions",
        allowed_tools=[
            "Read",
            "Grep",
            "Glob",
            "Bash",
            *bugzilla_tools,
            *DEVTOOLS_TOOLS,
            SUBMIT_RESULT_TOOL,
        ],
        model=model,
        max_turns=max_turns,
        # Only pass `effort` when set, so the SDK default applies otherwise.
        **({"effort": effort} if effort else {}),
        setting_sources=[],
        # DevTools snapshots/screenshots of complex pages serialize to JSON that
        # can exceed the SDK's default 1 MiB message buffer (the reader dies
        # fatally if it does). Raise it well above that ceiling.
        max_buffer_size=10 * 1024 * 1024,
    )

    result_msg: ResultMessage | None = None
    with Reporter(verbose=verbose, log_path=log) as reporter:
        reporter.header(subject)
        async with ClaudeSDKClient(options=options) as client:
            await client.query(user_prompt)
            async for msg in client.receive_response():
                reporter.message(msg)
                # Remember the ResultMessage; it carries status, turns and cost.
                if isinstance(msg, ResultMessage):
                    result_msg = msg

    if result_msg is None:
        raise AgentError(f"{label}: agent produced no result message")
    if result_msg.is_error:
        raise AgentError(
            f"{label} investigation failed: {result_msg.result or result_msg.subtype}"
        )
    if result_collector.result is None:
        raise AgentError(
            f"{label}: agent finished without submitting a result via submit_result"
        )

    return WebcompatTriageResult(
        result=result_collector.result,
        num_turns=result_msg.num_turns,
        total_cost_usd=result_msg.total_cost_usd,
    )
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like result of the run is stored as an artifact in a json file. We should decide what the result should look like. So for a sample triage run it produced this in summary.json: The sample triage prompt didn't ask to leave a comment or attach testcases/screenshots, but if we add that and there is a bug id passed as well, these could be in the For example,
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My preference, which I don't think is universal, is that we try to get it to produce those artifacts without modelling it as an update to the bug (even if it doesn't actually perform the updates). The bug is intrinsically shared global state, and the more we depend on the model interacting with that directly, the harder it is likely to be to run experiments or use in non-bugzilla contexts (e.g. with the dashboard).
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good, I've changed it to this format for now (defined in result.py): |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,71 @@ | ||
| """Bugzilla MCP broker. | ||
|
|
||
| Sidecar container that holds the Bugzilla API key and serves the | ||
| bugzilla MCP tools over HTTP. The agent process (in a sibling container | ||
| in the same Cloud Run Job task) reaches us at `127.0.0.1:<port>/mcp`; under | ||
| docker compose it uses the broker's service name instead. | ||
| The agent container itself holds no Bugzilla credentials. | ||
| """ | ||
|
|
||
| import logging | ||
| from contextlib import asynccontextmanager | ||
|
|
||
| import bugsy | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I appreciate this is not your fault, but I'm rather sad that bugbot is using bugsy, which is not maintained.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jgraham This was temporary :) We have a draft PR to migrate to libmozdata: #6180 That said, even though libmozdata is maintained and widely used across our tools, I think we need a more modern library that offers:
It might be worth starting a conversation with you, me, and @dklawren to figure out the best path forward. |
||
| import uvicorn | ||
| from agent_tools import bugzilla | ||
| from agent_tools.bugzilla import BugzillaContext | ||
| from agent_tools.claude_sdk import build_sdk_server | ||
| from mcp.server.streamable_http_manager import StreamableHTTPSessionManager | ||
| from pydantic_settings import BaseSettings, SettingsConfigDict | ||
| from starlette.applications import Starlette | ||
| from starlette.routing import Mount | ||
|
|
||
| log = logging.getLogger("webcompat-triage-broker") | ||
|
|
||
|
|
||
class BrokerInputs(BaseSettings):
    # Broker configuration, declared as a pydantic-settings model.
    # NOTE(review): presumably read from environment variables
    # (BUGZILLA_API_URL, BUGZILLA_API_KEY, HOST, PORT) — confirm.

    # Bugzilla endpoint and credential handed to the bugsy client; both required.
    bugzilla_api_url: str
    bugzilla_api_key: str
    # Listen on all interfaces by default so a sibling container can connect.
    host: str = "0.0.0.0"
    port: int = 8765

    # Unknown settings are ignored instead of failing validation.
    model_config = SettingsConfigDict(extra="ignore")
|
|
||
|
|
||
def build_app(inputs: BrokerInputs) -> Starlette:
    """Assemble the Starlette app that serves the Bugzilla MCP tools under ``/mcp``.

    Args:
        inputs: Broker settings; supplies the Bugzilla URL/key for the client
            and the host/port echoed in the readiness log line.

    Returns:
        A Starlette application whose lifespan runs the MCP session manager.
    """
    bugzilla_ctx = BugzillaContext(
        client=bugsy.Bugsy(
            api_key=inputs.bugzilla_api_key,
            bugzilla_url=inputs.bugzilla_api_url,
        )
    )
    # build_sdk_server returns a config mapping; the MCP server object itself
    # sits under "instance".
    # NOTE(review): the log line below says "(read-only)" but the full
    # bugzilla.TOOLS set is registered here — confirm it contains no write tools.
    server_instance = build_sdk_server("bugzilla", bugzilla_ctx, bugzilla.TOOLS)[
        "instance"
    ]
    session_manager = StreamableHTTPSessionManager(
        app=server_instance, stateless=True
    )

    @asynccontextmanager
    async def lifespan(app):
        # Keep the session manager running for the lifetime of the app.
        async with session_manager.run():
            log.info(
                "bugzilla broker ready on %s:%d (read-only)",
                inputs.host,
                inputs.port,
            )
            yield

    async def mcp_handler(scope, receive, send):
        # Raw ASGI callable: hand every request under /mcp to the manager.
        await session_manager.handle_request(scope, receive, send)

    mcp_route = Mount("/mcp", app=mcp_handler)
    return Starlette(routes=[mcp_route], lifespan=lifespan)
|
|
||
|
|
||
def main() -> None:
    """Configure logging, load the broker settings and serve the app with uvicorn."""
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)

    settings = BrokerInputs()
    # log_config=None keeps uvicorn from replacing the logging setup above.
    uvicorn.run(
        build_app(settings),
        host=settings.host,
        port=settings.port,
        log_config=None,
    )


if __name__ == "__main__":
    main()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we prefix these, e.g. with `AUTOWEBCOMPAT_` or `BUGBUG_`? We can change the settings class to strip the prefix.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It will not be necessary since this will not be shared with other agents or so. Keeping it shorter would make it cleaner when running it locally.