diff --git a/config/aipods/artificer.yaml b/config/aipods/artificer.yaml index 479dd43..b2228fd 100644 --- a/config/aipods/artificer.yaml +++ b/config/aipods/artificer.yaml @@ -18,6 +18,11 @@ spec: model: model: gpt-5.5 reasoningEffort: xhigh + imageRef: + kind: env-image-dockerfile + repoUrl: git@github.com:pikasTech/agentrun.git + commitId: 6601b4afb4870ecec62f14459e14202e5ccca7ec + dockerfilePath: deploy/container/Containerfile workspaceRef: kind: opaque path: . diff --git a/deploy/container/Containerfile b/deploy/container/Containerfile index df93cca..a335488 100644 --- a/deploy/container/Containerfile +++ b/deploy/container/Containerfile @@ -11,12 +11,23 @@ ENV AGENTRUN_APP_ROOT=/workspace/agentrun ENV AGENTRUN_BOOT_REPO_URL=http://git-mirror-http.devops-infra.svc.cluster.local/pikasTech/agentrun.git RUN HTTP_PROXY="$HTTP_PROXY" HTTPS_PROXY="$HTTPS_PROXY" NO_PROXY="$NO_PROXY" http_proxy="$HTTP_PROXY" https_proxy="$HTTPS_PROXY" no_proxy="$NO_PROXY" \ - apk add --no-cache ca-certificates curl git github-cli kubectl nodejs openssh-client ripgrep + apk add --no-cache ca-certificates curl git github-cli kubectl nodejs npm openssh-client ripgrep COPY package.json bun.lock tsconfig.json ./ RUN HTTP_PROXY="$HTTP_PROXY" HTTPS_PROXY="$HTTPS_PROXY" NO_PROXY="$NO_PROXY" http_proxy="$HTTP_PROXY" https_proxy="$HTTPS_PROXY" no_proxy="$NO_PROXY" \ bun install --production RUN /opt/agentrun/node_modules/.bin/codex --version && /opt/agentrun/node_modules/.bin/codex app-server --help >/dev/null +RUN set -eu; \ + for tool in bun node npm git ssh gh rg curl kubectl; do command -v "$tool" >/dev/null; done; \ + bun --version >/tmp/agentrun-work-ready-smoke.txt; \ + node --version >>/tmp/agentrun-work-ready-smoke.txt; \ + npm --version >>/tmp/agentrun-work-ready-smoke.txt; \ + git --version >>/tmp/agentrun-work-ready-smoke.txt; \ + ssh -V 2>>/tmp/agentrun-work-ready-smoke.txt || true; \ + gh --version | head -n 1 >>/tmp/agentrun-work-ready-smoke.txt; \ + rg --version | head -n 1 >>/tmp/agentrun-work-ready-smoke.txt; \ + curl --version | head -n 1 >>/tmp/agentrun-work-ready-smoke.txt; \ + kubectl version --client 2>/dev/null | head -n 1 >>/tmp/agentrun-work-ready-smoke.txt COPY deploy/runtime/boot ./deploy/runtime/boot RUN chmod +x /opt/agentrun/deploy/runtime/boot/*.sh diff --git a/docs/reference/spec-v01-aipod-spec.md b/docs/reference/spec-v01-aipod-spec.md index e411628..01fdfbd 100644 --- a/docs/reference/spec-v01-aipod-spec.md +++ b/docs/reference/spec-v01-aipod-spec.md @@ -1,13 +1,13 @@ # v0.1 AipodSpec 规格 -`AipodSpec` 是 AgentRun `v0.1` 的声明式 agent 装配规格。它把已有的 `backendProfile`、`executionPolicy.secretScope`、`ResourceBundleRef.kind="gitbundle"`、Queue task 和 Session turn 装配入口集中到 YAML 文件中,避免把某个 agent 的模型、SecretRef、gitbundle、skill 或 tool 写死在 manager、runner 或 CLI 源码里。 +`AipodSpec` 是 AgentRun `v0.1` 的声明式 agent 装配规格。它把已有的 `imageRef`、`backendProfile`、`executionPolicy.secretScope`、`ResourceBundleRef.kind="gitbundle"`、Queue task 和 Session turn 装配入口集中到 YAML 文件中,避免把某个 agent 的 work-ready env image、模型、SecretRef、gitbundle、skill 或 tool 写死在 manager、runner 或 CLI 源码里。 ## 设计边界 - `AipodSpec` 只声明装配意图,不保存 API key、SSH private key、token、`auth.json`、`config.toml` 或其他 Secret 明文。 - manager 通过 `/api/v1/aipod-specs` 对 YAML 做增删改查;默认目录为仓库 `config/aipods/`,可用 `AGENTRUN_AIPOD_SPEC_DIR` 覆盖。 - CLI 通过 `aipod-specs list|show|render|apply|delete` 管理规格,通过 `queue submit --aipod ` 或 `sessions turn --aipod ` 使用规格。 -- `render` 只把规格展开为标准 Queue task / Session turn 输入,输出必须脱敏,只显示 SecretRef 名称、key、projection、gitbundle 摘要和 `valuesPrinted=false`。 +- `render` 只把规格展开为标准 Queue task / Session turn 输入,输出必须脱敏,只显示 imageRef source 摘要、SecretRef 名称、key、projection、gitbundle 摘要和 `valuesPrinted=false`。 - `AipodSpec` 不引入第二套 scheduler、runner、backend adapter 或 Code Queue;最终执行仍走 AgentRun Queue、Sessions、runner Job 和 Codex app-server stdio backend。 ## YAML 结构 @@ -20,6 +20,11 @@ kind: AipodSpec metadata: name: Artificer spec: + imageRef: + kind: env-image-dockerfile + repoUrl: git@github.com:pikasTech/agentrun.git + commitId: 0000000000000000000000000000000000000000 + dockerfilePath: deploy/container/Containerfile backendProfile: sub2api executionPolicy: sandbox: workspace-write @@ -46,17 +51,42 @@ spec: - `apiVersion` 固定为 `agentrun.pikastech.local/v0.1`,`kind` 固定为 `AipodSpec`。 - `metadata.name` 是 CLI/API 查找名,允许大小写,但文件落盘名会归一为安全 YAML 文件名。 +- `spec.imageRef` 描述该 Aipod 的 work-ready env image 来源。`kind` 固定为 `env-image-dockerfile`,最小字段为 `repoUrl`、完整 40 位 `commitId` 和仓库内相对 `dockerfilePath`;不得用可变 image tag 代替 source 引用。 - `spec.backendProfile` 使用 AgentRun 已注册或动态的 Codex-compatible profile slug,例如 `codex`、`deepseek`、`minimax-m3`、`dsflash-go` 或 `sub2api`。 - `spec.model.model` 会展开为 command payload 的 `model` 字段;完整 `spec.model` 同时进入 `payload.modelConfig`,用于保留 `reasoningEffort` 等声明但不作为 Secret 输出。 - `spec.executionPolicy` 复用 run 的执行策略校验,且必须恰好包含一个匹配 `backendProfile` 的 provider credential SecretRef。 - `spec.resourceBundleRef` 复用 RuntimeAssembly 的 gitbundle 规则,可为 `null`,但需要注入 skill/tool 时必须使用 gitbundle。 - `spec.payloadDefaults` 与 CLI render 输入合并;用户 prompt 通过 `--prompt`、`--prompt-file` 或 `--prompt-stdin` 覆盖或补充。 +## imageRef 与 env image reuse + +`imageRef` 是 Aipod work-ready 环境的声明式 source,不是最终 Kubernetes image 字符串。它回答“这个 Aipod 的基础执行镜像由哪个 Git commit 中的哪个 Dockerfile/Containerfile 构建”。 + +最小结构: + +```yaml +imageRef: + kind: env-image-dockerfile + repoUrl: git@github.com:pikasTech/agentrun.git + commitId: 0000000000000000000000000000000000000000 + dockerfilePath: deploy/container/Containerfile +``` + +规则: + +- `repoUrl` 必须是无明文 credential 的 Git URL;credential 只能通过受控 Git/mirror 运行面获取。 +- `commitId` 必须是完整 commit SHA,不能是 branch、tag、short SHA 或 dispatcher 当前 rollout hint。 +- `dockerfilePath` 必须是 repo 内相对路径,不能包含 `..`,不能是绝对路径。 +- Aipod 启动或 Queue dispatch 时必须先按 `imageRef` 解析 env image identity,并优先查询 artifact catalog / registry 复用已有 digest-pinned env image。 +- catalog/registry 未命中时只允许进入受控 CI/CD 构建路径;普通 Aipod 启动不得在任务内部临时 `apt`、`apk`、`bun install`、`npm install` 或等价命令补基础环境。 +- runner job 最终只接受解析后的 digest-pinned image;response、event 和 result 必须输出 `imageRef` source、env identity、reuse/build 状态和 digest 摘要,不输出 Secret。 + ## Artificer 默认规格 仓库内置 `config/aipods/artificer.yaml`,名称为 `Artificer`。它的长期目标是承接 UniDesk 分布式开发任务: - 使用 `backendProfile=sub2api`,模型声明为 `gpt-5.5`,reasoning effort 为 `xhigh`。 +- 通过 `imageRef` 引用 AgentRun `v0.1` env image Dockerfile source;启动时必须复用已物化的 digest-pinned env image,不能把依赖安装留给任务运行时。 - 通过 provider SecretRef `agentrun-v01-provider-sub2api` 获取 `auth.json` 与 `config.toml`。 - 通过 `toolCredentials` 获取 GitHub Issue/PR 写入 token:`agentrun-v01-tool-github-pr` -> env `GH_TOKEN`。 - 通过 `toolCredentials` 获取 UniDesk SSH 透传 token:`agentrun-v01-tool-unidesk-ssh` -> env `UNIDESK_SSH_CLIENT_TOKEN`。 @@ -121,8 +151,9 @@ CLI: ## 测试规格 -- A1:`config/aipods/artificer.yaml` 能被 manager list/show/render,render 结果包含 `backendProfile=sub2api`、`model=gpt-5.5`、`reasoningEffort=xhigh`、provider SecretRef、GitHub PR token env projection、UniDesk SSH env projection、GitHub SSH volume projection、Artificer `gitMirror.enabled=false`、AgentRun runner tools gitbundle 和 gitbundle requiredSkills。 +- A1:`config/aipods/artificer.yaml` 能被 manager list/show/render,render 结果包含 `imageRef.kind=env-image-dockerfile`、`repoUrl`、`commitId`、`dockerfilePath`、`backendProfile=sub2api`、`model=gpt-5.5`、`reasoningEffort=xhigh`、provider SecretRef、GitHub PR token env projection、UniDesk SSH env projection、GitHub SSH volume projection、Artificer `gitMirror.enabled=false`、AgentRun runner tools gitbundle 和 gitbundle requiredSkills。 - A2:`queue submit --aipod Artificer --dry-run` 输出标准 `queue-submit-plan`,且 `idempotencyKey`、prompt 与 metadata 被保留。 - A3:Artificer 默认 `gitMirror.enabled=false` 时 GitHub URL 保持 SSH fetch;显式启用 `gitMirror` 后 GitHub URL 改写到 mirror base URL;非 GitHub URL 不改写。 -- A4:runner Job dry-run 支持 tool credential volume mount,并且 response/manifest/event 不泄漏 Secret 明文。 -- A5:`bun run check` 和 `bun run self-test` 必须覆盖 A1-A4。 +- A4:Aipod 启动/dispatch 能基于 `imageRef` 命中 artifact catalog / registry 并复用 digest-pinned env image;未命中时返回明确 build-required 或进入受控 CI/CD,不在 runner 任务内补装依赖。 +- A5:runner Job dry-run 支持 tool credential volume mount,并且 response/manifest/event 不泄漏 Secret 明文。 +- A6:`bun run check` 和 `bun run self-test` 必须覆盖 A1-A5。 diff --git a/docs/reference/spec-v01-cicd.md b/docs/reference/spec-v01-cicd.md index 20fb017..a538150 100644 --- a/docs/reference/spec-v01-cicd.md +++ b/docs/reference/spec-v01-cicd.md @@ -52,9 +52,21 @@ CI 的最小检查应覆盖: - Bun/TS 单元自测试,包括 manager schema、adapter mock、Codex fake app-server stdio client 和 CLI JSON 输出。 - `deploy/deploy.json` 与 GitOps render 只读校验。 -容器镜像必须区分 env identity 与 source commit。`agentrun-mgr-env:` 只包含 Bun runtime、生产依赖、Codex CLI、git/kubectl/node 等系统依赖和 boot 脚本,不 bake `src/`、`scripts/` 或某个业务 source commit。运行时通过 `AGENTRUN_BOOT_COMMIT` 从 git mirror 按完整 SHA 做 `git fetch --depth=1 origin `,再用 env image 内的 `node_modules` 启动 manager 或 runner。CI/CD 仍然只允许纯 Tekton + Argo CD,不因 Bun 工具链引入自定义 runner、长期 poller 或源分支生成物提交。 +容器镜像必须区分 env identity 与 source commit。`agentrun-mgr-env:` 只包含 Bun runtime、生产依赖、Codex CLI、git/kubectl/node 等系统依赖和 boot 脚本,不 bake `src/`、`scripts/` 或某个业务 source commit。AipodSpec 通过 `spec.imageRef` 引用 env image Dockerfile source(`repoUrl`、`commitId`、`dockerfilePath`),运行时通过 `AGENTRUN_BOOT_COMMIT` 从 git mirror 按完整 SHA 做 `git fetch --depth=1 origin `,再用 env image 内的 `node_modules` 启动 manager 或 runner。CI/CD 仍然只允许纯 Tekton + Argo CD,不因 Bun 工具链引入自定义 runner、长期 poller 或源分支生成物提交。 -Env identity 的输入至少包含:Bun base image、系统包列表、`deploy/container/Containerfile`、`deploy/runtime/boot/*.sh`、`package.json`、`bun.lock` 和 `tsconfig.json`。只改业务 TS、文档、模板中不影响 runtime env 的内容时,planner 必须输出 `build=0 reuse=1 unsafeReuse=0`,并只复用上一版 catalog 中同一 `envIdentity` 的 digest。若 GitOps catalog 因 mirror flush lag 暂时不可见,即使 G14 registry 中已有同名 `agentrun-mgr-env:`,也必须先通过 `agentrun v01 git-mirror sync|flush` 或新的 PipelineRun 恢复 catalog provenance;不能把 registry HEAD 当作第二条发布真相。 +Env identity 的输入至少包含:`imageRef.repoUrl`、`imageRef.commitId`、`imageRef.dockerfilePath`、Bun base image、系统包列表、`deploy/runtime/boot/*.sh`、`package.json`、`bun.lock` 和 `tsconfig.json`。只改业务 TS、文档、模板中不影响 runtime env 的内容时,planner 必须输出 `build=0 reuse=1 unsafeReuse=0`,并只复用上一版 catalog 中同一 `envIdentity` 的 digest。若 GitOps catalog 因 mirror flush lag 暂时不可见,即使 G14 registry 中已有同名 `agentrun-mgr-env:`,也必须先通过 `agentrun v01 git-mirror sync|flush` 或新的 PipelineRun 恢复 catalog provenance;不能把 registry HEAD 当作第二条发布真相。 + +## Aipod env image reuse + +`AipodSpec.spec.imageRef` 是 Aipod 启动时选择 work-ready env image 的 source truth。Queue dispatch、Session turn 和 runner Job 创建不得直接信任 prompt、payload 或 CLI 临时传入的可变 image tag;它们必须先把 `imageRef` 映射到 artifact catalog 中的 env image identity 和 digest-pinned repository digest。 + +规则: + +- `imageRef.kind` 固定为 `env-image-dockerfile`,最小字段为 `repoUrl`、完整 `commitId` 和仓库内相对 `dockerfilePath`。 +- `repoUrl + commitId + dockerfilePath` 指向的 Dockerfile source 必须能通过 Git mirror 或受控 Git credential 读取;不能依赖 runner 任务内手工 clone 和 build。 +- 启动 Aipod 时若 catalog 中存在匹配 env identity 且 digest 合法,必须复用该 digest-pinned image,并在 response/event 中标记 `status=reused`。 +- catalog 未命中时,CLI/API 只能返回明确 `build-required` 计划或触发受控 Tekton PipelineRun;不得让 runner task 进入后再运行 `apt`、`apk`、`bun install`、`npm install` 补环境。 +- catalog 记录必须包含 `imageRef` source 摘要、`envIdentity`、env image tag、digest、repository digest、build/reuse 状态和 provenance。SecretRef、token、SSH key 和 provider config 不进入 catalog。 ## 真相源 @@ -132,7 +144,7 @@ Tekton promotion 可以读取 `deploy/deploy.json` 来 render runtime desired st - `v0.1` env image tag 使用 `envIdentity`,而不是 source commitId。source commitId 通过 `AGENTRUN_BOOT_COMMIT` 和 catalog `bootCommit` 进入 runtime。 - Runtime manifest 必须使用 digest pin 作为部署身份;G14 本地 registry 对同一 tag 的默认 HEAD 可能返回 Docker schema1 compatibility digest,Tekton 必须用 `Accept: application/vnd.docker.distribution.manifest.v2+json` 采集 containerd 可直接拉取的 schema2 manifest digest,并在写入 catalog 前按 digest HEAD 验证。 -- Catalog 必须记录 lane、source branch、GitOps branch、source commitId、serviceId、env image tag、digest、env identity、boot commit、toolchain inputs 和 build/reuse 状态。 +- Catalog 必须记录 lane、source branch、GitOps branch、source commitId、serviceId、imageRef source 摘要、env image tag、digest、env identity、boot commit、toolchain inputs 和 build/reuse 状态。 - 同一 env identity 对同一 service 应生成同一镜像;lane 差异放在 manifest、env、SecretRef、namespace、RBAC 和 runtime config 中,不 bake 进镜像。普通 source commit 差异由 boot checkout 选择。 - `deploy/deploy.json` 只承载人写 runtime intent,不承载 digest、publish state 或 reuse evidence。 - Source branch 不得因为 promotion 出现自动提交;若发布后 source branch 变化,必须是人工修改源码、测试、文档、模板或 `deploy/deploy.json`。 diff --git a/docs/reference/spec-v01-runtime-assembly.md b/docs/reference/spec-v01-runtime-assembly.md index b42f9a8..7089bad 100644 --- a/docs/reference/spec-v01-runtime-assembly.md +++ b/docs/reference/spec-v01-runtime-assembly.md @@ -2,7 +2,7 @@ 本文是 AgentRun `v0.1` runner/backend 启动前的权威装配 SPEC。所有会进入运行时容器、进程、文件系统或环境变量的执行输入,都必须先落到本文定义的装配模型,再由 manager/runner 渲染为受控 Job 输入;不得在 CLI、Queue、runner Job、issue 过程或临时热补丁中绕过装配模型直接拼接 credential、host path 或隐式环境。 -`RuntimeAssembly` 只回答一个问题:一次 run 到底用哪份 backend 镜像、哪个 profile/credential scope、哪份 session、哪份代码、初始 prompt、skill 和工具 credential。`BackendImageRef`、`ProfileRef`、`SessionRef`、`ResourceBundleRef` 仍是四个一等运行时要素;credential 注入不是第五个杂项要素,而是挂在 `ProfileRef`、`ResourceBundleRef` 或 tool scope 上的 SecretRef 装配引用。初始 prompt 与 skill 注入也不是第五条运行时路径,它们属于 `ResourceBundleRef` 指向的 Git-only 非敏感内容。 +`RuntimeAssembly` 只回答一个问题:一次 run 到底用哪份 backend/env 镜像、哪个 profile/credential scope、哪份 session、哪份代码、初始 prompt、skill 和工具 credential。`BackendImageRef`、`ProfileRef`、`SessionRef`、`ResourceBundleRef` 仍是四个一等运行时要素;credential 注入不是第五个杂项要素,而是挂在 `ProfileRef`、`ResourceBundleRef` 或 tool scope 上的 SecretRef 装配引用。初始 prompt 与 skill 注入也不是第五条运行时路径,它们属于 `ResourceBundleRef` 指向的 Git-only 非敏感内容。 ## 最简四要素 @@ -10,7 +10,7 @@ | 要素 | 最小字段 | v0.1 含义 | 不包含 | | --- | --- | --- | --- | -| `BackendImageRef` | `image` | digest-pinned backend/runner 镜像。 | API KEY、profile config、用户代码、session 文件。 | +| `BackendImageRef` | `source` 或已解析 `image` | Aipod env image Dockerfile source 与最终 digest-pinned backend/runner 镜像。 | API KEY、profile config、用户代码、session 文件、可变 image tag。 | | `ProfileRef` | `profile`、`secretRef` | provider profile 和 API KEY/配置 SecretRef。 | backend 镜像、session、repo 文件、GitHub/业务工具 credential。 | | `SessionRef` | `sessionId` 或 `null` | backend 会话文件持久化引用;P0 可以为 `null`。 | API KEY、完整 `CODEX_HOME`、Git workspace。 | | `ResourceBundleRef` | `kind="gitbundle"`、`repoUrl`、`bundles[]`、可选 `ref` / `commitId` / `promptRefs` / `requiredSkills` | 初始代码/文件输入、工具目录、skill 目录、required skill 校验和稳定初始 prompt;P0 固定 Git-only gitbundle,默认从 repo/ref 解析实际 commit。 | 上传文件、对象存储 artifact、inline env、Secret value、会话历史、旧 inline seed、inline skill manifest。 | @@ -20,7 +20,13 @@ P0 最小 JSON 形态: ```json { "backendImageRef": { - "image": "127.0.0.1:5000/agentrun/agentrun-mgr@sha256:..." + "source": { + "kind": "env-image-dockerfile", + "repoUrl": "git@github.com:pikasTech/agentrun.git", + "commitId": "0000000000000000000000000000000000000000", + "dockerfilePath": "deploy/container/Containerfile" + }, + "image": "127.0.0.1:5000/agentrun/agentrun-mgr-env@sha256:..." }, "profileRef": { "profile": "codex", @@ -124,10 +130,15 @@ HWLAB Workbench 的 project/workspace 不属于 RuntimeAssembly 四要素,也 ### BackendImageRef -- `image` 必须是 digest-pinned image。 -- image 来源必须是 CI/CD artifact catalog、GitOps manifest 或 manager allowlist;客户端不能在 run payload 中传任意镜像。 +- `BackendImageRef.source` 是 env image Dockerfile source,来自 AipodSpec `spec.imageRef` 或受控 runtime default;`kind` 固定为 `env-image-dockerfile`,最小字段为 `repoUrl`、完整 `commitId` 和仓库内相对 `dockerfilePath`。 +- `BackendImageRef.image` 是 source 解析后的 digest-pinned image,必须来自 CI/CD artifact catalog、GitOps manifest 或 manager allowlist;客户端不能在 run payload 中传任意可变镜像。 +- runner Job 创建前必须先用 `source` 解析 env image identity,并优先从 artifact catalog / registry 复用已有 digest;未命中只能返回明确 build-required 或进入受控 CI/CD,不能在普通任务容器内现装依赖。 - v0.1 可以继续使用现有 agentrun runner 镜像,不要求立即拆独立 backend image。 -- 验收时只需要能追溯实际 Deployment/Job image digest 和 source commit。 +- 验收时必须能追溯实际 Deployment/Job image digest、source commit、Dockerfile path、env identity 和 reuse/build 状态。 +- runner 镜像必须是 work-ready 执行环境:基础 CLI、Bun/Node/npm、Git/SSH/GitHub CLI、kubectl、curl、ripgrep 和 AgentRun 生产依赖必须在 env image 构建阶段进入镜像或受控预装层。普通任务不得在运行时用 `apt`、`apk`、`bun install`、`npm install` 或等价命令补装基础环境。 +- `workReady` capability 摘要必须出现在 manager health、runner job dry-run/response、queue dispatch response 和 runner startup event 中;摘要只能包含版本、工具名、版本号、依赖策略和 `valuesPrinted=false`,不得输出 env value、token 或 Secret 文件内容。 +- runner 启动后必须先执行短 smoke:镜像层检查 `bun`、`node`、`npm`、`git`、`ssh`、`gh`、`rg`、`curl`、`kubectl`;Artificer/UniDesk 工具型 gitbundle 装配后再检查 `tran`、`trans`、`apply_patch`。缺失时必须快速 `infra-failed`,不能把问题留给 prompt 内排查。 +- 项目依赖不属于默认 runtime preflight。AgentRun 自身 `node_modules` 固定为镜像层 `/opt/agentrun/node_modules`,boot 后只 symlink 到源码 checkout;业务 repo 的 `bun install`/`npm install` 只能由显式任务、派生镜像或受控 workspace cache 承担,不能成为每个 Aipod/Queue task 的默认前置。 ### ProfileRef @@ -230,7 +241,7 @@ skill 只来自 gitbundle 复制进 workspace 的 `.agents/skills//SKILL.m 5. Runner materialize tool credential 到该 run 允许的 env/file projection;未实现的 tool scope 必须显式 failed/blocked,不能静默跳过后让 agent 自己猜凭据。 6. Runner materialize `kind="gitbundle"` resource bundle 到 workspace;P0 未实现时必须显式 blocked,不能猜测 host path。 7. Runner 按 `bundles[]` 复制目录或文件,准备 workspace `tools/`、发现 `.agents/skills`,校验 `requiredSkills`,读取并校验 `promptRefs`,写入有界 assembly event。 -8. Runner 启动 backend,并在 event 中记录 image digest、profile、SecretRef 名称/key、tool credential scope、sessionRef、repoUrl、requested ref/commit、materialized commit、bundles、promptRefs、requiredSkills、tools 和 skillDirs 摘要。 +8. Runner 启动 backend,并在 event 中记录 imageRef source、env identity、image digest、reuse/build 状态、profile、SecretRef 名称/key、tool credential scope、sessionRef、repoUrl、requested ref/commit、materialized commit、bundles、promptRefs、requiredSkills、tools 和 skillDirs 摘要。 任何一个要素缺失或不合法,都必须按该要素失败;不得静默 fallback。 @@ -239,8 +250,9 @@ skill 只来自 gitbundle 复制进 workspace 的 `.agents/skills//SKILL.m ### A1 BackendImageRef 验收 - 实际 manager Deployment 和 runner Job 使用 digest-pinned image。 -- event、CLI 或诊断输出能看到 image digest 或可追溯到 GitOps/catalog。 -- run payload 不能传任意 image 字符串。 +- event、CLI 或诊断输出能看到 imageRef source、env identity、image digest 和可追溯 GitOps/catalog 条目。 +- run payload 不能传任意 image 字符串;Aipod 只能声明 env image Dockerfile source,dispatch 解析后才进入 runner job image。 +- 启动 Aipod 时命中已有 env identity 必须 reuse digest-pinned image;不得触发任务内 `apt`/`apk`/`bun install`/`npm install`。 ### A2 ProfileRef 验收 diff --git a/docs/reference/spec-v01-validation.md b/docs/reference/spec-v01-validation.md index 6a4e00c..087f611 100644 --- a/docs/reference/spec-v01-validation.md +++ b/docs/reference/spec-v01-validation.md @@ -24,6 +24,7 @@ - Postgres adapter:migration、事务、run/command/event round-trip、重启后可查询。 - Secret 分发:SecretRef schema、missing secret failure、redaction。 - AgentRun Queue:task schema、attempt 状态机、summary/stats/read cursor、Queue 与 Session 引用边界、旧 MiniMax/OpenCode 直连入口废弃和 redaction。 +- AipodSpec / env image reuse:`spec.imageRef` schema、Artificer render、Queue metadata 继承、runner-job imageRef 解析、digest-pinned env image reuse 可见性和禁止任务运行时安装基础依赖的 work-ready smoke。 - HWLAB v0.2 基线承接:可以用 fake backend/临时 manager 做组件自测试,覆盖 event contract、result completed 防误判、bounded output、runner job status、SessionRef profile 隔离、ResourceBundleRef 失败分类、`promptRefs`/gitbundle skillDirs/requiredSkills 装配和 backend preflight redaction;这些自测试不能替代真实 `agentrun-v01` CLI 交互验收。 自测试应使用 Bun + TypeScript 运行,Codex 相关自测试可以使用 fake app-server JSON-RPC client 模拟 `initialize`、`thread/start`、`thread/resume`、`turn/start`、assistant 输出、协议错误、timeout 和 transport close。 @@ -59,7 +60,7 @@ 6. manager 可查询 command state、append-only events、terminal_status 和 redacted logPath/job identity。 7. 重启 `agentrun-mgr` 后,run、command、events 和 terminal_status 仍可从 Postgres 查询。 8. 日志、event、CLI 输出和 health 中没有 provider credential、DSN password、token 或 URL credential 明文。 -9. 若变更涉及 RuntimeAssembly,必须能追溯 `BackendImageRef`、`ProfileRef`、`SessionRef` 和 `ResourceBundleRef` 的装配状态;未提供 session/resource 时必须显式为 `null`,提供时必须能查到 session/thread 和 Git commit/tree/workspace/bundles 摘要,不能由 runner 隐式猜测。若提供 `promptRefs`、gitbundle skills 或 `requiredSkills`,必须能查到 name/path/hash/bytes/injected 摘要;required prompt 或 required skill 缺失必须 blocked,不能 fallback 到模型默认 prompt 或默认 skill registry。 +9. 若变更涉及 RuntimeAssembly,必须能追溯 `BackendImageRef`、`ProfileRef`、`SessionRef` 和 `ResourceBundleRef` 的装配状态;Aipod 启动还必须能追溯 `imageRef` source、env identity、reuse/build 状态和 digest-pinned image,不能由 runner 隐式使用可变 tag 或任务运行时安装基础依赖。未提供 session/resource 时必须显式为 `null`,提供时必须能查到 session/thread 和 Git commit/tree/workspace/bundles 摘要,不能由 runner 隐式猜测。若提供 `promptRefs`、gitbundle skills 或 `requiredSkills`,必须能查到 name/path/hash/bytes/injected 摘要;required prompt 或 required skill 缺失必须 blocked,不能 fallback 到模型默认 prompt 或默认 skill registry。 ### CLI 交互联调标准 diff --git a/src/common/aipod-specs.ts b/src/common/aipod-specs.ts index a2ff2d2..72c007c 100644 --- a/src/common/aipod-specs.ts +++ b/src/common/aipod-specs.ts @@ -4,6 +4,7 @@ import { parse as parseYaml, stringify as stringifyYaml } from "yaml"; import { AgentRunError } from "./errors.js"; import type { AipodSpec, AipodSpecRecord, BackendProfile, CreateQueueTaskInput, ExecutionPolicy, JsonRecord, JsonValue, RenderAipodInput, RenderedAipodQueueTask, ResourceBundleRef, SessionRef, WorkspaceRef } from "./types.js"; import { backendProfileSpec, isBackendProfile } from "./backend-profiles.js"; +import { imageRefSourceSummary, validateAipodImageRef } from "./env-image-ref.js"; import { asRecord, stableHash, validateCreateQueueTask, validateExecutionPolicy, validateResourceBundleRef, validateSessionRef } from "./validation.js"; const aipodApiVersion = "agentrun.pikastech.local/v0.1"; @@ -75,6 +76,7 @@ export function validateAipodSpec(input: unknown, source = "inline"): AipodSpec const labels = metadata.labels === undefined ? undefined : asRecord(metadata.labels, "aipodSpec.metadata.labels"); const spec = asRecord(record.spec, "aipodSpec.spec"); const backendProfile = normalizeBackendProfile(requiredString(spec, "backendProfile")); + const imageRef = validateAipodImageRef(spec.imageRef, "aipodSpec.spec.imageRef"); const executionPolicy = validateExecutionPolicy(asRecord(spec.executionPolicy, "aipodSpec.spec.executionPolicy")); validateAipodProviderCredential(backendProfile, executionPolicy); const resourceBundleRef = validateResourceBundleRef(spec.resourceBundleRef); @@ -96,6 +98,7 @@ export function validateAipodSpec(input: unknown, source = "inline"): AipodSpec ...(stringValue(spec.providerId) ? { providerId: stringValue(spec.providerId) as string } : {}), backendProfile, ...(isJsonRecord(spec.model) ? { model: spec.model } : {}), + imageRef, ...(isJsonRecord(spec.workspaceRef) ? { workspaceRef: validateWorkspaceRef(spec.workspaceRef) } : {}), ...(spec.sessionRef !== undefined ? { sessionRef: validateSessionRef(spec.sessionRef) } : {}), executionPolicy, @@ -111,7 +114,8 @@ export function validateAipodSpec(input: unknown, source = "inline"): AipodSpec export function renderAipodSpec(record: AipodSpecRecord, input: RenderAipodInput = {}): RenderedAipodQueueTask { const spec = record.spec.spec; - const metadata = mergeRecords(spec.metadata, input.metadata, { aipod: record.name, aipodSpecHash: record.specHash }); + const imageRef = imageRefSourceSummary(spec.imageRef); + const metadata = mergeRecords(spec.metadata, input.metadata, { aipod: record.name, aipodSpecHash: record.specHash, aipodImageRef: imageRef }); const payload = mergeRecords(spec.payloadDefaults, input.payload); if (typeof input.prompt === "string" && input.prompt.trim().length > 0) payload.prompt = input.prompt; applyModelPayload(payload, spec.model); @@ -138,7 +142,7 @@ export function renderAipodSpec(record: AipodSpecRecord, input: RenderAipodInput action: "aipod-spec-render", aipod: summarizeAipodSpecRecord(record), queueTask, - dispatchDefaults: spec.dispatchDefaults ?? {}, + dispatchDefaults: aipodDispatchDefaults(spec.dispatchDefaults, spec.imageRef), valuesPrinted: false, }; } @@ -153,6 +157,7 @@ export function summarizeAipodSpecRecord(record: AipodSpecRecord): JsonRecord { source: record.source, backendProfile: spec.backendProfile, model: spec.model ?? null, + imageRef: imageRefSourceSummary(spec.imageRef), queue: spec.queue ?? "commander", lane: spec.lane ?? "v0.1", providerId: spec.providerId ?? "G14", @@ -254,6 +259,13 @@ function mergeRecords(...records: Array): JsonRecord { return Object.assign({}, ...records.filter(Boolean)); } +function aipodDispatchDefaults(base: JsonRecord | undefined, imageRef: AipodSpec["spec"]["imageRef"]): JsonRecord { + const result = mergeRecords(base); + const runnerJob = mergeRecords(isJsonRecord(result.runnerJob) ? result.runnerJob : undefined, { imageRef }); + result.runnerJob = runnerJob; + return result; +} + function applyModelPayload(payload: JsonRecord, model: JsonRecord | undefined): void { if (!model) return; const modelName = stringValue(model.model); diff --git a/src/common/env-image-ref.ts b/src/common/env-image-ref.ts new file mode 100644 index 0000000..9852cd0 --- /dev/null +++ b/src/common/env-image-ref.ts @@ -0,0 +1,176 @@ +import { readFile } from "node:fs/promises"; +import { AgentRunError } from "./errors.js"; +import type { AipodImageRef, JsonRecord } from "./types.js"; +import { asRecord, stableHash } from "./validation.js"; + +export interface RunnerEnvImageResolution extends JsonRecord { + status: "explicit-image" | "catalog-reused" | "runtime-default-reused" | "legacy-default"; + image: string; + imageRef: JsonRecord | null; + envIdentity: string | null; + digestPinned: boolean; + catalogFile: string | null; + valuesPrinted: false; +} + +export function validateAipodImageRef(value: unknown, fieldName = "imageRef"): AipodImageRef { + const record = asRecord(value, fieldName); + const kind = requiredString(record, "kind", fieldName); + if (kind !== "env-image-dockerfile") throw new AgentRunError("schema-invalid", `${fieldName}.kind must be env-image-dockerfile`, { httpStatus: 400 }); + const repoUrl = validateRepoUrl(requiredString(record, "repoUrl", fieldName), `${fieldName}.repoUrl`); + const commitId = requiredString(record, "commitId", fieldName).toLowerCase(); + if (!/^[0-9a-f]{40}$/u.test(commitId)) throw new AgentRunError("schema-invalid", `${fieldName}.commitId must be a full 40-character git commit sha`, { httpStatus: 400 }); + const dockerfilePath = validateDockerfilePath(requiredString(record, "dockerfilePath", fieldName), `${fieldName}.dockerfilePath`); + return { kind: "env-image-dockerfile", repoUrl, commitId, dockerfilePath }; +} + +export function imageRefSourceSummary(imageRef: AipodImageRef): JsonRecord { + return { + kind: imageRef.kind, + repoUrl: imageRef.repoUrl, + commitId: imageRef.commitId, + dockerfilePath: imageRef.dockerfilePath, + sourceIdentity: imageRefSourceIdentity(imageRef), + valuesPrinted: false, + }; +} + +export function imageRefSourceIdentity(imageRef: AipodImageRef): string { + return stableHash({ kind: imageRef.kind, repoUrl: imageRef.repoUrl, commitId: imageRef.commitId, dockerfilePath: imageRef.dockerfilePath }).slice(0, 20); +} + +export function isDigestPinnedImage(image: string): boolean { + return /@sha256:[0-9a-f]{64}$/u.test(image); +} + +export async function resolveRunnerEnvImage(options: { imageRef?: unknown; explicitImage?: string | null; defaultImage?: string | null; envIdentity?: string | null; artifactCatalogFile?: string | null }): Promise { + const imageRef = options.imageRef === undefined || options.imageRef === null ? null : validateAipodImageRef(options.imageRef, "runnerJob.imageRef"); + const explicitImage = stringValue(options.explicitImage); + const defaultImage = stringValue(options.defaultImage); + const catalogFile = stringValue(options.artifactCatalogFile); + const envIdentity = stringValue(options.envIdentity); + + if (!imageRef) { + const image = explicitImage ?? defaultImage; + if (!image) throw new AgentRunError("schema-invalid", "runner job image is required; set --image or AGENTRUN_RUNNER_IMAGE", { httpStatus: 400 }); + return { status: explicitImage ? "explicit-image" : "legacy-default", image, imageRef: null, envIdentity: envIdentity ?? null, digestPinned: isDigestPinnedImage(image), catalogFile: catalogFile ?? null, valuesPrinted: false }; + } + + if (explicitImage) { + throw new AgentRunError("schema-invalid", "runnerJob.imageRef resolves the env image; do not pass runnerJob.image for Aipod-dispatched jobs", { + httpStatus: 400, + details: { imageRef: imageRefSourceSummary(imageRef), valuesPrinted: false }, + }); + } + + const catalogResolution = catalogFile ? await resolveFromCatalog(catalogFile, imageRef) : null; + if (catalogResolution) return catalogResolution; + + if (defaultImage && isDigestPinnedImage(defaultImage)) { + return { + status: "runtime-default-reused", + image: defaultImage, + imageRef: imageRefSourceSummary(imageRef), + envIdentity: envIdentity ?? imageRefSourceIdentity(imageRef), + digestPinned: true, + catalogFile: catalogFile ?? null, + valuesPrinted: false, + }; + } + + throw new AgentRunError("schema-invalid", "Aipod runner env image is not reusable yet: imageRef requires a catalog hit or digest-pinned AGENTRUN_RUNNER_IMAGE", { + httpStatus: 409, + details: { imageRef: imageRefSourceSummary(imageRef), catalogFile: catalogFile ?? null, defaultImageDigestPinned: defaultImage ? isDigestPinnedImage(defaultImage) : false, buildRequired: true, valuesPrinted: false }, + }); +} + +async function resolveFromCatalog(catalogFile: string, imageRef: AipodImageRef): Promise { + let parsed: JsonRecord; + try { + parsed = JSON.parse(await readFile(catalogFile, "utf8")) as JsonRecord; + } catch (error) { + throw new AgentRunError("infra-failed", `artifact catalog ${catalogFile} could not be read`, { httpStatus: 502, details: { catalogFile, message: error instanceof Error ? error.message : String(error), valuesPrinted: false } }); + } + const services = Array.isArray(parsed.services) ? parsed.services : []; + const requestedSummary = imageRefSourceSummary(imageRef); + for (const item of services) { + if (!isRecord(item)) continue; + if (!catalogServiceMatchesImageRef(item, imageRef, requestedSummary)) continue; + const image = stringValue(item.envRepositoryDigest) ?? stringValue(item.repositoryDigest) ?? stringValue(item.image); + if (!image) continue; + if (!isDigestPinnedImage(image)) { + throw new AgentRunError("schema-invalid", "artifact catalog matched imageRef but did not provide a digest-pinned image", { httpStatus: 409, details: { catalogFile, imageRef: requestedSummary, image, valuesPrinted: false } }); + } + return { + status: "catalog-reused", + image, + imageRef: requestedSummary, + envIdentity: stringValue(item.envIdentity) ?? imageRefSourceIdentity(imageRef), + digestPinned: true, + catalogFile, + valuesPrinted: false, + }; + } + return null; +} + +function catalogServiceMatchesImageRef(service: JsonRecord, imageRef: AipodImageRef, requestedSummary: JsonRecord): boolean { + const direct = normalizedImageRefOrNull(service.imageRef); + if (direct && sameImageRef(direct, imageRef)) return true; + const provenance = isRecord(service.provenance) ? service.provenance : null; + const provenanceRef = normalizedImageRefOrNull(provenance?.imageRef); + if (provenanceRef && sameImageRef(provenanceRef, imageRef)) return true; + return stringValue(service.imageRefSourceIdentity) === requestedSummary.sourceIdentity || stringValue(service.envIdentity) === requestedSummary.sourceIdentity; +} + +function normalizedImageRefOrNull(value: unknown): AipodImageRef | null { + if (!isRecord(value)) return null; + try { + return validateAipodImageRef(value, "catalog.imageRef"); + } catch { + return null; + } +} + +function sameImageRef(left: AipodImageRef, right: AipodImageRef): boolean { + return left.kind === right.kind && left.repoUrl === right.repoUrl && left.commitId === right.commitId && left.dockerfilePath === right.dockerfilePath; +} + +function validateRepoUrl(value: string, fieldName: string): string { + if (/\s|[\x00-\x1f\x7f]/u.test(value)) throw new AgentRunError("schema-invalid", `${fieldName} must not contain whitespace or control characters`, { httpStatus: 400 }); + if (value.includes("://")) { + let url: URL; + try { + url = new URL(value); + } catch { + throw new AgentRunError("schema-invalid", `${fieldName} must be a valid git repo URL`, { httpStatus: 400 }); + } + if (!["https:", "http:", "ssh:", "git:"].includes(url.protocol)) throw new AgentRunError("schema-invalid", `${fieldName} must use http(s), ssh, or git protocol`, { httpStatus: 400 }); + if (url.password || (url.username && !(url.protocol === "ssh:" && url.username === "git"))) throw new AgentRunError("schema-invalid", `${fieldName} must not include credentials`, { httpStatus: 400 }); + if (url.search || url.hash) throw new AgentRunError("schema-invalid", `${fieldName} must not include query or fragment`, { httpStatus: 400 }); + return value; + } + if (!/^[A-Za-z0-9._-]+@[A-Za-z0-9._-]+:[A-Za-z0-9._/-]+(?:\.git)?$/u.test(value)) throw new AgentRunError("schema-invalid", `${fieldName} must be a git repo URL`, { httpStatus: 400 }); + return value; +} + +function validateDockerfilePath(value: string, fieldName: string): string { + if (value === "." || value.startsWith("/") || value.endsWith("/") || value.includes("\\")) throw new AgentRunError("schema-invalid", `${fieldName} must be a repository-relative file path`, { httpStatus: 400 }); + const parts = value.split("/"); + if (parts.some((part) => part.length === 0 || part === "." || part === "..")) throw new AgentRunError("schema-invalid", `${fieldName} must stay within the checkout`, { httpStatus: 400 }); + return value; +} + +function requiredString(record: JsonRecord, key: string, fieldName: string): string { + const value = record[key]; + if (typeof value !== "string" || value.trim().length === 0) throw new AgentRunError("schema-invalid", `${fieldName}.${key} is required`, { httpStatus: 400 }); + return value.trim(); +} + +function stringValue(value: unknown): string | null { + return typeof value === "string" && value.trim().length > 0 ? value.trim() : null; +} + +function isRecord(value: unknown): value is JsonRecord { + return typeof value === "object" && value !== null && !Array.isArray(value); +} diff --git a/src/common/types.ts b/src/common/types.ts index 43e8d3c..ede532b 100644 --- a/src/common/types.ts +++ b/src/common/types.ts @@ -61,6 +61,13 @@ export interface SecretRef extends JsonRecord { mountPath?: string; } +export interface AipodImageRef extends JsonRecord { + kind: "env-image-dockerfile"; + repoUrl: string; + commitId: string; + dockerfilePath: string; +} + export interface GitBundleItemRef extends JsonRecord { name?: string; repoUrl?: string; @@ -112,6 +119,7 @@ export interface AipodSpec extends JsonRecord { providerId?: string; backendProfile: BackendProfile; model?: JsonRecord; + imageRef: AipodImageRef; workspaceRef?: WorkspaceRef; sessionRef?: SessionRef | null; executionPolicy: ExecutionPolicy; @@ -456,6 +464,8 @@ export interface QueueDispatchResult extends JsonRecord { run: RunRecord; command: CommandRecord; runnerJob: JsonRecord; + envImage: JsonRecord | null; + workReady: JsonRecord | null; latestAttempt: QueueAttemptRef; pollCommands: JsonRecord; } diff --git a/src/common/work-ready.ts b/src/common/work-ready.ts new file mode 100644 index 0000000..2ede438 --- /dev/null +++ b/src/common/work-ready.ts @@ -0,0 +1,130 @@ +import { execFile } from "node:child_process"; +import { access } from "node:fs/promises"; +import { constants } from "node:fs"; +import { promisify } from "node:util"; +import { AgentRunError } from "./errors.js"; +import { stableHash } from "./validation.js"; +import type { JsonRecord } from "./types.js"; + +const execFileAsync = promisify(execFile); +const toolTimeoutMs = 5_000; + +export const workReadyVersion = "v0.1-runner-work-ready-20260610"; + +export const imageWorkReadyTools = Object.freeze([ + { name: "bun", command: "bun", args: ["--version"] }, + { name: "node", command: "node", args: ["--version"] }, + { name: "npm", command: "npm", args: ["--version"] }, + { name: "git", command: "git", args: ["--version"] }, + { name: "ssh", command: "ssh", args: ["-V"], versionFrom: "stderr" as const }, + { name: "gh", command: "gh", args: ["--version"], firstLine: true }, + { name: "rg", command: "rg", args: ["--version"], firstLine: true }, + { name: "curl", command: "curl", args: ["--version"], firstLine: true }, + { name: "kubectl", command: "kubectl", args: ["version", "--client"], firstLine: true }, +]); + +export const bundledWorkReadyTools = Object.freeze([ + { name: "tran", path: "/usr/local/bin/tran" }, + { name: "trans", path: "/usr/local/bin/trans" }, + { name: "apply_patch", path: "/usr/local/bin/apply_patch" }, +]); + +export function staticWorkReadyCapabilitySummary(): JsonRecord { + return { + version: workReadyVersion, + requiredImageTools: imageWorkReadyTools.map((tool) => tool.name), + requiredBundledTools: bundledWorkReadyTools.map((tool) => tool.name), + packageLayer: { + osFamily: "alpine", + packageManager: "apk", + runtimeInstallPolicy: "forbidden-for-common-tasks", + notes: ["基础 CLI 和 AgentRun npm 依赖必须在镜像构建阶段准备;普通任务不得运行 apt/apk/bun/npm install 来补基础环境。"], + }, + dependencyStrategy: { + agentrunNodeModules: "image-layer:/opt/agentrun/node_modules", + runnerBootNodeModules: "symlink:/workspace/agentrun/node_modules -> /opt/agentrun/node_modules", + projectDependencies: "not-installed-by-default", + projectDependencyCache: "explicit-task-or-derived-image-only", + }, + valuesPrinted: false, + }; +} + +export async function smokeImageWorkReadyCapabilities(env: NodeJS.ProcessEnv = process.env): Promise { + const toolResults = await Promise.all(imageWorkReadyTools.map((tool) => checkCommand(tool, env))); + const missing = toolResults.filter((item) => item.ok !== true).map((item) => item.name); + const summary = { + ...staticWorkReadyCapabilitySummary(), + imageTools: toolResults, + smoke: { ok: missing.length === 0, scope: "image", missing, checkedAt: new Date().toISOString(), valuesPrinted: false }, + capabilityHash: stableHash({ version: workReadyVersion, toolResults }), + valuesPrinted: false, + } satisfies JsonRecord; + if (missing.length > 0) { + throw new AgentRunError("infra-failed", `runner image is not work-ready; missing required image tools: ${missing.join(", ")}`, { httpStatus: 503, details: summary }); + } + return summary; +} + +export async function smokeBundledWorkReadyCapabilities(env: NodeJS.ProcessEnv = process.env): Promise { + const toolResults = await Promise.all(bundledWorkReadyTools.map((tool) => checkExecutable(tool, env))); + const missing = toolResults.filter((item) => item.ok !== true).map((item) => item.name); + const summary = { + ...staticWorkReadyCapabilitySummary(), + bundledTools: toolResults, + smoke: { ok: missing.length === 0, scope: "bundle", missing, checkedAt: new Date().toISOString(), valuesPrinted: false }, + capabilityHash: stableHash({ version: workReadyVersion, toolResults }), + valuesPrinted: false, + } satisfies JsonRecord; + if (missing.length > 0) { + throw new AgentRunError("infra-failed", `runner bundle is not work-ready; missing required bundled tools: ${missing.join(", ")}`, { httpStatus: 503, details: summary }); + } + return summary; +} + +async function checkCommand(tool: { name: string; command: string; args: string[]; versionFrom?: "stdout" | "stderr"; firstLine?: boolean }, env: NodeJS.ProcessEnv): Promise { + try { + const result = await execFileAsync(tool.command, tool.args, { timeout: toolTimeoutMs, env: redactedToolEnv(env) }); + const versionText = tool.versionFrom === "stderr" ? result.stderr : result.stdout || result.stderr; + return { name: tool.name, command: tool.command, ok: true, version: normalizeVersion(versionText, tool.firstLine), valuesPrinted: false }; + } catch (error) { + return { name: tool.name, command: tool.command, ok: false, failureKind: "tool-unavailable", message: error instanceof Error ? error.message : String(error), valuesPrinted: false }; + } +} + +async function checkExecutable(tool: { name: string; path: string }, env: NodeJS.ProcessEnv): Promise { + const candidate = pathForBundledTool(tool, env); + try { + await access(candidate, constants.X_OK); + return { name: tool.name, path: candidate, ok: true, valuesPrinted: false }; + } catch (error) { + return { name: tool.name, path: candidate, ok: false, failureKind: "tool-unavailable", message: error instanceof Error ? error.message : String(error), valuesPrinted: false }; + } +} + +function pathForBundledTool(tool: { name: string; path: string }, env: NodeJS.ProcessEnv): string { + const binPath = env.AGENTRUN_RESOURCE_BIN_PATH; + if (binPath && binPath.trim().length > 0) return `${binPath.replace(/\/+$/u, "")}/${tool.name}`; + return tool.path; +} + +function normalizeVersion(value: string, firstLine: boolean | undefined): string { + const normalized = firstLine ? value.trim().split(/\r?\n/u)[0] ?? "" : value.trim().replace(/\s+/gu, " "); + return normalized.slice(0, 160); +} + +function redactedToolEnv(env: NodeJS.ProcessEnv): NodeJS.ProcessEnv { + const allowedKeys = ["PATH", "HOME", "KUBECONFIG", "SSL_CERT_FILE", "SSL_CERT_DIR"]; + const next: NodeJS.ProcessEnv = {}; + for (const key of allowedKeys) { + const value = env[key] ?? process.env[key]; + if (value !== undefined) next[key] = value; + } + const selftestBin = env.AGENTRUN_SELFTEST_WORK_READY_BIN_PATH ?? process.env.AGENTRUN_SELFTEST_WORK_READY_BIN_PATH; + if (selftestBin && selftestBin.trim().length > 0) next.PATH = `${selftestBin}${pathDelimiter()}${next.PATH ?? ""}`; + return next; +} + +function pathDelimiter(): string { + return process.platform === "win32" ? ";" : ":"; +} diff --git a/src/mgr/kubernetes-runner-job.ts b/src/mgr/kubernetes-runner-job.ts index d39b267..316e309 100644 --- a/src/mgr/kubernetes-runner-job.ts +++ b/src/mgr/kubernetes-runner-job.ts @@ -7,6 +7,8 @@ import type { ExecutionPolicy, JsonRecord } from "../common/types.js"; import { stableHash, validateEnvName } from "../common/validation.js"; import { renderRunnerJobManifest } from "../runner/k8s-job.js"; import type { RunnerSessionPvcOptions, RunnerTransientEnv } from "../runner/k8s-job.js"; +import { staticWorkReadyCapabilitySummary } from "../common/work-ready.js"; +import { resolveRunnerEnvImage } from "../common/env-image-ref.js"; const reusableCredentialEnvNames = new Set([ "AUTH_PASSWORD", @@ -36,6 +38,8 @@ export interface RunnerJobDefaults { managerUrl: string; image: string; sourceCommit: string; + envIdentity?: string; + artifactCatalogFile?: string; serviceAccountName?: string; kubectlCommand?: string; unideskSshEndpointEnv?: JsonRecord; @@ -51,6 +55,7 @@ export interface CreateRunnerJobInput extends JsonRecord { sourceCommit?: string; serviceAccountName?: string; idempotencyKey?: string; + imageRef?: JsonRecord; transientEnv?: JsonRecord[]; } @@ -61,8 +66,14 @@ export async function createKubernetesRunnerJob(options: { store: AgentRunStore; if (command.runId !== run.id) throw new AgentRunError("schema-invalid", `command ${commandId} does not belong to run ${run.id}`, { httpStatus: 400 }); if (command.type !== "turn") throw new AgentRunError("schema-invalid", `command ${commandId} is not a turn command`, { httpStatus: 400 }); - const image = optionalString(options.input.image) ?? options.defaults.image; - if (!image) throw new AgentRunError("schema-invalid", "runner job image is required; set --image or AGENTRUN_RUNNER_IMAGE", { httpStatus: 400 }); + const envImage = await resolveRunnerEnvImage({ + ...(options.input.imageRef !== undefined ? { imageRef: options.input.imageRef } : {}), + ...(optionalString(options.input.image) ? { explicitImage: optionalString(options.input.image) as string } : {}), + defaultImage: options.defaults.image, + ...(options.defaults.envIdentity ? { envIdentity: options.defaults.envIdentity } : {}), + ...(options.defaults.artifactCatalogFile ? { artifactCatalogFile: options.defaults.artifactCatalogFile } : {}), + }); + const image = envImage.image; const namespace = optionalString(options.input.namespace) ?? options.defaults.namespace; const managerUrl = optionalString(options.input.managerUrl) ?? options.defaults.managerUrl; const sourceCommit = optionalString(options.input.sourceCommit) ?? options.defaults.sourceCommit; @@ -79,6 +90,7 @@ export async function createKubernetesRunnerJob(options: { store: AgentRunStore; namespace, managerUrl, sourceCommit, + envImage, serviceAccountName: serviceAccountName ?? null, attemptId: optionalString(options.input.attemptId) ?? null, runnerId: optionalString(options.input.runnerId) ?? null, @@ -144,6 +156,7 @@ export async function createKubernetesRunnerJob(options: { store: AgentRunStore; runnerId: render.runnerId, namespace: render.namespace, jobName: render.jobName, + image, jobIdentity: { kind: "Job", namespace: render.namespace, @@ -158,14 +171,17 @@ export async function createKubernetesRunnerJob(options: { store: AgentRunStore; runnerId: render.runnerId, backendProfile: run.backendProfile, managerUrl, + image, sourceCommit, placement: "kubernetes-job", logPath: `kubectl -n ${render.namespace} logs job/${render.jobName}`, }, + envImage, secretRefs: render.secretRefs.map((item) => ({ profile: item.profile, name: item.secretRef.name, namespace: item.secretRef.namespace ?? render.namespace, keys: item.secretRef.keys ?? [], mountPath: item.runtimeMountPath, projectionPath: item.projectionMountPath, writableCopy: true, valuesPrinted: false })), toolCredentials: summarizeToolCredentials(render.toolCredentials, render.namespace), transientEnv: summarizeTransientEnv(transientEnv), transientEnvSecret: transientEnvSecretResponse, + workReady: staticWorkReadyCapabilitySummary(), retention: { ttlSecondsAfterFinished: render.ttlSecondsAfterFinished, }, @@ -208,6 +224,8 @@ export async function createKubernetesRunnerJob(options: { store: AgentRunStore; idempotencyKey: idempotencyKey ? "present" : null, transientEnv: summarizeTransientEnv(transientEnv), transientEnvSecret: transientEnvSecretResponse, + envImage, + workReady: staticWorkReadyCapabilitySummary(), toolCredentials: summarizeToolCredentials(render.toolCredentials, render.namespace), sessionRef: summarizeSessionRef(run.sessionRef ?? null), resourceBundleRef: summarizeResourceBundleRef(run.resourceBundleRef ?? null), diff --git a/src/mgr/queue-dispatch.ts b/src/mgr/queue-dispatch.ts index 9a2b00d..79d605b 100644 --- a/src/mgr/queue-dispatch.ts +++ b/src/mgr/queue-dispatch.ts @@ -49,6 +49,8 @@ export async function dispatchQueueTask(options: DispatchQueueTaskOptions): Prom run, command, runnerJob, + envImage: jsonRecordOrNull(runnerJob.envImage), + workReady: jsonRecordOrNull(runnerJob.workReady), latestAttempt, pollCommands: { queue: `./scripts/agentrun queue show ${task.id}`, @@ -127,6 +129,8 @@ function buildRunnerJobInput(task: QueueTaskRecord, commandId: string, input: Js copyString("runnerId", "runnerId"); copyString("sourceCommit", "sourceCommit"); copyString("serviceAccountName", "serviceAccountName"); + const imageRef = jsonRecordOrNull(input.imageRef) ?? jsonRecordOrNull(task.metadata.aipodImageRef); + if (imageRef) jobInput.imageRef = imageRef; if (input.transientEnv !== undefined) { if (!Array.isArray(input.transientEnv)) throw new AgentRunError("schema-invalid", "transientEnv must be an array", { httpStatus: 400 }); jobInput.transientEnv = input.transientEnv.map((item, index) => asRecord(item, `transientEnv[${index}]`)); @@ -140,6 +144,11 @@ function stringFrom(record: JsonRecord, key: string): string { return value; } +function jsonRecordOrNull(value: unknown): JsonRecord | null { + if (!value || typeof value !== "object" || Array.isArray(value)) return null; + return value as JsonRecord; +} + function optionalString(value: unknown): string | undefined { return typeof value === "string" && value.trim().length > 0 ? value.trim() : undefined; } diff --git a/src/mgr/runner-job-status.ts b/src/mgr/runner-job-status.ts index c3e168d..e828140 100644 --- a/src/mgr/runner-job-status.ts +++ b/src/mgr/runner-job-status.ts @@ -6,6 +6,7 @@ export function runnerJobStatusSummary(job: RunnerJobRecord, events: RunEvent[] const jobIdentity = recordAt(job.result, "jobIdentity"); const kubernetes = recordAt(job.result, "kubernetes"); const retention = recordAt(job.result, "retention"); + const envImage = recordAt(job.result, "envImage"); const terminalStatus = terminalEvent?.payload.terminalStatus; return { id: job.id, @@ -17,6 +18,7 @@ export function runnerJobStatusSummary(job: RunnerJobRecord, events: RunEvent[] jobName: job.jobName, managerUrl: job.managerUrl, image: job.image, + envImage, sourceCommit: job.sourceCommit, serviceAccountName: job.serviceAccountName, phase: terminalStatus ? `terminal:${terminalStatus}` : kubernetes.created === true ? "created" : "recorded", diff --git a/src/mgr/server.ts b/src/mgr/server.ts index 904da51..578fef8 100644 --- a/src/mgr/server.ts +++ b/src/mgr/server.ts @@ -17,6 +17,7 @@ import type { SessionPvcOptions } from "./session-pvc.js"; import { getProviderProfileConfig, getProviderProfileValidation, listBackendCapabilities, listProviderProfiles, removeProviderProfile, setProviderProfileConfig, setProviderProfileCredential, showProviderProfile, validateProviderProfile } from "./provider-profiles.js"; import { listToolCredentials, setGithubSshToolCredential, showToolCredential } from "./tool-credentials.js"; import { aipodSpecFromInput, applyAipodSpec, deleteAipodSpec, listAipodSpecs, renderAipodSpecByName, showAipodSpec } from "../common/aipod-specs.js"; +import { staticWorkReadyCapabilitySummary } from "../common/work-ready.js"; function pvcOptions(defaults: { kubectlCommand?: string } | undefined): SessionPvcOptions { return defaults?.kubectlCommand ? { kubectlCommand: defaults.kubectlCommand } : {}; @@ -42,6 +43,8 @@ export interface ManagerServerOptions { namespace?: string; managerUrl?: string; image?: string; + envIdentity?: string; + artifactCatalogFile?: string; serviceAccountName?: string; kubectlCommand?: string; unideskSshEndpointEnv?: JsonRecord; @@ -211,7 +214,7 @@ async function route({ method, url, body, store, sourceCommit, runnerJobDefaults if (method === "GET" && (path === "/health" || path === "/health/live" || path === "/health/readiness")) { const database = await store.health(); const ready = path === "/health/live" ? true : database.ready; - return { serviceId: "agentrun-mgr", live: true, ready, database, sourceCommit, secretRefs: { databaseUrl: database.adapter === "postgres" ? "redacted" : "not-used", valuesPrinted: false } }; + return { serviceId: "agentrun-mgr", live: true, ready, database, sourceCommit, runnerWorkReady: staticWorkReadyCapabilitySummary(), secretRefs: { databaseUrl: database.adapter === "postgres" ? "redacted" : "not-used", valuesPrinted: false } }; } if (method === "GET" && path === "/api/v1/backends") return await listBackendCapabilities(providerProfileDefaults) as JsonValue; if (method === "GET" && path === "/api/v1/tool-credentials") return await listToolCredentials(toolCredentialDefaults) as JsonValue; @@ -411,6 +414,8 @@ async function route({ method, url, body, store, sourceCommit, runnerJobDefaults managerUrl: runnerJobDefaults?.managerUrl ?? process.env.AGENTRUN_INTERNAL_MGR_URL ?? `http://agentrun-mgr.${namespace}.svc.cluster.local:8080`, image: runnerJobDefaults?.image ?? process.env.AGENTRUN_RUNNER_IMAGE ?? "", sourceCommit, + ...optionalStringRecord("envIdentity", runnerJobDefaults?.envIdentity ?? process.env.AGENTRUN_ENV_IDENTITY), + ...optionalStringRecord("artifactCatalogFile", runnerJobDefaults?.artifactCatalogFile ?? process.env.AGENTRUN_ARTIFACT_CATALOG_FILE), serviceAccountName: runnerJobDefaults?.serviceAccountName ?? process.env.AGENTRUN_RUNNER_SERVICE_ACCOUNT ?? "agentrun-v01-runner", ...(runnerJobDefaults?.kubectlCommand ? { kubectlCommand: runnerJobDefaults.kubectlCommand } : {}), ...(runnerJobDefaults?.unideskSshEndpointEnv ? { unideskSshEndpointEnv: runnerJobDefaults.unideskSshEndpointEnv } : {}), @@ -473,6 +478,8 @@ async function route({ method, url, body, store, sourceCommit, runnerJobDefaults managerUrl: runnerJobDefaults?.managerUrl ?? process.env.AGENTRUN_INTERNAL_MGR_URL ?? `http://agentrun-mgr.${namespace}.svc.cluster.local:8080`, image: runnerJobDefaults?.image ?? process.env.AGENTRUN_RUNNER_IMAGE ?? "", sourceCommit, + ...optionalStringRecord("envIdentity", runnerJobDefaults?.envIdentity ?? process.env.AGENTRUN_ENV_IDENTITY), + ...optionalStringRecord("artifactCatalogFile", runnerJobDefaults?.artifactCatalogFile ?? process.env.AGENTRUN_ARTIFACT_CATALOG_FILE), serviceAccountName: runnerJobDefaults?.serviceAccountName ?? process.env.AGENTRUN_RUNNER_SERVICE_ACCOUNT ?? "agentrun-v01-runner", ...(runnerJobDefaults?.kubectlCommand ? { kubectlCommand: runnerJobDefaults.kubectlCommand } : {}), ...(runnerJobDefaults?.unideskSshEndpointEnv ? { unideskSshEndpointEnv: runnerJobDefaults.unideskSshEndpointEnv } : {}), @@ -597,6 +604,10 @@ function stringField(record: JsonRecord, key: string): string { return value.trim(); } +function optionalStringRecord(key: string, value: unknown): JsonRecord { + return typeof value === "string" && value.trim().length > 0 ? { [key]: value.trim() } : {}; +} + function normalizeError(error: unknown): AgentRunError { if (error instanceof AgentRunError) return error; return new AgentRunError("infra-failed", error instanceof Error ? error.message : String(error), { httpStatus: 500 }); diff --git a/src/runner/k8s-job.ts b/src/runner/k8s-job.ts index fd6d22d..feaa040 100644 --- a/src/runner/k8s-job.ts +++ b/src/runner/k8s-job.ts @@ -1,6 +1,7 @@ import { stableHash } from "../common/validation.js"; import type { BackendProfile, ExecutionPolicy, JsonRecord, JsonValue, RunRecord, SecretRef } from "../common/types.js"; import { backendProfileSpec } from "../common/backend-profiles.js"; +import { staticWorkReadyCapabilitySummary } from "../common/work-ready.js"; const defaultBootRepoUrl = "http://git-mirror-http.devops-infra.svc.cluster.local/pikasTech/agentrun.git"; const defaultResourceBinPath = "/usr/local/bin"; @@ -126,6 +127,7 @@ export function renderRunnerJobDryRun(options: RunnerJobRenderOptions): JsonReco secretRefs: render.secretRefs.map((item) => ({ profile: item.profile, name: item.secretRef.name, namespace: item.secretRef.namespace ?? render.namespace, keys: item.secretRef.keys ?? [], mountPath: item.runtimeMountPath, projectionPath: item.projectionMountPath, writableCopy: true, valuesPrinted: false })), toolCredentials: summarizeToolCredentials(render.toolCredentials, render.namespace), transientEnv: summarizeTransientEnv(options.transientEnv ?? []), + workReady: staticWorkReadyCapabilitySummary(), retention: { ttlSecondsAfterFinished: render.ttlSecondsAfterFinished, }, @@ -241,6 +243,8 @@ function runnerEnv(options: RunnerJobRenderOptions, context: { namespace: string { name: "AGENTRUN_RUNTIME_NAMESPACE", value: context.namespace }, { name: "AGENTRUN_K8S_JOB_NAME", value: context.jobName }, { name: "AGENTRUN_LOG_PATH", value: "/tmp/agentrun-runner.jsonl" }, + { name: "AGENTRUN_WORK_READY_VERSION", value: String(staticWorkReadyCapabilitySummary().version) }, + { name: "AGENTRUN_PROJECT_DEPENDENCY_POLICY", value: "explicit-cache-or-derived-image-only" }, { name: "AGENTRUN_RUNNER_IDLE_TIMEOUT_MS", value: "600000" }, { name: "AGENTRUN_RUNNER_POLL_INTERVAL_MS", value: "250" }, { name: "HOME", value: "/home/agentrun" }, diff --git a/src/runner/run-once.ts b/src/runner/run-once.ts index a1afded..9c46cfb 100644 --- a/src/runner/run-once.ts +++ b/src/runner/run-once.ts @@ -3,6 +3,7 @@ import { createBackendSession, runBackendTurn, type BackendActiveTurnControl, ty import { materializeResourceBundle } from "./resource-bundle.js"; import type { BackendEvent, BackendProfile, CommandRecord, FailureKind, InitialPromptAssembly, JsonRecord, RunRecord, RunnerRecord, TerminalStatus } from "../common/types.js"; import { AgentRunError } from "../common/errors.js"; +import { smokeBundledWorkReadyCapabilities, smokeImageWorkReadyCapabilities } from "../common/work-ready.js"; export interface RunnerOnceOptions extends BackendAdapterOptions { managerUrl: string; @@ -57,6 +58,17 @@ export async function runOnce(options: RunnerOnceOptions): Promise { ...(options.podName ? { podName: options.podName } : {}), ...(options.logPath ? { logPath: options.logPath } : {}), }) as RunnerRecord; + try { + const imageWorkReady = await smokeImageWorkReadyCapabilities(options.env ?? process.env); + await api.appendEvent(options.runId, { type: "backend_status", payload: { phase: "runner-image-work-ready-smoke", attemptId, runnerId: runner.id, ...imageWorkReady } }); + } catch (error) { + const failureKind = failureKindFromError(error); + const message = errorMessage(error); + const details = failureDetailsFromError(error); + await api.appendEvent(options.runId, { type: "error", payload: { failureKind, message, phase: "runner-image-work-ready-smoke", attemptId, runnerId: runner.id, ...(details ? { details } : {}) } }); + const finalRun = await api.reportStatus(options.runId, { terminalStatus: terminalStatusForFailure(failureKind), failureKind, failureMessage: message }) as RunRecord; + return { runner, terminalStatus: finalRun.terminalStatus, failureKind, run: finalRun, commandsProcessed: 0, commandResults: [], stopped: "image-work-ready-smoke-failed" } as JsonRecord; + } let claimed: RunRecord; try { claimed = await claimRunWithLeaseRecovery(api, options, runner, attemptId, leaseMs); @@ -108,6 +120,10 @@ export async function runOnce(options: RunnerOnceOptions): Promise { resourceEnv = resourceEnvForMaterialized(options.env ?? process.env, materialized); initialPrompt = materialized.initialPrompt; await api.appendEvent(options.runId, { type: "backend_status", payload: { ...materialized.event, commandId: command.id, attemptId, runnerId: runner.id } }); + if (requiresBundledWorkReadyTools(claimed)) { + const bundleWorkReady = await smokeBundledWorkReadyCapabilities(resourceEnv ?? options.env ?? process.env); + await api.appendEvent(options.runId, { type: "backend_status", payload: { phase: "runner-bundle-work-ready-smoke", commandId: command.id, attemptId, runnerId: runner.id, ...bundleWorkReady } }); + } } } catch (error) { const failureKind = failureKindFromError(error); @@ -144,6 +160,13 @@ export async function runOnce(options: RunnerOnceOptions): Promise { } } +function requiresBundledWorkReadyTools(run: RunRecord): boolean { + const toolCredentials = run.executionPolicy.secretScope.toolCredentials ?? []; + if (toolCredentials.some((item) => item.tool === "unidesk-ssh" || item.tool === "github")) return true; + const requiredSkills = run.resourceBundleRef?.requiredSkills ?? []; + return requiredSkills.some((item) => item.name === "unidesk-trans" || item.name === "unidesk-gh" || item.name === "unidesk-code-queue" || item.name === "unidesk-cicd" || item.name === "dad-dev"); +} + function withResourceAssembly(options: RunnerOnceOptions, resourceEnv: NodeJS.ProcessEnv | undefined, initialPrompt: InitialPromptAssembly | undefined): RunnerOnceOptions { return { ...options, diff --git a/src/selftest/cases/10-manager-memory.ts b/src/selftest/cases/10-manager-memory.ts index ec01228..e7c2b9c 100644 --- a/src/selftest/cases/10-manager-memory.ts +++ b/src/selftest/cases/10-manager-memory.ts @@ -10,11 +10,13 @@ const selfTest: SelfTestCase = async () => { const server = await startManagerServer({ port: 0, host: "127.0.0.1", sourceCommit: "self-test", store }); try { const client = new ManagerClient(server.baseUrl); - const health = await client.get("/health/readiness") as { database?: { adapter?: string; reachable?: boolean; migrationReady?: boolean; failureKind?: string | null }; secretRefs?: { valuesPrinted?: boolean } }; + const health = await client.get("/health/readiness") as { database?: { adapter?: string; reachable?: boolean; migrationReady?: boolean; failureKind?: string | null }; runnerWorkReady?: JsonRecord; secretRefs?: { valuesPrinted?: boolean } }; assert.equal(health.database?.adapter, "memory-self-test"); assert.equal(health.database?.reachable, true); assert.equal(health.database?.migrationReady, true); assert.equal(health.database?.failureKind, null); + assert.equal(((health.runnerWorkReady as { valuesPrinted?: unknown } | undefined)?.valuesPrinted), false); + assert.ok((((health.runnerWorkReady as { requiredImageTools?: string[] } | undefined)?.requiredImageTools) ?? []).includes("npm")); assert.equal(health.secretRefs?.valuesPrinted, false); await assertLongResultUsesTerminalAssistant(client, store); return { name: "manager-memory", tests: ["manager-memory-lifecycle", "manager-result-long-trace"] }; diff --git a/src/selftest/cases/20-runner-k8s-job.ts b/src/selftest/cases/20-runner-k8s-job.ts index 208cbc8..258c75b 100644 --- a/src/selftest/cases/20-runner-k8s-job.ts +++ b/src/selftest/cases/20-runner-k8s-job.ts @@ -6,12 +6,13 @@ import { MemoryAgentRunStore } from "../../mgr/store.js"; import { ManagerClient } from "../../mgr/client.js"; import { renderRunnerJobDryRun } from "../../runner/k8s-job.js"; import type { JsonRecord, RunRecord } from "../../common/types.js"; -import { assertNoSecretLeak, createRunWithCommand, type SelfTestCase } from "../harness.js"; +import { assertNoSecretLeak, createRunWithCommand, loadArtificerImageRef, type SelfTestCase } from "../harness.js"; const selfTest: SelfTestCase = async (context) => { const server = await startManagerServer({ port: 0, host: "127.0.0.1", sourceCommit: "self-test", store: new MemoryAgentRunStore() }); try { const client = new ManagerClient(server.baseUrl); + const artificerImageRef = await loadArtificerImageRef(context.root); const githubToolCredentials = [{ tool: "github", purpose: "pull-request", @@ -45,6 +46,7 @@ const selfTest: SelfTestCase = async (context) => { assert.equal(rendered.mutation, false); assert.equal(((rendered.retention as JsonRecord).ttlSecondsAfterFinished), 86_400); assert.equal((rendered.jobIdentity as { serviceAccountName?: string }).serviceAccountName, "agentrun-v01-runner"); + assertWorkReadySummary(rendered.workReady as JsonRecord); assertRunnerJobUsesWritableCodexHome(rendered.manifest as JsonRecord, context.codexHome, "codex-0", "/var/run/agentrun/secrets/codex-0"); assertRunnerJobUsesToolCredential(rendered, "GH_TOKEN", "agentrun-v01-tool-github-pr", "GH_TOKEN"); assertRunnerJobUsesToolCredential(rendered, "UNIDESK_SSH_CLIENT_TOKEN", "agentrun-v01-tool-unidesk-ssh", "UNIDESK_SSH_CLIENT_TOKEN"); @@ -173,6 +175,7 @@ process.exit(1); namespace: "agentrun-v01", managerUrl: "http://agentrun-mgr.agentrun-v01.svc.cluster.local:8080", image: "127.0.0.1:5000/agentrun/agentrun-mgr@sha256:1111111111111111111111111111111111111111111111111111111111111111", + envIdentity: "selftest-env-identity", kubectlCommand: fakeKubectl, unideskSshEndpointEnv: { name: "UNIDESK_MAIN_SERVER_IP", value: "https://unidesk.default.example.test" }, }, @@ -183,6 +186,12 @@ process.exit(1); const created = await jobClient.post(`/api/v1/runs/${jobItem.runId}/runner-jobs`, { commandId: jobItem.commandId, attemptId: "attempt_selftest_create", + imageRef: { + kind: "env-image-dockerfile", + repoUrl: "git@github.com:pikasTech/agentrun.git", + commitId: artificerImageRef.commitId, + dockerfilePath: "deploy/container/Containerfile", + }, transientEnv: [ { name: "HWLAB_API_KEY", value: "hwl_live_selftest", sensitive: true }, { name: "HWLAB_RUNTIME_API_URL", value: "http://runtime-api.test", sensitive: true }, @@ -196,6 +205,10 @@ process.exit(1); ], }); assert.equal((created as { mutation?: unknown }).mutation, true); + assert.equal((((created as JsonRecord).envImage as JsonRecord).status), "runtime-default-reused"); + assert.equal((((created as JsonRecord).envImage as JsonRecord).digestPinned), true); + assert.equal(((((created as JsonRecord).envImage as JsonRecord).imageRef as JsonRecord).kind), "env-image-dockerfile"); + assertWorkReadySummary((created as JsonRecord).workReady as JsonRecord); assert.equal(((created as JsonRecord).retention as JsonRecord).ttlSecondsAfterFinished, 86_400); assert.deepEqual((((created as JsonRecord).transientEnv as JsonRecord).names) as string[], ["HWLAB_API_KEY", "HWLAB_RUNTIME_API_URL", "HWLAB_RUNTIME_WEB_URL", "HWLAB_RUNTIME_NAMESPACE", "HWLAB_RUNTIME_LANE", "HWLAB_RUNTIME_ENDPOINT_SOURCE", "HWLAB_RUNTIME_ENDPOINT_LOCKED", "HWLAB_CODE_AGENT_ASSEMBLED_RUNTIME", "UNIDESK_MAIN_SERVER_IP"]); const transientEnvSecret = (created as JsonRecord).transientEnvSecret as JsonRecord; @@ -319,6 +332,16 @@ function assertRunnerJobUsesTransientEnvSecret(manifest: JsonRecord, envName: st assert.equal(secretKeyRef.key, envName); } +function assertWorkReadySummary(summary: JsonRecord): void { + assert.equal(summary.valuesPrinted, false); + assert.equal(typeof summary.version, "string"); + assert.ok((summary.requiredImageTools as string[]).includes("bun")); + assert.ok((summary.requiredImageTools as string[]).includes("npm")); + assert.ok((summary.requiredImageTools as string[]).includes("gh")); + assert.ok((summary.requiredBundledTools as string[]).includes("tran")); + assert.equal(((summary.dependencyStrategy as JsonRecord).projectDependencies), "not-installed-by-default"); +} + function assertRunnerJobUsesG14EgressProxy(manifest: JsonRecord): void { const proxy = "http://g14-provider-egress-proxy.unidesk.svc.cluster.local:18789"; assert.equal(runnerEnvValue(manifest, "HTTP_PROXY"), proxy); diff --git a/src/selftest/cases/75-queue-q2-dispatch.ts b/src/selftest/cases/75-queue-q2-dispatch.ts index 3101496..5143469 100644 --- a/src/selftest/cases/75-queue-q2-dispatch.ts +++ b/src/selftest/cases/75-queue-q2-dispatch.ts @@ -6,7 +6,7 @@ import { startManagerServer } from "../../mgr/server.js"; import { MemoryAgentRunStore } from "../../mgr/store.js"; import { ManagerClient } from "../../mgr/client.js"; import type { JsonRecord, QueueDispatchResult, QueueTaskRecord } from "../../common/types.js"; -import { assertNoSecretLeak, type SelfTestCase } from "../harness.js"; +import { assertNoSecretLeak, loadArtificerImageRef, type SelfTestCase } from "../harness.js"; const selfTest: SelfTestCase = async (context) => { const fakeKubectl = path.join(context.tmp, "fake-kubectl-queue-q2.js"); @@ -39,6 +39,13 @@ process.exit(1); `); await chmod(fakeKubectl, 0o755); const store = new MemoryAgentRunStore(); + const artificerImageRef = await loadArtificerImageRef(context.root); + const aipodImageRef = { + kind: "env-image-dockerfile", + repoUrl: "git@github.com:pikasTech/agentrun.git", + commitId: artificerImageRef.commitId, + dockerfilePath: "deploy/container/Containerfile", + }; const server = await startManagerServer({ port: 0, host: "127.0.0.1", @@ -48,6 +55,7 @@ process.exit(1); namespace: "agentrun-v01", managerUrl: "http://agentrun-mgr.agentrun-v01.svc.cluster.local:8080", image: "127.0.0.1:5000/agentrun/agentrun-mgr@sha256:1111111111111111111111111111111111111111111111111111111111111111", + envIdentity: "selftest-env-identity", kubectlCommand: fakeKubectl, unideskSshEndpointEnv: { name: "UNIDESK_MAIN_SERVER_IP", value: "https://unidesk.default.example.test" }, }, @@ -94,7 +102,7 @@ process.exit(1); resourceBundleRef: null, payload: { prompt: "queue dispatch hello" }, references: [{ kind: "issue", url: "https://github.com/pikasTech/agentrun/issues/39" }], - metadata: { source: "queue-q2-self-test" }, + metadata: { source: "queue-q2-self-test", aipodImageRef }, idempotencyKey: "queue-q2-dispatch-self-test", }) as QueueTaskRecord; const dispatchPlan = await runCliJson(context, server.baseUrl, ["queue", "dispatch", String(created.id), "--dry-run", "--attempt-id", "attempt_queue_q2_cli_dryrun"]); @@ -111,6 +119,12 @@ process.exit(1); const dispatched = await client.post(`/api/v1/queue/tasks/${created.id}/dispatch`, { attemptId: "attempt_queue_q2_selftest" }) as QueueDispatchResult; assert.equal(dispatched.action, "queue-dispatch"); assert.equal(dispatched.mutation, true); + assert.equal(((dispatched.envImage as JsonRecord).status), "runtime-default-reused"); + assert.equal(((dispatched.envImage as JsonRecord).digestPinned), true); + assert.equal(((dispatched.runnerJob as JsonRecord).image), "127.0.0.1:5000/agentrun/agentrun-mgr@sha256:1111111111111111111111111111111111111111111111111111111111111111"); + assert.equal(((((dispatched.runnerJob as JsonRecord).envImage as JsonRecord).imageRef as JsonRecord).dockerfilePath), "deploy/container/Containerfile"); + assert.equal(((dispatched.workReady as JsonRecord).valuesPrinted), false); + assert.ok((((dispatched.workReady as JsonRecord).requiredImageTools as string[]) ?? []).includes("npm")); assert.equal(dispatched.latestAttempt.attemptId, "attempt_queue_q2_selftest"); assert.equal(dispatched.latestAttempt.runId, dispatched.run.id); assert.equal(dispatched.latestAttempt.commandId, dispatched.command.id); diff --git a/src/selftest/cases/76-aipod-spec.ts b/src/selftest/cases/76-aipod-spec.ts index 92d04d0..ebb8680 100644 --- a/src/selftest/cases/76-aipod-spec.ts +++ b/src/selftest/cases/76-aipod-spec.ts @@ -6,12 +6,13 @@ import { startManagerServer } from "../../mgr/server.js"; import { MemoryAgentRunStore } from "../../mgr/store.js"; import type { JsonRecord } from "../../common/types.js"; import { resolveGitBundleFetchSource } from "../../runner/resource-bundle.js"; -import { assertNoSecretLeak, type SelfTestCase } from "../harness.js"; +import { assertNoSecretLeak, loadArtificerImageRef, type SelfTestCase } from "../harness.js"; const selfTest: SelfTestCase = async (context) => { const server = await startManagerServer({ port: 0, host: "127.0.0.1", sourceCommit: "self-test", store: new MemoryAgentRunStore(), aipodSpecDir: path.join(context.root, "config", "aipods") }); try { const client = new ManagerClient(server.baseUrl); + const artificerImageRef = await loadArtificerImageRef(context.root); const parsedWithoutBunGlobal = await runNodeParserCompat(context); assert.equal(parsedWithoutBunGlobal.name, "Artificer"); assert.equal(parsedWithoutBunGlobal.hasBunGlobal, false); @@ -24,6 +25,11 @@ const selfTest: SelfTestCase = async (context) => { const shownItem = shown.item as JsonRecord; assert.equal(shownItem.backendProfile, "sub2api"); assert.equal(((shownItem.model as JsonRecord).model), "gpt-5.5"); + const shownImageRef = shownItem.imageRef as JsonRecord; + assert.equal(shownImageRef.kind, "env-image-dockerfile"); + assert.equal(shownImageRef.repoUrl, "git@github.com:pikasTech/agentrun.git"); + assert.equal(shownImageRef.commitId, artificerImageRef.commitId); + assert.equal(shownImageRef.dockerfilePath, "deploy/container/Containerfile"); assert.equal(((shownItem.resourceBundleRef as JsonRecord).gitMirror as JsonRecord).enabled, false); const rendered = await client.post("/api/v1/aipod-specs/Artificer/render", { prompt: "处理 pikasTech/unidesk#245", idempotencyKey: "selftest-aipod-artificer" }) as JsonRecord; @@ -32,6 +38,12 @@ const selfTest: SelfTestCase = async (context) => { assert.equal(task.backendProfile, "sub2api"); assert.equal(task.providerId, "G14"); assert.equal(task.idempotencyKey, "selftest-aipod-artificer"); + const taskImageRef = ((task.metadata as JsonRecord).aipodImageRef as JsonRecord); + assert.equal(taskImageRef.kind, "env-image-dockerfile"); + assert.equal(taskImageRef.valuesPrinted, false); + const runnerDefaults = ((rendered.dispatchDefaults as JsonRecord).runnerJob as JsonRecord); + assert.equal(((runnerDefaults.imageRef as JsonRecord).kind), "env-image-dockerfile"); + assert.equal(((runnerDefaults.imageRef as JsonRecord).dockerfilePath), "deploy/container/Containerfile"); assert.equal(((task.payload as JsonRecord).model), "gpt-5.5"); assert.equal((((task.payload as JsonRecord).modelConfig as JsonRecord).reasoningEffort), "xhigh"); const policy = task.executionPolicy as JsonRecord; @@ -76,7 +88,7 @@ const selfTest: SelfTestCase = async (context) => { assert.equal(commands.some((item) => item.includes("aipod-specs render ")), true); assert.equal(commands.some((item) => item.includes("queue submit --aipod ")), true); assertNoSecretLeak(submitPlan); - return { name: "aipod-spec", tests: ["aipod-spec-yaml-parser-runtime-compatible", "aipod-spec-artificer-direct-ssh-render", "aipod-spec-git-mirror-url", "queue-submit-aipod-dry-run", "aipod-cli-help"] }; + return { name: "aipod-spec", tests: ["aipod-spec-yaml-parser-runtime-compatible", "aipod-spec-artificer-image-ref-render", "aipod-spec-artificer-direct-ssh-render", "aipod-spec-git-mirror-url", "queue-submit-aipod-dry-run", "aipod-cli-help"] }; } finally { await new Promise((resolve) => server.server.close(() => resolve())); } diff --git a/src/selftest/cases/90-runner-image-tools.ts b/src/selftest/cases/90-runner-image-tools.ts index 5a146cf..595aab5 100644 --- a/src/selftest/cases/90-runner-image-tools.ts +++ b/src/selftest/cases/90-runner-image-tools.ts @@ -1,11 +1,14 @@ import assert from "node:assert/strict"; -import { readFile } from "node:fs/promises"; +import { chmod, mkdir, readFile, writeFile } from "node:fs/promises"; import { execFile } from "node:child_process"; import { promisify } from "node:util"; import path from "node:path"; import type { SelfTestCase } from "../harness.js"; +import { imageRefSourceIdentity, imageRefSourceSummary, isDigestPinnedImage, validateAipodImageRef } from "../../common/env-image-ref.js"; +import { smokeBundledWorkReadyCapabilities, smokeImageWorkReadyCapabilities, staticWorkReadyCapabilitySummary } from "../../common/work-ready.js"; +import { loadArtificerImageRef } from "../harness.js"; -const requiredRunnerPackages = Object.freeze(["git", "openssh-client", "ripgrep"]); +const requiredRunnerPackages = Object.freeze(["ca-certificates", "curl", "git", "github-cli", "kubectl", "nodejs", "npm", "openssh-client", "ripgrep"]); const execFileAsync = promisify(execFile); const selfTest: SelfTestCase = async (context) => { @@ -18,6 +21,9 @@ const selfTest: SelfTestCase = async (context) => { for (const packageName of requiredRunnerPackages) { assert.equal(apkPackages.has(packageName), true, `runner image must install ${packageName}`); } + assert.equal(containerfile.includes("bun install --production"), true, "runner image must install AgentRun npm dependencies at image build time"); + assert.equal(containerfile.includes("npm --version"), true, "runner image build smoke must verify npm"); + assert.equal(containerfile.includes("gh --version"), true, "runner image build smoke must verify GitHub CLI"); assert.equal(tran.startsWith("#!/usr/bin/env bun\n"), true, "tools/tran must be a shebang executable discovered by gitbundle tools"); assert.equal(trans.startsWith("#!/bin/sh\n"), true, "tools/trans must be a shebang executable discovered by gitbundle tools"); @@ -35,10 +41,40 @@ const selfTest: SelfTestCase = async (context) => { assert.equal(parsed.unsupported?.includes("apply-patch"), false); const patchHelp = await execFileAsync(path.join(context.root, "tools/apply_patch"), ["--help"], { cwd: context.root, timeout: 10_000 }); assert.equal(patchHelp.stdout.includes("reads *** Begin Patch format"), true); + const summary = staticWorkReadyCapabilitySummary(); + assert.equal(summary.valuesPrinted, false); + assert.ok((summary.requiredImageTools as string[]).includes("npm"), "work-ready capability must include npm"); + assert.ok((summary.requiredImageTools as string[]).includes("gh"), "work-ready capability must include gh"); + assert.equal(((summary.dependencyStrategy as { projectDependencies?: unknown }).projectDependencies), "not-installed-by-default"); + const fakeBin = await createFakeToolBin(path.join(context.tmp, "work-ready-bin")); + const smokeEnv = { PATH: fakeBin, AGENTRUN_RESOURCE_BIN_PATH: path.join(context.root, "tools") }; + const imageSmoke = await smokeImageWorkReadyCapabilities(smokeEnv); + assert.equal(((imageSmoke.smoke as { ok?: unknown }).ok), true); + assert.equal(imageSmoke.valuesPrinted, false); + const bundleSmoke = await smokeBundledWorkReadyCapabilities(smokeEnv); + assert.equal(((bundleSmoke.smoke as { ok?: unknown }).ok), true); + assert.equal(bundleSmoke.valuesPrinted, false); + assert.equal(JSON.stringify({ imageSmoke, bundleSmoke }).includes("GH_TOKEN"), false); + const artificerImageRef = await loadArtificerImageRef(context.root); + const imageRef = validateAipodImageRef({ kind: "env-image-dockerfile", repoUrl: "git@github.com:pikasTech/agentrun.git", commitId: artificerImageRef.commitId, dockerfilePath: "deploy/container/Containerfile" }); + assert.equal(imageRefSourceIdentity(imageRef).length, 20); + assert.equal((imageRefSourceSummary(imageRef).valuesPrinted), false); + assert.equal(isDigestPinnedImage("127.0.0.1:5000/agentrun/agentrun-mgr@sha256:1111111111111111111111111111111111111111111111111111111111111111"), true); + assert.equal(isDigestPinnedImage("127.0.0.1:5000/agentrun/agentrun-mgr:self-test"), false); - return { name: "90-runner-image-tools", tests: ["runner image installs required CLI tools", "gitbundle tran tools are executable and documented", "runner apply-patch helper is bundled"] }; + return { name: "90-runner-image-tools", tests: ["runner image installs required CLI tools", "runner image build verifies work-ready tools", "gitbundle tran tools are executable and documented", "runner apply-patch helper is bundled", "work-ready smoke runs without printing secrets", "aipod imageRef validates env image source identity"] }; }; +async function createFakeToolBin(dir: string): Promise { + await mkdir(dir, { recursive: true }); + for (const tool of ["bun", "node", "npm", "git", "ssh", "gh", "rg", "curl", "kubectl"]) { + const file = path.join(dir, tool); + await writeFile(file, `#!/bin/sh\necho ${tool}-selftest-version\n`, "utf8"); + await chmod(file, 0o755); + } + return dir; +} + function installedApkPackages(containerfile: string): Set { const packages = new Set(); const normalized = containerfile.replace(/\\\s*\r?\n\s*/gu, " "); diff --git a/src/selftest/harness.ts b/src/selftest/harness.ts index 937e830..e4fd235 100644 --- a/src/selftest/harness.ts +++ b/src/selftest/harness.ts @@ -1,10 +1,11 @@ -import { mkdtemp, mkdir, writeFile, rm } from "node:fs/promises"; +import { chmod, mkdtemp, mkdir, readFile, writeFile, rm } from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import assert from "node:assert/strict"; import { ManagerClient } from "../mgr/client.js"; -import type { BackendProfile, JsonRecord } from "../common/types.js"; +import type { AipodImageRef, BackendProfile, JsonRecord } from "../common/types.js"; import { backendProfileSpec } from "../common/backend-profiles.js"; +import { parseAipodSpecYaml } from "../common/aipod-specs.js"; import { dsflashGoModelCatalogJson } from "../common/model-catalogs.js"; export interface SelfTestContext { @@ -31,14 +32,19 @@ type SelfTestRunContext = Pick & Par export async function createSelfTestContext(root: string): Promise { const tmp = await mkdtemp(path.join(os.tmpdir(), "agentrun-selftest-")); + const previousSelftestWorkReadyBinPath = process.env.AGENTRUN_SELFTEST_WORK_READY_BIN_PATH; const codexHome = path.join(tmp, "codex-home"); const deepseekHome = path.join(tmp, "deepseek-home"); const minimaxM3Home = path.join(tmp, "minimax-m3-home"); + const workReadyBin = path.join(tmp, "work-ready-bin"); const workspace = path.join(tmp, "workspace"); await mkdir(codexHome, { recursive: true }); await mkdir(deepseekHome, { recursive: true }); await mkdir(minimaxM3Home, { recursive: true }); + await mkdir(workReadyBin, { recursive: true }); await mkdir(workspace, { recursive: true }); + await writeFakeWorkReadyTools(workReadyBin); + process.env.AGENTRUN_SELFTEST_WORK_READY_BIN_PATH = workReadyBin; await writeFile(path.join(codexHome, "auth.json"), JSON.stringify({ token: "test-token-material" })); await writeFile(path.join(codexHome, "config.toml"), "model = \"gpt-test\"\n"); await writeFile(path.join(deepseekHome, "auth.json"), JSON.stringify({ token: "test-token-material-deepseek" })); @@ -58,10 +64,22 @@ export async function createSelfTestContext(root: string): Promise rm(tmp, { recursive: true, force: true }), + cleanup: async () => { + if (previousSelftestWorkReadyBinPath === undefined) delete process.env.AGENTRUN_SELFTEST_WORK_READY_BIN_PATH; + else process.env.AGENTRUN_SELFTEST_WORK_READY_BIN_PATH = previousSelftestWorkReadyBinPath; + await rm(tmp, { recursive: true, force: true }); + }, }; } +async function writeFakeWorkReadyTools(dir: string): Promise { + for (const tool of ["bun", "node", "npm", "git", "ssh", "gh", "rg", "curl", "kubectl"]) { + const file = path.join(dir, tool); + await writeFile(file, `#!/bin/sh\necho ${tool}-selftest-version\n`, "utf8"); + await chmod(file, 0o755); + } +} + export async function createRunWithCommand(client: ManagerClient, context: SelfTestRunContext, prompt: string, idempotencyKey: string, timeoutMs: number): Promise<{ runId: string; commandId: string }> { const backendProfile = context.backendProfile ?? "codex"; const run = await client.post("/api/v1/runs", { @@ -116,6 +134,12 @@ export function profileSecretHome(context: Pick & return context.codexHome; } +export async function loadArtificerImageRef(root: string): Promise { + const text = await readFile(path.join(root, "config", "aipods", "artificer.yaml"), "utf8"); + const spec = parseAipodSpecYaml(text, "selftest-artificer-image-ref"); + return spec.spec.imageRef; +} + function defaultFakeCommand(): string { return process.versions.bun ? process.execPath : "npx"; }