diff --git a/docs/reference/spec-v01-hwlab-manual-dispatch.md b/docs/reference/spec-v01-hwlab-manual-dispatch.md index 6c75e2b..4709df7 100644 --- a/docs/reference/spec-v01-hwlab-manual-dispatch.md +++ b/docs/reference/spec-v01-hwlab-manual-dispatch.md @@ -47,6 +47,7 @@ AgentRun `v0.1` 承接 HWLAB v0.2 时,只吸收原有 Code Agent 的通用执 | conversation/session/thread 复用 | `internal/cloud/codex-stdio-session.ts`、`internal/cloud/code-agent-session-registry.ts` | `SessionRef` 保存 session/thread 摘要,runner 有 thread 则 resume,无 thread 则 start | [spec-v01-runtime-assembly.md](spec-v01-runtime-assembly.md) | | 固定 repo workspace 执行 | `internal/cloud/code-agent-contract.ts`、`docs/reference/code-agent-chat-readiness.md` | `ResourceBundleRef` 使用 Git-only `repoUrl + full commitId` checkout 到隔离 workspace | [spec-v01-runtime-assembly.md](spec-v01-runtime-assembly.md)、[spec-v01-agentrun-runner.md](spec-v01-agentrun-runner.md) | | provider profile 隔离和 Secret 不泄露 | `internal/cloud/code-agent-contract.ts`、`docs/reference/code-agent-chat-readiness.md` | `ProfileRef/SecretRef` profile-scoped 投影、缺失为 `secret-unavailable`、禁止 fallback 和泄露值 | [spec-v01-runtime-assembly.md](spec-v01-runtime-assembly.md)、[spec-v01-backend-adapter.md](spec-v01-backend-adapter.md) | +| device-pod 短期会话 env 注入 | `internal/cloud/server-code-agent-http.ts` 的 `codeAgentDevicePodAuthEnv()` | `runner-jobs.transientEnv` 只在本次 Kubernetes Job env 中生效;只记录 name/count,不保存或输出 value | [spec-v01-agentrun-mgr.md](spec-v01-agentrun-mgr.md)、[spec-v01-secret-distribution.md](spec-v01-secret-distribution.md) | | provider/backend/cancel 等失败可区分 | `scripts/src/code-agent-response-contract.mjs`、`internal/cloud/code-agent-chat.ts` | failureKind 最小矩阵和 JSON 错误响应 | [spec-v01-agentrun-mgr.md](spec-v01-agentrun-mgr.md)、[spec-v01-backend-adapter.md](spec-v01-backend-adapter.md) | | stdout/stderr/tool 输出必须有界 | `docs/reference/code-agent-chat-readiness.md`、`internal/cloud/code-agent-trace-store.ts` | `command_output`/`tool_call` 记录摘要、字节数、截断标记和必要引用 | 
[spec-v01-backend-adapter.md](spec-v01-backend-adapter.md) | | runner/job 失败需要定位证据 | `internal/cloud/server-code-agent-http.ts` 的 trace/result 可见性 | runner job identity、attempt、jobName、pod/log identity 和最小 phase/exit 摘要 | [spec-v01-agentrun-runner.md](spec-v01-agentrun-runner.md)、[spec-v01-agentrun-mgr.md](spec-v01-agentrun-mgr.md) | @@ -64,9 +65,12 @@ AgentRun `v0.1` 承接 HWLAB v0.2 时,只吸收原有 Code Agent 的通用执 | `idempotencyKey` | HWLAB 必须用 `traceId`、`messageId` 或等价稳定 key;相同 key 和相同 payload 返回既有 job/attempt。 | | `image` / `backendImageRef` | 只能来自 manager allowlist、GitOps/catalog 或受控默认值;客户端不能传任意镜像扩大执行面。 | | `retention` / `ttlSecondsAfterFinished` | 可选;默认遵循 runner Job TTL 规格。 | +| `transientEnv` | 可选,只用于本次 runner Job 的 Kubernetes env 渲染;不得写入 run/command/result/event 明文。用于承接 HWLAB 原 Code Agent 的短期 device-pod session token 和 API URL。 | 响应必须短返回 JSON,不等待完整模型 turn,至少包含:`runId`、`commandId`、`attemptId`、`jobName`、`namespace`、`runnerId`、`logPath` 或 `podIdentity`、后续 `commands show` 与 `events` 轮询入口。重复提交若 payload 不同,必须结构化失败,不能创建第二个同名业务 attempt。 +`transientEnv` 是 runner-job 层的临时执行上下文,不是 AgentRun run 的 durable fact。manager 只能校验 env name、数量和 value 长度;payload hash 只保存 value hash,response、event、dry-run manifest 和错误详情不得输出明文 value。业务授权仍由 HWLAB 自己负责,AgentRun 只把调度方明确提供的短期 env 交给本次 runner。 + ## Run / Command 映射 HWLAB canary 创建 run 时应使用以下字段口径: diff --git a/docs/reference/spec-v01-secret-distribution.md b/docs/reference/spec-v01-secret-distribution.md index b45b656..38d8c2f 100644 --- a/docs/reference/spec-v01-secret-distribution.md +++ b/docs/reference/spec-v01-secret-distribution.md @@ -102,6 +102,18 @@ Run 的 `executionPolicy.secretScope` 只能包含引用,不包含值。示例 - Secret projection 不能直接作为 `CODEX_HOME`。Codex app-server 会读取并可能维护默认配置、PATH 或运行态文件;把只读 Secret volume 直接挂到 `CODEX_HOME` 会造成启动期写入失败。v0.1 的固定边界是:Secret volume 只读、`/home/agentrun` 由 `emptyDir` 提供可写 runtime home、复制动作只发生在 runner/backend 容器内且不打印文件内容。 - SecretRef 不存在或 RBAC 不允许时,run 必须失败为结构化 `failureKind=secret-unavailable` 或等价错误,不得降级成无凭证重试风暴。 +## 
runner-job transientEnv + +`transientEnv` 用于承接调度方生成的短期、单次 runner Job 运行上下文,例如 HWLAB Code Agent 的 device-pod session token 和 API URL。它不是 provider credential,也不是 run durable fact。 + +规则: + +- `transientEnv` 只能出现在 `POST /api/v1/runs/:runId/runner-jobs` 请求中;不得写入 `CreateRunInput`、command payload、event payload 或 result envelope 的 value 明文。 +- manager 只校验 env name、数量和 value 长度;runner job payload hash 只纳入 env name 与 value hash。 +- response、runner job status、event 和 dry-run manifest 只能展示 env name、count 和 `valuesPrinted=false`;dry-run manifest 中的 transient env value 必须显示为 `REDACTED`。 +- 正式 Kubernetes Job manifest 会把 value 注入到本次 runner container env;该 token 必须由调度方控制 TTL、权限和业务授权范围。 +- AgentRun 不解释 HWLAB device-pod 权限,也不把业务鉴权做成通用 policy;AgentRun 只负责不持久化、不回显、不扩散这类短期 env value。 + ## 分发路径 `v0.1` 默认路径: diff --git a/src/mgr/kubernetes-runner-job.ts b/src/mgr/kubernetes-runner-job.ts index 71287cc..a165d8b 100644 --- a/src/mgr/kubernetes-runner-job.ts +++ b/src/mgr/kubernetes-runner-job.ts @@ -6,6 +6,7 @@ import type { AgentRunStore } from "./store.js"; import type { JsonRecord } from "../common/types.js"; import { stableHash } from "../common/validation.js"; import { renderRunnerJobManifest } from "../runner/k8s-job.js"; +import type { RunnerTransientEnv } from "../runner/k8s-job.js"; export interface RunnerJobDefaults { namespace: string; @@ -26,6 +27,7 @@ export interface CreateRunnerJobInput extends JsonRecord { sourceCommit?: string; serviceAccountName?: string; idempotencyKey?: string; + transientEnv?: JsonRecord[]; } export async function createKubernetesRunnerJob(options: { store: AgentRunStore; runId: string; input: CreateRunnerJobInput; defaults: RunnerJobDefaults }): Promise { @@ -42,7 +44,18 @@ export async function createKubernetesRunnerJob(options: { store: AgentRunStore; const sourceCommit = optionalString(options.input.sourceCommit) ?? options.defaults.sourceCommit; const serviceAccountName = optionalString(options.input.serviceAccountName) ?? 
options.defaults.serviceAccountName; const idempotencyKey = optionalString(options.input.idempotencyKey); - const normalizedPayload = { commandId, image, namespace, managerUrl, sourceCommit, serviceAccountName: serviceAccountName ?? null, attemptId: optionalString(options.input.attemptId) ?? null, runnerId: optionalString(options.input.runnerId) ?? null }; + const transientEnv = transientEnvField(options.input.transientEnv); + const normalizedPayload = { + commandId, + image, + namespace, + managerUrl, + sourceCommit, + serviceAccountName: serviceAccountName ?? null, + attemptId: optionalString(options.input.attemptId) ?? null, + runnerId: optionalString(options.input.runnerId) ?? null, + transientEnv: transientEnv.map((item) => ({ name: item.name, valueHash: stableHash(item.value), sensitive: true })), + }; const payloadHash = stableHash(normalizedPayload); if (idempotencyKey) { const existing = await options.store.getRunnerJobByIdempotencyKey(run.id, idempotencyKey, payloadHash); @@ -58,6 +71,7 @@ export async function createKubernetesRunnerJob(options: { store: AgentRunStore; image, namespace, sourceCommit, + transientEnv, ...(serviceAccountName ? { serviceAccountName } : {}), }; const attemptId = optionalString(options.input.attemptId); @@ -92,6 +106,7 @@ export async function createKubernetesRunnerJob(options: { store: AgentRunStore; logPath: `kubectl -n ${render.namespace} logs job/${render.jobName}`, }, secretRefs: render.secretRefs.map((item) => ({ profile: item.profile, name: item.secretRef.name, namespace: item.secretRef.namespace ?? render.namespace, keys: item.secretRef.keys ?? 
[], mountPath: item.runtimeMountPath, projectionPath: item.projectionMountPath, writableCopy: true, valuesPrinted: false })), + transientEnv: summarizeTransientEnv(transientEnv), retention: { ttlSecondsAfterFinished: render.ttlSecondsAfterFinished, }, @@ -132,12 +147,56 @@ export async function createKubernetesRunnerJob(options: { store: AgentRunStore; namespace: saved.namespace, jobName: saved.jobName, idempotencyKey: idempotencyKey ? "present" : null, + transientEnv: summarizeTransientEnv(transientEnv), sessionRef: summarizeSessionRef(run.sessionRef ?? null), resourceBundleRef: summarizeResourceBundleRef(run.resourceBundleRef ?? null), }); return response; } +/** Env names that runnerEnv in ../runner/k8s-job.ts sets before appending transientEnv; a caller-supplied duplicate could shadow the built-in entry. */ +const RESERVED_TRANSIENT_ENV_NAMES = new Set(["HOME", "CODEX_HOME"]); +/** Prefix of AgentRun-owned runner env such as AGENTRUN_CODEX_SECRET_HOME. */ +const RESERVED_TRANSIENT_ENV_PREFIX = "AGENTRUN_"; + +/** + * Validates the request's transientEnv field and normalizes it to RunnerTransientEnv entries. + * Accepts at most 8 entries with unique uppercase names and non-empty string values of at most 8192 UTF-8 bytes; every returned entry is marked sensitive. + * Names that collide with runner-owned env are rejected so a caller cannot shadow them and so dry-run redaction, which matches by name, never masks a built-in entry. + * + * @param value - raw transientEnv value from the request body; undefined yields an empty list. + * @returns the validated entries in request order. + * @throws AgentRunError ("schema-invalid", httpStatus 400) when any rule above is violated. + */ +function transientEnvField(value: unknown): RunnerTransientEnv[] { + if (value === undefined) return []; + if (!Array.isArray(value)) throw new AgentRunError("schema-invalid", "transientEnv must be an array", { httpStatus: 400 }); + if (value.length > 8) throw new AgentRunError("schema-invalid", "transientEnv must contain at most 8 entries", { httpStatus: 400 }); + const seen = new Set(); + return value.map((entry, index) => { + if (!entry || typeof entry !== "object" || Array.isArray(entry)) throw new AgentRunError("schema-invalid", `transientEnv[${index}] must be an object`, { httpStatus: 400 }); + const record = entry as JsonRecord; + const name = stringField(record, "name"); + if (!/^[A-Z_][A-Z0-9_]{0,63}$/u.test(name)) throw new AgentRunError("schema-invalid", `transientEnv[${index}].name must be an uppercase env name`, { httpStatus: 400 }); + if (RESERVED_TRANSIENT_ENV_NAMES.has(name) || name.startsWith(RESERVED_TRANSIENT_ENV_PREFIX)) throw new AgentRunError("schema-invalid", `transientEnv[${index}].name ${name} is reserved for the runner`, { httpStatus: 400 }); + if (seen.has(name)) throw new AgentRunError("schema-invalid", `transientEnv name ${name} is duplicated`, { httpStatus: 400 }); + seen.add(name); + const rawValue = record.value; + if (typeof rawValue !== "string" || rawValue.length === 0) throw new AgentRunError("schema-invalid", `transientEnv[${index}].value must be a non-empty string`, { httpStatus: 400 }); + if (Buffer.byteLength(rawValue, "utf8") > 8192) throw new AgentRunError("schema-invalid", 
`transientEnv[${index}].value is too large`, { httpStatus: 400 }); + return { name, value: rawValue, sensitive: true }; + }); +} + +/** Builds a value-free summary (count, names, valuesPrinted: false) of the transient env entries. */ +function summarizeTransientEnv(items: RunnerTransientEnv[]): JsonRecord { + return { + count: items.length, + names: items.map((item) => item.name), + valuesPrinted: false, + }; +} + async function kubectlCreate(manifest: JsonRecord, kubectlCommand: string): Promise { const child = spawn(kubectlCommand, ["create", "-f", "-", "-o", "json"], { stdio: ["pipe", "pipe", "pipe"] }); let stdout = ""; diff --git a/src/runner/k8s-job.ts b/src/runner/k8s-job.ts index bc3705c..525be0e 100644 --- a/src/runner/k8s-job.ts +++ b/src/runner/k8s-job.ts @@ -15,9 +15,16 @@ export interface RunnerJobRenderOptions { imagePullPolicy?: string; backoffLimit?: number; ttlSecondsAfterFinished?: number; + transientEnv?: RunnerTransientEnv[]; dryRun?: boolean; } +/** One caller-supplied env entry (name, value, optional sensitive flag) rendered into the runner container env. */ export interface RunnerTransientEnv { + name: string; + value: string; + sensitive?: boolean; +} + interface CredentialProjection { profile: BackendProfile | string; secretRef: SecretRef; @@ -28,6 +35,7 @@ interface CredentialProjection { export function renderRunnerJobDryRun(options: RunnerJobRenderOptions): JsonRecord { const render = renderRunnerJobManifest({ ...options, dryRun: true }); + const manifest = redactTransientEnvInManifest(render.manifest, options.transientEnv ?? []); return { dryRun: true, mutation: false, @@ -48,6 +56,7 @@ export function renderRunnerJobDryRun(options: RunnerJobRenderOptions): JsonReco sourceCommit: render.sourceCommit, }, secretRefs: render.secretRefs.map((item) => ({ profile: item.profile, name: item.secretRef.name, namespace: item.secretRef.namespace ?? render.namespace, keys: item.secretRef.keys ?? [], mountPath: item.runtimeMountPath, projectionPath: item.projectionMountPath, writableCopy: true, valuesPrinted: false })), + transientEnv: summarizeTransientEnv(options.transientEnv ?? 
[]), retention: { ttlSecondsAfterFinished: render.ttlSecondsAfterFinished, }, @@ -56,7 +65,7 @@ export function renderRunnerJobDryRun(options: RunnerJobRenderOptions): JsonReco events: `./scripts/agentrun runs events ${options.run.id} --manager-url ${options.managerUrl} --after-seq 0 --limit 100`, }, warnings: render.warnings, - manifest: render.manifest, + manifest, }; } @@ -151,9 +160,39 @@ function runnerEnv(options: RunnerJobRenderOptions, context: { namespace: string { name: "HOME", value: "/home/agentrun" }, { name: "CODEX_HOME", value: codexHome }, ...(selectedSecret ? [{ name: "AGENTRUN_CODEX_SECRET_HOME", value: selectedSecret.projectionMountPath }] : []), + ...transientEnvVars(options.transientEnv ?? []), ]; } +/** Maps transient env entries to { name, value } env records; the sensitive flag is not carried over. */ function transientEnvVars(items: RunnerTransientEnv[]): JsonRecord[] { + return items.map((item) => ({ name: item.name, value: item.value })); +} + +/** Builds a value-free summary (count, names, valuesPrinted: false) of the transient env entries. */ function summarizeTransientEnv(items: RunnerTransientEnv[]): JsonRecord { + return { + count: items.length, + names: items.map((item) => item.name), + valuesPrinted: false, + }; +} + +/** Returns the manifest itself when items is empty; otherwise returns a JSON deep copy in which every spec.template.spec.containers[*].env entry whose name matches a transient env name has its value replaced by "REDACTED". Only containers is walked, and matching is by name alone, so any other env entry sharing a name is redacted too. */ function redactTransientEnvInManifest(manifest: JsonRecord, items: RunnerTransientEnv[]): JsonRecord { + if (items.length === 0) return manifest; + const names = new Set(items.map((item) => item.name)); + const copy = JSON.parse(JSON.stringify(manifest)) as JsonRecord; /* JSON round-trip copy so the input manifest is left untouched */ + const spec = copy.spec as JsonRecord | undefined; + const template = spec?.template as JsonRecord | undefined; + const podSpec = template?.spec as JsonRecord | undefined; + const containers = Array.isArray(podSpec?.containers) ? podSpec.containers as JsonRecord[] : []; + for (const container of containers) { + const env = Array.isArray(container.env) ? 
container.env as JsonRecord[] : []; + for (const entry of env) { + if (typeof entry.name === "string" && names.has(entry.name)) entry.value = "REDACTED"; + } + } + return copy; +} + function credentialProjections(run: RunRecord, namespace: string): CredentialProjection[] { const policy: ExecutionPolicy = run.executionPolicy; const credentials = (policy.secretScope.providerCredentials ?? []).filter((item) => item.profile === run.backendProfile); diff --git a/src/selftest/cases/20-runner-k8s-job.ts b/src/selftest/cases/20-runner-k8s-job.ts index 1b8c026..bee8072 100644 --- a/src/selftest/cases/20-runner-k8s-job.ts +++ b/src/selftest/cases/20-runner-k8s-job.ts @@ -20,12 +20,15 @@ const selfTest: SelfTestCase = async (context) => { image: "127.0.0.1:5000/agentrun/agentrun-mgr@sha256:1111111111111111111111111111111111111111111111111111111111111111", attemptId: "attempt_selftest", sourceCommit: "self-test", + transientEnv: [{ name: "HWLAB_DEVICE_POD_SESSION_TOKEN", value: "test-token-material", sensitive: true }], }); assert.equal(rendered.dryRun, true); assert.equal(rendered.mutation, false); assert.equal(((rendered.retention as JsonRecord).ttlSecondsAfterFinished), 86_400); assert.equal((rendered.jobIdentity as { serviceAccountName?: string }).serviceAccountName, "agentrun-v01-runner"); assertRunnerJobUsesWritableCodexHome(rendered.manifest as JsonRecord, context.codexHome, "codex-0", "/var/run/agentrun/secrets/codex-0"); + assert.equal(runnerEnvValue(rendered.manifest as JsonRecord, "HWLAB_DEVICE_POD_SESSION_TOKEN"), "REDACTED"); + assert.deepEqual((((rendered.transientEnv as JsonRecord).names) as string[]), ["HWLAB_DEVICE_POD_SESSION_TOKEN"]); assertNoSecretLeak(rendered); const deepseekItem = await createRunWithCommand(client, { ...context, backendProfile: "deepseek" }, "deepseek job smoke", "selftest-deepseek-job-render", 15_000); @@ -68,16 +71,26 @@ console.log(JSON.stringify({ apiVersion: manifest.apiVersion, kind: manifest.kin try { const jobClient = new 
ManagerClient(serverWithKubectl.baseUrl); const jobItem = await createRunWithCommand(jobClient, context, "job create smoke", "selftest-job-create", 15_000); - const created = await jobClient.post(`/api/v1/runs/${jobItem.runId}/runner-jobs`, { commandId: jobItem.commandId, attemptId: "attempt_selftest_create" }); + const created = await jobClient.post(`/api/v1/runs/${jobItem.runId}/runner-jobs`, { + commandId: jobItem.commandId, + attemptId: "attempt_selftest_create", + transientEnv: [ + { name: "HWLAB_DEVICE_POD_SESSION_TOKEN", value: "test-token-material", sensitive: true }, + { name: "HWLAB_CLOUD_API_URL", value: "http://cloud.test", sensitive: true }, + ], + }); assert.equal((created as { mutation?: unknown }).mutation, true); assert.equal(((created as JsonRecord).retention as JsonRecord).ttlSecondsAfterFinished, 86_400); + assert.deepEqual((((created as JsonRecord).transientEnv as JsonRecord).names) as string[], ["HWLAB_DEVICE_POD_SESSION_TOKEN", "HWLAB_CLOUD_API_URL"]); const manifest = JSON.parse(await readFile(createdManifest, "utf8")) as JsonRecord; assert.equal((manifest.spec as JsonRecord).ttlSecondsAfterFinished, 86_400); + assert.equal(runnerEnvValue(manifest, "HWLAB_DEVICE_POD_SESSION_TOKEN"), "test-token-material"); + assert.equal(runnerEnvValue(manifest, "HWLAB_CLOUD_API_URL"), "http://cloud.test"); assertNoSecretLeak(created); } finally { await new Promise((resolve) => serverWithKubectl.server.close(() => resolve())); } - return { name: "runner-k8s-job", tests: ["runner-k8s-job-dry-run", "runner-k8s-job-deepseek-profile-dry-run", "runner-k8s-job-create-api", "runner-k8s-job-retention-ttl"] }; + return { name: "runner-k8s-job", tests: ["runner-k8s-job-dry-run", "runner-k8s-job-deepseek-profile-dry-run", "runner-k8s-job-create-api", "runner-k8s-job-retention-ttl", "runner-job-transient-env"] }; } finally { await new Promise((resolve) => server.server.close(() => resolve())); } @@ -85,6 +98,16 @@ console.log(JSON.stringify({ apiVersion: 
manifest.apiVersion, kind: manifest.kin export default selfTest; +/** Test helper: returns the value of the first env entry called name on the first container (spec.template.spec.containers[0]) of the Job manifest, or undefined when no entry has that name. */ function runnerEnvValue(manifest: JsonRecord, name: string): unknown { + const spec = manifest.spec as JsonRecord; + const template = spec.template as JsonRecord; + const podSpec = template.spec as JsonRecord; + const containers = podSpec.containers as JsonRecord[]; + const runner = containers[0] as JsonRecord; + const env = runner.env as JsonRecord[]; + return env.find((item) => item.name === name)?.value; +} + function assertRunnerJobUsesWritableCodexHome(manifest: JsonRecord, expectedCodexHome: string, volumeName: string, projectionPath: string): void { const spec = manifest.spec as JsonRecord; const template = spec.template as JsonRecord;