Merge pull request #38 from pikasTech/fix/v01-transient-runner-env

feat: 支持 runner transient env
This commit is contained in:
Lyon
2026-06-01 17:52:11 +08:00
committed by GitHub
5 changed files with 125 additions and 4 deletions
@@ -47,6 +47,7 @@ AgentRun `v0.1` 承接 HWLAB v0.2 时,只吸收原有 Code Agent 的通用执
| conversation/session/thread 复用 | `internal/cloud/codex-stdio-session.ts`、`internal/cloud/code-agent-session-registry.ts` | `SessionRef` 保存 session/thread 摘要,runner 有 thread 则 resume,无 thread 则 start | [spec-v01-runtime-assembly.md](spec-v01-runtime-assembly.md) |
| 固定 repo workspace 执行 | `internal/cloud/code-agent-contract.ts`、`docs/reference/code-agent-chat-readiness.md` | `ResourceBundleRef` 使用 Git-only `repoUrl + full commitId` checkout 到隔离 workspace | [spec-v01-runtime-assembly.md](spec-v01-runtime-assembly.md)、[spec-v01-agentrun-runner.md](spec-v01-agentrun-runner.md) |
| provider profile 隔离和 Secret 不泄露 | `internal/cloud/code-agent-contract.ts`、`docs/reference/code-agent-chat-readiness.md` | `ProfileRef/SecretRef` profile-scoped 投影、缺失为 `secret-unavailable`、禁止 fallback 和泄露值 | [spec-v01-runtime-assembly.md](spec-v01-runtime-assembly.md)、[spec-v01-backend-adapter.md](spec-v01-backend-adapter.md) |
| device-pod 短期会话 env 注入 | `internal/cloud/server-code-agent-http.ts` 的 `codeAgentDevicePodAuthEnv()` | `runner-jobs.transientEnv` 只在本次 Kubernetes Job env 中生效;只记录 name/count,不保存或输出 value | [spec-v01-agentrun-mgr.md](spec-v01-agentrun-mgr.md)、[spec-v01-secret-distribution.md](spec-v01-secret-distribution.md) |
| provider/backend/cancel 等失败可区分 | `scripts/src/code-agent-response-contract.mjs`、`internal/cloud/code-agent-chat.ts` | failureKind 最小矩阵和 JSON 错误响应 | [spec-v01-agentrun-mgr.md](spec-v01-agentrun-mgr.md)、[spec-v01-backend-adapter.md](spec-v01-backend-adapter.md) |
| stdout/stderr/tool 输出必须有界 | `docs/reference/code-agent-chat-readiness.md`、`internal/cloud/code-agent-trace-store.ts` | `command_output`/`tool_call` 记录摘要、字节数、截断标记和必要引用 | [spec-v01-backend-adapter.md](spec-v01-backend-adapter.md) |
| runner/job 失败需要定位证据 | `internal/cloud/server-code-agent-http.ts` 的 trace/result 可见性 | runner job identity、attempt、jobName、pod/log identity 和最小 phase/exit 摘要 | [spec-v01-agentrun-runner.md](spec-v01-agentrun-runner.md)、[spec-v01-agentrun-mgr.md](spec-v01-agentrun-mgr.md) |
@@ -64,9 +65,12 @@ AgentRun `v0.1` 承接 HWLAB v0.2 时,只吸收原有 Code Agent 的通用执
| `idempotencyKey` | HWLAB 必须用 `traceId`、`messageId` 或等价稳定 key;相同 key 和相同 payload 返回既有 job/attempt。 |
| `image` / `backendImageRef` | 只能来自 manager allowlist、GitOps/catalog 或受控默认值;客户端不能传任意镜像扩大执行面。 |
| `retention` / `ttlSecondsAfterFinished` | 可选;默认遵循 runner Job TTL 规格。 |
| `transientEnv` | 可选,只用于本次 runner Job 的 Kubernetes env 渲染;不得写入 run/command/result/event 明文。用于承接 HWLAB 原 Code Agent 的短期 device-pod session token 和 API URL。 |
响应必须短返回 JSON,不等待完整模型 turn,至少包含:`runId`、`commandId`、`attemptId`、`jobName`、`namespace`、`runnerId`、`logPath`、`podIdentity`、后续 `commands show`、`events` 轮询入口。重复提交若 payload 不同,必须结构化失败,不能创建第二个同名业务 attempt。
`transientEnv` 是 runner-job 层的临时执行上下文,不是 AgentRun run 的 durable fact。manager 只能校验 env name、数量和 value 长度;payload hash 只保存 value hash;response、event、dry-run manifest 和错误详情不得输出明文 value。业务授权仍由 HWLAB 自己负责,AgentRun 只把调度方明确提供的短期 env 交给本次 runner。
## Run / Command 映射
HWLAB canary 创建 run 时应使用以下字段口径:
@@ -102,6 +102,18 @@ Run 的 `executionPolicy.secretScope` 只能包含引用,不包含值。示例
- Secret projection 不能直接作为 `CODEX_HOME`。Codex app-server 会读取并可能维护默认配置、PATH 或运行态文件;把只读 Secret volume 直接挂到 `CODEX_HOME` 会造成启动期写入失败。v0.1 的固定边界是:Secret volume 只读、`/home/agentrun` 由 `emptyDir` 提供可写 runtime home、复制动作只发生在 runner/backend 容器内且不打印文件内容。
- SecretRef 不存在或 RBAC 不允许时,run 必须失败为结构化 `failureKind=secret-unavailable` 或等价错误,不得降级成无凭证重试风暴。
## runner-job transientEnv
`transientEnv` 用于承接调度方生成的短期、单次 runner Job 运行上下文,例如 HWLAB Code Agent 的 device-pod session token 和 API URL。它不是 provider credential,也不是 run durable fact。
规则:
- `transientEnv` 只能出现在 `POST /api/v1/runs/:runId/runner-jobs` 请求中;不得写入 `CreateRunInput`、command payload、event payload 或 result envelope 的 value 明文。
- manager 只校验 env name、数量和 value 长度;runner job payload hash 只纳入 env name 与 value hash。
- response、runner job status、event 和 dry-run manifest 只能展示 env name、count 和 `valuesPrinted=false`;dry-run manifest 中的 transient env value 必须显示为 `REDACTED`。
- 正式 Kubernetes Job manifest 会把 value 注入到本次 runner container env;该 token 必须由调度方控制 TTL、权限和业务授权范围。
- AgentRun 不解释 HWLAB device-pod 权限,也不把业务鉴权做成通用 policy;AgentRun 只负责不持久化、不回显、不扩散这类短期 env value。
## 分发路径
`v0.1` 默认路径:
+44 -1
View File
@@ -6,6 +6,7 @@ import type { AgentRunStore } from "./store.js";
import type { JsonRecord } from "../common/types.js";
import { stableHash } from "../common/validation.js";
import { renderRunnerJobManifest } from "../runner/k8s-job.js";
import type { RunnerTransientEnv } from "../runner/k8s-job.js";
export interface RunnerJobDefaults {
namespace: string;
@@ -26,6 +27,7 @@ export interface CreateRunnerJobInput extends JsonRecord {
sourceCommit?: string;
serviceAccountName?: string;
idempotencyKey?: string;
transientEnv?: JsonRecord[];
}
export async function createKubernetesRunnerJob(options: { store: AgentRunStore; runId: string; input: CreateRunnerJobInput; defaults: RunnerJobDefaults }): Promise<JsonRecord> {
@@ -42,7 +44,18 @@ export async function createKubernetesRunnerJob(options: { store: AgentRunStore;
const sourceCommit = optionalString(options.input.sourceCommit) ?? options.defaults.sourceCommit;
const serviceAccountName = optionalString(options.input.serviceAccountName) ?? options.defaults.serviceAccountName;
const idempotencyKey = optionalString(options.input.idempotencyKey);
const normalizedPayload = { commandId, image, namespace, managerUrl, sourceCommit, serviceAccountName: serviceAccountName ?? null, attemptId: optionalString(options.input.attemptId) ?? null, runnerId: optionalString(options.input.runnerId) ?? null };
const transientEnv = transientEnvField(options.input.transientEnv);
const normalizedPayload = {
commandId,
image,
namespace,
managerUrl,
sourceCommit,
serviceAccountName: serviceAccountName ?? null,
attemptId: optionalString(options.input.attemptId) ?? null,
runnerId: optionalString(options.input.runnerId) ?? null,
transientEnv: transientEnv.map((item) => ({ name: item.name, valueHash: stableHash(item.value), sensitive: true })),
};
const payloadHash = stableHash(normalizedPayload);
if (idempotencyKey) {
const existing = await options.store.getRunnerJobByIdempotencyKey(run.id, idempotencyKey, payloadHash);
@@ -58,6 +71,7 @@ export async function createKubernetesRunnerJob(options: { store: AgentRunStore;
image,
namespace,
sourceCommit,
transientEnv,
...(serviceAccountName ? { serviceAccountName } : {}),
};
const attemptId = optionalString(options.input.attemptId);
@@ -92,6 +106,7 @@ export async function createKubernetesRunnerJob(options: { store: AgentRunStore;
logPath: `kubectl -n ${render.namespace} logs job/${render.jobName}`,
},
secretRefs: render.secretRefs.map((item) => ({ profile: item.profile, name: item.secretRef.name, namespace: item.secretRef.namespace ?? render.namespace, keys: item.secretRef.keys ?? [], mountPath: item.runtimeMountPath, projectionPath: item.projectionMountPath, writableCopy: true, valuesPrinted: false })),
transientEnv: summarizeTransientEnv(transientEnv),
retention: {
ttlSecondsAfterFinished: render.ttlSecondsAfterFinished,
},
@@ -132,12 +147,40 @@ export async function createKubernetesRunnerJob(options: { store: AgentRunStore;
namespace: saved.namespace,
jobName: saved.jobName,
idempotencyKey: idempotencyKey ? "present" : null,
transientEnv: summarizeTransientEnv(transientEnv),
sessionRef: summarizeSessionRef(run.sessionRef ?? null),
resourceBundleRef: summarizeResourceBundleRef(run.resourceBundleRef ?? null),
});
return response;
}
/**
 * Validates and normalizes the optional `transientEnv` field of a runner-job request.
 *
 * Accepts `undefined` (treated as "no transient env") or an array of at most 8
 * `{ name, value }` objects. Names must be unique uppercase env identifiers of at most
 * 64 characters; values must be non-empty strings of at most 8192 UTF-8 bytes.
 * Error messages mention only the entry index or the env name, never the value.
 *
 * Names that the runner container env already defines are rejected: transient entries
 * are appended after the runner's own variables in the container env list, so a
 * colliding name could shadow the runner-managed value and would also be displayed as
 * REDACTED by the name-based dry-run redaction.
 *
 * @param value Raw `transientEnv` field taken from the request payload.
 * @returns Normalized entries, each marked `sensitive: true`; empty when the field is absent.
 * @throws AgentRunError `schema-invalid` (HTTP 400) when any rule above is violated.
 */
function transientEnvField(value: unknown): RunnerTransientEnv[] {
  const maxEntries = 8;
  const maxValueBytes = 8192;
  // Hoisted so the pattern is compiled once rather than once per entry.
  const namePattern = /^[A-Z_][A-Z0-9_]{0,63}$/u;
  // NOTE(review): these are the runner-managed names visible next to the transient env
  // injection point; extend the set if the runner env defines further variables.
  const reservedNames = new Set(["HOME", "CODEX_HOME", "AGENTRUN_CODEX_SECRET_HOME"]);
  const invalid = (message: string) => new AgentRunError("schema-invalid", message, { httpStatus: 400 });

  if (value === undefined) return [];
  if (!Array.isArray(value)) throw invalid("transientEnv must be an array");
  if (value.length > maxEntries) throw invalid("transientEnv must contain at most 8 entries");

  const seen = new Set<string>();
  return value.map((entry, index) => {
    if (!entry || typeof entry !== "object" || Array.isArray(entry)) throw invalid(`transientEnv[${index}] must be an object`);
    const record = entry as JsonRecord;
    const name = stringField(record, "name");
    if (!namePattern.test(name)) throw invalid(`transientEnv[${index}].name must be an uppercase env name`);
    // Refuse to let a request shadow env vars the runner itself depends on.
    if (reservedNames.has(name)) throw invalid(`transientEnv[${index}].name ${name} is reserved for the runner`);
    if (seen.has(name)) throw invalid(`transientEnv name ${name} is duplicated`);
    seen.add(name);
    const rawValue = record.value;
    if (typeof rawValue !== "string" || rawValue.length === 0) throw invalid(`transientEnv[${index}].value must be a non-empty string`);
    // The limit is measured in encoded bytes, not UTF-16 code units.
    if (Buffer.byteLength(rawValue, "utf8") > maxValueBytes) throw invalid(`transientEnv[${index}].value is too large`);
    // Every transient entry is treated as sensitive regardless of what the caller sent.
    return { name, value: rawValue, sensitive: true };
  });
}
/**
 * Builds the value-free summary of transient env entries used in responses and events.
 *
 * @param items Normalized transient env entries.
 * @returns `{ count, names, valuesPrinted: false }`; entry values are never included.
 */
function summarizeTransientEnv(items: RunnerTransientEnv[]): JsonRecord {
  const names: string[] = [];
  for (const item of items) {
    names.push(item.name);
  }
  return { count: names.length, names, valuesPrinted: false };
}
async function kubectlCreate(manifest: JsonRecord, kubectlCommand: string): Promise<JsonRecord> {
const child = spawn(kubectlCommand, ["create", "-f", "-", "-o", "json"], { stdio: ["pipe", "pipe", "pipe"] });
let stdout = "";
+40 -1
View File
@@ -15,9 +15,16 @@ export interface RunnerJobRenderOptions {
imagePullPolicy?: string;
backoffLimit?: number;
ttlSecondsAfterFinished?: number;
transientEnv?: RunnerTransientEnv[];
dryRun?: boolean;
}
/**
 * One short-lived environment variable supplied for a single runner Job render.
 */
export interface RunnerTransientEnv {
// Env var name as it appears in the runner container env list.
name: string;
// Plain-text value placed in the rendered manifest; replaced with "REDACTED" in dry-run output.
value: string;
// Optional marker; the manifest rendering visible in this file does not read it.
sensitive?: boolean;
}
interface CredentialProjection {
profile: BackendProfile | string;
secretRef: SecretRef;
@@ -28,6 +35,7 @@ interface CredentialProjection {
export function renderRunnerJobDryRun(options: RunnerJobRenderOptions): JsonRecord {
const render = renderRunnerJobManifest({ ...options, dryRun: true });
const manifest = redactTransientEnvInManifest(render.manifest, options.transientEnv ?? []);
return {
dryRun: true,
mutation: false,
@@ -48,6 +56,7 @@ export function renderRunnerJobDryRun(options: RunnerJobRenderOptions): JsonReco
sourceCommit: render.sourceCommit,
},
secretRefs: render.secretRefs.map((item) => ({ profile: item.profile, name: item.secretRef.name, namespace: item.secretRef.namespace ?? render.namespace, keys: item.secretRef.keys ?? [], mountPath: item.runtimeMountPath, projectionPath: item.projectionMountPath, writableCopy: true, valuesPrinted: false })),
transientEnv: summarizeTransientEnv(options.transientEnv ?? []),
retention: {
ttlSecondsAfterFinished: render.ttlSecondsAfterFinished,
},
@@ -56,7 +65,7 @@ export function renderRunnerJobDryRun(options: RunnerJobRenderOptions): JsonReco
events: `./scripts/agentrun runs events ${options.run.id} --manager-url ${options.managerUrl} --after-seq 0 --limit 100`,
},
warnings: render.warnings,
manifest: render.manifest,
manifest,
};
}
@@ -151,9 +160,39 @@ function runnerEnv(options: RunnerJobRenderOptions, context: { namespace: string
{ name: "HOME", value: "/home/agentrun" },
{ name: "CODEX_HOME", value: codexHome },
...(selectedSecret ? [{ name: "AGENTRUN_CODEX_SECRET_HOME", value: selectedSecret.projectionMountPath }] : []),
...transientEnvVars(options.transientEnv ?? []),
];
}
/**
 * Converts transient env entries into container env records.
 *
 * Only `name` and `value` are carried over; the `sensitive` marker is dropped.
 *
 * @param items Transient env entries to render.
 * @returns One `{ name, value }` record per entry, in input order.
 */
function transientEnvVars(items: RunnerTransientEnv[]): JsonRecord[] {
  const envVars: JsonRecord[] = [];
  for (const { name, value } of items) {
    envVars.push({ name, value });
  }
  return envVars;
}
/**
 * Describes transient env entries without exposing their values.
 *
 * @param items Transient env entries passed to the render.
 * @returns A record with the entry count, the entry names in input order, and
 *          `valuesPrinted: false`.
 */
function summarizeTransientEnv(items: RunnerTransientEnv[]): JsonRecord {
  const count = items.length;
  const names = Array.from(items, (entry) => entry.name);
  const summary: JsonRecord = { count, names, valuesPrinted: false };
  return summary;
}
/**
 * Produces a display-safe manifest in which transient env values are masked.
 *
 * Walks `spec.template.spec.containers[*].env` and replaces the `value` of every entry
 * whose name matches one of `items` with the literal "REDACTED". Missing or non-array
 * levels are tolerated and simply leave nothing to mask.
 *
 * @param manifest Rendered Job manifest.
 * @param items Transient env entries whose names select the env records to mask.
 * @returns The same `manifest` object when `items` is empty; otherwise a deep copy
 *          (JSON round-trip) with matching values masked. The input is never mutated.
 */
function redactTransientEnvInManifest(manifest: JsonRecord, items: RunnerTransientEnv[]): JsonRecord {
  // Nothing to hide: hand the caller's manifest back untouched, without copying.
  if (items.length === 0) return manifest;

  const sensitiveNames = new Set<string>();
  for (const item of items) sensitiveNames.add(item.name);

  // JSON round-trip yields a deep copy, so masking below cannot leak into the input.
  const redacted = JSON.parse(JSON.stringify(manifest)) as JsonRecord;
  const jobSpec = redacted.spec as JsonRecord | undefined;
  const podTemplate = jobSpec?.template as JsonRecord | undefined;
  const podSpec = podTemplate?.spec as JsonRecord | undefined;
  const containerList = podSpec?.containers;
  if (!Array.isArray(containerList)) return redacted;

  (containerList as JsonRecord[]).forEach((container) => {
    const envList = container.env;
    if (!Array.isArray(envList)) return;
    (envList as JsonRecord[]).forEach((envEntry) => {
      const envName = envEntry.name;
      if (typeof envName === "string" && sensitiveNames.has(envName)) {
        envEntry.value = "REDACTED";
      }
    });
  });
  return redacted;
}
function credentialProjections(run: RunRecord, namespace: string): CredentialProjection[] {
const policy: ExecutionPolicy = run.executionPolicy;
const credentials = (policy.secretScope.providerCredentials ?? []).filter((item) => item.profile === run.backendProfile);
+25 -2
View File
@@ -20,12 +20,15 @@ const selfTest: SelfTestCase = async (context) => {
image: "127.0.0.1:5000/agentrun/agentrun-mgr@sha256:1111111111111111111111111111111111111111111111111111111111111111",
attemptId: "attempt_selftest",
sourceCommit: "self-test",
transientEnv: [{ name: "HWLAB_DEVICE_POD_SESSION_TOKEN", value: "test-token-material", sensitive: true }],
});
assert.equal(rendered.dryRun, true);
assert.equal(rendered.mutation, false);
assert.equal(((rendered.retention as JsonRecord).ttlSecondsAfterFinished), 86_400);
assert.equal((rendered.jobIdentity as { serviceAccountName?: string }).serviceAccountName, "agentrun-v01-runner");
assertRunnerJobUsesWritableCodexHome(rendered.manifest as JsonRecord, context.codexHome, "codex-0", "/var/run/agentrun/secrets/codex-0");
assert.equal(runnerEnvValue(rendered.manifest as JsonRecord, "HWLAB_DEVICE_POD_SESSION_TOKEN"), "REDACTED");
assert.deepEqual((((rendered.transientEnv as JsonRecord).names) as string[]), ["HWLAB_DEVICE_POD_SESSION_TOKEN"]);
assertNoSecretLeak(rendered);
const deepseekItem = await createRunWithCommand(client, { ...context, backendProfile: "deepseek" }, "deepseek job smoke", "selftest-deepseek-job-render", 15_000);
@@ -68,16 +71,26 @@ console.log(JSON.stringify({ apiVersion: manifest.apiVersion, kind: manifest.kin
try {
const jobClient = new ManagerClient(serverWithKubectl.baseUrl);
const jobItem = await createRunWithCommand(jobClient, context, "job create smoke", "selftest-job-create", 15_000);
const created = await jobClient.post(`/api/v1/runs/${jobItem.runId}/runner-jobs`, { commandId: jobItem.commandId, attemptId: "attempt_selftest_create" });
const created = await jobClient.post(`/api/v1/runs/${jobItem.runId}/runner-jobs`, {
commandId: jobItem.commandId,
attemptId: "attempt_selftest_create",
transientEnv: [
{ name: "HWLAB_DEVICE_POD_SESSION_TOKEN", value: "test-token-material", sensitive: true },
{ name: "HWLAB_CLOUD_API_URL", value: "http://cloud.test", sensitive: true },
],
});
assert.equal((created as { mutation?: unknown }).mutation, true);
assert.equal(((created as JsonRecord).retention as JsonRecord).ttlSecondsAfterFinished, 86_400);
assert.deepEqual((((created as JsonRecord).transientEnv as JsonRecord).names) as string[], ["HWLAB_DEVICE_POD_SESSION_TOKEN", "HWLAB_CLOUD_API_URL"]);
const manifest = JSON.parse(await readFile(createdManifest, "utf8")) as JsonRecord;
assert.equal((manifest.spec as JsonRecord).ttlSecondsAfterFinished, 86_400);
assert.equal(runnerEnvValue(manifest, "HWLAB_DEVICE_POD_SESSION_TOKEN"), "test-token-material");
assert.equal(runnerEnvValue(manifest, "HWLAB_CLOUD_API_URL"), "http://cloud.test");
assertNoSecretLeak(created);
} finally {
await new Promise<void>((resolve) => serverWithKubectl.server.close(() => resolve()));
}
return { name: "runner-k8s-job", tests: ["runner-k8s-job-dry-run", "runner-k8s-job-deepseek-profile-dry-run", "runner-k8s-job-create-api", "runner-k8s-job-retention-ttl"] };
return { name: "runner-k8s-job", tests: ["runner-k8s-job-dry-run", "runner-k8s-job-deepseek-profile-dry-run", "runner-k8s-job-create-api", "runner-k8s-job-retention-ttl", "runner-job-transient-env"] };
} finally {
await new Promise<void>((resolve) => server.server.close(() => resolve()));
}
@@ -85,6 +98,16 @@ console.log(JSON.stringify({ apiVersion: manifest.apiVersion, kind: manifest.kin
export default selfTest;
/**
 * Test helper: looks up an env value on the first container of a Job manifest.
 *
 * Reads `spec.template.spec.containers[0].env` without defensive checks, so a manifest
 * missing any of those levels makes the lookup throw.
 *
 * @param manifest Job manifest to inspect.
 * @param name Env var name to search for.
 * @returns The `value` of the first env record with that name, or `undefined` if none matches.
 */
function runnerEnvValue(manifest: JsonRecord, name: string): unknown {
  const podSpec = ((manifest.spec as JsonRecord).template as JsonRecord).spec as JsonRecord;
  const firstContainer = (podSpec.containers as JsonRecord[])[0] as JsonRecord;
  for (const envEntry of firstContainer.env as JsonRecord[]) {
    if (envEntry.name === name) return envEntry.value;
  }
  return undefined;
}
function assertRunnerJobUsesWritableCodexHome(manifest: JsonRecord, expectedCodexHome: string, volumeName: string, projectionPath: string): void {
const spec = manifest.spec as JsonRecord;
const template = spec.template as JsonRecord;