#!/usr/bin/env bun

/**
 * Analyze merged GitHub pull requests, grouped by the author email of each
 * PR's head commit (the last commit on the PR branch).
 *
 * The script pages through every MERGED pull request of a repository via the
 * GitHub GraphQL API, optionally filters by merge date, and prints per-email
 * counts as a text table, JSON, or CSV.
 */

/** Supported output renderers. */
type OutputFormat = "table" | "json" | "csv";

/** Parsed command-line options. */
type Options = {
  repo: string;
  output: OutputFormat;
  since: Date | null;
  until: Date | null;
  limit: number | null;
  examples: number;
  caseSensitive: boolean;
  token: string | null;
};

/** One merged pull request together with its head-commit author details. */
type PullRecord = {
  number: number;
  title: string;
  url: string;
  mergedAt: string;
  headOid: string | null;
  email: string | null;
  authorName: string | null;
  authorLogin: string | null;
};

/** Aggregated pull requests for a single (possibly lowercased) email. */
type EmailStat = {
  email: string;
  count: number;
  prs: PullRecord[];
};

const defaultRepo = "pikasTech/PikaPython";

/**
 * Entry point: parse arguments, fetch merged PRs, filter, aggregate, print.
 *
 * The limit is applied after date filtering, in the order the API returned
 * the records (the query orders by creation time, newest first).
 */
async function main(): Promise<void> {
  const options = parseArgs(Bun.argv.slice(2));
  const repo = parseRepo(options.repo);
  const pulls = await fetchMergedPulls(repo.owner, repo.name, options);
  const filtered = pulls.filter((pr) => withinDateRange(pr.mergedAt, options.since, options.until));
  const selected = options.limit === null ? filtered : filtered.slice(0, options.limit);
  const stats = summarize(selected, options);

  if (options.output === "json") {
    printJson(options, selected, stats);
  } else if (options.output === "csv") {
    printCsv(selected, stats);
  } else {
    printTable(options, selected, stats);
  }
}

/**
 * Parse command-line arguments into an {@link Options} object.
 *
 * Both `--flag value` and `--flag=value` spellings are accepted for flags
 * that take a value. `--help` / `-h` prints usage and exits with status 0.
 *
 * @param args Raw arguments, without the runtime and script path.
 * @returns The parsed options, with defaults for anything not supplied.
 * @throws Error on an unknown argument, a missing value, or an invalid value.
 */
function parseArgs(args: string[]): Options {
  const options: Options = {
    repo: defaultRepo,
    output: "table",
    since: null,
    until: null,
    limit: null,
    examples: 6,
    caseSensitive: false,
    token: process.env.GITHUB_TOKEN ?? process.env.GH_TOKEN ?? null,
  };

  for (let index = 0; index < args.length; index += 1) {
    const arg = args[index];
    if (arg === undefined) continue;
    if (arg === "--help" || arg === "-h") {
      printHelp();
      process.exit(0);
    }
    if (arg === "--repo") {
      options.repo = requireValue(args, ++index, arg);
    } else if (arg.startsWith("--repo=")) {
      options.repo = arg.slice("--repo=".length);
    } else if (arg === "--json") {
      options.output = "json";
    } else if (arg === "--csv") {
      options.output = "csv";
    } else if (arg === "--since") {
      options.since = parseDateArg(requireValue(args, ++index, arg), arg);
    } else if (arg.startsWith("--since=")) {
      options.since = parseDateArg(arg.slice("--since=".length), "--since");
    } else if (arg === "--until") {
      // Date-only --until values cover the whole UTC day (inclusive bound).
      options.until = parseDateArg(requireValue(args, ++index, arg), arg, true);
    } else if (arg.startsWith("--until=")) {
      options.until = parseDateArg(arg.slice("--until=".length), "--until", true);
    } else if (arg === "--limit") {
      options.limit = parsePositiveInt(requireValue(args, ++index, arg), arg);
    } else if (arg.startsWith("--limit=")) {
      options.limit = parsePositiveInt(arg.slice("--limit=".length), "--limit");
    } else if (arg === "--examples") {
      options.examples = parsePositiveInt(requireValue(args, ++index, arg), arg);
    } else if (arg.startsWith("--examples=")) {
      options.examples = parsePositiveInt(arg.slice("--examples=".length), "--examples");
    } else if (arg === "--case-sensitive") {
      options.caseSensitive = true;
    } else if (arg === "--token") {
      options.token = requireValue(args, ++index, arg);
    } else if (arg.startsWith("--token=")) {
      options.token = arg.slice("--token=".length);
    } else {
      throw new Error(`unknown argument: ${arg}`);
    }
  }

  return options;
}

/** Print usage information to stdout. */
function printHelp(): void {
  console.log(`Analyze merged GitHub PR counts by head commit author email.

Usage:
  bun scripts/analyze-github-merged-pr-emails.ts [options]

Options:
  --repo owner/name     GitHub repo. Default: ${defaultRepo}
  --since YYYY-MM-DD    Include PRs merged at or after this UTC date.
  --until YYYY-MM-DD    Include PRs merged before or at this UTC date (inclusive).
  --limit N             Limit merged PR records after date filtering.
  --examples N          Number of example PR numbers in table output. Default: 6
  --json                Print machine-readable JSON.
  --csv                 Print CSV.
  --case-sensitive      Do not lowercase emails before grouping.
  --token TOKEN         GitHub token. Defaults to GITHUB_TOKEN or GH_TOKEN.

Definition:
  A real PR is a pull request whose GitHub state is MERGED.
  Each merged PR is counted once, using the author email of the PR head commit.
`);
}

/**
 * Return the value following a flag.
 *
 * @param args Full argument list.
 * @param index Position where the value is expected.
 * @param flag Flag name, used in the error message.
 * @throws Error when the value is absent or looks like another `--flag`.
 */
function requireValue(args: string[], index: number, flag: string): string {
  const value = args[index];
  if (value === undefined || value.startsWith("--")) {
    throw new Error(`${flag} requires a value`);
  }
  return value;
}

/**
 * Parse a date argument.
 *
 * A bare `YYYY-MM-DD` value is interpreted in UTC: as the start of that day
 * by default, or as its last millisecond when `endOfDay` is true (so that an
 * upper bound includes everything that happened on that date). Any other
 * value is handed to the `Date` constructor unchanged.
 *
 * @param value Raw argument text.
 * @param flag Flag name, used in the error message.
 * @param endOfDay Resolve date-only values to 23:59:59.999Z instead of 00:00:00.000Z.
 * @throws Error when the value does not parse to a valid date.
 */
function parseDateArg(value: string, flag: string, endOfDay = false): Date {
  const isDateOnly = /^\d{4}-\d{2}-\d{2}$/.test(value);
  const timeSuffix = endOfDay ? "T23:59:59.999Z" : "T00:00:00.000Z";
  const normalized = isDateOnly ? `${value}${timeSuffix}` : value;
  const parsed = new Date(normalized);
  if (Number.isNaN(parsed.getTime())) {
    throw new Error(`${flag} must be a valid date, got: ${value}`);
  }
  return parsed;
}

/**
 * Parse an integer argument that must be at least 1.
 *
 * @throws Error when the value is not an integer or is less than 1.
 */
function parsePositiveInt(value: string, flag: string): number {
  const parsed = Number(value);
  if (!Number.isInteger(parsed) || parsed < 1) {
    throw new Error(`${flag} must be a positive integer, got: ${value}`);
  }
  return parsed;
}

/**
 * Split a repository reference into owner and name.
 *
 * Accepts `owner/name` or a github.com URL (http or https), with an optional
 * trailing slash and an optional `.git` suffix.
 *
 * @throws Error when the reference does not reduce to exactly `owner/name`.
 */
function parseRepo(repo: string): { owner: string; name: string } {
  const cleaned = repo
    .replace(/^https?:\/\/github\.com\//, "")
    .replace(/\/+$/, "")
    .replace(/\.git$/, "");
  const parts = cleaned.split("/");
  const [owner, name] = parts;
  if (parts.length !== 2 || owner === undefined || owner === "" || name === undefined || name === "") {
    throw new Error(`repo must be owner/name or github.com URL, got: ${repo}`);
  }
  return { owner, name };
}

/**
 * Fetch every merged pull request of a repository, 100 per page, following
 * the GraphQL cursor until the API reports no further page.
 *
 * Nodes that lack a numeric `number` or a string `mergedAt` are skipped.
 *
 * @throws Error when no token is configured or the API call fails.
 */
async function fetchMergedPulls(owner: string, name: string, options: Options): Promise<PullRecord[]> {
  const token = options.token?.trim() ?? "";
  if (token === "") {
    throw new Error("GitHub GraphQL requires a token; set GITHUB_TOKEN/GH_TOKEN or pass --token");
  }

  const query = `
query($owner: String!, $name: String!, $cursor: String) {
  repository(owner: $owner, name: $name) {
    pullRequests(states: MERGED, first: 100, after: $cursor, orderBy: {field: CREATED_AT, direction: DESC}) {
      nodes {
        number
        title
        url
        mergedAt
        commits(last: 1) {
          nodes {
            commit {
              oid
              author {
                name
                email
                user { login }
              }
            }
          }
        }
      }
      pageInfo {
        hasNextPage
        endCursor
      }
    }
  }
  rateLimit {
    remaining
    resetAt
  }
}`;

  const pulls: PullRecord[] = [];
  let cursor: string | null = null;

  while (true) {
    const payload = await githubGraphql(token, query, { owner, name, cursor });
    const repository = asRecord(asRecord(payload.data)?.repository);
    const pullRequests = asRecord(repository?.pullRequests);
    for (const node of asArray(pullRequests?.nodes)) {
      const pr = parsePullNode(node);
      if (pr !== null) pulls.push(pr);
    }
    const pageInfo = asRecord(pullRequests?.pageInfo);
    const endCursor = pageInfo?.endCursor;
    cursor = typeof endCursor === "string" ? endCursor : null;
    // Stop when the API says so, or when it gives no cursor to continue from.
    if (pageInfo?.hasNextPage !== true || cursor === null) break;
  }

  return pulls;
}

/**
 * POST a GraphQL query to the GitHub API and return the decoded response.
 *
 * @throws Error when the body is not JSON, is not a JSON object, the HTTP
 *   status is not OK, or the response carries an `errors` member.
 */
async function githubGraphql(
  token: string,
  query: string,
  variables: Record<string, unknown>,
): Promise<Record<string, unknown>> {
  const response = await fetch("https://api.github.com/graphql", {
    method: "POST",
    headers: {
      "Accept": "application/vnd.github+json",
      "Authorization": `Bearer ${token}`,
      "Content-Type": "application/json",
      "User-Agent": "unidesk-pr-email-stats",
      "X-GitHub-Api-Version": "2022-11-28",
    },
    body: JSON.stringify({ query, variables }),
  });
  const body = await response.text();
  let parsed: unknown;
  try {
    parsed = JSON.parse(body);
  } catch {
    throw new Error(`GitHub GraphQL returned non-JSON status=${response.status}: ${body.slice(0, 500)}`);
  }
  const record = asRecord(parsed);
  if (record === null) {
    throw new Error("GitHub GraphQL returned a non-object response");
  }
  if (!response.ok || record.errors !== undefined) {
    throw new Error(`GitHub GraphQL failed status=${response.status}: ${JSON.stringify(record.errors ?? record)}`);
  }
  return record;
}

/**
 * Convert one `pullRequests.nodes` entry into a {@link PullRecord}.
 *
 * @returns The record, or `null` when the node is not an object or lacks a
 *   numeric `number` or a string `mergedAt`.
 */
function parsePullNode(node: unknown): PullRecord | null {
  const record = asRecord(node);
  if (record === null) return null;

  const number = record.number;
  const mergedAt = record.mergedAt;
  if (typeof number !== "number" || typeof mergedAt !== "string") return null;

  const title = record.title;
  const url = record.url;
  // The query asks for `commits(last: 1)`, so the first node is the head commit.
  const commitNode = asArray(asRecord(record.commits)?.nodes)[0];
  const commit = asRecord(asRecord(commitNode)?.commit);
  const author = asRecord(commit?.author);
  const oid = commit?.oid;
  const email = author?.email;
  const authorName = author?.name;
  const login = asRecord(author?.user)?.login;

  return {
    number,
    title: typeof title === "string" ? title : "",
    url: typeof url === "string" ? url : "",
    mergedAt,
    headOid: typeof oid === "string" ? oid : null,
    // Blank strings are treated the same as a missing value.
    email: typeof email === "string" && email.trim() !== "" ? email : null,
    authorName: typeof authorName === "string" && authorName.trim() !== "" ? authorName : null,
    authorLogin: typeof login === "string" ? login : null,
  };
}

/**
 * Check whether a timestamp lies within the optional inclusive bounds.
 *
 * @returns `false` when `value` does not parse as a date.
 */
function withinDateRange(value: string, since: Date | null, until: Date | null): boolean {
  const time = Date.parse(value);
  if (Number.isNaN(time)) return false;
  if (since !== null && time < since.getTime()) return false;
  if (until !== null && time > until.getTime()) return false;
  return true;
}

/**
 * Group pull requests by email and sort by descending count, then by email.
 *
 * PRs without an email are grouped under "(unknown)". Emails are lowercased
 * before grouping unless `options.caseSensitive` is set.
 */
function summarize(pulls: PullRecord[], options: Options): EmailStat[] {
  const byEmail = new Map<string, PullRecord[]>();
  for (const pr of pulls) {
    const rawEmail = pr.email ?? "(unknown)";
    const email = options.caseSensitive ? rawEmail : rawEmail.toLowerCase();
    const list = byEmail.get(email) ?? [];
    list.push(pr);
    byEmail.set(email, list);
  }
  return [...byEmail.entries()]
    .map(([email, prs]) => ({ email, count: prs.length, prs }))
    .sort((left, right) => right.count - left.count || left.email.localeCompare(right.email));
}

/** Print the statistics as a human-readable text table. */
function printTable(options: Options, pulls: PullRecord[], stats: EmailStat[]): void {
  console.log(`MERGED_PR_EMAIL_STATS repo=${options.repo} source=head-commit-author total=${pulls.length}`);
  if (options.since !== null || options.until !== null) {
    console.log(`RANGE since=${options.since?.toISOString() ?? "-"} until=${options.until?.toISOString() ?? "-"}`);
  }
  console.log("");
  const rows = stats.map((stat) => ({
    EMAIL: stat.email,
    PRS: String(stat.count),
    PERCENT: pulls.length === 0 ? "0.0%" : `${((stat.count / pulls.length) * 100).toFixed(1)}%`,
    EXAMPLES: stat.prs.slice(0, options.examples).map((pr) => `#${pr.number}`).join(","),
  }));
  printRows(rows, ["EMAIL", "PRS", "PERCENT", "EXAMPLES"]);
  console.log("");
  console.log("Definition: merged PRs only; one PR counts once under the author email of its head commit.");
}

/**
 * Print rows as left-aligned columns with a header and a dashed separator.
 *
 * Each column is padded to its widest cell (or its header, if wider).
 */
function printRows(rows: Array<Record<string, string>>, columns: string[]): void {
  const widths = new Map<string, number>();
  for (const column of columns) {
    // A plain loop avoids spreading every row into Math.max's argument list.
    let width = column.length;
    for (const row of rows) {
      width = Math.max(width, row[column]?.length ?? 0);
    }
    widths.set(column, width);
  }
  const line = (row: Record<string, string>): string => columns
    .map((column) => (row[column] ?? "").padEnd(widths.get(column) ?? column.length))
    .join(" ")
    .trimEnd();
  console.log(line(Object.fromEntries(columns.map((column) => [column, column]))));
  console.log(columns.map((column) => "-".repeat(widths.get(column) ?? column.length)).join(" "));
  for (const row of rows) console.log(line(row));
}

/** Print the statistics, including every PR per email, as indented JSON. */
function printJson(options: Options, pulls: PullRecord[], stats: EmailStat[]): void {
  console.log(JSON.stringify({
    repo: options.repo,
    source: "head-commit-author",
    totalMergedPullRequests: pulls.length,
    since: options.since?.toISOString() ?? null,
    until: options.until?.toISOString() ?? null,
    stats: stats.map((stat) => ({
      email: stat.email,
      count: stat.count,
      percent: pulls.length === 0 ? 0 : stat.count / pulls.length,
      pullRequests: stat.prs.map((pr) => ({
        number: pr.number,
        mergedAt: pr.mergedAt,
        headOid: pr.headOid,
        authorName: pr.authorName,
        authorLogin: pr.authorLogin,
        title: pr.title,
        url: pr.url,
      })),
    })),
  }, null, 2));
}

/** Print the statistics as CSV: email, count, fraction, space-separated PR numbers. */
function printCsv(pulls: PullRecord[], stats: EmailStat[]): void {
  console.log(["email", "count", "percent", "pull_requests"].join(","));
  for (const stat of stats) {
    const percent = pulls.length === 0 ? "0" : String(stat.count / pulls.length);
    const prs = stat.prs.map((pr) => `#${pr.number}`).join(" ");
    console.log([csv(stat.email), stat.count, percent, csv(prs)].join(","));
  }
}

/**
 * Escape a single CSV field.
 *
 * Fields containing a double quote, comma, CR, or LF are wrapped in double
 * quotes with embedded quotes doubled; other fields are returned unchanged.
 */
function csv(value: string): string {
  if (!/[",\r\n]/.test(value)) return value;
  return `"${value.replace(/"/g, "\"\"")}"`;
}

/** Return `value` as a plain object record, or `null` for arrays and non-objects. */
function asRecord(value: unknown): Record<string, unknown> | null {
  return typeof value === "object" && value !== null && !Array.isArray(value)
    ? value as Record<string, unknown>
    : null;
}

/** Return `value` when it is an array, otherwise an empty array. */
function asArray(value: unknown): unknown[] {
  return Array.isArray(value) ? value : [];
}

main().catch((error: unknown) => {
  const message = error instanceof Error ? error.message : String(error);
  console.error(`error: ${message}`);
  process.exit(1);
});