This commit is contained in:
2026-02-19 00:33:08 -08:00
parent e37f3dd7b1
commit 70dd0779f2
143 changed files with 31888 additions and 0 deletions

View File

@@ -0,0 +1,237 @@
#!/usr/bin/env python3
"""
Fetch all PR conversation comments + reviews + review threads (inline threads)
for the PR associated with the current git branch, by shelling out to:
gh api graphql
Requires:
- `gh auth login` already set up
- current branch has an associated (open) PR
Usage:
python fetch_comments.py > pr_comments.json
"""
from __future__ import annotations
import json
import subprocess
import sys
from typing import Any
QUERY = """\
query(
$owner: String!,
$repo: String!,
$number: Int!,
$commentsCursor: String,
$reviewsCursor: String,
$threadsCursor: String
) {
repository(owner: $owner, name: $repo) {
pullRequest(number: $number) {
number
url
title
state
# Top-level "Conversation" comments (issue comments on the PR)
comments(first: 100, after: $commentsCursor) {
pageInfo { hasNextPage endCursor }
nodes {
id
body
createdAt
updatedAt
author { login }
}
}
# Review submissions (Approve / Request changes / Comment), with body if present
reviews(first: 100, after: $reviewsCursor) {
pageInfo { hasNextPage endCursor }
nodes {
id
state
body
submittedAt
author { login }
}
}
# Inline review threads (grouped), includes resolved state
reviewThreads(first: 100, after: $threadsCursor) {
pageInfo { hasNextPage endCursor }
nodes {
id
isResolved
isOutdated
path
line
diffSide
startLine
startDiffSide
originalLine
originalStartLine
resolvedBy { login }
comments(first: 100) {
nodes {
id
body
createdAt
updatedAt
author { login }
}
}
}
}
}
}
}
"""
def _run(cmd: list[str], stdin: str | None = None) -> str:
p = subprocess.run(cmd, input=stdin, capture_output=True, text=True)
if p.returncode != 0:
raise RuntimeError(f"Command failed: {' '.join(cmd)}\n{p.stderr}")
return p.stdout
def _run_json(cmd: list[str], stdin: str | None = None) -> dict[str, Any]:
out = _run(cmd, stdin=stdin)
try:
return json.loads(out)
except json.JSONDecodeError as e:
raise RuntimeError(f"Failed to parse JSON from command output: {e}\nRaw:\n{out}") from e
def _ensure_gh_authenticated() -> None:
try:
_run(["gh", "auth", "status"])
except RuntimeError:
print("run `gh auth login` to authenticate the GitHub CLI", file=sys.stderr)
raise RuntimeError("gh auth status failed; run `gh auth login` to authenticate the GitHub CLI") from None
def gh_pr_view_json(fields: str) -> dict[str, Any]:
# fields is a comma-separated list like: "number,headRepositoryOwner,headRepository"
return _run_json(["gh", "pr", "view", "--json", fields])
def get_current_pr_ref() -> tuple[str, str, int]:
"""
Resolve the PR for the current branch (whatever gh considers associated).
Works for cross-repo PRs too, by reading head repository owner/name.
"""
pr = gh_pr_view_json("number,headRepositoryOwner,headRepository")
owner = pr["headRepositoryOwner"]["login"]
repo = pr["headRepository"]["name"]
number = int(pr["number"])
return owner, repo, number
def gh_api_graphql(
owner: str,
repo: str,
number: int,
comments_cursor: str | None = None,
reviews_cursor: str | None = None,
threads_cursor: str | None = None,
) -> dict[str, Any]:
"""
Call `gh api graphql` using -F variables, avoiding JSON blobs with nulls.
Query is passed via stdin using query=@- to avoid shell newline/quoting issues.
"""
cmd = [
"gh",
"api",
"graphql",
"-F",
"query=@-",
"-F",
f"owner={owner}",
"-F",
f"repo={repo}",
"-F",
f"number={number}",
]
if comments_cursor:
cmd += ["-F", f"commentsCursor={comments_cursor}"]
if reviews_cursor:
cmd += ["-F", f"reviewsCursor={reviews_cursor}"]
if threads_cursor:
cmd += ["-F", f"threadsCursor={threads_cursor}"]
return _run_json(cmd, stdin=QUERY)
def fetch_all(owner: str, repo: str, number: int) -> dict[str, Any]:
conversation_comments: list[dict[str, Any]] = []
reviews: list[dict[str, Any]] = []
review_threads: list[dict[str, Any]] = []
comments_cursor: str | None = None
reviews_cursor: str | None = None
threads_cursor: str | None = None
pr_meta: dict[str, Any] | None = None
while True:
payload = gh_api_graphql(
owner=owner,
repo=repo,
number=number,
comments_cursor=comments_cursor,
reviews_cursor=reviews_cursor,
threads_cursor=threads_cursor,
)
if "errors" in payload and payload["errors"]:
raise RuntimeError(f"GitHub GraphQL errors:\n{json.dumps(payload['errors'], indent=2)}")
pr = payload["data"]["repository"]["pullRequest"]
if pr_meta is None:
pr_meta = {
"number": pr["number"],
"url": pr["url"],
"title": pr["title"],
"state": pr["state"],
"owner": owner,
"repo": repo,
}
c = pr["comments"]
r = pr["reviews"]
t = pr["reviewThreads"]
conversation_comments.extend(c.get("nodes") or [])
reviews.extend(r.get("nodes") or [])
review_threads.extend(t.get("nodes") or [])
comments_cursor = c["pageInfo"]["endCursor"] if c["pageInfo"]["hasNextPage"] else None
reviews_cursor = r["pageInfo"]["endCursor"] if r["pageInfo"]["hasNextPage"] else None
threads_cursor = t["pageInfo"]["endCursor"] if t["pageInfo"]["hasNextPage"] else None
if not (comments_cursor or reviews_cursor or threads_cursor):
break
assert pr_meta is not None
return {
"pull_request": pr_meta,
"conversation_comments": conversation_comments,
"reviews": reviews,
"review_threads": review_threads,
}
def main() -> None:
_ensure_gh_authenticated()
owner, repo, number = get_current_pr_ref()
result = fetch_all(owner, repo, number)
print(json.dumps(result, indent=2))
if __name__ == "__main__":
main()