#!/usr/bin/env python3 """ Fetch all PR conversation comments + reviews + review threads (inline threads) for the PR associated with the current git branch, by shelling out to: gh api graphql Requires: - `gh auth login` already set up - current branch has an associated (open) PR Usage: python fetch_comments.py > pr_comments.json """ from __future__ import annotations import json import subprocess import sys from typing import Any QUERY = """\ query( $owner: String!, $repo: String!, $number: Int!, $commentsCursor: String, $reviewsCursor: String, $threadsCursor: String ) { repository(owner: $owner, name: $repo) { pullRequest(number: $number) { number url title state # Top-level "Conversation" comments (issue comments on the PR) comments(first: 100, after: $commentsCursor) { pageInfo { hasNextPage endCursor } nodes { id body createdAt updatedAt author { login } } } # Review submissions (Approve / Request changes / Comment), with body if present reviews(first: 100, after: $reviewsCursor) { pageInfo { hasNextPage endCursor } nodes { id state body submittedAt author { login } } } # Inline review threads (grouped), includes resolved state reviewThreads(first: 100, after: $threadsCursor) { pageInfo { hasNextPage endCursor } nodes { id isResolved isOutdated path line diffSide startLine startDiffSide originalLine originalStartLine resolvedBy { login } comments(first: 100) { nodes { id body createdAt updatedAt author { login } } } } } } } } """ def _run(cmd: list[str], stdin: str | None = None) -> str: p = subprocess.run(cmd, input=stdin, capture_output=True, text=True) if p.returncode != 0: raise RuntimeError(f"Command failed: {' '.join(cmd)}\n{p.stderr}") return p.stdout def _run_json(cmd: list[str], stdin: str | None = None) -> dict[str, Any]: out = _run(cmd, stdin=stdin) try: return json.loads(out) except json.JSONDecodeError as e: raise RuntimeError(f"Failed to parse JSON from command output: {e}\nRaw:\n{out}") from e def _ensure_gh_authenticated() -> None: try: _run(["gh", "auth", "status"]) except RuntimeError: print("run `gh auth login` to authenticate the GitHub CLI", file=sys.stderr) raise RuntimeError("gh auth status failed; run `gh auth login` to authenticate the GitHub CLI") from None def gh_pr_view_json(fields: str) -> dict[str, Any]: # fields is a comma-separated list like: "number,headRepositoryOwner,headRepository" return _run_json(["gh", "pr", "view", "--json", fields]) def get_current_pr_ref() -> tuple[str, str, int]: """ Resolve the PR for the current branch (whatever gh considers associated). Works for cross-repo PRs too, by reading head repository owner/name. """ pr = gh_pr_view_json("number,headRepositoryOwner,headRepository") owner = pr["headRepositoryOwner"]["login"] repo = pr["headRepository"]["name"] number = int(pr["number"]) return owner, repo, number def gh_api_graphql( owner: str, repo: str, number: int, comments_cursor: str | None = None, reviews_cursor: str | None = None, threads_cursor: str | None = None, ) -> dict[str, Any]: """ Call `gh api graphql` using -F variables, avoiding JSON blobs with nulls. Query is passed via stdin using query=@- to avoid shell newline/quoting issues. """ cmd = [ "gh", "api", "graphql", "-F", "query=@-", "-F", f"owner={owner}", "-F", f"repo={repo}", "-F", f"number={number}", ] if comments_cursor: cmd += ["-F", f"commentsCursor={comments_cursor}"] if reviews_cursor: cmd += ["-F", f"reviewsCursor={reviews_cursor}"] if threads_cursor: cmd += ["-F", f"threadsCursor={threads_cursor}"] return _run_json(cmd, stdin=QUERY) def fetch_all(owner: str, repo: str, number: int) -> dict[str, Any]: conversation_comments: list[dict[str, Any]] = [] reviews: list[dict[str, Any]] = [] review_threads: list[dict[str, Any]] = [] comments_cursor: str | None = None reviews_cursor: str | None = None threads_cursor: str | None = None pr_meta: dict[str, Any] | None = None while True: payload = gh_api_graphql( owner=owner, repo=repo, number=number, comments_cursor=comments_cursor, reviews_cursor=reviews_cursor, threads_cursor=threads_cursor, ) if "errors" in payload and payload["errors"]: raise RuntimeError(f"GitHub GraphQL errors:\n{json.dumps(payload['errors'], indent=2)}") pr = payload["data"]["repository"]["pullRequest"] if pr_meta is None: pr_meta = { "number": pr["number"], "url": pr["url"], "title": pr["title"], "state": pr["state"], "owner": owner, "repo": repo, } c = pr["comments"] r = pr["reviews"] t = pr["reviewThreads"] conversation_comments.extend(c.get("nodes") or []) reviews.extend(r.get("nodes") or []) review_threads.extend(t.get("nodes") or []) comments_cursor = c["pageInfo"]["endCursor"] if c["pageInfo"]["hasNextPage"] else None reviews_cursor = r["pageInfo"]["endCursor"] if r["pageInfo"]["hasNextPage"] else None threads_cursor = t["pageInfo"]["endCursor"] if t["pageInfo"]["hasNextPage"] else None if not (comments_cursor or reviews_cursor or threads_cursor): break assert pr_meta is not None return { "pull_request": pr_meta, "conversation_comments": conversation_comments, "reviews": reviews, "review_threads": review_threads, } def main() -> None: _ensure_gh_authenticated() owner, repo, number = get_current_pr_ref() result = fetch_all(owner, repo, number) print(json.dumps(result, indent=2)) if __name__ == "__main__": main()