ref(plugins/languages): indepth mode (#1118)

This commit is contained in:
Simon Lecoq
2022-10-16 13:58:41 -04:00
committed by GitHub
parent 85d8187c78
commit e863269d79
15 changed files with 779 additions and 351 deletions

View File

@@ -0,0 +1,182 @@
//Imports
import fs from "fs/promises"
import os from "os"
import paths from "path"
import git from "simple-git"
import {filters} from "../../../app/metrics/utils.mjs"
/**Base analyzer, with logic shared by all languages analysis modes (cloning, parsing, per-commit aggregation) */
export class Analyzer {
  /**
   * Constructor
   * @param {string} login - GitHub login of analyzed user
   * @param {object} options - Options
   * @param {string} [options.account="bypass"] - Account type
   * @param {string[]} [options.authoring=[]] - Patterns used to identify user's commits
   * @param {number|string} [options.uid=Math.random()] - Unique identifier, used to namespace temporary directories
   * @param {object} options.shell - Shell utilities (`run`/`spawn`)
   * @param {object} [options.rest=null] - Octokit REST client
   * @param {object} [options.context={mode:"user"}] - Analysis context
   * @param {string[]} [options.skipped=[]] - Repositories to skip
   * @param {string[]} [options.categories=["programming", "markup"]] - Linguist language categories to keep in results
   * @param {object} [options.timeout={global:NaN, repositories:NaN}] - Timeouts in minutes (NaN disables them)
   */
  constructor(login, {account = "bypass", authoring = [], uid = Math.random(), shell, rest = null, context = {mode:"user"}, skipped = [], categories = ["programming", "markup"], timeout = {global:NaN, repositories:NaN}}) {
    //User informations
    this.login = login
    this.account = account
    this.authoring = authoring
    this.uid = uid
    this.gpg = []
    //Utilities
    this.shell = shell
    this.rest = rest
    this.context = context
    //Markers used to parse git output (full sha1 hashes, "+++ b/<file>" diff headers, "+"/"-" patch lines)
    this.markers = {
      hash:/\b[0-9a-f]{40}\b/,
      file:/^[+]{3}\sb[/](?<file>[\s\S]+)$/,
      line:/^(?<op>[-+])\s*(?<content>[\s\S]+)$/,
    }
    //Parser for "owner/name", "owner/name@branch" and "owner/name@branch:ref" handles
    this.parser = /^(?<login>[\s\S]+?)\/(?<name>[\s\S]+?)(?:@(?<branch>[\s\S]+?)(?::(?<ref>[\s\S]+))?)?$/
    this.consumed = false
    //Options
    this.skipped = skipped
    this.categories = categories
    this.timeout = timeout
    //Results
    this.results = {partial: {global:false, repositories:false}, total: 0, lines: {}, stats: {}, colors: {}, commits: 0, files: 0, missed: {lines: 0, bytes: 0, commits: 0}, elapsed:0}
    this.debug(`instantiated a new ${this.constructor.name}`)
  }
  /**
   * Run analyzer (an instance can only be consumed once)
   * @param {function} runner - Async function performing the actual analysis (expected to mutate `this.results`)
   * @returns {Promise<object>} Results, with `partial` flattened to a boolean
   */
  async run(runner) {
    if (this.consumed)
      throw new Error("This analyzer has already been consumed, another instance needs to be created to perform a new analysis")
    this.consumed = true
    const results = await new Promise((solve, reject) => {
      let completed = false
      let timeout = null
      //Global timeout: settle with partial results when analysis takes too long
      if (Number.isFinite(this.timeout.global)) {
        this.debug(`timeout set to ${this.timeout.global}m`)
        timeout = setTimeout(() => {
          if (!completed) {
            try {
              this.debug(`reached maximum execution time of ${this.timeout.global}m for analysis`)
              this.results.partial.global = true
              solve(this.results)
            }
            catch {
              //Ignore errors
            }
          }
        }, this.timeout.global * 60 * 1000)
      }
      //A sync executor is used on purpose: an async executor would swallow runner rejections
      //and leave the promise pending forever on error
      Promise.resolve()
        .then(() => runner())
        .then(() => {
          completed = true
          if (timeout)
            clearTimeout(timeout)
          solve(this.results)
        }, error => {
          if (timeout)
            clearTimeout(timeout)
          reject(error)
        })
    })
    results.partial = (results.partial.global)||(results.partial.repositories)
    return results
  }
  /**
   * Parse a repository handle or object into its components
   * @param {string|object} repository - "owner/name[@branch[:ref]]" handle, or object with `{owner:{login}, name}`
   * @returns {object} `{repo, path, branch, ref}` where `path` is a uid-namespaced temporary directory
   * @throws {TypeError} When a string handle does not match the supported pattern
   */
  parse(repository) {
    let branch = null, ref = null
    if (typeof repository === "string") {
      if (!this.parser.test(repository))
        throw new TypeError(`"${repository}" pattern is not supported`)
      const {login, name, ...groups} = repository.match(this.parser)?.groups ?? {}
      repository = {owner:{login}, name}
      branch = groups.branch ?? null
      ref = groups.ref ?? null
    }
    const repo = `${repository.owner.login}/${repository.name}`
    const path = paths.join(os.tmpdir(), `${this.uid}-${repo.replace(/[^\w]/g, "_")}`)
    return {repo, path, branch, ref}
  }
  /**
   * Clone a repository to a temporary directory
   * @param {string|object} repository - Repository handle or object (see {@link parse})
   * @returns {Promise<boolean>} Whether clone succeeded
   */
  async clone(repository) {
    const {repo, branch, path} = this.parse(repository)
    let url = /^https?:\/\//.test(repo) ? repo : `https://github.com/${repo}`
    try {
      this.debug(`cloning ${url} to ${path}`)
      await fs.rm(path, {recursive: true, force: true})
      await fs.mkdir(path, {recursive: true})
      await git(path).clone(url, ".", ["--single-branch"]).status()
      this.debug(`cloned ${url} to ${path}`)
      if (branch) {
        this.debug(`switching to branch ${branch} for ${repo}`)
        await git(path).branch(branch)
      }
      return true
    }
    catch (error) {
      this.debug(`failed to clone ${url} (${error})`)
      //Await cleanup so we do not leave a floating promise (and a half-deleted directory) behind
      await this.clean(path)
      return false
    }
  }
  /**
   * Analyze a repository from a list of commits, aggregating per-language lines and bytes into `this.results`
   * Language detection is delegated to the subclass-provided `linguist()` method
   * @param {string} path - Local repository path
   * @param {object} options - Options
   * @param {object[]} [options.commits=[]] - Commits to analyze
   */
  async analyze(path, {commits = []} = {}) {
    const cache = {files:{}, languages:{}}
    const start = Date.now()
    let elapsed = 0, accounted = 0, processed = 0
    if (this.timeout.repositories)
      this.debug(`timeout for repository analysis set to ${this.timeout.repositories}m`)
    for (const commit of commits) {
      elapsed = (Date.now() - start)/1000/60
      if ((this.timeout.repositories)&&(elapsed > this.timeout.repositories)) {
        this.results.partial.repositories = true
        this.debug(`reached maximum execution time of ${this.timeout.repositories}m for repository analysis (${elapsed}m elapsed)`)
        break
      }
      try {
        const {total, files, missed, lines, stats} = await this.linguist(path, {commit, cache})
        this.results.commits++
        this.results.total += total
        this.results.files += files
        this.results.missed.lines += missed.lines
        this.results.missed.bytes += missed.bytes
        //Only keep languages from configured categories (e.g. "programming"/"markup")
        for (const language in lines) {
          if (this.categories.includes(cache.languages[language]?.type))
            this.results.lines[language] = (this.results.lines[language] ?? 0) + lines[language]
        }
        for (const language in stats) {
          if (this.categories.includes(cache.languages[language]?.type))
            this.results.stats[language] = (this.results.stats[language] ?? 0) + stats[language]
        }
      }
      catch (error) {
        this.debug(`skipping commit ${commit.sha} (${error})`)
        this.results.missed.commits++
      }
      finally {
        //Accumulate only the time spent since the previous commit: `elapsed` is cumulative since start,
        //so adding it wholesale at each iteration would overcount quadratically
        elapsed = (Date.now() - start)/1000/60
        this.results.elapsed += elapsed - accounted
        accounted = elapsed
        processed++
        if ((processed%50 === 0)||(processed === commits.length))
          this.debug(`at commit ${processed}/${commits.length} (${(100*processed/commits.length).toFixed(2)}%, ${elapsed.toFixed(2)}m elapsed)`)
      }
    }
    this.results.colors = Object.fromEntries(Object.entries(cache.languages).map(([lang, {color}]) => [lang, color]))
  }
  /**
   * Clean a temporary path
   * @param {string} path - Path to remove recursively
   * @returns {Promise<boolean>} Whether cleaning succeeded
   */
  async clean(path) {
    try {
      this.debug(`cleaning ${path}`)
      await fs.rm(path, {recursive: true, force: true})
      this.debug(`cleaned ${path}`)
      return true
    }
    catch (error) {
      this.debug(`failed to clean (${error})`)
      return false
    }
  }
  /**
   * Whether to skip a repository or not, based on configured `skipped` filters
   * @param {string|object} repository - Repository handle or object
   * @returns {boolean} Whether repository should be skipped
   */
  ignore(repository) {
    const ignored = !filters.repo(repository, this.skipped)
    if (ignored)
      this.debug(`skipping ${typeof repository === "string" ? repository : `${repository?.owner?.login}/${repository?.name}`} as it matches skipped repositories`)
    return ignored
  }
  /**Debug log (prefixed with login and de-camelized analyzer name) */
  debug(message) {
    return console.debug(`metrics/compute/${this.login}/plugins > languages > ${this.constructor.name.replace(/([a-z])([A-Z])/, (_, a, b) => `${a} ${b.toLocaleLowerCase()}`).toLocaleLowerCase()} > ${message}`)
  }
}

View File

@@ -0,0 +1,63 @@
//Imports
import { IndepthAnalyzer } from "./indepth.mjs"
import { RecentAnalyzer } from "./recent.mjs"
import OctokitRest from "@octokit/rest"
import yargsparser from "yargs-parser"
//Cli help message
const help = `
`.trim()
/**
 * Command-line entrypoint for languages analyzers
 * Parses argv, reuses plugin metadata input parsing, then dispatches to the requested analyzer mode
 * @returns {Promise<object|null>} Analysis results, or null when help was displayed
 * @throws {RangeError} When an unsupported analysis mode is requested
 */
export async function cli() {
  //Parse inputs
  console.log("== metrics indepth analyzer cli ====================")
  const argv = yargsparser(process.argv.slice(2))
  if (argv.help) {
    console.log(help)
    return null
  }
  //Load metrics setup and plugin metadata (so cli inputs go through the same validation/defaults as web/action modes)
  const {default: setup} = await import("../../../app/metrics/setup.mjs")
  const {conf: {metadata}} = await setup({log: false})
  const {login, _:repositories, mode = "indepth"} = argv
  const {
    "commits.authoring": authoring,
  } = await metadata.plugins.base.inputs({q:{
    "commits.authoring": argv["commits-authoring"] || login,
  }, account: "bypass"})
  const {
    categories,
    "analysis.timeout":_timeout_global,
    "analysis.timeout.repositories":_timeout_repositories,
    "recent.load":_recent_load,
    "recent.days":_recent_days,
  } = await metadata.plugins.languages.inputs({q: {
    categories:argv.categories || "",
    "analysis.timeout": argv["timeout-global"] || "",
    "analysis.timeout.repositories": argv["timeout-repositories"] || "",
    "recent.load": argv["recent-load"] || "",
    "recent.days": argv["recent-days"] || "",
  }, account: "bypass"})
  //Prepare call
  const imports = await import("../../../app/metrics/utils.mjs")
  const rest = argv.token ? new OctokitRest.Octokit({auth: argv.token, baseUrl: argv["api-url"]}) : null
  //Language analysis
  console.log(`analysis mode | ${mode}`)
  console.log(`login | ${login}`)
  console.log(`rest token | ${rest ? "(provided)" : "(none)"}`)
  console.log(`commits authoring | ${authoring}`)
  console.log(`analysis timeout (global) | ${_timeout_global}`)
  switch (mode) {
    case "recent":{
      console.log(`events to load | ${_recent_load}`)
      console.log(`events maximum age | ${_recent_days}`)
      return new RecentAnalyzer(login, {rest, shell:imports, authoring, categories, timeout:{global:_timeout_global, repositories:_timeout_repositories}, load:_recent_load, days:_recent_days}).run({})
    }
    case "indepth":{
      console.log(`repositories | ${repositories}`)
      return new IndepthAnalyzer(login, {rest, shell:imports, authoring, categories, timeout:{global:_timeout_global, repositories:_timeout_repositories}}).run({repositories})
    }
    default:
      //Fail loudly instead of silently returning undefined on a typo'd mode
      throw new RangeError(`unsupported analysis mode "${mode}" (expected "recent" or "indepth")`)
  }
}

View File

@@ -0,0 +1,232 @@
//Imports
import { Analyzer } from "./analyzer.mjs"
import fs from "fs/promises"
import os from "os"
import paths from "path"
import linguist from "linguist-js"
/**Indepth languages analyzer: clones repositories locally and runs linguist against each commit authored by user */
export class IndepthAnalyzer extends Analyzer {
  /**Constructor (same arguments as {@link Analyzer}) */
  constructor() {
    super(...arguments)
    //Repositories that were manually specified by user (these bypass skip filters)
    this.manual = {repositories:[]}
    Object.assign(this.results, {verified: {signature: 0}})
  }
  /**
   * Run analyzer
   * @param {object} [options] - Options
   * @param {Array<string|object>} [options.repositories=[]] - Repositories to analyze
   * @returns {Promise<object>} Results
   */
  run({repositories = []} = {}) {
    this.manual.repositories = repositories.filter(repo => typeof repo === "string")
    return super.run(async () => {
      await this.gpgarmor()
      for (const repository of repositories) {
        //Stop early when global timeout was reached
        if (this.results.partial.global)
          break
        if (this.ignore(repository))
          continue
        if (await this.clone(repository)) {
          const {path, ref} = this.parse(repository)
          await this.analyze(path, {ref})
          await this.clean(path)
        }
      }
    })
  }
  /**Whether to skip a repository or not (bypass filter if repository was manually specified)*/
  ignore(repository) {
    if (this.manual.repositories.includes(repository)) {
      this.debug(`${repository} has been specified manually, not skipping`)
      return false
    }
    return super.ignore(repository)
  }
  /**
   * Populate and import gpg keys of user, along with GitHub's "web-flow" key (used for web-ui commits)
   * Emails attached to user's gpg keys are auto-added to authoring patterns
   * Keys are only imported into the local keyring when running in GitHub Actions
   */
  async gpgarmor() {
    //Fetch gpg keys (web-flow is GitHub's public key when making changes from web ui)
    try {
      this.debug("fetching gpg keys")
      for (const username of [this.login, "web-flow"]) {
        const {data: keys} = await this.rest.users.listGpgKeysForUser({username})
        this.gpg.push(...keys.map(({key_id: id, raw_key: pub, emails}) => ({id, pub, emails})))
        if (username === this.login) {
          for (const {email} of this.gpg.flatMap(({emails}) => emails)) {
            this.debug(`auto-adding ${email} to commits_authoring (fetched from gpg)`)
            this.authoring.push(email)
          }
        }
      }
      this.debug(`fetched ${this.gpg.length} gpg keys`)
    }
    catch (error) {
      this.debug(`an error occurred while fetching gpg keys (${error})`)
    }
    //Import gpg keys
    for (const {id, pub} of this.gpg) {
      const path = paths.join(os.tmpdir(), `${this.uid}.${id}.gpg`)
      try {
        this.debug(`saving gpg ${id} to ${path}`)
        await fs.writeFile(path, pub)
        await this.shell.run(`gpg ${path}`)
        if (process.env.GITHUB_ACTIONS) {
          this.debug(`importing gpg ${id}`)
          await this.shell.run(`gpg --import ${path}`)
        }
        else
          this.debug("skipping import of gpg keys as we are not in GitHub Actions environment")
      }
      catch (error) {
        this.debug(`an error occurred while importing gpg ${id}, skipping...`)
      }
      finally {
        this.debug(`cleaning ${path}`)
        await fs.rm(path, {recursive: true, force: true}).catch(error => this.debug(`failed to clean ${path} (${error})`))
      }
    }
  }
  /**
   * Filter commits authored by user in repository, using both `git log --author` and `git log --grep`
   * and optionally restricting to a ref range
   * NOTE(review): authoring patterns are interpolated into shell commands below; a pattern containing
   * a single quote would break out of the quoting — confirm patterns are sanitized upstream
   * @param {string} path - Local repository path
   * @param {object} options - Options
   * @param {string} [options.ref] - Ref range to restrict commits to
   * @returns {Promise<string[]>} Unique commit sha1 hashes
   */
  async filter(path, {ref}) {
    const commits = new Set()
    try {
      this.debug(`filtering commits authored by ${this.login} in ${path}`)
      for (const author of this.authoring) {
        //Search by --author
        {
          const output = await this.shell.run(`git log --author='${author}' --pretty=format:"%H" --regexp-ignore-case --no-merges`, {cwd:path, env: {LANG: "en_GB"}}, {log:false, debug:false, prefixed: false})
          const hashes = output.split("\n").map(line => line.trim()).filter(line => this.markers.hash.test(line))
          hashes.forEach(hash => commits.add(hash))
          this.debug(`found ${hashes.length} for ${author} (using --author)`)
        }
        //Search by --grep
        {
          const output = await this.shell.run(`git log --grep='${author}' --pretty=format:"%H" --regexp-ignore-case --no-merges`, {cwd:path, env: {LANG: "en_GB"}}, {log:false, debug:false, prefixed: false})
          const hashes = output.split("\n").map(line => line.trim()).filter(line => this.markers.hash.test(line))
          hashes.forEach(hash => commits.add(hash))
          this.debug(`found ${hashes.length} for ${author} (using --grep)`)
        }
      }
      //Apply ref range if specified
      if (ref) {
        this.debug(`filtering commits referenced by ${ref} in ${path}`)
        const output = await this.shell.run(`git rev-list --boundary ${ref}`, {cwd:path, env: {LANG: "en_GB"}}, {log:false, debug:false, prefixed: false})
        const hashes = output.split("\n").map(line => line.trim()).filter(line => this.markers.hash.test(line))
        commits.forEach(commit => !hashes.includes(commit) ? commits.delete(commit) : null)
      }
      this.debug(`found ${commits.size} unique commits authored by ${this.login} in ${path}`)
    }
    catch (error) {
      this.debug(`an error occurred during filtering of commits authored by ${this.login} in ${path} (${error})`)
    }
    return [...commits]
  }
  /**
   * Build commit descriptors (name, signature verification, per-file editions) for commits authored by user
   * @param {string} path - Local repository path
   * @param {object} options - Options
   * @param {string} [options.ref] - Ref range to restrict commits to
   * @returns {Promise<object[]>} Commits with `{sha, name, verified, editions}` (`verified` is true or null)
   */
  async commits(path, {ref}) {
    const shas = await this.filter(path, {ref})
    const commits = []
    for (const sha of shas) {
      try {
        commits.push({
          sha,
          name: await this.shell.run(`git log ${sha} --format="%s (authored by %an on %cI)" --max-count=1`, {cwd: path, env: {LANG: "en_GB"}}, {log: false, debug:false, prefixed: false}),
          verified: ("verified" in this.results) ? await this.shell.run(`git verify-commit ${sha}`, {cwd: path, env: {LANG: "en_GB"}}, {log: false, debug:false, prefixed: false}).then(() => true).catch(() => null) : null,
          editions: await this.editions(path, {sha}),
        })
      }
      catch (error) {
        this.debug(`skipping commit ${sha} (${error})`)
      }
    }
    return commits
  }
  /**
   * Fetch commit patch and format it by files, counting added/deleted lines and bytes per file
   * @param {string} path - Local repository path
   * @param {object} options - Options
   * @param {string} options.sha - Commit sha1
   * @returns {Promise<object[]>} Editions `{path, added:{lines, bytes}, deleted:{lines, bytes}}`
   */
  async editions(path, {sha}) {
    const editions = []
    let edition = null
    let cursor = 0
    await this.shell.spawn("git", ["log", sha, "--format=''", "--max-count=1", "--patch"], {cwd: path, env: {LANG: "en_GB"}}, {
      debug:false,
      stdout:line => {
        try {
          //Ignore empty lines or unneeded lines
          cursor++
          if ((!/^[-+]/.test(line)) || (!line.trim().length))
            return
          //File marker ("+++ b/<file>" header starts a new edition)
          if (this.markers.file.test(line)) {
            edition = {
              path: `${path}/${line.match(this.markers.file)?.groups?.file}`.replace(/\\/g, "/"),
              added: {lines:0, bytes:0},
              deleted: {lines:0, bytes:0},
            }
            editions.push(edition)
            return
          }
          //Skip remaining diff headers ("--- a/<file>", "--- /dev/null", "+++ /dev/null") which would
          //otherwise match the line marker and be miscounted as edited lines of the previous file
          //(a genuine content line starting with "++ " or "-- " is indistinguishable from a header here)
          if (/^(?:[+]{3}|[-]{3})\s/.test(line))
            return
          //Line markers ("+"/"-" content lines, counted in lines and utf-8 bytes)
          if ((edition)&&(this.markers.line.test(line))) {
            const {op = "+", content = ""} = line.match(this.markers.line)?.groups ?? {}
            const size = Buffer.byteLength(content, "utf-8")
            edition[{"+":"added", "-":"deleted"}[op]].bytes += size
            edition[{"+":"added", "-":"deleted"}[op]].lines++
            return
          }
        }
        catch (error) {
          this.debug(`skipping line ${sha}#${cursor} (${error})`)
        }
      }
    })
    return editions
  }
  /**Analyze a repository (resolve authored commits then defer to base per-commit aggregation) */
  async analyze(path, {ref} = {}) {
    const commits = await this.commits(path, {ref})
    return super.analyze(path, {commits})
  }
  /**
   * Run linguist against a commit and compute edited lines and bytes
   * Linguist is executed at most once per sha (after a checkout), and file→language results are
   * cached across commits of the same repository
   * @param {string} path - Local repository path
   * @param {object} options - Options
   * @param {object} options.commit - Commit descriptor (from {@link commits})
   * @param {object} options.cache - Shared `{files, languages}` cache
   * @returns {Promise<object>} Per-commit statistics
   */
  async linguist(path, {commit, cache}) {
    const result = {total:0, files:0, missed:{lines:0, bytes:0}, lines:{}, stats:{}}
    const edited = new Set()
    const seen = new Set()
    for (const edition of commit.editions) {
      edited.add(edition.path)
      //Guess file language with linguist (only run it once per sha)
      if ((!(edition.path in cache.files))&&(!seen.has(commit.sha))) {
        this.debug(`language for file ${edition.path} is not in cache, running linguist at ${commit.sha}`)
        await this.shell.run(`git checkout ${commit.sha}`, {cwd: path, env: {LANG: "en_GB"}}, {log: false, debug:false, prefixed: false})
        const {files: {results: files}, languages: {results: languages}} = await linguist(path)
        Object.assign(cache.files, files)
        Object.assign(cache.languages, languages)
        seen.add(commit.sha)
      }
      //Files still unresolved after a linguist pass are marked unknown
      if (!(edition.path in cache.files))
        cache.files[edition.path] = "<unknown>"
      //Aggregate statistics
      const language = cache.files[edition.path]
      edition.language = language
      result.total += edition.added.bytes
      if (language === "<unknown>") {
        result.missed.lines += edition.added.lines
        result.missed.bytes += edition.added.bytes
      }
      else {
        result.lines[language] = (result.lines[language] ?? 0) + edition.added.lines
        result.stats[language] = (result.stats[language] ?? 0) + edition.added.bytes
      }
    }
    result.files = edited.size
    return result
  }
}

View File

@@ -0,0 +1,144 @@
//Imports
import { Analyzer } from "./analyzer.mjs"
import {filters} from "../../../app/metrics/utils.mjs"
import linguist from "linguist-js"
/**Recent languages analyzer: reconstructs activity from recent push events patches fetched through REST API (no local clone) */
export class RecentAnalyzer extends Analyzer {
  /**Constructor (same arguments as {@link Analyzer}, plus `days` and `load` options) */
  constructor() {
    super(...arguments)
    //Maximum age (in days) and number of events to load (0 disables the limit)
    this.days = arguments[1]?.days ?? 0
    this.load = arguments[1]?.load ?? 0
    Object.assign(this.results, {days:this.days})
  }
  /**Run analyzer */
  run() {
    return super.run(async () => {
      //No local clone is needed in this mode, a placeholder path is passed instead
      await this.analyze("/dev/null")
    })
  }
  /**Analyze patches fetched from recent activity (defers to base per-commit aggregation) */
  async analyze(path) {
    const patches = await this.patches()
    return super.analyze(path, {commits:patches})
  }
  /**
   * Fetch recent push events, then resolve each commit's patch and format it by files
   * @returns {Promise<object[]>} Commit descriptors `{sha, name, verified, editions}`
   */
  async patches() {
    //Fetch commits from recent activity
    this.debug(`fetching patches from last ${this.days || ""} days up to ${this.load || "∞"} events`)
    const commits = [], pages = Math.ceil((this.load || Infinity) / 100)
    //Resolve default branch when analyzing a single repository (only its events are kept below)
    if (this.context.mode === "repository") {
      try {
        const {data:{default_branch:branch}} = await this.rest.repos.get(this.context)
        this.context.branch = branch
        this.results.branch = branch
        this.debug(`default branch for ${this.context.owner}/${this.context.repo} is ${branch}`)
      }
      catch (error) {
        this.debug(`failed to get default branch for ${this.context.owner}/${this.context.repo} (${error})`)
      }
    }
    try {
      for (let page = 1; page <= pages; page++) {
        this.debug(`fetching events page ${page}`)
        //Pagination parameters are passed in both modes (repository mode previously refetched page 1 on each iteration)
        commits.push(
          ...(await (this.context.mode === "repository" ? this.rest.activity.listRepoEvents({...this.context, per_page: 100, page}) : this.rest.activity.listEventsForAuthenticatedUser({username: this.login, per_page: 100, page}))).data
            .filter(({type, payload}) => (type === "PushEvent")&&((this.context.mode !== "repository")||((this.context.mode === "repository")&&(payload?.ref?.includes?.(`refs/heads/${this.context.branch}`)))))
            .filter(({actor}) => (this.account === "organization")||(this.context.mode === "repository") ? true : !filters.text(actor.login, [this.login], {debug:false}))
            .filter(({repo: {name: repo}}) => !this.ignore(repo))
            .filter(({created_at}) => ((!this.days)||(new Date(created_at) > new Date(Date.now() - this.days * 24 * 60 * 60 * 1000)))),
        )
      }
    }
    catch {
      this.debug("no more page to load")
    }
    this.debug(`fetched ${commits.length} commits`)
    //Age (in days) of oldest fetched event (0 when no activity was found, avoiding a NaN from `new Date(undefined)`)
    this.results.latest = commits.length ? Math.round((new Date().getTime() - new Date(commits[commits.length - 1].created_at).getTime()) / (1000 * 60 * 60 * 24)) : 0
    this.results.commits = commits.length
    //Retrieve edited files and filter edited lines (those starting with +/-) from patches
    this.debug("fetching patches")
    const patches = [
      ...await Promise.allSettled(
        commits
          .flatMap(({payload}) => payload.commits)
          .filter(({committer}) => filters.text(committer?.email, this.authoring, {debug:false}))
          .map(commit => commit.url)
          .map(async commit => (await this.rest.request(commit)).data),
      ),
    ]
      .filter(({status}) => status === "fulfilled")
      .map(({value}) => value)
      //Merge commits (more than one parent) are excluded
      .filter(({parents}) => parents.length <= 1)
      .map(({sha, commit:{message, committer}, verification, files}) => ({
        sha,
        name:`${message} (authored by ${committer.name} on ${committer.date})`,
        verified:verification?.verified ?? null,
        editions:files.map(({filename, patch = ""}) => {
          const edition = {
            path: filename,
            added: {lines:0, bytes:0},
            deleted: {lines:0, bytes:0},
            patch,
          }
          for (const line of patch.split("\n")) {
            if ((!/^[-+]/.test(line)) || (!line.trim().length))
              continue
            if (this.markers.line.test(line)) {
              const {op = "+", content = ""} = line.match(this.markers.line)?.groups ?? {}
              const size = Buffer.byteLength(content, "utf-8")
              edition[{"+":"added", "-":"deleted"}[op]].bytes += size
              edition[{"+":"added", "-":"deleted"}[op]].lines++
              continue
            }
          }
          return edition
        })
      }))
    return patches
  }
  /**
   * Run linguist against a commit patch and compute edited lines and bytes per language
   * Unlike indepth mode there is no local checkout, so linguist runs on each patch content
   * @param {*} _ - Unused path placeholder
   * @param {object} options - Options
   * @param {object} options.commit - Commit descriptor (from {@link patches})
   * @param {object} options.cache - Shared cache (only `languages` is reused across commits)
   * @returns {Promise<object>} Per-commit statistics
   */
  async linguist(_, {commit, cache:{languages}}) {
    const cache = {files:{}, languages}
    const result = {total:0, files:0, missed:{lines:0, bytes:0}, lines:{}, stats:{}, languages:{}}
    const edited = new Set()
    for (const edition of commit.editions) {
      edited.add(edition.path)
      //Guess file language with linguist (`detected` renamed from `languages` to avoid shadowing the shared cache)
      const {files: {results: files}, languages: {results: detected}, unknown} = await linguist(edition.path, {fileContent:edition.patch})
      Object.assign(cache.files, files)
      Object.assign(cache.languages, detected)
      if (!(edition.path in cache.files))
        cache.files[edition.path] = "<unknown>"
      //Aggregate statistics
      const language = cache.files[edition.path]
      edition.language = language
      //NOTE(review): the line marker matches both "+" and "-" lines, so deletions are counted here too
      //(i.e. this measures lines/bytes *changed* rather than added) — confirm this is intended
      const numbers = edition.patch
        .split("\n")
        .filter(line => this.markers.line.test(line))
        .map(line => Buffer.byteLength(line.substring(1).trimStart(), "utf-8"))
      const added = numbers.reduce((a, b) => a + b, 0)
      result.total += added
      if (language === "<unknown>") {
        result.missed.lines += numbers.length
        result.missed.bytes += unknown.bytes
      }
      else {
        result.lines[language] = (result.lines[language] ?? 0) + numbers.length
        result.stats[language] = (result.stats[language] ?? 0) + added
      }
    }
    result.files = edited.size
    result.languages = cache.languages
    return result
  }
}