From e863269d79b5cd9e3dbb9ca7c6228bd1566b951b Mon Sep 17 00:00:00 2001 From: Simon Lecoq <22963968+lowlighter@users.noreply.github.com> Date: Sun, 16 Oct 2022 13:58:41 -0400 Subject: [PATCH] ref(plugins/languages): indepth mode (#1118) --- .github/actions/spelling/allow.txt | 4 + package-lock.json | 7 +- package.json | 3 +- source/app/metrics/utils.mjs | 29 +- source/plugins/languages/README.md | 45 ++- .../plugins/languages/analyzer/analyzer.mjs | 182 ++++++++++ source/plugins/languages/analyzer/cli.mjs | 63 ++++ source/plugins/languages/analyzer/indepth.mjs | 232 +++++++++++++ source/plugins/languages/analyzer/recent.mjs | 144 ++++++++ source/plugins/languages/analyzers.mjs | 310 +----------------- source/plugins/languages/index.mjs | 41 +-- source/plugins/languages/metadata.yml | 36 +- .../repository/partials/languages.ejs | 3 + tests/mocks/api/github/rest/repos/get.mjs | 30 ++ tests/mocks/api/github/rest/request.mjs | 1 + 15 files changed, 779 insertions(+), 351 deletions(-) create mode 100644 source/plugins/languages/analyzer/analyzer.mjs create mode 100644 source/plugins/languages/analyzer/cli.mjs create mode 100644 source/plugins/languages/analyzer/indepth.mjs create mode 100644 source/plugins/languages/analyzer/recent.mjs create mode 100644 tests/mocks/api/github/rest/repos/get.mjs diff --git a/.github/actions/spelling/allow.txt b/.github/actions/spelling/allow.txt index a27fbe27..09b3e500 100644 --- a/.github/actions/spelling/allow.txt +++ b/.github/actions/spelling/allow.txt @@ -1,6 +1,10 @@ +gpgarmor github https leetcode pgn +scm +shas ssh ubuntu +yargsparser diff --git a/package-lock.json b/package-lock.json index 7c907ada..e50e5c95 100644 --- a/package-lock.json +++ b/package-lock.json @@ -49,7 +49,8 @@ "twemoji-parser": "^14.0.0", "vue": "^2.7.1", "vue-prism-component": "^1.2.0", - "xml-formatter": "^2.6.1" + "xml-formatter": "^2.6.1", + "yargs-parser": "^21.1.1" }, "devDependencies": { "eslint": "^8.25.0", @@ -10563,7 +10564,6 @@ "version": "21.1.1", "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", - "dev": true, "engines": { "node": ">=12" } @@ -18668,8 +18668,7 @@ "yargs-parser": { "version": "21.1.1", "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", - "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", - "dev": true + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==" }, "yauzl": { "version": "2.10.0", diff --git a/package.json b/package.json index b5311034..8e42e10c 100644 --- a/package.json +++ b/package.json @@ -68,7 +68,8 @@ "twemoji-parser": "^14.0.0", "vue": "^2.7.1", "vue-prism-component": "^1.2.0", - "xml-formatter": "^2.6.1" + "xml-formatter": "^2.6.1", + "yargs-parser": "^21.1.1" }, "devDependencies": { "eslint": "^8.25.0", diff --git a/source/app/metrics/utils.mjs b/source/app/metrics/utils.mjs index 26a163f9..aeb64fb7 100644 --- a/source/app/metrics/utils.mjs +++ b/source/app/metrics/utils.mjs @@ -225,17 +225,19 @@ export async function language({filename, patch}) { } /**Run command (use this to execute commands and process whole output at once, may not be suitable for large outputs) */ -export async function run(command, options, {prefixed = true, log = true} = {}) { +export async function run(command, options, {prefixed = true, log = true, debug = true} = {}) { const 
prefix = {win32: "wsl"}[process.platform] ?? "" command = `${prefixed ? prefix : ""} ${command}`.trim() return new Promise((solve, reject) => { - console.debug(`metrics/command/run > ${command}`) + if (debug) + console.debug(`metrics/command/run > ${command}`) const child = processes.exec(command, options) let [stdout, stderr] = ["", ""] child.stdout.on("data", data => stdout += data) child.stderr.on("data", data => stderr += data) child.on("close", code => { - console.debug(`metrics/command/run > ${command} > exited with code ${code}`) + if (debug) + console.debug(`metrics/command/run > ${command} > exited with code ${code}`) if (log) { console.debug(stdout) console.debug(stderr) @@ -246,7 +248,7 @@ export async function run(command, options, {prefixed = true, log = true} = {}) } /**Spawn command (use this to execute commands and process output on the fly) */ -export async function spawn(command, args = [], options = {}, {prefixed = true, timeout = 300 * 1000, stdout} = {}) { //eslint-disable-line max-params +export async function spawn(command, args = [], options = {}, {prefixed = true, timeout = 300 * 1000, stdout, debug = true} = {}) { //eslint-disable-line max-params const prefix = {win32: "wsl"}[process.platform] ?? "" if ((prefixed) && (prefix)) { args.unshift(command) @@ -255,15 +257,18 @@ export async function spawn(command, args = [], options = {}, {prefixed = true, if (!stdout) throw new Error("`stdout` argument was not provided, use run() instead of spawn() if processing output is not needed") return new Promise((solve, reject) => { - console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")}`) + if (debug) + console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")}`) const child = processes.spawn(command, args, {...options, shell: true, timeout}) const reader = readline.createInterface({input: child.stdout}) reader.on("line", stdout) const closed = new Promise(close => reader.on("close", close)) child.on("close", async code => { - console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")} > exited with code ${code}`) + if (debug) + console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")} > exited with code ${code}`) await closed - console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")} > reader closed`) + if (debug) + console.debug(`metrics/command/spawn > ${command} with ${args.join(" ")} > reader closed`) return code === 0 ? solve() : reject() }) }) @@ -372,7 +377,7 @@ export const filters = { return result }, /**Repository filter*/ - repo(repository, patterns) { + repo(repository, patterns, {debug = true} = {}) { //Disable filtering when no pattern is provided if (!patterns.length) return true @@ -390,11 +395,12 @@ export const filters = { //Basic pattern matching const include = (!patterns.includes(repo)) && (!patterns.includes(`${user}/${repo}`)) - console.debug(`metrics/filters/repo > filter ${repo} (${include ? "included" : "excluded"})`) + if (debug) + console.debug(`metrics/filters/repo > filter ${repo} (${include ? "included" : "excluded"})`) return include }, /**Text filter*/ - text(text, patterns) { + text(text, patterns, {debug = true} = {}) { //Disable filtering when no pattern is provided if (!patterns.length) return true @@ -404,7 +410,8 @@ export const filters = { //Basic pattern matching const include = !patterns.includes(text) - console.debug(`metrics/filters/text > filter ${text} (${include ? 
"included" : "excluded"})`) + if (debug) + console.debug(`metrics/filters/text > filter ${text} (${include ? "included" : "excluded"})`) return include }, } diff --git a/source/plugins/languages/README.md b/source/plugins/languages/README.md index 34cf9c60..eed026f3 100644 --- a/source/plugins/languages/README.md +++ b/source/plugins/languages/README.md @@ -236,8 +236,7 @@ It will be automatically hidden if empty.

## 🔎 `indepth` mode

-The default algorithm use the top languages provided of each repository you contributed to.
-When working in collaborative projects with a lot of people, these numbers may be less representative of your actual work.
+The default algorithm uses the top languages of each repository you contributed to, as reported by the GitHub GraphQL API (similar to the language bar displayed on github.com). When working in collaborative projects with a lot of people, these numbers may be less representative of your actual work.

The `plugin_languages_indepth` option lets you use a more advanced algorithm for more accurate statistics.
Under the hood, it will clone your repositories, run [linguist-js](https://github.com/Nixinova/Linguist) (a JavaScript port of [GitHub linguist](https://github.com/github/linguist)) and iterate over patches matching your `commits_authoring` setting.

@@ -257,12 +256,52 @@ Since git lets you use any email and username for commits, *metrics* may not be

> ⚠️ This feature significantly increases workflow time

-> ⚠️ Since this mode iterates over **each commit of each repository**, it is not suited for large code base, especially those with a large amount of commits and the ones containing binaries. While `plugin_languages_analysis_timeout` can be used to increase the default timeout for analysis, please be responsible and keep this feature disabled if it cannot work on your account to save GitHub resources and our planet 🌏
+> ⚠️ Since this mode iterates over **each matching commit of each repository**, it is not suited for large code bases, especially those with a large number of commits or those containing binaries. While `plugin_languages_analysis_timeout` and `plugin_languages_analysis_timeout_repositories` can be used to increase the default timeouts for analysis, please be responsible and keep this feature disabled if it cannot work on your account, to save GitHub resources and our planet 🌏

> ⚠️ Although *metrics* does not send any code to external sources, repositories are temporarily cloned on the GitHub Action runner. It is advised to keep this option disabled when working with sensitive data or company code. Use at your own risk, *metrics* and its authors **cannot** be held responsible for any resulting code leaks. Source code is available for auditing at [analyzers.mjs](/source/plugins/languages/analyzers.mjs).

> 🌐 Web instances must enable this feature in `settings.json`

+Below is a summary of the process used to compute indepth statistics:
+
+## Most used mode
+
+1. Fetch GPG keys linked to your GitHub account
+  - automatically add attached emails to `commits_authoring`
+  - *web-flow* (GitHub's public key for changes made through the web UI) is also fetched
+2. Import GPG keys so they can be used to verify commits later
+3. Iterate through repositories
+  - early break if `plugin_languages_analysis_timeout` is reached
+  - skip repository if it matches `plugin_languages_skipped`
+  - include repositories from `plugin_languages_indepth_custom` (see the parsing sketch after this list)
+    - a specific branch and commit range can be used
+    - a source other than github.com can be used
+4. Clone repository
+  - the target branch is checked out
+5. Compute the list of authored commits
+  - using `git log --author` and `commits_authoring` to search commit headers
+  - using `git log --grep` and `commits_authoring` to search commit bodies
+  - ensure these are within the range specified by `plugin_languages_indepth_custom` (if applicable)
+6. Process authored commits
+  - early break if `plugin_languages_analysis_timeout_repositories` is reached
+  - using `git verify-commit` to check authenticity against imported GPG keys
+  - using `git log --patch` to extract added/deleted lines/bytes from each file
+  - using [GitHub linguist](https://github.com/github/linguist) ([linguist-js](https://github.com/Nixinova/LinguistJS)) to detect language for each file
+  - respect `plugin_languages_categories` option
+  - if a file has since been deleted or moved, check out the last commit where the file was present and run linguist again
+7. Aggregate results
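The custom repository entries are split with the `Analyzer#parser` regular expression introduced in `analyzer.mjs` further down in this patch. As a quick sketch (not part of the patch itself; the capture-group names are inferred from the destructuring in `Analyzer#parse()`), this is how `plugin_languages_indepth_custom` entries break down:

```js
//Sketch: how plugin_languages_indepth_custom entries map to {login, name, branch, ref}
//(same pattern as Analyzer#parser in analyzer.mjs)
const parser = /^(?<login>[\s\S]+?)\/(?<name>[\s\S]+?)(?:@(?<branch>[\s\S]+?)(?::(?<ref>[\s\S]+))?)?$/
for (const entry of ["lowlighter/metrics", "lowlighter/metrics@main", "lowlighter/metrics@main:v1.0..v1.1"])
  console.log(entry.match(parser).groups)
//→ {login: "lowlighter", name: "metrics", branch: undefined, ref: undefined}
//→ {login: "lowlighter", name: "metrics", branch: "main", ref: undefined}
//→ {login: "lowlighter", name: "metrics", branch: "main", ref: "v1.0..v1.1"}
```

Entries with a branch are checked out after cloning, and entries with a commit range are handed to `git rev-list` to restrict which commits are analyzed.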
+
+## Recently used mode
+
+1. Fetch push events linked to your account (or target repository)
+  - matching `plugin_languages_recent_load` and `plugin_languages_recent_days` options
+  - matching committer emails from `commits_authoring`
+2. Process authored commits
+  - using [GitHub linguist](https://github.com/github/linguist) ([linguist-js](https://github.com/Nixinova/LinguistJS)) to detect language for each file
+  - respect `plugin_languages_recent_categories` option
+  - directly pass file content to linguist rather than performing I/O and simulating a git repository
+3. Aggregate results
+
## 📅 Recently used languages

This feature uses a similar algorithm to `indepth` mode, but uses patches from your events feed instead.

diff --git a/source/plugins/languages/analyzer/analyzer.mjs b/source/plugins/languages/analyzer/analyzer.mjs
new file mode 100644
index 00000000..8558a401
--- /dev/null
+++ b/source/plugins/languages/analyzer/analyzer.mjs
@@ -0,0 +1,182 @@
+//Imports
+import fs from "fs/promises"
+import os from "os"
+import paths from "path"
+import git from "simple-git"
+import {filters} from "../../../app/metrics/utils.mjs"
+
+/**Analyzer */
+export class Analyzer {
+
+  /**Constructor */
+  constructor(login, {account = "bypass", authoring = [], uid = Math.random(), shell, rest = null, context = {mode:"user"}, skipped = [], categories = ["programming", "markup"], timeout = {global:NaN, repositories:NaN}}) {
+    //User information
+    this.login = login
+    this.account = account
+    this.authoring = authoring
+    this.uid = uid
+    this.gpg = []
+
+    //Utilities
+    this.shell = shell
+    this.rest = rest
+    this.context = context
+    this.markers = {
+      hash:/\b[0-9a-f]{40}\b/,
+      file:/^[+]{3}\sb[/](?<file>[\s\S]+)$/,
+      line:/^(?<op>[-+])\s*(?<content>[\s\S]+)$/,
+    }
+    this.parser = /^(?<login>[\s\S]+?)\/(?<name>[\s\S]+?)(?:@(?<branch>[\s\S]+?)(?::(?<ref>[\s\S]+))?)?$/
+    this.consumed = false
+
+    //Options
+    this.skipped = skipped
+    this.categories = categories
+    this.timeout = timeout
+
+    //Results
+    this.results = {partial: {global:false, repositories:false}, total: 0, lines: {}, stats: {}, colors: {}, commits: 0, files: 0, missed: {lines: 0, bytes: 0, commits: 0}, elapsed:0}
+    this.debug(`instantiated a new ${this.constructor.name}`)
+  }
+
+  /**Run analyzer */
+  async run(runner) {
+    if (this.consumed)
+      throw new Error("This analyzer has already been consumed, another instance needs to be created to perform a new analysis")
+    this.consumed = true
+    const results = await new Promise(async solve => {
+      let completed = false
+      if (Number.isFinite(this.timeout.global)) {
+        this.debug(`timeout set to ${this.timeout.global}m`)
+        setTimeout(() => {
+          if (!completed) {
+            try {
+              this.debug(`reached maximum execution time of ${this.timeout.global}m for analysis`)
+              this.results.partial.global = true
+              solve(this.results)
+            }
+            catch {
+              //Ignore errors
+            }
+          }
+        }, this.timeout.global * 60 * 1000)
+      }
+      await runner()
+      completed = true
+      solve(this.results)
+    })
results.partial = (results.partial.global)||(results.partial.repositories) + return results + } + + /**Parse repository */ + parse(repository) { + let branch = null, ref = null + if (typeof repository === "string") { + if (!this.parser.test(repository)) + throw new TypeError(`"${repository}" pattern is not supported`) + const {login, name, ...groups} = repository.match(this.parser)?.groups ?? {} + repository = {owner:{login}, name} + branch = groups.branch ?? null + ref = groups.ref ?? null + } + const repo = `${repository.owner.login}/${repository.name}` + const path = paths.join(os.tmpdir(), `${this.uid}-${repo.replace(/[^\w]/g, "_")}`) + return {repo, path, branch, ref} + } + + /**Clone a repository */ + async clone(repository) { + const {repo, branch, path} = this.parse(repository) + let url = /^https?:\/\//.test(repo) ? repo : `https://github.com/${repo}` + try { + this.debug(`cloning ${url} to ${path}`) + await fs.rm(path, {recursive: true, force: true}) + await fs.mkdir(path, {recursive: true}) + await git(path).clone(url, ".", ["--single-branch"]).status() + this.debug(`cloned ${url} to ${path}`) + if (branch) { + this.debug(`switching to branch ${branch} for ${repo}`) + await git(path).branch(branch) + } + return true + } + catch (error) { + this.debug(`failed to clone ${url} (${error})`) + this.clean(path) + return false + } + } + + /**Analyze a repository */ + async analyze(path, {commits = []} = {}) { + const cache = {files:{}, languages:{}} + const start = Date.now() + let elapsed = 0, processed = 0 + if (this.timeout.repositories) + this.debug(`timeout for repository analysis set to ${this.timeout.repositories}m`) + for (const commit of commits) { + elapsed = (Date.now() - start)/1000/60 + if ((this.timeout.repositories)&&(elapsed > this.timeout.repositories)) { + this.results.partial.repositories = true + this.debug(`reached maximum execution time of ${this.timeout.repositories}m for repository analysis (${elapsed}m elapsed)`) + break + } + try { + const {total, files, missed, lines, stats} = await this.linguist(path, {commit, cache}) + this.results.commits++ + this.results.total += total + this.results.files += files + this.results.missed.lines += missed.lines + this.results.missed.bytes += missed.bytes + for (const language in lines) { + if (this.categories.includes(cache.languages[language]?.type)) + this.results.lines[language] = (this.results.lines[language] ?? 0) + lines[language] + } + for (const language in stats) { + if (this.categories.includes(cache.languages[language]?.type)) + this.results.stats[language] = (this.results.stats[language] ?? 
0) + stats[language] + } + } + catch (error) { + this.debug(`skipping commit ${commit.sha} (${error})`) + this.results.missed.commits++ + } + finally { + this.results.elapsed += elapsed + processed++ + if ((processed%50 === 0)||(processed === commits.length)) + this.debug(`at commit ${processed}/${commits.length} (${(100*processed/commits.length).toFixed(2)}%, ${elapsed.toFixed(2)}m elapsed)`) + } + } + this.results.colors = Object.fromEntries(Object.entries(cache.languages).map(([lang, {color}]) => [lang, color])) + } + + /**Clean a path */ + async clean(path) { + try { + this.debug(`cleaning ${path}`) + await fs.rm(path, {recursive: true, force: true}) + this.debug(`cleaned ${path}`) + return true + } + catch (error) { + this.debug(`failed to clean (${error})`) + return false + } + } + + /**Whether to skip a repository or not */ + ignore(repository) { + const ignored = !filters.repo(repository, this.skipped) + if (ignored) + this.debug(`skipping ${typeof repository === "string" ? repository : `${repository?.owner?.login}/${repository?.name}`} as it matches skipped repositories`) + return ignored + } + + /**Debug log */ + debug(message) { + return console.debug(`metrics/compute/${this.login}/plugins > languages > ${this.constructor.name.replace(/([a-z])([A-Z])/, (_, a, b) => `${a} ${b.toLocaleLowerCase()}`).toLocaleLowerCase()} > ${message}`) + } + +} diff --git a/source/plugins/languages/analyzer/cli.mjs b/source/plugins/languages/analyzer/cli.mjs new file mode 100644 index 00000000..0bd0474f --- /dev/null +++ b/source/plugins/languages/analyzer/cli.mjs @@ -0,0 +1,63 @@ +//Imports +import { IndepthAnalyzer } from "./indepth.mjs" +import { RecentAnalyzer } from "./recent.mjs" +import OctokitRest from "@octokit/rest" +import yargsparser from "yargs-parser" + +const help = ` + +`.trim() + +/**Cli */ +export async function cli() { + //Parse inputs + console.log("== metrics indepth analyzer cli ====================") + const argv = yargsparser(process.argv.slice(2)) + if (argv.help) { + console.log(help) + return null + } + const {default: setup} = await import("../../../app/metrics/setup.mjs") + const {conf: {metadata}} = await setup({log: false}) + const {login, _:repositories, mode = "indepth"} = argv + const { + "commits.authoring": authoring, + } = await metadata.plugins.base.inputs({q:{ + "commits.authoring": argv["commits-authoring"] || login, + }, account: "bypass"}) + const { + categories, + "analysis.timeout":_timeout_global, + "analysis.timeout.repositories":_timeout_repositories, + "recent.load":_recent_load, + "recent.days":_recent_days, + } = await metadata.plugins.languages.inputs({q: { + categories:argv.categories || "", + "analysis.timeout": argv["timeout-global"] || "", + "analysis.timeout.repositories": argv["timeout-repositories"] || "", + "recent.load": argv["recent-load"] || "", + "recent.days": argv["recent-days"] || "", + }, account: "bypass"}) + + //Prepare call + const imports = await import("../../../app/metrics/utils.mjs") + const rest = argv.token ? new OctokitRest.Octokit({auth: argv.token, baseUrl: argv["api-url"]}) : null + + //Language analysis + console.log(`analysis mode | ${mode}`) + console.log(`login | ${login}`) + console.log(`rest token | ${rest ? 
"(provided)" : "(none)"}`) + console.log(`commits authoring | ${authoring}`) + console.log(`analysis timeout (global) | ${_timeout_global}`) + switch (mode) { + case "recent":{ + console.log(`events to load | ${_recent_load}`) + console.log(`events maximum age | ${_recent_days}`) + return new RecentAnalyzer(login, {rest, shell:imports, authoring, categories, timeout:{global:_timeout_global, repositories:_timeout_repositories}, load:_recent_load, days:_recent_days}).run({}) + } + case "indepth":{ + console.log(`repositories | ${repositories}`) + return new IndepthAnalyzer(login, {rest, shell:imports, authoring, categories, timeout:{global:_timeout_global, repositories:_timeout_repositories}}).run({repositories}) + } + } +} diff --git a/source/plugins/languages/analyzer/indepth.mjs b/source/plugins/languages/analyzer/indepth.mjs new file mode 100644 index 00000000..31b3391f --- /dev/null +++ b/source/plugins/languages/analyzer/indepth.mjs @@ -0,0 +1,232 @@ +//Imports +import { Analyzer } from "./analyzer.mjs" +import fs from "fs/promises" +import os from "os" +import paths from "path" +import linguist from "linguist-js" + +/**Indepth analyzer */ +export class IndepthAnalyzer extends Analyzer { + /**Constructor */ + constructor() { + super(...arguments) + this.manual = {repositories:[]} + Object.assign(this.results, {verified: {signature: 0}}) + } + + /**Run analyzer */ + run({repositories}) { + this.manual.repositories = repositories.filter(repo => typeof repo === "string") + return super.run(async () => { + await this.gpgarmor() + for (const repository of repositories) { + if (this.results.partial.global) + break + if (this.ignore(repository)) + continue + if (await this.clone(repository)) { + const {path, ref} = this.parse(repository) + await this.analyze(path, {ref}) + await this.clean(path) + } + } + }) + } + + /**Whether to skip a repository or not (bypass filter if repository was manually specified)*/ + ignore(repository) { + if (this.manual.repositories.includes(repository)) { + this.debug(`${repository} has been specified manually, not skipping`) + return false + } + return super.ignore(repository) + } + + /**Populate gpg keys */ + async gpgarmor() { + //Fetch gpg keys (web-flow is GitHub's public key when making changes from web ui) + try { + this.debug("fetching gpg keys") + for (const username of [this.login, "web-flow"]) { + const {data: keys} = await this.rest.users.listGpgKeysForUser({username}) + this.gpg.push(...keys.map(({key_id: id, raw_key: pub, emails}) => ({id, pub, emails}))) + if (username === this.login) { + for (const {email} of this.gpg.flatMap(({emails}) => emails)) { + this.debug(`auto-adding ${email} to commits_authoring (fetched from gpg)`) + this.authoring.push(email) + } + } + } + this.debug(`fetched ${this.gpg.length} gpg keys`) + } + catch (error) { + this.debug(`an error occurred while fetching gpg keys (${error})`) + } + + //Import gpg keys + for (const {id, pub} of this.gpg) { + const path = paths.join(os.tmpdir(), `${this.uid}.${id}.gpg`) + try { + this.debug(`saving gpg ${id} to ${path}`) + await fs.writeFile(path, pub) + await this.shell.run(`gpg ${path}`) + if (process.env.GITHUB_ACTIONS) { + this.debug(`importing gpg ${id}`) + await this.shell.run(`gpg --import ${path}`) + } + else + this.debug("skipping import of gpg keys as we are not in GitHub Actions environment") + } + catch (error) { + this.debug(`an error occurred while importing gpg ${id}, skipping...`) + } + finally { + this.debug(`cleaning ${path}`) + await fs.rm(path, {recursive: true, 
force: true}).catch(error => this.debug(`failed to clean ${path} (${error})`)) + } + } + } + + /**Filter related commits in repository */ + async filter(path, {ref}) { + const commits = new Set() + try { + this.debug(`filtering commits authored by ${this.login} in ${path}`) + for (const author of this.authoring) { + //Search by --author + { + const output = await this.shell.run(`git log --author='${author}' --pretty=format:"%H" --regexp-ignore-case --no-merges`, {cwd:path, env: {LANG: "en_GB"}}, {log:false, debug:false, prefixed: false}) + const hashes = output.split("\n").map(line => line.trim()).filter(line => this.markers.hash.test(line)) + hashes.forEach(hash => commits.add(hash)) + this.debug(`found ${hashes.length} for ${author} (using --author)`) + } + //Search by --grep + { + const output = await this.shell.run(`git log --grep='${author}' --pretty=format:"%H" --regexp-ignore-case --no-merges`, {cwd:path, env: {LANG: "en_GB"}}, {log:false, debug:false, prefixed: false}) + const hashes = output.split("\n").map(line => line.trim()).filter(line => this.markers.hash.test(line)) + hashes.forEach(hash => commits.add(hash)) + this.debug(`found ${hashes.length} for ${author} (using --grep)`) + } + } + //Apply ref range if specified + if (ref) { + this.debug(`filtering commits referenced by ${ref} in ${path}`) + const output = await this.shell.run(`git rev-list --boundary ${ref}`, {cwd:path, env: {LANG: "en_GB"}}, {log:false, debug:false, prefixed: false}) + const hashes = output.split("\n").map(line => line.trim()).filter(line => this.markers.hash.test(line)) + commits.forEach(commit => !hashes.includes(commit) ? commits.delete(commit) : null) + } + this.debug(`found ${commits.size} unique commits authored by ${this.login} in ${path}`) + } + catch (error) { + this.debug(`an error occurred during filtering of commits authored by ${this.login} in ${path} (${error})`) + } + return [...commits] + } + + /**Filter commits in repository */ + async commits(path, {ref}) { + const shas = await this.filter(path, {ref}) + const commits = [] + for (const sha of shas) { + try { + commits.push({ + sha, + name: await this.shell.run(`git log ${sha} --format="%s (authored by %an on %cI)" --max-count=1`, {cwd: path, env: {LANG: "en_GB"}}, {log: false, debug:false, prefixed: false}), + verified: ("verified" in this.results) ? await this.shell.run(`git verify-commit ${sha}`, {cwd: path, env: {LANG: "en_GB"}}, {log: false, debug:false, prefixed: false}).then(() => true).catch(() => null) : null, + editions: await this.editions(path, {sha}), + }) + } + catch (error) { + this.debug(`skipping commit ${sha} (${error})`) + } + } + return commits + } + + /**Fetch commit patch and format it by files*/ + async editions(path, {sha}) { + const editions = [] + let edition = null + let cursor = 0 + await this.shell.spawn("git", ["log", sha, "--format=''", "--max-count=1", "--patch"], {cwd: path, env: {LANG: "en_GB"}}, { + debug:false, + stdout:line => { + try { + //Ignore empty lines or unneeded lines + cursor++ + if ((!/^[-+]/.test(line)) || (!line.trim().length)) + return + + //File marker + if (this.markers.file.test(line)) { + edition = { + path: `${path}/${line.match(this.markers.file)?.groups?.file}`.replace(/\\/g, "/"), + added: {lines:0, bytes:0}, + deleted: {lines:0, bytes:0}, + } + editions.push(edition) + return + } + + //Line markers + if ((edition)&&(this.markers.line.test(line))) { + const {op = "+", content = ""} = line.match(this.markers.line)?.groups ?? 
{} + const size = Buffer.byteLength(content, "utf-8") + edition[{"+":"added", "-":"deleted"}[op]].bytes += size + edition[{"+":"added", "-":"deleted"}[op]].lines++ + return + } + } + catch (error) { + this.debug(`skipping line ${sha}#${cursor} (${error})`) + } + } + }) + return editions + } + + /**Analyze a repository */ + async analyze(path, {ref} = {}) { + const commits = await this.commits(path, {ref}) + return super.analyze(path, {commits}) + } + + /**Run linguist against a commit and compute edited lines and bytes*/ + async linguist(path, {commit, cache}) { + const result = {total:0, files:0, missed:{lines:0, bytes:0}, lines:{}, stats:{}} + const edited = new Set() + const seen = new Set() + for (const edition of commit.editions) { + edited.add(edition.path) + + //Guess file language with linguist (only run it once per sha) + if ((!(edition.path in cache.files))&&(!seen.has(commit.sha))) { + this.debug(`language for file ${edition.path} is not in cache, running linguist at ${commit.sha}`) + await this.shell.run(`git checkout ${commit.sha}`, {cwd: path, env: {LANG: "en_GB"}}, {log: false, debug:false, prefixed: false}) + const {files: {results: files}, languages: {results: languages}} = await linguist(path) + Object.assign(cache.files, files) + Object.assign(cache.languages, languages) + seen.add(commit.sha) + } + if (!(edition.path in cache.files)) + cache.files[edition.path] = "" + + //Aggregate statistics + const language = cache.files[edition.path] + edition.language = language + result.total += edition.added.bytes + if (language === "") { + result.missed.lines += edition.added.lines + result.missed.bytes += edition.added.bytes + } + else { + result.lines[language] = (result.lines[language] ?? 0) + edition.added.lines + result.stats[language] = (result.stats[language] ?? 0) + edition.added.bytes + } + } + result.files = edited.size + return result + } + +} + diff --git a/source/plugins/languages/analyzer/recent.mjs b/source/plugins/languages/analyzer/recent.mjs new file mode 100644 index 00000000..490db229 --- /dev/null +++ b/source/plugins/languages/analyzer/recent.mjs @@ -0,0 +1,144 @@ +//Imports +import { Analyzer } from "./analyzer.mjs" +import {filters} from "../../../app/metrics/utils.mjs" +import linguist from "linguist-js" + +/**Recent analyzer */ +export class RecentAnalyzer extends Analyzer { + /**Constructor */ + constructor() { + super(...arguments) + this.days = arguments[1]?.days ?? 0 + this.load = arguments[1]?.load ?? 
0 + Object.assign(this.results, {days:this.days}) + } + + /**Run analyzer */ + run() { + return super.run(async () => { + await this.analyze("/dev/null") + }) + } + + /**Analyze a repository */ + async analyze(path) { + const patches = await this.patches() + return super.analyze(path, {commits:patches}) + } + + /**Fetch patches */ + async patches() { + //Fetch commits from recent activity + this.debug(`fetching patches from last ${this.days || ""} days up to ${this.load || "∞"} events`) + const commits = [], pages = Math.ceil((this.load || Infinity) / 100) + if (this.context.mode === "repository") { + try { + const {data:{default_branch:branch}} = await this.rest.repos.get(this.context) + this.context.branch = branch + this.results.branch = branch + this.debug(`default branch for ${this.context.owner}/${this.context.repo} is ${branch}`) + } + catch (error) { + this.debug(`failed to get default branch for ${this.context.owner}/${this.context.repo} (${error})`) + } + } + try { + for (let page = 1; page <= pages; page++) { + this.debug(`fetching events page ${page}`) + commits.push( + ...(await (this.context.mode === "repository" ? this.rest.activity.listRepoEvents(this.context) : this.rest.activity.listEventsForAuthenticatedUser({username: this.login, per_page: 100, page}))).data + .filter(({type, payload}) => (type === "PushEvent")&&((this.context.mode !== "repository")||((this.context.mode === "repository")&&(payload?.ref?.includes?.(`refs/heads/${this.context.branch}`))))) + .filter(({actor}) => (this.account === "organization")||(this.context.mode === "repository") ? true : !filters.text(actor.login, [this.login], {debug:false})) + .filter(({repo: {name: repo}}) => !this.ignore(repo)) + .filter(({created_at}) => ((!this.days)||(new Date(created_at) > new Date(Date.now() - this.days * 24 * 60 * 60 * 1000)))), + ) + } + } + catch { + this.debug("no more page to load") + } + this.debug(`fetched ${commits.length} commits`) + this.results.latest = Math.round((new Date().getTime() - new Date(commits.slice(-1).shift()?.created_at).getTime()) / (1000 * 60 * 60 * 24)) + this.results.commits = commits.length + + //Retrieve edited files and filter edited lines (those starting with +/-) from patches + this.debug("fetching patches") + const patches = [ + ...await Promise.allSettled( + commits + .flatMap(({payload}) => payload.commits) + .filter(({committer}) => filters.text(committer?.email, this.authoring, {debug:false})) + .map(commit => commit.url) + .map(async commit => (await this.rest.request(commit)).data), + ), + ] + .filter(({status}) => status === "fulfilled") + .map(({value}) => value) + .filter(({parents}) => parents.length <= 1) + .map(({sha, commit:{message, committer}, verification, files}) => ({ + sha, + name:`${message} (authored by ${committer.name} on ${committer.date})`, + verified:verification?.verified ?? null, + editions:files.map(({filename, patch = ""}) => { + const edition = { + path: filename, + added: {lines:0, bytes:0}, + deleted: {lines:0, bytes:0}, + patch, + } + for (const line of patch.split("\n")) { + if ((!/^[-+]/.test(line)) || (!line.trim().length)) + continue + if (this.markers.line.test(line)) { + const {op = "+", content = ""} = line.match(this.markers.line)?.groups ?? 
{} + const size = Buffer.byteLength(content, "utf-8") + edition[{"+":"added", "-":"deleted"}[op]].bytes += size + edition[{"+":"added", "-":"deleted"}[op]].lines++ + continue + } + } + return edition + }) + })) + return patches + } + + /**Run linguist against a commit and compute edited lines and bytes*/ + async linguist(_, {commit, cache:{languages}}) { + const cache = {files:{}, languages} + const result = {total:0, files:0, missed:{lines:0, bytes:0}, lines:{}, stats:{}, languages:{}} + const edited = new Set() + for (const edition of commit.editions) { + edited.add(edition.path) + + //Guess file language with linguist + const {files: {results: files}, languages: {results: languages}, unknown} = await linguist(edition.path, {fileContent:edition.patch}) + Object.assign(cache.files, files) + Object.assign(cache.languages, languages) + if (!(edition.path in cache.files)) + cache.files[edition.path] = "" + + //Aggregate statistics + const language = cache.files[edition.path] + edition.language = language + const numbers = edition.patch + .split("\n") + .filter(line => this.markers.line.test(line)) + .map(line => Buffer.byteLength(line.substring(1).trimStart(), "utf-8")) + const added = numbers.reduce((a, b) => a + b, 0) + result.total += added + if (language === "") { + result.missed.lines += numbers.length + result.missed.bytes += unknown.bytes + } + else { + result.lines[language] = (result.lines[language] ?? 0) + numbers.length + result.stats[language] = (result.stats[language] ?? 0) + added + } + } + result.files = edited.size + result.languages = cache.languages + return result + } + +} diff --git a/source/plugins/languages/analyzers.mjs b/source/plugins/languages/analyzers.mjs index 6110b633..0f243c37 100644 --- a/source/plugins/languages/analyzers.mjs +++ b/source/plugins/languages/analyzers.mjs @@ -1,312 +1,22 @@ //Imports -import linguist from "linguist-js" +import { IndepthAnalyzer } from "./analyzer/indepth.mjs" +import { RecentAnalyzer } from "./analyzer/recent.mjs" +import { cli } from "./analyzer/cli.mjs" /**Indepth analyzer */ -export async function indepth({login, data, imports, repositories, gpg}, {skipped, categories, timeout}) { - return new Promise(async solve => { - //Results - const results = {partial: false, total: 0, lines: {}, stats: {}, colors: {}, commits: 0, files: 0, missed: {lines: 0, bytes: 0, commits: 0}, verified: {signature: 0}} - - //Timeout - if (Number.isFinite(timeout)) { - console.debug(`metrics/compute/${login}/plugins > languages > timeout set to ${timeout}m`) - setTimeout(() => { - results.partial = true - console.debug(`metrics/compute/${login}/plugins > languages > reached maximum execution time of ${timeout}m for analysis`) - solve(results) - }, timeout * 60 * 1000) - } - - //GPG keys imports - for (const {id, pub} of gpg) { - const path = imports.paths.join(imports.os.tmpdir(), `${data.user.databaseId}.${id}.gpg`) - console.debug(`metrics/compute/${login}/plugins > languages > saving gpg ${id} to ${path}`) - try { - await imports.fs.writeFile(path, pub) - if (process.env.GITHUB_ACTIONS) { - console.debug(`metrics/compute/${login}/plugins > languages > importing gpg ${id}`) - await imports.run(`gpg --import ${path}`) - } - else { - console.debug(`metrics/compute/${login}/plugins > languages > skipping import of gpg ${id}`) - } - } - catch (error) { - console.debug(`metrics/compute/${login}/plugins > languages > indepth > an error occurred while importing gpg ${id}, skipping...`) - } - finally { - //Cleaning - 
console.debug(`metrics/compute/${login}/plugins > languages > indepth > cleaning ${path}`) - await imports.fs.rm(path, {recursive: true, force: true}).catch(error => console.debug(`metrics/compute/${login}/plugins > languages > indepth > failed to clean ${path} (${error})`)) - } - } - - //Compute repositories stats from fetched repositories - for (const repository of repositories) { - //Early break - if (results.partial) - break - - //Skip repository if asked - if (!imports.filters.repo(repository, skipped)) - continue - - //Repository handle - const repo = `${repository.owner.login}/${repository.name}` - console.debug(`metrics/compute/${login}/plugins > languages > indepth > checking ${repo}`) - - //Temporary directory - const path = imports.paths.join(imports.os.tmpdir(), `${data.user.databaseId}-${repo.replace(/[^\w]/g, "_")}`) - console.debug(`metrics/compute/${login}/plugins > languages > indepth > cloning ${repo} to temp dir ${path}`) - - //Process - try { - //Git clone into temporary directory - await imports.fs.rm(path, {recursive: true, force: true}) - await imports.fs.mkdir(path, {recursive: true}) - const git = await imports.git(path) - await git.clone(`https://github.com/${repo}`, ".").status() - - //Analyze repository - await analyze(arguments[0], {results, path, categories}) - } - catch (error) { - console.debug(`metrics/compute/${login}/plugins > languages > indepth > an error occurred while processing ${repo}, skipping...`) - } - finally { - //Cleaning - console.debug(`metrics/compute/${login}/plugins > languages > indepth > cleaning temp dir ${path}`) - await imports.fs.rm(path, {recursive: true, force: true}).catch(error => console.debug(`metrics/compute/${login}/plugins > languages > indepth > failed to clean ${path} (${error})`)) - } - } - solve(results) - }) +export async function indepth({login, data, imports, rest, context, repositories}, {skipped, categories, timeout}) { + return new IndepthAnalyzer(login, {shell:imports, uid:data.user.databaseId, skipped, authoring:data.shared["commits.authoring"], timeout, rest, context, categories}).run({repositories}) } /**Recent languages activity */ -export async function recent({login, data, imports, rest, account}, {skipped = [], categories, days = 0, load = 0, tempdir = "recent", timeout}) { - return new Promise(async solve => { - //Results - const results = {partial: false, total: 0, lines: {}, stats: {}, colors: {}, commits: 0, files: 0, missed: {lines: 0, bytes: 0, commits: 0}, days} - - //Timeout - if (Number.isFinite(timeout)) { - console.debug(`metrics/compute/${login}/plugins > languages > timeout set to ${timeout}m`) - setTimeout(() => { - results.partial = true - console.debug(`metrics/compute/${login}/plugins > languages > reached maximum execution time of ${timeout}m for analysis`) - solve(results) - return - }, timeout * 60 * 1000) - } - - //Get user recent activity - console.debug(`metrics/compute/${login}/plugins > languages > querying api`) - const commits = [], pages = Math.ceil(load / 100) - try { - for (let page = 1; page <= pages; page++) { - console.debug(`metrics/compute/${login}/plugins > languages > loading page ${page}`) - commits.push( - ...(await rest.activity.listEventsForAuthenticatedUser({username: login, per_page: 100, page})).data - .filter(({type}) => type === "PushEvent") - .filter(({actor}) => account === "organization" ? 
true : actor.login?.toLocaleLowerCase() === login.toLocaleLowerCase()) - .filter(({repo: {name: repo}}) => imports.filters.repo(repo, skipped)) - .filter(({created_at}) => new Date(created_at) > new Date(Date.now() - days * 24 * 60 * 60 * 1000)), - ) - } - } - catch { - console.debug(`metrics/compute/${login}/plugins > languages > no more page to load`) - } - console.debug(`metrics/compute/${login}/plugins > languages > ${commits.length} commits loaded`) - results.latest = Math.round((new Date().getTime() - new Date(commits.slice(-1).shift()?.created_at).getTime()) / (1000 * 60 * 60 * 24)) - - //Retrieve edited files and filter edited lines (those starting with +/-) from patches - console.debug(`metrics/compute/${login}/plugins > languages > loading patches`) - console.debug(`metrics/compute/${login}/plugins > languages > commits authoring set to ${JSON.stringify(data.shared["commits.authoring"])}`) - let patches = [ - ...await Promise.allSettled( - commits - .flatMap(({payload}) => payload.commits) - .filter(({author}) => data.shared["commits.authoring"].filter(authoring => author?.login?.toLocaleLowerCase().includes(authoring) || author?.email?.toLocaleLowerCase().includes(authoring) || author?.name?.toLocaleLowerCase().includes(authoring)).length) - .map(commit => commit.url) - .map(async commit => (await rest.request(commit)).data), - ), - ] - .filter(({status}) => status === "fulfilled") - .map(({value}) => value) - .filter(({parents}) => parents.length <= 1) - .map(({files}) => files) - .flatMap(files => files.map(file => ({name: imports.paths.basename(file.filename), directory: imports.paths.dirname(file.filename), patch: file.patch ?? "", repo: file.raw_url?.match(/(?<=^https:..github.com\/)(?.*)(?=\/raw)/)?.groups.repo ?? "_"}))) - .map(({name, directory, patch, repo}) => ({name, directory: `${repo.replace(/[/]/g, "@")}/${directory}`, patch: patch.split("\n").filter(line => /^[+]/.test(line)).map(line => line.substring(1)).join("\n")})) - - //Temporary directory - const path = imports.paths.join(imports.os.tmpdir(), `${data.user.databaseId}-${tempdir}`) - console.debug(`metrics/compute/${login}/plugins > languages > creating temp dir ${path} with ${patches.length} files`) - - //Process - try { - //Save patches in temporary directory matching respective repository and filename - await imports.fs.rm(path, {recursive: true, force: true}) - await imports.fs.mkdir(path, {recursive: true}) - await Promise.all(patches.map(async ({name, directory, patch}) => { - await imports.fs.mkdir(imports.paths.join(path, directory), {recursive: true}) - await imports.fs.writeFile(imports.paths.join(path, directory, name), patch) - })) - - //Process temporary repositories - for (const directory of await imports.fs.readdir(path)) { - //Pull gitattributes if possible - for (const branch of ["main", "master"]) { - const repo = directory.replace("@", "/") - try { - await imports.fs.writeFile(imports.paths.join(path, directory, ".gitattributes"), await imports.fetch(`https://raw.githubusercontent.com/${repo}/${branch}/.gitattributes`).then(response => response.text()).catch(() => "")) - console.debug(`metrics/compute/${login}/plugins > languages > successfully fetched .gitattributes for ${repo}`) - break - } - catch { - console.debug(`metrics/compute/${login}/plugins > languages > cannot load .gitattributes on branch ${branch} for ${repo}`) - } - } - - //Create temporary git repository - console.debug(`metrics/compute/${login}/plugins > languages > creating temp git repository for ${directory}`) - const 
git = await imports.git(imports.paths.join(path, directory)) - await git.init().add(".").addConfig("user.name", data.shared["commits.authoring"]?.[0] ?? login).addConfig("user.email", "<>").commit("linguist").status() - - //Analyze repository - await analyze(arguments[0], {results, path: imports.paths.join(path, directory), categories}) - - //Since we reproduce a "partial repository" with a single commit, use number of commits retrieved instead - results.commits = commits.length - } - } - catch { - console.debug(`metrics/compute/${login}/plugins > languages > an error occurred while processing recently used languages`) - } - finally { - //Cleaning - console.debug(`metrics/compute/${login}/plugins > languages > cleaning temp dir ${path}`) - await imports.fs.rm(path, {recursive: true, force: true}).catch(error => console.debug(`metrics/compute/${login}/plugins > languages > indepth > failed to clean ${path} (${error})`)) - } - solve(results) - }) -} - -/**Analyze a single repository */ -async function analyze({login, imports, data}, {results, path, categories = ["programming", "markup"]}) { - //Gather language data - console.debug(`metrics/compute/${login}/plugins > languages > indepth > running linguist`) - const {files: {results: files}, languages: {results: languageResults}} = await linguist(path) - Object.assign(results.colors, Object.fromEntries(Object.entries(languageResults).map(([lang, {color}]) => [lang, color]))) - - //Processing diff - const per_page = 1 - const edited = new Set() - console.debug(`metrics/compute/${login}/plugins > languages > indepth > checking git log`) - try { - await imports.run("git log --max-count=1", {cwd: path}) - } - catch { - console.debug(`metrics/compute/${login}/plugins > languages > indepth > repo seems empty or impossible to git log, skipping`) - return - } - const pending = [] - for (let page = 0;; page++) { - try { - console.debug(`metrics/compute/${login}/plugins > languages > indepth > processing commits ${page * per_page} from ${(page + 1) * per_page}`) - let empty = true, file = null, lang = null - await imports.spawn("git", ["log", ...data.shared["commits.authoring"].map(authoring => `--author="${authoring}"`), "--regexp-ignore-case", "--format=short", "--no-merges", "--patch", `--max-count=${per_page}`, `--skip=${page * per_page}`], {cwd: path}, { - stdout(line) { - try { - //Unflag empty output - if ((empty) && (line.trim().length)) - empty = false - //Commits counter - if (/^commit [0-9a-f]{40}$/.test(line)) { - if (results.verified) { - const sha = line.match(/[0-9a-f]{40}/)?.[0] - if (sha) { - pending.push( - imports.run(`git verify-commit ${sha}`, {cwd: path, env: {LANG: "en_GB"}}, {log: false, prefixed: false}) - .then(() => results.verified.signature++) - .catch(() => null), - ) - } - } - results.commits++ - return - } - //Ignore empty lines or unneeded lines - if ((!/^[+]/.test(line)) || (!line.length)) - return - //File marker - if (/^[+]{3}\sb[/](?[\s\S]+)$/.test(line)) { - file = `${path}/${line.match(/^[+]{3}\sb[/](?[\s\S]+)$/)?.groups?.file}`.replace(/\\/g, "/") - lang = files[file] ?? "" - if ((lang) && (lang !== "") && (!categories.includes(languageResults[lang].type))) - lang = null - edited.add(file) - return - } - //Ignore unknown languages - if (!lang) - return - //Added line marker - if (/^[+]\s*(?[\s\S]+)$/.test(line)) { - const size = Buffer.byteLength(line.match(/^[+]\s*(?[\s\S]+)$/)?.groups?.line ?? 
"", "utf-8") - results.total += size - if (lang === "") { - results.missed.lines++ - results.missed.bytes += size - } - else { - results.stats[lang] = (results.stats[lang] ?? 0) + size - results.lines[lang] = (results.lines[lang] ?? 0) + 1 - } - } - } - catch (error) { - console.debug(`metrics/compute/${login}/plugins > languages > indepth > an error occurred while processing line (${error.message}), skipping...`) - } - }, - }) - if (empty) { - console.debug(`metrics/compute/${login}/plugins > languages > indepth > no more commits`) - break - } - } - catch { - console.debug(`metrics/compute/${login}/plugins > languages > indepth > an error occurred on page ${page}, skipping...`) - results.missed.commits += per_page - } - } - await Promise.allSettled(pending) - results.files += edited.size +export async function recent({login, data, imports, rest, context, account}, {skipped = [], categories, days = 0, load = 0, timeout}) { + return new RecentAnalyzer(login, {shell:imports, uid:data.user.databaseId, skipped, authoring:data.shared["commits.authoring"], timeout, account, rest, context, days, categories, load}).run() } //import.meta.main if (/languages.analyzers.mjs$/.test(process.argv[1])) { - ;(async function() { - //Parse inputs - const [_authoring, path] = process.argv.slice(2) - if ((!_authoring) || (!path)) { - console.log("Usage is:\n npm run indepth -- \n\n") - process.exit(1) - } - const {default: setup} = await import("../../app/metrics/setup.mjs") - const {conf: {metadata}} = await setup({log: false}) - const {"commits.authoring": authoring} = await metadata.plugins.base.inputs({q: {"commits.authoring": _authoring}, account: "bypass"}) - const data = {shared: {"commits.authoring": authoring}} - - //Prepare call - const imports = await import("../../app/metrics/utils.mjs") - const results = {total: 0, lines: {}, colors: {}, stats: {}, missed: {lines: 0, bytes: 0, commits: 0}} - console.debug = log => /exited with code null/.test(log) ? 
console.error(log.replace(/^.*--max-count=(?\d+) --skip=(?\d+).*$/, (_, step, start) => `error: skipped commits ${start} from ${Number(start) + Number(step)}`)) : null - - //Analyze repository - console.log(`commits authoring | ${authoring}\nrepository path | ${path}\n`) - await analyze({login: "cli", data, imports}, {results, path}) - console.log(results) + (async () => { + console.log(await cli()) + process.exit(0) })() } diff --git a/source/plugins/languages/index.mjs b/source/plugins/languages/index.mjs index ecd2d039..53c32b48 100644 --- a/source/plugins/languages/index.mjs +++ b/source/plugins/languages/index.mjs @@ -13,16 +13,18 @@ export default async function({login, data, imports, q, rest, account}, {enabled let context = {mode: "user"} if (q.repo) { console.debug(`metrics/compute/${login}/plugins > languages > switched to repository mode`) - context = {...context, mode: "repository"} + const {owner, repo} = data.user.repositories.nodes.map(({name: repo, owner: {login: owner}}) => ({repo, owner})).shift() + context = {...context, mode: "repository", owner, repo} } //Load inputs - let {ignored, skipped, other, colors, aliases, details, threshold, limit, indepth, "analysis.timeout": timeout, sections, categories, "recent.categories": _recent_categories, "recent.load": _recent_load, "recent.days": _recent_days} = imports.metadata.plugins.languages + let {ignored, skipped, other, colors, aliases, details, threshold, limit, indepth, "indepth.custom":_indepth_custom, "analysis.timeout": _timeout_global, "analysis.timeout.repositories": _timeout_repositories, sections, categories, "recent.categories": _recent_categories, "recent.load": _recent_load, "recent.days": _recent_days} = imports.metadata.plugins.languages .inputs({ data, account, q, }) + const timeout = {global:_timeout_global, repositories:_timeout_repositories} threshold = (Number(threshold.replace(/%$/, "")) || 0) / 100 skipped.push(...data.shared["repositories.skipped"]) if (!limit) @@ -39,7 +41,7 @@ export default async function({login, data, imports, q, rest, account}, {enabled console.debug(`metrics/compute/${login}/plugins > languages > custom colors ${JSON.stringify(colors)}`) //Unique languages - const repositories = [...data.user.repositories.nodes, ...data.user.repositoriesContributedTo.nodes] + const repositories = context.mode === "repository" ? data.user.repositories.nodes : [...data.user.repositories.nodes, ...data.user.repositoriesContributedTo.nodes] const unique = new Set(repositories.flatMap(repository => repository.languages.edges.map(({node: {name}}) => name))).size //Iterate through user's repositories and retrieve languages data @@ -62,47 +64,28 @@ export default async function({login, data, imports, q, rest, account}, {enabled } //Recently used languages - if ((sections.includes("recently-used")) && (context.mode === "user") && (imports.metadata.plugins.languages.extras("indepth", {extras}))) { + if ((sections.includes("recently-used")) && (imports.metadata.plugins.languages.extras("indepth", {extras}))) { try { console.debug(`metrics/compute/${login}/plugins > languages > using recent analyzer`) - languages["stats.recent"] = await recent_analyzer({login, data, imports, rest, account}, {skipped, categories: _recent_categories ?? categories, days: _recent_days, load: _recent_load, timeout}) + languages["stats.recent"] = await recent_analyzer({login, data, imports, rest, context, account}, {skipped, categories: _recent_categories ?? 
categories, days: _recent_days, load: _recent_load, timeout}) Object.assign(languages.colors, languages["stats.recent"].colors) } catch (error) { - console.debug(`metrics/compute/${login}/plugins > languages > ${error}`) + console.debug(`metrics/compute/${login}/plugins > languages > recent analyzer > ${error}`) } } //Indepth mode if ((indepth) && (imports.metadata.plugins.languages.extras("indepth", {extras}))) { - //Fetch gpg keys (web-flow is GitHub's public key when making changes from web ui) - const gpg = [] - try { - for (const username of [login, "web-flow"]) { - const {data: keys} = await rest.users.listGpgKeysForUser({username}) - gpg.push(...keys.map(({key_id: id, raw_key: pub, emails}) => ({id, pub, emails}))) - if (username === login) { - for (const {email} of gpg.flatMap(({emails}) => emails)) { - console.debug(`metrics/compute/${login}/plugins > languages > auto-adding ${email} to commits_authoring (fetched from gpg)`) - data.shared["commits.authoring"].push(email) - } - } - } - } - catch (error) { - console.debug(`metrics/compute/${login}/plugins > languages > ${error}`) - } - - //Analyze languages try { console.debug(`metrics/compute/${login}/plugins > languages > switching to indepth mode (this may take some time)`) const existingColors = languages.colors - Object.assign(languages, await indepth_analyzer({login, data, imports, repositories, gpg}, {skipped, categories, timeout})) + Object.assign(languages, await indepth_analyzer({login, data, imports, rest, context, repositories:repositories.concat(_indepth_custom)}, {skipped, categories, timeout})) Object.assign(languages.colors, existingColors) - console.debug(`metrics/compute/${login}/plugins > languages > indepth analysis missed ${languages.missed.commits} commits`) + console.debug(`metrics/compute/${login}/plugins > languages > indepth analysis processed successfully ${languages.commits} and missed ${languages.missed.commits} commits in ${languages.elapsed.toFixed(2)}m`) } catch (error) { - console.debug(`metrics/compute/${login}/plugins > languages > ${error}`) + console.debug(`metrics/compute/${login}/plugins > languages > indepth analyzer > ${error}`) } } @@ -122,7 +105,7 @@ export default async function({login, data, imports, q, rest, account}, {enabled //Compute languages stats for (const {section, stats = {}, lines = {}, missed = {bytes: 0}, total = 0} of [{section: "favorites", stats: languages.stats, lines: languages.lines, total: languages.total, missed: languages.missed}, {section: "recent", ...languages["stats.recent"]}]) { - console.debug(`metrics/compute/${login}/plugins > languages > computing stats ${section}`) + console.debug(`metrics/compute/${login}/plugins > languages > formatting stats ${section}`) languages[section] = Object.entries(stats).filter(([name]) => imports.filters.text(name, ignored)).sort(([_an, a], [_bn, b]) => b - a).slice(0, limit).map(([name, value]) => ({name, value, size: value, color: languages.colors[name], x: 0})).filter(({value}) => value / total > threshold) if (other) { let value = indepth ? 
missed.bytes : Object.entries(stats).filter(([name]) => !Object.values(languages[section]).map(({name}) => name).includes(name)).reduce((a, [_, b]) => a + b, 0)
diff --git a/source/plugins/languages/metadata.yml b/source/plugins/languages/metadata.yml
index 4c09f1bd..dc40837f 100644
--- a/source/plugins/languages/metadata.yml
+++ b/source/plugins/languages/metadata.yml
@@ -117,13 +117,43 @@ inputs:
      - metrics.run.tempdir
      - metrics.run.git

+  plugin_languages_indepth_custom:
+    description: |
+      Indepth mode - Custom repositories
+
+      Specify a list of additional repositories to analyze.
+
+      Below are the supported syntax formats:
+      - `owner/repo` (e.g. `lowlighter/metrics`)
+      - `owner/repo@branch` (e.g. `lowlighter/metrics@main`)
+      - `owner/repo@branch:commits` (e.g. `lowlighter/metrics@main:v1.0..v1.1`)
+      - See the [`git rev-list`](https://git-scm.com/docs/git-rev-list#_description) documentation for more information about the `commits` syntax
+
+      It is possible to specify repositories that are not hosted on [github.com](https://github.com) by passing a full URL instead.
+      In this case, the repository must be directly accessible.
+
+      > ℹ️ This option bypasses [`plugin_languages_skipped`](/source/plugins/languages/README.md#plugin_languages_skipped)
+    type: array
+    format: comma-separated
+    default: ""
+    example: lowlighter/metrics, lowlighter/metrics@main, lowlighter/metrics@latest:v1.0..v1.1
+
   plugin_languages_analysis_timeout:
     description: |
       Indepth mode - Analysis timeout
     type: number
     default: 15
     min: 1
-    max: 30
+    max: 60
+
+  plugin_languages_analysis_timeout_repositories:
+    description: |
+      Indepth mode - Analysis timeout (repositories)
+    type: number
+    default: 7.5
+    min: 0
+    max: 15
+    zero: disable

   plugin_languages_categories:
     description: |
@@ -151,7 +181,7 @@ inputs:

   plugin_languages_recent_load:
     description: |
-      Events to load (recently-used section)
+      Indepth mode - Events to load (recently-used section)
     type: number
     default: 300
     min: 100
@@ -159,7 +189,7 @@ inputs:

   plugin_languages_recent_days:
     description: |
-      Events maximum age (day, recently-used section)
+      Indepth mode - Events maximum age (day, recently-used section)
     type: number
     default: 14
     min: 0
diff --git a/source/templates/repository/partials/languages.ejs b/source/templates/repository/partials/languages.ejs
index 14f7c0d5..ce67a1fa 100644
--- a/source/templates/repository/partials/languages.ejs
+++ b/source/templates/repository/partials/languages.ejs
@@ -21,6 +21,9 @@
        <% if (section === "recently-used") { %>
          estimation from <%= plugins.languages["stats.recent"]?.files %> edited file<%= s(plugins.languages["stats.recent"]?.files) %>
          from <%= plugins.languages["stats.recent"]?.commits %> commit<%= s(plugins.languages["stats.recent"]?.commits) %>
+          <% if (plugins.languages["stats.recent"]?.branch) { %>
+            on branch <%= plugins.languages["stats.recent"].branch %>
+          <% } %>
          <% { const days = plugins.languages["stats.recent"]?.latest ??
plugins.languages["stats.recent"]?.days; if (days) { %> over last <%= days %> day<%= s(days) %> <% } } %> diff --git a/tests/mocks/api/github/rest/repos/get.mjs b/tests/mocks/api/github/rest/repos/get.mjs new file mode 100644 index 00000000..e26d73b1 --- /dev/null +++ b/tests/mocks/api/github/rest/repos/get.mjs @@ -0,0 +1,30 @@ +/**Mocked data */ +export default async function({faker}, target, that, [{owner, repo}]) { + console.debug("metrics/compute/mocks > mocking rest api result > rest.repos.get") + return ({ + status: 200, + url: `https://api.github.com/repos/${owner}/${repo}`, + headers: { + server: "GitHub.com", + status: "200 OK", + "x-oauth-scopes": "repo", + }, + data: { + id: faker.datatype.number(100000), + name: repo, + full_name: `${owner}/${repo}`, + private: false, + owner: { + login: owner, + id: faker.datatype.number(100000), + }, + description: faker.lorem.sentences(), + created_at: faker.date.past(), + license: { + key: "mit", + name: "MIT License", + }, + default_branch: "main", + }, + }) +} diff --git a/tests/mocks/api/github/rest/request.mjs b/tests/mocks/api/github/rest/request.mjs index 9d6e188d..8a893185 100644 --- a/tests/mocks/api/github/rest/request.mjs +++ b/tests/mocks/api/github/rest/request.mjs @@ -52,6 +52,7 @@ export default async function({faker}, target, that, args) { patch: '@@ -0,0 +1,5 @@\n+//Imports\n+ import app from "./src/app.mjs"\n+\n+//Start app\n+ await app()\n\\ No newline at end of file', }, ], + parents: [] }, }) }
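The refactor moves the indepth logic into reusable analyzer classes. To make the new surface concrete, here is a minimal sketch (not part of the patch; the login, token and repository names are placeholders, and imports assume the repository root as working directory) of how `IndepthAnalyzer` could be driven directly, mirroring what `analyzers.mjs` and the new `cli.mjs` do:

```js
//Sketch: driving the new IndepthAnalyzer outside the plugin (all values are placeholders)
import {IndepthAnalyzer} from "./source/plugins/languages/analyzer/indepth.mjs"
import OctokitRest from "@octokit/rest"
import * as shell from "./source/app/metrics/utils.mjs"

const rest = new OctokitRest.Octokit({auth: process.env.GITHUB_TOKEN}) //used to fetch GPG keys
const analyzer = new IndepthAnalyzer("octocat", {
  rest,
  shell,                                    //run()/spawn() helpers from app/metrics/utils.mjs
  authoring: ["octocat@users.noreply.github.com"], //matched against commit authors (commits_authoring)
  categories: ["programming", "markup"],    //language categories kept in the results
  timeout: {global: 15, repositories: 7.5}, //minutes, mirroring the new plugin options
})
const results = await analyzer.run({repositories: ["octocat/hello-world@main"]})
console.log(results.stats, results.missed, results.partial)
```

The `recent` path works the same way through `RecentAnalyzer`, which fetches push events from the REST API instead of cloning repositories.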