From b68d17614d44a0dfa3e9d3784eb207a0b063fe22 Mon Sep 17 00:00:00 2001 From: sudacode Date: Sun, 10 May 2026 22:19:21 -0700 Subject: [PATCH] Add canonical URLs and sitemap dedup for docs homepage indexability - Emit self-referential canonical `<link>` tags on every VitePress page - Filter duplicate /README entry from generated sitemap - Extract DOCS_HOSTNAME constant; update Plausible test to match - Add seo.test.ts covering canonical generation and sitemap filtering --- ...42 - Improve-docs-homepage-indexability.md | 54 +++++++++++++++++++ changes/342-docs-indexability.md | 4 ++ docs-site/.vitepress/config.ts | 25 ++++++++- docs-site/package.json | 2 +- docs-site/plausible.test.ts | 3 +- docs-site/seo.test.ts | 40 ++++++++++++++ 6 files changed, 125 insertions(+), 3 deletions(-) create mode 100644 backlog/tasks/task-342 - Improve-docs-homepage-indexability.md create mode 100644 changes/342-docs-indexability.md create mode 100644 docs-site/seo.test.ts diff --git a/backlog/tasks/task-342 - Improve-docs-homepage-indexability.md b/backlog/tasks/task-342 - Improve-docs-homepage-indexability.md new file mode 100644 index 00000000..94ce1d0f --- /dev/null +++ b/backlog/tasks/task-342 - Improve-docs-homepage-indexability.md @@ -0,0 +1,54 @@ +--- +id: TASK-342 +title: Improve docs homepage indexability +status: Done +assignee: + - codex +created_date: '2026-05-11 04:53' +updated_date: '2026-05-11 05:06' +labels: + - docs + - seo +dependencies: [] +references: + - 'https://docs.subminer.moe/' + - >- + https://developers.google.com/search/docs/crawling-indexing/consolidate-duplicate-urls +priority: medium +--- + +## Description + + +Google Search Console reports https://docs.subminer.moe/ as Crawled - currently not indexed. Investigate and improve the docs-site homepage and crawl signals so the root docs page has clear unique content, self-consistent canonical metadata, and no avoidable duplicate sitemap entry from VitePress README output. 
Keep the change scoped to docs-site SEO/content and verify with docs tests/build plus live-style generated HTML inspection. + + +## Acceptance Criteria + +- [x] #1 Generated docs HTML for https://docs.subminer.moe/ declares a self-referential canonical URL and does not declare noindex. +- [x] #2 Generated sitemap does not advertise a duplicate /README docs page when the docs homepage is the intended canonical root. +- [x] #3 Docs tests cover the SEO/indexing signals so canonical and duplicate sitemap regressions are caught. +- [x] #4 Docs build/test commands pass or any blocker is documented with exact failure output. + + +## Implementation Plan + + +1. Add failing docs-site tests for canonical head generation and sitemap duplicate filtering. 2. Update VitePress config to emit canonical URLs and exclude /README from the sitemap. 3. Strengthen docs homepage content with unique overview/decision-path copy. 4. Run docs tests/build and inspect generated HTML/sitemap. + + +## Implementation Notes + + +2026-05-11: Added docs SEO tests, root canonical generation, sitemap README filtering, and stronger docs homepage orientation copy. Verified generated index.html contains self canonical/no noindex and generated sitemap omits /README. + +2026-05-11: Added changes/342-docs-indexability.md and verified changelog lint. Final verification: bun run --cwd docs-site test, bun run docs:build, bun x prettier --check touched docs/changelog files, bun run changelog:lint. + +2026-05-11: User requested removal of the added homepage orientation block. Removed it, removed the matching content test, and updated the changelog fragment. Generated index.html no longer contains the removed headings while preserving canonical metadata. + + +## Final Summary + + +Improved docs homepage indexability signals for https://docs.subminer.moe/. Added self-referential canonical generation for VitePress pages and filtered the duplicate /README page out of the generated sitemap. 
Added docs SEO regression coverage, kept the existing Plausible hostname test aligned with the shared hostname constant, and added a docs changelog fragment. Per follow-up request, the extra homepage orientation copy was removed before handoff. Verification passed: docs tests, docs build, targeted generated HTML/sitemap inspection, Prettier check for touched files, and changelog lint. + diff --git a/changes/342-docs-indexability.md b/changes/342-docs-indexability.md new file mode 100644 index 00000000..870024c3 --- /dev/null +++ b/changes/342-docs-indexability.md @@ -0,0 +1,4 @@ +type: docs +area: docs + +- Improved the docs homepage indexing signals with canonical URLs and a cleaner sitemap. diff --git a/docs-site/.vitepress/config.ts b/docs-site/.vitepress/config.ts index 42a3c326..071c680e 100644 --- a/docs-site/.vitepress/config.ts +++ b/docs-site/.vitepress/config.ts @@ -1,3 +1,15 @@ +const DOCS_HOSTNAME = 'https://docs.subminer.moe'; + +function pageToCanonicalHref(page: string): string | null { + if (page === '404.md') return null; + + const route = page + .replace(/(^|\/)index\.md$/, '') + .replace(/\.md$/, '') + .replace(/\/$/, ''); + return route ? `${DOCS_HOSTNAME}/${route}` : `${DOCS_HOSTNAME}/`; +} + export default { title: 'SubMiner Docs', description: @@ -34,7 +46,18 @@ export default { appearance: 'dark', cleanUrls: true, metaChunk: true, - sitemap: { hostname: 'https://docs.subminer.moe' }, + sitemap: { + hostname: DOCS_HOSTNAME, + transformItems(items) { + return items.filter( + (item) => item.url !== 'README' && item.url !== `${DOCS_HOSTNAME}/README`, + ); + }, + }, + transformHead({ page }) { + const href = pageToCanonicalHref(page); + return href ? 
[['link', { rel: 'canonical', href }]] : []; + }, lastUpdated: true, srcExclude: ['subagents/**'], markdown: { diff --git a/docs-site/package.json b/docs-site/package.json index e1e2ffa2..033aec36 100644 --- a/docs-site/package.json +++ b/docs-site/package.json @@ -8,7 +8,7 @@ "docs:dev": "VITE_EXTRA_EXTENSIONS=jsonc vitepress dev --host 0.0.0.0 --port 5173 --strictPort", "docs:build": "VITE_EXTRA_EXTENSIONS=jsonc vitepress build", "docs:preview": "VITE_EXTRA_EXTENSIONS=jsonc vitepress preview --host 0.0.0.0 --port 4173 --strictPort", - "test": "bun test plausible.test.ts index.assets.test.ts docs-sync.test.ts" + "test": "bun test plausible.test.ts index.assets.test.ts docs-sync.test.ts seo.test.ts" }, "dependencies": { "@catppuccin/vitepress": "^0.1.2", diff --git a/docs-site/plausible.test.ts b/docs-site/plausible.test.ts index 59dba889..c1407588 100644 --- a/docs-site/plausible.test.ts +++ b/docs-site/plausible.test.ts @@ -7,7 +7,8 @@ const docsConfigContents = readFileSync(docsConfigPath, 'utf8'); const docsThemeContents = readFileSync(docsThemePath, 'utf8'); test('docs site keeps docs hostname while sending plausible events to subminer.moe via worker.subminer.moe capture endpoint', () => { - expect(docsConfigContents).toContain("hostname: 'https://docs.subminer.moe'"); + expect(docsConfigContents).toContain("const DOCS_HOSTNAME = 'https://docs.subminer.moe'"); + expect(docsConfigContents).toContain('hostname: DOCS_HOSTNAME'); expect(docsThemeContents).toContain("const PLAUSIBLE_DOMAIN = 'subminer.moe'"); expect(docsThemeContents).toContain('const PLAUSIBLE_ENABLED_HOSTNAMES = new Set(['); expect(docsThemeContents).toContain("'docs.subminer.moe'"); diff --git a/docs-site/seo.test.ts b/docs-site/seo.test.ts new file mode 100644 index 00000000..0f46a094 --- /dev/null +++ b/docs-site/seo.test.ts @@ -0,0 +1,40 @@ +import { expect, test } from 'bun:test'; +import type { TransformContext } from 'vitepress'; +import docsConfig from './.vitepress/config'; + +function 
makeTransformContext(page: string): TransformContext { + return { + page, + siteConfig: {} as TransformContext['siteConfig'], + siteData: {} as TransformContext['siteData'], + pageData: {} as TransformContext['pageData'], + title: 'SubMiner', + description: 'SubMiner docs', + head: [], + content: '', + assets: [], + }; +} + +test('docs pages emit stable self-referential canonical URLs', async () => { + const rootHead = await docsConfig.transformHead?.(makeTransformContext('index.md')); + const usageHead = await docsConfig.transformHead?.(makeTransformContext('usage.md')); + + expect(rootHead).toContainEqual([ + 'link', + { rel: 'canonical', href: 'https://docs.subminer.moe/' }, + ]); + expect(usageHead).toContainEqual([ + 'link', + { rel: 'canonical', href: 'https://docs.subminer.moe/usage' }, + ]); + expect(JSON.stringify(rootHead).toLowerCase()).not.toContain('noindex'); +}); + +test('docs sitemap excludes duplicate README page from indexable URLs', async () => { + const items = [{ url: '' }, { url: 'README' }, { url: 'usage' }]; + + const transformedItems = await docsConfig.sitemap?.transformItems?.(items); + + expect(transformedItems?.map((item) => item.url)).toEqual(['', 'usage']); +});