Add canonical URLs and sitemap dedup for docs homepage indexability

- Emit self-referential canonical `<link>` tags on every VitePress page
- Filter duplicate /README entry from generated sitemap
- Extract DOCS_HOSTNAME constant; update Plausible test to match
- Add seo.test.ts covering canonical generation and sitemap filtering
This commit is contained in:
2026-05-10 22:19:21 -07:00
parent 30712738dc
commit b68d17614d
6 changed files with 125 additions and 3 deletions
@@ -0,0 +1,54 @@
---
id: TASK-342
title: Improve docs homepage indexability
status: Done
assignee:
- codex
created_date: '2026-05-11 04:53'
updated_date: '2026-05-11 05:06'
labels:
- docs
- seo
dependencies: []
references:
- 'https://docs.subminer.moe/'
- >-
https://developers.google.com/search/docs/crawling-indexing/consolidate-duplicate-urls
priority: medium
---
## Description
<!-- SECTION:DESCRIPTION:BEGIN -->
Google Search Console reports https://docs.subminer.moe/ as Crawled - currently not indexed. Investigate and improve the docs-site homepage and crawl signals so the root docs page has clear unique content, self-consistent canonical metadata, and no avoidable duplicate sitemap entry from VitePress README output. Keep the change scoped to docs-site SEO/content and verify with docs tests/build plus live-style generated HTML inspection.
<!-- SECTION:DESCRIPTION:END -->
## Acceptance Criteria
<!-- AC:BEGIN -->
- [x] #1 Generated docs HTML for https://docs.subminer.moe/ declares a self-referential canonical URL and does not declare noindex.
- [x] #2 Generated sitemap does not advertise a duplicate /README docs page when the docs homepage is the intended canonical root.
- [x] #3 Docs tests cover the SEO/indexing signals so canonical and duplicate sitemap regressions are caught.
- [x] #4 Docs build/test commands pass or any blocker is documented with exact failure output.
<!-- AC:END -->
## Implementation Plan
<!-- SECTION:PLAN:BEGIN -->
1. Add failing docs-site tests for canonical head generation and sitemap duplicate filtering.
2. Update VitePress config to emit canonical URLs and exclude /README from the sitemap.
3. Strengthen docs homepage content with unique overview/decision-path copy.
4. Run docs tests/build and inspect generated HTML/sitemap.
<!-- SECTION:PLAN:END -->
## Implementation Notes
<!-- SECTION:NOTES:BEGIN -->
2026-05-11: Added docs SEO tests, root canonical generation, sitemap README filtering, and stronger docs homepage orientation copy. Verified that the generated index.html contains a self-referential canonical link and no noindex directive, and that the generated sitemap omits /README.
2026-05-11: Added changes/342-docs-indexability.md and verified changelog lint. Final verification: bun run --cwd docs-site test, bun run docs:build, bun x prettier --check touched docs/changelog files, bun run changelog:lint.
2026-05-11: User requested removal of the added homepage orientation block. Removed it, removed the matching content test, and updated the changelog fragment. Generated index.html no longer contains the removed headings while preserving canonical metadata.
<!-- SECTION:NOTES:END -->
## Final Summary
<!-- SECTION:FINAL_SUMMARY:BEGIN -->
Improved docs homepage indexability signals for https://docs.subminer.moe/. Added self-referential canonical generation for VitePress pages and filtered the duplicate /README page out of the generated sitemap. Added docs SEO regression coverage, kept the existing Plausible hostname test aligned with the shared hostname constant, and added a docs changelog fragment. Per follow-up request, the extra homepage orientation copy was removed before handoff. Verification passed: docs tests, docs build, targeted generated HTML/sitemap inspection, Prettier check for touched files, and changelog lint.
<!-- SECTION:FINAL_SUMMARY:END -->
+4
View File
@@ -0,0 +1,4 @@
type: docs
area: docs
- Improved the docs homepage indexing signals with canonical URLs and a cleaner sitemap.
+24 -1
View File
@@ -1,3 +1,15 @@
/** Origin (scheme + host, no trailing slash) used for canonical links and the sitemap. */
const DOCS_HOSTNAME = 'https://docs.subminer.moe';

/**
 * Maps a page source path (e.g. `usage.md`, `index.md`) to its absolute
 * canonical URL under {@link DOCS_HOSTNAME}.
 *
 * @param page - Markdown page path as passed in by the caller.
 * @returns The canonical URL, or `null` for `404.md`, which gets no canonical link.
 */
function pageToCanonicalHref(page: string): string | null {
  if (page === '404.md') {
    return null;
  }

  // Strip a trailing `index.md` (together with the slash before it, if any).
  const withoutIndex = page.replace(/(^|\/)index\.md$/, '');
  // Drop the Markdown extension from every other page.
  const withoutExtension = withoutIndex.replace(/\.md$/, '');
  // Remove a trailing slash if one is still present.
  const route = withoutExtension.replace(/\/$/, '');

  // An empty route yields the bare root URL `${DOCS_HOSTNAME}/`.
  return `${DOCS_HOSTNAME}/${route}`;
}
export default {
title: 'SubMiner Docs',
description:
@@ -34,7 +46,18 @@ export default {
appearance: 'dark',
cleanUrls: true,
metaChunk: true,
sitemap: { hostname: 'https://docs.subminer.moe' },
sitemap: {
hostname: DOCS_HOSTNAME,
transformItems(items) {
return items.filter(
(item) => item.url !== 'README' && item.url !== `${DOCS_HOSTNAME}/README`,
);
},
},
transformHead({ page }) {
const href = pageToCanonicalHref(page);
return href ? [['link', { rel: 'canonical', href }]] : [];
},
lastUpdated: true,
srcExclude: ['subagents/**'],
markdown: {
+1 -1
View File
@@ -8,7 +8,7 @@
"docs:dev": "VITE_EXTRA_EXTENSIONS=jsonc vitepress dev --host 0.0.0.0 --port 5173 --strictPort",
"docs:build": "VITE_EXTRA_EXTENSIONS=jsonc vitepress build",
"docs:preview": "VITE_EXTRA_EXTENSIONS=jsonc vitepress preview --host 0.0.0.0 --port 4173 --strictPort",
"test": "bun test plausible.test.ts index.assets.test.ts docs-sync.test.ts"
"test": "bun test plausible.test.ts index.assets.test.ts docs-sync.test.ts seo.test.ts"
},
"dependencies": {
"@catppuccin/vitepress": "^0.1.2",
+2 -1
View File
@@ -7,7 +7,8 @@ const docsConfigContents = readFileSync(docsConfigPath, 'utf8');
const docsThemeContents = readFileSync(docsThemePath, 'utf8');
test('docs site keeps docs hostname while sending plausible events to subminer.moe via worker.subminer.moe capture endpoint', () => {
expect(docsConfigContents).toContain("hostname: 'https://docs.subminer.moe'");
expect(docsConfigContents).toContain("const DOCS_HOSTNAME = 'https://docs.subminer.moe'");
expect(docsConfigContents).toContain('hostname: DOCS_HOSTNAME');
expect(docsThemeContents).toContain("const PLAUSIBLE_DOMAIN = 'subminer.moe'");
expect(docsThemeContents).toContain('const PLAUSIBLE_ENABLED_HOSTNAMES = new Set([');
expect(docsThemeContents).toContain("'docs.subminer.moe'");
+40
View File
@@ -0,0 +1,40 @@
import { expect, test } from 'bun:test';
import type { TransformContext } from 'vitepress';
import docsConfig from './.vitepress/config';
/**
 * Builds a stub TransformContext for the given page path.
 *
 * Only `page` varies; the site/page data objects are empty placeholders cast
 * to the expected types, and the remaining fields are fixed dummy values.
 *
 * @param page - Page source path to place in the context (e.g. `index.md`).
 * @returns A TransformContext suitable for passing to `transformHead` in tests.
 */
function makeTransformContext(page: string): TransformContext {
  const stubContext: TransformContext = {
    page,
    siteConfig: {} as TransformContext['siteConfig'],
    siteData: {} as TransformContext['siteData'],
    pageData: {} as TransformContext['pageData'],
    title: 'SubMiner',
    description: 'SubMiner docs',
    head: [],
    content: '',
    assets: [],
  };
  return stubContext;
}
// Regression guard: every page head must carry a canonical link pointing at
// its own clean URL, and the homepage head must never contain "noindex".
test('docs pages emit stable self-referential canonical URLs', async () => {
  const rootContext = makeTransformContext('index.md');
  const usageContext = makeTransformContext('usage.md');

  const rootHead = await docsConfig.transformHead?.(rootContext);
  const usageHead = await docsConfig.transformHead?.(usageContext);

  const rootCanonical = ['link', { rel: 'canonical', href: 'https://docs.subminer.moe/' }];
  const usageCanonical = ['link', { rel: 'canonical', href: 'https://docs.subminer.moe/usage' }];

  expect(rootHead).toContainEqual(rootCanonical);
  expect(usageHead).toContainEqual(usageCanonical);

  // Case-insensitive scan of the serialized head entries for a noindex directive.
  const serializedRootHead = JSON.stringify(rootHead).toLowerCase();
  expect(serializedRootHead).not.toContain('noindex');
});
// Regression guard: the sitemap transform must drop the README entry while
// leaving the root ('') and regular pages untouched and in order.
test('docs sitemap excludes duplicate README page from indexable URLs', async () => {
  const sitemapEntries = ['', 'README', 'usage'].map((url) => ({ url }));

  const filteredEntries = await docsConfig.sitemap?.transformItems?.(sitemapEntries);
  const remainingUrls = filteredEntries?.map((entry) => entry.url);

  expect(remainingUrls).toEqual(['', 'usage']);
});