Changes
12 changed files (+540/-531)
-
-
@@ -8,11 +8,18 @@ DocumentMetadata,FileReader, } from "../types.ts"; export type ContentParseResult< Content extends DocumentContent = DocumentContent, > = Content | { documentContent: Content; documentMetadata: DocumentMetadata; }; export interface ParseParameters { fileReader: FileReader; documentMetadata: DocumentMetadata; } export interface ContentParser { parse(params: ParseParameters): Promise<DocumentContent>; parse(params: ParseParameters): Promise<ContentParseResult>; }
-
-
-
@@ -4,12 +4,9 @@ // SPDX-License-Identifier: Apache-2.0import { assertObjectMatch } from "../deps/deno.land/std/assert/mod.ts"; import { VaultParser } from "../metadata_parser/vault_parser.ts"; import { MemoryFsReader } from "../filesystem_reader/memory_fs.ts"; import { ObsidianMarkdownParser } from "./obsidian_markdown.ts"; import type { DocumentMetadata, FileReader } from "../types.ts"; const metadataParser = new VaultParser(); import type { FileReader } from "../types.ts"; Deno.test("Should parse CommonMark syntax", async () => { const fs = new MemoryFsReader([
-
@@ -27,28 +24,87 @@ const fileReader =(await (fs.getRootDirectory().then((dir) => dir.read()).then((entries) => entries[0] ))) as FileReader; const documentMetadata = (await metadataParser.parse(fileReader)) as DocumentMetadata; const parser = new ObsidianMarkdownParser(); const content = await parser.parse({ documentMetadata, fileReader }); const content = await parser.parse({ documentMetadata: { title: "Test", name: "Test", }, fileReader, }); assertObjectMatch(content.content, { type: "root", children: [ { type: "heading", depth: 1, }, { type: "heading", depth: 2, }, { type: "heading", depth: 3, assertObjectMatch( "documentContent" in content ? content.documentContent.content : content.content, { type: "root", children: [ { type: "heading", depth: 1, }, { type: "heading", depth: 2, }, { type: "heading", depth: 3, }, ], }, ); }); Deno.test("Should parse YAML frontmatter", async () => { const fs = new MemoryFsReader([ { path: "Test.md", content: `--- title: "Alice's blog post #1" name: alice-blog-01 lang: en-US --- # H1 `, }, ]); const fileReader = (await (fs.getRootDirectory().then((dir) => dir.read()).then((entries) => entries[0] ))) as FileReader; const parser = new ObsidianMarkdownParser({ frontmatter: true }); const content = await parser.parse({ documentMetadata: { title: "Test", name: "Test", }, fileReader, }); assertObjectMatch(content, { documentContent: { content: { type: "root", children: [ { type: "heading", depth: 1, }, ], }, ], }, documentMetadata: { title: "Alice's blog post #1", name: "alice-blog-01", language: "en-US", }, }); });
-
-
-
@@ -2,24 +2,97 @@ // SPDX-FileCopyrightText: 2024 Shota FUJI <pockawoooh@gmail.com>// // SPDX-License-Identifier: Apache-2.0 import * as yamlFrontmatter from "../deps/deno.land/std/front_matter/yaml.ts"; import type * as Mdast from "../deps/esm.sh/mdast/types.ts"; import { fromMarkdown } from "../deps/esm.sh/mdast-util-from-markdown/mod.ts"; import type { ContentParser, ParseParameters } from "./interface.ts"; import type { ContentParser, ContentParseResult, ParseParameters, } from "./interface.ts"; import type { DocumentContent } from "../types.ts"; function getFrontMatterValue( frontmatter: Record<string, unknown>, key: string, ): string | undefined { if (!(key in frontmatter)) { return undefined; } const value = frontmatter[key]; if (typeof value !== "string") { return undefined; } return value; } export type ObsidianMarkdownDocument = DocumentContent< "obsidian_markdown", Mdast.Nodes >; export interface ObsidianMarkdownParserOptions { /** * Whether to enable reading of YAML frontmatter. * * ## `title` * * Use property value as a document title. * * ## `name` * * Use property value as a document name. * * ## `lang` / `language` * * Use property value as a document language. * * @default false */ frontmatter?: boolean; } export class ObsidianMarkdownParser implements ContentParser { #frontmatter: boolean; constructor({ frontmatter = false }: ObsidianMarkdownParserOptions = {}) { this.#frontmatter = frontmatter; } async parse( { fileReader }: ParseParameters, ): Promise<ObsidianMarkdownDocument> { { fileReader, documentMetadata }: ParseParameters, ): Promise<ContentParseResult<ObsidianMarkdownDocument>> { const bytes = await fileReader.read(); if (!this.#frontmatter) { return { kind: "obsidian_markdown", content: fromMarkdown(bytes), }; } const frontmatter = yamlFrontmatter.extract( new TextDecoder().decode(bytes), ); const name = getFrontMatterValue(frontmatter.attrs, "name"); const title = getFrontMatterValue(frontmatter.attrs, "title"); const lang = getFrontMatterValue(frontmatter.attrs, "lang") || getFrontMatterValue(frontmatter.attrs, "language"); return { kind: "obsidian_markdown", content: fromMarkdown(await fileReader.read()), documentMetadata: { name: name || documentMetadata.name, title: title || documentMetadata.title, language: lang || documentMetadata.language, }, documentContent: { kind: "obsidian_markdown", content: fromMarkdown(frontmatter.body), }, }; } }
-
-
-
@@ -7,7 +7,6 @@ assertObjectMatch,unreachable, } from "../deps/deno.land/std/assert/mod.ts"; import { VaultParser } from "../metadata_parser/vault_parser.ts"; import { MemoryFsReader } from "../filesystem_reader/memory_fs.ts"; import type { ContentParser } from "./interface.ts";
-
@@ -36,8 +35,6 @@ { path: "foo.md", content: "foo" },{ path: "bar.md", content: "bar" }, ]); const metadataParser = new VaultParser(); const parser = oneof( literal("foo"), literal("bar"),
-
@@ -52,15 +49,11 @@ if (item.type !== "file") {unreachable("MemoryFS gave a directory where expecting a file"); } const metadata = await metadataParser.parse(item); if ("skip" in metadata) { unreachable( "Metadata Parser skipped where it expected to return metadata", ); } const content = await parser.parse({ documentMetadata: metadata, documentMetadata: { name: item.name, title: item.name, }, fileReader: item, });
-
-
-
@@ -4,11 +4,16 @@ // SPDX-License-Identifier: Apache-2.0import { DenoFsReader } from "../filesystem_reader/deno_fs.ts"; import { DenoFsWriter } from "../filesystem_writer/deno_fs.ts"; import { DefaultTreeBuilder } from "../tree_builder/default_tree_builder.ts"; import { DefaultTreeBuilder, ignore, ignoreDotfiles, langDir, removeExtFromMetadata, } from "../tree_builder/default_tree_builder.ts"; import { ObsidianMarkdownParser } from "../content_parser/obsidian_markdown.ts"; import { JSONCanvasParser } from "../content_parser/json_canvas.ts"; import { oneof } from "../content_parser/oneof.ts"; import { VaultParser } from "../metadata_parser/vault_parser.ts"; import { DefaultThemeBuilder } from "../page_builder/default_theme/builder.tsx"; const outDir = new URL("./.dist", import.meta.url);
-
@@ -31,48 +36,25 @@ const fileSystemReader = new DenoFsReader(srcDir);const fileSystemWriter = new DenoFsWriter(outDir); const treeBuilder = new DefaultTreeBuilder({ defaultLanguage: "en", ignore(node) { return node.name.startsWith(".") || (node.path.length === 1 && node.name.endsWith(".ts")); }, strategies: [ ignoreDotfiles(), // Remove build related scripts ignore((node) => node.type === "file" && node.name.endsWith(".ts")), removeExtFromMetadata(), langDir({ en: "English", ja: "日本語", }, true), ], }); const contentParser = oneof( new JSONCanvasParser(), new ObsidianMarkdownParser(), ); const metadataParser = new VaultParser({ override(node) { if ( node.parent.type !== "root" || node.type !== "directory" || !(/^[a-z]+(-[a-z]+)*$/.test(node.name)) ) { return null; } switch (node.name) { case "ja": return { title: "日本語", language: node.name, }; case "en": return { title: "English", language: node.name, }; default: { return { language: node.name, }; } } }, }); const pageBuilder = new DefaultThemeBuilder("© 2024 Shota FUJI"); const documentTree = await treeBuilder.build({ fileSystemReader, metadataParser, contentParser, }); await pageBuilder.build({
-
-
-
@@ -2,30 +2,27 @@ {"nodes":[ {"id":"16ecec9768556d1e","type":"group","x":-540,"y":-1240,"width":1580,"height":1280,"color":"2","label":"Macana"}, {"id":"cef7273c31316035","type":"group","x":10,"y":-1560,"width":740,"height":220,"label":"Legends"}, {"id":"1f5682267b09b226","type":"text","text":"# Tree Builder\nResponsible for generating a *document tree* by accessing *FileSystem reader*.","x":0,"y":-860,"width":340,"height":140,"color":"5"}, {"id":"666a11c598e85979","type":"text","text":"# User script\nDeno script user wrote.","x":-920,"y":-554,"width":320,"height":128,"color":"5"}, {"id":"ecd9e84968c62b30","type":"text","text":"# Core\nAct as an endpoint for the Macana API. Schedule and coordinates various modules.","x":-420,"y":-580,"width":384,"height":180,"color":"5"}, {"id":"b47e4fabfd3da80f","type":"text","text":"# CLI","x":-505,"y":-126,"width":250,"height":100,"color":"1"}, {"id":"ed3a9fdae2c1eb0e","type":"text","text":"# User Agent\nMostly web browser.","x":-255,"y":120,"width":300,"height":160}, {"id":"d27605d3bbfcf821","type":"text","text":"# Dev Server","x":-210,"y":-119,"width":210,"height":86,"color":"1"}, {"id":"eea88958e6425901","type":"text","text":"# Obsidian Vault\nPage source data is stored as Markdown files in plain arbitrary directory.","x":1100,"y":-812,"width":300,"height":184}, {"id":"a4415114ae6abd10","type":"text","text":"# Module\nBox with this color indicates the box is a module.","x":30,"y":-1520,"width":280,"height":140,"color":"5"}, {"id":"985228be1f82ddfd","type":"text","text":"# Program\nBox with this color indicates the box is an executable.","x":388,"y":-1520,"width":280,"height":140,"color":"1"}, {"id":"bcf0d415d8a93b32","type":"text","text":"# FileSystem Reader\nResponsible for listing, reading, and watching directory or file. This module can only operate inside a source directory.","x":640,"y":-860,"width":340,"height":188,"color":"5"}, {"id":"d27605d3bbfcf821","type":"text","text":"# Dev Server","x":-210,"y":-119,"width":210,"height":86,"color":"1"}, {"id":"d9c3ec6b020b2ca1","type":"text","text":"# Content Parser\nResponsible for parsing a file and returning a *document content* and optional *document metadata*.\n","x":-505,"y":-1220,"width":480,"height":160,"color":"5"}, {"id":"1f5682267b09b226","type":"text","text":"# Tree Builder\nResponsible for generating a *document tree* by accessing *FileSystem reader*. Tree Builder generates *document metadata* from filesystem information, such as filename directory name, and structure. If *Content Parser* returns *document metadata* in addition to the *document content*, *Tree Builder* override the own generated metadata with the returned metadata.","x":179,"y":-1187,"width":498,"height":267,"color":"5"}, {"id":"ecd9e84968c62b30","type":"text","text":"# Core\nAct as an endpoint for the Macana API. Schedule and coordinates various modules.","x":-420,"y":-580,"width":384,"height":180,"color":"5"}, {"id":"ed3a9fdae2c1eb0e","type":"text","text":"# User Agent\nMostly web browser.","x":-255,"y":120,"width":300,"height":160}, {"id":"eea88958e6425901","type":"text","text":"# Obsidian Vault\nPage source data is stored as Markdown files in plain arbitrary directory.","x":1100,"y":-812,"width":300,"height":184}, {"id":"d9484aaae39c7cfd","type":"text","text":"# FileSystem Writer\nResponsible for creating and writing directory or file. This can only operate inside an output directory.","x":96,"y":-170,"width":324,"height":188,"color":"5"}, {"id":"b4d105b9f43d32e4","type":"text","text":"# Generated site\nHTML/CSS/JS files, along with RSS feed and other site metadata things.","x":100,"y":120,"width":320,"height":160}, {"id":"c637b07c530db189","type":"text","text":"# Page Builder\nResponsible for generating a HTML page from a *document tree* and a *document*.","x":88,"y":-480,"width":340,"height":160,"color":"5"}, {"id":"7e6026f0fc91341d","type":"text","text":"# Metadata Parser\nResponsible for parsing a file or a directory and returning a *document metadata*.\n","x":258,"y":-1180,"width":480,"height":160,"color":"5"}, {"id":"d9c3ec6b020b2ca1","type":"text","text":"# Content Parser\nResponsible for parsing a file and returning a *document content*.\n","x":-345,"y":-1180,"width":480,"height":160,"color":"5"} {"id":"c637b07c530db189","type":"text","text":"# Page Builder\nResponsible for generating a HTML page from a *document tree* and a *document*.","x":88,"y":-480,"width":340,"height":160,"color":"5"} ], "edges":[ {"id":"9d28e2e189fcc6d1","fromNode":"ed3a9fdae2c1eb0e","fromSide":"top","toNode":"d27605d3bbfcf821","toSide":"bottom","fromEnd":"arrow","label":"HTTP"}, {"id":"2502c36efb8650a2","fromNode":"1f5682267b09b226","fromSide":"top","toNode":"7e6026f0fc91341d","toSide":"bottom","color":"1","label":"File / Directory"}, {"id":"d283db6e20fddc67","fromNode":"7e6026f0fc91341d","fromSide":"right","toNode":"1f5682267b09b226","toSide":"right","color":"6","label":"Document metadata"}, {"id":"05e0aed4c96b7b73","fromNode":"ecd9e84968c62b30","fromSide":"top","toNode":"1f5682267b09b226","toSide":"left","color":"1","label":"Build request"}, {"id":"05e0aed4c96b7b73","fromNode":"ecd9e84968c62b30","fromSide":"top","toNode":"1f5682267b09b226","toSide":"bottom","color":"1","label":"Build request"}, {"id":"67d567fd897faaa5","fromNode":"1f5682267b09b226","fromSide":"bottom","toNode":"c637b07c530db189","toSide":"top","color":"3","label":"Document tree"}, {"id":"8284d0da786f676b","fromNode":"c637b07c530db189","fromSide":"bottom","toNode":"d9484aaae39c7cfd","toSide":"top","color":"4","label":"Generated files"}, {"id":"7b4d9467cbcec90e","fromNode":"bcf0d415d8a93b32","fromSide":"left","toNode":"1f5682267b09b226","toSide":"bottom","color":"1","label":"File / Directory"}, {"id":"7b4d9467cbcec90e","fromNode":"bcf0d415d8a93b32","fromSide":"top","toNode":"1f5682267b09b226","toSide":"right","color":"1","label":"File / Directory"}, {"id":"a0543dccaeca0491","fromNode":"bcf0d415d8a93b32","fromSide":"bottom","toNode":"c637b07c530db189","toSide":"right","color":"3","label":"File"}, {"id":"0e152fdaa4fc5b8b","fromNode":"d9484aaae39c7cfd","fromSide":"bottom","toNode":"b4d105b9f43d32e4","toSide":"top"}, {"id":"b5b8d60304fb4839","fromNode":"d27605d3bbfcf821","fromSide":"top","toNode":"ecd9e84968c62b30","toSide":"bottom"},
-
@@ -33,7 +30,7 @@ {"id":"372c5518dfc96cc2","fromNode":"b47e4fabfd3da80f","fromSide":"top","toNode":"ecd9e84968c62b30","toSide":"bottom"},{"id":"d1dfb8d829247781","fromNode":"d27605d3bbfcf821","fromSide":"right","toNode":"d9484aaae39c7cfd","toSide":"left"}, {"id":"8af32a521e2be033","fromNode":"eea88958e6425901","fromSide":"left","toNode":"bcf0d415d8a93b32","toSide":"right"}, {"id":"a2dff019b740cad3","fromNode":"666a11c598e85979","fromSide":"right","toNode":"ecd9e84968c62b30","toSide":"left"}, {"id":"1c69fcf3635a69ef","fromNode":"1f5682267b09b226","fromSide":"top","toNode":"d9c3ec6b020b2ca1","toSide":"bottom","color":"1","label":"File"}, {"id":"ad5ba77ef1e159bf","fromNode":"d9c3ec6b020b2ca1","fromSide":"left","toNode":"1f5682267b09b226","toSide":"left","color":"6","label":"Document content"} {"id":"1c69fcf3635a69ef","fromNode":"1f5682267b09b226","fromSide":"left","toNode":"d9c3ec6b020b2ca1","toSide":"right","color":"1","label":"File"}, {"id":"ad5ba77ef1e159bf","fromNode":"d9c3ec6b020b2ca1","fromSide":"bottom","toNode":"1f5682267b09b226","toSide":"left","color":"6","label":"Document content, document metadata"} ] }
-
-
metadata_parser/interface.ts (deleted)
-
@@ -1,27 +0,0 @@// SPDX-FileCopyrightText: 2024 Shota FUJI <pockawoooh@gmail.com> // // SPDX-License-Identifier: Apache-2.0 import type { DirectoryReader, DocumentMetadata, FileReader, } from "../types.ts"; /** * Skip this parser. * If no parsers left, do not include the file in a document tree. */ export interface Skip { readonly skip: true; } export interface MetadataParser { /** * Parses a file or directory then returns metadata for the file or directory. * Throws when the file or directory does not meet the expectation. */ parse( fileOrDirectory: FileReader | DirectoryReader, ): Promise<DocumentMetadata | Skip>; }
-
-
metadata_parser/vault_parser.test.ts (deleted)
-
@@ -1,234 +0,0 @@// SPDX-FileCopyrightText: 2024 Shota FUJI <pockawoooh@gmail.com> // // SPDX-License-Identifier: Apache-2.0 import { assertObjectMatch } from "../deps/deno.land/std/assert/mod.ts"; import { MemoryFsReader } from "../filesystem_reader/memory_fs.ts"; import { VaultParser } from "./vault_parser.ts"; import type { DirectoryReader } from "../types.ts"; Deno.test("Should use filename as title", async () => { const fs = new MemoryFsReader([ { path: "foo.md", content: "", }, ]); const root = await fs.getRootDirectory(); const [file] = await root.read(); assertObjectMatch(await new VaultParser().parse(file), { name: "foo", title: "foo", }); }); Deno.test("Should use directory name as title", async () => { const fs = new MemoryFsReader([ { path: "bar/foo.md", content: "", }, ]); const root = await fs.getRootDirectory(); const [dir] = await root.read(); assertObjectMatch(await new VaultParser().parse(dir), { name: "bar", title: "bar", }); }); Deno.test("Should parse canvas file", async () => { const fs = new MemoryFsReader([ { path: "foo.canvas", content: "", }, ]); const root = await fs.getRootDirectory(); const [file] = await root.read(); assertObjectMatch(await new VaultParser().parse(file), { name: "foo", title: "foo", }); }); Deno.test("Should skip files other than note and canvas", async () => { const fs = new MemoryFsReader([ { path: "main.tsx", content: "", }, ]); const root = await fs.getRootDirectory(); const [file] = await root.read(); assertObjectMatch(await new VaultParser().parse(file), { skip: true }); }); Deno.test("Should use name defined in YAML frontmatter", async () => { const fs = new MemoryFsReader([ { path: "Foo Bar.md", content: `--- name: foo-bar ---`, }, ]); const root = await fs.getRootDirectory(); const [file] = await root.read(); assertObjectMatch( await new VaultParser({ readFrontMatter: true }).parse(file), { name: "foo-bar", title: "Foo Bar", }, ); }); Deno.test("Should use title defined in YAML frontmatter", async () => { const fs = new MemoryFsReader([ { path: "Foo Bar.md", content: `--- title: Baz ---`, }, ]); const root = await fs.getRootDirectory(); const [file] = await root.read(); assertObjectMatch( await new VaultParser({ readFrontMatter: true }).parse(file), { name: "Foo Bar", title: "Baz", }, ); }); Deno.test("Should use language defined in YAML frontmatter", async () => { const fs = new MemoryFsReader([ { path: "Foo Bar.md", content: `--- lang: en ---`, }, ]); const root = await fs.getRootDirectory(); const [file] = await root.read(); assertObjectMatch( await new VaultParser({ readFrontMatter: true }).parse(file), { name: "Foo Bar", title: "Foo Bar", language: "en", }, ); }); Deno.test("Should use both name and title defined in YAML frontmatter", async () => { const fs = new MemoryFsReader([ { path: "Foo Bar.md", content: `--- name: foo-bar title: Baz ---`, }, ]); const root = await fs.getRootDirectory(); const [file] = await root.read(); assertObjectMatch( await new VaultParser({ readFrontMatter: true }).parse(file), { name: "foo-bar", title: "Baz", }, ); }); Deno.test("Should not read frontmatter if the flag is not on", async () => { const fs = new MemoryFsReader([ { path: "Foo Bar.md", content: `--- name: foo-bar title: Baz ---`, }, ]); const root = await fs.getRootDirectory(); const [file] = await root.read(); assertObjectMatch( await new VaultParser().parse(file), { name: "Foo Bar", title: "Foo Bar", }, ); }); Deno.test("Should overrides", async () => { const fs = new MemoryFsReader([ { path: "en/Foo Bar.md", content: "", }, { path: "ja/Foo Bar.md", content: "", }, ]); const root = await fs.getRootDirectory(); const [en, ja] = await root.read(); const parser = new VaultParser({ override(node) { if (node.type !== "directory" || !(/^(en|ja)$/.test(node.name))) { return null; } return { title: node.name === "ja" ? "日本語" : "English", language: node.name, }; }, }); assertObjectMatch( await parser.parse(en as DirectoryReader), { name: "en", title: "English", language: "en", }, ); assertObjectMatch( await parser.parse(ja), { name: "ja", title: "日本語", language: "ja", }, ); });
-
-
metadata_parser/vault_parser.ts (deleted)
-
@@ -1,136 +0,0 @@// SPDX-FileCopyrightText: 2024 Shota FUJI <pockawoooh@gmail.com> // // SPDX-License-Identifier: Apache-2.0 import { extname } from "../deps/deno.land/std/path/mod.ts"; import * as yamlFrontmatter from "../deps/deno.land/std/front_matter/yaml.ts"; import type { MetadataParser, Skip } from "./interface.ts"; import type { DirectoryReader, DocumentMetadata, FileReader, } from "../types.ts"; function getFrontMatterValue( frontmatter: Record<string, unknown>, key: string, ): string | undefined { if (!(key in frontmatter)) { return undefined; } const value = frontmatter[key]; if (typeof value !== "string") { return undefined; } return value; } export interface VaultParserOptions { /** * Whether to read YAML frontmatter of notes. * When enabled, * * - Use `name` property for document name if defined. * - Use `title` property for document title if defined. * - Use `lang` property or `language` property as a document language if defined. * * This flag is off by-default for performance reasons. */ readFrontMatter?: boolean; /** * An optional function to override default parsing behaviour. * If this function returned partial of metadata, Macana prefers it over default parsed metadata. * Metadata extracted from YAML frontmatters overrides the metadata this function returned. */ override?( node: FileReader | DirectoryReader, ): Partial<DocumentMetadata> | false | null | undefined; } /** * A parser for Obsidian Vault. * * By default, this parser uses file and directory name as document title * and lowercased escaped one as document name. */ export class VaultParser implements MetadataParser { #readFrontMatter: boolean; #override: VaultParserOptions["override"]; constructor({ readFrontMatter = false, override }: VaultParserOptions = {}) { this.#readFrontMatter = readFrontMatter; this.#override = override; } async parse( node: FileReader | DirectoryReader, ): Promise<DocumentMetadata | Skip> { const overrides = this.#override?.(node) || null; if (node.type === "directory") { return { name: overrides?.name || node.name, title: overrides?.title || node.name, language: overrides?.language, }; } const ext = extname(node.name); const basename = ext ? node.name.slice(0, -ext.length) : node.name; switch (ext) { case ".md": { const fromFileName: DocumentMetadata = { name: overrides?.name || basename, title: overrides?.title || basename, language: overrides?.language, }; if (this.#readFrontMatter) { const parsed = await this.#parseFrontMatter(node); return { name: parsed.name || fromFileName.name, title: parsed.title || fromFileName.title, language: parsed.language || fromFileName.language, }; } return fromFileName; } case ".canvas": { return { name: overrides?.name || basename, title: overrides?.title || basename, language: overrides?.language, }; } // Not an Obsidian document. default: { return { skip: true, }; } } } async #parseFrontMatter( file: FileReader, ): Promise<Partial<DocumentMetadata>> { const markdown = new TextDecoder().decode(await file.read()); // Obsidian currently supports YAML frontmatter only. const frontmatter = yamlFrontmatter.extract(markdown); const name = getFrontMatterValue(frontmatter.attrs, "name"); const title = getFrontMatterValue(frontmatter.attrs, "title"); const language = getFrontMatterValue(frontmatter.attrs, "lang") || getFrontMatterValue(frontmatter.attrs, "language"); return { name, title, language }; } }
-
-
-
@@ -4,13 +4,20 @@ // SPDX-License-Identifier: Apache-2.0import { assertEquals, assertNotEquals, assertObjectMatch, } from "../deps/deno.land/std/assert/mod.ts"; import { MemoryFsReader } from "../filesystem_reader/memory_fs.ts"; import { VaultParser } from "../metadata_parser/vault_parser.ts"; import { noopParser } from "../content_parser/noop.ts"; import { DefaultTreeBuilder } from "./default_tree_builder.ts"; import { DefaultTreeBuilder, fileExtensions, ignore, ignoreDotfiles, langDir, removeExtFromMetadata, } from "./default_tree_builder.ts"; const contentParser = noopParser;
-
@@ -19,12 +26,10 @@ const fileSystemReader = new MemoryFsReader([{ path: "Foo Bar/Baz Qux.md", content: "" }, { path: "Foo.md", content: "" }, ]); const metadataParser = new VaultParser(); const builder = new DefaultTreeBuilder({ defaultLanguage: "en" }); const tree = await builder.build({ fileSystemReader, metadataParser, contentParser, });
-
@@ -39,8 +44,8 @@ },entries: [ { metadata: { name: "Baz Qux", title: "Baz Qux", name: "Baz Qux.md", title: "Baz Qux.md", }, file: { name: "Baz Qux.md",
-
@@ -51,8 +56,8 @@ });assertObjectMatch(tree.nodes[1], { metadata: { name: "Foo", title: "Foo", name: "Foo.md", title: "Foo.md", }, file: { name: "Foo.md",
-
@@ -60,25 +65,59 @@ },}); }); Deno.test("Should ignore files and directories matches to `ignore` callback", async () => { Deno.test("Should respect metadata returned by Content Parser", async () => { const fileSystemReader = new MemoryFsReader([ { path: "Foo.md", content: "" }, ]); const builder = new DefaultTreeBuilder({ defaultLanguage: "en" }); const tree = await builder.build({ fileSystemReader, contentParser: { async parse() { return { documentContent: { kind: "null", content: null, }, documentMetadata: { title: "Brown fox", name: "jumps over", language: "lazy-dog", }, }; }, }, }); assertObjectMatch(tree.nodes[0], { metadata: { title: "Brown fox", name: "jumps over", language: "lazy-dog", }, file: { name: "Foo.md", }, }); }); Deno.test("ignore() and ignoreDotfiles() should ignore files and directories", async () => { const fileSystemReader = new MemoryFsReader([ { path: "foo/bar/baz.md", content: "" }, { path: "foo/bar.md", content: "" }, { path: "foo.md", content: "" }, { path: "bar/foo.md", content: "" }, { path: "bar/foo/baz.md", content: "" }, { path: ".baz.md", content: "" }, ]); const metadataParser = new VaultParser(); const builder = new DefaultTreeBuilder({ defaultLanguage: "en", ignore(node) { return node.name === "foo"; }, strategies: [ignore((node) => node.name === "foo"), ignoreDotfiles()], }); const tree = await builder.build({ fileSystemReader, metadataParser, contentParser, });
-
@@ -86,8 +125,8 @@ assertEquals(tree.nodes.length, 2);assertObjectMatch(tree.nodes[0], { metadata: { name: "foo", title: "foo", name: "foo.md", title: "foo.md", }, file: { name: "foo.md" }, });
-
@@ -103,8 +142,8 @@ },entries: [ { metadata: { name: "foo", title: "foo", name: "foo.md", title: "foo.md", }, file: { name: "foo.md",
-
@@ -113,3 +152,148 @@ },], }); }); Deno.test("fileExtensions() should ignore files not matching the extension list", async () => { const fileSystemReader = new MemoryFsReader([ { path: "foo.md", content: "" }, { path: "bar.txt", content: "" }, { path: "baz.html", content: "" }, { path: "qux.canvas", content: "" }, { path: "quux.jpeg", content: "" }, ]); const builder = new DefaultTreeBuilder({ defaultLanguage: "en", strategies: [fileExtensions([".md", ".canvas"])], }); const tree = await builder.build({ fileSystemReader, contentParser, }); assertEquals(tree.nodes.length, 2); assertObjectMatch(tree.nodes[0], { metadata: { name: "foo.md", title: "foo.md", }, file: { name: "foo.md" }, }); assertObjectMatch(tree.nodes[1], { metadata: { name: "qux.canvas", title: "qux.canvas", }, file: { name: "qux.canvas" }, }); }); Deno.test("langDir() should treat directories matching to the record as lang directory", async () => { const fileSystemReader = new MemoryFsReader([ { path: "en.md", content: "" }, { path: "en/foo.md", content: "" }, { path: "ja/foo.md", content: "" }, ]); const builder = new DefaultTreeBuilder({ defaultLanguage: "en", strategies: [langDir({ en: "English", ja: "日本語" })], }); const tree = await builder.build({ fileSystemReader, contentParser, }); assertObjectMatch(tree.nodes[0], { metadata: { name: "en.md", title: "en.md", }, file: { name: "en.md" }, }); assertNotEquals(tree.nodes[0].metadata.language, "en"); assertObjectMatch(tree.nodes[1], { metadata: { name: "en", title: "English", language: "en", }, directory: { name: "en", }, entries: [ { metadata: { name: "foo.md", title: "foo.md", }, file: { name: "foo.md" }, }, ], }); assertObjectMatch(tree.nodes[2], { metadata: { name: "ja", title: "日本語", language: "ja", }, directory: { name: "ja", }, entries: [ { metadata: { name: "foo.md", title: "foo.md", }, file: { name: "foo.md" }, }, ], }); }); Deno.test("removeExtFromMetadata() should remove file extension from document metadata", async () => { const fileSystemReader = new MemoryFsReader([ { path: "Foo/Bar.secret/Baz.md", content: "" }, ]); const builder = new DefaultTreeBuilder({ defaultLanguage: "en", strategies: [removeExtFromMetadata()], }); const tree = await builder.build({ fileSystemReader, contentParser, }); assertObjectMatch(tree.nodes[0], { metadata: { name: "Foo", title: "Foo", }, directory: { name: "Foo" }, entries: [ { metadata: { name: "Bar.secret", title: "Bar.secret", }, directory: { name: "Bar.secret" }, entries: [ { metadata: { name: "Baz", title: "Baz", }, file: { name: "Baz.md" }, }, ], }, ], }); });
-
-
-
@@ -2,15 +2,131 @@ // SPDX-FileCopyrightText: 2024 Shota FUJI <pockawoooh@gmail.com>// // SPDX-License-Identifier: Apache-2.0 import { extname } from "../deps/deno.land/std/path/mod.ts"; import type { BuildParameters, TreeBuilder } from "./interface.ts"; import type { DirectoryReader, Document, DocumentDirectory, DocumentMetadata, DocumentTree, FileReader, } from "../types.ts"; export type TreeBuildStrategyFunctionReturns = { skip: true; } | { skip?: false; metadata: DocumentMetadata; }; export interface TreeBuildStrategy { ( fileOrDirectory: FileReader | DirectoryReader, metadata: DocumentMetadata, ): | TreeBuildStrategyFunctionReturns | Promise<TreeBuildStrategyFunctionReturns>; } /** * Only accepts files having specific file extension. * Files not having an extension in the list will be excluded from a document tree. * * @param exts - A list of file extensions, including leading dot. */ export function fileExtensions(exts: readonly string[]): TreeBuildStrategy { return (node, metadata) => { if (node.type !== "file") { return { metadata }; } if (exts.some((ext) => node.name.endsWith(ext))) { return { metadata }; } return { skip: true }; }; } /** * Excludes certain files and directories. * * @param f - If this function returned `true`, the file will be excluded from a document tree. */ export function ignore( f: (fileOrDirectory: FileReader | DirectoryReader) => boolean, ): TreeBuildStrategy { return (node, metadata) => { if (f(node)) { return { skip: true }; } return { metadata }; }; } /** * Excludes dotfiles from a document tree. */ export function ignoreDotfiles(): TreeBuildStrategy { return ignore((node) => node.name.startsWith(".")); } export function langDir( langs: Record<string, string>, topLevelOnly: boolean = false, ): TreeBuildStrategy { const map = new Map(Object.entries(langs)); return (node, metadata) => { if (topLevelOnly && node.parent.type !== "root") { return { metadata }; } if (node.type !== "directory") { return { metadata }; } const title = map.get(node.name); if (!title) { return { metadata }; } return { metadata: { ...metadata, title, language: node.name, }, }; }; } /** * Do not include file extension in the metadata. * For example, "Bar.md" will be `{ title: "Bar", name: "Bar" }`. */ export function removeExtFromMetadata(): TreeBuildStrategy { return (node, metadata) => { if (node.type !== "file") { return { metadata }; } const ext = extname(node.name); const stem = ext ? node.name.slice(0, -ext.length) : node.name; return { metadata: { ...metadata, title: stem, name: stem, }, }; }; } export interface DefaultTreeBuilderConfig { /** * Default language tag (BCP 47).
-
@@ -18,33 +134,30 @@ */defaultLanguage: string; /** * Callback function to be invoked on every file and directory. * If this function returned true, the file or directory is skipped and does not * appear on the resulted document tree. * A list of callback functions that control whether a file or a directory should be * included in the document tree and override document metadata. */ ignore?(fileOrDirectory: FileReader | DirectoryReader): boolean; strategies?: readonly TreeBuildStrategy[]; } export class DefaultTreeBuilder implements TreeBuilder { #defaultLanguage: string; #ignore?: (fileOrDirectory: FileReader | DirectoryReader) => boolean; #strategies: readonly TreeBuildStrategy[]; constructor({ defaultLanguage, ignore }: DefaultTreeBuilderConfig) { constructor({ defaultLanguage, strategies }: DefaultTreeBuilderConfig) { this.#defaultLanguage = defaultLanguage; this.#ignore = ignore; this.#strategies = strategies || []; } async build( { fileSystemReader, metadataParser, contentParser }: BuildParameters, { fileSystemReader, contentParser }: BuildParameters, ): Promise<DocumentTree> { const root = await fileSystemReader.getRootDirectory(); const children = await root.read(); const entries = await Promise.all( children.map((child) => this.#build(child, { metadataParser, contentParser }) ), children.map((child) => this.#build(child, { contentParser })), ); return {
-
@@ -58,37 +171,40 @@ }async #build( node: FileReader | DirectoryReader, { metadataParser, contentParser }: Omit< { contentParser }: Omit< BuildParameters, "fileSystemReader" >, parentPath: readonly string[] = [], ): Promise<DocumentDirectory | Document | null> { if (this.#ignore && this.#ignore(node)) { // TODO: Debug log return null; } let metadata: DocumentMetadata = { name: node.name, title: node.name, }; const metadata = await metadataParser.parse(node); for (const strategy of this.#strategies) { const result = await strategy(node, metadata); if (result.skip) { // TODO: Debug log (or this should be in the each strategies?) return null; } // This SHOULD have check for `metadata.skip` being `true`. However, a bug // (or "feature") in TypeScript breaks type-narrowing by doing so. if ("skip" in metadata) { // TODO: Debug log return null; metadata = result.metadata; } if (node.type === "file") { const content = await contentParser.parse({ const result = await contentParser.parse({ fileReader: node, documentMetadata: metadata, }); return { type: "document", metadata, metadata: "documentMetadata" in result ? result.documentMetadata : metadata, file: node, content, content: "documentContent" in result ? result.documentContent : result, path: [...parentPath, metadata.name], }; }
-
@@ -97,7 +213,7 @@ const children = await node.read();const entries = await Promise.all( children.map((child) => this.#build(child, { metadataParser, contentParser }, [ this.#build(child, { contentParser }, [ ...parentPath, metadata.name, ])
-
-
-
@@ -3,13 +3,11 @@ //// SPDX-License-Identifier: Apache-2.0 import type { FileSystemReader } from "../filesystem_reader/interface.ts"; import type { MetadataParser } from "../metadata_parser/interface.ts"; import type { ContentParser } from "../content_parser/interface.ts"; import type { DocumentTree } from "../types.ts"; export interface BuildParameters { readonly fileSystemReader: FileSystemReader; readonly metadataParser: MetadataParser; readonly contentParser: ContentParser; }
-