multi-core builds

This commit is contained in:
Jacky Zhao 2023-06-04 12:35:45 -04:00
parent 4bdc17d4a1
commit 9ad89997a5
9 changed files with 224 additions and 108 deletions

1
.gitignore vendored
View file

@ -1,3 +1,4 @@
.DS_Store .DS_Store
node_modules node_modules
public public
.quartz-cache

View file

@ -1,9 +1,20 @@
import { buildQuartz } from "./quartz" import { QuartzConfig } from "./quartz/cfg"
import Head from "./quartz/components/Head" import * as Head from "./quartz/components/Head"
import Header from "./quartz/components/Header" import * as Header from "./quartz/components/Header"
import { ContentPage, CreatedModifiedDate, Description, FrontMatter, GitHubFlavoredMarkdown, Katex, ObsidianFlavoredMarkdown, RemoveDrafts, ResolveLinks, SyntaxHighlighting } from "./quartz/plugins" import {
ContentPage,
CreatedModifiedDate,
Description,
FrontMatter,
GitHubFlavoredMarkdown,
Katex,
ObsidianFlavoredMarkdown,
RemoveDrafts,
ResolveLinks,
SyntaxHighlighting
} from "./quartz/plugins"
export default buildQuartz({ const config: QuartzConfig = {
configuration: { configuration: {
siteTitle: "🪴 Quartz 4.0", siteTitle: "🪴 Quartz 4.0",
enableSPA: true, enableSPA: true,
@ -61,4 +72,6 @@ export default buildQuartz({
}) })
] ]
}, },
}) }
export default config

View file

@ -5,10 +5,10 @@ import path from 'path'
import { hideBin } from 'yargs/helpers' import { hideBin } from 'yargs/helpers'
import esbuild from 'esbuild' import esbuild from 'esbuild'
import chalk from 'chalk' import chalk from 'chalk'
import requireFromString from 'require-from-string'
import { sassPlugin } from 'esbuild-sass-plugin' import { sassPlugin } from 'esbuild-sass-plugin'
const fp = "./quartz.config.ts" const cacheFile = "./.quartz-cache/transpiled-build.mjs"
const fp = "./quartz/build.ts"
const { version } = JSON.parse(readFileSync("./package.json").toString()) const { version } = JSON.parse(readFileSync("./package.json").toString())
export const BuildArgv = { export const BuildArgv = {
@ -52,16 +52,16 @@ yargs(hideBin(process.argv))
.version(version) .version(version)
.usage('$0 <cmd> [args]') .usage('$0 <cmd> [args]')
.command('build', 'Build Quartz into a bundle of static HTML files', BuildArgv, async (argv) => { .command('build', 'Build Quartz into a bundle of static HTML files', BuildArgv, async (argv) => {
const out = await esbuild.build({ await esbuild.build({
entryPoints: [fp], entryPoints: [fp],
write: false, outfile: path.join("quartz", cacheFile),
bundle: true, bundle: true,
keepNames: true, keepNames: true,
platform: "node", platform: "node",
format: "cjs", format: "esm",
jsx: "automatic", jsx: "automatic",
jsxImportSource: "preact", jsxImportSource: "preact",
external: ["@napi-rs/simple-git", "shiki"], packages: "external",
plugins: [ plugins: [
sassPlugin({ sassPlugin({
type: 'css-text' type: 'css-text'
@ -97,8 +97,7 @@ yargs(hideBin(process.argv))
process.exit(1) process.exit(1)
}) })
const mod = out.outputFiles[0].text const { default: init } = await import(cacheFile)
const init = requireFromString(mod, fp).default
init(argv, version) init(argv, version)
}) })
.showHelpOnFail(false) .showHelpOnFail(false)

View file

@ -0,0 +1,7 @@
#!/usr/bin/env node
import workerpool from 'workerpool'
// Path of the transpiled worker bundle. It is kept in a variable so the
// import below stays a runtime (dynamic) import.
const transpiledWorkerPath = "./.quartz-cache/transpiled-worker.mjs"

// Load the bundle once at startup, then register its `parseFiles` export with
// workerpool under the same name so pool clients can invoke it.
const transpiledWorker = await import(transpiledWorkerPath)
workerpool.worker({ parseFiles: transpiledWorker.parseFiles })

75
quartz/build.ts Normal file
View file

@ -0,0 +1,75 @@
import path from "path"
import { PerfTimer } from "./perf"
import { rimraf } from "rimraf"
import { globby } from "globby"
import chalk from "chalk"
import http from "http"
import serveHandler from "serve-handler"
import { parseMarkdown } from "./processors/parse"
import { filterContent } from "./processors/filter"
import { emitContent } from "./processors/emit"
import cfg from "../quartz.config"
/** Options that `buildQuartz` reads from the parsed command line. */
interface Argv {
/** Directory that is globbed for Markdown input files and handed to the parse and emit steps. */
directory: string
/** When true, extra progress and timing messages are logged. */
verbose: boolean
/** Directory the site is emitted into; also the directory removed by `clean` and served by `serve`. */
output: string
/** When true, the output directory is deleted before building. */
clean: boolean
/** When true, an HTTP server for the output directory is started after the build. */
serve: boolean
/** Port the HTTP server listens on when `serve` is set. */
port: number
}
/**
 * Runs a full Quartz build using the imported `quartz.config` module.
 *
 * Steps, in order: print the version banner, optionally list the configured
 * plugins, optionally delete the output directory, glob the Markdown inputs,
 * then parse, filter and emit them. When `argv.serve` is set, an HTTP server
 * for the output directory is started afterwards and left listening.
 *
 * @param argv - parsed command-line options
 * @param version - version string shown in the banner
 */
export default async function buildQuartz(argv: Argv, version: string) {
  console.log(chalk.bgGreen.black(`\n Quartz v${version} \n`))
  const timer = new PerfTimer()
  const { directory, output, verbose } = argv

  if (verbose) {
    const namesOf = (kind: 'transformers' | 'filters' | 'emitters') =>
      cfg.plugins[kind].map(({ name }) => name).join(", ")
    const totalPlugins = Object.values(cfg.plugins).flat().length
    console.log(`Loaded ${totalPlugins} plugins`)
    console.log(` Transformers: ${namesOf('transformers')}`)
    console.log(` Filters: ${namesOf('filters')}`)
    console.log(` Emitters: ${namesOf('emitters')}`)
  }

  // clean: remove any previous output before building
  if (argv.clean) {
    timer.addEvent('clean')
    await rimraf(output)
    if (verbose) {
      console.log(`Cleaned output directory \`${output}\` in ${timer.timeSince('clean')}`)
    }
  }

  // glob: collect Markdown files relative to the content directory
  timer.addEvent('glob')
  const globOptions = {
    cwd: directory,
    ignore: cfg.configuration.ignorePatterns,
    gitignore: true,
  }
  const relativePaths = await globby('**/*.md', globOptions)
  if (verbose) {
    console.log(`Found ${relativePaths.length} input files in ${timer.timeSince('glob')}`)
  }

  // parse -> filter -> emit
  const absolutePaths = relativePaths.map(relative => `${directory}${path.sep}${relative}`)
  const parsed = await parseMarkdown(cfg.plugins.transformers, directory, absolutePaths, verbose)
  const kept = filterContent(cfg.plugins.filters, parsed, verbose)
  await emitContent(directory, output, cfg, kept, verbose)
  console.log(chalk.green(`Done processing ${relativePaths.length} files in ${timer.timeSince()}`))

  if (!argv.serve) {
    return
  }

  // serve: expose the emitted site over HTTP without directory listings
  const serveOptions = { public: output, directoryListing: false }
  http
    .createServer(async (request, response) => serveHandler(request, response, serveOptions))
    .listen(argv.port)
  console.log(`Started a Quartz server listening at http://localhost:${argv.port}`)
  console.log('hint: exit with ctrl+c')
}

View file

@ -1,77 +0,0 @@
import path from "path"
import { QuartzConfig } from "./cfg"
import { PerfTimer } from "./perf"
import { rimraf } from "rimraf"
import { globby } from "globby"
import chalk from "chalk"
import http from "http"
import serveHandler from "serve-handler"
import { createProcessor, parseMarkdown } from "./processors/parse"
import { filterContent } from "./processors/filter"
import { emitContent } from "./processors/emit"
/** Options that the build function returned by `buildQuartz` reads from the parsed command line. */
interface Argv {
/** Directory that is globbed for Markdown input files and handed to the parse and emit steps. */
directory: string
/** When true, extra progress and timing messages are logged. */
verbose: boolean
/** Directory the site is emitted into; also the directory removed by `clean` and served by `serve`. */
output: string
/** When true, the output directory is deleted before building. */
clean: boolean
/** When true, an HTTP server for the output directory is started after the build. */
serve: boolean
/** Port the HTTP server listens on when `serve` is set. */
port: number
}
/**
 * Creates the build entry point for a given Quartz configuration.
 *
 * The returned async function prints the version banner, optionally lists the
 * configured plugins, optionally deletes the output directory, globs the
 * Markdown inputs, builds a processor from the configured transformers, and
 * then parses, filters and emits the content. When `argv.serve` is set it
 * finally starts an HTTP server for the output directory.
 *
 * @param cfg - the Quartz configuration to build with
 * @returns an async `(argv, version)` function that performs the build
 */
export function buildQuartz(cfg: QuartzConfig) {
  return async (argv: Argv, version: string) => {
    console.log(chalk.bgGreen.black(`\n Quartz v${version} \n`))
    const timer = new PerfTimer()
    const { directory: contentDir, output: outDir, verbose: chatty } = argv

    if (chatty) {
      const listNames = (kind: 'transformers' | 'filters' | 'emitters') => {
        const names = cfg.plugins[kind].map(entry => entry.name)
        return names.join(", ")
      }
      const allPlugins = Object.values(cfg.plugins).flat()
      console.log(`Loaded ${allPlugins.length} plugins`)
      console.log(` Transformers: ${listNames('transformers')}`)
      console.log(` Filters: ${listNames('filters')}`)
      console.log(` Emitters: ${listNames('emitters')}`)
    }

    // clean: remove any previous output before building
    if (argv.clean) {
      timer.addEvent('clean')
      await rimraf(outDir)
      if (chatty) {
        console.log(`Cleaned output directory \`${outDir}\` in ${timer.timeSince('clean')}`)
      }
    }

    // glob: collect Markdown files relative to the content directory
    timer.addEvent('glob')
    const found = await globby('**/*.md', {
      cwd: contentDir,
      ignore: cfg.configuration.ignorePatterns,
      gitignore: true,
    })
    if (chatty) {
      console.log(`Found ${found.length} input files in ${timer.timeSince('glob')}`)
    }

    // parse -> filter -> emit, using one processor built from the transformers
    const markdownProcessor = createProcessor(cfg.plugins.transformers)
    const inputPaths = found.map(relative => `${contentDir}${path.sep}${relative}`)
    const parsed = await parseMarkdown(markdownProcessor, contentDir, inputPaths, chatty)
    const kept = filterContent(cfg.plugins.filters, parsed, chatty)
    await emitContent(contentDir, outDir, cfg, kept, chatty)
    console.log(chalk.green(`Done processing ${found.length} files in ${timer.timeSince()}`))

    if (!argv.serve) {
      return
    }

    // serve: expose the emitted site over HTTP without directory listings
    const httpServer = http.createServer(async (request, response) =>
      serveHandler(request, response, { public: outDir, directoryListing: false })
    )
    httpServer.listen(argv.port)
    console.log(`Started a Quartz server listening at http://localhost:${argv.port}`)
    console.log('hint: exit with ctrl+c')
  }
}

View file

@ -3,7 +3,7 @@ import remarkGfm from "remark-gfm"
import smartypants from 'remark-smartypants' import smartypants from 'remark-smartypants'
import { QuartzTransformerPlugin } from "../types" import { QuartzTransformerPlugin } from "../types"
import rehypeSlug from "rehype-slug" import rehypeSlug from "rehype-slug"
import rehypeAutolinkHeadings from "rehype-autolink-headings/lib" import rehypeAutolinkHeadings from "rehype-autolink-headings"
export interface Options { export interface Options {
enableSmartyPants: boolean enableSmartyPants: boolean

View file

@ -1,3 +1,4 @@
import esbuild from 'esbuild'
import remarkParse from 'remark-parse' import remarkParse from 'remark-parse'
import remarkRehype from 'remark-rehype' import remarkRehype from 'remark-rehype'
import { Processor, unified } from "unified" import { Processor, unified } from "unified"
@ -8,6 +9,8 @@ import { PerfTimer } from '../perf'
import { read } from 'to-vfile' import { read } from 'to-vfile'
import { slugify } from '../path' import { slugify } from '../path'
import path from 'path' import path from 'path'
import os from 'os'
import workerpool, { Promise as WorkerPromise } from 'workerpool'
import { QuartzTransformerPlugin } from '../plugins/types' import { QuartzTransformerPlugin } from '../plugins/types'
export type QuartzProcessor = Processor<MDRoot, HTMLRoot, void> export type QuartzProcessor = Processor<MDRoot, HTMLRoot, void>
@ -32,9 +35,55 @@ export function createProcessor(transformers: QuartzTransformerPlugin[]): any {
return processor return processor
} }
export async function parseMarkdown(processor: QuartzProcessor, baseDir: string, fps: string[], verbose: boolean): Promise<ProcessedContent[]> { function* chunks<T>(arr: T[], n: number) {
for (let i = 0; i < arr.length; i += n) {
yield arr.slice(i, i + n)
}
}
/**
 * Bundles `./quartz/worker.ts` with esbuild into an ESM file under
 * `quartz/.quartz-cache/`, leaving package imports external.
 *
 * `.scss` files and `.inline.ts` / `.inline.js` files are replaced with empty
 * text modules while bundling.
 * NOTE(review): presumably the worker never needs their contents — confirm.
 *
 * @param verbose - when true, logs a message before transpiling
 */
async function transpileWorkerScript(verbose: boolean) {
  const workerEntry = "./quartz/worker.ts"
  const bundlePath = path.join("quartz", "./.quartz-cache/transpiled-worker.mjs")
  // sources whose contents are stubbed out with an empty string
  const stubbedSources = [/\.scss$/, /\.inline\.(ts|js)$/]

  if (verbose) {
    console.log("Transpiling worker script")
  }

  await esbuild.build({
    entryPoints: [workerEntry],
    outfile: bundlePath,
    bundle: true,
    keepNames: true,
    platform: "node",
    format: "esm",
    packages: "external",
    plugins: [
      {
        name: 'css-and-scripts-as-text',
        setup(build) {
          for (const filter of stubbedSources) {
            build.onLoad({ filter }, () => ({ contents: '', loader: 'text' }))
          }
        },
      },
    ],
  })
}
export async function parseMarkdown(transformers: QuartzTransformerPlugin[], baseDir: string, fps: string[], verbose: boolean): Promise<ProcessedContent[]> {
const perf = new PerfTimer() const perf = new PerfTimer()
const CHUNK_SIZE = 128
let concurrency = fps.length < CHUNK_SIZE ? 1 : os.availableParallelism()
const res: ProcessedContent[] = [] const res: ProcessedContent[] = []
if (concurrency === 1) {
// single-thread
const processor = createProcessor(transformers)
for (const fp of fps) { for (const fp of fps) {
const file = await read(fp) const file = await read(fp)
@ -49,7 +98,26 @@ export async function parseMarkdown(processor: QuartzProcessor, baseDir: string,
console.log(`[process] ${fp} -> ${file.data.slug}`) console.log(`[process] ${fp} -> ${file.data.slug}`)
} }
} }
} else {
await transpileWorkerScript(verbose)
const pool = workerpool.pool(
'./quartz/bootstrap-worker.mjs',
{
minWorkers: 'max',
maxWorkers: concurrency,
workerType: 'thread'
}
)
console.log(`Parsed and transformed ${res.length} Markdown files in ${perf.timeSince()}`) const childPromises: WorkerPromise<ProcessedContent[]>[] = []
for (const chunk of chunks(fps, CHUNK_SIZE)) {
childPromises.push(pool.exec('parseFiles', [baseDir, chunk, verbose]))
}
const results: ProcessedContent[][] = await WorkerPromise.all(childPromises)
res.push(...results.flat())
await pool.terminate()
}
console.log(`Parsed and transformed ${res.length} Markdown files with ${concurrency} cores in ${perf.timeSince()}`)
return res return res
} }

30
quartz/worker.ts Normal file
View file

@ -0,0 +1,30 @@
import { read } from "to-vfile"
import config from "../quartz.config"
import { createProcessor } from "./processors/parse"
import { slugify } from "./path"
import path from "path"
import { ProcessedContent } from "./plugins/vfile"
// Transformer plugins are taken from the imported ../quartz.config module.
const transformers = config.plugins.transformers
// Built once when this module loads; parseFiles reuses it for every file it handles.
const processor = createProcessor(transformers)
/**
 * Reads and transforms a batch of Markdown files with the module-level
 * processor. Only called from a worker thread.
 *
 * @param baseDir - directory that each file's slug is computed relative to
 * @param fps - paths of the files to process; handled one at a time, in order
 * @param verbose - when true, logs each processed path with its slug
 * @returns one `[tree, file]` pair per input path, in input order
 */
export async function parseFiles(baseDir: string, fps: string[], verbose: boolean) {
  const processed: ProcessedContent[] = []

  for (const filePath of fps) {
    const vfile = await read(filePath)

    // base data properties that plugins may use
    vfile.data.slug = slugify(path.relative(baseDir, vfile.path))
    vfile.data.filePath = filePath

    const markdownTree = processor.parse(vfile)
    const transformedTree = await processor.run(markdownTree, vfile)
    processed.push([transformedTree, vfile])

    if (verbose) {
      console.log(`[process] ${filePath} -> ${vfile.data.slug}`)
    }
  }

  return processed
}