scrapbox-parser
概要
Nodeはコードブロックと非コードブロックの2種類のみ
コードブロックには以下の情報が含まれる
type: Nodeの種類を表す. 'CodeBlock' か 'NonCodeBlock' のどちらか.
text: NodeのScrapbox上でのテキスト表現
headerText: コードブロックのヘッダ (code:*の部分) のテキスト表現
title: コードブロックヘッダcode:<title>の<title>部
lang: titleで指定されている言語. titleが言語名ならその言語が, 拡張子付きのファイル名ならその拡張子に対応する言語が, それ以外であればundefinedが選択される.
filename: コードブロックで指定されているコードのファイル名. titleが言語名そのもの (javascript, js など) の時は含まれない.
使い方
code:js
// script.js exports the parser class under the name `Parser`, so it has to be
// imported by name; a bare side-effect import binds nothing.
import { Parser } from '/api/code/mizdra/scrapbox-parser/script.js'
// Sample page text mixing plain lines, list items and code blocks.
// NOTE(review): the nested indentation of this sample looks flattened. The
// parser only treats lines indented deeper than a `code:` header as that
// block's body, so restore the indentation before running — TODO confirm
// against the original page.
const text = `
short text.
code:javascript
const message = "hello"
console.log(message)
long long long long long long text.
code:js
const add = (a, b) => a + b
item a
item b
code:a.js
"a-1"
"a-2"
code:b.js
"b-1"
"b-2"
end.
`
// Split the text into CodeBlock / NonCodeBlock nodes and pretty-print them.
const parser = new Parser(text)
const nodes = parser.parse()
console.log(JSON.stringify(nodes, null, 2))
出力例
code:output.json
[
{
"type": "NonCodeBlock",
"text": "\nshort text.\n\n"
},
{
"type": "CodeBlock",
"text": "code:javascript\n const message = \"hello\"\n console.log(message)\n",
"headerText": "code:javascript\n",
"title": "javascript",
"lang": "javascript",
"codeText": " const message = \"hello\"\n console.log(message)\n"
},
{
"type": "NonCodeBlock",
"text": "\nlong long long long long long text.\n\n"
},
{
"type": "CodeBlock",
"text": "code:js\n const add = (a, b) => a + b\n",
"headerText": "code:js\n",
"title": "js",
"lang": "javascript",
"codeText": " const add = (a, b) => a + b\n"
},
{
"type": "NonCodeBlock",
"text": "\n item a\n item b\n"
},
{
"type": "CodeBlock",
"text": " code:a.js\n \"a-1\"\n \"a-2\"\n",
"headerText": " code:a.js\n",
"title": "a.js",
"filename": "a.js",
"lang": "javascript",
"codeText": " \"a-1\"\n \"a-2\"\n"
},
{
"type": "CodeBlock",
"text": " code:b.js\n \"b-1\"\n \"b-2\"\n",
"headerText": " code:b.js\n",
"title": "b.js",
"filename": "b.js",
"lang": "javascript",
"codeText": " \"b-1\"\n \"b-2\"\n"
},
{
"type": "NonCodeBlock",
"text": "\nend.\n\n"
}
]
ソースコード
code:script.js
/**
 * Tells whether a line opens a code block, i.e. whether its first
 * non-whitespace characters are `code:`.
 *
 * Leading whitespace is skipped so that headers nested inside lists are
 * recognised as well.
 *
 * @param {string|undefined} line - The line to inspect; `undefined` stands for "no line".
 * @returns {boolean} `true` when the line is a code block header.
 */
function isCodeBlockHeader(line) {
  // A missing line can never be a header.
  return line !== undefined && line.trimStart().startsWith('code:')
}
/**
 * Lookup table from a language name or file extension to the canonical
 * language name reported for a code block.
 */
const fileTypes = {
  javascript: 'javascript',
  js: 'javascript',
  jsx: 'javascript',
  typescript: 'typescript',
  ts: 'typescript',
  tsx: 'typescript',
  graphql: 'graphql',
  gql: 'graphql',
}

/**
 * Tells whether `title` is, as a whole, one of the keys of `fileTypes`.
 *
 * @param {string} title - Title part of a `code:<title>` header.
 * @returns {boolean} `true` when `title` is a registered key.
 */
function isLanguageName(title) {
  // Own-property check only: inherited names such as `toString` must not match.
  return Object.prototype.hasOwnProperty.call(fileTypes, title)
}

/**
 * Resolves the language of a code block from its title.
 *
 * The whole title is tried first; otherwise the text after the last `.` is
 * looked up as a file extension.
 *
 * @param {string} title - Title part of a `code:<title>` header.
 * @returns {string|undefined} Canonical language name, or `undefined` when
 *   neither the title nor its extension is registered in `fileTypes`.
 */
function getLanguageName(title) {
  if (isLanguageName(title)) return fileTypes[title]
  const extension = title.split('.').pop()
  // Guard the extension lookup too, so that e.g. `x.constructor` does not
  // resolve to a member inherited from Object.prototype.
  return isLanguageName(extension) ? fileTypes[extension] : undefined
}
/**
 * Counts the whitespace characters at the start of a line.
 *
 * @param {string} line - The line to measure.
 * @returns {number} Number of leading whitespace characters; for a line made
 *   only of whitespace this is the length of the whole line.
 */
function getIndentSize(line) {
  // Index of the first non-whitespace character, or -1 when there is none.
  const firstVisible = line.search(/\S/)
  return firstVisible === -1 ? line.length : firstVisible
}
/**
 * Line-based parser that splits Scrapbox page text into a flat list of
 * `CodeBlock` and `NonCodeBlock` nodes.
 *
 * The parser keeps a one-line lookahead: `curLine` is the line being
 * processed and `peekLine` the line after it. Both are `undefined` once the
 * input is exhausted.
 */
export class Parser {
  /**
   * @param {string} text - Page text; lines are separated by `\n`.
   */
  constructor(text) {
    // Source of lines for bumpLine(); `next().value` is undefined after the last line.
    this.lineIterator = text.split('\n')[Symbol.iterator]()
    this.curLine = undefined
    this.peekLine = undefined
    // Two bumps prime the lookahead: the first fills peekLine, the second
    // moves it into curLine and fetches the following line.
    this.bumpLine()
    this.bumpLine()
  }

  /** Advances the cursor by one line. */
  bumpLine() {
    this.curLine = this.peekLine
    this.peekLine = this.lineIterator.next().value
  }

  /**
   * Parses the code block whose header is the current line.
   *
   * The body consists of every following line indented deeper than the
   * header. On return, `curLine` is the last line of the block.
   *
   * @returns {{type: 'CodeBlock', text: string, headerIndentSize: number,
   *   headerText: string, title: string, filename: (string|undefined),
   *   lang: (string|undefined), codeText: string}} The parsed node.
   */
  parseCodeBlock() {
    // parse header
    const headerText = `${this.curLine}`
    const headerIndentSize = getIndentSize(this.curLine)
    const title = this.curLine.trimStart().slice('code:'.length)
    // A title that is itself a language name carries no file name.
    const filename = isLanguageName(title) ? undefined : title
    const lang = getLanguageName(title)

    // parse code
    let codeText = ''
    // Stop at the end of input as well: a block may be the last thing in the text.
    while (
      this.peekLine !== undefined &&
      getIndentSize(this.peekLine) > headerIndentSize
    ) {
      this.bumpLine()
      codeText += `${this.curLine}\n`
    }
    codeText = codeText.trimEnd()

    return {
      type: 'CodeBlock',
      text: `${headerText}\n${codeText}`,
      headerIndentSize,
      headerText,
      title,
      filename,
      lang,
      codeText,
    }
  }

  /**
   * Parses a run of lines starting at the current line and ending right
   * before the next code block header or at the end of input.
   *
   * @returns {{type: 'NonCodeBlock', text: string}} The parsed node; trailing
   *   whitespace of `text` is trimmed.
   */
  parseNonCodeBlock() {
    let text = ''
    while (true) {
      text += `${this.curLine}\n`
      if (isCodeBlockHeader(this.peekLine) || this.peekLine === undefined) break
      else this.bumpLine()
    }
    text = text.trimEnd()
    return { type: 'NonCodeBlock', text }
  }

  /**
   * Parses the whole input.
   *
   * The line iterator is consumed in the process, so a second call on the
   * same instance returns an empty array.
   *
   * @returns {Array<object>} Nodes in document order.
   */
  parse() {
    const nodes = []
    while (this.curLine !== undefined) {
      if (isCodeBlockHeader(this.curLine)) nodes.push(this.parseCodeBlock())
      else nodes.push(this.parseNonCodeBlock())
      // Each sub-parser leaves curLine on the last line it consumed.
      this.bumpLine()
    }
    return nodes
  }
}