Syncing Scrapbox with Hugo
Scrapbox's writing feel and UX are too good to give up, so I sync a subset of pages to Hugo and check whether they show up properly in search results.
The code is shown below.
The Scrapbox-to-Markdown conversion uses a modified version of an existing sb2md script.
code:main.sh
# Assumes $project is set and $url points at the page-list API,
# e.g. url="https://scrapbox.io/api/pages/$project?limit=1000"
dates=($(curl $url | jq ".pages[].updated" | xargs))
i=0
IFS=$'\n'
for title in $(curl $url | jq ".pages[].title" | sed -e "s/^\"//g" | sed -e "s/\"$//g")
do
  # file name: spaces -> underscores, strip ? ! : which break paths
  md_title=$(echo $title | tr ' ' '_' | sed 's/?//g' | sed 's/!//g' | sed 's/://g')
  # percent-encode the title; [:-3] drops the encoded trailing newline (%0A)
  etitle=$(echo $title | python -c 'import sys; from urllib.parse import quote; [print(quote(l)[:-3], end="") for l in sys.stdin]')
  echo $title
  # per-page text endpoint (assumed; $etitle is otherwise unused)
  url="https://scrapbox.io/api/pages/$project/$etitle/text"
  echo $url
  curl $url | node sb2md.js > $md_title.md
  python scrapbox.py $md_title.md "${dates[$i]}"
  i=$(expr $i + 1)
done
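For reference, a minimal sketch of how to drive main.sh, assuming the standard Scrapbox REST API; the project name is a placeholder, and $project / $url are the variables the script expects to find in its environment.
code:run_example.sh
# hypothetical setup: main.sh expects $project and $url to be set
export project="your-project"
export url="https://scrapbox.io/api/pages/$project?limit=1000"
# the list endpoint responds with JSON like:
#   {"pages": [{"title": "Some Page", "updated": 1609426800, ...}, ...]}
bash main.sh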
code:sb2md.js
// x.then(f) applies f to x, so the conversion below reads as a pipeline
if (!Object.prototype.then) {
  Object.prototype.then = function (f) { return f.call(null, this); };
}

process.stdin.resume();
process.stdin.setEncoding('utf8');

// read all of stdin, then convert
let input_string = '';
process.stdin.on('data', chunk => {
  input_string += chunk;
});
process.stdin.on('end', () => {
  const text = input_string;
  console.log(sb2md(text));
});
function sb2md(text) {
  // code block: fold each "code:name" block into a single ```-prefixed line
  // so that replaceLine below leaves its contents untouched
  const escapeCodeBlocks = s => s.replace(
    /^code:(.+)$((\n^[ \t].*$)+)/mg, (_, p1, p2) =>
      '```' + p1 + p2.replace(/^[ \t]/mg, '').replace(/\r\n|\r|\n/g, '+++') + '+++```'
  );
  const unescapeCodeBlocks = s => s.replace(/\+{3}/g, '\n');

  const replaceLine = line =>
    /^`{3}/.test(line) ? line : // skip folded code blocks
    line
      // level 2 heading
      .replace(/^\[\[([^\]]+)\]\]$/, '## $1')
      .replace(/^\[\*\s+(\S[^\]]*)\]$/, '## $1')
      // image block (assumed rule: a line that is just [https://...] becomes an image)
      .replace(/^\[(https?:\/\/[^\]\s]+)\]$/, '![]($1)')
      // unordered list (Scrapbox indent depth -> Markdown nesting)
      .replace(/^\s{6}(\S.*)$/, '          - $1')
      .replace(/^\s{5}(\S.*)$/, '        - $1')
      .replace(/^\s{4}(\S.*)$/, '      - $1')
      .replace(/^\s{3}(\S.*)$/, '    - $1')
      .replace(/^\s{2}(\S.*)$/, '  - $1')
      .replace(/^\s(\S.*)$/, '- $1')
      // bold text
      .replace(/\[\[([^\]]+)\]\]/g, '**$1**')
      .replace(/\[\*\s+([^\]]+)\]/g, '**$1**')
      // italic text
      .replace(/\[\/\s+([^\]]+)\]/g, '*$1*');

  return text
    .then(escapeCodeBlocks)
    .split(/\r\n|\r|\n/)
    // first line is level 1 heading
    .then(lines => [lines[0].replace(/^(.+)$/, '# $1')].concat(lines.slice(1)))
    .map(replaceLine)
    .join('\n')
    .then(unescapeCodeBlocks);
}
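A quick smoke test of the converter on made-up input (the page-text API returns raw Scrapbox notation, which is what stdin receives here):
code:test_sb2md.sh
# hypothetical sample: title line, a heading, one list item
printf 'My Page\n[* Section]\n item\n' | node sb2md.js
# expected output:
# # My Page
# ## Section
# - item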
code:scrapbox.py
import os
import re
import sys
import datetime
import requests
from urllib.parse import quote
# Patterns for the Scrapbox notation that survives sb2md.js
head_pattern = re.compile(r"\[\* ([^\]]+)\]")                        # inline [* heading]
math_pattern = re.compile(r"\[\$ ([^\]]+)\]")                        # inline [$ math]
link_pattern = re.compile(r"\[([^\$\*\]][^\]]*)\](?!\()")            # [internal link], not a Markdown link
url_link_pattern = re.compile(r"\[([^\]]+?) (https?://[^\]\s]+)\]")  # [title url]
tag_pattern = re.compile(r"#(\S+)")                                  # #hashtag
img_pattern = re.compile(r"!\[\]\((https?://[^\)]+)\)")              # ![](url)
large_math_pattern = re.compile(r"\s*-\s+(\$[^\$]+\$)\s*$")          # list item that is only math
# candidate extensions probed for extension-less image URLs (e.g. Gyazo); assumed list
exts = ["png", "jpg", "jpeg", "gif"]

if len(sys.argv) < 3:
    print('Usage: scrapbox.py <file> <date>')
    sys.exit(1)
print(sys.argv)
lines = []
tags = []
in_snippets = False
with open(sys.argv[1], 'r') as f:
    lines = f.readlines()
for i, line in enumerate(lines):
    # skip (nearly) empty lines
    if len(line.replace(" ", "")) <= 1:
        continue
    # toggle on code fences and leave snippet contents untouched
    if line.startswith("`"):
        in_snippets = not in_snippets
    if in_snippets:
        continue
    # a heading wrapping an image link, e.g. [** [https://...]], becomes ![](...)
    for j in range(6):
        stars = '*' * (6 - j)
        t = f"[{stars} ["
        has_t = t in lines[i]
        lines[i] = lines[i].replace(t, "![](")
        if has_t:
            lines[i] = lines[i][:-3] + ')\n'  # drop trailing "]]\n", close the image
    # materialize the iterators so the truthiness checks below are meaningful
    head_match = list(head_pattern.finditer(line))
    math_match = list(math_pattern.finditer(line))
    links_match = list(link_pattern.finditer(line))
    tags_match = list(tag_pattern.finditer(line))
    url_links_match = list(url_link_pattern.finditer(line))
    if head_match:
        targets = set()
        for _link in head_match:
            targets.add(_link.group(1))
        for link in targets:
            # inline heading -> level 3 heading
            lines[i] = lines[i].replace(f"[* {link}]", f"### {link}")
    if math_match:
        targets = set()
        for _math in math_match:
            targets.add(_math.group(1))
        for expr in targets:
            # [$ expr] -> $expr$, escaping underscores for the Markdown renderer
            escaped = expr.replace("_", r"\_")
            lines[i] = lines[i].replace(f"[$ {expr}]", f"${escaped}$")
    if url_links_match:
        targets = set()
        for url_link in url_links_match:
            title = url_link.group(1)
            url = url_link.group(2)
            targets.add((title, url))
        for title, url in targets:
            # [title url] -> [title](url)  (loop body reconstructed)
            lines[i] = lines[i].replace(f"[{title} {url}]", f"[{title}]({url})")
    if links_match:
        targets = set()
        for _link in links_match:
            targets.add(_link.group(1))
        for link in targets:
            md_path = link.replace(' ', '_').replace('?', '').replace('!', '').replace(':', '') + ".md"
            md = '{{< ref "' + md_path + '" >}}'
            if os.path.exists(md_path):
                # the linked page is synced too: point at it with Hugo's ref shortcode
                lines[i] = lines[i].replace(f"[{link}]", f"[{link}]({md})")
            else:
                # not synced: degrade to plain text (branch bodies reconstructed)
                lines[i] = lines[i].replace(f"[{link}]", link)
    if tags_match and i == 1:
        # the second line holds the page's hashtags; collect and strip them
        for _tag in tags_match:
            tag = _tag.group(1)
            tags.append(tag)
            lines[i] = lines[i].replace(f"#{tag}", "")
    img_match = list(img_pattern.finditer(lines[i]))
    if img_match:
        for img in img_match:
            url = img.group(1)
            img_url = url
            for e in exts:
                if url.endswith(e):
                    img_url = url
                    break
                # Gyazo-style URLs carry no extension: probe until one resolves
                img_url = f"{url}.{e}"
                res = requests.get(img_url)
                if res.status_code == 200:
                    break
            lines[i] = lines[i].replace(
                f"![]({url})",
                '{{< ' + f'img src="{img_url}" position="center"' + ' >}}<br>')
    large_math_match = list(large_math_pattern.finditer(lines[i]))
    if large_math_match:
        for _math in large_math_match:
            math = _math.group(1)
            # a list item that is only math becomes display math (body reconstructed)
            lines[i] = lines[i].replace(math, f"${math}$")
meta = """\
---
title: "TITLE"
date: DATE
description:
draft: false
hideToc: false
enableToc: true
enableTocContent: true
tocPosition: inner
tags:
TAGS
series:
-
# image: images/feature3/code-file.png
libraries:
- katex
- mermaid
- msc
---
"""
title = lines[0].replace('\n', '').lstrip('# ')  # drop the "# " sb2md puts on the title line
if "paper" in tags:
    title = f"【論文メモ】{title}"  # prefix paper-note titles
else:
    tags.append("post")
print("TAG", tags)
date = datetime.datetime.fromtimestamp(int(sys.argv[2])).strftime('%Y-%m-%dT%H:%M:%S+09:00')
meta = meta.replace("TITLE", title)
meta = meta.replace("DATE", date)
meta = meta.replace("TAGS", "\n".join(map(lambda x: f"- {x}", tags)))
# prepend the generated front matter and drop the original title/tag lines
lines = list(map(lambda x: x + '\n', meta.split('\n'))) + lines[2:]
with open(sys.argv[1], 'w') as f:
    f.writelines(lines)
# for line in lines:
# print(line,end='')
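Putting it together, scrapbox.py rewrites the Markdown file in place, prepending Hugo front matter built from the title line, the hashtags on the second line, and the updated timestamp passed as the second argument. A hypothetical run:
code:run_scrapbox.sh
# my_page.md was produced by sb2md.js; 1609426800 is the page's "updated" epoch time
python scrapbox.py my_page.md 1609426800
head my_page.md   # the file now begins with the generated front matter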