松岡正剛の千夜千冊をscrapbox書籍にするscript
使い方
ドメインが同じならどのページでも良い
3. かなり待つとzipをdownloadできるようになる
fetchにかなり時間がかかるので注意
1秒おきに取得するようにしているのに加えて、server自体の応答が遅い
4. json filesを全部importする
1つだけ欠番があります
code
code:script.js
(() => {
// Entry point: fetch the reviews, convert them to Scrapbox import JSON,
// zip the resulting files and trigger a download of the archive.
(async () => {
  // Upper bound (inclusive) of the review numbers to fetch.
  const latest = 1772;
  // Choose the range of pages to fetch here.
  const {pages, failed} = await fetchReviews(1, latest);
  console.log('Fetched: ', pages);
  console.log('Pages failed to fetch: ', failed);
  const {data, fileName} = await zipAll(convertScrapboxJSON(pages));
  downloadBlob(data, fileName);
})().catch((error) => {
  // Do not leave the promise floating: report any failure explicitly.
  console.error(error);
});
fetch
code:script.js
/** Resolve after the given number of milliseconds. */
const sleep = (milliseconds) => new Promise(resolve => setTimeout(resolve, milliseconds));

/**
 * Fetch and parse the pages numbered `from`..`to` (inclusive), one at a time.
 *
 * Each number is zero-padded to four digits and requested as `/NNNN.html`
 * relative to the current origin. A page that cannot be fetched or parsed is
 * recorded in `failed` instead of aborting the whole run.
 *
 * @param {number} from - first page number to fetch
 * @param {number} to - last page number to fetch (inclusive)
 * @param {number} [interval=1000] - milliseconds to wait after each request
 * @returns {Promise<{pages: object[], failed: string[]}>} parsed pages and the
 *   zero-padded ids of the pages that failed
 */
async function fetchReviews(from, to, interval = 1000) {
  const pages = [];
  const failed = [];
  for (let i = from; i <= to; i++) {
    const id = `${i}`.padStart(4, '0');
    try {
      const res = await fetch(`/${id}.html`);
      // fetch() does not reject on HTTP errors, so check the status explicitly.
      if (!res.ok) {
        throw new Error(`Failed to fetch /${id}.html: HTTP ${res.status}`);
      }
      const page = {id, ...parse(await res.text())};
      console.log(page);
      pages.push(page);
    } catch (e) {
      console.error(e);
      failed.push(id);
    }
    // Throttle the requests.
    await sleep(interval);
  }
  return {pages, failed};
}
parser
htmlから著者名、書籍名、本文を取得する
code:script.js
/**
 * Extract the author, the book title and the body from a page's HTML.
 *
 * Throws a TypeError when the author/title elements or the `entry-content`
 * container are missing from the document.
 *
 * @param {string} html - HTML source of one page
 * @returns {{author: string, title: string, body: object[]}} `body` holds the
 *   objects produced by `parseFigure` and `parseParagraph`, in document order
 */
function parse(html) {
  const dom = new DOMParser().parseFromString(html, 'text/html');
  const author = dom.querySelector('#default_style_area .min').textContent;
  const title = dom.querySelector('#default_style_area .goth').textContent;
  const body = [...dom.getElementsByClassName('entry-content')[0].children].flatMap(p => {
    if (p.classList.contains('fig01')) {
      const fig = parseFigure(p);
      // parseFigure returns undefined when the element has no image/iframe;
      // in that case fall through to the paragraph check below.
      if (fig) return [fig];
    }
    // For some reason the .fontL class is missing when the page is obtained
    // via fetch, so paragraphs are detected by tag name instead.
    if (p.localName === 'p') {
      const para = parseParagraph(p);
      return para.text === '' ? [] : [para];
    }
    return [];
  });
  return {author, title, body};
}
本文を変換する
code:script.js
/**
 * Convert one paragraph element into plain text.
 *
 * The element is modified in place:
 * - `<br>` becomes a newline;
 * - a link whose href matches `<digits>.html` becomes `[<digits>]` when its
 *   text ends with '夜', otherwise `<text>([<digits>])`;
 * - any other link is removed together with its text.
 *
 * @param {Element} fontL - paragraph element to convert (mutated)
 * @returns {{type: 'paragraph', text: string}}
 */
function parseParagraph(fontL) {
  // replaceWith(string) inserts a text node, so the text is never re-parsed as HTML.
  fontL.querySelectorAll('br').forEach(br => br.replaceWith('\n'));
  fontL.querySelectorAll('a').forEach(a => {
    const match = a.href.match(/(\d+)\.html/);
    if (!match) {
      a.remove();
      return;
    }
    const id = match[1];
    if (a.textContent.endsWith('夜')) {
      a.replaceWith(`[${id}]`);
      return;
    }
    a.replaceWith(`${a.textContent}([${id}])`);
  });
  return {type: 'paragraph', text: fontL.innerText};
}
本文中の画像と動画を変換する
code:script.js
/**
 * Convert an image/video container into a figure object.
 *
 * @param {Element} fig01 - element carrying the `fig01` class
 * @returns {{type: 'figure', src: string, caption: string, description: string}|undefined}
 *   `undefined` when the element contains no `img`/`iframe` with a `src`
 */
function parseFigure(fig01) {
  const src = fig01.querySelector('img, iframe')?.src;
  if (!src) return;
  const fig9 = fig01.getElementsByClassName('fig9')[0];
  // No caption container, or an empty one: emit the figure without any text.
  if (!fig9 || fig9.childElementCount === 0) {
    return {type: 'figure', src, caption: '', description: ''};
  }
  const caption = fig9.firstChild.textContent ?? '';
  // A description is only taken when the container has more than one child element.
  const description = fig9.childElementCount > 1 ? fig9.lastChild.textContent : '';
  return {type: 'figure', src, caption, description};
}
converter
ファイルサイズが巨大なので、500pagesずつに分割する
でないとimport時にエラーが出てしまう
code:script.js
/**
 * Make a string safe to embed in a Scrapbox link or hashtag: square brackets
 * are replaced and newlines become spaces.
 * NOTE(review): the replacement characters were garbled in the source; the
 * full-width brackets used here are an assumption — confirm.
 */
const format = text => text.replaceAll('[', '［').replaceAll(']', '］').replaceAll('\n', ' ');

/**
 * Convert parsed pages into Scrapbox import JSON, split into several files.
 *
 * The output is split because a single file is too large and makes the
 * import fail.
 *
 * @param {{id: string, author: string, title: string, body: object[]}[]} pages
 * @param {number} [chunkSize=500] - maximum number of pages per file
 * @returns {{data: {pages: object[]}, fileName: string}[]} one entry per file;
 *   an empty array when `pages` is empty
 */
function convertScrapboxJSON(pages, chunkSize = 500) {
  // id -> Scrapbox page title, used for the page itself and to resolve `[id]` references.
  const titles = Object.fromEntries(
    pages.map(({title, author, id}) => [id, `${format(author)}『${format(title)}』`]),
  );
  const converted = pages.map(({author, body, id}) => ({
    // NOTE(review): the title property and the title line look lost from the
    // source; restored here on the assumption that the import format needs
    // `title` plus the title as the first line — confirm.
    title: titles[id],
    lines: [
      titles[id],
      ...body.flatMap(p => {
        switch (p.type) {
          case 'paragraph':
            return [
              ...p.text.split('\n').flatMap(line => {
                if (line.trim() === '') return [];
                // Turn `[0123]` markers into links to the referenced page title.
                return [line.replace(/\[(\d+)\]/g, (_, p1) => `[${titles[p1] ?? p1}]`)];
              }),
              '',
            ];
          case 'figure':
            return [
              `${p.caption}`,
              ...(p.description === '' ? [] : [` ${p.description}`]),
              `[${p.src}]`,
            ];
          default:
            // Unknown entries produce no lines instead of an `undefined` line.
            return [];
        }
      }),
      '',
      `#${format(author)}`,
      `#${id}`,
    ],
  }));
  // Math.ceil avoids an extra empty file when the count is an exact multiple.
  const chunkNum = Math.ceil(converted.length / chunkSize);
  return Array.from({length: chunkNum}, (_, i) => {
    const start = i * chunkSize;
    const end = Math.min(start + chunkSize, converted.length);
    return {
      data: {pages: converted.slice(start, end)},
      // 1-based, inclusive positions of the first and last page in this file.
      fileName: `import${`${start + 1}`.padStart(4, '0')}-${`${end}`.padStart(4, '0')}.json`,
    };
  });
}
JSONをBlobに変換する
code:script.js
/**
 * Serialize a JSON-compatible value into a Blob.
 *
 * @param {*} json - value to serialize with JSON.stringify
 * @returns {Blob} Blob of type `application/json` holding the serialized text
 */
function toBlob(json) {
  return new Blob([JSON.stringify(json)], {type: 'application/json'});
}
downloadする
code:script.js
/**
 * Save a Blob to disk under the given file name.
 *
 * @param {Blob} blob - content to download
 * @param {string} fileName - suggested name for the downloaded file
 */
function downloadBlob(blob, fileName) {
  // Create an object URL that points at the blob.
  const objectUrl = URL.createObjectURL(blob);

  // Use a hidden anchor element to perform the download.
  const anchor = Object.assign(document.createElement('a'), {
    href: objectUrl,
    download: fileName,
  });
  anchor.style.display = 'none';

  // Start the download, then release the object URL.
  anchor.click();
  URL.revokeObjectURL(objectUrl);
}
zipにまとめる
圧縮をかけてある
code:script.js
/**
 * Bundle the given JSON files into one compressed zip archive.
 *
 * JSZip is loaded through `useJSZip` first when it is not available yet.
 *
 * @param {{data: object, fileName: string}[]} data - files to add to the archive
 * @returns {Promise<{data: Blob, fileName: string}>} the archive and its file name
 */
async function zipAll(data) {
  // Also reload when the script tag exists but the JSZip global is missing.
  if (typeof JSZip === 'undefined' || !document.getElementById('scrapbox-userscript-jszip')) {
    await useJSZip();
  }
  const zip = new JSZip();
  for (const {data: json, fileName} of data) {
    zip.file(fileName, toBlob(json));
  }
  return {
    data: await zip.generateAsync({
      type: 'blob',
      compression: 'DEFLATE',
      compressionOptions: {
        level: 9, // highest compression level
      },
    }),
    fileName: 'imports.zip',
  };
}
code:script.js
/**
 * Load JSZip from the CDN by injecting a script tag into the document head.
 *
 * Any script tag previously injected under the same id is removed first.
 *
 * @returns {Promise<void>} resolves once the script has loaded; rejects with
 *   the error event when loading fails
 */
async function useJSZip() {
  const scriptId = 'scrapbox-userscript-jszip';

  const injectScript = (resolve, reject) => {
    // Drop a previously injected tag so the id stays unique.
    const stale = document.getElementById(scriptId);
    if (stale != null) {
      stale.parentNode.removeChild(stale);
    }

    const tag = document.createElement('script');
    tag.addEventListener('load', () => resolve());
    tag.addEventListener('error', (event) => reject(event));
    tag.src = '//cdnjs.cloudflare.com/ajax/libs/jszip/3.5.0/jszip.min.js';
    tag.id = scriptId;
    document.head.appendChild(tag);
  };

  return new Promise(injectScript);
}
code:script.js
})();