pdfをscrapboxに変換する
wogikaze.iconがMN-Coreのドキュメントをアクティブ読書したくなった Claudeさんいつもありがとう.icon
h6もあったんだけど普通のテキストも入ってしまったのでh6は消した
code:js
// Immediately-invoked function expression: keeps every helper below out of
// the page's global scope.
(() => {
// `main` is a function declaration further down, so it is hoisted and can be
// called before its definition appears.
main();
/**
 * Entry point: turns the current document into a Scrapbox import JSON
 * (`{ pages: [{ title, lines }] }`) and hands it to `downloadJSON`.
 *
 * The first page is the generated table of contents; each following page uses
 * the section id as its title and as its first line.
 */
function main() {
  // Use the entire body instead of a site-specific container element.
  const body = document.body;
  const sections = makeHierarchy(body);
  const pages = [];
  for (const section of sections) {
    const sectionPages = [...toPages(section)];
    if (sectionPages.length === 0) continue;
    // toPages yields a section's own page last; list it before its sub-pages.
    pages.push(sectionPages.pop());
    pages.push(...sectionPages);
  }
  const index = generateIndex(pages);
  const json = {
    pages: [
      index,
      ...pages.map(({ id, lines }) => ({ title: id, lines: [id, ...lines] })),
    ],
  };
  downloadJSON(json);
}
/**
 * Builds the "Table of Contents" page.
 *
 * @param {Array<{id: string, level: number}>} pages - Pages in output order.
 * @returns {{title: string, lines: string[]}} A page whose lines are the title
 *   followed by one `[id]` link per page, indented by `level - 1` spaces.
 */
function generateIndex(pages) {
  return {
    title: "Table of Contents",
    lines: [
      "Table of Contents",
      ...pages.map(({ id, level }) => `${" ".repeat(level - 1)}[${id}]`),
    ],
  };
}
/**
 * Flattens one section (as produced by `makeHierarchy`) into pages.
 *
 * A section is an array whose first element is the heading metadata and whose
 * remaining elements are either content nodes or nested section arrays.
 * For every nested section its pages are yielded (its own page first, then its
 * descendants) and a `[id]` link to it is added to the current page.
 * The current section's own page is always yielded last.
 *
 * @param {Array} section - `[metadata, ...nodesOrSubsections]`.
 * @yields {object} The metadata fields plus a `lines` array of strings.
 */
function* toPages(section) {
  const data = section[0];
  const lines = [];
  for (const node of section.slice(1)) {
    if (Array.isArray(node)) {
      const pages = [...toPages(node)];
      if (pages.length === 0) continue;
      // The sub-section's own page is the last one its generator yields.
      const subPage = pages.pop();
      const link = subPage.id;
      lines.push(`[${link}]`, "");
      yield subPage;
      yield* pages;
      continue;
    }
    lines.push(...convertNode(node), "");
  }
  yield { ...data, lines };
}
/**
 * Converts one block-level element into Scrapbox lines.
 *
 * @param {Element} node - Element taken from a section's content.
 * @returns {string[]} Lines for the page body.
 */
function convertNode(node) {
  // Body lines of a `code:` / `table:` block are prefixed with one space so
  // that they are indented under the block header.
  // NOTE(review): the indent width was reconstructed as one space - confirm.
  const indent = (text) => ` ${text}`;
  switch (node.nodeName) {
    case "P":
      return convertP(node).split("\n");
    case "TABLE":
      return ["table:table", ...node.innerText.split("\n").map(indent)];
    case "UL":
    case "OL":
      return convertList(node);
    case "PRE":
      return ["code:txt", ...node.innerText.split("\n").map(indent)];
    case "FIGURE":
      return convertFigure(node);
    default:
      return node.innerText.split("\n");
  }
}
/**
 * Converts the child nodes of a paragraph-like element into Scrapbox text.
 *
 * Inline decorations are applied line by line, so a decorated run that spans
 * several lines produces one bracket pair per line.
 *
 * @param {Node} p - Element whose `childNodes` are converted.
 * @returns {string} The converted text; may contain "\n".
 */
function convertP(p) {
  // Applies `wrap` to every line of the node's text and re-joins the lines.
  const wrapLines = (node, wrap) =>
    node.textContent.split("\n").map(wrap).join("\n");

  return Array.from(p.childNodes)
    .map((node) => {
      switch (node.nodeName) {
        case "STRONG":
        case "B":
          return wrapLines(node, (text) => `[* ${text}]`);
        case "EM":
        case "I":
          return wrapLines(node, (text) => `[_ ${text}]`);
        case "CODE":
          // Inline code: surround each line with literal backticks.
          return wrapLines(node, (text) => `\`${text}\``);
        case "A":
          return convertLink(node);
        case "UL":
        case "OL":
          return convertList(node).join("\n");
        default:
          return node.textContent;
      }
    })
    .join("");
}
/**
 * Converts an anchor element into Scrapbox text.
 *
 * - An anchor with an `id` is emitted as `[Footnote <text>]`.
 * - An anchor whose text equals its URL is emitted as the bare URL.
 * - Any other anchor is emitted as `[<url> <text>]`.
 *
 * @param {HTMLAnchorElement} node - The anchor to convert.
 * @returns {string} The Scrapbox representation of the link.
 */
function convertLink(node) {
  if (node.id) {
    const n = node.textContent.trim();
    return `[Footnote ${n}]`;
  }
  const url = node.href.trim();
  const text = node.textContent.trim();
  return url === text ? `${url}` : `[${url} ${text}]`;
}
/**
 * Converts a `<ul>` / `<ol>` element into indented Scrapbox lines.
 *
 * Only direct `<li>` children are used. The first line of each item is
 * indented by one space; further lines of the same item by two spaces.
 * NOTE(review): the indent widths were reconstructed - confirm.
 *
 * @param {Element} ul - The list element.
 * @returns {string[]} One entry per output line.
 */
function convertList(ul) {
  const list = Array.from(ul.children).filter((li) => li.tagName === "LI");
  return list.flatMap((li) =>
    convertP(li)
      .split("\n")
      .map((text, i) => (i === 0 ? ` ${text}` : `  ${text}`)),
  );
}
/**
 * Converts a `<figure>` into a caption line followed by a bracketed image URL.
 *
 * @param {Element} figure - The figure element.
 * @returns {string[]} `[caption, "[src]"]`, or `[]` when the figure holds no
 *   `<img>`. The caption is the trimmed `alt` text, or "Image" when empty.
 */
function convertFigure(figure) {
  const img = figure.querySelector("img");
  if (!img) return [];
  const src = img.src.trim();
  const text = img.alt.trim() || "Image";
  return [text, `[${src}]`];
}
/**
 * Groups the content of `body` into a tree of sections keyed by h1-h5 headings.
 *
 * Each section is an array: element 0 is a metadata object
 * (`id`, `text`, `level`, `parent`), followed by the element siblings that come
 * after the heading (up to the next heading) and by nested section arrays for
 * deeper headings.
 *
 * @param {Element} body - Root element to scan.
 * @returns {Array<Array>} The top-level sections.
 */
function makeHierarchy(body) {
  const headings = body.querySelectorAll("h1, h2, h3, h4, h5");
  const sections = [];
  let currentSection = null;
  let currentLevel = 0;
  headings.forEach((heading) => {
    const level = Number.parseInt(heading.tagName.slice(1), 10);
    if (level <= currentLevel) {
      // Climb back up until the enclosing section is shallower than this heading.
      while (currentSection && currentSection[0].level >= level) {
        currentSection = currentSection[0].parent;
      }
    }
    const newSection = [{
      // An earlier version used `.replace(/\s+/g, '-').toLowerCase()` here;
      // replacing whitespace with '-' was judged to be overkill (note by wogikaze).
      // NOTE(review): the regex has no `g` flag, so only the first run of
      // whitespace is removed - confirm this is intended.
      id: heading.id || heading.textContent.trim().replace(/\s+/, ''),
      text: heading.textContent,
      level: level,
      parent: currentSection
    }];
    if (currentSection) {
      currentSection.push(newSection);
    } else {
      sections.push(newSection);
    }
    currentSection = newSection;
    currentLevel = level;
    // Collect the content that follows the heading, up to the next heading.
    let sibling = heading.nextElementSibling;
    while (sibling && !sibling.matches("h1, h2, h3, h4, h5")) {
      currentSection.push(sibling);
      sibling = sibling.nextElementSibling;
    }
  });
  return sections;
}
/**
 * Serializes `json` and triggers a browser download named
 * `extracted_content.json`.
 *
 * @param {object} json - The value to serialize with `JSON.stringify`.
 */
function downloadJSON(json) {
  const blob = new Blob([JSON.stringify(json)], { type: "application/json" });
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = 'extracted_content.json';
  a.click();
  // Release the object URL once the download has been triggered.
  URL.revokeObjectURL(url);
}
})();