PDFjsのviewerから画像データとテキストデータを取得するUserScript
pdf.jsで実装されたviewerから、PDFの画像とテキストデータを取得して、zipにしてdownloadするUserScript
code:script.js
await (async () => {
canvasからblobを取り出す
code:script.js
/**
 * Encode a canvas as a PNG and hand the result back as a Promise.
 *
 * Adapts the callback-based `canvas.toBlob` API.
 *
 * @param {HTMLCanvasElement} canvas - Canvas whose contents should be encoded.
 * @returns {Promise<Blob>} Resolves with the PNG blob.
 * @throws {Error} Rejects when `toBlob` reports a falsy blob.
 */
const getBlob = (canvas) =>
  new Promise((resolve, reject) => {
    canvas.toBlob((blob) => {
      if (!blob) {
        reject(new Error("Failed to create Blob from canvas"));
        return;
      }
      resolve(blob);
    }, "image/png");
  });
1枚ずつcanvasとテキストを取得する
canvasが読み込まれるのを待ってから返す
末尾まで読み終わったら終了する
code:script.js
/**
 * Walk through the pages of the pdf.js viewer in order, yielding each page's
 * canvas together with its text.
 *
 * Pages are read from `PDFViewerApplication.pdfViewer._pages` (an internal
 * viewer field, so this may break with other pdf.js versions — verify when
 * upgrading). Iteration stops at the first index that has no page.
 *
 * @param {object} [options]
 * @param {number} [options.pollInterval=100] - Milliseconds between checks
 *   for the page's canvas.
 * @param {number} [options.renderDelay=2000] - Extra milliseconds to wait
 *   after the canvas is found before the page is yielded.
 * @yields {{ canvas: HTMLCanvasElement, text: string }} The page canvas and
 *   its text items joined with newlines ("" when no text is available).
 */
async function* readPages({ pollInterval = 100, renderDelay = 2000 } = {}) {
  for (let index = 0; ; index++) {
    const page = PDFViewerApplication.pdfViewer._pages[index];
    // Running past the last page ends the iteration.
    if (!page) break;

    // Bring the page on screen; presumably this is what prompts the viewer
    // to render it — the loop below waits for the canvas to show up.
    page.div.scrollIntoView();

    const result = await new Promise((resolve) => {
      const timer = setInterval(() => {
        // Keep polling until the canvas exists and the loading icon is gone.
        const canvas = page.div.getElementsByTagName("canvas")?.[0];
        if (!canvas) return;
        if (page.div.getElementsByClassName("loadingIcon").length > 0) return;
        clearInterval(timer);

        // Give the canvas time to finish drawing before handing it out.
        setTimeout(() => {
          const text = page.textLayer?.textContentItemsStr?.join?.("\n") ?? "";
          resolve({ canvas, text });
        }, renderDelay);
      }, pollInterval);
    });
    yield result;
  }
}
画像編集
code:script.js
/**
 * Cut the margins off a canvas and return the remaining centre as a new canvas.
 *
 * Padding is given as a ratio of the input size and is removed from both
 * sides of each axis, so the output spans `1 - 2 * padding` of that axis.
 *
 * @param {HTMLCanvasElement} inputCanvas - Source canvas; it is not modified.
 * @param {number} paddingW - Ratio of the width removed from the left and right.
 * @param {number} paddingH - Ratio of the height removed from the top and bottom.
 * @returns {HTMLCanvasElement} A newly created canvas holding the cropped area.
 */
const trim = (inputCanvas, paddingW, paddingH) => {
  const { width: sourceWidth, height: sourceHeight } = inputCanvas;
  const cropped = Object.assign(document.createElement("canvas"), {
    width: sourceWidth * (1 - 2 * paddingW),
    height: sourceHeight * (1 - 2 * paddingH),
  });

  // Read the size back from the new canvas so the copied region uses
  // whatever dimensions the element actually stored.
  const { width, height } = cropped;
  cropped
    .getContext("2d")
    .drawImage(
      inputCanvas,
      sourceWidth * paddingW,
      sourceHeight * paddingH,
      width,
      height,
      0,
      0,
      width,
      height,
    );
  return cropped;
};
画像を分割する
code:script.js
/**
 * Cut a canvas into a grid of equally sized tiles, yielded row by row from
 * the top-left.
 *
 * A single scratch canvas is reused for every tile, so each yielded canvas
 * must be consumed (e.g. encoded) before the next one is requested.
 *
 * Tiles are addressed by row/column index rather than by accumulating
 * offsets. This keeps fractional tile sizes from drifting past the canvas
 * edge, and avoids a loop that never advances when the input canvas has a
 * zero dimension.
 *
 * @param {HTMLCanvasElement} inputCanvas - Canvas to cut up; not modified.
 * @param {number} [splitW=1] - Number of columns.
 * @param {number} [splitH=1] - Number of rows.
 * @yields {HTMLCanvasElement} The shared scratch canvas holding the current tile.
 */
async function* split(inputCanvas, splitW = 1, splitH = 1) {
  const unitW = inputCanvas.width / splitW;
  const unitH = inputCanvas.height / splitH;
  // Nothing to cut when a tile would have no area (also rejects NaN sizes).
  if (!(unitW > 0) || !(unitH > 0)) return;

  const tile = document.createElement("canvas");
  tile.width = unitW;
  tile.height = unitH;
  const ctx = tile.getContext("2d");

  for (let row = 0; row < splitH; row++) {
    for (let col = 0; col < splitW; col++) {
      // Wipe the previous tile before drawing the next one.
      ctx.clearRect(0, 0, tile.width, tile.height);
      ctx.drawImage(
        inputCanvas,
        col * unitW,
        row * unitH,
        unitW,
        unitH,
        0,
        0,
        unitW,
        unitH,
      );
      yield tile;
    }
  }
}
code:script.js
/**
 * Load JSZip into the page by appending a `<script>` tag to `document.head`.
 *
 * The tag carries the id "userscript-jszip"; when an element with that id is
 * already present the promise resolves immediately without adding another.
 *
 * @param {string} [src] - URL of the JSZip build to load. The original code
 *   never assigned `script.src`, so the load could not complete; the default
 *   points at the cdnjs-hosted minified build.
 * @returns {Promise<void>} Resolves once the script's `load` event fires.
 *   Rejects with the `error` event when loading fails.
 */
const installJSZip = (
  src = "https://cdnjs.cloudflare.com/ajax/libs/jszip/3.10.1/jszip.min.js",
) =>
  new Promise((resolve, reject) => {
    const id = "userscript-jszip";
    if (document.getElementById(id)) {
      resolve();
      return;
    }

    const script = document.createElement("script");
    script.addEventListener("load", () => {
      resolve();
    });
    script.addEventListener("error", (e) => {
      // Drop the failed tag so a later call retries instead of finding the
      // id and resolving as if JSZip were available.
      script.remove();
      reject(e);
    });
    script.id = id;
    script.src = src;
    document.head.append(script);
  });
メインプログラム
code:script.js
// Main program: capture every page, crop and tile it, then download the
// resulting images and text as a single zip archive.
await installJSZip();
const zip = new JSZip();

// Each entry pairs one tile image with the text of the page it came from.
const pages = [];
for await (const { canvas, text } of readPages()) {
  // Trim 10% off every edge, then cut the remainder into a 2x2 grid.
  for await (const canvas2 of split(trim(canvas, 0.1, 0.1), 2, 2)) {
    // Encode right away: split() reuses one canvas for all of its tiles.
    pages.push({ image: await getBlob(canvas2), text });
  }
}

// Zero-pad the index to the width of the entry count so names sort in order.
const digits = `${pages.length}`.length;
for (let i = 0; i < pages.length; i++) {
  const name = `${i}`.padStart(digits, "0");
  zip.file(`${name}.png`, pages[i].image);
  zip.file(`${name}.txt`, pages[i].text);
}

const zipBlob = await zip.generateAsync({
  type: "blob",
  compression: "DEFLATE",
  compressionOptions: {
    level: 9,
  },
});

// Start the download through a temporary link named after the document title.
const a = document.createElement("a");
a.href = URL.createObjectURL(zipBlob);
const title = document.title.replace(/\.pdf$/, "");
a.download = `${title}.zip`;
document.body.append(a);
a.click();
a.remove();
})();