import { ensureDir } from "../deno_std%2Ffs/mod.ts";
import { parse } from "../deno_std%2Fflags/mod.ts";
import { join } from "../deno_std%2Fpath/mod.ts";
import { getPDFpageNum } from '../PDFのページ数を取得/deno.ts';
import { Command } from "https://deno.land/x/cliffy@v0.25.7/command/mod.ts";
import ProgressBar from "https://deno.land/x/progress@v1.3.8/mod.ts";

/**
 * pdftotext-1by1
 *
 * Runs the external `pdftotext` executable once per page of the given PDF,
 * writing each page's text to `<zero-padded page>.txt` inside the output
 * directory, and finally writes all page texts, in page order, as a JSON
 * array to `text-data.json` in the same directory.
 *
 * CLI:
 *   -l, --last <page>   last page to extract (defaults to the PDF page count)
 *   -o, --output <dir>  output directory (defaults to the current directory)
 *   <filename>          path of the PDF to read
 */
const { args: [filename], options } = await new Command()
  .name("pdftotext-1by1")
  .description("pdfから1 pageずつテキストを取得する")
  .version("v1.0.0")
  // The value placeholders are required: without them cliffy treats the
  // options as boolean flags and accepts no positional argument at all.
  .option("-l, --last <page:integer>", "ここで指定したページ数までテキストを取得する")
  .option(
    "-o, --output <dir:string>",
    "出力先directory",
  )
  .arguments("<filename:string>")
  .parse(Deno.args);

// Resolve the destination directory and make sure it exists before writing.
const outputDir = options.output ?? Deno.cwd();
await ensureDir(outputDir);

// Only read the PDF to count pages when no explicit last page was given.
const pageNum = options.last ??
  await getPDFpageNum(await Deno.readFile(filename));

// Pad page numbers to the digit count of the last page so that the output
// files sort lexicographically in page order.
const width = String(pageNum).length;
const zero = (n: number) => `${n}`.padStart(width, '0');

const progress = new ProgressBar({
  total: pageNum,
});
progress.render(0);

const decoder = new TextDecoder();
const texts: string[] = [];

for (let page = 1; page <= pageNum; page++) {
  const path = join(outputDir, `./${zero(page)}.txt`);

  // `-f` and `-l` both set to the same page restrict pdftotext to that page.
  const command = new Deno.Command("pdftotext", {
    args: ["-f", `${page}`, "-l", `${page}`, filename, path],
  });
  const { success, stderr } = await command.output();
  if (!success) {
    // Surface pdftotext's own diagnostics and abort the whole run.
    const message = decoder.decode(stderr);
    console.error(message);
    throw new Error(message);
  }

  // Read back the file pdftotext just wrote so it can go into the JSON dump.
  texts.push(await Deno.readTextFile(path));
  progress.render(page);
}

await Deno.writeTextFile(
  join(outputDir, `./text-data.json`),
  JSON.stringify(texts),
);