makePage.ts
Partially copy-pasted from a private repo
It can create scrapbox pages in the browser without rendering them
getPageData, toNikkeiURL, getThink, and Think were used to convert newspaper articles into text to import into scrapbox
Their code is not public (a signature sketch inferred from usage follows below)
uploadImage uploads the images inside an article to Gyazo
toYYYYMMDD converts dates
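Since those modules are private, here is a rough sketch of their type signatures as inferred from how makePage uses them; everything in this block is an assumption, not the real implementation
code:inferred-signatures.ts
// Inferred (assumed) shapes only; the actual private modules may differ.
export declare const toNikkeiURL: (articleId: string) => string; // or URL; fetch accepts either
export declare const getPageData: (
  dom: Document,
  articleId: string,
) => {
  title: string;
  relatedArticles: string[]; // related article IDs
  images: { src: string; caption: string }[];
  lines: string[]; // page body in scrapbox notation
};
export interface Think {
  genre: string;
  author: { name: string };
  comment: string;
  updated: Date; // assumed to be a Date; toYYYYMMDD is called on it
}
export declare const getThink: (articleId: string) => Promise<Think[]>;
export declare const uploadImage: (
  url: URL,
  init: { refererURL: string; title: string; description: string },
) => Promise<string | undefined>; // assumed to resolve to a Gyazo URL, or undefined on failure
export declare const toYYYYMMDD: (date: Date) => string;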
Processing overview
Fetch the article and import it into scrapbox → run makePage on the related articles inside it → upload the article's images to Gyazo → replace the URLs in the article with links to the related pages and to the Gyazo images
Left as is, makePage's recursion would crawl related articles endlessly, so maxDepth caps the depth (see the usage sketch below)
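For reference, a minimal usage sketch; the socket handling and the article ID are placeholders I made up, not part of the original code
code:usage.ts
import { makePage } from "./makePage.ts";
import { Socket } from "../deps/scrapbox-dom.ts";

// Assumption: the caller already holds a connected websocket,
// e.g. one created with makeSocket() from scrapbox-userscript-std.
declare const socket: Socket;

// Import a (hypothetical) Nikkei article into the project "my-notes",
// following related articles at most 2 levels deep.
const mainTitle = await makePage("my-notes", "DGXZQOUC000000X00C00A0000000", socket, 2);
console.log(`Imported as "${mainTitle}"`);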
code:makePage.ts
import { patch, Socket } from "../deps/scrapbox-dom.ts";
import { getPageData, toNikkeiURL } from "./nikkei.ts";
import { getThink, Think } from "./think.ts";
import { uploadImage } from "./uploadImage.ts";
import { toYYYYMMDD } from "./utils.ts";
declare const GM_fetch: typeof fetch;
/** IDs of articles that have already been edited
 *
 * To avoid circular editing, articles whose IDs are recorded here are skipped
 *
 * The value holds the link to the article and whether it was created with maxDepth === 0
 */
const editedIds = new Map<string, [title: string, isZeroDepth: boolean]>();
/**
* Creates a page for a given project and article ID.
*
* @param project - The project name.
* @param articleId - The ID of the article.
* @param socket - The socket object.
* @param maxDepth - The maximum depth for creating related articles. Default is 3.
* @returns A promise that resolves to the main title of the created page.
*/
export const makePage = async (
  project: string,
  articleId: string,
  socket: Socket,
  maxDepth = 3,
): Promise<string> => {
  maxDepth = Math.max(0, maxDepth);
  const edited = editedIds.get(articleId);
  let makeOnlyRelated = false;
  if (edited) {
    const [title, isZeroDepth] = edited;
    if (!isZeroDepth || maxDepth === 0) return title;
    // only import the related articles
    makeOnlyRelated = true;
  }
  const dom = new DOMParser().parseFromString(
    await (await (GM_fetch ?? fetch)(toNikkeiURL(articleId))).text(),
    "text/html",
  );
  const { title, relatedArticles, images, lines } = getPageData(dom, articleId);
  // Get the title
  // Append a mark to the title to show that the content is problematic copyright-wise
  const mainTitle = `${title} | 🆖PIRACY`;
  // record this article so that recursive calls return early, along with whether it was created with maxDepth === 0
  editedIds.set(articleId, [mainTitle, maxDepth === 0]);
  if (!makeOnlyRelated) {
    const thinks = await getThink(articleId);
    console.debug(
      `[${maxDepth}] Creating "${mainTitle}" and ${thinks.length} thinks...`,
    );
    // create the pages first
    await Promise.all([
      patch(
        project,
        mainTitle,
        (prev, { persistent }) =>
          !persistent
            ? [...prev.map((line) => line.text), ...lines]
            : undefined,
        { socket },
      ),
      ...thinks.map((think) =>
        patch(
          project,
          `${[...think.comment.split("\n")[0]].slice(0, 15).join("")}...`,
          (prev, { persistent }) =>
            !persistent
              ? [
                ...prev.map((line) => line.text),
                ...formatThink(think, mainTitle, new Date()),
              ]
              : undefined,
          { socket },
        )
      ),
    ]);
    console.debug(
      `[${maxDepth}] Created "${mainTitle}" and ${thinks.length} thinks.`,
    );
  }
  // pairs of [search text, replacement] applied to the page at the end
  const replacer: [before: string, after: string][] = [];
  let counter = 0;
  if (maxDepth > 0 || makeOnlyRelated) {
    // Create the related articles
    // Run them serially, because parallel requests hit the API rate limit
    for (const id of relatedArticles) {
      console.debug(
        `[${maxDepth}] Creating ${counter}/${relatedArticles.length} related articles in "${mainTitle}"...`,
      );
      const title = await makePage(project, id, socket, maxDepth - 1);
      counter++;
      const before = `REPLACE:${toNikkeiURL(id)}`;
      const after = `[${title}]`;
      replacer.push([before, after]);
    }
    console.debug(
      `[${maxDepth}] Created ${relatedArticles.length} related articles in "${mainTitle}".`,
    );
    counter = 0;
  } else {
    console.debug(
      `[${maxDepth}] Skip creating related articles in "${mainTitle}".`,
    );
  }
  if (!makeOnlyRelated) {
    for (const { src, caption } of images) {
      console.debug(
        `[${maxDepth}] Uploading ${counter}/${images.length} images in "${mainTitle}"...`,
      );
      const gyazo = await uploadImage(new URL(src), {
        refererURL: toNikkeiURL(articleId),
        title,
        description: caption,
      });
      counter++;
      if (!gyazo) continue;
      const before = src;
      const after = gyazo;
      replacer.push([before, after]);
    }
    console.debug(
      `[${maxDepth}] Uploaded ${images.length} images in "${mainTitle}".`,
    );
  }
  // Rewrite the links
  console.debug(
    `[${maxDepth}] Replacing ${replacer.length} urls in "${mainTitle}"...`,
  );
  await patch(project, mainTitle, (prev) =>
    prev.map((line) => {
      let text = line.text;
      for (const [before, after] of replacer) {
        text = text.replaceAll(before, after);
      }
      return text;
    }), { socket });
  console.debug(
    `[${maxDepth}] Replaced ${replacer.length} urls in "${mainTitle}".`,
  );
  return mainTitle;
};

const formatThink = (
  think: Think,
  mainTitle: string,
  today: Date,
): string[] => [
  `[[${think.genre}]][${think.author.name}.icon]`,
  ...think.comment.replaceAll("。", "。\n")
    .split("\n")
    .flatMap((text) => text.trim() !== "" ? [` >${text.trim()}`] : []),
  "",
  `Commented at [${mainTitle}], on [${toYYYYMMDD(think.updated)}]`,
  `Added on [${toYYYYMMDD(today)}]`, // the date the article was scraped
];