advanced-link-searcher
interface
code:script.d.ts
export class SearchEngine {
constructor(projects: string[], option: {icon?: boolean; expired?: number});
search(query: string, option?: {timeout?: number; limit?:number; ambig?:number;}):
Promise<{state: 'cancelled';} | {result: string[]; state: 'fullfilled';}>;
reload(): Promise<void>;
}
検索エンジンの作成
const engine = new SearchEngine(projects, {icon: true, expired: 600})
projectsに指定したscrapbox projectからリンクを読み込む
instanceを生成した段階で、リンクの読み込みを開始する
icon: trueにすると、画像があるリンクのみ取得する
expiredに、cacheの寿命を秒単位で指定する
defaultは1時間
検索
engine.search(query, {timeout, limit, ambig})
戻り値
あいまい度順に並べた配列が返ってくる
code:ts
type Return = {
project: string;
title: string;
}[][];
projectは検索しない
titleのみをあいまい検索する
実装
設計
https://kakeru.app/1c2be668a2a6a9547d8f8cce59020995 https://i.kakeru.app/1c2be668a2a6a9547d8f8cce59020995.svg
workerからUI threadへの通信には文字列配列を渡す
直接object配列を渡しても速度には影響しないかな?
dependencies
code:script.js
import {CacheStorage} from '../scrapbox-link-database/script.js';
import {openDB} from '../idb/with-async-ittr.js';
import {singletonWorker} from '../singletonWorker/script.js';
// TODO: async-singletonで包んでおく
let idCounter = 0;
export class SearchEngine {
constructor(projects, {icon = false, expired = 3600, verbose = false} = {}) {
this._id = idCounter++;
this._projects = projects;
this._cacheStorage = new CacheStorage(expired);
this._icon = icon;
this._workerStoreName = this._icon ? SearchEngine.iconStoreName : SearchEngine.linkStoreName;
this._verbose = verbose;
this._initialized = this._initialize();
}
static databaseName = 'UserScript-SearchEngine';
static databaseVersion = 3;
static iconStoreName = 'icon-search-list';
static linkStoreName = 'link-search-list';
async search(query, {timeout = 5000, limit = 30, ambig} = {}) {
await this._initialized;
this._log('search', 'Start a search', {query, timeout, limit, ambig});
const raw = await Promise.all(workers.map((worker, i) =>
worker.delegate({
databaseName: SearchEngine.databaseName,
databaseVersion: SearchEngine.databaseVersion,
storeName: this._workerStoreName,
query, timeout, limit, ambig, verbose: this._verbose,
})
));
// 一つでも'cancelled'が混じっていたら{state: 'cancelled'}を返す
if (raw.some(({state}) => state === 'cancelled')) return {state: 'cancelled'};
// 転置して曖昧度順に結果を並べて、limit分だけ返す
const results = raw.map(({data: {result}}) => result);
this._log('search', 'Finish a search', {raw, results});
return {
result: transpose(results).flatMap(lists => lists.flat()).slice(0, limit),
state: 'fullfilled',
};
}
async reload() {
const projects = await this.update(this._projects, {icon: this._icon, reload: true});
if (projects.length > 0) await this._createWorkerSource();
}
// Workerが検索に使うリストを作る
async _createWorkerSource() {
const data = await this._cacheStorage.get(this._projects, {icon: this._icon});
const source = this._icon ?
data
.flatMap(({project, titles}) => titles.map(title => {return {project, title};}))
// 長さ順→辞書順に並べ替える
.sort((a, b) => a.title.length === b.title.length ?
a.title.localeCompare(b.title) :
a.title.length - b.title.length
) :
// 適当にかき混ぜる
shuffle(data.flatMap(
({project, pages}) => {
const titles = [...new Set(pages.flatMap(({title, links}) => title, ...links))]; return titles.map(title => {return {project, title};});
}
));
this._log('_createWorkerSource', Create ${source.length} search items.);
// 検索用文字列リストをworkers分だけ分割する
const chunkLength = Math.floor(source.length/workers.length) + 1;
const lists = workers.map((_, i) => source.slice(i * chunkLength, (i + 1) * chunkLength));
// databaseに格納する
this._log('_createWorkerSource', Put ${source.length} search items on Indexed DB.);
const tx = this._workerDB.transaction(this._workerStoreName, 'readwrite');
const store = tx.objectStore(this._workerStoreName);
const keys = await store.getAllKeys();
await Promise.all(lists.map((list, index) => {
return keys.some(key => key0 === id0 && key1 === id1) ? store.put({list, id}) :
store.add({list, id});
}));
await tx.done;
this._log('_createWorkerSource', Finish putting on Indexed DB.);
}
async _initialize() {
this._log('_initialize', 'Start initializing');
this._workerDB = await openDB(SearchEngine.databaseName, SearchEngine.databaseVersion, {
upgrade: (db) => {
// Object Storeをすべて消す
db.deleteObjectStore(storeName)
);
db.createObjectStore(SearchEngine.iconStoreName, {keyPath: 'id'});
db.createObjectStore(SearchEngine.linkStoreName, {keyPath: 'id'});
},
});
await this._createWorkerSource();
this._log('_initialize', 'Finish initializing');
}
_log(functionName, ...messages) {
if (!this._verbose) return;
console.log([${functionName}@advanced-link-searcher], ...messages);
}
}
code:script.js
const transpose = a => a0.map((_, c) => a.map(r => rc)); function shuffle(array) {
let result = array;
for (let i = result.length; 1 < i; i--) {
const k = Math.floor(Math.random() * i);
}
return result;
}
workerのcode
code:worker.js
self.importScripts('../asearch/worker.js');
self.importScripts('../idb/worker.js');
let db = undefined;
let verbose = false;
self.addEventListener('message', async ({data: {id, databaseName, databaseVersion, storeName, verbose: _verbose, ...rest}}) => {
await openDB(databaseName, databaseVersion);
const source = (await db.get(storeName, id))?.list ?? [];
if (_verbose) verbose = _verbose;
self.postMessage({result: search({source, ...rest}), id,});
});
async function openDB(databaseName, databaseVersion) {
if (db) return;
db = await idb.openDB(databaseName, databaseVersion);
}
function search({source, query, ambig, limit, timeout}) {
if (!source || source.length === 0) return [];
// 値のcheck
if (typeof query !== 'string') throw Error('query is not a string.');
if (typeof ambig !== 'number' && ambig !== undefined) throw Error('ambig is not a number');
if (typeof limit !== 'number' && limit !== undefined) throw Error('limit is not a number');
if (limit <= 0) throw Error('limit is not more than 0.');
if (typeof timeout !== 'number') throw Error('timeout is not a number.');
if (timeout <= 0) throw Error('timeout is not more than 0.');
// 検索語句が空のときは、検索候補の先頭limit個を取得する
if (query.trim() === '') {
_log('search', Finish a search for "${query}");
return [source.slice(0, limit).map(({project, title}) => /${project}/${title})];
}
_log('search', 'Start fuzzy search for ', {query, ambig, limit, timeout, source});
// 空白文字で区切った文字列を並び替え、曖昧検索objectを作る
const asearches = getPermutation(query.split(/\s/))
.map(wordList => new Asearch( ${wordList.join(' ')} ));
// ambigの最大値を計算する
let maxAmbig = ambig ?? Math.floor( ${query} .length / 4) + 1;
maxAmbig = maxAmbig > 4 ? 4 : maxAmbig;
// 検索する
const result = [];
const totalResults = new Set();
const start = (new Date()).getTime();
let matches;
let cancel = false; // 計算を中断するflag
for (let ambig = 0; ambig < maxAmbig; ambig++) {
matches = new Set();
for (const asearch of asearches) {
// 検索した文字列を重複を取り除いて追加する
for (const {project, title} of source) {
if (limit && totalResults.size >= limit) {
cancel = true;
break;
}
const path = /${project}/${title};
if (totalResults.has(path) || !asearch.match(title, ambig)) continue;
matches.add(path);
totalResults.add(path);
}
if (start + timeout < (new Date()).getTime()) {
_log('search', time out, {query});
cancel = true;
}
if (cancel) break;
}
if (cancel) break;
}
_log('search', Finish fuzzy search for "${query}": , result);
return result;
}
code:worker.js
// 重複は考慮していない
function getPermutation(list) {
if (list.length == 0) return list;
if (list.length == 1) return list; if (list.length == 2) return list0,list[1,[list1,list0]]; return list.flatMap(first => {
const restList = list.filter(item => item !== first);
});
}
function _log(functionName, ...messages) {
if (!verbose) return;
console.log([${functionName}@advanced-link-searcher-worker], ...messages);
}
test code
08:02:11 大体バグは潰せたと思う
09:04:15 バグつぶし終了
code:js
import('/api/code/programming-notes/advanced-link-searcher/test1.js')
code:test1.js
import {SearchEngine} from './script.js';
import {projects} from '../scrapbox-link-database/list.js';
const engine = new SearchEngine(projects);
window.engine = engine;
code:js
import('/api/code/programming-notes/advanced-link-searcher/test2.js')
code:test2.js
import {SearchEngine} from './script.js';
window.engine = engine;