asearch-engine
複数の補完ソースを切り替えてasearch検索できるUserScript 用途
interface
code:js
import {create} from 'xxx';
// `create` is async (it waits for the initial source to be handed to the workers),
// so the engine has to be awaited before use.
const emojiEngine = await create({
  // Text each source item is matched against.
  converter: ({project, title}) => `${project} ${title}`,
  source: emojis,
});
// Run a search: one promise per worker is returned.
const resultPromises = emojiEngine.search('searchParam');
わかりやすくするために、型付きで説明しておこう
code:ts
/** Options accepted by `create`. */
type CreateProps<T> = {
  /** Converts one source item into the string that is searched. */
  converter: (item: T) => string;
  /** Initial completion source; it can also be supplied later through `SearchEngine.update`. */
  source?: T[];
  /** Upper bound on the number of hits (default 30). */
  limit?: number;
  /** Time budget for one search, in milliseconds (default 10000). */
  timeout?: number;
  /** Number of ambiguity levels to try; derived from the query length (capped at 4) when omitted. */
  ambig?: number;
};

/**
 * Outcome reported by one worker for one search.
 * `result` holds one list of hits per ambiguity level that was tried.
 *
 * NOTE(review): the implementation on this page tests `state === 'canceled'`
 * rather than `'skipped'`; the state strings come from `asyncSingleton`, which is
 * not visible here — confirm the exact spellings (including 'fullfilled').
 */
type SearchOutcome<T> =
  | {result: T[][]; state: 'fullfilled' | 'timedout'}
  | {state: 'skipped'};

/** Creates an engine; resolves once the initial `source` (if any) has been sent to the workers. */
declare const create: <T>(props: CreateProps<T>) => Promise<SearchEngine<T>>;

/** Search front end. Instances are meant to be obtained through `create`. */
declare class SearchEngine<T> {
  constructor(
    converter: (item: T) => string,
    limit: number,
    timeout: number,
    ambig: number | undefined,
    id: number,
  );
  /** Starts a search on every worker and returns one promise per worker. */
  search(query: string): Promise<SearchOutcome<T>>[];
  /** Replaces the completion source and redistributes it over the workers. */
  update(source: T[]): Promise<void>;
}
引数:CreateProps
converterに、補完ソースから文字列への変換方法を指定する
sourceに補完ソースを指定するが、後からupdateで指定することもできる
limitに検索結果の上限を入れる
defaultは30
timeoutに検索処理の制限時間をms単位で入れる
defaultは10000
戻り値:SearchEngine
検索に使うクラス
SearchEngine.searchに検索語句を渡して、実際に検索を行う
非同期関数
検索に使用したWeb Workerと同じ数だけPromiseが返ってくる
検索がskipされると、state === 'skipped'になる
検索が途中で打ち切られるとstate === 'timedout'になる
SearchEngine.updateで補完ソースを更新する
dependencies
code:script.js
import {asyncSingleton} from '/api/code/takker/async-singleton/script.js';
変数をカプセル化するために、classのstatic変数ではなくmodule内部のglobal変数として定義している
code:script.js
// URL of the script every search worker runs.
const workerSource = '/api/code/takker/asearch-engine/worker.js';
// One worker per logical core reported by the browser. Falls back to a single
// worker where `hardwareConcurrency` is unavailable. Built with Array.from
// because no `range` helper is defined or imported in this module.
const workers = Array.from(
  {length: navigator.hardwareConcurrency || 1},
  () => new Worker(workerSource),
);
let idCounter = 0; // id that will be given to the next engine; tells engines apart inside the shared workers
create経由でしかSearchEngineを作れないようにする
code:script.js
/**
 * Builds a SearchEngine; this is the only exported way to obtain one.
 *
 * @param converter turns a source item into the string that is searched
 * @param source    initial completion source (default: empty)
 * @param limit     maximum number of hits (default: 30)
 * @param timeout   time budget of one search in ms (default: 10000)
 * @param ambig     ambiguity setting forwarded to the workers (default: undefined)
 * @returns the engine, after a non-empty initial source has been passed to `update`
 */
export async function create({converter, source = [], limit = 30, timeout = 10000, ambig = undefined} = {}) {
  // Each engine takes the current counter value as its id; the counter then moves on.
  const engine = new SearchEngine(converter, limit, timeout, ambig, idCounter);
  idCounter += 1;

  const hasInitialSource = source.length > 0;
  if (!hasInitialSource) return engine;

  await engine.update(source);
  return engine;
}
/**
 * Front end of the search: keeps the original source objects on the main thread,
 * hands their string form to the shared workers in chunks, and maps the indices
 * the workers return back to the original objects.
 */
class SearchEngine {
  /**
   * @param converter turns a source object into the string that is searched
   * @param limit     maximum number of hits, forwarded to the workers
   * @param timeout   time budget in ms, forwarded to the workers
   * @param ambig     ambiguity setting, forwarded to the workers
   * @param id        identifier that separates this engine's data inside the shared workers
   */
  constructor(converter, limit, timeout, ambig, id) {
    this.id = id;
    this.converter = converter;
    this.searchObjects = []; // original objects, in the order they were handed to update()
    this.skips = []; // skips[w] = index in searchObjects of the first item sent to worker w
    // One search function per worker, each wrapped in asyncSingleton.
    // NOTE(review): asyncSingleton is defined elsewhere; search() below relies on it
    // resolving to {result, state} — confirm against its implementation.
    this._searchCallbacks = workers.map(worker => asyncSingleton(async query => {
      const {indicesList} = await postToWorker(worker, {
        type: 'search', id, query, limit, timeout, ambig,
      });
      return indicesList;
    }));
  }

  /**
   * Starts the search on every worker.
   *
   * @param query search text
   * @returns one promise per worker; each resolves to `{state}` when the call was
   *          canceled, otherwise to `{result, state}` where `result` is a list of
   *          lists of source objects
   */
  search(query) {
    return this._searchCallbacks.map(async (callback, workerIndex) => {
      const {result: indicesList, state} = await callback(query);
      if (state === 'canceled') return {state};
      // Worker indices are relative to the worker's own chunk; shift them back.
      const offset = this.skips[workerIndex];
      return {
        result: indicesList
          .map(indices => indices
            .map(index => this.searchObjects[offset + index])),
        state,
      };
    });
  }

  /**
   * Replaces the completion source and distributes it over the workers.
   *
   * @param sourceObjects the new source objects
   * @returns resolves with the workers' acknowledgements once all have stored their chunk
   */
  async update(sourceObjects) {
    // Keep the original objects so hits can be mapped back to them.
    this.searchObjects = sourceObjects;
    // Build the list of strings that is actually searched.
    const source = this.searchObjects.map(object => this.converter(object));
    // Split the strings into one chunk per worker.
    const chunkLength = Math.floor(source.length / workers.length) + 1;
    // Rebuild the offsets from scratch: appending to the previous list would leave
    // the offsets of an earlier update() in the positions search() reads.
    this.skips = workers.map((_, i) => i * chunkLength);
    const promises = workers.map((worker, i) => {
      const start = this.skips[i];
      const chunk = source.slice(start, start + chunkLength);
      return postToWorker(worker, {type: 'update', id: this.id, source: chunk});
    });
    return await Promise.all(promises);
  }
}
複数の入力補完scriptが同じworkerに処理を送るので、どのscriptから来た命令なのかを区別できるようにする
code:script.js
function postToWorker(worker, message) {
worker.postMessage(message);
return new Promise(resolve => {
const callback = ({data}) => {
if (data.type !== message.type || data.id !== message.id) return;
resolve(data);
worker.removeEventListener('message', callback);
}
worker.addEventListener('message', callback);
});
}
worker code
code:worker.js
self.importScripts('/api/code/takker/asearch/worker.js');

// Search strings held by this worker, looked up by engine id.
let sourceList = [];

// Dispatch requests from the main thread. Every reply echoes the request's
// `id` and `type`; messages of any other type are ignored.
self.addEventListener('message', event => {
  const {id, type, source, ...rest} = event.data;
  if (type === 'search') {
    const indicesList = search({id, ...rest});
    self.postMessage({indicesList, id, type});
  } else if (type === 'update') {
    update({id, source});
    // Tell the main thread that the update has finished.
    self.postMessage({id, type});
  }
});
/**
 * Searches the strings stored for engine `id` in this worker.
 *
 * @param id      engine whose strings are searched
 * @param query   search text; whitespace separates words, which may match in any order
 * @param ambig   number of ambiguity levels to try (capped at 4); derived from the
 *                query length when undefined
 * @param limit   maximum number of hits; may be undefined
 * @param timeout time budget in ms
 * @returns a list of index lists, one per ambiguity level that was tried, least
 *          ambiguous first; indices are positions in this worker's chunk and
 *          each index appears at most once overall
 * @throws Error when an argument has the wrong type or a non-positive value
 */
function search({id, query, ambig, limit, timeout}) {
  // Nothing to do when there are no candidates.
  const source = sourceList[id];
  if (!source || source.length === 0) return [];

  // Validate the arguments.
  if (typeof query !== 'string') throw Error('query is not a string.');
  if (typeof ambig !== 'number' && ambig !== undefined) throw Error('ambig is not a number');
  if (typeof limit !== 'number' && limit !== undefined) throw Error('limit is not a number');
  if (limit <= 0) throw Error('limit is not more than 0.');
  if (typeof timeout !== 'number') throw Error('timeout is not a number.');
  if (timeout <= 0) throw Error('timeout is not more than 0.');

  // An empty query yields the first `limit` candidates. The count is clamped to
  // the chunk length so no index points past the strings this worker holds.
  if (query.trim() === '') {
    return [[...Array(Math.min(limit ?? 100, source.length)).keys()]];
  }

  // Build one fuzzy matcher per ordering of the whitespace-separated words.
  const asearches = getPermutation(query.split(/\s/))
    .map(wordList => new Asearch(` ${wordList.join(' ')} `));

  // Number of ambiguity levels to try, never more than 4.
  const maxAmbig = Math.min(ambig ?? Math.floor(` ${query} `.length / 4) + 1, 4);

  const indicesList = [];
  const totalResults = new Set(); // every index found so far, for de-duplication
  const start = Date.now();
  let cancel = false; // set when the limit or the time budget is reached

  for (let level = 0; level < maxAmbig; level++) {
    const matches = [];
    for (const asearch of asearches) {
      for (let i = 0; i < source.length; i++) {
        if (limit && totalResults.size >= limit) {
          cancel = true;
          break;
        }
        if (totalResults.has(i) || !asearch.match(source[i], level)) continue;
        matches.push(i);
        totalResults.add(i);
      }
      // The clock is checked once per matcher, after a full pass over the source.
      if (start + timeout < Date.now()) {
        console.info('time out');
        cancel = true;
      }
      if (cancel) break;
    }
    indicesList.push(matches);
    if (cancel) break;
  }
  return indicesList;
}
/**
 * Stores the search strings of engine `id`, replacing any it held before.
 *
 * @param id     engine the strings belong to
 * @param source strings to search (this worker's chunk)
 */
function update({id, source}) {
  sourceList[id] = source;
}
code:worker.js
/**
 * Returns every ordering of `list`.
 *
 * Equal items are not merged: a list with repeated items yields repeated orderings.
 *
 * @param list items to permute
 * @returns list of orderings; `[]` for an empty list, `[list]` for a single item
 */
function getPermutation(list) {
  if (list.length === 0) return [];
  if (list.length === 1) return [list];
  if (list.length === 2) return [[list[0], list[1]], [list[1], list[0]]];
  return list.flatMap((first, position) => {
    // Remove by position, not by value, so repeated items are not all dropped at once.
    const restList = list.filter((_, index) => index !== position);
    return getPermutation(restList).map(rest => [first, ...rest]);
  });
}
test code
code:js
// Manual test driver: loads test1.js, lets execute() build the engines and
// register window.externalSearch / window.emojiSearch, then runs searches.
(async () => {
const {execute} = await import('/api/code/takker/asearch-engine/test1.js');
await execute();
// Search one at a time
await externalSearch('scrapbox');
await emojiSearch('いい');
// Search concurrently
await Promise.all([
externalSearch('scrapbox'),
emojiSearch('いい'),
]);
// When requests are posted in rapid succession, the intermediate computations should be skipped
await Promise.all([
externalSearch('理想 問題'),
externalSearch('理想 問題'),
externalSearch('理想 問題'),
emojiSearch('goo'),
emojiSearch('goo'),
emojiSearch('goo'),
emojiSearch('goo'),
externalSearch('理想 問題'),
externalSearch('理想 問題'),
externalSearch('理想 問題'),
emojiSearch('goo'),
emojiSearch('goo'),
emojiSearch('goo'),
externalSearch('理想 問題'),
externalSearch('理想 問題'),
externalSearch('理想 問題'),
externalSearch('理想 問題'),
]);
})();
code:test1.js
import {create} from '/api/code/takker/asearch-engine/script.js';
import {projects as projectList} from '/api/code/takker/asearch-engine/test1-project-list.js';
/**
 * Sets up the manual test: loads both data sets, builds one engine for each and
 * exposes `window.externalSearch` / `window.emojiSearch`, which run a query and
 * log what every worker returned.
 */
export async function execute() {
  // Load both data sets in parallel.
  const [externalSource, emojiSource] = await Promise.all([
    createTestData(projectList),
    createEmojiData(),
  ]);
  // Build both engines in parallel.
  const [externalEngine, emojiEngine] = await Promise.all([
    create({
      converter: ({project, title}) => `${project} ${title}`,
      source: externalSource,
      limit: 100,
      ambig: 4,
    }),
    create({
      converter: ({project, title}) => `${project} ${title}`,
      source: emojiSource,
      limit: 10,
    }),
  ]);

  let counter = 0; // sequence number of the search, shown in the log
  const search = async (engine, query) => {
    const promises = engine.search(query);
    const counter_ = counter;
    counter++;
    // Await the logging callbacks themselves so search() settles only after every
    // worker's outcome has been logged, and a failure inside one is not lost.
    await Promise.all(promises.map(async (promise, index) => {
      const {result, state} = await promise;
      if (state === 'canceled') {
        _log(`Worker ${index} No. ${counter_} was canceled.`);
        return;
      }
      _log(`Worker ${index} No. ${counter_}: `, result
        .map(searchedList => searchedList.map(({project, title}) => `/${project}/${title}`)));
    }));
  };
  window.emojiSearch = (query) => search(emojiEngine, query);
  window.externalSearch = (query) => search(externalEngine, query);
}
/**
 * Fetches the pages of every given project in parallel and returns them as one
 * shuffled list.
 *
 * @param projects project names
 * @returns whatever `shuffle` returns for the flattened per-project page lists
 */
async function createTestData(projects) {
  const pagesPerProject = await Promise.all(
    projects.map(project => fetchPagess(project)),
  );
  const allPages = pagesPerProject.flat();
  return shuffle(allPages);
}
// Builds the emoji data set: maps projects through fetchPagess and flattens the result.
// NOTE(review): the opening of the expression (presumably
// `return (await Promise.all(<list of emoji projects>`) is missing from this copy,
// so the function does not parse as written — restore it from the original page.
async function createEmojiData() {
.map(project => fetchPagess(project)))
).flat();
}
function shuffle(array) {
let result = array;
for (let i = result.length; 1 < i; i--) {
const k = Math.floor(Math.random() * i);
}
return result;
}
/**
 * Loads the page titles of a project, 1000 pages per request, all requests in parallel.
 *
 * @param project project name
 * @returns list of `{project, title}` objects, one per page
 */
async function fetchPagess(project) {
  // Get the number of non-empty pages in the project.
  _log(`Start loading pages from /${project}.`);
  const res = await fetch(`/api/pages/${project}/?limit=1`);
  const {count} = await res.json();
  const maxIndex = Math.floor(parseInt(count) / 1000) + 1;
  // One request per block of 1000 pages.
  const promises = [...Array(maxIndex).keys()].map(async index => {
    const response = await fetch(`/api/pages/${project}/?limit=1000&skip=${index * 1000}`);
    const {pages} = await response.json();
    return pages.map(({title}) => ({project, title}));
  });
  const allPages = (await Promise.all(promises)).flat();
  _log(`Loaded ${allPages.length} pages from /${project}.`);
  return allPages;
}
/**
 * Loads the pages of an emoji project, 1000 pages per request, all requests in parallel.
 *
 * @param project project name
 * @returns list with one entry per page
 */
async function fetchEmojis(project) {
  // Get the total number of pages in the project.
  _log(`Start loading emojis from /${project}.`);
  const res = await fetch(`/api/pages/${project}/?limit=1`);
  const {count} = await res.json();
  const maxIndex = Math.floor(parseInt(count) / 1000) + 1;
  // One request per block of 1000 pages.
  const promises = [...Array(maxIndex).keys()].map(async index => {
    const response = await fetch(`/api/pages/${project}/?limit=1000&skip=${index * 1000}`);
    const {pages} = await response.json();
    // NOTE(review): the statement that produced the per-page value was missing from
    // this copy. {project, title} is assumed because the emoji engine's converter
    // reads exactly those two fields — confirm against the original page.
    return pages.map(({title}) => ({project, title}));
  });
  const emojis = (await Promise.all(promises)).flat();
  _log(`Loaded ${emojis.length} emojis from /${project}.`);
  return emojis;
}
function _log(msg, ...objects) {
if (typeof msg !== 'object') {
console.log([test1.js@${scrapbox.Page.title}] ${msg}, ...objects);
} else {
console.log([test1.js@${scrapbox.Page.title}] , msg, ...objects);
}
}
code:test1-project-list.js
// Names of the projects whose pages test1.js loads as test data
// (imported there as `projectList` and passed to createTestData).
export const projects = [
'hub',
'shokai',
'nishio',
'masui',
'rakusai',
'yuiseki',
'june29',
'ucdktr2016',
'villagepump',
'rashitamemo',
'thinkandcreateteck',
'customize',
'scrapboxlab',
'scrasobox',
'foldrr',
'scrapbox-drinkup',
'motoso',
'public-mrsekut',
'mrsekut-p',
'marshmallow-rm',
'wkpmm',
'sushitecture',
'nwtgck',
'dojineko',
'kadoyau',
'inteltank',
'sta',
'kn1cht',
'miyamonz',
'rmaruon',
'MISONLN41',
'yuta0801',
'choiyakiBox',
'choiyaki-hondana',
'spud-oimo',
'keroxp',
'aioilight',
];