fizzSearch
あいまい検索クラス
ambiguous text search
cf.
これを/icons/javascript.iconに書き直しただけ
更新とか
2020-09-17 20:34:24
ASearchで部分一致検索する方法がわかったので、歯抜けマッチングを使う必要がなくなった 2020-09-17 11:30:57
code:script.js
class Asearch {
get INITPAT() {return 0x80000000;} // 0100,0000,0000,0000,0000,0000,0000,0000
get MAXCHAR() {return 0x100;}
constructor(source) {
this.source = source;
this.shiftpat = Array(this.MAXCHAR).map(_ => 0);
this.epsilon = 0;
let mask = this.INITPAT;
const ref = this.unpack(this.source);
for (const item of ref) {
// 0x20 is a space
if (item === 0x20) {
this.epsilon |= mask;
} else {
this.shiftpatitem |= mask; mask >>>= 1;
}
}
this.acceptpat = mask;
return this;
}
isupper(c) {
// 0x41 = A, 0x5a = Z
return (c >= 0x41) && (c <= 0x5a);
}
islower(c) {
// 0x61 = a, 0x7a = z
return (c >= 0x61) && (c <= 0x7a);
}
tolower(c) {
return this.isupper(c) ? c + 0x20 : c;
}
toupper(c) {
return this.islower(c) ? c - 0x20 : c;
}
state(str = '') {
let i0 = this.INITSTATE0; let i1 = this.INITSTATE1; let i2 = this.INITSTATE2; let i3 = this.INITSTATE3; const ref = this.unpack(str);
for (const item of ref) {
const mask = this.shiftpatitem; i3 = (i3 & this.epsilon) | ((i3 & mask) >>> 1) | (i2 >>> 1) | i2;
i2 = (i2 & this.epsilon) | ((i2 & mask) >>> 1) | (i1 >>> 1) | i1;
i1 = (i1 & this.epsilon) | ((i1 & mask) >>> 1) | (i0 >>> 1) | i0;
i0 = (i0 & this.epsilon) | ((i0 & mask) >>> 1);
i1 |= i0 >>> 1;
i2 |= i1 >>> 1;
i3 |= i2 >>> 1;
}
}
match(str, ambig = 0) {
if (!(ambig < this.INITSTATE.length)) {
ambig = this.INITSTATE.length - 1;
}
const s = this.state(str);
return (sambig & this.acceptpat) !== 0; }
unpack(str) {
let bytes = [];
str.split('')
.map(item => item.charCodeAt(0))
.forEach(code => {
if (code > 0xFF) {
bytes.push((code & 0xFF00) >>> 8);
}
bytes.push(code & 0xFF);
});
return bytes;
}
}
本体
word:検索したい語句
list:検索候補
code:script.js
function fizzSearch(word, list) {
// 空白文字で区切った文字列を並び替える
const permutation = getPermutation(word.split(/\s/)).map(wordList => ${wordList.join(' ')} );
// parametorを変えてあいまい検索を実行するlambda式
const asearched = (targetWord) => {
const a = new Asearch(targetWord);
let limitCount = Math.floor(targetWord.length / 4) + 1;
limitCount = limitCount > 3 ? 3 : limitCount; // ambigの最大値は3
return range(limitCount)
.flatMap(i => list.filter(title => a.match(title, i)))
};
//const searchedList = permutation
const searchedList = permutation.flatMap(targetWord => asearched(targetWord));
// 重複を取り除いて返す
}
code:import.js
import '/api/code/takker/fizzSearch/script.js';
export default function(word, list) {
return fizzSearch(word, list);
}
全順序マッチング
順列を作成する関数を実装すれば良い
code:script.js
// 重複は考慮していない
function getPermutation(list) {
if (list.length == 0) return list;
if (list.length == 1) return list; if (list.length == 2) return list0,list[1,[list1,list0]]; return list.flatMap(first => {
const restList = list.filter(item => item !== first);
});
}
UserScript.icon