asearch
Scrapboxのタイトル検索で利用
はやい
byte列を比較しているだけ
結果はtrue/falseで返ってくる
あいまい度は指定できる(0〜3まで)
他のライブラリに依存していない、pure javascript
とてもわかり易い仕組みの図解
2021-05-19 21:38:33 Unicode対応した
UserScript/denoから使えるcode
code:script.js
export class Asearch {
get INITPAT() {return 0x80000000;} // 0100,0000,0000,0000,0000,0000,0000,0000
get MAXCHAR() {return 0x100;} // 256文字が上限
constructor(source) {
this.shiftpat = Array(this.MAXCHAR).map(_ => 0);
this.epsilon = 0;
let mask = this.INITPAT;
for (const item of this.unpack(source)) {
// 0x20 is a space
if (item === 0x20) {
this.epsilon |= mask;
} else {
this.shiftpatitem |= mask; mask >>>= 1;
}
}
this.acceptpat = mask;
}
tolower(c) {
return this.isupper(c) ? c + 0x20 : c;
}
toupper(c) {
return this.islower(c) ? c - 0x20 : c;
}
isupper(c) {
// 0x41 = A, 0x5a = Z
return (c >= 0x41) && (c <= 0x5a);
}
islower(c) {
// 0x61 = a, 0x7a = z
return (c >= 0x61) && (c <= 0x7a);
}
state(str = '') {
let i0 = this.INITSTATE0; let i1 = this.INITSTATE1; let i2 = this.INITSTATE2; let i3 = this.INITSTATE3; for (const item of this.unpack(str)) {
const mask = this.shiftpatitem; i3 = (i3 & this.epsilon) | ((i3 & mask) >>> 1) | (i2 >>> 1) | i2;
i2 = (i2 & this.epsilon) | ((i2 & mask) >>> 1) | (i1 >>> 1) | i1;
i1 = (i1 & this.epsilon) | ((i1 & mask) >>> 1) | (i0 >>> 1) | i0;
i0 = (i0 & this.epsilon) | ((i0 & mask) >>> 1);
i1 |= i0 >>> 1;
i2 |= i1 >>> 1;
i3 |= i2 >>> 1;
}
}
match(str, ambig = 0) {
ambig = ambig < this.INITSTATE.length ? ambig : this.INITSTATE.length - 1;
const s = this.state(str);
return (sambig & this.acceptpat) !== 0; }
unpack(str) {
return str.split('').map(char => char.charCodeAt(0));
}
// UTF-16 code pointを上8bitと下8bitとに分割する
/*unpack(str) {
return str.split('')
.flatMap(char => {
const code = char.charCodeAt(0);
if (code > 0xFF) {
}
});
}*/
}
test code
code:js
(async () => {
const {Asearch} = await import('/api/code/programming-notes/asearch/script.js');
const a = new Asearch('abcde');
assert(a, 'abcde');
assert(a, 'AbCdE');
assert(a, 'abcd');
assert(a, 'abcd', 1);
assert(a, 'ab de', 1);
assert(a, 'abe', 1);
assert(a, 'abe', 2);
const b = new Asearch('cheese burger');
assert(b, 'cheese burger');
assert(b, 'chess burger');
assert(b, 'chess burger', 2);
assert(b, 'chess', 2);
const c = new Asearch('漢字文字列');
assert(c, '漢字文字列');
assert(c, '漢字文字烈');
assert(c, '漢字文字烈', 1);
function assert(asearch, text, ambig = 0) {
console.log(text = ${text}, ambig = ${ambig} => ${asearch.match(text, ambig)});
}
})();
WebWorker用
code:worker.js
class Asearch {
get INITPAT() {return 0x80000000;} // 0100,0000,0000,0000,0000,0000,0000,0000
get MAXCHAR() {return 0x100;} // 256文字が上限
constructor(source) {
this.shiftpat = Array(this.MAXCHAR).map(_ => 0);
this.epsilon = 0;
let mask = this.INITPAT;
for (const item of this.unpack(source)) {
// 0x20 is a space
if (item === 0x20) {
this.epsilon |= mask;
} else {
this.shiftpatitem |= mask; mask >>>= 1;
}
}
this.acceptpat = mask;
}
tolower(c) {
return this.isupper(c) ? c + 0x20 : c;
}
toupper(c) {
return this.islower(c) ? c - 0x20 : c;
}
isupper(c) {
// 0x41 = A, 0x5a = Z
return (c >= 0x41) && (c <= 0x5a);
}
islower(c) {
// 0x61 = a, 0x7a = z
return (c >= 0x61) && (c <= 0x7a);
}
state(str = '') {
let i0 = this.INITSTATE0; let i1 = this.INITSTATE1; let i2 = this.INITSTATE2; let i3 = this.INITSTATE3; for (const item of this.unpack(str)) {
const mask = this.shiftpatitem; i3 = (i3 & this.epsilon) | ((i3 & mask) >>> 1) | (i2 >>> 1) | i2;
i2 = (i2 & this.epsilon) | ((i2 & mask) >>> 1) | (i1 >>> 1) | i1;
i1 = (i1 & this.epsilon) | ((i1 & mask) >>> 1) | (i0 >>> 1) | i0;
i0 = (i0 & this.epsilon) | ((i0 & mask) >>> 1);
i1 |= i0 >>> 1;
i2 |= i1 >>> 1;
i3 |= i2 >>> 1;
}
}
match(str, ambig = 0) {
ambig = ambig < this.INITSTATE.length ? ambig : this.INITSTATE.length - 1;
const s = this.state(str);
return (sambig & this.acceptpat) !== 0; }
// UTF-16 code pointを上8bitと下8bitとに分割する
unpack(str) {
return str.split('')
.flatMap(char => {
const code = char.charCodeAt(0);
if (code > 0xFF) {
}
});
}
}
JavaScript.icon
Deno.icon