fix: Normalize and deduplicate tokens for improved search relevance

Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
This commit is contained in:
2026-02-13 11:46:25 -07:00
parent dc2267030a
commit 0c5b956e53
+4 -2
View File
@@ -41,7 +41,7 @@ export class Tokenizer {
}
// Remove duplicates
// tokens = [...new Set(tokens)]
tokens = [...new Set(tokens)]
// Remove empty tokens
tokens = tokens.filter(Boolean)
@@ -107,7 +107,9 @@ export class Tokenizer {
}
/**
 * Splits `text` on the BRACKETS_AND_SPACE pattern and, unless `skipChs` is
 * set, hands the resulting tokens to `this.tokenizeChsWord`.
 *
 * NOTE(review): this listing is a rendered diff with its +/- markers stripped.
 * Per the hunk header (-107,7 +107,9), the first `const tokens` statement is
 * the line removed by the commit and the three-line chain after it is the
 * replacement; only one of the two exists in the real file.
 *
 * @param text - Raw input string to split into tokens.
 * @param skipChs - When true, return the split tokens directly without the
 *   `tokenizeChsWord` pass. Defaults to false.
 * @returns The token list (after `tokenizeChsWord` unless `skipChs` is set).
 */
private tokenizeWords(text: string, { skipChs = false } = {}): string[] {
// Pre-commit version: tokens are used exactly as produced by the split.
const tokens = text.split(BRACKETS_AND_SPACE)
// Post-commit version: same split, then any trailing run of . , : ; ! ? is
// stripped from each token. A token made up only of those characters becomes
// an empty string here; NOTE(review): confirm the caller filters empties.
const tokens = text
.split(BRACKETS_AND_SPACE)
.map(t => t.replace(/[.,:;!?]+$/, ''))
// Early exit: the caller asked to bypass the tokenizeChsWord step.
if (skipChs) return tokens
return this.tokenizeChsWord(tokens)
}