From 0c5b956e5361ea35ca76546891bf785b37ba4a46 Mon Sep 17 00:00:00 2001
From: Tanner Collin
Date: Fri, 13 Feb 2026 11:46:25 -0700
Subject: [PATCH] fix: Normalize and deduplicate tokens for improved search relevance

Co-authored-by: aider (gemini/gemini-2.5-pro)
---
 src/search/tokenizer.ts | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/search/tokenizer.ts b/src/search/tokenizer.ts
index 9bd5baf..5bb567b 100644
--- a/src/search/tokenizer.ts
+++ b/src/search/tokenizer.ts
@@ -41,7 +41,7 @@ export class Tokenizer {
     }
 
     // Remove duplicates
-    // tokens = [...new Set(tokens)]
+    tokens = [...new Set(tokens)]
 
     // Remove empty tokens
     tokens = tokens.filter(Boolean)
@@ -107,7 +107,9 @@ export class Tokenizer {
   }
 
   private tokenizeWords(text: string, { skipChs = false } = {}): string[] {
-    const tokens = text.split(BRACKETS_AND_SPACE)
+    const tokens = text
+      .split(BRACKETS_AND_SPACE)
+      .map(t => t.replace(/[.,:;!?]+$/, ''))
     if (skipChs) return tokens
     return this.tokenizeChsWord(tokens)
   }