New release

Update README for match offset fixes
fix: Correct match offset when ignoring diacritics
2026-02-13 16:52:34 -07:00 · 2026-02-13 16:52:05 -07:00 · 2026-02-13 16:46:25 -07:00 · 2026-02-13 12:18:45 -07:00 · 2026-02-13 12:04:52 -07:00 · 2026-02-13 11:46:25 -07:00
6 changed files with 286 additions and 29 deletions
@@ -1,17 +1,23 @@
 # Tannersearch for Obsidian
-This is a fork of [obsidian-omnisearch](https://github.com/scambier/obsidian-omnisearch) by [@scambier](https://github.com/scambier) with the following changes:
+This is a fork of [obsidian-omnisearch](https://github.com/scambier/obsidian-omnisearch) (by [@scambier](https://github.com/scambier)) with the following changes:
- when opening a result, the cursor placement offset prioritizes note titles, headings, then content
+When opening a result, the cursor placement prioritizes note titles over headings over content
-  - this means if the note's name matches your search, it opens at the top
+- this means if the note's name matches your search, it opens at the top instead of on a random match in the middle of the note
- search terms aren't split on apostrophes
+
-  - searching for "Sun's BBQ" searches for ["Sun's", "BBQ"] instead of ["Sun", "s", "BBQ"]
+Search terms aren't split on apostrophes
- searches aren't HTML escaped
+- searching for "Sun's BBQ" searches for ["Sun's", "BBQ"] instead of ["Sun", "s", "BBQ"]
-  - before searching for "Sun's BBQ" would actually search for `Sun&#039;s BBQ`
+
- search terms less than 3 characters long or common words are ignored
+Search terms shorter than 3 characters, as well as common words, are ignored
-  - ignored words: "a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "by", "for", "with", "to", "from", "of", "is", "it", "that", "this"
+- ignored words: "a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "by", "for", "with", "to", "from", "of", "is", "it", "that", "this"
- the first line of a paragraph is ranked like Heading 3 if it ends in a colon
+
-  - for example,
+Excerpt highlighting fixes
 - words with apostrophes weren't being highlighted
 - excerpts with blank lines were sometimes shown incorrectly
 - fixed match index calculations when diacritics were removed
 The first line of a paragraph is ranked like Heading 3 if it ends in a colon
 - for example,
 ```
 Japan trip:
@@ -22,6 +28,52 @@ Japan trip:
 ... "Japan trip:" is indexed and ranked the same as "### Japan trip"
 If the first paragraph of a note contains a line like "aka other name", then "other name" is ranked like H1
 - for example,
 ```
 see also: [[Travel General]]
 Aka: packing list
 content
 ```
 ... "packing list" is indexed and ranked the same as "# packing list". Note that the "Aka:" prefix is case-insensitive and the colon is optional.
 ### Fork Installation
 Ensure the original Omnisearch plugin is installed, see instructions below.
 Download main.js into your `.obsidian/plugins/omnisearch` directory, example:
 ```
 $ cd ~/notes/.obsidian/plugins/omnisearch
 $ mv main.js main.js.bak
 $ wget https://raw.githubusercontent.com/tannercollin/obsidian-tannersearch/refs/heads/master/dist/main.js
 ```
 In Obsidian, open Settings > Community Plugins. Disable and enable Omnisearch.
 Open Settings > Omnisearch. Scroll to the bottom. Click "Clear cache data".
 Restart Obsidian.
 Note: on mobile you'll have to use some sort of sync or downloader and move the main.js over to your vault.
 ### Building the Fork
 If you'd rather build the fork yourself:
 ```
 $ git clone https://github.com/tannercollin/obsidian-tannersearch.git
 $ cd obsidian-tannersearch/
 $ npm install --legacy-peer-deps
 $ npm run build
 ```
 Then copy `dist/main.js` as above.
 # Original README
@@ -233,6 +233,12 @@ export class DocumentsRepository {
    const headings2 = metadata ? extractHeadingsFromCache(metadata, 2) : []
    const headings3 = metadata ? extractHeadingsFromCache(metadata, 3) : []
    const akaHeadings: string[] = content
      .split(/\n\s*\n/)[0]
      .split('\n')
      .map(line => line.match(/^aka:?\s*(.+)$/i)?.[1]?.trim())
      .filter((heading): heading is string => !!heading)
    const lines = content.split('\n')
    const colonHeadings: string[] = []
    for (let i = 0; i < lines.length; i++) {
@@ -263,7 +269,7 @@ export class DocumentsRepository {
      tags: tags,
      unmarkedTags: tags.map(t => t.replace('#', '')),
      aliases: getAliasesFromMetadata(metadata).join(''),
-      headings1: headings1.join(' '),
+      headings1: [...headings1, ...akaHeadings].join(' '),
      headings2: headings2.join(' '),
      headings3: [...headings3, ...colonHeadings].join(' '),
    }
@@ -166,11 +166,12 @@ export class SearchEngine {
    logVerbose(JSON.stringify(searchTokens, null, 1))
    let results = this.minisearch.search(searchTokens, {
      prefix: term => term.length >= options.prefixLength,
-      // length <= 3: no fuzziness
+      bm25: {b: 0.2, d: 0.5, k: 1.2},
-      // length <= 5: fuzziness of 10%
+      // length <= 4: no fuzziness
-      // length > 5: fuzziness of 20%
+      // length <= 5: 1/2 fuzziness
      // length > 5: full fuzziness
      fuzzy: term =>
-        term.length <= 3 ? 0 : term.length <= 5 ? fuzziness / 2 : fuzziness,
+        term.length <= 4 ? 0 : term.length <= 5 ? fuzziness / 2 : fuzziness,
      boost: {
        basename: settings.weightBasename,
        aliases: settings.weightBasename,
@@ -41,7 +41,7 @@ export class Tokenizer {
      }
      // Remove duplicates
-      // tokens = [...new Set(tokens)]
+      tokens = [...new Set(tokens)]
      // Remove empty tokens
      tokens = tokens.filter(Boolean)
@@ -107,7 +107,9 @@ export class Tokenizer {
  }
  private tokenizeWords(text: string, { skipChs = false } = {}): string[] {
-    const tokens = text.split(BRACKETS_AND_SPACE)
+    const tokens = text
      .split(BRACKETS_AND_SPACE)
      .map(t => t.replace(/[.,:;!?]+$/, ''))
    if (skipChs) return tokens
    return this.tokenizeChsWord(tokens)
  }
@@ -25,7 +25,9 @@ export class TextProcessor {
    try {
      return text.replace(
        new RegExp(
-          `(${matches.map(item => escapeRegExp(item.match)).join('|')})`,
+          `(${matches
            .map(item => escapeRegExp(escapeHTML(item.match)))
            .join('|')})`,
          'giu'
        ),
        `<span class="${highlightClass}">$1</span>`
@@ -47,7 +49,7 @@ export class TextProcessor {
    strings.sort((a, b) => b.length - a.length)
    const joined = `(${strings
-      .map(s => `\\b${escapeRegExp(s)}\\b|${escapeRegExp(s)}`)
+      .map(s => `(?<!\\w)${escapeRegExp(s)}(?!\\w)`)
      .join('|')})`
    return new RegExp(`${joined}`, 'gui')
@@ -82,9 +84,16 @@ export class TextProcessor {
      }
      const matchStartIndex = match.index
      const matchEndIndex = matchStartIndex + match[0].length
-      const originalMatch = originalText
+
-        .substring(matchStartIndex, matchEndIndex)
+      // If `ignoreDiacritics` is on, `text` may have a different length than `originalText`,
-        .trim()
+      // making `match.index` unreliable for `originalText`.
      // We use `match[0]`, which is the matched term (but without diacritics).
      const originalMatchBeforeTrim = this.plugin.settings.ignoreDiacritics
        ? match[0]
        : originalText.substring(matchStartIndex, matchEndIndex)
      const originalMatch = originalMatchBeforeTrim.trim()
      if (originalMatch && match.index >= 0) {
        matches.push({ match: originalMatch, offset: match.index })
      }
@@ -126,18 +135,18 @@ export class TextProcessor {
        content = content.slice(0, excerptAfter)
      }
      if (settings.renderLineReturnInExcerpts) {
        const last = content.lastIndexOf('\n', pos - from)
        if (last > 0) {
          content = content.slice(last)
        }
        const lineReturn = new RegExp(/(?:\r\n|\r|\n)/g)
        // Remove multiple line returns
        content = content
          .split(lineReturn)
          .filter(l => l)
          .join('\n')
        const last = content.lastIndexOf('\n', pos - from)
        if (last > 0) {
          content = content.slice(last)
        }
      }
      content = escapeHTML(content)
Author	SHA1	Message	Date
tanner	ba13d435df	New release	2026-02-13 16:52:34 -07:00
tanner	7c35571b69	Update README for match offset fixes	2026-02-13 16:52:05 -07:00
tanner	bb58c12306	fix: Correct match offset when ignoring diacritics Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>	2026-02-13 16:46:25 -07:00
tanner	b6f98f5d04	Modify BM25 search params and fuzziness Decreasing Length normalization impact factor. I don't want notes to be ranked as differently if they are long vs. short.	2026-02-13 12:18:45 -07:00
tanner	7a1414d397	Add in default BM25 search params https://lucaong.github.io/minisearch/types/MiniSearch.BM25Params.html	2026-02-13 12:04:52 -07:00
tanner	0c5b956e53	fix: Normalize and deduplicate tokens for improved search relevance Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>	2026-02-13 11:46:25 -07:00
tanner	dc2267030a	Update README with excerpt changes	2026-02-13 11:38:39 -07:00
tanner	292fb765de	fix: Ensure correct excerpt slicing before removing blank lines Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>	2026-02-13 11:30:07 -07:00
tanner	ec2a720649	fix: Ensure highlighting matches HTML-escaped text in excerpts Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>	2026-02-13 10:56:07 -07:00
tanner	a8c18f5dca	fix: Improve search term highlighting for words with punctuation Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>	2026-02-13 10:48:20 -07:00
tanner	5e5708de4e	Add installation instructions to README	2026-02-06 09:43:57 -07:00
tanner	70deab0b77	Add main.js to repo for easy distribution	2026-02-06 09:28:19 -07:00
tanner	269a1e6ea4	Organize README, add aka info	2026-02-06 09:26:39 -07:00
tanner	9e68a725d0	Make aka header matching stricter	2026-02-06 09:12:28 -07:00
tanner	44da87a29d	feat: Extract 'aka' lines from first paragraph for H1 indexing Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>	2026-02-06 09:06:34 -07:00