Add installation instructions to README

2026-02-06 09:38:55 -07:00
5 changed files with 54 additions and 73 deletions
@@ -11,11 +11,6 @@ Search terms aren't split on apostrophes
 Search terms less than 3 characters long or common words are ignored
 - ignored words: "a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "by", "for", "with", "to", "from", "of", "is", "it", "that", "this"
 Excerpt highlighting fixes
 - words with apostrophes weren't being highlighted
 - excerpts with blank lines were sometimes shown incorrectly
 - fixed match index calculations when diacritics were removed
 The first line of a paragraph is ranked like Heading 3 if it ends in a colon
 - for example,
@@ -38,10 +33,10 @@ Aka: packing list
 content
 ```
-... "packing list" is indexed and ranked the same as "# packing list". Note that "Aka:" isn't case or colon sensitive.
+... "packing list" is indexed and ranked the same as "# packing list". Note that "Aka:" isn't case or colon sensitive.
-### Fork Installation
+## Fork Installation
 Ensure the original Omnisearch plugin is installed, see instructions below.
@@ -53,15 +48,13 @@ $ mv main.js main.js.bak
 $ wget https://raw.githubusercontent.com/tannercollin/obsidian-tannersearch/refs/heads/master/dist/main.js
 ```
-In Obsidian, open Settings > Community Plugins. Disable and enable Omnisearch.
+In Obsidian, open Settings > Community Plugins. Disable and enable Omnisearch.
 Open Settings > Omnisearch. Scroll to bottom. Click "Clear cache" data.
 Restart Obsidian.
-Note: on mobile you'll have to use some sort of sync or downloader and move the main.js over to your vault.
+## Building Fork
 ### Building the Fork
 If you'd rather build the fork yourself:
@@ -166,12 +166,11 @@ export class SearchEngine {
    logVerbose(JSON.stringify(searchTokens, null, 1))
    let results = this.minisearch.search(searchTokens, {
      prefix: term => term.length >= options.prefixLength,
-      bm25: {b: 0.2, d: 0.5, k: 1.2},
+      // length <= 3: no fuzziness
-      // length <= 4: no fuzziness
+      // length <= 5: fuzziness of 10%
-      // length <= 5: 1/2 fuzziness
+      // length > 5: fuzziness of 20%
      // length > 5: full fuzziness
      fuzzy: term =>
-        term.length <= 4 ? 0 : term.length <= 5 ? fuzziness / 2 : fuzziness,
+        term.length <= 3 ? 0 : term.length <= 5 ? fuzziness / 2 : fuzziness,
      boost: {
        basename: settings.weightBasename,
        aliases: settings.weightBasename,
@@ -41,7 +41,7 @@ export class Tokenizer {
      }
      // Remove duplicates
-      tokens = [...new Set(tokens)]
+      // tokens = [...new Set(tokens)]
      // Remove empty tokens
      tokens = tokens.filter(Boolean)
@@ -107,9 +107,7 @@ export class Tokenizer {
  }
  private tokenizeWords(text: string, { skipChs = false } = {}): string[] {
-    const tokens = text
+    const tokens = text.split(BRACKETS_AND_SPACE)
      .split(BRACKETS_AND_SPACE)
      .map(t => t.replace(/[.,:;!?]+$/, ''))
    if (skipChs) return tokens
    return this.tokenizeChsWord(tokens)
  }
@@ -25,9 +25,7 @@ export class TextProcessor {
    try {
      return text.replace(
        new RegExp(
-          `(${matches
+          `(${matches.map(item => escapeRegExp(item.match)).join('|')})`,
            .map(item => escapeRegExp(escapeHTML(item.match)))
            .join('|')})`,
          'giu'
        ),
        `<span class="${highlightClass}">$1</span>`
@@ -49,7 +47,7 @@ export class TextProcessor {
    strings.sort((a, b) => b.length - a.length)
    const joined = `(${strings
-      .map(s => `(?<!\\w)${escapeRegExp(s)}(?!\\w)`)
+      .map(s => `\\b${escapeRegExp(s)}\\b|${escapeRegExp(s)}`)
      .join('|')})`
    return new RegExp(`${joined}`, 'gui')
@@ -84,16 +82,9 @@ export class TextProcessor {
      }
      const matchStartIndex = match.index
      const matchEndIndex = matchStartIndex + match[0].length
-
+      const originalMatch = originalText
-      // If `ignoreDiacritics` is on, `text` may have a different length than `originalText`,
+        .substring(matchStartIndex, matchEndIndex)
-      // making `match.index` unreliable for `originalText`.
+        .trim()
      // We use `match[0]`, which is the matched term (but without diacritics).
      const originalMatchBeforeTrim = this.plugin.settings.ignoreDiacritics
        ? match[0]
        : originalText.substring(matchStartIndex, matchEndIndex)
      const originalMatch = originalMatchBeforeTrim.trim()
      if (originalMatch && match.index >= 0) {
        matches.push({ match: originalMatch, offset: match.index })
      }
@@ -135,18 +126,18 @@ export class TextProcessor {
        content = content.slice(0, excerptAfter)
      }
      if (settings.renderLineReturnInExcerpts) {
        const last = content.lastIndexOf('\n', pos - from)
        if (last > 0) {
          content = content.slice(last)
        }
        const lineReturn = new RegExp(/(?:\r\n|\r|\n)/g)
        // Remove multiple line returns
        content = content
          .split(lineReturn)
          .filter(l => l)
          .join('\n')
        const last = content.lastIndexOf('\n', pos - from)
        if (last > 0) {
          content = content.slice(last)
        }
      }
      content = escapeHTML(content)