Compare commits

..

15 Commits

Author SHA1 Message Date
tanner ba13d435df New release 2026-02-13 16:52:34 -07:00
tanner 7c35571b69 Update README for match offset fixes 2026-02-13 16:52:05 -07:00
tanner bb58c12306 fix: Correct match offset when ignoring diacritics
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-13 16:46:25 -07:00
tanner b6f98f5d04 Modify BM25 search params and fuzziness
Decreasing Length normalization impact factor. I don't want notes to be
ranked as differently if they are long vs. short.
2026-02-13 12:18:45 -07:00
tanner 7a1414d397 Add in default BM25 search params
https://lucaong.github.io/minisearch/types/MiniSearch.BM25Params.html
2026-02-13 12:04:52 -07:00
tanner 0c5b956e53 fix: Normalize and deduplicate tokens for improved search relevance
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-13 11:46:25 -07:00
tanner dc2267030a Update README with excerpt changes 2026-02-13 11:38:39 -07:00
tanner 292fb765de fix: Ensure correct excerpt slicing before removing blank lines
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-13 11:30:07 -07:00
tanner ec2a720649 fix: Ensure highlighting matches HTML-escaped text in excerpts
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-13 10:56:07 -07:00
tanner a8c18f5dca fix: Improve search term highlighting for words with punctuation
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-13 10:48:20 -07:00
tanner 5e5708de4e Add installation instructions to README 2026-02-06 09:43:57 -07:00
tanner 70deab0b77 Add main.js to repo for easy distribution 2026-02-06 09:28:19 -07:00
tanner 269a1e6ea4 Organize README, add aka info 2026-02-06 09:26:39 -07:00
tanner 9e68a725d0 Make aka header matching stricter 2026-02-06 09:12:28 -07:00
tanner 44da87a29d feat: Extract 'aka' lines from first paragraph for H1 indexing
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-06 09:06:34 -07:00
6 changed files with 286 additions and 29 deletions
+63 -11
View File
@@ -1,17 +1,23 @@
# Tannersearch for Obsidian # Tannersearch for Obsidian
This is a fork of [obsidian-omnisearch](https://github.com/scambier/obsidian-omnisearch) by [@scambier](https://github.com/scambier) with the following changes: This is a fork of [obsidian-omnisearch](https://github.com/scambier/obsidian-omnisearch) (by [@scambier](https://github.com/scambier)) with the following changes:
- when opening a result, the cursor placement offset prioritizes note titles, headings, then content When opening a result, the cursor placement prioritizes note titles over headings over content
- this means if the note's name matches your search, it opens at the top - this means if the note's name matches your search, it opens at the top instead of on a random match in the middle of the note
- search terms aren't split on apostrophes
- searching for "Sun's BBQ" searches for ["Sun's", "BBQ"] instead of ["Sun", "s", "BBQ"] Search terms aren't split on apostrophes
- searches aren't HTML escaped - searching for "Sun's BBQ" searches for ["Sun's", "BBQ"] instead of ["Sun", "s", "BBQ"]
- before searching for "Sun's BBQ" would actually search for `Sun&#039;s BBQ`
- search terms less than 3 characters long or common words are ignored Search terms less than 3 characters long or common words are ignored
- ignored words: "a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "by", "for", "with", "to", "from", "of", "is", "it", "that", "this" - ignored words: "a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "by", "for", "with", "to", "from", "of", "is", "it", "that", "this"
- the first line of a paragraph is ranked like Heading 3 if it ends in a colon
- for example, Excerpt highlighting fixes
- words with apostrophes weren't being highlighted
- excerpts with blank lines were sometimes shown incorrectly
- fixed match index calculations when diacritics were removed
The first line of a paragraph is ranked like Heading 3 if it ends in a colon
- for example,
``` ```
Japan trip: Japan trip:
@@ -22,6 +28,52 @@ Japan trip:
... "Japan trip:" is indexed and ranked the same as "### Japan trip" ... "Japan trip:" is indexed and ranked the same as "### Japan trip"
If the first paragraph of a note contains a line like "aka other name", then "other name" is ranked like H1
- for example,
```
see also: [[Travel General]]
Aka: packing list
content
```
... "packing list" is indexed and ranked the same as "# packing list". Note that the "Aka:" prefix is case-insensitive and the colon is optional.
### Fork Installation
Ensure the original Omnisearch plugin is installed, see instructions below.
Download main.js into your `.obsidian/plugins/omnisearch` directory, for example:
```
$ cd ~/notes/.obsidian/plugins/omnisearch
$ mv main.js main.js.bak
$ wget https://raw.githubusercontent.com/tannercollin/obsidian-tannersearch/refs/heads/master/dist/main.js
```
In Obsidian, open Settings > Community Plugins. Disable and enable Omnisearch.
Open Settings > Omnisearch. Scroll to the bottom. Click "Clear cache data".
Restart Obsidian.
Note: on mobile you'll have to use some sort of sync or downloader and move the main.js over to your vault.
### Building the Fork
If you'd rather build the fork yourself:
```
$ git clone https://github.com/tannercollin/obsidian-tannersearch.git
$ cd obsidian-tannersearch/
$ npm install --legacy-peer-deps
$ npm run build
```
Then copy `dist/main.js` as above.
# Original README # Original README
+187
View File
File diff suppressed because one or more lines are too long
+7 -1
View File
@@ -233,6 +233,12 @@ export class DocumentsRepository {
const headings2 = metadata ? extractHeadingsFromCache(metadata, 2) : [] const headings2 = metadata ? extractHeadingsFromCache(metadata, 2) : []
const headings3 = metadata ? extractHeadingsFromCache(metadata, 3) : [] const headings3 = metadata ? extractHeadingsFromCache(metadata, 3) : []
const akaHeadings: string[] = content
.split(/\n\s*\n/)[0]
.split('\n')
.map(line => line.match(/^aka:?\s*(.+)$/i)?.[1]?.trim())
.filter((heading): heading is string => !!heading)
const lines = content.split('\n') const lines = content.split('\n')
const colonHeadings: string[] = [] const colonHeadings: string[] = []
for (let i = 0; i < lines.length; i++) { for (let i = 0; i < lines.length; i++) {
@@ -263,7 +269,7 @@ export class DocumentsRepository {
tags: tags, tags: tags,
unmarkedTags: tags.map(t => t.replace('#', '')), unmarkedTags: tags.map(t => t.replace('#', '')),
aliases: getAliasesFromMetadata(metadata).join(''), aliases: getAliasesFromMetadata(metadata).join(''),
headings1: headings1.join(' '), headings1: [...headings1, ...akaHeadings].join(' '),
headings2: headings2.join(' '), headings2: headings2.join(' '),
headings3: [...headings3, ...colonHeadings].join(' '), headings3: [...headings3, ...colonHeadings].join(' '),
} }
+5 -4
View File
@@ -166,11 +166,12 @@ export class SearchEngine {
logVerbose(JSON.stringify(searchTokens, null, 1)) logVerbose(JSON.stringify(searchTokens, null, 1))
let results = this.minisearch.search(searchTokens, { let results = this.minisearch.search(searchTokens, {
prefix: term => term.length >= options.prefixLength, prefix: term => term.length >= options.prefixLength,
// length <= 3: no fuzziness bm25: {b: 0.2, d: 0.5, k: 1.2},
// length <= 5: fuzziness of 10% // length <= 4: no fuzziness
// length > 5: fuzziness of 20% // length <= 5: 1/2 fuzziness
// length > 5: full fuzziness
fuzzy: term => fuzzy: term =>
term.length <= 3 ? 0 : term.length <= 5 ? fuzziness / 2 : fuzziness, term.length <= 4 ? 0 : term.length <= 5 ? fuzziness / 2 : fuzziness,
boost: { boost: {
basename: settings.weightBasename, basename: settings.weightBasename,
aliases: settings.weightBasename, aliases: settings.weightBasename,
+4 -2
View File
@@ -41,7 +41,7 @@ export class Tokenizer {
} }
// Remove duplicates // Remove duplicates
// tokens = [...new Set(tokens)] tokens = [...new Set(tokens)]
// Remove empty tokens // Remove empty tokens
tokens = tokens.filter(Boolean) tokens = tokens.filter(Boolean)
@@ -107,7 +107,9 @@ export class Tokenizer {
} }
private tokenizeWords(text: string, { skipChs = false } = {}): string[] { private tokenizeWords(text: string, { skipChs = false } = {}): string[] {
const tokens = text.split(BRACKETS_AND_SPACE) const tokens = text
.split(BRACKETS_AND_SPACE)
.map(t => t.replace(/[.,:;!?]+$/, ''))
if (skipChs) return tokens if (skipChs) return tokens
return this.tokenizeChsWord(tokens) return this.tokenizeChsWord(tokens)
} }
+20 -11
View File
@@ -25,7 +25,9 @@ export class TextProcessor {
try { try {
return text.replace( return text.replace(
new RegExp( new RegExp(
`(${matches.map(item => escapeRegExp(item.match)).join('|')})`, `(${matches
.map(item => escapeRegExp(escapeHTML(item.match)))
.join('|')})`,
'giu' 'giu'
), ),
`<span class="${highlightClass}">$1</span>` `<span class="${highlightClass}">$1</span>`
@@ -47,7 +49,7 @@ export class TextProcessor {
strings.sort((a, b) => b.length - a.length) strings.sort((a, b) => b.length - a.length)
const joined = `(${strings const joined = `(${strings
.map(s => `\\b${escapeRegExp(s)}\\b|${escapeRegExp(s)}`) .map(s => `(?<!\\w)${escapeRegExp(s)}(?!\\w)`)
.join('|')})` .join('|')})`
return new RegExp(`${joined}`, 'gui') return new RegExp(`${joined}`, 'gui')
@@ -82,9 +84,16 @@ export class TextProcessor {
} }
const matchStartIndex = match.index const matchStartIndex = match.index
const matchEndIndex = matchStartIndex + match[0].length const matchEndIndex = matchStartIndex + match[0].length
const originalMatch = originalText
.substring(matchStartIndex, matchEndIndex) // If `ignoreDiacritics` is on, `text` may have a different length than `originalText`,
.trim() // making `match.index` unreliable for `originalText`.
// We use `match[0]`, which is the matched term (but without diacritics).
const originalMatchBeforeTrim = this.plugin.settings.ignoreDiacritics
? match[0]
: originalText.substring(matchStartIndex, matchEndIndex)
const originalMatch = originalMatchBeforeTrim.trim()
if (originalMatch && match.index >= 0) { if (originalMatch && match.index >= 0) {
matches.push({ match: originalMatch, offset: match.index }) matches.push({ match: originalMatch, offset: match.index })
} }
@@ -126,18 +135,18 @@ export class TextProcessor {
content = content.slice(0, excerptAfter) content = content.slice(0, excerptAfter)
} }
if (settings.renderLineReturnInExcerpts) { if (settings.renderLineReturnInExcerpts) {
const last = content.lastIndexOf('\n', pos - from)
if (last > 0) {
content = content.slice(last)
}
const lineReturn = new RegExp(/(?:\r\n|\r|\n)/g) const lineReturn = new RegExp(/(?:\r\n|\r|\n)/g)
// Remove multiple line returns // Remove multiple line returns
content = content content = content
.split(lineReturn) .split(lineReturn)
.filter(l => l) .filter(l => l)
.join('\n') .join('\n')
const last = content.lastIndexOf('\n', pos - from)
if (last > 0) {
content = content.slice(last)
}
} }
content = escapeHTML(content) content = escapeHTML(content)