import { excerptAfter, excerptBefore, type SearchMatch } from '../globals'
import { removeDiacritics, warnVerbose } from './utils'
import type { Query } from '../search/query'
import { Notice } from 'obsidian'
import { escapeRegExp } from 'lodash-es'
import type OmnisearchPlugin from '../main'

/**
 * Text helpers used to locate search matches in a note, highlight them and
 * build HTML excerpts. Reads its options from `plugin.settings`.
 */
export class TextProcessor {
  constructor(private plugin: OmnisearchPlugin) {}

  /**
   * Wraps the matches in the text with a <span> element and a highlight class
   * @param text The text to decorate. Each match is passed through
   *   `escapeHTML` before being searched for, so `text` is expected to be
   *   HTML-escaped already (as `makeExcerpt` output is).
   * @param matches The matches to highlight
   * @returns The html string with the matches highlighted, or `text`
   *   unchanged when there is nothing to highlight or the regex fails
   */
  public highlightText(text: string, matches: SearchMatch[]): string {
    const highlightClass = `suggestion-highlight omnisearch-highlight ${
      this.plugin.settings.highlight ? 'omnisearch-default-highlight' : ''
    }`

    if (!matches.length) {
      return text
    }
    try {
      const pattern = matches
        .map(item => escapeRegExp(escapeHTML(item.match)))
        .join('|')
      return text.replace(
        new RegExp(`(${pattern})`, 'giu'),
        `<span class="${highlightClass}">$1</span>`
      )
    } catch (e) {
      console.error('Omnisearch - Error in highlightText()', e)
      return text
    }
  }

  /**
   * Converts a list of strings to a single global regex matching any of them.
   * Used to find excerpts in a note body, or select which words to highlight.
   * @param strings The strings to match; the array is not modified
   * @returns A regex with the `g` flag (required by the `exec` loop in
   *   `getMatches`), or `/^$/g` when `strings` is empty
   */
  public stringsToRegex(strings: string[]): RegExp {
    if (!strings.length) return /^$/g

    // Sort by decreasing length, so that longer strings are matched first.
    // Work on a copy: sorting in place would reorder the caller's array.
    const sorted = [...strings].sort((a, b) => b.length - a.length)

    // NOTE(review): BEGIN RECONSTRUCTED SPAN.
    // In the reviewed copy of this file, everything from just after "(?<" in
    // this pattern up to the "++count >= 100" guard in getMatches() had been
    // removed by markup stripping. The pattern below, the regex flags, and the
    // getMatches() signature and setup are reconstructed from the surrounding
    // code. Confirm all of it against version control before merging.
    const joined = sorted
      .map(s => {
        const escaped = escapeRegExp(s)
        // Prefer an occurrence delimited by non-letter/non-digit characters,
        // then fall back to the bare string.
        return `(?<![\\p{L}\\p{N}])${escaped}(?![\\p{L}\\p{N}])|${escaped}`
      })
      .join('|')

    return new RegExp(`(${joined})`, 'gui')
  }

  /**
   * Returns the matches of `words` found in `text`.
   * @param text The text to search
   * @param words The words to look for
   * @param query Optional query; when its best excerpt string is found in
   *   the text, that occurrence is returned first
   * @returns At most ~100 matches, each with its offset
   */
  public getMatches(
    text: string,
    words: string[],
    query?: Query
  ): SearchMatch[] {
    const reg = this.stringsToRegex(words.map(escapeHTML))
    const originalText = text
    if (this.plugin.settings.ignoreDiacritics) {
      // NOTE(review): the original call may have passed additional options.
      text = removeDiacritics(text)
    }
    const startTime = Date.now()
    let match: RegExpExecArray | null = null
    let matches: SearchMatch[] = []
    let count = 0
    while ((match = reg.exec(text)) !== null) {
      // Avoid runaway loops: stop after 100 matches or 50ms
      // NOTE(review): END RECONSTRUCTED SPAN.
      if (++count >= 100 || Date.now() - startTime > 50) {
        warnVerbose('Stopped getMatches at', count, 'results')
        break
      }
      const matchStartIndex = match.index
      const matchEndIndex = matchStartIndex + match[0].length
      // If `ignoreDiacritics` is on, `text` may have a different length than `originalText`,
      // making `match.index` unreliable for `originalText`.
      // We use `match[0]`, which is the matched term (but without diacritics).
      const originalMatchBeforeTrim = this.plugin.settings.ignoreDiacritics
        ? match[0]
        : originalText.substring(matchStartIndex, matchEndIndex)
      const originalMatch = originalMatchBeforeTrim.trim()
      if (originalMatch && match.index >= 0) {
        matches.push({ match: originalMatch, offset: match.index })
      }
    }

    // If the query is more than 1 token and can be found "as is" in the text, put this match first
    if (
      query &&
      (query.query.text.length > 1 || query.getExactTerms().length > 0)
    ) {
      const bestMatchStr = query.getBestStringForExcerpt()
      const best = text.toLowerCase().indexOf(bestMatchStr)
      if (best > -1) {
        // We found the full query. We make it the first result, and remove any other match that it contains.
        matches = matches.filter(
          m => m.offset < best || m.offset >= best + bestMatchStr.length
        )
        matches.unshift({
          offset: best,
          match: originalText.substring(best, best + bestMatchStr.length),
        })
      }
    }
    return matches
  }

  /**
   * Builds an HTML-escaped excerpt of `content` around `offset`.
   *
   * The window spans `excerptBefore` characters before the offset and
   * `excerptAfter` characters after it, with an ellipsis on each truncated
   * side. When the `renderLineReturnInExcerpts` setting is on, the excerpt
   * starts at the last line return found at or before the match, empty lines
   * are dropped, and the remaining line returns are rendered as `<br>`.
   *
   * @param content The full text to cut the excerpt from
   * @param offset Position of the match in `content`; when nullish, the
   *   excerpt is simply the first `excerptAfter` characters
   * @returns The escaped excerpt, or an empty string if an error occurred
   *   (a Notice is shown and the error is logged)
   */
  public makeExcerpt(content: string, offset: number): string {
    const settings = this.plugin.settings
    try {
      const pos = offset ?? -1
      const from = Math.max(0, pos - excerptBefore)
      const to = Math.min(content.length, pos + excerptAfter)
      if (pos > -1) {
        content =
          (from > 0 ? '…' : '') +
          content.slice(from, to).trim() +
          (to < content.length - 1 ? '…' : '')
      } else {
        content = content.slice(0, excerptAfter)
      }
      if (settings.renderLineReturnInExcerpts) {
        const last = content.lastIndexOf('\n', pos - from)

        if (last > 0) {
          content = content.slice(last)
        }

        const lineReturn = /(?:\r\n|\r|\n)/g
        // Remove multiple line returns
        content = content
          .split(lineReturn)
          .filter(l => l)
          .join('\n')
      }

      // Escape first, so that the <br> inserted below is the only markup
      content = escapeHTML(content)

      if (settings.renderLineReturnInExcerpts) {
        content = content.trim().replaceAll('\n', '<br>')
      }

      return content
    } catch (e) {
      new Notice(
        'Omnisearch - Error while creating excerpt, see developer console'
      )
      console.error(`Omnisearch - Error while creating excerpt`)
      console.error(e)
      return ''
    }
  }
}

/**
 * Escapes the characters that are significant in HTML (`&`, `<`, `>`, `"`,
 * `'`) so the string can be inserted into markup as plain text.
 * `&` is replaced first so the entities produced by the later replacements
 * are not escaped a second time.
 * @param html The raw string
 * @returns The string with those five characters replaced by entities
 */
export function escapeHTML(html: string): string {
  return html
    .replaceAll('&', '&amp;')
    .replaceAll('<', '&lt;')
    .replaceAll('>', '&gt;')
    .replaceAll('"', '&quot;')
    .replaceAll("'", '&#039;')
}