Compare commits

6 Commits

Author SHA1 Message Date
Tanner 92dc2bf920 Modify search to work with article contents 2026-06-13 11:54:52 -06:00
Tanner 3467f46323 Ignore data.ms.old/ 2026-06-13 11:54:35 -06:00
tanner 3f97048599 feat: Add "Search in article" filter checkbox to results page
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2025-12-16 17:06:53 -07:00
tanner a10f120fed fix: Extract prose from HTML text field for indexing
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2025-12-12 16:14:36 -07:00
tanner 5bcca8e201 refactor: Adapt Meilisearch integration to v1.29.0 API
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2025-12-12 13:34:48 -07:00
tanner c6f2880c25 feat: Add MeiliSearch API key authentication 2025-12-12 13:34:43 -07:00
7 changed files with 130 additions and 28 deletions
+67
View File
@@ -0,0 +1,67 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.INFO)
import database
from sqlalchemy import select
import search
import sys
import time
import json
import requests
from bs4 import BeautifulSoup
# Initialise the database and search modules before any story is read.
# NOTE(review): these calls sit at module level, so they also run when this
# file is imported, not only when it is executed as a script — confirm intended.
database.init()
search.init()
# Upper bound on the number of stories collected into one indexing request.
BATCH_SIZE = 1000
def put_stories(stories):
    """Send a batch of story documents to the ``qotnews`` index.

    ``stories`` is passed through unchanged as the JSON payload of a POST
    request issued via ``search.meili_api``; that helper's return value is
    handed back to the caller as-is.
    """
    route = 'indexes/qotnews/documents'
    return search.meili_api(requests.post, route, stories)
def get_update(update_id):
    """Look up the task identified by ``update_id``.

    Issues a GET request through ``search.meili_api`` and returns whatever
    that helper returns.
    """
    route = 'tasks/{}'.format(update_id)
    response = search.meili_api(requests.get, route)
    return response
if __name__ == '__main__':
    # Reindex every story in the database into the search index, in batches
    # of at most BATCH_SIZE, waiting for each batch's task to finish before
    # submitting the next one.

    # Task statuses after which the task will never change state again.
    # Polling only for 'succeeded' would spin forever on a failed task.
    TERMINAL_STATUSES = ('succeeded', 'failed', 'canceled')

    num_stories = database.count_stories()

    print('Reindex {} stories?'.format(num_stories))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    story_list = database.get_story_list()

    count = 1
    while story_list:
        stories = []

        for _ in range(BATCH_SIZE):
            try:
                sid = story_list.pop()
            except IndexError:
                # Fewer than BATCH_SIZE stories were left for this batch.
                break

            story = database.get_story(sid)
            print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
            story_obj = json.loads(story.full_json)
            # Comments are dropped from the document before indexing.
            story_obj.pop('comments', False)
            if story_obj.get('text'):
                # Index the plain prose rather than the raw HTML markup.
                soup = BeautifulSoup(story_obj['text'], 'html.parser')
                story_obj['text'] = soup.get_text()
            stories.append(story_obj)
            count += 1

        res = put_stories(stories)
        if not res:
            # search.meili_api returns False on any request error; fail with
            # a clear message instead of a TypeError on res['taskUid'].
            sys.exit('Failed to submit batch to the search index, aborting.')
        update_id = res['taskUid']

        print('Waiting for processing', end='')
        while True:
            update = get_update(update_id)
            if not update:
                print()
                sys.exit('Failed to query status of task {}, aborting.'.format(update_id))
            status = update['status']
            if status in TERMINAL_STATUSES:
                break
            time.sleep(0.5)
            print('.', end='', flush=True)

        print()

        if status != 'succeeded':
            sys.exit('Task {} ended with status "{}", aborting.'.format(update_id, status))

    print('Done.')
+4
View File
@@ -11,6 +11,7 @@ import sys
import time import time
import json import json
import requests import requests
from bs4 import BeautifulSoup
database.init() database.init()
search.init() search.init()
@@ -45,6 +46,9 @@ if __name__ == '__main__':
story = database.get_story(sid) story = database.get_story(sid)
print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title)) print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
story_obj = json.loads(story.meta_json) story_obj = json.loads(story.meta_json)
if 'text' in story_obj and story_obj['text']:
soup = BeautifulSoup(story_obj['text'], 'html.parser')
story_obj['text'] = soup.get_text()
stories.append(story_obj) stories.append(story_obj)
count += 1 count += 1
+22 -23
View File
@@ -10,7 +10,8 @@ SEARCH_ENABLED = bool(settings.MEILI_URL)
def meili_api(method, route, json=None, params=None, parse_json=True): def meili_api(method, route, json=None, params=None, parse_json=True):
try: try:
r = method(settings.MEILI_URL + route, json=json, params=params, timeout=4) headers = {'Authorization': 'Bearer ' + settings.MEILI_API_KEY}
r = method(settings.MEILI_URL + route, json=json, params=params, headers=headers, timeout=4)
if r.status_code > 299: if r.status_code > 299:
raise Exception('Bad response code ' + str(r.status_code)) raise Exception('Bad response code ' + str(r.status_code))
if parse_json: if parse_json:
@@ -24,42 +25,40 @@ def meili_api(method, route, json=None, params=None, parse_json=True):
logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e)) logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
return False return False
def create_index(): def update_settings():
json = dict(uid='qotnews', primaryKey='id') json = {
return meili_api(requests.post, 'indexes', json=json) 'rankingRules': ['words', 'typo', 'proximity', 'attribute', 'date:desc', 'exactness'],
'searchableAttributes': ['title', 'url', 'author', 'text'],
def update_rankings(): 'displayedAttributes': ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments', 'text'],
json = ['typo', 'words', 'proximity', 'date:desc', 'exactness'] 'stopWords': ['a', 'an', 'the', 'and', 'or', 'but', 'if', 'in', 'on', 'at', 'by', 'for', 'with', 'to', 'from', 'of', 'is', 'it', 'that', 'this'],
return meili_api(requests.post, 'indexes/qotnews/settings/ranking-rules', json=json) }
return meili_api(requests.patch, 'indexes/qotnews/settings', json=json)
def update_attributes():
json = ['title', 'url', 'author']
r = meili_api(requests.post, 'indexes/qotnews/settings/searchable-attributes', json=json)
json = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments']
r = meili_api(requests.post, 'indexes/qotnews/settings/displayed-attributes', json=json)
return r
def init(): def init():
if not SEARCH_ENABLED: if not SEARCH_ENABLED:
logging.info('Search is not enabled, skipping init.') logging.info('Search is not enabled, skipping init.')
return return
print(create_index()) update_settings()
update_rankings()
update_attributes()
def put_story(story): def put_story(story):
if not SEARCH_ENABLED: return if not SEARCH_ENABLED: return
return meili_api(requests.post, 'indexes/qotnews/documents', [story]) return meili_api(requests.post, 'indexes/qotnews/documents', [story])
def search(q): def search(q, in_article=False):
if not SEARCH_ENABLED: return [] if not SEARCH_ENABLED: return []
params = dict(q=q, limit=settings.FEED_LENGTH)
r = meili_api(requests.get, 'indexes/qotnews/search', params=params, parse_json=False) json = dict(q=q, limit=settings.FEED_LENGTH)
if True:
json['attributesToSearchOn'] = ['text']
json['attributesToCrop'] = ['text']
json['attributesToRetrieve'] = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments']
json['cropLength'] = 80
r = meili_api(requests.post, 'indexes/qotnews/search', json=json, parse_json=False)
return r return r
if __name__ == '__main__': if __name__ == '__main__':
init() init()
print(update_rankings())
print(search('facebook')) print(search('facebook'))
+3 -2
View File
@@ -131,8 +131,9 @@ def apistats():
@flask_app.route('/api/search', strict_slashes=False) @flask_app.route('/api/search', strict_slashes=False)
def apisearch(): def apisearch():
q = request.args.get('q', '') q = request.args.get('q', '')
in_article = request.args.get('article', False)
if len(q) >= 3: if len(q) >= 3:
results = search.search(q) results = search.search(q, in_article)
else: else:
results = '[]' results = '[]'
res = Response(results) res = Response(results)
@@ -262,7 +263,7 @@ def static_story(sid):
show_comments=request.path.endswith('/c'), show_comments=request.path.endswith('/c'),
) )
http_server = WSGIServer(('', 33842), flask_app) http_server = WSGIServer(('0.0.0.0', 33842), flask_app)
def feed_thread(): def feed_thread():
global news_index, ref_list, current_item global news_index, ref_list, current_item
+1
View File
@@ -4,3 +4,4 @@
meilisearch-linux-amd64 meilisearch-linux-amd64
data.ms/ data.ms/
data.ms.old/
+27 -1
View File
@@ -1,6 +1,7 @@
import React, { useState, useEffect } from 'react'; import React, { useState, useEffect } from 'react';
import { Link, useLocation } from 'react-router-dom'; import { Link, useLocation, useHistory } from 'react-router-dom';
import { Helmet } from 'react-helmet'; import { Helmet } from 'react-helmet';
import queryString from 'query-string';
import { sourceLink, infoLine, logos } from './utils.js'; import { sourceLink, infoLine, logos } from './utils.js';
import AbortController from 'abort-controller'; import AbortController from 'abort-controller';
@@ -8,6 +9,19 @@ function Results() {
const [stories, setStories] = useState(false); const [stories, setStories] = useState(false);
const [error, setError] = useState(false); const [error, setError] = useState(false);
const location = useLocation(); const location = useLocation();
const history = useHistory();
const handleFilterChange = e => {
const isChecked = e.target.checked;
const currentQuery = queryString.parse(location.search);
if (isChecked) {
currentQuery.article = 'true';
} else {
delete currentQuery.article;
}
history.push('/search?' + queryString.stringify(currentQuery));
};
useEffect(() => { useEffect(() => {
const controller = new AbortController(); const controller = new AbortController();
@@ -32,11 +46,19 @@ function Results() {
}; };
}, [location.search]); }, [location.search]);
const searchInArticle = queryString.parse(location.search).article === 'true';
return ( return (
<div className='container'> <div className='container'>
<Helmet> <Helmet>
<title>Search Results | QotNews</title> <title>Search Results | QotNews</title>
</Helmet> </Helmet>
<div style={{marginBottom: '1rem'}}>
<input type="checkbox" id="search-in-article" className="checkbox" checked={searchInArticle} onChange={handleFilterChange} />
<label htmlFor="search-in-article">Search in article</label>
</div>
{error && <p>Connection error?</p>} {error && <p>Connection error?</p>}
{stories ? {stories ?
<> <>
@@ -56,6 +78,10 @@ function Results() {
</div> </div>
{infoLine(x)} {infoLine(x)}
{!!x?._formatted &&
<p>{x._formatted.text.replace(/\n/g, ' ')}</p>
}
</div> </div>
) )
: :
+6 -2
View File
@@ -15,7 +15,9 @@ function Search() {
const newSearch = event.target.value; const newSearch = event.target.value;
setSearch(newSearch); setSearch(newSearch);
if (newSearch.length >= 3) { if (newSearch.length >= 3) {
const searchQuery = queryString.stringify({ 'q': newSearch }); const currentQuery = queryString.parse(location.search);
currentQuery.q = newSearch;
const searchQuery = queryString.stringify(currentQuery);
history.replace('/search?' + searchQuery); history.replace('/search?' + searchQuery);
} else { } else {
history.replace('/'); history.replace('/');
@@ -24,7 +26,9 @@ function Search() {
const searchAgain = (event) => { const searchAgain = (event) => {
event.preventDefault(); event.preventDefault();
const searchString = queryString.stringify({ 'q': event.target[0].value }); const currentQuery = queryString.parse(location.search);
currentQuery.q = event.target[0].value;
const searchString = queryString.stringify(currentQuery);
history.push('/search?' + searchString); history.push('/search?' + searchString);
inputRef.current.blur(); inputRef.current.blur();
} }