forked from tanner/qotnews
Modify search to work with article contents
This commit is contained in:
@@ -0,0 +1,67 @@
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
level=logging.INFO)
|
||||
|
||||
import database
|
||||
from sqlalchemy import select
|
||||
import search
|
||||
import sys
|
||||
|
||||
import time
|
||||
import json
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Module-level setup: runs on import as well as when executed as a script.
# NOTE(review): database.init() is defined elsewhere; presumably it prepares the
# database connection used by the calls below — confirm.
database.init()
|
||||
# Prepares the search backend before any documents are pushed.
search.init()
|
||||
|
||||
# Upper bound on how many stories are collected into one indexing request.
BATCH_SIZE = 1000
|
||||
|
||||
def put_stories(stories):
    """Submit a batch of story documents to the 'qotnews' index.

    ``stories`` is passed through as the JSON body of a POST request.
    Returns whatever ``search.meili_api`` returns for that request.
    """
    documents_route = 'indexes/qotnews/documents'
    response = search.meili_api(requests.post, documents_route, stories)
    return response
|
||||
|
||||
def get_update(update_id):
    """Fetch the task record identified by ``update_id``.

    Issues a GET on the ``tasks/<update_id>`` route and returns whatever
    ``search.meili_api`` returns for it.
    """
    task_route = f'tasks/{update_id}'
    return search.meili_api(requests.get, task_route)
|
||||
|
||||
if __name__ == '__main__':
    # Interactive one-shot script: push every story from the database into
    # the search index, at most BATCH_SIZE documents per request, waiting
    # for each batch to be processed before sending the next one.
    num_stories = database.count_stories()

    print('Reindex {} stories?'.format(num_stories))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    story_list = database.get_story_list()

    count = 1
    while story_list:
        stories = []

        for _ in range(BATCH_SIZE):
            try:
                sid = story_list.pop()
            except IndexError:
                # Fewer than BATCH_SIZE stories were left for this batch.
                break

            story = database.get_story(sid)
            print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
            story_obj = json.loads(story.full_json)
            # Comments are dropped from the indexed document.
            story_obj.pop('comments', False)
            if story_obj.get('text'):
                # Index the plain text of the article rather than its HTML.
                soup = BeautifulSoup(story_obj['text'], 'html.parser')
                story_obj['text'] = soup.get_text()
            stories.append(story_obj)
            count += 1

        res = put_stories(stories)
        if not res:
            # meili_api logs the problem and returns False instead of raising;
            # without this check the subscript below fails with a TypeError.
            sys.exit('Could not submit batch to MeiliSearch, aborting.')
        update_id = res['taskUid']

        print('Waiting for processing', end='')
        while True:
            task = get_update(update_id)
            if not task:
                print()
                sys.exit('Could not read status of task {}, aborting.'.format(update_id))

            status = task['status']
            if status == 'succeeded':
                break
            if status in ('failed', 'canceled'):
                # These are terminal states: polling for 'succeeded' would
                # never finish, so stop and report the task error instead.
                print()
                sys.exit('Task {} ended with status {}: {}'.format(
                    update_id, status, task.get('error')))

            time.sleep(0.5)
            print('.', end='', flush=True)

        print()

    print('Done.')
|
||||
|
||||
+15
-11
@@ -11,7 +11,7 @@ SEARCH_ENABLED = bool(settings.MEILI_URL)
|
||||
def meili_api(method, route, json=None, params=None, parse_json=True):
|
||||
try:
|
||||
headers = {'Authorization': 'Bearer ' + settings.MEILI_API_KEY}
|
||||
r = method(settings.MEILI_URL + route, json=json, params=params, timeout=4)
|
||||
r = method(settings.MEILI_URL + route, json=json, params=params, headers=headers, timeout=4)
|
||||
if r.status_code > 299:
|
||||
raise Exception('Bad response code ' + str(r.status_code))
|
||||
if parse_json:
|
||||
@@ -25,32 +25,36 @@ def meili_api(method, route, json=None, params=None, parse_json=True):
|
||||
logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
|
||||
return False
|
||||
|
||||
def create_index():
    """Request creation of the 'qotnews' index, keyed on the ``id`` field.

    Returns whatever ``meili_api`` returns for the POST to ``indexes``.
    """
    index_spec = {'uid': 'qotnews', 'primaryKey': 'id'}
    return meili_api(requests.post, 'indexes', json=index_spec)
|
||||
|
||||
def update_settings():
    """Apply ranking, searchable/displayed attribute and stop-word settings
    to the 'qotnews' index.

    The article ``text`` field is both searchable and displayable so that
    queries can match article contents. The settings are sent with PATCH.

    Returns whatever ``meili_api`` returns for the request (``False`` when
    the request failed).
    """
    # Each key appears exactly once; the superseded pre-change values and the
    # superseded POST request have been dropped so the PATCH call is reached.
    index_settings = {
        'rankingRules': ['words', 'typo', 'proximity', 'attribute', 'date:desc', 'exactness'],
        'searchableAttributes': ['title', 'url', 'author', 'text'],
        'displayedAttributes': ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments', 'text'],
        'stopWords': ['a', 'an', 'the', 'and', 'or', 'but', 'if', 'in', 'on', 'at', 'by', 'for', 'with', 'to', 'from', 'of', 'is', 'it', 'that', 'this'],
    }
    return meili_api(requests.patch, 'indexes/qotnews/settings', json=index_settings)
|
||||
|
||||
def init():
    """Set up the search index when search is enabled.

    With search enabled, the index is created (the result of that call is
    printed) and the index settings are applied. Otherwise an info message
    is logged and nothing else happens. Always returns None.
    """
    if SEARCH_ENABLED:
        creation_result = create_index()
        print(creation_result)
        update_settings()
    else:
        logging.info('Search is not enabled, skipping init.')
|
||||
|
||||
def put_story(story):
    """Add one story document to the 'qotnews' index.

    Returns None without contacting the backend when search is disabled;
    otherwise returns whatever ``meili_api`` returns for the POST.
    """
    if SEARCH_ENABLED:
        single_batch = [story]
        return meili_api(requests.post, 'indexes/qotnews/documents', single_batch)
    return None
|
||||
|
||||
def search(q, in_article=False):
    """Query the 'qotnews' index for ``q``.

    Parameters:
        q: the query string.
        in_article: when truthy, match only against the article ``text``
            field and ask for a cropped excerpt of it (80 words) in the
            formatted result. When falsy, all searchable attributes are
            used and no excerpt is requested.

    Returns:
        ``[]`` when search is disabled; otherwise whatever ``meili_api``
        returns with ``parse_json=False`` (``False`` if the request failed).
        NOTE(review): presumably the unparsed response body — confirm
        against ``meili_api``.
    """
    if not SEARCH_ENABLED:
        return []

    payload = dict(q=q, limit=settings.FEED_LENGTH)

    # Never ship the full article text with each hit; only the listed
    # fields are returned, in both search modes.
    payload['attributesToRetrieve'] = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments']

    # Honour the flag: this branch was previously hard-coded to always run,
    # which made every query an article-text-only search.
    if in_article:
        payload['attributesToSearchOn'] = ['text']
        payload['attributesToCrop'] = ['text']
        payload['cropLength'] = 80

    return meili_api(requests.post, 'indexes/qotnews/search', json=payload, parse_json=False)
|
||||
|
||||
|
||||
+3
-2
@@ -131,8 +131,9 @@ def apistats():
|
||||
@flask_app.route('/api/search', strict_slashes=False)
|
||||
def apisearch():
|
||||
q = request.args.get('q', '')
|
||||
in_article = request.args.get('article', False)
|
||||
if len(q) >= 3:
|
||||
results = search.search(q)
|
||||
results = search.search(q, in_article)
|
||||
else:
|
||||
results = '[]'
|
||||
res = Response(results)
|
||||
@@ -262,7 +263,7 @@ def static_story(sid):
|
||||
show_comments=request.path.endswith('/c'),
|
||||
)
|
||||
|
||||
http_server = WSGIServer(('', 33842), flask_app)
|
||||
http_server = WSGIServer(('0.0.0.0', 33842), flask_app)
|
||||
|
||||
def feed_thread():
|
||||
global news_index, ref_list, current_item
|
||||
|
||||
@@ -78,6 +78,10 @@ function Results() {
|
||||
</div>
|
||||
|
||||
{infoLine(x)}
|
||||
|
||||
{!!x?._formatted &&
|
||||
<p>{x._formatted.text.replace(/\n/g, ' ')}</p>
|
||||
}
|
||||
</div>
|
||||
)
|
||||
:
|
||||
|
||||
Reference in New Issue
Block a user