Modify search to work with article contents
This commit is contained in:
@@ -0,0 +1,67 @@
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
level=logging.INFO)
|
||||
|
||||
import database
|
||||
from sqlalchemy import select
|
||||
import search
|
||||
import sys
|
||||
|
||||
import time
|
||||
import json
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Upper bound on how many stories are collected into one indexing request.
BATCH_SIZE = 1000

# Run the init() hooks of the project's database and search modules once,
# database first, before any of the indexing code below executes.
database.init()
search.init()
|
||||
|
||||
def put_stories(stories):
    """Submit a batch of story documents to the ``qotnews`` index.

    ``stories`` is passed through unchanged as the payload of a POST to
    ``indexes/qotnews/documents`` made via ``search.meili_api``; that
    helper's return value is handed straight back to the caller.
    """
    endpoint = 'indexes/qotnews/documents'
    return search.meili_api(requests.post, endpoint, stories)
|
||||
|
||||
def get_update(update_id):
    """Fetch the task record for ``update_id``.

    Performs a GET on ``tasks/<update_id>`` through ``search.meili_api`` and
    returns whatever that helper returns.
    """
    task_path = 'tasks/{}'.format(update_id)
    return search.meili_api(requests.get, task_path)
|
||||
|
||||
if __name__ == '__main__':
    # Re-send every story in the database to the search index, in batches of
    # at most BATCH_SIZE, waiting for each batch's task to finish before
    # starting the next one.
    num_stories = database.count_stories()

    # Ask for confirmation first; ctrl-c at the prompt aborts the run.
    print('Reindex {} stories?'.format(num_stories))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    story_list = database.get_story_list()

    count = 1
    while story_list:
        stories = []

        # Pop ids off the end of the list until the batch is full or the
        # list runs dry.
        for _ in range(BATCH_SIZE):
            try:
                sid = story_list.pop()
            except IndexError:
                break

            story = database.get_story(sid)
            # sid is indexed with [0] here, so each entry is presumably a
            # one-column row rather than a bare id -- verify in database.
            print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))

            story_obj = json.loads(story.full_json)
            # Comments are dropped from the document before indexing.
            story_obj.pop('comments', False)

            # Index the article body as plain text rather than HTML markup.
            if 'text' in story_obj and story_obj['text']:
                soup = BeautifulSoup(story_obj['text'], 'html.parser')
                story_obj['text'] = soup.get_text()

            stories.append(story_obj)
            count += 1

        res = put_stories(stories)
        update_id = res['taskUid']

        print('Waiting for processing', end='')
        while True:
            status = get_update(update_id)['status']
            if status == 'succeeded':
                break
            # 'failed' and 'canceled' are terminal: the task will never
            # reach 'succeeded', so polling on would hang forever.
            if status in ('failed', 'canceled'):
                print()
                raise SystemExit(
                    'Indexing task {} ended with status {!r}'.format(update_id, status))
            time.sleep(0.5)
            print('.', end='', flush=True)

        print()

    print('Done.')
|
||||
|
||||
Reference in New Issue
Block a user