Files
qotnews/apiserver/reindex.py
T
2026-06-13 11:54:52 -06:00

68 lines
1.7 KiB
Python

import logging
# Configure root logging: INFO level, each record shows timestamp, logger
# name, level and message.
# NOTE(review): this sits ahead of the project imports below, presumably so
# that anything they log at import time already uses this format — confirm
# before moving it.
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.INFO)
import database
from sqlalchemy import select
import search
import sys
import time
import json
import requests
from bs4 import BeautifulSoup
# Run the project modules' init() hooks. These execute at module level, so
# they run on import as well as when the file is started as a script.
# NOTE(review): what each init() actually sets up is not visible from this
# file — confirm against database.py / search.py.
database.init()
search.init()
# Maximum number of stories sent to the search index in a single request.
BATCH_SIZE = 1000
def put_stories(stories):
    """Send a batch of story documents to the ``qotnews`` search index.

    The batch is POSTed to ``indexes/qotnews/documents`` through
    ``search.meili_api``; whatever that helper returns is handed back
    unchanged to the caller.
    """
    route = 'indexes/qotnews/documents'
    response = search.meili_api(requests.post, route, stories)
    return response
def get_update(update_id):
    """Look up the task record for ``update_id``.

    Performs a GET on ``tasks/<update_id>`` through ``search.meili_api`` and
    returns that helper's result unchanged.
    """
    route = 'tasks/{}'.format(update_id)
    task = search.meili_api(requests.get, route)
    return task
# Task states after which the search server will never report 'succeeded';
# polling must stop on these or the script would wait forever.
_FAILED_TASK_STATUSES = ('failed', 'canceled')


def _story_document(story):
    """Build the search-index document for one stored story.

    Decodes ``story.full_json``, drops the ``comments`` entry if present and
    replaces a non-empty ``text`` field with its plain-text rendering (HTML
    markup removed by BeautifulSoup).
    """
    story_obj = json.loads(story.full_json)
    story_obj.pop('comments', False)
    if story_obj.get('text'):
        soup = BeautifulSoup(story_obj['text'], 'html.parser')
        story_obj['text'] = soup.get_text()
    return story_obj


def _wait_for_task(update_id):
    """Poll the task ``update_id`` every 0.5 s until it has succeeded.

    Prints a progress dot per poll. If the task reaches a terminal failure
    state the script exits with a non-zero status and the task record in the
    message, instead of polling forever.
    """
    print('Waiting for processing', end='')
    while True:
        task = get_update(update_id)
        status = task['status']
        if status == 'succeeded':
            break
        if status in _FAILED_TASK_STATUSES:
            print()
            sys.exit('Indexing task {} ended with status {}: {}'.format(
                update_id, status, task))
        time.sleep(0.5)
        print('.', end='', flush=True)
    print()


if __name__ == '__main__':
    num_stories = database.count_stories()

    # Reindexing touches every story, so ask for confirmation first.
    print('Reindex {} stories?'.format(num_stories))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    story_list = database.get_story_list()

    count = 1
    while story_list:
        stories = []

        # Take at most BATCH_SIZE ids off the end of the list per request.
        for _ in range(min(BATCH_SIZE, len(story_list))):
            sid = story_list.pop()
            story = database.get_story(sid)
            print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
            stories.append(_story_document(story))
            count += 1

        res = put_stories(stories)
        # Wait for this batch to finish before submitting the next one.
        _wait_for_task(res['taskUid'])

    print('Done.')