diff --git a/apiserver/scripts/reindex.py b/apiserver/scripts/reindex.py index ee1334c..2626a28 100644 --- a/apiserver/scripts/reindex.py +++ b/apiserver/scripts/reindex.py @@ -11,6 +11,7 @@ import sys import time import json import requests +from bs4 import BeautifulSoup database.init() search.init() @@ -45,6 +46,9 @@ if __name__ == '__main__': story = database.get_story(sid) print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title)) story_obj = json.loads(story.meta_json) + if 'text' in story_obj and story_obj['text']: + soup = BeautifulSoup(story_obj['text'], 'html.parser') + story_obj['text'] = soup.get_text() stories.append(story_obj) count += 1