forked from tanner/qotnews
fix: Extract prose from HTML text field for indexing
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
This commit is contained in:
@@ -11,6 +11,7 @@ import sys
|
|||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
import requests
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
database.init()
|
database.init()
|
||||||
search.init()
|
search.init()
|
||||||
@@ -45,6 +46,9 @@ if __name__ == '__main__':
|
|||||||
story = database.get_story(sid)
|
story = database.get_story(sid)
|
||||||
print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
|
print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
|
||||||
story_obj = json.loads(story.meta_json)
|
story_obj = json.loads(story.meta_json)
|
||||||
|
if 'text' in story_obj and story_obj['text']:
|
||||||
|
soup = BeautifulSoup(story_obj['text'], 'html.parser')
|
||||||
|
story_obj['text'] = soup.get_text()
|
||||||
stories.append(story_obj)
|
stories.append(story_obj)
|
||||||
count += 1
|
count += 1
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user