Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 92dc2bf920 | |||
| 3467f46323 | |||
| 3f97048599 | |||
| a10f120fed | |||
| 5bcca8e201 | |||
| c6f2880c25 | |||
| 5d014f50df |
@@ -0,0 +1,67 @@
|
|||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
level=logging.INFO)
|
||||||
|
|
||||||
|
import database
|
||||||
|
from sqlalchemy import select
|
||||||
|
import search
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import time
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
database.init()
|
||||||
|
search.init()
|
||||||
|
|
||||||
|
BATCH_SIZE = 1000
|
||||||
|
|
||||||
|
def put_stories(stories):
    """Upload a batch of story documents to the 'qotnews' index.

    `stories` is passed straight through as the JSON body of the POST.
    Returns whatever search.meili_api returns for the request.
    """
    route = 'indexes/qotnews/documents'
    return search.meili_api(requests.post, route, stories)
|
||||||
|
|
||||||
|
def get_update(update_id):
    """Fetch the task record for `update_id` from the search server.

    Returns whatever search.meili_api returns for the GET on 'tasks/<id>'.
    """
    route = 'tasks/{}'.format(update_id)
    return search.meili_api(requests.get, route)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Interactive one-off script: push every story in the database to the
    # search index in batches of BATCH_SIZE, waiting for the server to finish
    # processing each batch before sending the next one.
    num_stories = database.count_stories()

    print('Reindex {} stories?'.format(num_stories))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    story_list = database.get_story_list()

    count = 1
    while story_list:
        stories = []

        # Pop up to BATCH_SIZE ids off the end of the list; the IndexError
        # guard ends the batch early once the list runs dry.
        for _ in range(BATCH_SIZE):
            try:
                sid = story_list.pop()
            except IndexError:
                break

            story = database.get_story(sid)
            print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))

            story_obj = json.loads(story.full_json)
            # Comments are dropped from the document before it is indexed.
            story_obj.pop('comments', False)

            # Index plain text rather than HTML markup.
            if story_obj.get('text'):
                soup = BeautifulSoup(story_obj['text'], 'html.parser')
                story_obj['text'] = soup.get_text()

            stories.append(story_obj)
            count += 1

        res = put_stories(stories)
        # meili_api returns False when the request fails; indexing the result
        # would otherwise raise an unhelpful TypeError.
        if not res:
            print('Failed to submit batch to search server, aborting.')
            sys.exit(1)
        update_id = res['taskUid']

        print('Waiting for processing', end='')
        while True:
            update = get_update(update_id)
            # A falsy result means the status request itself failed (e.g. a
            # timeout); treat it as "not finished yet" and poll again.
            status = update['status'] if update else None

            if status == 'succeeded':
                break

            # A task that ended unsuccessfully will never reach 'succeeded',
            # so stop instead of polling forever.
            if status in ('failed', 'canceled'):
                print()
                print('Task {} {}: {}'.format(update_id, status, update.get('error')))
                sys.exit(1)

            time.sleep(0.5)
            print('.', end='', flush=True)

        print()

    print('Done.')
|
||||||
|
|
||||||
@@ -11,6 +11,7 @@ import sys
|
|||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
import requests
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
database.init()
|
database.init()
|
||||||
search.init()
|
search.init()
|
||||||
@@ -45,6 +46,9 @@ if __name__ == '__main__':
|
|||||||
story = database.get_story(sid)
|
story = database.get_story(sid)
|
||||||
print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
|
print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
|
||||||
story_obj = json.loads(story.meta_json)
|
story_obj = json.loads(story.meta_json)
|
||||||
|
if 'text' in story_obj and story_obj['text']:
|
||||||
|
soup = BeautifulSoup(story_obj['text'], 'html.parser')
|
||||||
|
story_obj['text'] = soup.get_text()
|
||||||
stories.append(story_obj)
|
stories.append(story_obj)
|
||||||
count += 1
|
count += 1
|
||||||
|
|
||||||
|
|||||||
+22
-23
@@ -10,7 +10,8 @@ SEARCH_ENABLED = bool(settings.MEILI_URL)
|
|||||||
|
|
||||||
def meili_api(method, route, json=None, params=None, parse_json=True):
|
def meili_api(method, route, json=None, params=None, parse_json=True):
|
||||||
try:
|
try:
|
||||||
r = method(settings.MEILI_URL + route, json=json, params=params, timeout=4)
|
headers = {'Authorization': 'Bearer ' + settings.MEILI_API_KEY}
|
||||||
|
r = method(settings.MEILI_URL + route, json=json, params=params, headers=headers, timeout=4)
|
||||||
if r.status_code > 299:
|
if r.status_code > 299:
|
||||||
raise Exception('Bad response code ' + str(r.status_code))
|
raise Exception('Bad response code ' + str(r.status_code))
|
||||||
if parse_json:
|
if parse_json:
|
||||||
@@ -24,42 +25,40 @@ def meili_api(method, route, json=None, params=None, parse_json=True):
|
|||||||
logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
|
logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def create_index():
|
def update_settings():
|
||||||
json = dict(uid='qotnews', primaryKey='id')
|
json = {
|
||||||
return meili_api(requests.post, 'indexes', json=json)
|
'rankingRules': ['words', 'typo', 'proximity', 'attribute', 'date:desc', 'exactness'],
|
||||||
|
'searchableAttributes': ['title', 'url', 'author', 'text'],
|
||||||
def update_rankings():
|
'displayedAttributes': ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments', 'text'],
|
||||||
json = ['typo', 'words', 'proximity', 'date:desc', 'exactness']
|
'stopWords': ['a', 'an', 'the', 'and', 'or', 'but', 'if', 'in', 'on', 'at', 'by', 'for', 'with', 'to', 'from', 'of', 'is', 'it', 'that', 'this'],
|
||||||
return meili_api(requests.post, 'indexes/qotnews/settings/ranking-rules', json=json)
|
}
|
||||||
|
return meili_api(requests.patch, 'indexes/qotnews/settings', json=json)
|
||||||
def update_attributes():
|
|
||||||
json = ['title', 'url', 'author']
|
|
||||||
r = meili_api(requests.post, 'indexes/qotnews/settings/searchable-attributes', json=json)
|
|
||||||
json = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments']
|
|
||||||
r = meili_api(requests.post, 'indexes/qotnews/settings/displayed-attributes', json=json)
|
|
||||||
return r
|
|
||||||
|
|
||||||
def init():
|
def init():
|
||||||
if not SEARCH_ENABLED:
|
if not SEARCH_ENABLED:
|
||||||
logging.info('Search is not enabled, skipping init.')
|
logging.info('Search is not enabled, skipping init.')
|
||||||
return
|
return
|
||||||
print(create_index())
|
update_settings()
|
||||||
update_rankings()
|
|
||||||
update_attributes()
|
|
||||||
|
|
||||||
def put_story(story):
|
def put_story(story):
|
||||||
if not SEARCH_ENABLED: return
|
if not SEARCH_ENABLED: return
|
||||||
return meili_api(requests.post, 'indexes/qotnews/documents', [story])
|
return meili_api(requests.post, 'indexes/qotnews/documents', [story])
|
||||||
|
|
||||||
def search(q, in_article=False):
    """Query the 'qotnews' index for `q`.

    q: the search string.
    in_article: when truthy, restrict matching to the 'text' attribute and
        request a cropped 'text' excerpt (cropLength 80) alongside the usual
        story fields. When falsy, the index's default searchable attributes
        are used and no excerpt options are sent.

    Returns [] when search is disabled; otherwise returns the result of
    meili_api called with parse_json=False (presumably the raw response
    body -- confirm against meili_api).
    """
    if not SEARCH_ENABLED: return []

    payload = dict(q=q, limit=settings.FEED_LENGTH)

    # Only apply the article-text options when the caller asked for them;
    # previously this branch was unconditional and `in_article` was ignored.
    if in_article:
        payload['attributesToSearchOn'] = ['text']
        payload['attributesToCrop'] = ['text']
        payload['attributesToRetrieve'] = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments']
        payload['cropLength'] = 80

    r = meili_api(requests.post, 'indexes/qotnews/search', json=payload, parse_json=False)
    return r
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
init()
|
init()
|
||||||
|
|
||||||
print(update_rankings())
|
|
||||||
|
|
||||||
print(search('facebook'))
|
print(search('facebook'))
|
||||||
|
|||||||
+3
-2
@@ -131,8 +131,9 @@ def apistats():
|
|||||||
@flask_app.route('/api/search', strict_slashes=False)
|
@flask_app.route('/api/search', strict_slashes=False)
|
||||||
def apisearch():
|
def apisearch():
|
||||||
q = request.args.get('q', '')
|
q = request.args.get('q', '')
|
||||||
|
in_article = request.args.get('article', False)
|
||||||
if len(q) >= 3:
|
if len(q) >= 3:
|
||||||
results = search.search(q)
|
results = search.search(q, in_article)
|
||||||
else:
|
else:
|
||||||
results = '[]'
|
results = '[]'
|
||||||
res = Response(results)
|
res = Response(results)
|
||||||
@@ -262,7 +263,7 @@ def static_story(sid):
|
|||||||
show_comments=request.path.endswith('/c'),
|
show_comments=request.path.endswith('/c'),
|
||||||
)
|
)
|
||||||
|
|
||||||
http_server = WSGIServer(('', 33842), flask_app)
|
http_server = WSGIServer(('0.0.0.0', 33842), flask_app)
|
||||||
|
|
||||||
def feed_thread():
|
def feed_thread():
|
||||||
global news_index, ref_list, current_item
|
global news_index, ref_list, current_item
|
||||||
|
|||||||
@@ -4,3 +4,4 @@
|
|||||||
|
|
||||||
meilisearch-linux-amd64
|
meilisearch-linux-amd64
|
||||||
data.ms/
|
data.ms/
|
||||||
|
data.ms.old/
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ const DANGEROUS_TAGS = ['svg', 'math'];
|
|||||||
const latexDelimiters = [
|
const latexDelimiters = [
|
||||||
{ left: '$$', right: '$$', display: true },
|
{ left: '$$', right: '$$', display: true },
|
||||||
{ left: '\\[', right: '\\]', display: true },
|
{ left: '\\[', right: '\\]', display: true },
|
||||||
{ left: '$', right: '$', display: false },
|
|
||||||
{ left: '\\(', right: '\\)', display: false }
|
{ left: '\\(', right: '\\)', display: false }
|
||||||
];
|
];
|
||||||
|
|
||||||
@@ -106,7 +105,7 @@ function Article({ cache }) {
|
|||||||
|
|
||||||
if (v.nodeName === '#text') {
|
if (v.nodeName === '#text') {
|
||||||
const text = v.data;
|
const text = v.data;
|
||||||
if (text.includes('\\[') || text.includes('\\(') || text.includes('$$') || text.includes('$')) {
|
if (text.includes('\\[') || text.includes('\\(') || text.includes('$$')) {
|
||||||
return <Latex key={key} delimiters={latexDelimiters}>{text}</Latex>;
|
return <Latex key={key} delimiters={latexDelimiters}>{text}</Latex>;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -138,8 +137,7 @@ function Article({ cache }) {
|
|||||||
const textContent = v.textContent.trim();
|
const textContent = v.textContent.trim();
|
||||||
const isMath = (textContent.startsWith('\\(') && textContent.endsWith('\\)')) ||
|
const isMath = (textContent.startsWith('\\(') && textContent.endsWith('\\)')) ||
|
||||||
(textContent.startsWith('\\[') && textContent.endsWith('\\]')) ||
|
(textContent.startsWith('\\[') && textContent.endsWith('\\]')) ||
|
||||||
(textContent.startsWith('$$') && textContent.endsWith('$$')) ||
|
(textContent.startsWith('$$') && textContent.endsWith('$$'));
|
||||||
(textContent.startsWith('$') && textContent.endsWith('$') && textContent.indexOf('$') !== textContent.lastIndexOf('$'));
|
|
||||||
|
|
||||||
const props = { key: key };
|
const props = { key: key };
|
||||||
if (v.hasAttributes()) {
|
if (v.hasAttributes()) {
|
||||||
@@ -159,13 +157,6 @@ function Article({ cache }) {
|
|||||||
const firstParen = mathContent.indexOf('\\(');
|
const firstParen = mathContent.indexOf('\\(');
|
||||||
const lastParen = mathContent.lastIndexOf('\\)');
|
const lastParen = mathContent.lastIndexOf('\\)');
|
||||||
mathContent = mathContent.substring(0, firstParen) + '\\[' + mathContent.substring(firstParen + 2, lastParen) + '\\]' + mathContent.substring(lastParen + 2);
|
mathContent = mathContent.substring(0, firstParen) + '\\[' + mathContent.substring(firstParen + 2, lastParen) + '\\]' + mathContent.substring(lastParen + 2);
|
||||||
} else if (trimmed.startsWith('$') && !trimmed.startsWith('$$')) {
|
|
||||||
// Replace $ with $$
|
|
||||||
const firstDollar = mathContent.indexOf('$');
|
|
||||||
const lastDollar = mathContent.lastIndexOf('$');
|
|
||||||
if (firstDollar !== lastDollar) {
|
|
||||||
mathContent = mathContent.substring(0, firstDollar) + '$$' + mathContent.substring(firstDollar + 1, lastDollar) + '$$' + mathContent.substring(lastDollar + 1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return <Tag {...props}><Latex delimiters={latexDelimiters}>{mathContent}</Latex></Tag>;
|
return <Tag {...props}><Latex delimiters={latexDelimiters}>{mathContent}</Latex></Tag>;
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import React, { useState, useEffect } from 'react';
|
import React, { useState, useEffect } from 'react';
|
||||||
import { Link, useLocation } from 'react-router-dom';
|
import { Link, useLocation, useHistory } from 'react-router-dom';
|
||||||
import { Helmet } from 'react-helmet';
|
import { Helmet } from 'react-helmet';
|
||||||
|
import queryString from 'query-string';
|
||||||
import { sourceLink, infoLine, logos } from './utils.js';
|
import { sourceLink, infoLine, logos } from './utils.js';
|
||||||
import AbortController from 'abort-controller';
|
import AbortController from 'abort-controller';
|
||||||
|
|
||||||
@@ -8,6 +9,19 @@ function Results() {
|
|||||||
const [stories, setStories] = useState(false);
|
const [stories, setStories] = useState(false);
|
||||||
const [error, setError] = useState(false);
|
const [error, setError] = useState(false);
|
||||||
const location = useLocation();
|
const location = useLocation();
|
||||||
|
const history = useHistory();
|
||||||
|
|
||||||
|
const handleFilterChange = e => {
|
||||||
|
const isChecked = e.target.checked;
|
||||||
|
|
||||||
|
const currentQuery = queryString.parse(location.search);
|
||||||
|
if (isChecked) {
|
||||||
|
currentQuery.article = 'true';
|
||||||
|
} else {
|
||||||
|
delete currentQuery.article;
|
||||||
|
}
|
||||||
|
history.push('/search?' + queryString.stringify(currentQuery));
|
||||||
|
};
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
@@ -32,11 +46,19 @@ function Results() {
|
|||||||
};
|
};
|
||||||
}, [location.search]);
|
}, [location.search]);
|
||||||
|
|
||||||
|
const searchInArticle = queryString.parse(location.search).article === 'true';
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className='container'>
|
<div className='container'>
|
||||||
<Helmet>
|
<Helmet>
|
||||||
<title>Search Results | QotNews</title>
|
<title>Search Results | QotNews</title>
|
||||||
</Helmet>
|
</Helmet>
|
||||||
|
|
||||||
|
<div style={{marginBottom: '1rem'}}>
|
||||||
|
<input type="checkbox" id="search-in-article" className="checkbox" checked={searchInArticle} onChange={handleFilterChange} />
|
||||||
|
<label htmlFor="search-in-article">Search in article</label>
|
||||||
|
</div>
|
||||||
|
|
||||||
{error && <p>Connection error?</p>}
|
{error && <p>Connection error?</p>}
|
||||||
{stories ?
|
{stories ?
|
||||||
<>
|
<>
|
||||||
@@ -56,6 +78,10 @@ function Results() {
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
{infoLine(x)}
|
{infoLine(x)}
|
||||||
|
|
||||||
|
{!!x?._formatted &&
|
||||||
|
<p>{x._formatted.text.replace(/\n/g, ' ')}</p>
|
||||||
|
}
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
:
|
:
|
||||||
|
|||||||
@@ -15,7 +15,9 @@ function Search() {
|
|||||||
const newSearch = event.target.value;
|
const newSearch = event.target.value;
|
||||||
setSearch(newSearch);
|
setSearch(newSearch);
|
||||||
if (newSearch.length >= 3) {
|
if (newSearch.length >= 3) {
|
||||||
const searchQuery = queryString.stringify({ 'q': newSearch });
|
const currentQuery = queryString.parse(location.search);
|
||||||
|
currentQuery.q = newSearch;
|
||||||
|
const searchQuery = queryString.stringify(currentQuery);
|
||||||
history.replace('/search?' + searchQuery);
|
history.replace('/search?' + searchQuery);
|
||||||
} else {
|
} else {
|
||||||
history.replace('/');
|
history.replace('/');
|
||||||
@@ -24,7 +26,9 @@ function Search() {
|
|||||||
|
|
||||||
const searchAgain = (event) => {
|
const searchAgain = (event) => {
|
||||||
event.preventDefault();
|
event.preventDefault();
|
||||||
const searchString = queryString.stringify({ 'q': event.target[0].value });
|
const currentQuery = queryString.parse(location.search);
|
||||||
|
currentQuery.q = event.target[0].value;
|
||||||
|
const searchString = queryString.stringify(currentQuery);
|
||||||
history.push('/search?' + searchString);
|
history.push('/search?' + searchString);
|
||||||
inputRef.current.blur();
|
inputRef.current.blur();
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user