Compare commits

...

21 Commits

Author SHA1 Message Date
Tanner 92dc2bf920 Modify search to work with article contents 2026-06-13 11:54:52 -06:00
Tanner 3467f46323 Ignore data.ms.old/ 2026-06-13 11:54:35 -06:00
tanner 3f97048599 feat: Add "Search in article" filter checkbox to results page
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2025-12-16 17:06:53 -07:00
tanner a10f120fed fix: Extract prose from HTML text field for indexing
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2025-12-12 16:14:36 -07:00
tanner 5bcca8e201 refactor: Adapt Meilisearch integration to v1.29.0 API
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2025-12-12 13:34:48 -07:00
tanner c6f2880c25 feat: Add MeiliSearch API key authentication 2025-12-12 13:34:43 -07:00
tanner 5d014f50df fix: Remove single dollar sign math rendering due to false positives
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2025-12-05 17:24:40 +00:00
tanner bcfdff1067 Fix dt dd tags margin 2025-12-05 00:59:02 +00:00
tanner a888e38ae8 fix: Adjust comment metadata indentation in comments
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2025-12-05 00:49:13 +00:00
tanner 2bd51bb1cb fix: Refactor comments with DL/DD for text browser compatibility
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2025-12-05 00:45:00 +00:00
tanner 655346a7eb chore: Remove unused nojs div 2025-12-05 00:44:58 +00:00
tanner 125c1c5225 Fix buttons in color themes 2025-12-05 00:35:06 +00:00
tanner 5dd2069af5 Clear stories first on checkbox change 2025-12-04 23:12:30 +00:00
tanner d68fc73af5 Don't setStories when existing list is empty 2025-12-04 22:57:26 +00:00
tanner ff1297e507 Style checkbox 2025-12-04 22:55:23 +00:00
tanner 1d019f880b fix: Implement custom transparent checkbox for dark mode visibility
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2025-12-04 22:31:11 +00:00
tanner 23b56b26b1 style: Apply transparent background to checkboxes 2025-12-04 22:31:07 +00:00
tanner b439199836 fix: Cancel pending story fetches on filter change to prevent UI jumps
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2025-12-04 22:24:28 +00:00
tanner 5736cde21a feat: Fetch smallweb stories iteratively until limit met
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2025-12-04 22:18:22 +00:00
tanner ed8ad1b6f6 feat: Add domain exclusion to smallweb list loading 2025-12-04 22:18:19 +00:00
tanner 75779722c1 feat: Add smallweb filter checkbox and server-side filtering
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2025-12-04 22:09:11 +00:00
14 changed files with 309 additions and 63 deletions
+67
View File
@@ -0,0 +1,67 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.INFO)
import database
from sqlalchemy import select
import search
import sys
import time
import json
import requests
from bs4 import BeautifulSoup
database.init()
search.init()
BATCH_SIZE = 1000
def put_stories(stories):
    """POST a batch of story documents to the 'qotnews' documents route.

    Returns whatever ``search.meili_api`` returns for that request.
    """
    documents_route = 'indexes/qotnews/documents'
    response = search.meili_api(requests.post, documents_route, stories)
    return response
def get_update(update_id):
    """GET the 'tasks/<update_id>' route and return the ``search.meili_api`` result."""
    task_route = 'tasks/{}'.format(update_id)
    return search.meili_api(requests.get, task_route)
if __name__ == '__main__':
    # Interactive one-shot reindex: after confirmation, push every story to
    # the search index in batches of BATCH_SIZE and wait for each batch's
    # task to finish before sending the next one.
    num_stories = database.count_stories()

    print('Reindex {} stories?'.format(num_stories))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    story_list = database.get_story_list()

    count = 1
    while story_list:
        stories = []

        for _ in range(BATCH_SIZE):
            try:
                sid = story_list.pop()
            except IndexError:
                # Ran out of stories part-way through the final batch.
                break

            story = database.get_story(sid)
            print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
            story_obj = json.loads(story.full_json)
            # Comments are dropped from the document before indexing.
            story_obj.pop('comments', False)
            if story_obj.get('text'):
                # Index the prose only, not the HTML markup.
                soup = BeautifulSoup(story_obj['text'], 'html.parser')
                story_obj['text'] = soup.get_text()
            stories.append(story_obj)
            count += 1

        res = put_stories(stories)
        if not res:
            # meili_api logs the problem and returns False on any failure;
            # subscripting that would raise an unhelpful TypeError.
            sys.exit('Failed to submit batch to MeiliSearch, aborting.')
        update_id = res['taskUid']

        print('Waiting for processing', end='')
        while True:
            update = get_update(update_id)
            # A falsy reply means the status request itself failed (already
            # logged by meili_api); treat it as transient and poll again.
            status = update['status'] if update else None
            if status == 'succeeded':
                break
            if status in ('failed', 'canceled'):
                # Terminal states: the task will never reach 'succeeded', so
                # stop instead of polling forever.
                print()
                sys.exit('MeiliSearch task {} {}: {}'.format(update_id, status, update.get('error')))
            time.sleep(0.5)
            print('.', end='', flush=True)

        print()

    print('Done.')
+4
View File
@@ -11,6 +11,7 @@ import sys
import time
import json
import requests
from bs4 import BeautifulSoup
database.init()
search.init()
@@ -45,6 +46,9 @@ if __name__ == '__main__':
story = database.get_story(sid)
print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
story_obj = json.loads(story.meta_json)
if 'text' in story_obj and story_obj['text']:
soup = BeautifulSoup(story_obj['text'], 'html.parser')
story_obj['text'] = soup.get_text()
stories.append(story_obj)
count += 1
+22 -23
View File
@@ -10,7 +10,8 @@ SEARCH_ENABLED = bool(settings.MEILI_URL)
def meili_api(method, route, json=None, params=None, parse_json=True):
try:
r = method(settings.MEILI_URL + route, json=json, params=params, timeout=4)
headers = {'Authorization': 'Bearer ' + settings.MEILI_API_KEY}
r = method(settings.MEILI_URL + route, json=json, params=params, headers=headers, timeout=4)
if r.status_code > 299:
raise Exception('Bad response code ' + str(r.status_code))
if parse_json:
@@ -24,42 +25,40 @@ def meili_api(method, route, json=None, params=None, parse_json=True):
logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
return False
def create_index():
    """POST to the 'indexes' route asking for index 'qotnews' with primary key 'id'."""
    payload = {'uid': 'qotnews', 'primaryKey': 'id'}
    return meili_api(requests.post, 'indexes', json=payload)
def update_rankings():
    """POST the ranking-rule order to the 'qotnews' index settings."""
    ranking_rules = ['typo', 'words', 'proximity', 'date:desc', 'exactness']
    route = 'indexes/qotnews/settings/ranking-rules'
    return meili_api(requests.post, route, json=ranking_rules)
def update_attributes():
    """POST the searchable and then the displayed attribute lists for 'qotnews'.

    Only the response to the second (displayed-attributes) request is
    returned; the response to the first request is not inspected.
    """
    searchable = ['title', 'url', 'author']
    meili_api(requests.post, 'indexes/qotnews/settings/searchable-attributes', json=searchable)

    displayed = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments']
    return meili_api(requests.post, 'indexes/qotnews/settings/displayed-attributes', json=displayed)
def update_settings():
    """PATCH the 'qotnews' index settings in a single request.

    Sends ranking rules, searchable attributes, displayed attributes and
    stop words together, and returns the ``meili_api`` result.
    """
    ranking_rules = ['words', 'typo', 'proximity', 'attribute', 'date:desc', 'exactness']
    searchable = ['title', 'url', 'author', 'text']
    displayed = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments', 'text']
    stop_words = ['a', 'an', 'the', 'and', 'or', 'but', 'if', 'in', 'on', 'at', 'by', 'for', 'with', 'to', 'from', 'of', 'is', 'it', 'that', 'this']

    payload = {
        'rankingRules': ranking_rules,
        'searchableAttributes': searchable,
        'displayedAttributes': displayed,
        'stopWords': stop_words,
    }
    return meili_api(requests.patch, 'indexes/qotnews/settings', json=payload)
def init():
    """Create the index and push its settings, unless search is disabled.

    When SEARCH_ENABLED is false this only logs a notice. Otherwise the
    result of create_index() is printed and the settings helpers are run
    in order.
    """
    if SEARCH_ENABLED:
        print(create_index())
        update_rankings()
        update_attributes()
        update_settings()
    else:
        logging.info('Search is not enabled, skipping init.')
def put_story(story):
    """POST a single story, wrapped in a one-element list, to the 'qotnews' documents route.

    Returns None without making a request when SEARCH_ENABLED is false.
    """
    if SEARCH_ENABLED:
        return meili_api(requests.post, 'indexes/qotnews/documents', [story])
    return None
def search(q, in_article=False):
    """Run a search against the 'qotnews' index.

    q: the query string.
    in_article: when truthy, restrict matching to the 'text' attribute and
        request a cropped 'text' snippet (cropLength 80) for each hit.
        When falsy, the index's configured searchable attributes are used
        and no snippet is requested.

    Returns [] when SEARCH_ENABLED is false; otherwise whatever meili_api
    returns with parse_json=False (False if the request failed).
    """
    if not SEARCH_ENABLED:
        return []

    payload = {
        'q': q,
        'limit': settings.FEED_LENGTH,
        # Never return the full 'text' attribute with the hits; only the
        # metadata fields are retrieved.
        'attributesToRetrieve': ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments'],
    }

    # Previously guarded by a hard-coded `if True:`, which made the
    # in_article flag a no-op and forced every query to search 'text' only.
    if in_article:
        payload['attributesToSearchOn'] = ['text']
        payload['attributesToCrop'] = ['text']
        payload['cropLength'] = 80

    # One POST only: the earlier GET to the same route had its result
    # overwritten before use.
    return meili_api(requests.post, 'indexes/qotnews/search', json=payload, parse_json=False)
if __name__ == '__main__':
    # Manual smoke test: initialise the index, then print the ranking-rules
    # response and the result of a sample query.
    init()

    rankings_result = update_rankings()
    print(rankings_result)

    sample_result = search('facebook')
    print(sample_result)
+55 -2
View File
@@ -16,6 +16,7 @@ import traceback
import time
import datetime
import humanize
import urllib.request
from urllib.parse import urlparse, parse_qs
import settings
@@ -28,6 +29,25 @@ from flask import abort, Flask, request, render_template, stream_with_context, R
from werkzeug.exceptions import NotFound
from flask_cors import CORS
smallweb_set = set()
def load_smallweb_list(url='https://raw.githubusercontent.com/kagisearch/smallweb/refs/heads/main/smallweb.txt'):
    """Download a newline-separated URL list and rebuild the global smallweb_set.

    url: location of the list; defaults to the Kagi smallweb list.

    Each line's hostname has 'www.' removed and is added to the set unless
    the resulting domain is in EXCLUDED. Lines without a hostname, or that
    cannot be parsed as a URL, are skipped. On any failure the error is
    logged and the existing smallweb_set is left untouched.
    """
    global smallweb_set

    EXCLUDED = {
        'github.com',
    }

    try:
        with urllib.request.urlopen(url, timeout=10) as response:
            lines = response.read().decode('utf-8').splitlines()

        domains = set()
        for line in lines:
            try:
                hostname = urlparse(line).hostname
            except ValueError:
                # One malformed line should not throw away the whole list.
                continue
            if not hostname:
                continue
            # Kept as a plain replace() so entries match hostnames that
            # callers normalise the same way before looking them up.
            domain = hostname.replace('www.', '')
            # Exclude on the normalised name, so 'www.github.com' is dropped
            # just like 'github.com'.
            if domain not in EXCLUDED:
                domains.add(domain)

        # Swap in the new set only after the whole list was processed.
        smallweb_set = domains
        logging.info('Loaded {} smallweb domains.'.format(len(smallweb_set)))
    except Exception as e:
        # Best-effort load at startup: log and carry on with the old set.
        logging.error('Failed to load smallweb list: {}'.format(e))
load_smallweb_list()
database.init()
search.init()
@@ -56,7 +76,39 @@ cors = CORS(flask_app)
def api():
skip = request.args.get('skip', 0)
limit = request.args.get('limit', settings.FEED_LENGTH)
if request.args.get('smallweb') == 'true' and smallweb_set:
limit = int(limit)
skip = int(skip)
filtered_stories = []
current_skip = skip
while len(filtered_stories) < limit:
stories_batch = database.get_stories(limit, current_skip)
if not stories_batch:
break
for story_str in stories_batch:
story = json.loads(story_str)
story_url = story.get('url') or story.get('link') or ''
if not story_url:
continue
hostname = urlparse(story_url).hostname
if hostname:
hostname = hostname.replace('www.', '')
if hostname in smallweb_set:
filtered_stories.append(story_str)
if len(filtered_stories) == limit:
break
if len(filtered_stories) == limit:
break
current_skip += limit
stories = filtered_stories
else:
stories = database.get_stories(limit, skip)
# hacky nested json
res = Response('{"stories":[' + ','.join(stories) + ']}')
res.headers['content-type'] = 'application/json'
@@ -79,8 +131,9 @@ def apistats():
@flask_app.route('/api/search', strict_slashes=False)
def apisearch():
q = request.args.get('q', '')
in_article = request.args.get('article', False)
if len(q) >= 3:
results = search.search(q)
results = search.search(q, in_article)
else:
results = '[]'
res = Response(results)
@@ -210,7 +263,7 @@ def static_story(sid):
show_comments=request.path.endswith('/c'),
)
http_server = WSGIServer(('', 33842), flask_app)
http_server = WSGIServer(('0.0.0.0', 33842), flask_app)
def feed_thread():
global news_index, ref_list, current_item
+1
View File
@@ -4,3 +4,4 @@
meilisearch-linux-amd64
data.ms/
data.ms.old/
+9 -7
View File
@@ -41,9 +41,6 @@
</head>
<body>
<div class="nojs">
<noscript></noscript>
</div>
<div id="root">
<div class="container menu">
<p>
@@ -81,21 +78,26 @@
<div class="story-text">{{ story.text | safe }}</div>
{% elif show_comments %}
{% macro render_comment(comment, level) %}
<div class="comment{% if level > 0 %} lined{% endif %}">
<dt></dt>
<dd class="comment{% if level > 0 %} lined{% endif %}">
<div class="info">
<p>
{% if comment.author == story.author %}[OP] {% endif %}{{ comment.author or '[Deleted]' }} | <a href="#{{ comment.author }}{{ comment.date }}" id="{{ comment.author }}{{ comment.date }}">{{ comment.date | fromnow }}</a>
</p>
</div>
<div class="text">{{ (comment.text | safe) if comment.text else '<p>[Empty / deleted comment]</p>' }}</div>
{% if comment.comments %}
<dl>
{% for reply in comment.comments %}
{{ render_comment(reply, level + 1) }}
{% endfor %}
</div>
</dl>
{% endif %}
</dd>
{% endmacro %}
<div class="comments">
<dl class="comments">
{% for comment in story.comments %}{{ render_comment(comment, 0) }}{% endfor %}
</div>
</dl>
{% endif %}
</div>
<div class='dot toggleDot'>
+2 -11
View File
@@ -12,7 +12,6 @@ const DANGEROUS_TAGS = ['svg', 'math'];
const latexDelimiters = [
{ left: '$$', right: '$$', display: true },
{ left: '\\[', right: '\\]', display: true },
{ left: '$', right: '$', display: false },
{ left: '\\(', right: '\\)', display: false }
];
@@ -106,7 +105,7 @@ function Article({ cache }) {
if (v.nodeName === '#text') {
const text = v.data;
if (text.includes('\\[') || text.includes('\\(') || text.includes('$$') || text.includes('$')) {
if (text.includes('\\[') || text.includes('\\(') || text.includes('$$')) {
return <Latex key={key} delimiters={latexDelimiters}>{text}</Latex>;
}
@@ -138,8 +137,7 @@ function Article({ cache }) {
const textContent = v.textContent.trim();
const isMath = (textContent.startsWith('\\(') && textContent.endsWith('\\)')) ||
(textContent.startsWith('\\[') && textContent.endsWith('\\]')) ||
(textContent.startsWith('$$') && textContent.endsWith('$$')) ||
(textContent.startsWith('$') && textContent.endsWith('$') && textContent.indexOf('$') !== textContent.lastIndexOf('$'));
(textContent.startsWith('$$') && textContent.endsWith('$$'));
const props = { key: key };
if (v.hasAttributes()) {
@@ -159,13 +157,6 @@ function Article({ cache }) {
const firstParen = mathContent.indexOf('\\(');
const lastParen = mathContent.lastIndexOf('\\)');
mathContent = mathContent.substring(0, firstParen) + '\\[' + mathContent.substring(firstParen + 2, lastParen) + '\\]' + mathContent.substring(lastParen + 2);
} else if (trimmed.startsWith('$') && !trimmed.startsWith('$$')) {
// Replace $ with $$
const firstDollar = mathContent.indexOf('$');
const lastDollar = mathContent.lastIndexOf('$');
if (firstDollar !== lastDollar) {
mathContent = mathContent.substring(0, firstDollar) + '$$' + mathContent.substring(firstDollar + 1, lastDollar) + '$$' + mathContent.substring(lastDollar + 1);
}
}
}
return <Tag {...props}><Latex delimiters={latexDelimiters}>{mathContent}</Latex></Tag>;
+29 -10
View File
@@ -8,9 +8,19 @@ function Feed({ updateCache }) {
const [stories, setStories] = useState(() => JSON.parse(localStorage.getItem('stories')) || false);
const [error, setError] = useState('');
const [loadingStatus, setLoadingStatus] = useState(null);
const [filterSmallweb, setFilterSmallweb] = useState(() => localStorage.getItem('filterSmallweb') === 'true');
const handleFilterChange = e => {
const isChecked = e.target.checked;
setStories(false);
setFilterSmallweb(isChecked);
localStorage.setItem('filterSmallweb', isChecked);
};
useEffect(() => {
fetch('/api')
const controller = new AbortController();
fetch(filterSmallweb ? '/api?smallweb=true' : '/api', { signal: controller.signal })
.then(res => {
if (!res.ok) {
throw new Error(`Server responded with ${res.status} ${res.statusText}`);
@@ -26,21 +36,19 @@ function Feed({ updateCache }) {
if (!updated) return;
if (!stories || !stories.length) {
setStories(newApiStories);
localStorage.setItem('stories', JSON.stringify(newApiStories));
}
setLoadingStatus({ current: 0, total: newApiStories.length });
let currentStories = Array.isArray(stories) ? [...stories] : [];
let preloadedCount = 0;
for (const [index, newStory] of newApiStories.entries()) {
if (controller.signal.aborted) {
break;
}
try {
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 10000); // 10-second timeout
const storyRes = await fetch('/api/' + newStory.id, { signal: controller.signal });
const storyFetchController = new AbortController();
const timeoutId = setTimeout(() => storyFetchController.abort(), 10000); // 10-second timeout
const storyRes = await fetch('/api/' + newStory.id, { signal: storyFetchController.signal });
clearTimeout(timeoutId);
if (!storyRes.ok) {
@@ -89,11 +97,17 @@ function Feed({ updateCache }) {
setLoadingStatus(null);
},
(error) => {
if (error.name === 'AbortError') {
console.log('Feed fetch aborted.');
return;
}
const errorMessage = `Failed to fetch the main story list from the API. Your connection may be down or the server might be experiencing issues. ${error.toString()}.`;
setError(errorMessage);
}
);
}, [updateCache]);
return () => controller.abort();
}, [updateCache, filterSmallweb]);
return (
<div className='container'>
@@ -102,6 +116,11 @@ function Feed({ updateCache }) {
<meta name="robots" content="index" />
</Helmet>
<div style={{marginBottom: '1rem'}}>
<input type="checkbox" id="filter-smallweb" className="checkbox" checked={filterSmallweb} onChange={handleFilterChange} />
<label htmlFor="filter-smallweb">Only Smallweb</label>
</div>
{error &&
<details style={{marginBottom: '1rem'}}>
<summary>Connection error? Click to expand.</summary>
+27 -1
View File
@@ -1,6 +1,7 @@
import React, { useState, useEffect } from 'react';
import { Link, useLocation } from 'react-router-dom';
import { Link, useLocation, useHistory } from 'react-router-dom';
import { Helmet } from 'react-helmet';
import queryString from 'query-string';
import { sourceLink, infoLine, logos } from './utils.js';
import AbortController from 'abort-controller';
@@ -8,6 +9,19 @@ function Results() {
const [stories, setStories] = useState(false);
const [error, setError] = useState(false);
const location = useLocation();
const history = useHistory();
const handleFilterChange = e => {
const isChecked = e.target.checked;
const currentQuery = queryString.parse(location.search);
if (isChecked) {
currentQuery.article = 'true';
} else {
delete currentQuery.article;
}
history.push('/search?' + queryString.stringify(currentQuery));
};
useEffect(() => {
const controller = new AbortController();
@@ -32,11 +46,19 @@ function Results() {
};
}, [location.search]);
const searchInArticle = queryString.parse(location.search).article === 'true';
return (
<div className='container'>
<Helmet>
<title>Search Results | QotNews</title>
</Helmet>
<div style={{marginBottom: '1rem'}}>
<input type="checkbox" id="search-in-article" className="checkbox" checked={searchInArticle} onChange={handleFilterChange} />
<label htmlFor="search-in-article">Search in article</label>
</div>
{error && <p>Connection error?</p>}
{stories ?
<>
@@ -56,6 +78,10 @@ function Results() {
</div>
{infoLine(x)}
{!!x?._formatted &&
<p>{x._formatted.text.replace(/\n/g, ' ')}</p>
}
</div>
)
:
+6 -2
View File
@@ -15,7 +15,9 @@ function Search() {
const newSearch = event.target.value;
setSearch(newSearch);
if (newSearch.length >= 3) {
const searchQuery = queryString.stringify({ 'q': newSearch });
const currentQuery = queryString.parse(location.search);
currentQuery.q = newSearch;
const searchQuery = queryString.stringify(currentQuery);
history.replace('/search?' + searchQuery);
} else {
history.replace('/');
@@ -24,7 +26,9 @@ function Search() {
const searchAgain = (event) => {
event.preventDefault();
const searchString = queryString.stringify({ 'q': event.target[0].value });
const currentQuery = queryString.parse(location.search);
currentQuery.q = event.target[0].value;
const searchString = queryString.stringify(currentQuery);
history.push('/search?' + searchString);
inputRef.current.blur();
}
+10 -1
View File
@@ -11,7 +11,8 @@
border: 1px solid #828282;
}
.black button {
.black .menu button,
.black .story-text button {
background-color: #444444;
border-color: #bbb;
color: #ddd;
@@ -66,3 +67,11 @@
.black .comment.lined {
border-left: 1px solid #444444;
}
.black .checkbox:checked + label::after {
border-color: #ddd;
}
.black .copy-button {
color: #828282;
}
+10 -1
View File
@@ -11,7 +11,8 @@
border: 1px solid #828282;
}
.dark button {
.dark .menu button,
.dark .story-text button {
background-color: #444444;
border-color: #bbb;
color: #ddd;
@@ -62,3 +63,11 @@
.dark .comment.lined {
border-left: 1px solid #444444;
}
.dark .checkbox:checked + label::after {
border-color: #ddd;
}
.dark .copy-button {
color: #828282;
}
+49
View File
@@ -189,6 +189,13 @@ span.source {
.comments {
margin-left: -1.25rem;
margin-top: 0;
margin-bottom: 0;
padding: 0;
}
.comments dl, .comments dd {
margin: 0;
}
.comment {
@@ -305,8 +312,50 @@ button.comment {
.copy-button {
font: 1.5rem/1 'icomoon2';
color: #828282;
background: transparent;
border: none;
cursor: pointer;
vertical-align: middle;
}
.checkbox {
-webkit-appearance: none;
appearance: none;
position: absolute;
opacity: 0;
cursor: pointer;
height: 0;
width: 0;
}
.checkbox + label {
position: relative;
cursor: pointer;
padding-left: 1.75rem;
user-select: none;
}
.checkbox + label::before {
content: '';
position: absolute;
left: 0;
top: 0.1em;
width: 1rem;
height: 1rem;
border: 1px solid #828282;
background-color: transparent;
border-radius: 3px;
}
.checkbox:checked + label::after {
content: "";
position: absolute;
left: 0.35rem;
top: 0.2em;
width: 0.3rem;
height: 0.6rem;
border: solid #000;
border-width: 0 2px 2px 0;
transform: rotate(45deg);
}
+14 -1
View File
@@ -20,7 +20,8 @@
background-color: #690000;
}
.red button {
.red .menu button,
.red .story-text button {
background-color: #440000;
border-color: #b00;
color: #b00;
@@ -80,3 +81,15 @@
.red .dot {
background-color: #440000;
}
.red .checkbox + label::before {
border: 1px solid #690000;
}
.red .checkbox:checked + label::after {
border-color: #aa0000;
}
.red .copy-button {
color: #690000;
}