Improve metadata scraping.

This commit is contained in:
Jason Schwarzenberger
2020-12-04 12:46:46 +13:00
parent fdb4494cd8
commit da7f6330bf
8 changed files with 74 additions and 109 deletions
+4 -3
View File
@@ -29,8 +29,10 @@ def as_readable(details):
'siteName': details['site_name'],
'url': details['article_url'],
'publisher': details['site_name'],
'scraper_link': 'https://outline.com/' + details['short_code']
'scraper_link': 'https://outline.com/' + details['short_code'],
'meta': {}
}
readable['meta'].update(details['meta'])
return readable
def _get_outline(url):
@@ -40,8 +42,7 @@ def _get_outline(url):
headers = {'Referer': OUTLINE_REFERER}
r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=TIMEOUT)
if r.status_code == 429:
logging.info('Rate limited by outline, sleeping 30s and skipping...')
time.sleep(30)
logging.info('Rate limited by outline, skipping...')
return None
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))