diff --git a/App.py b/App.py
index ba0b45a..2ea1e5b 100644
--- a/App.py
+++ b/App.py
@@ -220,7 +220,7 @@ class Links(urwid.ListBox):
 
     def parseLink(self, link):
         ext = link.split(".")[-1]
-        if ext.lower() in ("jpg", "jpeg", "gif", "png", "tif", "tiff"):
+        if Utils.checkPic(ext.lower()):
             os.system('nohup feh ' + link + ' /dev/null 2>&1 &')
         elif Utils.checkStreamingVideo(link):
             tui.destroyOverlay()
diff --git a/Render.py b/Render.py
index d27f7a3..d3ddbb4 100644
--- a/Render.py
+++ b/Render.py
@@ -1,30 +1,22 @@
 from inscriptis import get_text
 import os
 import Utils
-import html.parser
+from bs4 import BeautifulSoup
 from RedditCommentsParser import RedditComments
 
 
-class LinkParser(html.parser.HTMLParser):
-    def reset(self):
-        super().reset()
-        self.links = set()
-
-    def handle_starttag(self, tag, attrs):
-        if tag == 'a':
-            for (name, value) in attrs:
-                if name == 'href':
-                    self.links.add(value)
-
-
 class Article:
     def __init__(self, articleObj):
-        Utils.writeLog(articleObj)
         content = articleObj["summary"]["content"]
-        parser = LinkParser()
-        for line in content:
-            parser.feed(line)
-        self.links = list(parser.links)
+        soup = BeautifulSoup(content, "html.parser")
+        links = soup.find_all(href=True)
+        media = soup.find_all(src=True)
+        links_set = set()
+        for link in links:
+            links_set.add(link['href'])
+        for m in media:
+            links_set.add(m['src'])
+        self.links = list(links_set)
         self.text = get_text(content)
         self.title = articleObj["title"]
         self.date = Utils.timestampToDate(articleObj["timestampUsec"])
diff --git a/Utils.py b/Utils.py
index 4305736..5745dc8 100644
--- a/Utils.py
+++ b/Utils.py
@@ -2,12 +2,20 @@ from datetime import datetime
 import re
 
 streaming_urls = ["^https://www.youtube.com", "^https://player.odycdn.com", "^https://youtu.be"]
+pics = ["^jpg($|\\?)", "^jpeg($|\\?)", "^gif($|\\?)", "^png($|\\?)", "^tif($|\\?)", "^tiff($|\\?)"]
 
 
 def timestampToDate(ts):
     return datetime.fromtimestamp(int(ts)/1000000).strftime("%y-%m-%d %H:%M")
 
 
+def checkPic(ext):
+    for p in pics:
+        if re.search(p, ext) is not None:
+            return True
+    return False
+
+
 def checkStreamingVideo(link):
     for pattern in streaming_urls:
         if re.search(pattern, link) is not None: