|
|
|
@ -1,30 +1,22 @@
|
|
|
|
from inscriptis import get_text
|
|
|
|
from inscriptis import get_text
|
|
|
|
import os
|
|
|
|
import os
|
|
|
|
import Utils
|
|
|
|
import Utils
|
|
|
|
import html.parser
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from RedditCommentsParser import RedditComments
|
|
|
|
from RedditCommentsParser import RedditComments
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class LinkParser(html.parser.HTMLParser):
    """HTML parser that accumulates the href target of every anchor tag fed to it.

    After feeding markup via ``feed()``, the collected targets are available
    in the ``links`` set. Calling ``reset()`` clears them.
    """

    def reset(self):
        # HTMLParser.__init__ invokes reset(), so ``links`` is guaranteed to
        # exist on every instance without defining __init__ here.
        super().reset()
        self.links = set()

    def handle_starttag(self, tag, attrs):
        # Only anchor tags contribute; everything else is ignored.
        if tag != 'a':
            return
        for attr_name, attr_value in attrs:
            if attr_name == 'href':
                self.links.add(attr_value)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Article:
    """Normalized view of one feed article: extracted links, plain text, title, date.

    Defect fixed: this span was unreconciled diff residue interleaving two
    implementations (an ``html.parser``-based link scan that fed ``content``
    one character at a time, and a BeautifulSoup-based scan), leaving the
    class syntactically incoherent. This is the coherent BeautifulSoup
    version, with an explicit parser name passed to ``BeautifulSoup`` so
    bs4 does not guess (and warn about) the markup parser.
    """

    def __init__(self, articleObj):
        # articleObj is presumably a Google-Reader-style item dict with
        # "summary"->"content", "title", and "timestampUsec" keys —
        # TODO confirm against the caller that builds it.
        Utils.writeLog(articleObj)
        content = articleObj["summary"]["content"]

        # Parse the article HTML once with the stdlib parser backend.
        soup = BeautifulSoup(content, "html.parser")

        # Collect every link target (any tag with href=) and media source
        # (any tag with src=), de-duplicated via a set.
        link_targets = set()
        for tag in soup.find_all(href=True):
            link_targets.add(tag['href'])
        for tag in soup.find_all(src=True):
            link_targets.add(tag['src'])
        self.links = list(link_targets)

        # Plain-text rendering of the HTML body.
        self.text = get_text(content)
        self.title = articleObj["title"]
        # timestampUsec is converted to a date by the project helper.
        self.date = Utils.timestampToDate(articleObj["timestampUsec"])
|
|
|
|
|