Implement processing of links with href or src attributes.

master
VikingKong 3 years ago
parent 29fceab613
commit 517ab557db

@ -220,7 +220,7 @@ class Links(urwid.ListBox):
def parseLink(self, link): def parseLink(self, link):
ext = link.split(".")[-1] ext = link.split(".")[-1]
if ext.lower() in ("jpg", "jpeg", "gif", "png", "tif", "tiff"): if Utils.checkPic(ext.lower()):
os.system('nohup feh ' + link + ' </dev/null >/dev/null 2>&1 &') os.system('nohup feh ' + link + ' </dev/null >/dev/null 2>&1 &')
elif Utils.checkStreamingVideo(link): elif Utils.checkStreamingVideo(link):
tui.destroyOverlay() tui.destroyOverlay()

@ -1,30 +1,22 @@
from inscriptis import get_text from inscriptis import get_text
import os import os
import Utils import Utils
import html.parser from bs4 import BeautifulSoup
from RedditCommentsParser import RedditComments from RedditCommentsParser import RedditComments
class LinkParser(html.parser.HTMLParser):
    """Collect the distinct ``href`` targets of ``<a>`` tags fed to the parser.

    After ``feed()`` has been called, ``links`` is a set holding every
    ``href`` value seen on an anchor start tag.
    """

    def reset(self):
        """Reset the parser state and start over with an empty link set."""
        super().reset()
        # HTMLParser.__init__ calls reset(), so this also creates the
        # attribute when the parser is constructed.
        self.links = set()

    def handle_starttag(self, tag, attrs):
        """Record the ``href`` of each ``<a>`` start tag.

        Args:
            tag: Tag name as supplied by HTMLParser (lower-cased).
            attrs: List of ``(name, value)`` pairs; ``value`` is ``None``
                for an attribute written without a value (``<a href>``).
        """
        if tag != 'a':
            return
        for name, value in attrs:
            # A valueless href arrives as None; storing it would hand a
            # non-string "link" to code that expects a URL.
            if name == 'href' and value is not None:
                self.links.add(value)
class Article: class Article:
def __init__(self, articleObj): def __init__(self, articleObj):
Utils.writeLog(articleObj)
content = articleObj["summary"]["content"] content = articleObj["summary"]["content"]
parser = LinkParser() soup = BeautifulSoup(content)
for line in content: links = soup.find_all(href=True)
parser.feed(line) media = soup.find_all(src=True)
self.links = list(parser.links) links_set = set()
for link in links:
links_set.add(link['href'])
for m in media:
links_set.add(m['src'])
self.links = list(links_set)
self.text = get_text(content) self.text = get_text(content)
self.title = articleObj["title"] self.title = articleObj["title"]
self.date = Utils.timestampToDate(articleObj["timestampUsec"]) self.date = Utils.timestampToDate(articleObj["timestampUsec"])

@ -2,12 +2,20 @@ from datetime import datetime
import re import re
streaming_urls = ["^https://www.youtube.com", "^https://player.odycdn.com", "^https://youtu.be"] streaming_urls = ["^https://www.youtube.com", "^https://player.odycdn.com", "^https://youtu.be"]
# Regexes for recognised picture extensions, tried in turn by checkPic().
# Each one is anchored at the start only, and "\?*" matches zero or more
# literal "?" characters, so in effect these are prefix tests: "png",
# "png?w=100" and "pngx" all match "^png\?*".
pics = ["^jpg\\?*", "^jpeg\\?*", "^gif\\?*", "^png\\?*", "^tif\\?*", "^tiff\\?*"]
def timestampToDate(ts):
    """Format a microsecond Unix timestamp as a ``yy-mm-dd HH:MM`` string.

    ``ts`` may be anything ``int()`` accepts (for example a numeric
    string). The conversion uses ``datetime.fromtimestamp`` without a
    timezone argument.
    """
    seconds = int(ts) / 1000000
    moment = datetime.fromtimestamp(seconds)
    return moment.strftime("%y-%m-%d %H:%M")
def checkPic(ext):
    """Return True when ``ext`` matches any regex in the module-level ``pics`` list.

    Returns False when none of the patterns match.
    """
    return any(re.search(pattern, ext) is not None for pattern in pics)
def checkStreamingVideo(link): def checkStreamingVideo(link):
for pattern in streaming_urls: for pattern in streaming_urls:
if re.search(pattern, link) is not None: if re.search(pattern, link) is not None:

Loading…
Cancel
Save