Implement processing links with href or src attributes.

master
VikingKong 3 years ago
parent 29fceab613
commit 517ab557db

@ -220,7 +220,7 @@ class Links(urwid.ListBox):
def parseLink(self, link):
ext = link.split(".")[-1]
if ext.lower() in ("jpg", "jpeg", "gif", "png", "tif", "tiff"):
if Utils.checkPic(ext.lower()):
os.system('nohup feh ' + link + ' </dev/null >/dev/null 2>&1 &')
elif Utils.checkStreamingVideo(link):
tui.destroyOverlay()

@ -1,30 +1,22 @@
from inscriptis import get_text
import os
import Utils
import html.parser
from bs4 import BeautifulSoup
from RedditCommentsParser import RedditComments
class LinkParser(html.parser.HTMLParser):
    """Collect the unique ``href`` targets of ``<a>`` tags fed to the parser.

    After one or more ``feed()`` calls, ``self.links`` holds the set of
    non-empty ``href`` values seen on anchor start tags.
    """

    def reset(self):
        """Reset the underlying parser state and start with an empty link set."""
        super().reset()
        self.links = set()

    def handle_starttag(self, tag, attrs):
        """Record the ``href`` of an anchor tag; every other tag is ignored."""
        if tag != 'a':
            return
        for name, value in attrs:
            # A valueless attribute (``<a href>``) is reported with value None
            # and ``href=""`` with an empty string; neither is a usable link,
            # and None would break consumers that treat links as strings.
            if name == 'href' and value:
                self.links.add(value)
class Article:
    """Displayable view of one feed article.

    Attributes set by ``__init__``:
        links: unique ``href`` and ``src`` values found in the article body,
            hrefs first, each group in document order.
        text: the body rendered to plain text by ``inscriptis.get_text``.
        title: ``articleObj["title"]``.
        date: ``articleObj["timestampUsec"]`` formatted by
            ``Utils.timestampToDate``.
    """

    def __init__(self, articleObj):
        """Build the article from a mapping.

        ``articleObj`` must provide ``["summary"]["content"]``, ``["title"]``
        and ``["timestampUsec"]``; the content is presumably an HTML string
        (it is handed to BeautifulSoup and inscriptis) -- confirm with caller.
        """
        Utils.writeLog(articleObj)
        content = articleObj["summary"]["content"]

        # Name the parser explicitly: without it bs4 picks whichever backend
        # happens to be installed and emits a warning about the guess.
        soup = BeautifulSoup(content, "html.parser")
        hrefs = [tag['href'] for tag in soup.find_all(href=True)]
        sources = [tag['src'] for tag in soup.find_all(src=True)]
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # link list is stable between runs (a plain set is not ordered).
        self.links = list(dict.fromkeys(hrefs + sources))

        self.text = get_text(content)
        self.title = articleObj["title"]
        self.date = Utils.timestampToDate(articleObj["timestampUsec"])

@ -2,12 +2,20 @@ from datetime import datetime
import re
# Regexes tried with re.search against a link to decide whether it points at
# a streaming-video host. Dots are escaped so they match only a literal ".";
# unescaped, "https://wwwXyoutube.com" would have been accepted as well.
streaming_urls = [
    r"^https://www\.youtube\.com",
    r"^https://player\.odycdn\.com",
    r"^https://youtu\.be",
]
# Regexes tried with re.search against a (lower-cased) file extension by
# checkPic. Each matches an extension that starts with the given image type,
# optionally followed by literal "?" characters.
pics = [r"^jpg\?*", r"^jpeg\?*", r"^gif\?*", r"^png\?*", r"^tif\?*", r"^tiff\?*"]
def timestampToDate(ts):
    """Format a microsecond epoch timestamp as a ``YY-MM-DD HH:MM`` string.

    ``ts`` may be anything ``int()`` accepts (for example a numeric string).
    The conversion uses ``datetime.fromtimestamp`` without a tz argument, so
    the result is expressed in the machine's local time.
    """
    usec_per_sec = 1000000
    seconds = int(ts) / usec_per_sec
    moment = datetime.fromtimestamp(seconds)
    return moment.strftime("%y-%m-%d %H:%M")
def checkPic(ext):
    """Return True when ``ext`` matches at least one regex in ``pics``.

    ``pics`` is the module-level list of image-extension patterns; each is
    tried with ``re.search`` and the first hit decides the result.
    """
    return any(re.search(pattern, ext) is not None for pattern in pics)
def checkStreamingVideo(link):
for pattern in streaming_urls:
if re.search(pattern, link) is not None:

Loading…
Cancel
Save