import html.parser
import os

from inscriptis import get_text

import Utils
from RedditCommentsParser import RedditComments

# Number of columns subtracted from the terminal width before wrapping.
# NOTE(review): presumably space reserved for other UI panes -- confirm.
_RESERVED_COLUMNS = 76


class LinkParser(html.parser.HTMLParser):
    """HTML parser that collects the ``href`` of every ``<a>`` start tag.

    The collected values are available in ``self.links`` (a ``set``).
    """

    def reset(self):
        """Reset the parser state and clear the collected links.

        ``HTMLParser.__init__`` calls ``reset()``, so ``self.links`` always
        exists on a constructed parser.
        """
        super().reset()
        self.links = set()

    def handle_starttag(self, tag, attrs):
        """Record the ``href`` attribute(s) of an ``<a>`` tag."""
        if tag != 'a':
            return
        for name, value in attrs:
            # HTMLParser reports a valueless attribute (``<a href>``) as
            # None; skip it so ``links`` only ever holds strings.
            if name == 'href' and value is not None:
                self.links.add(value)


def _paginate(text, line_width, page_height):
    """Split *text* into consecutive page-sized chunks.

    A row is counted whenever a newline is met or more than *line_width*
    characters have been seen since the last row break.  A page is closed
    as soon as more than ``page_height - 2`` rows have been counted.

    Always returns at least one chunk (``[""]`` for empty text), and never
    a trailing empty chunk when the text ends exactly on a page boundary.
    """
    pages = []
    page_start = 0
    rows_used = 0
    column = 0
    for position, char in enumerate(text, start=1):
        column += 1
        if column > line_width or char == "\n":
            rows_used += 1
            column = 0
        if rows_used > page_height - 2:
            pages.append(text[page_start:position])
            page_start = position
            rows_used = 0
    # Keep the unfinished last page; only emit an empty page when there is
    # nothing else, so callers can always read pages[0].
    if page_start < len(text) or not pages:
        pages.append(text[page_start:])
    return pages


class Article:
    """A feed article rendered to plain text and split into terminal pages.

    Attributes set by ``__init__``:
        links: list of ``href`` values found in the article HTML.
        text: plain-text rendering of the article (plus Reddit comments,
            when they were fetched).
        title, date, url: taken from the article object.
        chunks: list of page strings; ``firstPage`` is ``chunks[0]``.
        numberOfPages: ``len(chunks)``.
        currentPageNumber: 1-based index of the page being shown.
    """

    def __init__(self, articleObj):
        """Build the article from *articleObj*.

        *articleObj* is indexed as ``["summary"]["content"]`` (HTML string),
        ``["title"]``, ``["timestampUsec"]`` and ``["canonical"][0]["href"]``.

        Raises:
            KeyError / IndexError: if one of those entries is missing.
            OSError: if ``os.get_terminal_size()`` cannot query a terminal.
        """
        Utils.writeLog(articleObj)
        content = articleObj["summary"]["content"]

        # Feed the whole document at once; iterating the string would hand
        # the parser one character per call.
        parser = LinkParser()
        parser.feed(content)
        self.links = list(parser.links)

        self.text = get_text(content)
        self.title = articleObj["title"]
        self.date = Utils.timestampToDate(articleObj["timestampUsec"])
        self.url = articleObj["canonical"][0]["href"]

        if Utils.checkStreamingVideo(self.url):
            # The article URL itself is added to the link list.
            self.links.append(self.url)
        elif Utils.checkReddit(self.url):
            comments_link = Utils.checkRedditComments(self.links)
            if comments_link:
                commentsObj = RedditComments(comments_link)
                commentsObj.getComments()
                # Append every comment, each preceded by a blank line.
                self.text += "".join(
                    "\n\n" + comment for comment in commentsObj.comments
                )

        self.currentPageNumber = 1

        terminal_width, terminal_height = os.get_terminal_size()
        self.chunks = _paginate(
            self.text, terminal_width - _RESERVED_COLUMNS, terminal_height
        )
        self.firstPage = self.chunks[0]
        self.numberOfPages = len(self.chunks)

    def scrollDown(self):
        """Advance one page (staying on the last page) and return its text."""
        if self.currentPageNumber < self.numberOfPages:
            self.currentPageNumber += 1
        return self.chunks[self.currentPageNumber - 1]

    def scrollUp(self):
        """Go back one page (staying on the first page) and return its text."""
        if self.currentPageNumber > 1:
            self.currentPageNumber -= 1
        return self.chunks[self.currentPageNumber - 1]