diff --git a/API.py b/API.py
index b850838..937543a 100644
--- a/API.py
+++ b/API.py
@@ -27,7 +27,7 @@ class Fetcher:
         return category in self.articles.keys()
 
     def articlesFromCategory(self, category):
-        response = httpx.get(self.URL+"/reader/api/0/stream/contents?n=100"+"&s="+category, headers=self.headers)
+        response = httpx.get(self.URL+"/reader/api/0/stream/contents?n=1000"+"&s="+category, headers=self.headers)
         return response.json()["items"]
 
     def getFavorites(self):
diff --git a/RedditCommentsParser.py b/RedditCommentsParser.py
index 97d52bb..f892b4f 100644
--- a/RedditCommentsParser.py
+++ b/RedditCommentsParser.py
@@ -1,32 +1,28 @@
-from bs4 import BeautifulSoup
-import httpx
-import Utils
+import praw
+from datetime import datetime, timezone
 
 
 class RedditComments:
+    """Collect the comments of one Reddit submission through PRAW."""
+
     def __init__(self, link):
-        Utils.writeLog(link)
-        page = httpx.get(link, follow_redirects=True)
-        Utils.writeLog(page)
-        content = page.text
-        self.soup = BeautifulSoup(content)
-        Utils.writeLog(self.soup)
-        self.commentObjects = self.soup.find_all("div", "Comment")
-        self.comments = []
-
-    def getHeader(self, commentObj):
-        headers = commentObj.find_all("a")
-        username = headers[0]["href"].split("/")[2]
-        date = headers[1].text
-        return username + " " + date
+        """Create the PRAW client and remember the submission URL `link`."""
+        self.reddit = praw.Reddit(client_id='unBoGZxgkQSk8KsKWr_jag',
+                                  client_secret=None,
+                                  redirect_uri='http://localhost:8888',
+                                  user_agent='agent',
+                                  check_for_async=False)
 
-    def getText(self, commentObj):
-        p = commentObj.find("p")
-        if p is not None:
-            return p.text
-        else:
-            return ""
+        self.comments = []
+        self.link = link
 
     def getComments(self):
-        for co in self.commentObjects:
-            self.comments.append(self.getHeader(co) + "\n" + self.getText(co) + "\n")
+        """Append one "author date / body" string per comment to self.comments."""
+        submission = self.reddit.submission(url=self.link)
+        # Drop the "load more comments" placeholders first: MoreComments objects
+        # have no created_utc/body, so formatting them would raise AttributeError.
+        submission.comments.replace_more(limit=0)
+        for comment in submission.comments:
+            created = datetime.fromtimestamp(comment.created_utc, tz=timezone.utc)
+            comment_date = created.strftime('%Y-%m-%d %H:%M:%S')
+            self.comments.append(str(comment.author) + " " + str(comment_date) + "\n" + str(comment.body) + "\n")