Implement fetching Reddit comments via API instead of parsing HTML.

sqlite
VikingKong 3 years ago
parent bee2e4cb82
commit 2564246126

@@ -27,7 +27,7 @@ class Fetcher:
return category in self.articles.keys()
def articlesFromCategory(self, category):
response = httpx.get(self.URL+"/reader/api/0/stream/contents?n=100"+"&s="+category, headers=self.headers)
response = httpx.get(self.URL+"/reader/api/0/stream/contents?n=1000"+"&s="+category, headers=self.headers)
return response.json()["items"]
def getFavorites(self):

@@ -1,32 +1,20 @@
from bs4 import BeautifulSoup
import httpx
import Utils
import praw
from datetime import datetime
class RedditComments:
def __init__(self, link):
Utils.writeLog(link)
page = httpx.get(link, follow_redirects=True)
Utils.writeLog(page)
content = page.text
self.soup = BeautifulSoup(content)
Utils.writeLog(self.soup)
self.commentObjects = self.soup.find_all("div", "Comment")
self.comments = []
def getHeader(self, commentObj):
headers = commentObj.find_all("a")
username = headers[0]["href"].split("/")[2]
date = headers[1].text
return username + " " + date
self.reddit = praw.Reddit(client_id='unBoGZxgkQSk8KsKWr_jag',
client_secret=None,
redirect_uri='http://localhost:8888',
user_agent='agent',
check_for_async=False)
def getText(self, commentObj):
p = commentObj.find("p")
if p is not None:
return p.text
else:
return ""
self.comments = []
self.link = link
def getComments(self):
for co in self.commentObjects:
self.comments.append(self.getHeader(co) + "\n" + self.getText(co) + "\n")
submission = self.reddit.submission(url=self.link)
for comment in submission.comments:
comment_date = datetime.utcfromtimestamp(comment.created_utc).strftime('%Y-%m-%d %H:%M:%S')
self.comments.append(str(comment.author) + " " + str(comment_date) + "\n" + str(comment.body) + "\n")

Loading…
Cancel
Save