parent
bee2e4cb82
commit
2564246126
@ -1,32 +1,20 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import httpx
|
||||
import Utils
|
||||
import praw
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class RedditComments:
    """Collect the top-level comments of a Reddit submission through PRAW.

    Attributes:
        reddit: PRAW client created in ``__init__`` (no client secret).
        comments: Formatted comment strings appended by ``getComments``.
        link: Submission URL passed to the constructor.
    """

    def __init__(self, link):
        """Create the PRAW client and remember which submission to read.

        Args:
            link: URL of the Reddit submission whose comments are wanted.
        """
        # NOTE(review): the client id is hard-coded here; consider loading it
        # from configuration instead of keeping it in source.
        self.reddit = praw.Reddit(client_id='unBoGZxgkQSk8KsKWr_jag',
                                  client_secret=None,
                                  redirect_uri='http://localhost:8888',
                                  user_agent='agent',
                                  check_for_async=False)

        self.comments = []
        self.link = link

    def getHeader(self, commentObj):
        """Return ``"<username> <date>"`` built from a comment element.

        Not called by ``getComments``; kept so existing callers keep working.

        Args:
            commentObj: Object exposing ``find_all("a")``; presumably a parsed
                HTML comment element -- verify against callers.

        Returns:
            The third ``/``-separated piece of the first anchor's ``href``,
            a space, and the text of the second anchor.

        Raises:
            IndexError: If fewer than two anchors are found or the ``href``
                has fewer than three ``/``-separated pieces.
        """
        anchors = commentObj.find_all("a")
        # Assumes an href shaped like "/user/<name>/..." -- index 2 is the name.
        username = anchors[0]["href"].split("/")[2]
        date = anchors[1].text
        return username + " " + date

    def getText(self, commentObj):
        """Return the text of the first ``<p>`` in *commentObj*, or ``""``.

        Not called by ``getComments``; kept so existing callers keep working.

        Args:
            commentObj: Object exposing ``find("p")``.
        """
        paragraph = commentObj.find("p")
        return paragraph.text if paragraph is not None else ""

    def getComments(self):
        """Append one formatted string per top-level comment to ``self.comments``.

        Each entry has the form ``"<author> <YYYY-MM-DD HH:MM:SS>\\n<body>\\n"``
        with the timestamp rendered in UTC. Nothing is returned; results
        accumulate in ``self.comments``.
        """
        # Local import: the file only imports ``datetime`` from the module.
        from datetime import timezone

        submission = self.reddit.submission(url=self.link)
        for comment in submission.comments:
            # The comment listing can contain "load more" placeholders that
            # carry no body/timestamp; skip them instead of raising.
            if not hasattr(comment, "body"):
                continue
            # Timezone-aware replacement for the deprecated utcfromtimestamp();
            # the formatted text is identical.
            created = datetime.fromtimestamp(comment.created_utc, tz=timezone.utc)
            comment_date = created.strftime('%Y-%m-%d %H:%M:%S')
            self.comments.append(
                str(comment.author) + " " + comment_date + "\n" + str(comment.body) + "\n"
            )
|
||||
|
||||
Loading…
Reference in new issue