parent
bee2e4cb82
commit
2564246126
@ -1,32 +1,20 @@
|
|||||||
from bs4 import BeautifulSoup
|
import praw
|
||||||
import httpx
|
from datetime import datetime
|
||||||
import Utils
|
|
||||||
|
|
||||||
|
|
||||||
class RedditComments:
|
class RedditComments:
|
||||||
def __init__(self, link):
|
def __init__(self, link):
|
||||||
Utils.writeLog(link)
|
self.reddit = praw.Reddit(client_id='unBoGZxgkQSk8KsKWr_jag',
|
||||||
page = httpx.get(link, follow_redirects=True)
|
client_secret=None,
|
||||||
Utils.writeLog(page)
|
redirect_uri='http://localhost:8888',
|
||||||
content = page.text
|
user_agent='agent',
|
||||||
self.soup = BeautifulSoup(content)
|
check_for_async=False)
|
||||||
Utils.writeLog(self.soup)
|
|
||||||
self.commentObjects = self.soup.find_all("div", "Comment")
|
|
||||||
self.comments = []
|
|
||||||
|
|
||||||
def getHeader(self, commentObj):
|
|
||||||
headers = commentObj.find_all("a")
|
|
||||||
username = headers[0]["href"].split("/")[2]
|
|
||||||
date = headers[1].text
|
|
||||||
return username + " " + date
|
|
||||||
|
|
||||||
def getText(self, commentObj):
|
self.comments = []
|
||||||
p = commentObj.find("p")
|
self.link = link
|
||||||
if p is not None:
|
|
||||||
return p.text
|
|
||||||
else:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def getComments(self):
|
def getComments(self):
|
||||||
for co in self.commentObjects:
|
submission = self.reddit.submission(url=self.link)
|
||||||
self.comments.append(self.getHeader(co) + "\n" + self.getText(co) + "\n")
|
for comment in submission.comments:
|
||||||
|
comment_date = datetime.utcfromtimestamp(comment.created_utc).strftime('%Y-%m-%d %H:%M:%S')
|
||||||
|
self.comments.append(str(comment.author) + " " + str(comment_date) + "\n" + str(comment.body) + "\n")
|
||||||
|
|||||||
Loading…
Reference in new issue