You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
Inomnibus/RedditCommentsParser.py

33 lines
932 B

from bs4 import BeautifulSoup
import httpx
import Utils
class RedditComments:
    """Download a page and extract its comments as plain-text strings.

    The constructor fetches ``link`` and collects every ``<div>`` whose CSS
    class is ``Comment``. Call :meth:`getComments` to turn those elements
    into ``"<username> <date>\\n<text>\\n"`` strings.
    """

    def __init__(self, link):
        """Fetch ``link`` (following redirects) and locate its comment divs.

        The link, the HTTP response object and the parsed document are each
        passed to ``Utils.writeLog``.

        Args:
            link: URL of the page to download.
        """
        Utils.writeLog(link)
        page = httpx.get(link, follow_redirects=True)
        Utils.writeLog(page)
        content = page.text
        # Name the parser explicitly: without it BeautifulSoup picks whichever
        # parser happens to be installed and warns, so results could differ
        # between machines.
        self.soup = BeautifulSoup(content, "html.parser")
        Utils.writeLog(self.soup)
        self.commentObjects = self.soup.find_all("div", "Comment")
        self.comments = []

    def getHeader(self, commentObj):
        """Return ``"<username> <date>"`` for one comment element.

        The username is the third ``/``-separated segment of the first
        anchor's ``href``; the date is the text of the second anchor. A piece
        that cannot be found is replaced by an empty string instead of
        raising, so one malformed comment does not abort the whole page.

        Args:
            commentObj: Element exposing ``find_all("a")``.

        Returns:
            The header string; the separating space is always present.
        """
        anchors = commentObj.find_all("a")

        username = ""
        if anchors:
            segments = (anchors[0].get("href") or "").split("/")
            if len(segments) > 2:
                username = segments[2]

        date = anchors[1].text if len(anchors) > 1 else ""
        return username + " " + date

    def getText(self, commentObj):
        """Return the text of the comment's first ``<p>``, or ``""`` if none.

        Args:
            commentObj: Element exposing ``find("p")``.
        """
        paragraph = commentObj.find("p")
        return paragraph.text if paragraph is not None else ""

    def getComments(self):
        """Build the formatted comment strings and store them on the instance.

        ``self.comments`` is rebuilt from scratch on every call, so calling
        this method repeatedly does not duplicate entries.

        Returns:
            The list stored in ``self.comments``; each item has the form
            ``"<header>\\n<text>\\n"``.
        """
        self.comments = [
            self.getHeader(co) + "\n" + self.getText(co) + "\n"
            for co in self.commentObjects
        ]
        return self.comments