import requests
from flask import Flask, render_template, request
from bs4 import BeautifulSoup
import os
import re
# Clear the terminal before the server starts logging.
# NOTE(review): "clear" is POSIX-only; on Windows this prints an error and is a no-op.
os.system("clear")
"""
When you try to scrape Reddit, make sure to send the 'headers' with your request.
Reddit blocks scrapers, so we have to include these headers to make Reddit think
that we are a normal browser and not a Python script.
How to use: requests.get(url, headers=headers)
"""
# Desktop-browser User-Agent. Reddit rejects requests sent with the default
# python-requests UA, so every requests.get() below passes headers=headers.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}
"""
All subreddits have the same url:
i.e : https://reddit.com/r/javascript
You can add more subreddits to the list, just make sure they exist.
To make a request, use this url:
https://www.reddit.com/r/{subreddit}/top/?t=month
This will give you the top posts of the month.
"""
# Subreddits offered on the home page. Each entry must be an existing
# subreddit name — the scrape URL is built as reddit.com/r/<name>/top/?t=month.
subreddits = [
"javascript",
"reactjs",
"reactnative",
"programming",
"css",
"golang",
"flutter",
"rust",
"django"
]
def webdatacomefile(language):
    """Scrape the top posts of the past month for every subreddit given.

    Args:
        language: iterable of subreddit names (e.g. ["javascript", "css"]).

    Returns:
        A list of [upvote_text, title, post_path, subreddit] lists, one per
        post found across all requested subreddits.
    """
    # Reddit marks each link-post container with data-testid="t3_<id>".
    # Raw string avoids invalid-escape warnings for \w and \- in the pattern.
    post_id_pattern = re.compile(r"^t3_[\w&.\-]{1,6}$")
    bigdata = []
    for subreddit in language:
        url = f"https://www.reddit.com/r/{subreddit}/top/?t=month"
        # The spoofed User-Agent in the module-level `headers` is required,
        # otherwise Reddit blocks the request.
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, "html.parser")
        for post in soup.find_all("div", {"data-testid": post_id_pattern}):
            upvote_tag = post.find("div", {"class": "_1rZYMD_4xY3gRcSS3p8ODO"})
            title_tag = post.find("h3", {"class": "_eYtD2XCVieq6emjKBH3m"})
            link_tag = post.find("a", {"class": "SQnoC3ObvgnGjWt90zD9Z _2INHSNB8V5eaWp4P0rY_mE"})
            # Skip malformed/promoted posts instead of crashing with
            # AttributeError on `.string` / `["href"]` of a missing element.
            if upvote_tag is None or title_tag is None or link_tag is None:
                continue
            bigdata.append([upvote_tag.string, title_tag.string, link_tag["href"], subreddit])
    return bigdata
def popularset(datalist):
    """Sort scraped posts by upvote count, most-upvoted first.

    Args:
        datalist: list of [upvote_text, title, url, subreddit] entries where
            upvote_text is Reddit's abbreviated count ("875", "1.2k", "3m").

    Returns:
        A new list sorted in descending order of the parsed upvote count.
    """
    def parse_count(text):
        # "1.2k" -> 1200, "3m" -> 3000000, "875" -> 875.
        # The previous replace(".","").replace("k","000") trick was wrong for
        # decimal counts: "1.2k" became 12000 instead of 1200 (10x too big).
        text = text.strip().lower()
        multiplier = 1
        if text.endswith("k"):
            multiplier, text = 1_000, text[:-1]
        elif text.endswith("m"):
            multiplier, text = 1_000_000, text[:-1]
        try:
            return int(float(text) * multiplier)
        except ValueError:
            return 0  # unparseable counts (e.g. "Vote") sort last, not crash

    return sorted(datalist, key=lambda post: parse_count(post[0]), reverse=True)
def homelanguageset():
    """Return the subreddit choices shown on the home page.

    Previously this duplicated the module-level ``subreddits`` list verbatim,
    so the two copies could drift apart. Return a copy of the single source
    of truth instead; callers may mutate the result safely.
    """
    return list(subreddits)
def startgame():
    """Create the Flask app, register its two routes, and serve it.

    Blocks until the development server exits. Binds to 0.0.0.0, i.e. the
    server is reachable from every network interface — fine for local demos,
    not for production.
    """
    app = Flask("DayEleven")

    @app.route("/")
    def start():
        # Home page: checkbox list of the available subreddits.
        languageset = homelanguageset()
        return render_template("day11home.html", data=languageset)

    @app.route("/<readurl>")
    def finding(readurl):
        # The home form submits to /read with one query parameter per checked
        # box; the parameter NAMES are the subreddit names, so iterating the
        # args MultiDict yields the subreddits to scrape.
        dataurl = request.args
        webbigdata = webdatacomefile(dataurl)
        cleanbigdata = popularset(webbigdata)
        # (Removed the leftover debug loop that printed every post to stdout
        # on each request.)
        return render_template("day11read.html", data=cleanbigdata, headhunter=dataurl)

    app.run(host="0.0.0.0")
생각보다 빨리 끝냈다
재미가 붙었다 크롤링이 점점 쉬워진다
<!-- day11home.html: landing page. Renders one checkbox per subreddit in
     `data`; submitting sends one query parameter per checked box to /read. -->
<!DOCTYPE html>
<html>
<head>
<title>
Reddit Reader
</title>
<!-- <link> is a void element: it must not have a closing </link> tag -->
<link href="https://andybrewer.github.io/mvp/mvp.css" rel="stylesheet" />
</head>
<body>
<header>
<h1>Reddit Reader</h1>
<h3>A service to aggregate all your favourite subreddits</h3>
</header>
<main>
<form action="/read">
<h4>Select the subreddits you're interested in:</h4>
<ul>
{% for i in data %}
<li>
<input type="checkbox" name="{{i}}" id="{{i}}" />
<label for="{{i}}">
r/{{i}}
</label>
</li>
{% endfor %}
</ul>
<button type="submit">Aggregate</button>
</form>
</main>
</body>
</html>
받는 url이다. input을 받아 url 속성값을 전달한다~!
<!-- day11read.html: results page. `headhunter` holds the requested subreddit
     names; `data` is a list of [upvotes, title, post_path, subreddit] rows
     already sorted by upvotes. -->
<!DOCTYPE html>
<html>
<head>
<title>
Reddit Reader
</title>
<!-- <link> is a void element: it must not have a closing </link> tag -->
<link href="https://andybrewer.github.io/mvp/mvp.css" rel="stylesheet" />
</head>
<body>
<header>
<h1><a href="/">Reddit Reader</a></h1>
<h3>Reading: {% for i in headhunter %} r/{{i}}{% endfor %}</h3>
</header>
<main>
{% for i in data %}
<div>
<h3>
<a href="https://reddit.com{{i[2]}}">
{{i[1]}}
</a>
</h3>
<h4>
{{i[0]}} upvotes · r/{{i[3]}}
</h4>
<hr />
</div>
{% endfor %}
</main>
</body>
</html>
찾은 결과값, 여러 가지를 한번에 할 수도 있다
순서는 저기 숫자순~