마지막 과제이다
"""
These are the URLs that will give you remote jobs for the word 'python'
https://stackoverflow.com/jobs?r=true&q=python
https://weworkremotely.com/remote-jobs/search?term=python
https://remoteok.io/remote-dev+python-jobs
Good luck!
"""
import requests
from flask import Flask, render_template, request
from bs4 import BeautifulSoup
import os , sys
import csv
os.system("clear")  # clear the terminal before the scrape progress output starts
############ crawling #################
# Browser-like User-Agent header sent with every request so the job boards
# don't reject the scraper as a bot.
headers = {'User-Agent' : 'Mozilla.5.0 (Macintosh; Intel Mac OS X 10_9_3)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}
def makesoup(url):
    """Fetch *url* (with the module-level browser headers) and return it parsed."""
    response = requests.get(url, headers=headers)
    return BeautifulSoup(response.text, "html.parser")
def stackpage(stackurl):
    """Return the href of every pagination link on a Stack Overflow jobs page.

    Returns an empty list when the pagination bar is missing (single page of
    results) instead of crashing with AttributeError.
    """
    stacksoup = makesoup(stackurl)
    pagination = stacksoup.find("div", {"class": "s-pagination"})
    if pagination is None:
        # Only one page of results — nothing to paginate over.
        return []
    # The stray debug print of the pagination div was removed.
    return [anchor["href"] for anchor in pagination.find_all("a")]
def stackdataget(stackurl, bigdata):
    """Scrape one Stack Overflow jobs listing page.

    Appends one ``[title, company, link]`` list per job card to *bigdata*
    and returns *bigdata*. Cards missing the expected sub-elements are
    skipped silently, as before.
    """
    stacksoup = makesoup(stackurl)
    results = stacksoup.find("div", {"class": "listResults"})
    cards = results.find_all("div", {"class": "grid--cell fl1"})
    print("stackdata downloading")
    for card in cards:
        try:
            anchor = card.find("h2", {"class": "mb4 fc-black-800 fs-body3"}).find('a')
            span = card.find("h3", {"class": "fc-black-700 fs-body1 mb4"}).find('span')
            company = span.string.replace("\r", "").replace("\n", "").rstrip()
            bigdata.append([
                anchor.string,
                company,
                "https://stackoverflow.com" + anchor["href"],
            ])
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt and real bugs) to the lookup failures that a
        # malformed card can actually raise.
        except (AttributeError, KeyError, TypeError):
            continue
    return bigdata
def weworkdataget(weworkurl, bigdata):
    """Scrape a We Work Remotely search results page.

    Appends one ``[title, company, link]`` list per featured job to
    *bigdata* and returns *bigdata*. Malformed listings are skipped.
    """
    weworksoup = makesoup(weworkurl)
    section = weworksoup.find("section", {"id": "category-2"})
    listings = section.find_all("li", {"class": "feature"})
    print("wework downloading")
    for listing in listings:
        try:
            # The second <a> in each listing carries the job link and text.
            anchor = listing.find_all('a')[1]
            bigdata.append([
                anchor.find('span', {"class": "title"}).string,
                anchor.find('span', {"class": "company"}).string,
                "https://weworkremotely.com" + anchor['href'],
            ])
        # Narrowed from a bare `except:` to the failures a malformed
        # listing can raise (missing spans, short <a> list, missing href).
        except (AttributeError, IndexError, KeyError, TypeError):
            continue
    return bigdata
def remotedataget(remoteurl, bigdata):
    """Scrape a RemoteOK job listing page.

    Appends one ``[title, company, link]`` list per job row to *bigdata*
    and returns *bigdata*.
    """
    remotesoup = makesoup(remoteurl)
    container = remotesoup.find("div", {"class": "container"})
    rows = container.find_all("tr", {"data-stack": ""})
    print("remote downloading")
    for row in rows:
        title = row.find("h2", {"itemprop": "title"})
        company = row.find("h3", {"itemprop": "name"})
        link = row.find("a", {"class": "companyLink"})
        # Skip rows missing any field: the original only checked the title,
        # so a row without a company link crashed on the None["href"] lookup.
        if title is None or company is None or link is None:
            continue
        bigdata.append([
            title.string,
            company.string,
            "https://remoteok.io/" + link["href"],
        ])
    return bigdata
def webdatacomefile(language):
    """Scrape all three job boards for *language* and return one combined list.

    Each entry is ``[title, company, link]``.
    """
    bigdata = []
    stackurl = f"https://stackoverflow.com/jobs?q={language}"
    bigdata = stackdataget(stackurl, bigdata)
    weworkurl = f"https://weworkremotely.com/remote-jobs/search?utf8=%E2%9C%93&term={language}"
    # Bug fix: weworkdataget was previously invoked twice with the same URL,
    # duplicating every We Work Remotely result in the output.
    bigdata = weworkdataget(weworkurl, bigdata)
    remoteurl = f"https://remoteok.io/remote-{language}-jobs"
    bigdata = remotedataget(remoteurl, bigdata)
    # TODO: Stack Overflow pagination (see stackpage) is disabled until the
    # real page count can be determined — the pagination bar also lists
    # pages for removed jobs.
    return bigdata
############### 크롤링 끝 ################
def save(bigdataforcvs, language):
    """Write the scraped jobs to ``day13/csv/<language>.csv``.

    The first row is the ``Title, Company, Link`` header; each following
    row is one job entry from *bigdataforcvs*.
    """
    # Create the target directory so the first run doesn't crash with
    # FileNotFoundError, and close the file via `with` (the original
    # leaked the handle).
    os.makedirs("day13/csv", exist_ok=True)
    # newline="" stops the csv module from emitting blank rows on Windows.
    with open(f"day13/csv/{language}.csv", mode="w", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(["Title", "Company", "Link"])
        writer.writerows(bigdataforcvs)
    return
#########################
def startgame():
    """Create the Flask app, register the three routes, and serve (blocking)."""
    app = Flask("Final")

    @app.route("/")
    def home():
        return render_template("day13home.html")

    @app.route("/search")
    def search():
        term = request.args.get('term')
        # DB is the module-level in-memory cache: repeated searches for the
        # same term skip re-scraping and reuse the stored results.
        if term in DB:
            jobs = DB[term]
        else:
            jobs = webdatacomefile(term)
            DB[term] = jobs
            save(jobs, term)
        return render_template(
            "day13jobs.html", data=jobs, datalen=len(jobs), language=term
        )

    @app.route("/export")
    def csvdownload():
        term = request.args.get('term')
        # NOTE(review): *term* is interpolated into a filesystem path — a
        # value like "../../etc/passwd" escapes day13/csv. Sanitize before
        # exposing this beyond local use.
        # Renamed the local from `csv` (which shadowed the csv module) and
        # close the handle with a context manager instead of leaking it.
        with open(f"day13/csv/{term}.csv") as export_file:
            return export_file.read()

    app.run(host="0.0.0.0")
## How to use the directory, how to use CSV — two ways of building the DB
# In-memory cache mapping a search term to its scraped job list; read and
# written by the /search route so repeat searches skip re-scraping.
DB = {}
<!DOCTYPE html>
<!-- Home page template (search form); rendered by the "/" route. -->
<html>
  <head>
    <title>Remote Jobs</title>
    <!-- Fixed: <link> is a void element, so the </link> closing tag was invalid HTML. -->
    <link href="https://andybrewer.github.io/mvp/mvp.css" rel="stylesheet" />
  </head>
  <body>
    <header>
      <h1>Remote Jobs</h1>
    </header>
    <main>
      <form action="/search">
        <h3>Search by term:</h3>
        <input placeholder="i.e python" required name="term" />
        <button type="submit">Find my job</button>
      </form>
    </main>
  </body>
</html>
<!DOCTYPE html>
<!-- Results template (Jinja2); rendered by the "/search" route with
     data = list of [title, company, link], datalen, and language. -->
<html>
  <head>
    <title>Remote Jobs</title>
    <!-- Fixed: <link> is a void element, so the </link> closing tag was invalid HTML. -->
    <link href="https://andybrewer.github.io/mvp/mvp.css" rel="stylesheet" />
  </head>
  <body>
    <header>
      <h1>Remote Jobs</h1>
      <h3>{{datalen}} {{language}} jobs found.</h3>
      <h5><a href="/export?term={{language}}" download="{{language}}.csv">Export to CSV</a></h5>
      <h6><a href="/">&larr; Go back</a></h6>
    </header>
    <main>
      {% for i in data %}
      <div>
        <h3>{{i[0]}}</h3>
        <h4>{{i[1]}}</h4>
        <a href="{{i[2]}}" target="_blank">Apply</a>
      </div>
      <hr />
      {% endfor %}
    </main>
  </body>
</html>
이렇게 해서
크롤링한 데이터를 csv에 저장
그리고 빠른 데이터 불러오기를위한 가짜 db를 구동
뭔가 밋밋해 보이지만 있을 건 다 있다
이렇게 다운로드도 가능~!