'굿'에 해당되는 글 1건

마지막 과제이다

"""
These are the URLs that will give you remote jobs for the word 'python'

https://stackoverflow.com/jobs?r=true&q=python
https://weworkremotely.com/remote-jobs/search?term=python
https://remoteok.io/remote-dev+python-jobs

Good luck!
"""

import requests
from flask import Flask, render_template, request
from bs4 import BeautifulSoup
import os , sys
import csv

# Clear the terminal by running the shell's "clear" command as soon as this
# module is executed.
os.system("clear")



############ Crawling #################

# Browser-style User-Agent sent with every scrape request (presumably so the
# job boards do not reject the default python-requests client).
headers = {'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}
def makesoup(url, timeout=10):
  """Download *url* and return its body parsed with BeautifulSoup.

  Args:
    url: Address of the page to fetch.
    timeout: Seconds to wait for the server before giving up. Without a
      timeout ``requests`` may block forever on an unresponsive site.

  Returns:
    A ``BeautifulSoup`` tree built from the response text with the
    ``html.parser`` backend.

  Raises:
    requests.exceptions.RequestException: On connection errors or timeout.
  """
  data = requests.get(url, headers=headers, timeout=timeout)
  return BeautifulSoup(data.text, "html.parser")

def stackpage(stackurl):
  """Return the href of every link inside the page's pagination bar.

  Args:
    stackurl: URL of a Stack Overflow jobs listing page.

  Returns:
    A list of href strings taken from the ``<a>`` tags inside the
    ``div.s-pagination`` element, or an empty list when the page has no
    such element.
  """
  pagination = makesoup(stackurl).find("div", {"class": "s-pagination"})
  # A listing without a pagination bar used to raise AttributeError here.
  if pagination is None:
    return []
  # href=True skips anchors that carry no link target at all.
  return [anchor["href"] for anchor in pagination.find_all("a", href=True)]

def stackdataget(stackurl, bigdata):
  """Scrape job cards from a Stack Overflow jobs page into *bigdata*.

  Args:
    stackurl: URL of the listing page to scrape.
    bigdata: List that receives one ``[title, company, link]`` row per job.
      It is modified in place.

  Returns:
    The same *bigdata* list, for call chaining. It is returned unchanged
    when the page has no ``div.listResults`` container.
  """
  stacksoup = makesoup(stackurl)
  print("stackdata downloading")
  results = stacksoup.find("div", {"class": "listResults"})
  if results is None:
    # No results container on the page: nothing to add, but do not crash
    # the whole search because one site changed or returned an error page.
    return bigdata
  for card in results.find_all("div", {"class": "grid--cell fl1"}):
    try:
      a = card.find("h2", {"class": "mb4 fc-black-800 fs-body3"}).find('a')
      span = card.find("h3", {"class": "fc-black-700 fs-body1 mb4"}).find('span')
      company = span.string.replace("\r", "").replace("\n", "").rstrip()
      link = "https://stackoverflow.com" + a["href"]
      title = a.string
    except (AttributeError, KeyError, TypeError):
      # The card lacks one of the expected tags/attributes (e.g. it is not
      # a job card); skip just this card instead of hiding every error.
      continue
    bigdata.append([title, company, link])
  return bigdata
    

def weworkdataget(weworkurl, bigdata):
  """Scrape job entries from a We Work Remotely search page into *bigdata*.

  Args:
    weworkurl: URL of the search-result page to scrape.
    bigdata: List that receives one ``[title, company, link]`` row per job.
      It is modified in place.

  Returns:
    The same *bigdata* list, for call chaining. It is returned unchanged
    when the page has no ``section#category-2`` element.
  """
  weworksoup = makesoup(weworkurl)
  print("wework downloading")
  section = weworksoup.find("section", {"id": "category-2"})
  if section is None:
    # The expected section is absent (no results or changed markup).
    return bigdata
  for item in section.find_all("li", {"class": "feature"}):
    try:
      # The second anchor of each entry is the one holding title/company.
      a = item.find_all('a')[1]
      title = a.find('span', {"class": "title"}).string
      company = a.find('span', {"class": "company"}).string
      link = "https://weworkremotely.com" + a['href']
    except (IndexError, AttributeError, KeyError, TypeError):
      # Entry does not have the expected shape; skip only this entry.
      continue
    bigdata.append([title, company, link])
  return bigdata
  


def remotedataget(remoteurl, bigdata):
  """Scrape job rows from a Remote OK listing page into *bigdata*.

  Args:
    remoteurl: URL of the listing page to scrape.
    bigdata: List that receives one ``[title, company, link]`` row per job.
      It is modified in place.

  Returns:
    The same *bigdata* list, for call chaining. It is returned unchanged
    when the page has no ``div.container`` element.
  """
  from urllib.parse import urljoin

  remotesoup = makesoup(remoteurl)
  print("remote downloading")
  container = remotesoup.find("div", {"class": "container"})
  if container is None:
    return bigdata
  for row in container.find_all("tr", {"data-stack": ""}):
    title = row.find("h2", {"itemprop": "title"})
    if title is None:
      # Rows without a title heading are not job rows.
      continue
    try:
      company = row.find("h3", {"itemprop": "name"}).string
      href = row.find("a", {"class": "companyLink"})["href"]
    except (AttributeError, KeyError, TypeError):
      # Company name or link is missing; skip this row like the sibling
      # scrapers do instead of aborting the whole search.
      continue
    # urljoin yields a single "/" whether or not href starts with one.
    bigdata.append([title.string, company, urljoin("https://remoteok.io/", href)])
  return bigdata


def webdatacomefile(language):
  """Collect job rows for *language* from all three job boards.

  Each site is scraped once, in the order Stack Overflow, We Work Remotely,
  Remote OK, and the rows are accumulated in a single list.

  Args:
    language: Search term typed by the user (e.g. ``"python"``).

  Returns:
    A list of ``[title, company, link]`` rows.
  """
  from urllib.parse import quote, quote_plus

  term = str(language)
  # Encode the term so characters such as "#", "+" or spaces ("c#", "c++")
  # do not truncate or alter the request URL.
  query_term = quote_plus(term)
  path_term = quote(term, safe="")

  Bigdata = []
  # r=true restricts Stack Overflow results to remote jobs.
  stackurl = f"https://stackoverflow.com/jobs?r=true&q={query_term}"
  Bigdata = stackdataget(stackurl, Bigdata)

  # Scraped exactly once; a second call would duplicate every row.
  weworkurl = f"https://weworkremotely.com/remote-jobs/search?utf8=%E2%9C%93&term={query_term}"
  Bigdata = weworkdataget(weworkurl, Bigdata)

  remoteurl = f"https://remoteok.io/remote-{path_term}-jobs"
  Bigdata = remotedataget(remoteurl, Bigdata)

  # TODO: only the first Stack Overflow result page is read. Following the
  # links from stackpage() was tried, but that list also contains pages
  # that no longer exist, so the real page/job count has to be obtained
  # first (presumably after the page's dynamic rendering).
  return Bigdata
  

###############  End of crawling  ################
    
def save(bigdataforcvs, language):
  """Write the scraped job rows to ``day13/csv/<language>.csv``.

  The file starts with a ``Title,Company,Link`` header row followed by one
  row per job. An existing file for the same term is overwritten.

  Args:
    bigdataforcvs: Iterable of ``[title, company, link]`` rows.
    language: Search term; used as the CSV file name.

  Returns:
    None.
  """
  # Create the output directory on first use instead of failing.
  os.makedirs("day13/csv", exist_ok=True)
  # newline="" lets the csv module control line endings (avoids blank rows
  # on Windows); the with-block guarantees the file is flushed and closed.
  with open(f"day13/csv/{language}.csv", mode="w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["Title", "Company", "Link"])
    writer.writerows(bigdataforcvs)
  return
#########################

def startgame():
  """Build the Flask app, register its three routes and start the server.

  Routes:
    ``/``        renders the search form (``day13home.html``).
    ``/search``  scrapes jobs for ``?term=...`` (or reuses the rows cached
                 in ``DB``), saves them as CSV and renders
                 ``day13jobs.html``.
    ``/export``  returns the CSV previously saved for ``?term=...``.

  The call blocks in ``app.run`` and listens on all network interfaces.
  """
  from flask import abort, redirect

  app = Flask("Final")

  def is_safe_term(term):
    """Tell whether *term* can be used as a file name inside day13/csv."""
    # The term becomes part of a file path, so path separators and NUL
    # bytes must not get through (path traversal).
    return not any(ch in term for ch in "/\\\0")

  @app.route("/")
  def home():
    return render_template("day13home.html")

  @app.route("/search")
  def search():
    dataurl = request.args.get('term')
    if not dataurl or not dataurl.strip():
      # No usable term: show the form again instead of scraping for "None".
      return redirect("/")
    if not is_safe_term(dataurl):
      abort(400)
    if dataurl in DB:
      Bigdatalist = DB[dataurl]
    else:
      Bigdatalist = webdatacomefile(dataurl)
      DB[dataurl] = Bigdatalist
      save(Bigdatalist, dataurl)
    return render_template("day13jobs.html", data=Bigdatalist, datalen=len(Bigdatalist), language=dataurl)

  @app.route("/export")
  def csvdownload():
    dataurl = request.args.get('term')
    # Only terms that went through /search have a CSV file, and only those
    # were validated; anything else is rejected rather than opened.
    if dataurl not in DB:
      abort(404)
    with open(f"day13/csv/{dataurl}.csv") as csvfile:
      content = csvfile.read()
    # Serve as CSV so browsers do not interpret scraped text as HTML.
    return app.response_class(content, mimetype="text/csv")

  app.run(host="0.0.0.0")



## Two ways of building the "DB": using the directory and using CSV files.
# In-memory cache: maps a search term to the list of job rows scraped for it
# (filled and read by the /search view).
DB = {}

 

<!DOCTYPE html>
<html lang="en">

<head>
  <!-- Landing page: one search form whose "term" field is sent to /search. -->
  <meta charset="utf-8">
  <title>
    Remote Jobs
  </title>
  <!-- <link> is a void element, so it takes no closing tag. -->
  <link href="https://andybrewer.github.io/mvp/mvp.css" rel="stylesheet">
</head>

<body>
  <header>
    <h1>Remote Jobs</h1>
  </header>
  <main>
    <form action="/search">
      <h3>Search by term:</h3>
      <input placeholder="i.e python" required name="term" />
      <button type="submit">Find my job</button>
    </form>
  </main>
</body>

</html>

<!DOCTYPE html>
<html lang="en">

<head>
  {# Results page. Context: data (list of [title, company, link] rows),
     datalen (number of rows), language (the search term). #}
  <meta charset="utf-8">
  <title>
    Remote Jobs
  </title>
  {# <link> is a void element, so it takes no closing tag. #}
  <link href="https://andybrewer.github.io/mvp/mvp.css" rel="stylesheet">
</head>

<body>
  <header>
    <h1>Remote Jobs</h1>
    <h3>{{datalen}} {{language}} jobs found.</h3>
    {# urlencode keeps terms such as "c#" or "c++" intact in the query string. #}
    <h5><a href="/export?term={{language|urlencode}}" download="{{language}}.csv">Export to CSV</a></h5>
    <h6><a href="/">&larr; Go back</a></h6>
  </header>
  <main>
    {% for i in data %}
      <div>
        <h3>{{i[0]}}</h3>
        <h4>{{i[1]}}</h4>
        {# rel="noopener noreferrer": the external page gets no handle on this tab. #}
        <a href="{{i[2]}}" target="_blank" rel="noopener noreferrer">Apply</a>
      </div>
      <hr />
    {% endfor %}
  </main>
</body>

</html>

이렇게 해서

크롤링 데이터를 CSV에 저장

그리고 빠른 데이터 불러오기를 위한 가짜 DB를 구동

뭔가 밋밋해 보이지만 있을 건 다 있다

이렇게 다운로드도 가능~!

블로그 이미지

Or71nH

,