from flask import Flask, render_template, request, redirect
app = Flask("SuperScrapper")
import requests

import os
os.system("clear")

# Root of the Hacker News Algolia API; every request URL below is built from it.
base_url = "http://hn.algolia.com/api/v1"

# This URL gets the newest stories.
newurl = f"{base_url}/search_by_date?tags=story"

# This URL gets the most popular stories
popularurl = f"{base_url}/search?tags=story"

# Module-level caches shared by the fetch helpers and the "/" route.
# DataBagenew / DataBagePopular hold the story dicts that were kept;
# innew / inpopular hold the objectIDs already stored, used to skip duplicates.
DataBagenew = []
innew = []
DataBagePopular = []
inpopular = []
# NOTE(review): DataBagecom is not read or written anywhere in the visible code.
DataBagecom = []


def populardataget():
    """Fetch the popular stories and add the unseen ones to the shared cache.

    Requests ``popularurl`` and appends every hit that has a title and a
    non-zero ``num_comments`` to ``DataBagePopular``. ``inpopular`` records
    the objectIDs already stored so a story is never added twice.

    Returns:
        The module-level ``DataBagePopular`` list (mutated in place).

    Raises:
        requests.exceptions.Timeout: if the API does not answer in time.
    """
    # A timeout keeps a stalled upstream from hanging the Flask request.
    response = requests.get(popularurl, timeout=10)
    for hit in response.json()['hits']:
        # Skip stories nobody commented on and hits without a title.
        if hit['num_comments'] == 0 or hit['title'] is None:
            continue
        if hit['objectID'] not in inpopular:
            inpopular.append(hit['objectID'])
            DataBagePopular.append(hit)
    return DataBagePopular


def newdataget():
    """Fetch the newest stories and add the unseen ones to the shared cache.

    Requests ``newurl`` and appends every hit that has a title to
    ``DataBagenew``. ``innew`` records the objectIDs already stored so a
    story is never added twice.

    Returns:
        The module-level ``DataBagenew`` list (mutated in place).

    Raises:
        requests.exceptions.Timeout: if the API does not answer in time.
    """
    # A timeout keeps a stalled upstream from hanging the Flask request.
    response = requests.get(newurl, timeout=10)
    for hit in response.json()['hits']:
        if hit['title'] is None:
            continue
        if hit['objectID'] not in innew:
            innew.append(hit['objectID'])
            DataBagenew.append(hit)
    return DataBagenew


@app.route("/")
def home():
    """Render the story list, ordered by the ``order_by`` query parameter.

    ``order_by=new`` shows the newest stories; any other value (including a
    missing parameter) shows the popular ones. The cached list is used when
    it is non-empty, otherwise the matching fetch helper fills it first.
    """
    order_by = request.args.get('order_by')
    print(order_by)

    # Pick the cache and the loader that belong to the requested ordering.
    if order_by == "new":
        cached, fetch = DataBagenew, newdataget
    else:
        cached, fetch = DataBagePopular, populardataget

    DataBage = cached if cached else fetch()
    return render_template("mvp.html", data=DataBage, order=order_by)


@app.route("/<number>")
def comment(number):
    """Render the detail page for the item whose id is *number*.

    The item is requested from ``{base_url}/items/{number}`` and its decoded
    JSON is handed to the ``mvpcom.html`` template as ``data``.
    """
    item_url = f"{base_url}/items/{number}"
    story = requests.get(item_url).json()
    return render_template("mvpcom.html", data=story)

def startgame():
  """Start the Flask app, serving on host 0.0.0.0."""
  app.run(host="0.0.0.0")
/* MVP.css v1.6.2 - https://github.com/andybrewer/mvp */

:root {
    --border-radius: 5px;
    --box-shadow: 2px 2px 10px;
    --color: #118bee;
    --color-accent: #118bee15;
    --color-bg: #fff;
    --color-bg-secondary: #e9e9e9;
    --color-secondary: #920de9;
    --color-secondary-accent: #920de90b;
    --color-shadow: #f4f4f4;
    --color-text: #000;
    --color-text-secondary: #999;
    --font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen-Sans, Ubuntu, Cantarell, "Helvetica Neue", sans-serif;
    --hover-brightness: 1.2;
    --justify-important: center;
    --justify-normal: left;
    --line-height: 1.5;
    --width-card: 285px;
    --width-card-medium: 460px;
    --width-card-wide: 800px;
    --width-content: 1080px;
}

/*
@media (prefers-color-scheme: dark) {
    :root {
        --color: #0097fc;
        --color-accent: #0097fc4f;
        --color-bg: #333;
        --color-bg-secondary: #555;
        --color-secondary: #e20de9;
        --color-secondary-accent: #e20de94f;
        --color-shadow: #bbbbbb20;
        --color-text: #f7f7f7;
        --color-text-secondary: #aaa;
    }
}
*/

/* Layout */
article aside {
    background: var(--color-secondary-accent);
    border-left: 4px solid var(--color-secondary);
    padding: 0.01rem 0.8rem;
}

body {
    background: var(--color-bg);
    color: var(--color-text);
    font-family: var(--font-family);
    line-height: var(--line-height);
    margin: 0;
    overflow-x: hidden;
    padding: 1rem 0;
}

footer,
header,
main {
    margin: 0 auto;
    max-width: var(--width-content);
    padding: 2rem 1rem;
}

hr {
    background-color: var(--color-bg-secondary);
    border: none;
    height: 1px;
    margin: 4rem 0;
}

section {
    display: flex;
    flex-wrap: wrap;
    justify-content: var(--justify-important);
}

section aside {
    border: 1px solid var(--color-bg-secondary);
    border-radius: var(--border-radius);
    box-shadow: var(--box-shadow) var(--color-shadow);
    margin: 1rem;
    padding: 1.25rem;
    width: var(--width-card);
}

section aside:hover {
    box-shadow: var(--box-shadow) var(--color-bg-secondary);
}

section aside img {
    max-width: 100%;
}

[hidden] {
    display: none;
}

/* Headers */
article header,
div header,
main header {
    padding-top: 0;
}

header {
    text-align: var(--justify-important);
}

header a b,
header a em,
header a i,
header a strong {
    margin-left: 0.5rem;
    margin-right: 0.5rem;
}

header nav img {
    margin: 1rem 0;
}

section header {
    padding-top: 0;
    width: 100%;
}

/* Nav */
nav {
    align-items: center;
    display: flex;
    font-weight: bold;
    justify-content: space-between;
    margin-bottom: 7rem;
}

nav ul {
    list-style: none;
    padding: 0;
}

nav ul li {
    display: inline-block;
    margin: 0 0.5rem;
    position: relative;
    text-align: left;
}

/* Nav Dropdown */
nav ul li:hover ul {
    display: block;
}

nav ul li ul {
    background: var(--color-bg);
    border: 1px solid var(--color-bg-secondary);
    border-radius: var(--border-radius);
    box-shadow: var(--box-shadow) var(--color-shadow);
    display: none;
    height: auto;
    left: -2px;
    padding: .5rem 1rem;
    position: absolute;
    top: 1.7rem;
    white-space: nowrap;
    width: auto;
}

nav ul li ul li,
nav ul li ul li a {
    display: block;
}

/* Typography */
code,
samp {
    background-color: var(--color-accent);
    border-radius: var(--border-radius);
    color: var(--color-text);
    display: inline-block;
    margin: 0 0.1rem;
    padding: 0 0.5rem;
}

details {
    margin: 1.3rem 0;
}

details summary {
    font-weight: bold;
    cursor: pointer;
}

h1,
h2,
h3,
h4,
h5,
h6 {
    line-height: var(--line-height);
}

mark {
    padding: 0.1rem;
}

ol li,
ul li {
    padding: 0.2rem 0;
}

p {
    margin: 0.75rem 0;
    padding: 0;
}

pre {
    margin: 1rem 0;
    max-width: var(--width-card-wide);
    padding: 1rem 0;
}

pre code,
pre samp {
    display: block;
    max-width: var(--width-card-wide);
    padding: 0.5rem 2rem;
    white-space: pre-wrap;
}

small {
    color: var(--color-text-secondary);
}

sup {
    background-color: var(--color-secondary);
    border-radius: var(--border-radius);
    color: var(--color-bg);
    font-size: xx-small;
    font-weight: bold;
    margin: 0.2rem;
    padding: 0.2rem 0.3rem;
    position: relative;
    top: -2px;
}

/* Links */
a {
    color: var(--color-secondary);
    display: inline-block;
    font-weight: bold;
    text-decoration: none;
}

a:hover {
    filter: brightness(var(--hover-brightness));
    text-decoration: underline;
}

a b,
a em,
a i,
a strong,
button {
    border-radius: var(--border-radius);
    display: inline-block;
    font-size: medium;
    font-weight: bold;
    line-height: var(--line-height);
    margin: 0.5rem 0;
    padding: 1rem 2rem;
}

button {
    font-family: var(--font-family);
}

button:hover {
    cursor: pointer;
    filter: brightness(var(--hover-brightness));
}

a b,
a strong,
button {
    background-color: var(--color);
    border: 2px solid var(--color);
    color: var(--color-bg);
}

a em,
a i {
    border: 2px solid var(--color);
    border-radius: var(--border-radius);
    color: var(--color);
    display: inline-block;
    padding: 1rem 2rem;
}

/* Images */
figure {
    margin: 0;
    padding: 0;
}

figure img {
    max-width: 100%;
}

figure figcaption {
    color: var(--color-text-secondary);
}

/* Forms */

button:disabled,
input:disabled {
    background: var(--color-bg-secondary);
    border-color: var(--color-bg-secondary);
    color: var(--color-text-secondary);
    cursor: not-allowed;
}

button[disabled]:hover {
    filter: none;
}

form {
    border: 1px solid var(--color-bg-secondary);
    border-radius: var(--border-radius);
    box-shadow: var(--box-shadow) var(--color-shadow);
    display: block;
    max-width: var(--width-card-wide);
    min-width: var(--width-card);
    padding: 1.5rem;
    text-align: var(--justify-normal);
}

form header {
    margin: 1.5rem 0;
    padding: 1.5rem 0;
}

input,
label,
select,
textarea {
    display: block;
    font-size: inherit;
    max-width: var(--width-card-wide);
}

input[type="checkbox"],
input[type="radio"] {
    display: inline-block;
}

input[type="checkbox"]+label,
input[type="radio"]+label {
    display: inline-block;
    font-weight: normal;
    position: relative;
    top: 1px;
}

input,
select,
textarea {
    border: 1px solid var(--color-bg-secondary);
    border-radius: var(--border-radius);
    margin-bottom: 1rem;
    padding: 0.4rem 0.8rem;
}

input[readonly],
textarea[readonly] {
    background-color: var(--color-bg-secondary);
}

label {
    font-weight: bold;
    margin-bottom: 0.2rem;
}

/* Tables */
table {
    border: 1px solid var(--color-bg-secondary);
    border-radius: var(--border-radius);
    border-spacing: 0;
    display: inline-block;
    max-width: 100%;
    overflow-x: auto;
    padding: 0;
    white-space: nowrap;
}

table td,
table th,
table tr {
    padding: 0.4rem 0.8rem;
    text-align: var(--justify-important);
}

table thead {
    background-color: var(--color);
    border-collapse: collapse;
    border-radius: var(--border-radius);
    color: var(--color-bg);
    margin: 0;
    padding: 0;
}

table thead th:first-child {
    border-top-left-radius: var(--border-radius);
}

table thead th:last-child {
    border-top-right-radius: var(--border-radius);
}

table thead th:first-child,
table tr td:first-child {
    text-align: var(--justify-normal);
}

table tr:nth-child(even) {
    background-color: var(--color-accent);
}

/* Quotes */
blockquote {
    display: block;
    font-size: x-large;
    line-height: var(--line-height);
    margin: 1rem auto;
    max-width: var(--width-card-medium);
    padding: 1.5rem 1rem;
    text-align: var(--justify-important);
}

blockquote footer {
    color: var(--color-text-secondary);
    display: block;
    font-size: small;
    line-height: var(--line-height);
    padding: 1.5rem 0;
}

CSS는 좋은 아저씨(andybrewer)의 MVP.css를 사용하였다

 

<!DOCTYPE html>
<html>

<head>
	<title>
    Nomad News |
    
      {% if order == "new" %}New{% else %}Popular{% endif %}
    
  </title>
  <link href="https://andybrewer.github.io/mvp/mvp.css" rel="stylesheet">
</head>

<body>
  <header>
    <h1>Nomad News</h1>
    {% if order == "popular"%}
    <div>
      Order by:
      
        <strong>Popular</strong>
      
      |
      
        <a href="/?order_by=new">New</a>
      
    </div>
    {% elif order == "new"%}
    <div>
      Order by:
      
        <a href="/?order_by=popular">Popular</a>
      
      |
        
        <strong>New</strong>
      
    </div>
    {% else %}
    <div>
      Order by:
      
        <strong>Popular</strong>
      
      |
      
        <a href="/?order_by=new">New</a>
      
    </div>
    
    {% endif %}
  </header>
  <main>
    {% for i in data%}
      <div>
        <div>
          <a href="{{i["objectID"]}}">
            <h3>
              {{i["title"]}}
            </h3>
          </a> 
          (<a href="{{i["url"]}}" target="_blank">{{i["url"]}}
          </a>)
        </div>
        <div>
          {{i["points"]}} points | By: {{i["author"]}} | {{i["num_comments"]}} comments
        </div>
      </div>
      <hr />
    {% endfor %}

  </main>
</body>

</html>

메인 HTML이라고 볼 수 있다

<!DOCTYPE html>
<html>

<head>
	<title>
    Nomad News | {{data['title']}}
  </title>
  <link href="https://andybrewer.github.io/mvp/mvp.css" rel="stylesheet">
</head>

<body>
  <header>
    <h1>{{data['title']}}</h1>
    <div>
      {{data['points']}} points | By {{data['author']}} | <a href="{{data['url']}}" target="_blank">{{data['url']}}</a>
    </div>
  </header>
  <main>
    {% for i in data['children']%}
      <div>
          <strong>{{i['author']}}:</strong>
          <p type="html">{{i['text']|safe}}</p>        
      </div>
      <hr />
    {% endfor %}    
  </main>
</body>

</html>

서브 HTML이라고 볼 수 있다

이번꺼는 잘 넘어갔고 

html에 값을 넘기는 방법과 파이썬으로 사용하는 방법 이용하여 만들면 좋다~!

블로그 이미지

Or71nH

,

쉬는날~~ 
그래서 동적 크롤링을 구현해 봤다


자바 스크립트 inputtext 안에 있는 결과값 가져오기~!!!!



import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup

# Script: open the TransferWise converter page in Chrome and print the value
# that the page's JavaScript writes into the result <input>.
from selenium.webdriver.common.by import By

chrome_options = Options()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
print(1)  # numbered prints mark how far the script got

driver = webdriver.Chrome(options=chrome_options)
try:
    print(2)
    driver.get("https://transferwise.com/gb/currency-converter/krw-to-usd-rate?amount=50")
    print(3)
    # find_element(By.XPATH, ...) is the supported locator API; the
    # find_element_by_* helpers are gone in Selenium 4.
    tag = driver.find_element(By.XPATH, "//input[@id='cc-amount-to']")
    print(tag)
    print(tag.get_attribute('value'))
    print(4)
finally:
    # Always shut the browser down, even when a step above raises.
    driver.quit()

 

 

 

 

 

 

이렇게 있는데
여기서 구동된 결과값이 들어가야 할 자리가 "value  data-hj-whitelist" 로만 나온다
음, 값이 안 보임..
정적으로는 불가능하다. JS 구동 후에야 값이 value에 들어간다
그래서 

동적 코딩 시작

설치부터 
https://webnautes.tistory.com/1184

그런데 문제가 생겼다!!

엉?~??
sudo  가 안먹힌다 T^T 

정말 열심히 찾다가 
https://go-madhat.github.io/chrome-headless/

from selenium import webdriver

# Script: start headless Chrome with the local ./chromedriver binary, load a
# page, and shut the browser down again.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")

# `options=` replaces the deprecated `chrome_options=` keyword.
# NOTE(review): newer Selenium releases expect the driver path through a
# Service object instead of a positional argument - confirm against the
# installed version.
driver = webdriver.Chrome("./chromedriver", options=chrome_options)

try:
    driver.get('http://google.com')
finally:
    # Quit even when the page load fails so no browser process is left behind.
    driver.quit()


repl에 있는 chrome 드라이버를 자동 할당하여 사용하는 코드를 찾았다!! 
이렇게 저렇게 하다보니 

import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup

# Script: launch Chrome with the two container-friendly flags and load the
# TransferWise converter page. The numbered prints mark progress.
chrome_options = Options()
for flag in ('--no-sandbox', '--disable-dev-shm-usage'):
    chrome_options.add_argument(flag)
print(1)

driver = webdriver.Chrome(options=chrome_options)
print(2)
target_url = "https://transferwise.com/gb/currency-converter/krw-to-usd-rate?amount=50"
driver.get(target_url)
print(3)

오오ㅗㅇ~!!!! 

되기 시작!!~!~!
드디어 repl안에서~!! 동적 크롤링 성공~!~!!~
오오옹오~!!~

문제는 이것이었다
아!!!!!! 이거 value 어떻게 가져와?~!??~!?~!

난 별의별짓을 다하면서

computer : 놉~!

computer : 놉~!

computer : 옛다~! 
나 : ??????????????

computer : 놉~!

으으ㅏ아아아앙~!!~!~
그러던중

엇!@@!!@@!@!

 

오ㅗ오오오~!~!!~~!!~!ㅇ오오오오~!~!!~!~
됐어~!!~~!
이렇게 하게 되었다~!

블로그 이미지

Or71nH

,

동적 크롤링이 좋은데 시간 안에 동적으로는 못 해서 정적 크롤링으로 했다

어제 함수를 좀 응용해서 불러왔다 파일 임포트 참조~

import os
import requests
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

from bs4 import BeautifulSoup
from babel.numbers import format_currency
from day5 import play5

os.system("clear")



def comefileday5():
  """Run the day-5 ``play5.firstwork`` step and hand back its result."""
  return play5.firstwork()

def finding(biglist):  
  """Prompt until the user enters a valid index into *biglist*.

  Prints element ``[1]`` of the chosen entry and returns the chosen index.

  Args:
    biglist: sequence of entries; each entry must be indexable at ``[1]``.

  Returns:
    The selected index as an int, always in ``range(len(biglist))``.
  """
  # Loop instead of recursing so repeated bad input cannot exhaust the stack.
  while True:
    country = input("#: ")
    if not country.isdecimal():
      print("That wasn\'t a number.")
      continue
    number = int(country)
    # Valid indices stop at len(biglist) - 1, so the bound must be strict.
    if number < len(biglist):
      print(biglist[number][1])
      return number
    print("Choose a number from the list.")

def money(papper1,papper2):  
  """Ask how much of *papper1* to convert to *papper2* until a number is given.

  The question is printed before every attempt. Input that is not made of
  decimal digits is rejected with a message and asked for again.

  Returns:
    The entered amount as an int.
  """
  while True:
    print(f"\nHow many {papper1} do you want to convert to {papper2}?")
    answer = input()
    if answer.isdecimal():
      return int(answer)
    print("That wasn\'t a number.")

  

def extractmoney(money1, money2, mymoney):
  """Look up the money1-to-money2 rate and print the converted amount.

  Downloads the TransferWise converter page, reads the ``value`` attribute
  of the ``<input id="rate">`` element, multiplies *mymoney* by it and
  prints ``<amount in money1> is <amount in money2>``.

  Args:
    money1: source currency code as used in the URL.
    money2: target currency code as used in the URL.
    mymoney: amount to convert; anything ``float()`` accepts.

  Raises:
    ValueError: if the page has no rate input or its value is empty.
  """
  result = requests.get(
    f"https://transferwise.com/gb/currency-converter/{money1}-to-{money2}-rate?amount={mymoney}",
    timeout=10,
  )
  soup = BeautifulSoup(result.text,"html.parser")

  # Read the attribute through the parser instead of slicing the tag's text,
  # which only works when `value` is the last attribute.
  rate_input = soup.find('input',{"id":"rate"})
  if rate_input is None or not rate_input.get("value"):
    raise ValueError(f"No exchange rate found for {money1}-to-{money2}")

  moneychange = float(rate_input["value"])
  done = float(mymoney)*moneychange
  before = format_currency(float(mymoney), money1.upper(), locale="ko_KR")
  after = format_currency(done, money2.upper(), locale="ko_KR")
  print(f"{before} is {after}")

  



def startgame():  
  """Run the interactive converter: pick two countries, an amount, convert.

  Element ``[3]`` of the chosen entries is used as the currency code. It is
  upper-cased for the amount prompt and lower-cased for the rate lookup.
  """
  print("Welcome to CurrencyConvert PRO 2000")
  biglist = comefileday5()

  print("\nWhere are you from? Choose a country by number.\n")
  source_code = biglist[finding(biglist)][3]

  print("\nNow choose another country.\n")
  target_code = biglist[finding(biglist)][3]

  amount = money(source_code.upper(), target_code.upper())
  extractmoney(source_code.lower(), target_code.lower(), amount)


"""
Use the 'format_currency' function to format the output of the conversion
format_currency(AMOUNT, CURRENCY_CODE, locale="ko_KR" (no need to change this one))


print(format_currency(5000, "KRW", locale="ko_KR"))
"""

 

다음첼린지할때 이쁘게 해야징~

블로그 이미지

Or71nH

,

정적 크롤링 하기~!

import requests
from bs4 import BeautifulSoup


def finding(biglist):
    """Prompt until the user picks a valid entry, then print its details.

    Prints element ``[1]`` of the chosen entry as the country and element
    ``[3]``, upper-cased, as the currency code.

    Args:
        biglist: sequence of entries indexable at ``[1]`` and ``[3]``.
    """
    # Loop rather than recurse: the retry needs the same list again, and
    # repeated bad input must not grow the call stack.
    while True:
        country = input("#: ")
        if not country.isdecimal():
            print("That wasn\'t a number.")
            continue
        number = int(country)
        # Valid indices stop at len(biglist) - 1, so the bound must be strict.
        if number < len(biglist):
            print("You chose", biglist[number][1])
            print("The currency code is", biglist[number][3].upper())
            return
        print("Choose a number from the list.")


def findurlcode():
    """Scrape the iban.com currency table into a list of rows.

    Each returned row is ``[index, cell, cell, cell, number, ...]``: a
    running index followed by the row's cells. All-digit cells become ints;
    other cells keep their first character and have the rest lower-cased.
    Only rows whose element ``[4]`` is an int are kept, and the index counts
    kept rows only.

    Returns:
        list of row lists, indexed consecutively from 0.

    Raises:
        ValueError: if the expected table is not present on the page.
    """
    indeed_result = requests.get('https://www.iban.com/currency-codes', timeout=10)
    indeed_soup = BeautifulSoup(indeed_result.text, "html.parser")
    datalist = indeed_soup.find(
        "table", {"class": "table table-bordered downloads tablesorter"})
    if datalist is None:
        raise ValueError("currency table not found on the iban.com page")

    biglist = []
    # The first <tr> is skipped (presumably the header row).
    for row in datalist.find_all('tr')[1:]:
        # The next index always equals the number of rows kept so far.
        minilist = [len(biglist)]
        for cell in row:
            if cell == "\n":
                continue
            data = str(cell.string)
            if data.isdecimal():
                minilist.append(int(data))
            else:
                minilist.append(data[:1] + data[1:].lower())
        # Keep only complete rows that carry a numeric code in position 4.
        if len(minilist) > 4 and isinstance(minilist[4], int):
            biglist.append(minilist)
    return biglist


def firstwork():
    """Fetch the country rows, print them as a numbered menu, and return them."""
    countries = findurlcode()
    for entry in countries:
        print("#", entry[0], entry[1])
    return countries


def startgame():
    """Greet the user, show the country menu, and ask for a selection."""
    print("Hello! Please choose select a country by number:")
    finding(firstwork())

 

 

내일꺼랑 연관있어 함수 정리 잘해놓기~

 

블로그 이미지

Or71nH

,

퀴즈 

1. 스플릿 하기

2.url 만들기

3.url 검사

단!!! com만 되게 해 놨다
co.kr은 안 됨!~!!
나중에 정규식을 고쳐야 한다~!

 

import requests
from bs4 import BeautifulSoup


def finding(biglist):
    """Prompt until the user picks a valid entry, then print its details.

    Prints element ``[1]`` of the chosen entry as the country and element
    ``[3]``, upper-cased, as the currency code.

    Args:
        biglist: sequence of entries indexable at ``[1]`` and ``[3]``.
    """
    # Loop rather than recurse: the retry needs the same list again, and
    # repeated bad input must not grow the call stack.
    while True:
        country = input("#: ")
        if not country.isdecimal():
            print("That wasn\'t a number.")
            continue
        number = int(country)
        # Valid indices stop at len(biglist) - 1, so the bound must be strict.
        if number < len(biglist):
            print("You chose", biglist[number][1])
            print("The currency code is", biglist[number][3].upper())
            return
        print("Choose a number from the list.")


def findurlcode():
    """Scrape the iban.com currency table into a list of rows.

    Each returned row is ``[index, cell, cell, cell, number, ...]``: a
    running index followed by the row's cells. All-digit cells become ints;
    other cells keep their first character and have the rest lower-cased.
    Only rows whose element ``[4]`` is an int are kept, and the index counts
    kept rows only.

    Returns:
        list of row lists, indexed consecutively from 0.

    Raises:
        ValueError: if the expected table is not present on the page.
    """
    indeed_result = requests.get('https://www.iban.com/currency-codes', timeout=10)
    indeed_soup = BeautifulSoup(indeed_result.text, "html.parser")
    datalist = indeed_soup.find(
        "table", {"class": "table table-bordered downloads tablesorter"})
    if datalist is None:
        raise ValueError("currency table not found on the iban.com page")

    biglist = []
    # The first <tr> is skipped (presumably the header row).
    for row in datalist.find_all('tr')[1:]:
        # The next index always equals the number of rows kept so far.
        minilist = [len(biglist)]
        for cell in row:
            if cell == "\n":
                continue
            data = str(cell.string)
            if data.isdecimal():
                minilist.append(int(data))
            else:
                minilist.append(data[:1] + data[1:].lower())
        # Keep only complete rows that carry a numeric code in position 4.
        if len(minilist) > 4 and isinstance(minilist[4], int):
            biglist.append(minilist)
    return biglist


def firstwork():
    """Fetch the country rows, print them as a numbered menu, and return them."""
    countries = findurlcode()
    for entry in countries:
        print("#", entry[0], entry[1])
    return countries


def startgame():
    """Greet the user, show the country menu, and ask for a selection."""
    print("Hello! Please choose select a country by number:")
    finding(firstwork())
블로그 이미지

Or71nH

,