⚙️ Web Crawling
📌 다나와 홈페이지 실습(Selenium & BeautifulSoup)
# Browser automation (Selenium) and HTML parsing (BeautifulSoup) imports.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import time

# Start a Chrome session through the local chromedriver executable.
path = "chromedriver 응용파일 경로"
service = Service(path)
driver = webdriver.Chrome(service=service)

# Navigate to the Danawa laptop category listing page.
base_url = "https://prod.danawa.com/list/?cate=112758&15main_11_02="
driver.get(base_url)
- 제조사 옵션의 확장 버튼 클릭 & 애플 옵션 선택
# Expand the manufacturer filter section, then tick the Apple checkbox.
# Wait until the expand button is actually *clickable* (not merely present
# in the DOM) — a present-but-hidden/disabled element raises on .click().
btn_path = '//*[@id="dlMaker_simple"]/dd/div[2]/button[1]'
WebDriverWait(driver, 5).until(ec.element_to_be_clickable((By.XPATH, btn_path))).click()

# The Apple maker checkbox may not be interactable immediately after the
# expansion animation, so wait for clickability here too instead of an
# unguarded find_element().click().
apple_btn = '//*[@id="searchMaker1452"]'
WebDriverWait(driver, 5).until(ec.element_to_be_clickable((By.XPATH, apple_btn))).click()
time.sleep(3)  # give the product list time to re-render after filtering
- 애플 카테고리에 속한 노트북들의 상품명 정보 수집
# Parse the rendered page and print each Apple laptop's product name.
soup = BeautifulSoup(driver.page_source, "html.parser")

# Guard against a missing container: .find() returns None when the class
# is absent, and None.find_all(...) would raise AttributeError.
container = soup.find("div", class_="main_prodlist main_prodlist_list")
items = container.find_all("li", class_="prod_item prod_layer") if container else []
for item in items:
    name_tag = item.find("a", {"name": "productName"})
    if name_tag is None:
        continue  # skip ad/placeholder <li> entries without a name anchor
    print(name_tag.text.strip())
    print("-" * 100)
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import time

# Self-contained crawl: open Danawa's laptop listing, filter by the Apple
# manufacturer checkbox, and print the name of every listed product.
path = "구글 드라이버 경로"
s = Service(path)
driver = webdriver.Chrome(service=s)
try:
    base_url = "https://prod.danawa.com/list/?cate=112758&15main_11_02="
    driver.get(base_url)

    # Expand the manufacturer filter; wait for clickability, not mere
    # presence — a present-but-hidden element raises on .click().
    btn_path = '//*[@id="dlMaker_simple"]/dd/div[2]/button[1]'
    WebDriverWait(driver, 5).until(ec.element_to_be_clickable((By.XPATH, btn_path))).click()

    # Tick the Apple checkbox once it becomes clickable (the expansion may
    # still be animating right after the previous click).
    apple_btn = '//*[@id="searchMaker1452"]'
    WebDriverWait(driver, 5).until(ec.element_to_be_clickable((By.XPATH, apple_btn))).click()
    time.sleep(3)  # let the filtered product list re-render

    # Parse the rendered HTML; guard every .find() against None so an
    # ad/placeholder <li> or a missing container cannot crash the loop.
    soup = BeautifulSoup(driver.page_source, "html.parser")
    container = soup.find("div", class_="main_prodlist main_prodlist_list")
    items = container.find_all("li", class_="prod_item prod_layer") if container else []
    for item in items:
        name_tag = item.find("a", {"name": "productName"})
        if name_tag is None:
            continue  # no product-name anchor in this list item
        print(name_tag.text.strip())
        print("-" * 100)
finally:
    # Always release the browser process, even if the crawl fails midway.
    driver.quit()
💡 중간중간 로딩을 기다리는 과정들이 꼭 필요하다.