최대 1분 소요
**[공지사항]** [개인적인 공부를 위한 내용입니다. 오류가 있을 수 있습니다.]
[파이썬(python) & 크롤링(crawling) - 021]
크롤링(Crawling) 기본 - 21
크롤링 실습
G마켓의 베스트상품 크롤링해보기
import requests, openpyxl
from bs4 import BeautifulSoup
# Crawl the G-Market best-seller page (group code G07) and export each
# product's ranking, name, price, detail link and seller to an Excel file.
#
# Every HTTP call carries a timeout so a stalled connection cannot hang the
# script, and every CSS lookup is checked for None so that one unusual list
# item or detail page does not abort the whole crawl before the file is saved.
REQUEST_TIMEOUT = 10  # seconds allowed per HTTP request

excel_file = openpyxl.Workbook()
excel_sheet = excel_file.active
excel_sheet.append(['랭킹', '상품명', '판매가격', '상품상세링크', '판매처'])
# Widen the text-heavy columns so names and links stay readable.
for column_letter, column_width in (('B', 80), ('C', 20), ('D', 80), ('E', 20)):
    excel_sheet.column_dimensions[column_letter].width = column_width

res = requests.get(
    'http://corners.gmarket.co.kr/Bestsellers?viewType=G&groupCode=G07',
    timeout=REQUEST_TIMEOUT,
)
res.raise_for_status()  # fail loudly instead of parsing an error page
soup = BeautifulSoup(res.content, 'html.parser')

# The script reads the second 'div.best-list' element on the page.
# NOTE(review): presumably the first one is a different panel - confirm
# against the live page markup.
bestlists = soup.select('div.best-list')
if len(bestlists) < 2:
    raise SystemExit(
        "Expected at least two 'div.best-list' sections on the best-seller "
        "page; the page layout may have changed."
    )
bestitems = bestlists[1]
products = bestitems.select('ul > li')

for rank, product in enumerate(products, start=1):
    title = product.select_one('a.itemname')
    # Skip list items that are not product entries (no name anchor or link).
    if title is None or not title.get('href'):
        continue
    link = title['href']
    name = title.get_text()

    price = product.select_one('div.s-price > strong')
    price_text = price.get_text() if price is not None else ''

    # The seller is only shown on the product detail page, so each product
    # costs one extra request. A failure here leaves the seller cell blank
    # rather than discarding the rows collected so far.
    provider_name = ''
    try:
        res_info = requests.get(link, timeout=REQUEST_TIMEOUT)
        res_info.raise_for_status()
    except requests.RequestException as err:
        print('Could not fetch detail page', link, err)
    else:
        soup_info = BeautifulSoup(res_info.content, 'html.parser')
        provider_info = soup_info.select_one(' div.item-topinfo_headline > p > span.text__seller > a')
        if provider_info is not None:
            provider_name = provider_info.get_text()

    print(rank, name, price_text, link, provider_name)
    excel_sheet.append([rank, name, price_text, link, provider_name])
    # Attach the hyperlink to the row just appended; using max_row keeps the
    # link aligned with its row even when earlier products were skipped.
    excel_sheet.cell(row=excel_sheet.max_row, column=4).hyperlink = link

excel_file.save('GMARKET_BestFood100.xlsx')
excel_file.close()
댓글 남기기