티스토리 뷰
<Preparation>
- Chrome driver (chromedriver build matching the installed Chrome version)
- "pip install selenium weasyprint"
import datetime
import os
import re
from random import *

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from weasyprint import HTML
class cDaumSearch:
    """Collect Daum news search results with Selenium-driven Chrome sessions.

    Every public method starts its own Chrome session and always shuts it
    down again, also when an error occurs part-way through.

    Result dictionaries use the string ``'NONE'`` for values that could not
    be read, so callers never have to deal with missing keys.
    """

    # Location of the chromedriver binary handed to Selenium.
    EXECUTABLE_PATH = ".../ChromeDriver/chromedriver.exe"
    # Directory (with trailing slash) that receives the generated PDF files.
    PDF_DIRECTORY = "./PDFDocus/"
    # Characters that are not allowed in Windows file names, plus whitespace.
    _UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\s]+')

    def _NewDriver(self):
        """Start and return a new Chrome session using ``EXECUTABLE_PATH``."""
        # NOTE(review): ``executable_path`` is the Selenium 3 keyword; newer
        # Selenium releases expect a ``Service`` object instead - confirm the
        # installed version before upgrading.
        return webdriver.Chrome(executable_path=self.EXECUTABLE_PATH)

    def _OpenPage(self, linkAddress, logTag=None):
        """Open ``linkAddress`` in a new session; return None on failure.

        When ``logTag`` is given, the tag and the address are printed once the
        session has started. A session that started but failed to load the
        page is shut down before returning.
        """
        webDriver = None
        try:
            webDriver = self._NewDriver()
            if logTag is not None:
                print(logTag, linkAddress)
            webDriver.get(linkAddress)
        except Exception as error:
            # Boundary catch: any start-up or navigation failure is reported
            # to the caller as "page unavailable" instead of aborting a crawl.
            print("[Excepted]-OPEN", linkAddress, error)
            if webDriver is not None:
                webDriver.quit()
            return None
        return webDriver

    @staticmethod
    def _ReadTitleAndBody(webDriver):
        """Return ``(title, body, valid)`` for the page loaded in ``webDriver``.

        ``title`` is the inner HTML of the ``<title>`` element with newlines
        removed and ``body`` is the visible text of ``<body>``. A part that
        cannot be read becomes ``"NONE"`` and ``valid`` is set to False.
        """
        validPage = True
        try:
            titleTag = webDriver.find_element(By.TAG_NAME, 'title')
            titleString = titleTag.get_attribute('innerHTML').replace("\n", "")
        except Exception:
            titleString = "NONE"
            print("[Excepted]-TITLE")
            validPage = False
        try:
            bodyString = webDriver.find_element(By.TAG_NAME, 'body').text
        except Exception:
            # Assign a placeholder so the result can always be built.
            bodyString = "NONE"
            print("[Excepted]-BODY")
            validPage = False
        return titleString, bodyString, validPage

    @staticmethod
    def _FailedResult(pdfFilename):
        """Return the result dictionary used when a page could not be opened."""
        return {
            'title': 'NONE',
            'body': 'NONE',
            'pdffilename': pdfFilename,
        }

    @classmethod
    def _PdfTitleFragment(cls, titleString):
        """Return the part of ``titleString`` used in the PDF file name.

        Titles longer than ten characters contribute characters 2..9, shorter
        titles are used whole. Runs of characters that are unsafe in file
        names are replaced with a single underscore.
        """
        # NOTE(review): the slice skips the first two characters of long
        # titles; kept as found - confirm this is intended.
        fragment = titleString[2:10] if len(titleString) > 10 else titleString
        return cls._UNSAFE_FILENAME_CHARS.sub("_", fragment)

    def _CollectArticles(self, webDriver, logTag):
        """Fetch title and body of every ``.f_link_b`` link on the current page.

        Args:
            webDriver: Session that currently shows a result page.
            logTag: Prefix printed in front of every visited address.

        Returns:
            A list of dictionaries with the keys ``'address'``, ``'title'``
            and ``'body'``, one per link, in page order.
        """
        # Read all addresses first so the result page is no longer touched
        # while the individual articles are being loaded.
        linkAddresses = [
            item.get_attribute('href')
            for item in webDriver.find_elements(By.CSS_SELECTOR, '.f_link_b')
        ]
        returnMessages = []
        for linkAddress1 in linkAddresses:
            print(logTag, linkAddress1)
            pageInfo = self.GetLinkPageInfo(linkAddress1)
            returnMessages.append({
                'address': linkAddress1,
                'title': pageInfo['title'],
                'body': pageInfo['body'],
            })
        return returnMessages

    def GetLinkPage(self, linkAddress):
        """Read title and body of a page and save the page as a PDF file.

        Args:
            linkAddress: URL of the page to read.

        Returns:
            A dictionary with the keys ``'title'``, ``'body'`` and
            ``'pdffilename'``. ``'pdffilename'`` is ``'NONE'`` when the page
            could not be opened or its title or body could not be read.

        Raises:
            Any exception raised while WeasyPrint renders or writes the PDF
            propagates to the caller.
        """
        webDriver = self._OpenPage(linkAddress)
        if webDriver is None:
            return self._FailedResult('NONE')
        try:
            titleString, bodyString, validPage = self._ReadTitleAndBody(webDriver)
        finally:
            # The PDF is rendered from the address, not from the browser, so
            # the session can be released before rendering starts.
            webDriver.quit()
        print("[titleS]", titleString)

        filename1 = 'NONE'
        if validPage:
            os.makedirs(self.PDF_DIRECTORY, exist_ok=True)
            timeStamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
            filename1 = "{}{}_{}.pdf".format(
                self.PDF_DIRECTORY,
                self._PdfTitleFragment(titleString),
                timeStamp,
            )
            HTML(linkAddress).write_pdf(filename1)
            print("PDF saved", linkAddress)
        return {
            'title': titleString,
            'body': bodyString,
            'pdffilename': filename1,
        }

    def GetLinkPageInfo(self, linkAddress):
        """Read title and body of a page without saving a PDF.

        Args:
            linkAddress: URL of the page to read.

        Returns:
            A dictionary with the keys ``'title'``, ``'body'`` and
            ``'pdffilename'``; ``'pdffilename'`` is always ``'Invalid'``.
            Title and body are ``'NONE'`` when they could not be read.
        """
        webDriver = self._OpenPage(linkAddress, "[DAUM-2]")
        if webDriver is None:
            return self._FailedResult('Invalid')
        try:
            titleString, bodyString, _ = self._ReadTitleAndBody(webDriver)
        finally:
            webDriver.quit()
        return {
            'title': titleString,
            'body': bodyString,
            'pdffilename': 'Invalid',
        }

    def StartDaumNewsSearch(self, keyWord):
        """Search Daum news for ``keyWord`` and read every article found.

        The first result page is processed directly; every address listed in
        the first ``.paging_comm`` element is then processed through
        ``SearchEachPage``. A result page without a pager yields only the
        articles of the first page.

        Args:
            keyWord: Search term typed into the Daum search box.

        Returns:
            A list of dictionaries with the keys ``'address'``, ``'title'``
            and ``'body'``.

        Raises:
            Exceptions raised by Selenium while starting Chrome or locating
            the search box or the news tab propagate to the caller.
        """
        LINK_DAUM = "https://www.daum.net"
        webDriver = self._NewDriver()
        try:
            webDriver.get(LINK_DAUM)
            searchBox = webDriver.find_element(By.CSS_SELECTOR, '.tf_keyword')
            searchBox.clear()
            searchBox.send_keys(keyWord)
            searchBox.send_keys(Keys.RETURN)
            # NOTE(review): no explicit wait follows the submit; if the news
            # tab is looked up before the result page has loaded, consider
            # adding a WebDriverWait here.
            webDriver.find_element(By.CSS_SELECTOR, '.tab_news').click()

            returnMessages = self._CollectArticles(webDriver, "[DAUM-1]")

            # find_elements returns an empty list instead of raising, so a
            # missing pager does not discard the articles collected above.
            pagers = webDriver.find_elements(By.CSS_SELECTOR, '.paging_comm')
            pageAddresses = []
            if pagers:
                for anchor in pagers[0].find_elements(By.TAG_NAME, 'a'):
                    pageAddress = anchor.get_attribute('href')
                    # Anchors without an address cannot be opened.
                    if pageAddress:
                        pageAddresses.append(pageAddress)

            for pageAddress in pageAddresses:
                returnMessages.extend(self.SearchEachPage(pageAddress))
            return returnMessages
        finally:
            webDriver.quit()

    def SearchEachPage(self, linkAddress):
        """Read every article linked from one search result page.

        Args:
            linkAddress: URL of the result page.

        Returns:
            A list of dictionaries with the keys ``'address'``, ``'title'``
            and ``'body'``, one per ``.f_link_b`` link on the page.

        Raises:
            Exceptions raised by Selenium while starting Chrome or loading
            ``linkAddress`` propagate to the caller.
        """
        webDriver = self._NewDriver()
        try:
            webDriver.get(linkAddress)
            print(linkAddress)
            return self._CollectArticles(webDriver, "[ADDRESS]")
        finally:
            webDriver.quit()
반응형
'SWDesk > App' 카테고리의 다른 글
[Python] Example for Naver Search with selenium (0) | 2021.01.05 |
---|---|
[Python] Example for Google Search with selenium (0) | 2021.01.04 |
Kotlin, Bluetooth 예제 (0) | 2020.12.20 |
[Python] NAS와 파일 공유하기(1) (0) | 2020.11.06 |
[Python] 파일에서 데이터 읽기 (0) | 2020.10.25 |
반응형
250x250
최근에 올라온 글
최근에 달린 댓글
- Total
- Today
- Yesterday
링크
TAG
- 전압
- arduino
- Innovation&Hurdles
- bilient
- 치매방지
- ServantClock
- badp
- 둎
- 혁신
- DYOV
- 절연형
- 심심풀이
- 전류
- Video
- 오블완
- 전압전류모니터링
- 심심풀이치매방지기
- Innovations
- 배프
- 아두이노
- image
- Hurdles
- 빌리언트
- 혁신과허들
- 허들
- 치매
- Decorator
- 티스토리챌린지
- BSC
- Innovations&Hurdles
일 | 월 | 화 | 수 | 목 | 금 | 토 |
---|---|---|---|---|---|---|
1 | 2 | 3 | 4 | |||
5 | 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 | 20 | 21 | 22 | 23 | 24 | 25 |
26 | 27 | 28 | 29 | 30 | 31 |
글 보관함