티스토리 뷰
<Preparation>
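- Chrome driver (a chromedriver build matching the installed Chrome version)
- "pip install selenium"
- "pip install weasyprint" (used by the code below to save pages as PDF)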
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import datetime
from random import *
from weasyprint import HTML
class cNaverSearch:
    """Scrape Naver news search results with a Selenium-driven Chrome browser.

    Every public method starts its own Chrome session and always shuts it
    down again, even when scraping fails part-way.

    NOTE(review): the ``find_element_by_*`` helpers and the
    ``executable_path`` keyword used here belong to the Selenium 3 API and
    are, as far as I know, gone from recent Selenium 4 releases -- confirm
    the pinned Selenium version before upgrading.
    """

    # Location of the chromedriver binary handed to webdriver.Chrome.
    EXECUTABLE_PATH = ".../ChromeDriver/chromedriver.exe"

    def __init__(self):
        pass

    def _OpenDriver(self):
        """Start and return a new Chrome session using EXECUTABLE_PATH."""
        return webdriver.Chrome(executable_path=self.EXECUTABLE_PATH)

    @staticmethod
    def _ReadTitleAndBody(webDriver):
        """Read the <title> and <body> text of the page loaded in webDriver.

        Returns:
            (titleString, bodyString, validPage). A part that cannot be read
            is reported as "NONE" and makes validPage False.
        """
        validPage = True
        try:
            titleTag = webDriver.find_element_by_tag_name('title')
            titleString = titleTag.get_attribute('innerHTML').replace("\n", "")
        except Exception:
            titleString = "NONE"
            print("[Excepted]-TITLE")
            validPage = False
        try:
            bodyString = webDriver.find_element_by_tag_name('body').text
        except Exception:
            # Without this fallback the callers hit a NameError when they
            # build their result dict.
            bodyString = "NONE"
            print("[Excepted]-BODY")
            validPage = False
        return titleString, bodyString, validPage

    def _FetchTitleAndBody(self, linkAddress):
        """Load linkAddress in a fresh browser and read its title and body.

        Returns:
            The tuple from _ReadTitleAndBody, or None when the browser could
            not be started or the page could not be loaded.
        """
        try:
            webDriver = self._OpenDriver()
        except Exception as err:
            print("[Excepted]-DRIVER", err)
            return None
        try:
            try:
                webDriver.get(linkAddress)
            except Exception as err:
                print("[Excepted]-GET", err)
                return None
            return self._ReadTitleAndBody(webDriver)
        finally:
            # quit() ends the whole session (browser and chromedriver);
            # close() would only close the current window.
            webDriver.quit()

    def GetLinkPage(self, linkAddress):
        """Fetch a page, save it as a PDF and return its title and body text.

        Args:
            linkAddress: URL of the page to fetch.

        Returns:
            A dict with keys 'title', 'body' and 'pdffilename'. Each value is
            'NONE' when it could not be obtained; the PDF is written only if
            both title and body were read successfully.
        """
        import os
        import re

        fetched = self._FetchTitleAndBody(linkAddress)
        if fetched is None:
            return {'title': 'NONE', 'body': 'NONE', 'pdffilename': 'NONE'}
        titleString, bodyString, validPage = fetched

        filename1 = 'NONE'
        if validPage:
            print("[titleS]", titleString)
            # Long titles are shortened to characters 2..9 for the file name.
            # NOTE(review): skipping the first two characters is kept from
            # the original code; the reason is not visible here.
            title1 = titleString[2:10] if len(titleString) > 10 else titleString
            # Replace characters that are not allowed in file names.
            title1 = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', title1)
            datename1 = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
            os.makedirs("./PDFDocus", exist_ok=True)
            filename1 = "./PDFDocus/" + title1 + "_" + datename1 + ".pdf"
            HTML(linkAddress).write_pdf(filename1)
            print("PDF saved", linkAddress)

        return {
            'title': titleString,
            'body': bodyString,
            'pdffilename': filename1,
        }

    def GetLinkPageInfo(self, linkAddress):
        """Fetch a page and return its title and body text (no PDF is written).

        Args:
            linkAddress: URL of the page to fetch.

        Returns:
            A dict with keys 'title', 'body' and 'pdffilename'. 'title' and
            'body' are 'NONE' when they could not be read; 'pdffilename' is
            always 'Invalid'.
        """
        print("[NAVER-2]", linkAddress)
        fetched = self._FetchTitleAndBody(linkAddress)
        if fetched is None:
            return {'title': 'NONE', 'body': 'NONE', 'pdffilename': 'Invalid'}
        titleString, bodyString, _ = fetched
        return {
            'title': titleString,
            'body': bodyString,
            'pdffilename': 'Invalid',
        }

    def StartNaverNewsSearch(self, keyWord):
        """Run a Naver news search and scrape every linked result page.

        Args:
            keyWord: search phrase.

        Returns:
            A list with the dicts produced by SearchEachPage for each link
            found inside the '.sc_page_inner' element.
        """
        from urllib.parse import quote

        webDriver = self._OpenDriver()
        try:
            # Percent-encode the phrase so spaces, '&' or '#' cannot break
            # the query string.
            webDriver.get(
                'https://search.naver.com/search.naver?where=news&sm=tab_jum&query='
                + quote(keyWord, safe='')
            )
            elem = webDriver.find_element_by_name('query')
            elem.clear()
            elem.send_keys(keyWord)
            elem.send_keys(Keys.RETURN)
            Items = webDriver.find_element_by_css_selector('.sc_page_inner')
            # Collect the hrefs up front so this browser can be shut down
            # before the per-page browsers are started.
            pageLinks = [
                item1.get_attribute('href')
                for item1 in Items.find_elements_by_tag_name('a')
            ]
        finally:
            webDriver.quit()

        returnMessages = []
        for linkAddress1 in pageLinks:
            print("[NAVER-1", linkAddress1)
            if not linkAddress1:
                # Anchors without an href cannot be opened.
                continue
            returnMessages.extend(self.SearchEachPage(linkAddress1))
        return returnMessages

    def SearchEachPage(self, linkAddress):
        """Scrape one search-result page.

        Args:
            linkAddress: URL of a Naver news result page.

        Returns:
            One dict per '.news_tit' element, with keys 'address', 'title'
            (both read from the element's attributes) and 'body' (text
            fetched from the linked page via GetLinkPageInfo).
        """
        webDriver = self._OpenDriver()
        try:
            webDriver.get(linkAddress)
            print(linkAddress)
            newsLinks = [
                (item1.get_attribute('href'), item1.get_attribute('title'))
                for item1 in webDriver.find_elements_by_css_selector('.news_tit')
            ]
        finally:
            webDriver.quit()

        returnMessages = []
        for linkAddress1, title1 in newsLinks:
            print("[ADDRESS]", linkAddress1)
            print("[TITLE]", title1)
            pageInfo = self.GetLinkPageInfo(linkAddress1)
            returnMessages.append({
                'address': linkAddress1,
                'title': title1,
                'body': pageInfo['body'],
            })
        return returnMessages
반응형
'SWDesk > App' 카테고리의 다른 글
JSON 배열 전송 및 수신 : Python to PHP (2) | 2021.01.13 |
---|---|
[Python] Example for Naver Search with Naver-API (0) | 2021.01.08 |
[Python] Example for Google Search with selenium (0) | 2021.01.04 |
[Python] Example for Daum Search with selenium (0) | 2021.01.02 |
Kotlin, Bluetooth 예제 (0) | 2020.12.20 |
반응형
250x250
최근에 올라온 글
최근에 달린 댓글
- Total
- Today
- Yesterday
링크
TAG
- Innovations&Hurdles
- 절연형
- 배프
- 전류
- 허들
- 오블완
- ServantClock
- badp
- 치매방지
- 티스토리챌린지
- image
- 전압
- 아두이노
- bilient
- DYOV
- Innovations
- 심심풀이치매방지기
- 치매
- 둎
- BSC
- 전압전류모니터링
- 혁신과허들
- 심심풀이
- 빌리언트
- Innovation&Hurdles
- Video
- Decorator
- Hurdles
- arduino
- 혁신
일 | 월 | 화 | 수 | 목 | 금 | 토 |
---|---|---|---|---|---|---|
1 | 2 | 3 | 4 | |||
5 | 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 | 20 | 21 | 22 | 23 | 24 | 25 |
26 | 27 | 28 | 29 | 30 | 31 |
글 보관함