티스토리 뷰

 

 

class cBTiNC1:
    """Scrape search results from a cafe board with Selenium and export them.

    The class defines no configuration of its own. Its methods read the
    following attributes, which must be supplied from outside this class
    (by the caller or a subclass):

    * ``HomepageURL``, ``CafeURL``, ``KeyWord``, ``UserID``, ``UserPW``
    * ``Class_LoginButton``, ``Class_LoginID``, ``Class_LoginPW`` -- each a
      two-item sequence ``(outer_css_selector, inner_selector_or_falsy)``
    * ``GetContentPage(webDriver)`` -- called before every content lookup;
      its return value is used as the driver from then on.  It is not
      defined in this class (presumably it switches into the board's
      content frame -- confirm against its real definition).
    """

    # Upper bound on pager groups processed (and workbooks written) per run.
    _MAX_PAGE_GROUPS = 10
    # Text of the pager link that advances to the next group of pages
    # (Korean for "next").
    _NEXT_LABEL = '다음'

    def __init__(self):
        # GatherArtices reads ArticleList before LoadnSaveArticles has ever
        # reset it, so it has to exist from construction time.
        self.ArticleList = DataFrame()

    def LoadnSaveArticles(self, cafeURL=None):
        """Log in, search the cafe for ``self.KeyWord`` and export the hits.

        Every pager group (the numbered page links shown together) is
        scraped into ``self.ArticleList`` and written to
        ``Articles_<n>.xlsx`` through ``cBExcel.SaveData``; at most
        ``_MAX_PAGE_GROUPS`` groups are processed.

        Args:
            cafeURL: Page to open after login; ``self.CafeURL`` is used when
                this is falsy.

        Returns:
            None.
        """
        browser = self.Login()
        try:
            browser.get(cafeURL if cafeURL else self.CafeURL)
            time.sleep(2)
            searchBox = browser.find_element_by_css_selector('.cafe-search')
            searchBox.find_element_by_tag_name('input').send_keys(self.KeyWord)
            searchBox.find_element_by_tag_name('button').send_keys(Keys.RETURN)
            time.sleep(2)

            webDriver = self.GetContentPage(browser)
            excel1 = cBExcel()

            for listCount in range(self._MAX_PAGE_GROUPS):
                try:
                    pager = webDriver.find_element_by_css_selector('.prev-next')
                except Exception:
                    # No pager on the page: nothing (more) to walk through.
                    break
                listNumber = len(pager.find_elements_by_tag_name('a'))
                movedToNextGroup = self._ScrapePageGroup(webDriver, listNumber)

                fileName = "Articles_"+str(listCount)+'.xlsx'
                excel1.SaveData(self.ArticleList, fileName)
                print("[ArticleList]", self.ArticleList)
                self.ArticleList = DataFrame()

                if not movedToNextGroup:
                    # Without a "next" link the pager would show the same
                    # pages again; looping on would only re-scrape them and
                    # write duplicate workbooks.
                    break
        finally:
            # The driver is created by Login() and never handed to the
            # caller, so this is the only place that can release it.
            browser.quit()

    def _ScrapePageGroup(self, webDriver, listNumber):
        """Click through the numbered pager links and scrape each page.

        Args:
            webDriver: Driver positioned on a result list.
            listNumber: Number of pager links counted before the walk began.

        Returns:
            True when the "next" link was clicked (another group is now
            showing), False when the group ended without one or the pager
            could no longer be read.
        """
        listIndex = 0
        while listIndex < listNumber:
            try:
                # Looked up again on every pass; element references from a
                # previous pass are presumably invalid after a click.
                pager = webDriver.find_element_by_css_selector('.prev-next')
                links = pager.find_elements_by_tag_name('a')
            except Exception:
                return False
            if listIndex >= len(links):
                # The pager now holds fewer links than were counted up front.
                return False
            link = links[listIndex]
            label = link.text
            try:
                int(label)
            except (TypeError, ValueError):
                if label == self._NEXT_LABEL:
                    link.click()
                    time.sleep(2)
                    return True
                # Any other non-numeric link is skipped.
                print("[???] UNKNOWN ........???")
                listIndex += 1
                continue
            link.click()
            time.sleep(2)
            listIndex += 1
            self.LoadArticles(webDriver)
        return False

    def LoadArticles(self, webDriver):
        """Scrape every article row (``.td_article``) of the current list page.

        The rows are counted once; each one is then handed to
        ``GatherArtices`` by index.  Scraping of the page stops at the first
        row for which ``GatherArtices`` returns a falsy value.

        Returns:
            None in every case.
        """
        webDriver = self.GetContentPage(webDriver)
        elemNumber = len(webDriver.find_elements_by_css_selector('.td_article'))

        for elemIndex in range(elemNumber):
            webDriver = self.GatherArtices(webDriver, elemIndex)
            if not webDriver:
                return None
            webDriver = self.GetContentPage(webDriver)
        return None

    def GatherArtices(self, webDriver, elemIndex):
        """Open one article in a new tab, scrape it and return to the list.

        Args:
            webDriver: Driver positioned on the article list.
            elemIndex: Index of the ``.td_article`` row to open.

        Returns:
            The driver, switched back to the first window, or None when the
            article could not be opened.
        """
        # Function-scope import: pandas.concat replaces DataFrame.append,
        # which no longer exists in pandas 2.x.
        from pandas import concat

        try:
            rows = webDriver.find_elements_by_css_selector('.td_article')
            link = rows[elemIndex].find_element_by_css_selector('.article')
            # Ctrl+Enter on the link opens it in a new tab.
            link.send_keys(Keys.CONTROL+"\n")
            time.sleep(2)
        except Exception as e:
            print("[Exception]", e)
            return None

        handles = webDriver.window_handles
        if len(handles) < 2:
            # The new tab never appeared; report it like any other failed
            # open instead of crashing on handles[1].
            print("[Exception]", "article tab did not open")
            return None

        webDriver.switch_to.window(handles[1])
        try:
            res1 = self.GetArticle(webDriver)
            if res1:
                res11 = DataFrame(res1)
                if self.ArticleList.empty:
                    self.ArticleList = res11
                else:
                    self.ArticleList = concat(
                        [self.ArticleList, res11], ignore_index=True
                    )
        finally:
            # Always close the article tab, even when scraping raised, so
            # stray tabs cannot shift the window indexes used above.
            webDriver.close()
            webDriver.switch_to.window(webDriver.window_handles[0])

        return webDriver

    def GetArticle(self, webDriver):
        """Read writer, date, body text and comments of the open article.

        Returns:
            A dict of one-element lists with the keys ``'Writer'``,
            ``'Date'``, ``'Text'`` and ``'Comments'``, or None when any of
            the required article elements is missing.  Comments are
            optional: if they cannot be read, whatever was collected so far
            is returned.
        """
        webDriver = self.GetContentPage(webDriver)

        # The title is only used in the diagnostic message below; it is not
        # part of the returned row.
        title1 = None
        try:
            title1 = webDriver.find_element_by_css_selector('.ArticleTitle').text
            writer1 = webDriver.find_element_by_css_selector('.nick_box').text
            info = webDriver.find_element_by_css_selector('.article_info')
            date1 = info.find_element_by_css_selector('.date').text
            body = webDriver.find_element_by_css_selector('.se-main-container')
            text1 = "".join(
                part.text for part in body.find_elements_by_css_selector('.se-text')
            )
        except Exception:
            # "except Exception" rather than a bare except, so Ctrl-C and
            # SystemExit still stop a long scraping run.
            print("[???] GetArticle :", title1)
            return None

        commentString = ""
        try:
            commentList = webDriver.find_element_by_css_selector('.comment_list')
            for item in commentList.find_elements_by_css_selector('.CommentItem'):
                comment1 = item.find_element_by_css_selector('.comment_text_box').text
                # NOTE(review): the separator is the literal two characters
                # "n|" -- possibly meant to be "\n|".  Left unchanged because
                # it defines the exported format; confirm before changing.
                commentString += comment1+"n|"
        except Exception:
            # Best effort: an article without a readable comment list is
            # still exported with the comments gathered up to this point.
            pass

        rst1 = {
            'Writer': [writer1],
            'Date': [date1],
            'Text': [text1],
            'Comments': [commentString]
        }

        return rst1

    @staticmethod
    def _FindNested(webDriver, classNames, childByTag):
        """Locate ``classNames[0]`` and, if given, the child ``classNames[1]``.

        Args:
            webDriver: Driver to search in.
            classNames: ``(outer_css_selector, inner)``; a falsy ``inner``
                means the outer element itself is wanted.
            childByTag: When true ``inner`` is a tag name, otherwise a CSS
                selector.

        Returns:
            The located element.
        """
        outer = webDriver.find_element_by_css_selector(classNames[0])
        inner = classNames[1]
        if not inner:
            return outer
        if childByTag:
            return outer.find_element_by_tag_name(inner)
        return outer.find_element_by_css_selector(inner)

    def Login(self, params=None):
        """Start Chrome, open ``self.HomepageURL`` and submit the login form.

        Args:
            params: Unused; kept so existing callers keep working.

        Returns:
            The logged-in Chrome driver.  The caller owns it and must quit
            it.

        Raises:
            Whatever Selenium raises while driving the form; the browser is
            quit before the exception propagates.
        """
        # params = {'LoginID', 'LoginPW', 'CafeName', 'Keyword'}
        print(" Login ......................", self.UserID)
        webDriver = webdriver.Chrome(executable_path=cConstants.PATH_CHROMEDRIVER)
        try:
            webDriver.get(self.HomepageURL)

            loginButton = self._FindNested(webDriver, self.Class_LoginButton, False)
            loginButton.send_keys(Keys.RETURN)
            time.sleep(1)

            idField = self._FindNested(webDriver, self.Class_LoginID, True)
            idField.send_keys(self.UserID)
            time.sleep(3)

            pwField = self._FindNested(webDriver, self.Class_LoginPW, True)
            pwField.send_keys(self.UserPW)
            time.sleep(3)

            # The form is submitted through this fixed XPath.  An element
            # configured via Class_LoginSubmit used to be looked up here as
            # well but was never used, so that lookup (which could only
            # fail) is gone.
            webDriver.find_element_by_xpath('//*[@id="frmNIDLogin"]/fieldset/input').click()
            # Long fixed wait after submitting (presumably to leave time for
            # the login to complete, including any manual step -- confirm).
            time.sleep(30)
        except BaseException:
            # Do not leave an orphaned browser behind when login fails.
            webDriver.quit()
            raise

        return webDriver

 

반응형

'SWDesk' 카테고리의 다른 글

[Python] Download Images from Web  (0) 2021.04.24
series2supervised 변환 예제  (0) 2021.04.10
DataFrame 예제  (0) 2021.03.18
Database Backup  (0) 2021.03.11
[Python] 네이버 카페 글쓰기 프로그램 소스  (0) 2021.03.10
반응형
250x250
최근에 올라온 글
최근에 달린 댓글
Total
Today
Yesterday
링크
«   2025/02   »
1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28
글 보관함