티스토리 뷰

cCompanyInfoCollector

 

from datetime import datetime, timedelta

from pandas import DataFrame, concat

from BExcel import cBExcel

class cCompanyInfoCollector:
    """Extract company rows from a saved listing text file into an Excel file.

    The input is a UTF-8 text file in which each company table starts with a
    header line beginning with "기업명<TAB>대표자명" and ends with a line
    containing "페이지로". Every line in between is a tab-separated record.
    """

    # Marker strings exactly as they appear in the input text.
    _TABLE_HEADER = "기업명\t대표자명"
    _TABLE_FOOTER = "페이지로"

    # Output column names, in the order of the tab-separated input fields.
    _FIELD_NAMES = (
        'CompanyName',
        'CEO',
        'CompanyType',
        'BusinessCategory',
        'Address',
    )

    def __init__(self):
        pass

    def CollectCompanyInfo_SMINFO(self, fileName=None):
        """Parse company rows from *fileName* and add them to today's Excel file.

        Args:
            fileName: Path of the UTF-8 text file to parse. Falls back to
                "companies.txt" (the path this method has always opened)
                when omitted or empty.

        Returns:
            None. When no company rows are found the method returns before
            touching the Excel file.

        Raises:
            OSError: If the input file cannot be opened.
        """
        if not fileName:
            fileName = "companies.txt"

        excelName = f"Companies_SMINFO_{datetime.now():%Y%m%d}.xlsx"

        # The context manager closes the file even if parsing raises.
        with open(fileName, 'r', encoding="utf-8") as sourceFile:
            companyRows = self._ParseCompanyRows(sourceFile)

        if not companyRows:
            return

        # Build the frame once; appending row by row is quadratic and
        # DataFrame.append no longer exists in pandas >= 2.0.
        CompaniesDF = DataFrame(companyRows)

        excel1 = cBExcel()
        # NOTE(review): LoadData is assumed to return a DataFrame (possibly
        # empty) for a missing workbook; None is tolerated as well - confirm
        # against cBExcel.
        dataDF0 = excel1.LoadData(excelName)

        if dataDF0 is None or dataDF0.empty:
            excel1.SaveData(CompaniesDF, excelName)
        else:
            dataDF0 = concat([dataDF0, CompaniesDF], ignore_index=True)
            excel1.SaveData(dataDF0, excelName)
            print(dataDF0)

    @classmethod
    def _ParseCompanyRows(cls, lines):
        """Return one dict per company record found in the iterable *lines*."""
        rows = []
        inTable = False
        for line in lines:
            if not inTable:
                # Outside a table only the header line matters.
                inTable = line.startswith(cls._TABLE_HEADER)
                continue

            # `in` rather than find() > 0 so a footer at column 0 also counts.
            if cls._TABLE_FOOTER in line:
                inTable = False
                continue

            # A repeated header inside a table is not a company record.
            if line.startswith(cls._TABLE_HEADER):
                continue

            fields = line.split("\t")
            # Blank or truncated lines cannot fill every column; skip them
            # instead of failing with IndexError.
            if len(fields) < len(cls._FIELD_NAMES):
                continue

            print(fields)
            rows.append({
                name: value.strip()
                for name, value in zip(cls._FIELD_NAMES, fields)
            })
        return rows

 

반응형