SWDesk
[Python] 기업 정보 수집 프로그램 소스
inhae
2022. 6. 4. 18:46
cCompanyInfoCollector
from pandas import DataFrame
from datetime import datetime, timedelta
from BExcel import cBExcel
class cCompanyInfoCollector:
    """Collect company rows from a pasted SMINFO text dump into an Excel file.

    The input is a UTF-8 text file containing one or more tab-separated
    tables.  A table starts after a header line beginning with
    ``"기업명\\t대표자명"`` and ends at the first line containing ``"페이지로"``.
    """

    def __init__(self):
        pass

    def CollectCompanyInfo_SMINFO(self, fileName=None):
        """Parse *fileName* and append its company rows to today's workbook.

        Args:
            fileName: Path of the UTF-8 text file to read.  When omitted or
                empty, ``"companies.txt"`` in the current directory is used.

        Returns:
            None.  Returns early, without touching the workbook, when the
            input contains no company rows.

        Raises:
            OSError: If the input file cannot be opened.
        """
        # Function-scope import: the module itself only imports DataFrame.
        from pandas import concat

        if not fileName:
            # Lower-case name kept on purpose: it is the file the previous
            # implementation actually opened, regardless of the argument.
            fileName = "companies.txt"

        dateName = datetime.now().strftime("%Y%m%d")
        excelName = "Companies_SMINFO_" + dateName + ".xlsx"

        # `with` guarantees the handle is released even if parsing raises.
        with open(fileName, 'r', encoding="utf-8") as file1:
            companyRows = self._ParseSMINFOLines(file1)

        if not companyRows:
            return

        # Build the frame once from all rows instead of appending row by row
        # (DataFrame.append was removed in pandas 2.0).
        CompaniesDF = DataFrame(companyRows)

        excel1 = cBExcel()
        # LoadData is used as returning a DataFrame (possibly empty) holding
        # what is already stored under excelName -- confirm against cBExcel.
        dataDF0 = excel1.LoadData(excelName)
        if dataDF0.empty:
            resultDF = CompaniesDF
        else:
            resultDF = concat([dataDF0, CompaniesDF], ignore_index=True)
        excel1.SaveData(resultDF, excelName)
        print(resultDF)

    @staticmethod
    def _ParseSMINFOLines(lines):
        """Return one dict per company row found in the iterable *lines*."""
        rows = []
        isCompanyInfo = False
        for line1 in lines:
            # A header line (re)opens a table and is never data itself.
            if line1.startswith("기업명\t대표자명"):
                isCompanyInfo = True
                continue
            if not isCompanyInfo:
                continue
            # The pager line closes the table; `in` also matches when the
            # marker sits at the very start of the line.
            if "페이지로" in line1:
                isCompanyInfo = False
                continue
            infos11 = line1.split("\t")
            if len(infos11) < 5:
                # Blank or truncated line inside the table: nothing to record.
                continue
            print(infos11)
            rows.append({
                'CompanyName': infos11[0].strip(),
                'CEO': infos11[1].strip(),
                'CompanyType': infos11[2].strip(),
                'BusinessCategory': infos11[3].strip(),
                'Address': infos11[4].strip(),
            })
        return rows
반응형