티스토리 뷰
Filename : AETronix22B.py
Functions
- Collect images
- Set page-no
- Collect product infos
- Collect category infos
[Collect Images]
def CollectImages(self, sheetName=None, folderName=None):
    """Download every product image listed in a sheet of ``file10.xlsx``.

    Rows are read through ``bExcel.ReadFile``; each row's ``'ImageURL'``
    value is downloaded into ``./Images/<folderName>/`` under the name
    formed by the text after the URL's last ``/``.

    Args:
        sheetName: Sheet to read. Falls back to ``"LiFt"`` when falsy.
        folderName: Sub-folder of ``./Images`` to store files in. Falls
            back to ``"LightingFittings"`` when falsy.

    Returns:
        None. The method works purely through side effects (folders are
        created and files are written).
    """
    fileName = "file10.xlsx"
    folderPath = "./Images"
    if not sheetName:
        sheetName = "LiFt"
    if not folderName:
        folderName = "LightingFittings"

    # One call creates both ./Images and the sub-folder and is a no-op when
    # they already exist.
    folderPathName = folderPath + "/" + folderName
    os.makedirs(folderPathName, exist_ok=True)

    goodsInfoDF = bExcel.ReadFile(fileName, sheetName)
    # NOTE(review): whether ReadFile can return None is not visible here;
    # the None guard is defensive.
    if goodsInfoDF is None or goodsInfoDF.empty:
        return

    seenURLs = set()
    for goodsInfo in goodsInfoDF.to_dict('records'):
        imageURL = goodsInfo.get('ImageURL')
        # Blank spreadsheet cells may come back as NaN (a truthy float), so
        # require a real, non-empty string before doing string operations.
        if not isinstance(imageURL, str) or not imageURL:
            continue
        # The previous check, `(not imageURLs) and (imageURL in imageURLs)`,
        # could never be true; this is the duplicate test it was meant to be.
        if imageURL in seenURLs:
            continue
        seenURLs.add(imageURL)

        imageName = imageURL[imageURL.rfind("/") + 1:]
        if not imageName:
            # URL ends with "/", so there is no file name to save under.
            continue
        imagePathName = folderPathName + "/" + imageName
        if os.path.exists(imagePathName):
            # Already downloaded on an earlier run.
            continue
        urllib.request.urlretrieve(imageURL, imagePathName)
[Select PageNo.]
def SetPage(self, webDriver, pageTagInfo, pageNo):
    """Click the pagination element whose text equals ``pageNo``.

    The candidate elements are obtained from
    ``pageTagInfo.GetTargetElem(webDriver, "AE43")``; each element's text is
    read with ``self.WebControl.GetTextinElement`` and compared, as an
    integer, with ``pageNo``.

    Args:
        webDriver: Driver object handed through to the helper calls.
        pageTagInfo: Object providing ``GetTargetElem(webDriver, tag)``.
        pageNo: Integer page number to select.

    Returns:
        ``pageNo`` when a matching element was clicked, otherwise ``None``.
    """
    targetTag11 = "AE43"
    elem11s = pageTagInfo.GetTargetElem(webDriver, targetTag11)
    if not elem11s:
        # Nothing to iterate (no pagination elements were returned).
        return None

    for elem11 in elem11s:
        pageNo11 = self.WebControl.GetTextinElement(elem11)
        if not pageNo11:
            continue
        # Non-numeric labels are expected here; skip them without hiding
        # unrelated errors.
        try:
            candidate = int(pageNo11)
        except (TypeError, ValueError):
            continue
        if candidate != pageNo:
            continue
        # Best effort, as before: a failed click moves on to the next
        # element. `Exception` (not a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        try:
            self.WebControl.TreatClick(webDriver, elem11, 10)
        except Exception:
            continue
        return pageNo
    return None
[Collect Product Infos]
def GetProductInfo(self, pageURL):
    """Open a product page and collect its details into a dict.

    A web driver is obtained from ``self.WebControl``, the page at
    ``pageURL`` is loaded, and elements are looked up through a
    ``cPageTagInfo`` built from ``self.SheetName_TagInfo``. The driver is
    always closed, even when scraping fails part-way.

    Args:
        pageURL: URL of the product page.

    Returns:
        dict with ``'Title'`` and ``'ImageURL'``; ``'Price'`` and ``'Alert'``
        when those texts are non-empty; plus one entry per name/value
        element pair (pairs whose name is empty are stored under
        ``'Remarks'``).
    """
    # Lookup keys handed to GetTargetElem.
    # NOTE(review): presumably defined in the tag-info sheet; confirm.
    tag_title = "AE28"
    tag_price = "AE32"
    tag_ids = "AE30"
    tag_values = "AE31"
    tag_alert = "AE33"
    tag_image = "AE25"

    webDriver = self.WebControl.GetWebDriver()
    try:
        webDriver = self.WebControl.AccessPage(webDriver, pageURL, 5)
        pageTagInfo = cPageTagInfo(self.SheetName_TagInfo)

        elem_image = pageTagInfo.GetTargetElem(webDriver, tag_image)
        imageURL = elem_image.get_attribute('src')

        elem_title = pageTagInfo.GetTargetElem(webDriver, tag_title)
        elem_price = pageTagInfo.GetTargetElem(webDriver, tag_price)
        elem_ids = pageTagInfo.GetTargetElem(webDriver, tag_ids)
        elem_values = pageTagInfo.GetTargetElem(webDriver, tag_values)

        rst1 = {
            'Title': self.WebControl.GetTextinElement(elem_title),
            'ImageURL': imageURL,
        }

        # Store the price only when there is one. The previous condition was
        # inverted (`if not price1`), so real prices were never recorded.
        price1 = self.WebControl.GetTextinElement(elem_price)
        if price1:
            rst1['Price'] = price1

        # The alert is optional: any failure to find or read it is ignored.
        try:
            elem_alert = pageTagInfo.GetTargetElem(webDriver, tag_alert)
            alert1 = self.WebControl.GetTextinElement(elem_alert)
            if alert1:
                rst1['Alert'] = alert1
        except Exception:
            pass

        # Name and value elements are paired by position; zip stops at the
        # shorter list instead of raising IndexError on a mismatch.
        for elem_id, elem_value in zip(elem_ids, elem_values):
            elemName = self.WebControl.GetTextinElement(elem_id)
            elemValue = self.WebControl.GetTextinElement(elem_value)
            if not elemName:
                rst1['Remarks'] = elemValue
            else:
                rst1[elemName] = elemValue

        return rst1
    finally:
        # Release the browser even if a lookup above raised.
        webDriver.quit()
[Collect Category Infos]
def GetMidCatData(self, midCatInfo):
    """Walk the sub-categories of one mid-level category page.

    Loads ``midCatInfo['CatURL']``, finds the sub-category elements
    (lookup key ``"AE10"``), and for each named sub-category calls
    ``self.GetProductInfos(link, name)``, optionally writes
    ``self.ProductInfoDF`` to ``./File16.xlsx``, and appends one row to
    ``self.MidCatInfoDF``.

    Args:
        midCatInfo: dict read with the keys ``'CatURL'`` and
            ``'SheetName'``. It is updated in place with ``'SubCatName'``
            and ``'ProductNumber'`` for each sub-category, as in the
            original code.

    Returns:
        None. Results are left in ``self.ProductInfoDF`` and
        ``self.MidCatInfoDF``.
    """
    # Local import: DataFrame.append was removed in pandas 2.0, so rows are
    # added with pandas.concat, which works on old and new versions.
    import pandas as pd

    self.SubCatNames = []
    self.SubCatInfos = {}
    self.ProductInfoDF = DataFrame()

    url11 = midCatInfo.get('CatURL')
    sheetName = midCatInfo.get('SheetName')
    tag_a = {
        'TagType': "html",
        'TagValue': "a"
    }

    webDriver = self.WebControl.GetWebDriver(False)
    try:
        webDriver = self.WebControl.AccessPage(webDriver, url11, 10)
        self.PageTagInfo.ClearPageElems()
        targetTag11 = "AE10"
        elem11s = self.PageTagInfo.GetTargetElem(webDriver, targetTag11)

        for elem11 in elem11s:
            # Each entry is expected to contain an <a> carrying the
            # sub-category link and name.
            elem11a = self.WebControl.GetElement(elem11, tag_a)
            subCatLink = elem11a.get_attribute('href')
            subCatName = self.WebControl.GetTextinElement(elem11a)
            if not subCatName:
                continue
            print(subCatName, " : ", subCatLink)

            productNumber = self.GetProductInfos(subCatLink, subCatName)
            # Written after every sub-category, so progress already made is
            # on disk if a later one fails.
            if self.Is4ProductInfo:
                bExcel.WriteFile(self.ProductInfoDF, "./File16.xlsx", sheetName)

            midCatInfo['SubCatName'] = subCatName
            midCatInfo['ProductNumber'] = productNumber or 0
            print(midCatInfo)
            # The one-row frame copies the dict's current values, so later
            # in-place updates of midCatInfo do not alter stored rows.
            self.MidCatInfoDF = pd.concat(
                [self.MidCatInfoDF, pd.DataFrame([midCatInfo])],
                ignore_index=True,
            )
    finally:
        # Release the browser even if scraping raised.
        webDriver.quit()
반응형
'SWDesk' 카테고리의 다른 글
[Python] 사진에서 사람의 눈과 코, 입 등 특징점을 추출하는 코드 (0) | 2023.03.05 |
---|---|
[Python] Collecting Company Data (0) | 2023.01.19 |
[Python] Compare 'Dict' (0) | 2022.10.11 |
[Python] DataFrame과 dict 합치기 (0) | 2022.10.10 |
[Python] TTS(text to sound) Example (0) | 2022.10.03 |
반응형
250x250
최근에 올라온 글
최근에 달린 댓글
- Total
- Today
- Yesterday
링크
TAG
- Innovations&Hurdles
- image
- Decorator
- arduino
- 혁신과허들
- 아두이노
- Innovations
- DYOV
- badp
- 전압
- 오블완
- Innovation&Hurdles
- 빌리언트
- 치매방지
- 심심풀이
- 혁신
- 전압전류모니터링
- 둎
- Hurdles
- 허들
- Video
- 티스토리챌린지
- BSC
- 심심풀이치매방지기
- 배프
- 치매
- bilient
- ServantClock
- 전류
- 절연형
일 | 월 | 화 | 수 | 목 | 금 | 토 |
---|---|---|---|---|---|---|
1 | 2 | 3 | 4 | 5 | 6 | 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 |
15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 23 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 |
글 보관함