티스토리 뷰

SWDesk

[Python Test] Manipulating DataFrame

inhae 2022. 2. 14. 11:54
  • columns
  • info()
  • shape
  • describe()
  • max()
  • isnull()
  • sum()
  • duplicated()
  • drop_duplicates()
  • drop()
  • index
  • append()
  • update()
  • str.slice()
  • assign()
  • to_numeric()
def Test_AnalyzeEPowerData1(): # 2022.02.10
    """Walk through common pandas DataFrame manipulations on e-power data.

    Loads records through ``cBOpenData.LoadEPowerDatasfromServer``, converts
    every column to numeric, and then demonstrates, printing each result:
    ``info``/``shape``/``describe``/``max``, null and duplicate handling,
    dropping rows by label and by condition, adding rows, frame arithmetic,
    re-indexing, slicing a string column into date/time parts with
    ``assign``, filtering to one day, and plotting ``currPwrTot``.

    Takes no arguments and returns ``None``; all output goes to stdout and
    to a matplotlib window.
    """
    from BOpenData import cBOpenData
    import pandas as pd

    openData = cBOpenData()
    param1 = {
        'RequestType': "LoadEPowerDatas",
        'StartDate': "20220209000000",
        #'CheckQuery': "Y",
        'DataNumber': 300
    }
    # Raw frame as returned by the loader; 'baseDatetime' is used with the
    # .str accessor further down, so it is kept unconverted here.
    ePowerData0_ = openData.LoadEPowerDatasfromServer(param1)

    # Numeric copy of every column.
    ePowerData0 = ePowerData0_.apply(pd.to_numeric)
    print("<Type>")
    print(ePowerData0.info())
    print("<Shape>")
    print(ePowerData0.shape)
    print("<Statistics>")
    print(ePowerData0.describe())
    print(ePowerData0['suppAbility'].describe())
    print("<Max>")
    max0 = ePowerData0.max()
    print(max0['suppAbility'].dtype)

    print("[EPowerData1]")
    ePowerData1 = ePowerData0.loc[:, ['baseDatetime', 'currPwrTot', 'suppAbility']]
    print(ePowerData1)

    # Remove rows that contain missing values.
    null11 = ePowerData1.isnull().sum()
    print("<null count>")
    print(null11)
    ePowerData11 = ePowerData1.dropna()

    # Count and remove duplicated rows.
    overlapped11 = ePowerData1.duplicated().sum()
    print("<Overlapped>")
    print(overlapped11)
    ePowerData12 = ePowerData1.drop_duplicates()

    # Drop rows by index label.
    ePowerData13 = ePowerData1.drop([1, 3])
    print(ePowerData13)

    # Drop rows selected by a column condition.
    indexes14 = ePowerData1[ePowerData1['currPwrTot'] < 67000].index
    # Same idea with two conditions combined by '|' (element-wise OR).
    indexes14_ = ePowerData1[(ePowerData1['currPwrTot'] < 67000) | (ePowerData1['currPwrTot'] > 70000)].index
    ePowerData14 = ePowerData1.drop(indexes14)
    print("<EPowerData-14>")
    print(ePowerData14)

    # Add a row built from a dict; the omitted 'currPwrTot' becomes NaN.
    dict15 = {
        'baseDatetime': 20220210121212,
        #'currPwrTot': 80000.0,
        'suppAbility': 100000.0
    }
    ePowerData15_ = pd.DataFrame(dict15, index=['test01'])
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    ePowerData15 = pd.concat([ePowerData1, ePowerData15_])
    print("<EPowerData-15>")
    print(ePowerData15)

    # Add rows, method 2: build a frame from a list of tuples.
    data16 = [(20220202001010, 345465, 33653476), (20220203494949, 23452308, 384530)]
    ePowerData16_ = pd.DataFrame(data16, columns=ePowerData1.columns, index=['haha', 'haha2'])
    ePowerData16 = pd.concat([ePowerData1, ePowerData16_])
    print("<EPowerData-16>")
    print(ePowerData16)

    # Element-wise arithmetic between two frames; both slices are re-indexed
    # from 0 so the subtraction lines up row by row.
    ePowerData171 = ePowerData1.iloc[0:3].reset_index(drop=True)
    ePowerData172 = ePowerData1.iloc[2:5].reset_index(drop=True)
    ePowerData17 = ePowerData171 - ePowerData172
    print("<EPowerData-17>")
    print(ePowerData17)

    # Independent copy: plain assignment would alias ePowerData1, so the
    # index change below would show up in the original as well.
    ePowerData18 = ePowerData1.copy()
    ePowerData18.index = ePowerData1['baseDatetime']
    print("<EPowerData-18>")
    print(ePowerData18)
    print(ePowerData1)

    # Split the raw 'baseDatetime' strings into day / hour / minute / second.
    data1 = ePowerData0_['baseDatetime'].str.slice(start=0, stop=8)
    data2 = ePowerData0_['baseDatetime'].str.slice(start=8, stop=10)
    data3 = ePowerData0_['baseDatetime'].str.slice(start=10, stop=12)
    data4 = ePowerData0_['baseDatetime'].str.slice(start=12)
    # The day column must be named 'dayValue' because the filter below reads it.
    ePowerData20 = ePowerData1.assign(dayValue=data1, hourValue=data2, minValue=data3, secValue=data4)
    print("<EPowerData-20>")
    print(ePowerData20)

    # Keep only the rows of the selected day.
    dayValue21 = "20220209"
    otherDays = ePowerData20[ePowerData20['dayValue'] != dayValue21].index
    ePowerData21 = ePowerData20.drop(otherDays)
    print("<EPowerData-21>")
    print(ePowerData21)

    if ePowerData21.empty:
        # Nothing to summarise or plot; the min/max lookups below would
        # otherwise fail with IndexError.
        print("<EPowerData-21> is empty")
        return

    stat21 = ePowerData21.describe()
    print("<Statistics-2>")
    print(stat21)
    minValue = stat21['currPwrTot']['min']
    maxValue = stat21['currPwrTot']['max']
    # NOTE(review): the sum is taken over ePowerData20 (all days) and divided
    # by 12; presumably a per-hour figure for 5-minute samples - confirm.
    sumValue = ePowerData20['currPwrTot'].sum()/12

    # Time of the first row holding the minimum.
    minIndexes = ePowerData21[ePowerData21['currPwrTot'] == minValue].index
    minHour = ePowerData21['hourValue'][minIndexes[0]]
    minMin = ePowerData21['minValue'][minIndexes[0]]
    minSec = ePowerData21['secValue'][minIndexes[0]]
    print("<min>", minValue, "@", minHour, ":", minMin, ":", minSec)

    # Time of the first row holding the maximum.
    maxIndexes = ePowerData21[ePowerData21['currPwrTot'] == maxValue].index
    maxHour = ePowerData21['hourValue'][maxIndexes[0]]
    maxMin = ePowerData21['minValue'][maxIndexes[0]]
    maxSec = ePowerData21['secValue'][maxIndexes[0]]
    print("<max>", maxValue, "@", maxHour, ":", maxMin, ":", maxSec)

    # Summary of the selected day (built but not consumed further here).
    stat22 = {
        'Mean': stat21['currPwrTot']['mean'],
        'Std': stat21['currPwrTot']['std'],
        'Max': maxValue,
        'MaxTime': maxHour + maxMin + maxSec,
        'Min': minValue,
        'Mintime': minHour + minMin + minSec
    }
    print("<sum>", sumValue)

    import matplotlib.pyplot as plt
    ePowerData21[['currPwrTot']].plot()
    plt.title("plot1")
    plt.xlabel("time")
    plt.ylabel("value")
    plt.show()
반응형

'SWDesk' 카테고리의 다른 글

BStockTrade - DataManager  (1) 2022.03.15
Python에서 한글 사용 예제  (0) 2022.02.24
[Python] Tistory API 수정  (0) 2021.11.21
[Python] Kakao API 수정  (0) 2021.11.16
[Python] Class Member Variables  (0) 2021.10.30