본문 바로가기

코딩으로 익히는 Python/Pandas

[Python] 15. pandas DataFrame 통계&시각화 : sum(), unstack(), mean(), values, min(), idxmax(), median(), quantile(), count(), describe()예제

728x90
반응형
SMALL
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import matplotlib

# Select a font with Korean glyphs so Korean text in the charts (e.g. the
# '점' labels drawn with plt.text) can be rendered.
# NOTE(review): AppleGothic is presumably only installed on macOS — confirm
# and choose another Korean-capable font on other platforms.
rc('font', family='AppleGothic')
# Render minus signs with the ASCII hyphen rather than the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False

 

# Scores in three subjects (one column each) for four rows labelled 'a'..'d'.
data = {
    'eng': [10, 30, 50, 70],
    'kor': [20, 40, 60, 80],
    'math': [90, 50, 20, 70],
}
df = pd.DataFrame(data, index=list('abcd'))
df


def fn(v):
    """Print the received value between separator lines and return its sum.

    Used with ``DataFrame.apply`` to show what each call receives.

    Args:
        v: Object exposing ``.sum()`` (a pandas Series when called through
            ``DataFrame.apply``).

    Returns:
        The result of ``v.sum()``.
    """
    separator = "========="
    print(separator)
    print("v=>", v)
    print(separator)
    total = v.sum()
    return total
# With the default axis, fn receives one column at a time, so the result holds the per-column sums.
df.apply( fn )
[OUT] :

=========
v=> a    10
b    30
c    50
d    70
Name: eng, dtype: int64
=========
=========
v=> a    20
b    40
c    60
d    80
Name: kor, dtype: int64
=========
=========
v=> a    90
b    50
c    20
d    70
Name: math, dtype: int64
=========

eng     160
kor     200
math    230
dtype: int64

 

# With axis=1, fn receives one row at a time, so the result holds the per-row sums.
df.apply( fn , axis=1 )
[OUT] :

=========
v=> eng     10
kor     20
math    90
Name: a, dtype: int64
=========
=========
v=> eng     30
kor     40
math    50
Name: b, dtype: int64
=========
=========
v=> eng     50
kor     60
math    20
Name: c, dtype: int64
=========
=========
v=> eng     70
kor     80
math    70
Name: d, dtype: int64
=========

a    120
b    120
c    130
d    220
dtype: int64

 

# Same per-column sum, written as an inline lambda instead of a named function.
df.apply( lambda v: v.sum() )
[OUT] :

eng     160
kor     200
math    230
dtype: int64

 

# If the average of the Korean, English and math scores is 70 or more,
# add a '결과' column holding '합격', otherwise '불합격'.
# Only the score columns are averaged: once '결과' (a string column) exists,
# averaging whole rows would mix strings into mean() and fail on a re-run.
score_cols = ['eng', 'kor', 'math']
df['결과'] = df[score_cols].apply(lambda v: '합격' if v.mean() >= 70 else '불합격', axis=1)
df

 

# The two rows with the largest 'eng' values.
df.nlargest(2,'eng' )


시각화

# No kind given, so DataFrame.plot() draws its default chart (a line plot).
df.plot()
plt.show()

 

# Bar chart of the DataFrame.
df.plot(kind='bar')
plt.show()

 

# Stacked bar chart with every segment labelled by its score.
df.plot(kind='bar', stacked=True)

score_cols = ['eng', 'kor', 'math']
# The labels are placed at x = 0, 1, 2, ... in row order, so enumerate()
# supplies the x position that belongs to each row's bar.
for pos, (i, sr) in enumerate(df.iterrows()):
    print(i, sr['eng'], sr['kor'], sr['math'] )
    bottom = 0  # height at which the current segment starts
    for col in score_cols:
        score = sr[col]
        # Center the label inside its segment: segment start + half its height.
        plt.text(pos, bottom + score/2, f'{score}점', va='center', ha='center')
        bottom += score
plt.show()

 

# Bar chart restricted to the 'eng' and 'kor' columns.
df.plot(kind='bar',y=['eng','kor'])
plt.show()

 

# Scatter plot of 'eng' (x) against 'math' (y); s and c supply one marker size and one color per row.
df.plot(kind='scatter', x='eng',y='math', s=[100,200,300,400] ,c=['r','b','g','y'] )
plt.show() # color codes (c): r, g, b, k, m, y, c

 

# Box plot of the DataFrame.
df.plot( kind='box')
plt.show()


# Display the DataFrame, which now also contains the '결과' column.
df

 

# Iterating a DataFrame directly yields its column labels.
for n in df: # same labels as df.columns
    print(n)
[OUT] :

eng
kor
math
결과

 

# Iterating df.index yields the row labels.
for n in df.index:
    print(n)
[OUT] :

a
b
c
d

 

# items() yields (column label, column Series) pairs; each Series is indexed by row label.
for i, sr in df.items():
    print( i, sr['a'], sr['b'],sr['c'],sr['d'] )
[OUT] :

eng 10 30 50 70
kor 20 40 60 80
math 90 50 20 70
결과 불합격 불합격 불합격 합격

 

# iterrows() yields (row label, row Series) pairs; each Series is indexed by column label.
for i, sr in df.iterrows():
    print(i, sr['eng'], sr['kor'], sr['math'] )
[OUT] :

a 10 20 90
b 30 40 50
c 50 60 20
d 70 80 70

review
- plt.text(), f'{}', va='center', ha='center'
728x90
반응형
LIST