728x90
반응형
SMALL
from bs4 import BeautifulSoup
import urllib.request as REQ
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import matplotlib
from datetime import datetime
from pandas_datareader import data
# Select the AppleGothic font family for all figure text
# (presumably so Korean labels render; verify the font exists on the host).
plt.rc('font', family='AppleGothic')
# Render minus signs with the ASCII hyphen rather than the Unicode minus glyph.
matplotlib.rcParams['axes.unicode_minus'] = False
datetime 표준라이브러리
# Fixed example timestamp: 1995-09-07 09:04:13.
dt = datetime(year=1995, month=9, day=7, hour=9, minute=4, second=13)
dt
[OUT] :
datetime.datetime(1995, 9, 7, 9, 4, 13)
# Print every date/time component of dt, separated by single spaces.
dt_fields = (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
print(*dt_fields)
[OUT] :
1995 9 7 9 4 13
cur = datetime.now() # class method; returns the current local date and time
cur
[OUT] :
datetime.datetime(2020, 12, 29, 10, 32, 35, 378300)
시계열데이터의 정의
1. 인덱스가 datetime형
2. 시계열데이터의 indexing, slicing 사용가능
# Toy frame whose index holds date-like *strings*.
# The list is named `data0` (not `data`) so that it does not shadow the
# `pandas_datareader.data` module imported at the top of the file; that
# module is still needed further down for `data.get_data_yahoo(...)`.
data0 = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]
df = pd.DataFrame(data0)
df.index = ['2019-01-01', '2019-02-01', '2019-03-01', '2019-04-01',
            '2019-05-01', '2019-06-01', '2019-07-01', '2019-08-01']
df
# This is not time-series data yet: the index labels are plain strings.
df.index  # string labels, not a DatetimeIndex -> not time-series data
[OUT] :
Index(['2019-01-01', '2019-02-01', '2019-03-01', '2019-04-01', '2019-05-01',
'2019-06-01', '2019-07-01', '2019-08-01'],
dtype='object')
# Same values as before, but the index is built from datetime objects,
# one per month start from January to August 2019.
data1 = [[2 * i + 1, 2 * i + 2] for i in range(8)]
df1 = pd.DataFrame(
    data1,
    index=[datetime(2019, month, 1) for month in range(1, 9)],
)
df1
df1.index  # datetime64 dtype -> time-series data
[OUT] :
DatetimeIndex(['2019-01-01', '2019-02-01', '2019-03-01', '2019-04-01',
'2019-05-01', '2019-06-01', '2019-07-01', '2019-08-01'],
dtype='datetime64[ns]', freq=None)
문자열을 datetime으로 형변환
- '2020년 1월 1일'처럼 한글이 섞인 문자열은 format을 지정하지 않으면 to_datetime에서 에러남
- ex) pd.to_datetime('2019/1/1') -> ok
- ex) pd.to_datetime('2020년1월1일') -> error (format 인자를 지정하면 변환 가능)
# Parse the string labels into timestamps and store them back as the index.
df.index = pd.to_datetime(df.index)
df.index
# Conversion succeeded
[OUT] :
DatetimeIndex(['2019-01-01', '2019-02-01', '2019-03-01', '2019-04-01',
'2019-05-01', '2019-06-01', '2019-07-01', '2019-08-01'],
dtype='datetime64[ns]', freq=None)
# Date text in a custom layout is parsed by passing an explicit format string.
pd.to_datetime('2020년1월1일',format='%Y년%m월%d일')
# The format must mirror the input text exactly -> only the Y directive is uppercase
[OUT] :
Timestamp('2020-01-01 00:00:00')
삼성 주식 데이터 가져오기
# data.get_data_yahoo(ticker, start_date, end_date)
samsungDF = data.get_data_yahoo('005930.KS', '2017-01-01') # KS -> KOSPI; omitting the end date fetches up to today
samsungDF
samsungDF.index
[OUT] :
DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04', '2017-01-05',
'2017-01-06', '2017-01-09', '2017-01-10', '2017-01-11',
'2017-01-12', '2017-01-13',
...
'2020-12-15', '2020-12-16', '2020-12-17', '2020-12-18',
'2020-12-21', '2020-12-22', '2020-12-23', '2020-12-24',
'2020-12-28', '2020-12-29'],
dtype='datetime64[ns]', name='Date', length=973, freq=None)
# Plot the 'Close' column against the frame's index.
samsungDF.plot(y='Close')
plt.show()
시계열데이터만 적용되는 인덱싱 슬라이싱
# Partial-string indexing: with a DatetimeIndex, rows can be selected by
# year or year-month text. Row selection goes through .loc, because plain
# samsungDF['2019'] is treated as a column lookup by current pandas
# releases and raises KeyError.
samsungDF.loc['2019']
# Only possible because the index is a DatetimeIndex -> date-aware indexing and slicing
samsungDF.loc['2019-1']
samsungDF.loc['2019-01':'2019-02']
...
samsungDF.loc[:'2018']
samsungDF
# Store each row's calendar year as a column so it can serve as a group key.
samsungDF['year'] = samsungDF.index.year
samsungDF.groupby('year').mean()  # mean per year
samsungDF.groupby(samsungDF.index.month).mean()  # mean per calendar month, all years pooled
samsungDF.groupby([samsungDF.index.year,samsungDF.index.month]).mean()  # mean per (year, month) pair
...
# Resampling buckets the rows by calendar period and averages each bucket.
# Frequency aliases are written in their documented uppercase form; the
# lowercase spellings ('m', 'w') are not the canonical aliases.
# NOTE(review): recent pandas releases rename the period-end aliases
# (e.g. 'YE', 'ME', 'QE') - confirm against the installed version.
samsungDF.resample('A').mean()  # yearly mean; A -> year end
samsungDF.resample('M').mean()  # monthly mean; M -> month end
...
samsungDF.resample('W').mean()  # weekly mean; W -> weekly
samsungDF.resample('W-MON').mean()  # weekly mean, weeks anchored on Monday
samsungDF.resample('Q').mean()  # quarterly mean; Q -> quarter end
# Rebuild df1 with an uneven second column so a moving average has
# something to smooth; rows are indexed by month start, Jan-Aug 2019.
col_b_values = (2, 4, 8, 6, 12, 10, 16, 20)
data1 = [list(pair) for pair in zip(range(1, 17, 2), col_b_values)]
month_starts = [datetime(2019, month, 1) for month in range(1, 9)]
df1 = pd.DataFrame(data1, index=month_starts)
df1
# Name the two columns.
df1.columns = ['A', 'B']
# Line plot of column B against the datetime index.
plt.plot(df1.index,df1['B'])
plt.show()
이동평균
# Moving averages over a window of 2 rows.
df1.rolling(window=2).mean()  # first row is NaN: its window is not full
df1.rolling(window=2, min_periods=1).mean()  # a single observation is enough for a result
df1['B'].rolling(window=2).mean()  # moving average of column B only
[OUT] :
2019-01-01 NaN
2019-02-01 3.0
2019-03-01 6.0
2019-04-01 7.0
2019-05-01 9.0
2019-06-01 11.0
2019-07-01 13.0
2019-08-01 18.0
Name: B, dtype: float64
# Overlay column B with its 2-row moving average; the averaged line
# serves as a trend line for the series.
plt.xticks(rotation=45)
b_series = df1['B']
plt.plot(b_series.index, b_series)
plt.plot(b_series.index, b_series.rolling(window=2).mean())
plt.show()
# Closing price together with its 20-row moving average (red solid line).
# Quick alternative for the raw series alone: samsungDF.plot(y='Close')
close = samsungDF['Close']
plt.plot(close.index, close)
plt.plot(close.index, close.rolling(window=20).mean(), 'r-')
plt.show()
review
- datetime만의 독특한 인덱싱 슬라이싱 익히기
728x90
반응형
LIST