728x90
반응형
SMALL
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.model_selection import cross_val_score, cross_validate
import multiprocessing
from sklearn.svm import SVC, SVR
from sklearn.manifold import TSNE
from sklearn.linear_model import SGDRegressor, SGDClassifier
from sklearn.datasets import load_boston, load_breast_cancer,load_iris,load_wine
from sklearn.datasets import load_wine
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import SVC # SVM
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import make_column_transformer
import sklearn.metrics as m
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')
SVM(서포트벡터머신)
wine데이터셋을 불러와서 실습해보기
wine = load_wine()
x_data = wine['data']
y_data = wine['target']
x_train, x_test, y_train, y_test = train_test_split( x_data, y_data, test_size=0.2,
random_state=42, stratify=y_data)
model_wine = make_pipeline( StandardScaler(), SVC() )
model_wine.fit(x_train,y_train)
[OUT] :
Pipeline(memory=None,
steps=[('standardscaler',
StandardScaler(copy=True, with_mean=True, with_std=True)),
('svc',
SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None,
coef0=0.0, decision_function_shape='ovr', degree=3,
gamma='scale', kernel='rbf', max_iter=-1,
probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False))],
verbose=False)
model_wine.score(x_test,y_test)
[OUT] :
0.9722222222222222
연습문제
데이터 불러오기 (pd.read_csv)
python 파일 경로에 만든 후 data5폴더에 다음의 'pima-indians-diabetes.data.csv' 파일 넣어놓기
pima-indians-diabetes.data.csv
0.02MB
pima-indians 데이터 GridSearch 통해 최적의 파라미터 찾고 confusion matrix, score 확인하시오
Solution
df = pd.read_csv('data5/pima-indians-diabetes.data.csv')
df.head(3)
x_data = df.iloc[:,:-1]
y_data = df.iloc[:,-1]
x_train, x_test, y_train, y_test = train_test_split( x_data, y_data, test_size=0.2,
random_state=42, stratify=y_data)
# SVC
model_pipe = make_pipeline( StandardScaler(), SVC() )
param_grid={"svc__C":range(11),
"svc__kernel":['linear', 'poly', 'rbf', 'sigmoid', 'precomputed'],
"svc__gamma" : ['scale', 'auto']}
model_pima = GridSearchCV(model_pipe, param_grid, scoring='f1_macro')
model_pima.fit( x_train, y_train )
[OUT] :
GridSearchCV(cv=None, error_score=nan,
estimator=Pipeline(memory=None,
steps=[('standardscaler',
StandardScaler(copy=True,
with_mean=True,
with_std=True)),
('svc',
SVC(C=1.0, break_ties=False,
cache_size=200, class_weight=None,
coef0=0.0,
decision_function_shape='ovr',
degree=3, gamma='scale',
kernel='rbf', max_iter=-1,
probability=False,
random_state=None, shrinking=True,
tol=0.001, verbose=False))],
verbose=False),
iid='deprecated', n_jobs=None,
param_grid={'svc__C': range(0, 11),
'svc__gamma': ['scale', 'auto'],
'svc__kernel': ['linear', 'poly', 'rbf', 'sigmoid',
'precomputed']},
pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
scoring='f1_macro', verbose=0)
# 예측
y_pred = model_pima.best_estimator_.predict( x_test )
y_pred
[OUT] :
array([1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0,
1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0,
1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,
0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
dtype=int64)
# best parameter
model_pima.best_params_
[OUT] :
{'svc__C': 1, 'svc__gamma': 'scale', 'svc__kernel': 'linear'}
# 스코어
model_pima.best_estimator_.score( x_test, y_test )
[OUT] :
0.7207792207792207
# confusion_matrix
cm = m.confusion_matrix(y_test,y_pred)
plt.figure(figsize=(15,10))
sns.heatmap(cm,annot=True,cmap='Reds')
plt.show()
728x90
반응형
LIST
'코딩으로 익히는 Python > 모델링' 카테고리의 다른 글
[Python] 23. PCA(차원축소),T-SNE (11) | 2021.01.26 |
---|---|
[Python] 22. Kmeans (0) | 2021.01.26 |
[Python] 20. 나이브베이즈 (0) | 2021.01.26 |
[Python] 19. MLP : pima-indians 예제 (0) | 2021.01.26 |
[Python] 18. Ensemble(앙상블) : breast_cancer,wine 예제 (5) | 2021.01.25 |