본문 바로가기

코딩으로 익히는 Python/모델링

[Python] 16. 강아지&고양이 이미지 분류 실습

728x90
반응형
SMALL
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
import mglearn
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
import sklearn.metrics as m

# from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# !pip install tensorflow하면 되는데 아나콘다 콘솔창에서 하기
# -> yes or no 물어보는 게 있을 수 있기 때문

from sklearn.datasets import fetch_openml

import warnings
warnings.simplefilter('ignore')

 

데이터 불러오기 (data_generator.flow_from_directory)

아래 'myimg.zip'의 압축을 풀어서 나온 'myimg' 폴더를 실행할 Python 파일(노트북)과 같은 경로에 넣어두기 (이미지 출처 : 구글 이미지)

myimg.zip
7.25MB


강아지 & 고양이 이미지 분류 모델 학습시키기

 

# Build a generator that rescales pixel values from [0, 255] to [0, 1].
data_generator = ImageDataGenerator(rescale=1.0 / 255)

# Read the images under "myimg" (one subfolder per class), resized to 98x98.
# class_mode='sparse' yields integer labels; without it the default is
# one-hot encoding, i.e. cat -> (1, 0) and dog -> (0, 1).
traingen = data_generator.flow_from_directory(
    "myimg",
    target_size=(98, 98),
    class_mode='sparse',
    seed=1,
)
[OUT] :

Found 32 images belonging to 2 classes.

 

# Pull one batch of (images, labels) from the iterator.
# Use the built-in next() instead of the .next() method: the built-in works
# on every Keras version, while newer Keras releases no longer expose .next().
# NOTE(review): no batch_size was passed to flow_from_directory, so one batch
# holds at most the Keras default (32) images - here that is the whole dataset.
x_train, y_train = next(traingen)

 

x_train.shape # (samples, height, width, channels)
[OUT] :

(32, 98, 98, 3)

 

# One label per image, so the label array is 1-D.
y_train.shape
[OUT] :

(32,)

 

y_train # class_mode='sparse' label-encodes the classes: cat -> 0, dog -> 1
[OUT] :

array([1., 0., 1., 1., 1., 1., 1., 1., 1., 0., 1., 0., 1., 1., 0., 0., 1.,
       0., 0., 0., 1., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0.],
      dtype=float32)

 

# Display the first image of the batch as a sanity check on loading/rescaling.
plt.imshow(x_train[0])
plt.show()

 

# 특성데이터는 fit할 때 매트릭스줘야함 그런데 현재 4차원
x_reshape = x_train.reshape(32,-1) # 98*98*3 = 28812
x_reshape.shape
[OUT] :

(32, 28812)

 

# Train a multi-layer perceptron on the flattened images.
# verbose=1 prints the loss at every iteration.
model = MLPClassifier(verbose=1)
model.fit(x_reshape,y_train)
[OUT] :

Iteration 1, loss = 0.84443503
Iteration 2, loss = 32.48735774
Iteration 3, loss = 11.40872558
Iteration 4, loss = 6.76982243
Iteration 5, loss = 8.14841605
Iteration 6, loss = 7.50756683
Iteration 7, loss = 6.12564047
Iteration 8, loss = 4.25307509
Iteration 9, loss = 2.06693820
Iteration 10, loss = 0.64580305
# ...
# 중략
# ...
Iteration 199, loss = 0.03868952
Iteration 200, loss = 0.03832532
MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=1, warm_start=False)

 

# Accuracy on the training data itself: the model was already fit on these
# samples, so a high score is expected and says little about new images.
model.score(x_reshape,y_train)
[OUT] :

1.0

 

model.predict([x_reshape[0]]) # as expected, an image seen during training is predicted correctly
[OUT] :

array([1.], dtype=float32)

 

# Load the test image at the same 98x98 size used for the training images.
imgData = image.load_img('myimg/test.jpg',target_size=(98,98))
imgData

 

# Divide by 255 to scale pixels to [0, 1], matching the rescale=1./255
# applied to the training images; the model must see the same value range.
testImg = image.img_to_array(imgData)/255
testImg
[OUT] :

array([[[0.93333334, 0.99215686, 1.        ],
        [0.9411765 , 1.        , 1.        ],
        [0.9019608 , 0.972549  , 0.98039216],
        ...,
        [0.17254902, 0.17254902, 0.1254902 ],
        [0.18431373, 0.27058825, 0.1882353 ],
        [0.2901961 , 0.3764706 , 0.28627452]],

       [[0.9372549 , 0.99215686, 1.        ],
        [0.94509804, 1.        , 1.        ],
        [0.9098039 , 0.96862745, 0.98039216],
        ...,
        [0.1764706 , 0.19215687, 0.13725491],
        [0.19607843, 0.2901961 , 0.20392157],
        [0.23137255, 0.3254902 , 0.23137255]],

       [[0.9490196 , 0.9843137 , 1.        ],
        [0.9529412 , 0.99607843, 1.        ],
        [0.9137255 , 0.96862745, 0.98039216],
        ...,
        [0.1764706 , 0.21960784, 0.15686275],
        [0.21568628, 0.33333334, 0.23921569],
        [0.22745098, 0.3372549 , 0.23921569]],

       ...,

       [[0.20392157, 0.42352942, 0.38039216],
        [0.19607843, 0.41568628, 0.37254903],
        [0.2       , 0.40784314, 0.36862746],
        ...,
        [0.78039217, 0.7137255 , 0.6509804 ],
        [0.7647059 , 0.7019608 , 0.6431373 ],
        [0.84705883, 0.7921569 , 0.7411765 ]],

       [[0.2       , 0.44313726, 0.39215687],
        [0.19215687, 0.42745098, 0.38039216],
        [0.19215687, 0.41960785, 0.37254903],
        ...,
        [0.73333335, 0.6666667 , 0.6039216 ],
        [0.7529412 , 0.6901961 , 0.6313726 ],
        [0.84705883, 0.7921569 , 0.7411765 ]],

       [[0.16078432, 0.4117647 , 0.35686275],
        [0.16862746, 0.41960785, 0.3647059 ],
        [0.18039216, 0.42352942, 0.37254903],
        ...,
        [0.654902  , 0.5882353 , 0.5254902 ],
        [0.68235296, 0.61960787, 0.56078434],
        [0.77254903, 0.7176471 , 0.6666667 ]]], dtype=float32)

 

# A single image is still 3-D here; it must be flattened before predict().
testImg.shape
[OUT] :

(98, 98, 3)

 

# Flatten the image into a single row so it matches the 2-D input used in fit().
model.predict(testImg.reshape(1,-1)) 
# The training set is very small, yet the prediction is correct ->
# probably because this image resembles the dogs seen during training.
[OUT] :

array([1.], dtype=float32)

학습한 모델을 저장하기

 

# Import joblib directly: sklearn.externals.joblib was deprecated in
# scikit-learn 0.21 and removed in 0.23, so the old import raises ImportError.
import joblib

# Persist the trained model to disk (joblib serializes the object as a pickle).
joblib.dump(model,'catdog.pkl')
[OUT] :

['catdog.pkl']

학습한 모델을 불러오기

 

# Reload the saved model and check that it gives the same prediction as before.
# NOTE(review): joblib.load unpickles arbitrary objects - only load files you
# created yourself or otherwise trust.
mymodel = joblib.load('catdog.pkl')
mymodel.predict(testImg.reshape(1,-1))
[OUT] :

array([1.], dtype=float32)

 

728x90
반응형
LIST