본문 바로가기

코딩으로 익히는 Python/모델링

[Python] 14. NN : XOR문제, MLPClassifier

728x90
반응형
SMALL
import numpy as np 
import pandas as pd 
import seaborn as sb 
import matplotlib.pyplot as plt
import mglearn
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression

import warnings
warnings.simplefilter('ignore')

 

x_data = np.array( [[0,0],[0,1],[1,0],[1,1]])
y_data = np.array( [[0],[1],[1],[0]])

LogisticRegression

 

model_logi = LogisticRegression()
model_logi.fit(x_data,y_data)
[OUT] :

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

 

model_logi.coef_
[OUT] :

array([[0., 0.]])

 

model_logi.score(x_data,y_data)
[OUT] :

0.5

 

model_logi.predict(x_data) # 0,1,1,0 을 맞추는 게 불가능함 -> XOR
[OUT] :

array([0, 0, 0, 0])

 

XOR을 해결하기 위한 방법? Deep Learning(딥러닝)


MLPClassifier

 

model_mlp = MLPClassifier()
model_mlp.fit(x_data,y_data) #  hidden_layer_sizes=(100,)
[OUT] :

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

 

model_mlp.coefs_ # w
[OUT] :

[array([[-1.53465171e-01,  3.12797632e-01,  1.68773614e-08,
         -5.05586978e-04, -5.10634594e-08, -1.52860908e-02,
         -3.76451302e-01,  2.63648464e-01, -2.33248386e-01,
# ...
# 중략
# ...
          9.13015200e-02,  2.42897801e-01,  2.39018800e-02,
          2.52884322e-01,  2.25935045e-01,  2.09518351e-01,
          1.35635592e-01, -1.58099958e-01, -3.42959764e-01,
         -2.00350774e-01,  2.48529832e-02,  8.58031470e-07,
          3.67113360e-01]]),
 array([[-1.36117333e-01],
        [ 4.36824448e-01],
        [ 1.78162144e-02],
        [-4.32418334e-02],
# ...
# 중략
# ...
        [-2.49441676e-01],
        [ 3.99756702e-01],
        [-2.93557791e-01],
        [ 3.45002233e-02],
        [ 2.58970275e-01]])]

 

len(model_mlp.coefs_) # ndarray 2개가 있음
[OUT] :

2

 

model_mlp.coefs_[0].shape # (특성데이터 개수, 히든레이어의 w 개수)
[OUT] :

(2, 100)

 

model_mlp.coefs_[1].shape # (히든레이어의 w 개수, 라벨 개수)
[OUT] :

(100, 1)

 

model_mlp.intercepts_ # b
[OUT] :

[array([ 1.77629878e-01,  1.19080432e-03, -1.05335171e-01, -2.20170378e-01,
        -4.35777799e-02, -1.01635701e-02,  6.78783868e-05,  1.73971684e-02,
         2.33427798e-01,  2.03594596e-01, -1.95936527e-04,  2.03354196e-01,
         1.79169703e-01,  9.42591454e-03,  1.11215718e-01, -4.13967760e-04,
         5.56719914e-03,  1.96086064e-01, -1.97997616e-01,  1.00915031e-01,
         2.53565788e-01,  2.10636378e-01, -2.08344181e-01,  1.68049734e-01,
         4.02675569e-02, -2.38766181e-01, -1.43100834e-01, -3.11647957e-02,
         1.39579545e-01,  5.37992644e-02, -2.31756792e-04,  9.01729914e-02,
         5.53215677e-07,  6.07463708e-02, -8.89498541e-04,  1.83974288e-01,
         2.43416897e-01, -1.09254274e-01, -1.44730088e-05, -6.31155066e-03,
         1.70065547e-01, -2.42637383e-04, -1.37539689e-01, -2.16691279e-01,
        -9.07759115e-02,  1.81378620e-04, -4.50294135e-02, -2.68902469e-01,
        -4.49067091e-03,  1.70582531e-04,  2.39000034e-01,  2.29476761e-01,
        -1.27314388e-01, -2.44700627e-01,  5.96464187e-02,  2.63689022e-02,
        -4.35778818e-05, -1.89348543e-01, -2.31774932e-02,  1.60281287e-01,
        -1.66609975e-01,  1.74868267e-01,  2.03728059e-01,  1.69001017e-01,
        -8.12575609e-02,  2.83478168e-04,  2.46170928e-01, -9.13650011e-02,
        -2.73742395e-01, -1.48048034e-01,  1.30811149e-01, -3.12036509e-02,
         1.99549441e-01,  9.01935757e-05,  7.91250536e-02, -2.08443302e-01,
         1.33076959e-01, -5.87785956e-02,  2.70014050e-01,  1.83909198e-04,
         2.15187084e-01,  2.34449123e-01, -1.67018951e-01, -2.33917038e-01,
         1.67104582e-01,  7.46848582e-02,  2.25379540e-01,  1.20354545e-01,
        -2.39922049e-01, -1.20281260e-01,  9.63996692e-02,  5.89296626e-04,
        -2.09293881e-01,  3.02112389e-03,  3.84279395e-02,  3.42873271e-01,
        -1.10435271e-02,  2.73860139e-01, -1.98489248e-01,  3.33495581e-04]),
 array([0.16046711])]

 

model_mlp.intercepts_[0].shape # 100개 (데이터와 w를 matmul하고 +b해줘야하므로)
[OUT] :

(100,)

 

model_mlp.intercepts_[1].shape # 1개
[OUT] :

(1,)

 

len(model_mlp.intercepts_) # ndarray 2개가 있음
[OUT] :

2

 

model_mlp.score(x_data,y_data)
[OUT] :

1.0

 

model_mlp.predict(x_data) # 0,1,1,0 을 맞추는 게 가능함 -> XOR문제 해결
[OUT] :

array([0, 1, 1, 0])

MLPClassifier hidden layer sizes 바꾸기

 

#1

 

model_mlp = MLPClassifier(hidden_layer_sizes=(200,4))
model_mlp.fit(x_data,y_data) #  hidden_layer_sizes=(200,4)로 바꿔봄
[OUT] :

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(200, 4), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

 

print(len(model_mlp.coefs_)) # ndarray 3개가 있음
print('hidden layer1 :',model_mlp.coefs_[0].shape)
print('hidden layer2 :',model_mlp.coefs_[1].shape)
print('output layer :',model_mlp.coefs_[2].shape)
print(len(model_mlp.intercepts_)) # ndarray 3개가 있음
print(model_mlp.intercepts_[0].shape) # 200개 (데이터와 w를 matmul하고 +b해줘야하므로)
print(model_mlp.intercepts_[1].shape) # 4개
print(model_mlp.intercepts_[2].shape) # 1개
print('mlp score :',model_mlp.score(x_data,y_data))
print('x_data prediction :',model_mlp.predict(x_data)) # 0,1,1,0 을 맞추는 게 가능함
[OUT] :

3
hidden layer1 : (2, 200)
hidden layer2 : (200, 4)
output layer : (4, 1)
3
(200,)
(4,)
(1,)
mlp score : 1.0
x_data prediction : [0 1 1 0]

 

#2

 

model_mlp = MLPClassifier(hidden_layer_sizes=(200,10))
model_mlp.fit(x_data,y_data) #  hidden_layer_sizes=(200,10)으로 바꿔봄
[OUT] :

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(200, 10), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

 

print(len(model_mlp.coefs_)) # ndarray 3개가 있음
print('hidden layer1 :',model_mlp.coefs_[0].shape)
print('hidden layer2 :',model_mlp.coefs_[1].shape)
print('output layer :',model_mlp.coefs_[2].shape)
print(len(model_mlp.intercepts_)) # ndarray 3개가 있음
print(model_mlp.intercepts_[0].shape) # 200개 (데이터와 w를 matmul하고 +b해줘야하므로)
print(model_mlp.intercepts_[1].shape) # 10개
print(model_mlp.intercepts_[2].shape) # 1개
print('mlp score :',model_mlp.score(x_data,y_data))
print('x_data prediction :',model_mlp.predict(x_data)) # 0,1,1,0 을 맞추는 게 가능함
[OUT] :

3
hidden layer1 : (2, 200)
hidden layer2 : (200, 10)
output layer : (10, 1)
3
(200,)
(10,)
(1,)
mlp score : 1.0
x_data prediction : [0 1 1 0]

 

#3

 

model_mlp = MLPClassifier(hidden_layer_sizes=(200,10,5))
model_mlp.fit(x_data,y_data) #  hidden_layer_sizes=(200,10,5)로 바꿔봄
[OUT] :

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(200, 10, 5), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

 

print(len(model_mlp.coefs_)) # ndarray 4개가 있음
print('hidden layer1 :',model_mlp.coefs_[0].shape)
print('hidden layer2 :',model_mlp.coefs_[1].shape)
print('hidden layer3 :',model_mlp.coefs_[2].shape)
print('output layer :',model_mlp.coefs_[3].shape)
print(len(model_mlp.intercepts_)) # ndarray 4개가 있음
print(model_mlp.intercepts_[0].shape) # 200개 (데이터와 w를 matmul하고 +b해줘야하므로)
print(model_mlp.intercepts_[1].shape) # 10개
print(model_mlp.intercepts_[2].shape) # 5개
print(model_mlp.intercepts_[3].shape) # 1개
print('mlp score :',model_mlp.score(x_data,y_data))
print('x_data prediction :',model_mlp.predict(x_data)) # 0,1,1,0 을 맞추는 게 가능함
[OUT] :

4
hidden layer1 : (2, 200)
hidden layer2 : (200, 10)
hidden layer3 : (10, 5)
output layer : (5, 1)
4
(200,)
(10,)
(5,)
(1,)
mlp score : 1.0
x_data prediction : [0 1 1 0]

 

#4

 

model_mlp = MLPClassifier(hidden_layer_sizes=(100,50,30,20))
model_mlp.fit(x_data,y_data) # hidden_layer_sizes=(100,50,30,20)으로 바꿔봄
[OUT] :

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100, 50, 30, 20), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

 

print(len(model_mlp.coefs_)) # ndarray 5개가 있음
print('hidden layer1 :',model_mlp.coefs_[0].shape)
print('hidden layer2 :',model_mlp.coefs_[1].shape)
print('hidden layer3 :',model_mlp.coefs_[2].shape)
print('hidden layer4 :',model_mlp.coefs_[3].shape)
print('output layer :',model_mlp.coefs_[4].shape)
print(len(model_mlp.intercepts_)) # ndarray 5개가 있음
print(model_mlp.intercepts_[0].shape) # 100개 (데이터와 w를 matmul하고 +b해줘야하므로)
print(model_mlp.intercepts_[1].shape) # 50개
print(model_mlp.intercepts_[2].shape) # 30개
print(model_mlp.intercepts_[3].shape) # 20개
print(model_mlp.intercepts_[4].shape) # 1개
print('mlp score :',model_mlp.score(x_data,y_data))
print('x_data prediction :',model_mlp.predict(x_data)) # 0,1,1,0 을 맞추는 게 가능함
[OUT] :

5
hidden layer1 : (2, 100)
hidden layer2 : (100, 50)
hidden layer3 : (50, 30)
hidden layer4 : (30, 20)
output layer : (20, 1)
5
(100,)
(50,)
(30,)
(20,)
(1,)
mlp score : 1.0
x_data prediction : [0 1 1 0]

 

결론

- 이 데이터셋은 단순하기 때문에 hidden layer 주기만 하면 0,1,1,0 나옴
- 만약 안나오면 max_iter 값을 기본값(200)보다 크게 늘려 주기
- hidden_layer_sizes=(200,10) -> 히든레이어 2개, (200,10,5) -> 3개, (100,50,30,20) -> 4개

 


MLPClassifier 원리 이해 위한 직접 계산

 

model_mlp = MLPClassifier(hidden_layer_sizes=(100,))
model_mlp.fit(x_data,y_data) 
[OUT] :

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

 

def relu(x):
    """Rectified linear unit: element-wise max(x, 0).

    Mirrors the hidden-layer activation MLPClassifier uses with
    activation='relu'.
    """
    # Clipping below at 0 with no upper bound is equivalent to maximum(0, x).
    return np.clip(x, 0, None)
    
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), the binary output activation.

    Fixes vs. original: the body was tab-indented (inconsistent with the
    file's 4-space indentation), and 1/(1+np.exp(-x)) overflows np.exp and
    emits a RuntimeWarning for large-magnitude negative x.  The tanh form
    below is mathematically identical — sigmoid(x) = (1 + tanh(x/2)) / 2 —
    and numerically stable for all finite x.
    """
    return 0.5 * (1.0 + np.tanh(0.5 * x))

 

a = np.matmul(x_data,model_mlp.coefs_[0])+model_mlp.intercepts_[0]
a.shape 
[OUT] :

(4, 100)

 

b = np.matmul(relu(a),model_mlp.coefs_[1])+model_mlp.intercepts_[1]
b.shape # a -> relu함수 통과시켜준 후 b 결과값 내기
[OUT] :

(4, 1)

 

b # b -> sigmoid함수 통과시켜준 후 최종 output 내기
[OUT] :

array([[-0.7312762 ],
       [ 0.79304013],
       [ 0.78729295],
       [-0.77487063]])

 

sigmoid(b)
[OUT] :

array([[0.32491474],
       [0.68848373],
       [0.68724978],
       [0.31542643]])

 

output = sigmoid(b)
# Binarize the sigmoid probabilities at the usual 0.5 decision threshold;
# this reproduces model_mlp.predict for the binary case.
result = [0 if p < 0.5 else 1 for p in output]
result
[OUT] :

[0, 1, 1, 0] # 결과값 잘 나옴 model_mlp.predict(x_data) == array([0, 1, 1, 0])

 

결론

- hidden layer는 relu통과
- output은 이진분류는 sigmoid, 다중분류는 softmax 통과


 

 

 

 

728x90
반응형
LIST