
Binary classification with logistic regression / perceptron (2/2) (deep learning & python)

A TensorFlow implementation using the sigmoid function (logistic function) covered the other day:

end0tknr.hateblo.jp

Or rather, this is a transcription of Chapter 2 of the repository below.

github.com
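
For reference, here is the model and the loss that the script below implements: a linear function of the two inputs is squashed by the sigmoid into a probability, and training minimizes the negative log-likelihood (i.e. maximum-likelihood estimation):

\[
P(t=1 \mid x_1, x_2) = \sigma(w_1 x_1 + w_2 x_2 + w_0),
\qquad \sigma(f) = \frac{1}{1 + e^{-f}}
\]
\[
E = -\sum_i \bigl[\, t_i \log p_i + (1 - t_i) \log(1 - p_i) \,\bigr]
\]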

#!/usr/local/bin/python
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from numpy.random import multivariate_normal, permutation
import pandas as pd
from pandas import DataFrame, Series

def make_training_data():
    np.random.seed(20160512)
    
    # t=0 : cases where the two drugs (X1, X2) had no effect
    mu0, variance0, n0 = [10, 11], 20, 20  # mean, variance, number of samples
    # multivariate_normal() : draws samples from a multivariate normal distribution
    # ├ param1 : mean
    # ├ param2 : covariance matrix; np.eye(2) builds a 2x2 identity matrix
    # └ param3 : number of samples
    data0 = multivariate_normal(mu0, np.eye(2)*variance0 ,n0)
    df0 = DataFrame(data0, columns=['x1','x2'])
    df0['t'] = 0

    # t=1 : cases where the two drugs (X1, X2) had an effect
    mu1, variance1, n1  = [18, 20], 15, 22  # mean, variance, number of samples
    data1 = multivariate_normal(mu1, np.eye(2)*variance1 ,n1)
    df1 = DataFrame(data1, columns=['x1','x2'])
    df1['t'] = 1
    
    # concatenate (not join) the two DataFrames
    df = pd.concat([df0, df1], ignore_index=True)

    train_set = df.reindex(permutation(df.index)).reset_index(drop=True)

    # split the x1, x2, t columns of train_set into {x1, x2} and {t}
    train_x = train_set[['x1','x2']].values
    train_t = train_set['t'].values.reshape([len(train_set), 1])

    return train_x, train_t

# build the prediction function
def make_predict_func():
    x = tf.placeholder(tf.float32, [None, 2])
    w = tf.Variable(tf.zeros([2, 1]))
    w0 = tf.Variable(tf.zeros([1]))
    f = tf.matmul(x, w) + w0  # f(x) = x*w + w0   (w, x, w0 are all vectors/matrices)
    p = tf.sigmoid(f)         # sigmoid function = logistic function
    return p, w, x, w0

# error (loss) function
def make_err_func(p):
    t = tf.placeholder(tf.float32, [None, 1])
    # error function for maximum-likelihood estimation (negative log-likelihood)
    loss = -tf.reduce_sum(t*tf.log(p) + (1-t)*tf.log(1-p))
    # gradient-descent-style training step (here the Adam optimizer)
    train_step = tf.train.AdamOptimizer().minimize(loss)
    # subtract 0.5 so that p and t can be compared by their signs
    correct_prediction = tf.equal(tf.sign(p-0.5), tf.sign(t-0.5))
    # reduce_mean() computes the mean of the vector's elements
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    return loss, t, train_step, accuracy

def main():
    # training data
    train_x, train_t = make_training_data()
    # prediction function
    p, w, x, w0 = make_predict_func()
    # error function
    loss, t, train_step, accuracy = make_err_func(p)

    # create a session & initialize the Variables
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # optimize the parameters (gradient-descent family: Adam)
    for i in range(1, 30001):
        sess.run(train_step, feed_dict={x:train_x, t:train_t})
        if i % 2000 == 0:
            loss_val, acc_val = sess.run(
                [loss, accuracy], feed_dict={x:train_x, t:train_t})
            print('step: %d, loss: %f, accuracy: %f'
                  % (i, loss_val, acc_val))

    # extract the results (w0, w1, w2)
    w0_val, w_val = sess.run([w0, w])
    w0_val, w1_val, w2_val = w0_val[0], w_val[0][0], w_val[1][0]
    print('w0:', w0_val, ' w1:', w1_val, ' w2:', w2_val)


if __name__ == '__main__':
    main()
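
As an aside, the correct_prediction line above compares the signs of p-0.5 and t-0.5. A minimal standalone numpy check of why this counts correct predictions (the values here are made up for illustration):

import numpy as np

p = np.array([0.9, 0.2, 0.6])  # example predicted probabilities
t = np.array([1.0, 0.0, 0.0])  # example true labels
# sign(p-0.5) is +1 when the model predicts t=1 and -1 when it predicts t=0,
# so the signs of p-0.5 and t-0.5 match exactly when the prediction is correct.
print(np.sign(p - 0.5) == np.sign(t - 0.5))  # -> [ True  True False]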

Running the above produces the following output:

$ ./foo_2.py 
step: 2000, loss: 17.505960, accuracy: 0.857143
step: 4000, loss: 12.778822, accuracy: 0.928571
step: 6000, loss: 9.999125, accuracy: 0.928571
step: 8000, loss: 8.244436, accuracy: 0.976190
step: 10000, loss: 7.087447, accuracy: 0.952381
step: 12000, loss: 6.303907, accuracy: 0.952381
step: 14000, loss: 5.765183, accuracy: 0.952381
step: 16000, loss: 5.393257, accuracy: 0.952381
step: 18000, loss: 5.138913, accuracy: 0.952381
step: 20000, loss: 4.969873, accuracy: 0.952381
step: 22000, loss: 4.863929, accuracy: 0.952381
step: 24000, loss: 4.804683, accuracy: 0.952381
step: 26000, loss: 4.778569, accuracy: 0.952381
step: 28000, loss: 4.772072, accuracy: 0.952381
step: 30000, loss: 4.771708, accuracy: 0.952381
w0: -21.0061  w1: 0.849911  w2: 0.621193
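
The script imports matplotlib.pyplot but never uses it. As a minimal sketch (not part of the original script; it assumes the w0/w1/w2 values printed above), the learned decision boundary p = 0.5, i.e. w0 + w1*x1 + w2*x2 = 0, can be drawn like this:

import numpy as np
import matplotlib.pyplot as plt

# learned parameters taken from the run above
w0_val, w1_val, w2_val = -21.0061, 0.849911, 0.621193

# p = 0.5 on the line w0 + w1*x1 + w2*x2 = 0, i.e. x2 = -(w0 + w1*x1) / w2
x1 = np.linspace(0, 30, 100)
x2 = -(w0_val + w1_val * x1) / w2_val

plt.plot(x1, x2)  # decision boundary
# to overlay the training data, scatter train_x colored by train_t, e.g.:
# plt.scatter(train_x[:,0], train_x[:,1], c=train_t.flatten())
plt.xlabel('x1')
plt.ylabel('x2')
plt.show()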