Reference: oreilly-japan/deep-learning-from-scratch — official repository for 『ゼロから作る Deep Learning』 (O'Reilly Japan, 2016)
https://github.com/oreilly-japan/deep-learning-from-scratch
This post is a transcription of pp. 112–121 of 『ゼロから作るDeep Learning ①』 (subtitle: "Deep Learning Theory and Implementation, Learned with Python").
The following Kikagaku page also explains the material clearly:
https://free.kikagaku.ai/tutorial/basic_of_deep_learning/learn/neural_network_basic_backward
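One point worth noting before the listing (a standard result, added here as a reading aid rather than part of the transcription): when a softmax output layer is paired with cross-entropy loss, the gradient with respect to the output pre-activation collapses to a simple difference. This is why `gradient()` in the code below can start its backward pass with `dy = (y - t) / batch_num`:

```math
L = -\frac{1}{N}\sum_{n}\sum_{k} t_{nk}\,\log y_{nk},
\qquad y_{nk} = \mathrm{softmax}(a_{nk})
\quad\Longrightarrow\quad
\frac{\partial L}{\partial a_{nk}} = \frac{y_{nk} - t_{nk}}{N}
```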
```python
# coding: utf-8
import gzip
import matplotlib.pyplot as plt
import numpy as np
import os
import urllib.request

def main():
    # download the MNIST data
    mymnist = MyMnist()
    (x_train, t_train, x_test, t_test) = mymnist.load_mnist()

    iters_num     = 10000   # number of iterations
    batch_size    = 100
    learning_rate = 0.1     # learning rate η
    train_size    = x_train.shape[0]
    iter_per_epoch = max(train_size / batch_size, 1)

    train_loss_list = []
    train_acc_list  = []
    test_acc_list   = []

    # train a two-layer neural network
    network = TwoLayerNet(28*28, 100, 10)
    # for param_type in ['W1','b1','W2','b2',]:
    #     print( param_type, network.params[param_type].shape )

    for i in range(iters_num):
        # draw a random mini-batch
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # compute the gradient and update the parameters
        #grad = network.numerical_gradient(x_batch, t_batch)
        grad = network.gradient(x_batch, t_batch)
        for key in ('W1', 'b1', 'W2', 'b2'):
            network.params[key] -= learning_rate * grad[key]

        loss = network.loss(x_batch, t_batch)
        train_loss_list.append(loss)

        # report accuracy roughly once per epoch
        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            test_acc  = network.accuracy(x_test, t_test)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            print("train acc, test acc | " + str(train_acc) +
                  ", " + str(test_acc))

    my_plot = MyPlot()
    my_plot.disp_graph(train_acc_list, test_acc_list)

class MyMnist:
    def __init__(self):
        pass

    def load_mnist(self):
        data_files = self.download_mnist()
        # convert to NumPy arrays
        dataset = {}
        dataset['train_img']   = self.load_img(  data_files['train_img'] )
        dataset['train_label'] = self.load_label(data_files['train_label'])
        dataset['test_img']    = self.load_img(  data_files['test_img'] )
        dataset['test_label']  = self.load_label(data_files['test_label'])

        # normalize pixel values to [0, 1]
        for key in ('train_img', 'test_img'):
            dataset[key] = dataset[key].astype(np.float32)
            dataset[key] /= 255.0

        # convert labels to one-hot vectors
        for key in ('train_label', 'test_label'):
            dataset[key] = self.change_one_hot_label(dataset[key])

        return (dataset['train_img'],
                dataset['train_label'],
                dataset['test_img'],
                dataset['test_label'])

    def change_one_hot_label(self, X):
        T = np.zeros((X.size, 10))
        for idx, row in enumerate(T):
            row[X[idx]] = 1
        return T

    def download_mnist(self):
        url_base = 'http://yann.lecun.com/exdb/mnist/'
        key_file = {
            'train_img'  : 'train-images-idx3-ubyte.gz',
            'train_label': 'train-labels-idx1-ubyte.gz',
            'test_img'   : 't10k-images-idx3-ubyte.gz',
            'test_label' : 't10k-labels-idx1-ubyte.gz'
        }
        data_files = {}
        dataset_dir = os.path.dirname(os.path.abspath(__file__))

        for data_name, file_name in key_file.items():
            req_url   = url_base + file_name
            file_path = dataset_dir + "/" + file_name
            request  = urllib.request.Request(req_url)
            response = urllib.request.urlopen(request).read()
            with open(file_path, mode='wb') as f:
                f.write(response)
            data_files[data_name] = file_path
        return data_files

    def load_img(self, file_path):
        img_size = 784  # = 28*28
        with gzip.open(file_path, 'rb') as f:
            data = np.frombuffer(f.read(), np.uint8, offset=16)
        data = data.reshape(-1, img_size)
        return data

    def load_label(self, file_path):
        with gzip.open(file_path, 'rb') as f:
            labels = np.frombuffer(f.read(), np.uint8, offset=8)
        return labels

class MyPlot:
    def __init__(self):
        pass

    def disp_graph(self, train_acc_list, test_acc_list):
        x = np.arange(len(train_acc_list))
        plt.plot(x, train_acc_list, label='train acc')
        plt.plot(x, test_acc_list,  label='test acc', linestyle='--')
        plt.xlabel("epochs")
        plt.ylabel("accuracy")
        plt.ylim(0, 1.0)
        plt.legend(loc='lower right')
        plt.show()

class TwoLayerNet:
    def __init__(self,
                 input_size,            # number of neurons in the input layer
                 hidden_size,           # ditto, hidden layer
                 output_size,           # ditto, output layer
                 weight_init_std=0.01):
        # initialize the weights
        self.params = {}
        self.params['W1'] = \
            weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = \
            weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = self.sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y  = self.softmax(a2)
        return y

    # x: input data, t: teacher (label) data
    def loss(self, x, t):
        y = self.predict(x)
        return self.cross_entropy_error(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x: input data, t: teacher (label) data
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = self._numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = self._numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = self._numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = self._numerical_gradient(loss_W, self.params['b2'])
        return grads

    def _numerical_gradient(self, f, x):
        h = 1e-4  # 0.0001
        grad = np.zeros_like(x)

        it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
        while not it.finished:
            idx = it.multi_index
            tmp_val = x[idx]
            x[idx] = tmp_val + h
            fxh1 = f(x)  # f(x+h)
            x[idx] = tmp_val - h
            fxh2 = f(x)  # f(x-h)
            grad[idx] = (fxh1 - fxh2) / (2*h)
            x[idx] = tmp_val  # restore the original value
            it.iternext()
        return grad

    def gradient(self, x, t):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}
        batch_num = x.shape[0]

        # forward
        a1 = np.dot(x, W1) + b1
        z1 = self.sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y  = self.softmax(a2)

        # backward
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        dz1 = np.dot(dy, W2.T)
        da1 = self.sigmoid_grad(a1) * dz1
        grads['W1'] = np.dot(x.T, da1)
        grads['b1'] = np.sum(da1, axis=0)
        return grads

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_grad(self, x):
        return (1.0 - self.sigmoid(x)) * self.sigmoid(x)

    def cross_entropy_error(self, y, t):
        if y.ndim == 1:
            t = t.reshape(1, t.size)
            y = y.reshape(1, y.size)

        # if the teacher data are one-hot vectors, convert to indices of the correct labels
        if t.size == y.size:
            t = t.argmax(axis=1)

        batch_size = y.shape[0]
        return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size

    def softmax(self, x):
        x = x - np.max(x, axis=-1, keepdims=True)  # guard against overflow
        return np.exp(x) / np.sum(np.exp(x), axis=-1, keepdims=True)

if __name__ == '__main__':
    main()
```
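As a quick sanity check on the listing above (not part of the book's pp. 112–121 transcription), the slow `numerical_gradient` can be compared against the analytic `gradient` on a handful of samples; the mean absolute differences should come out tiny, roughly 1e-8 or smaller. A minimal sketch, assuming the listing is saved as `foo.py` (the filename used in the run below) so its classes can be imported:

```python
# gradient_check.py -- hypothetical helper, assumes the listing above is saved as foo.py
import numpy as np
from foo import TwoLayerNet, MyMnist

(x_train, t_train, x_test, t_test) = MyMnist().load_mnist()
network = TwoLayerNet(28*28, 100, 10)

# use only 3 samples: numerical differentiation does two forward
# passes per parameter, so this is slow even on a tiny batch
x_batch = x_train[:3]
t_batch = t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop  = network.gradient(x_batch, t_batch)

for key in ('W1', 'b1', 'W2', 'b2'):
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ": " + str(diff))  # expect values around 1e-8 or smaller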
Execution result
```
(dl_scratch) C:\Users\end0t\tmp\deep-learning-from-scratch\ch04>python foo.py
train acc, test acc | 0.0993, 0.1032
train acc, test acc | 0.8140666666666667, 0.8168
train acc, test acc | 0.8824, 0.8873
train acc, test acc | 0.8999166666666667, 0.9047
train acc, test acc | 0.9076333333333333, 0.9121
train acc, test acc | 0.9124, 0.9162
train acc, test acc | 0.9184166666666667, 0.9219
train acc, test acc | 0.9225333333333333, 0.9231
train acc, test acc | 0.92515, 0.9279
train acc, test acc | 0.9284166666666667, 0.9299
train acc, test acc | 0.9318833333333333, 0.9321
train acc, test acc | 0.9342333333333334, 0.9346
train acc, test acc | 0.9367, 0.9374
train acc, test acc | 0.93965, 0.9399
train acc, test acc | 0.9422, 0.9412
train acc, test acc | 0.9439166666666666, 0.9423
train acc, test acc | 0.9461333333333334, 0.9441
```
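Accuracy is printed whenever `i % iter_per_epoch == 0`; with `train_size = 60000` and `batch_size = 100`, `iter_per_epoch` is 600, so the 10000 iterations produce 17 report lines (i = 0, 600, …, 9600), roughly one per epoch. The first line sits near 10% because the freshly initialized network is still guessing uniformly among the 10 digit classes.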