GitHub - oreilly-japan/deep-learning-from-scratch: 『ゼロから作る Deep Learning』(O'Reilly Japan, 2016)
This is a transcription of pp. 156-163 of 「ゼロから作るDeep Learning ①」 (Deep Learning from Scratch ①: the theory and implementation of deep learning, learned with Python).
In the earlier entry below, the gradients were computed by numerical differentiation; this time they are computed with backpropagation (誤差逆伝播法) instead.
Neural network training with numpy for python - end0tknr's kipple - web写経開発
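As a quick illustration of the difference (a minimal sketch, not from the book; the toy function f is made up for this example): numerical differentiation approximates each partial derivative with a central difference, which costs two forward passes per parameter, while backpropagation yields all gradients analytically from a single backward pass.

import numpy as np

# f(w) = sum(w**2), whose analytic gradient is 2*w
w = np.array([1.0, -2.0, 3.0])
f = lambda w: np.sum(w ** 2)

h = 1e-4
num_grad = np.zeros_like(w)
for i in range(w.size):              # one central difference per parameter
    tmp = w[i]
    w[i] = tmp + h; fxh1 = f(w)      # f(w+h)
    w[i] = tmp - h; fxh2 = f(w)      # f(w-h)
    num_grad[i] = (fxh1 - fxh2) / (2 * h)
    w[i] = tmp                       # restore the original value

print(num_grad)  # approx. [ 2. -4.  6.]
print(2 * w)     # analytic gradient [ 2. -4.  6.]

With that contrast in mind, here is the full script, which trains a two-layer network on MNIST using the backpropagation gradients: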
# coding: utf-8
import gzip
import matplotlib.pyplot as plt
import numpy as np
import os
import urllib.request
from collections import OrderedDict

def main():
    # download the MNIST data
    mymnist = MyMnist()
    (x_train, t_train, x_test, t_test) = mymnist.load_mnist()

    network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

    iters_num = 10000   # number of iterations
    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.1

    train_loss_list = []
    train_acc_list = []
    test_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)

    for i in range(iters_num):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # gradients
        #grad = network.numerical_gradient(x_batch, t_batch)
        grad = network.gradient(x_batch, t_batch)

        # update the parameters
        for key in ('W1', 'b1', 'W2', 'b2'):
            network.params[key] -= learning_rate * grad[key]

        loss = network.loss(x_batch, t_batch)
        train_loss_list.append(loss)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            test_acc = network.accuracy(x_test, t_test)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            print(train_acc, test_acc)

    my_plot = MyPlot()
    my_plot.disp_graph(train_acc_list, test_acc_list)

class TwoLayerNet:
    def __init__(self,
                 input_size,             # number of neurons in the input layer
                 hidden_size,            # ditto for the hidden layer
                 output_size,            # ditto for the output layer
                 weight_init_std=0.01):
        # initialize the weights
        self.params = {}
        self.params['W1'] = \
            weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = \
            weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # build the Affine and ReLU layers
        self.layers = OrderedDict()  # ordered dictionary
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        # Softmax-with-Loss layer used as the output layer
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    # x: input data, t: teacher (label) data
    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x: input data, t: teacher (label) data
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = self._numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = self._numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = self._numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = self._numerical_gradient(loss_W, self.params['b2'])
        return grads

    def _numerical_gradient(self, f, x):
        h = 1e-4  # 0.0001
        grad = np.zeros_like(x)

        it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
        while not it.finished:
            idx = it.multi_index
            tmp_val = x[idx]
            x[idx] = tmp_val + h
            fxh1 = f(x)  # f(x+h)

            x[idx] = tmp_val - h
            fxh2 = f(x)  # f(x-h)
            grad[idx] = (fxh1 - fxh2) / (2 * h)

            x[idx] = tmp_val  # restore the original value
            it.iternext()
        return grad

    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # collect the gradients
        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db
        return grads

class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.original_x_shape = None
        self.dW = None
        self.db = None

    def forward(self, x):
        self.original_x_shape = x.shape
        x = x.reshape(x.shape[0], -1)
        self.x = x
        out = np.dot(self.x, self.W) + self.b
        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)  # W.T is numpy's transpose
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        # the leading asterisk unpacks the shape tuple into arguments
        dx = dx.reshape(*self.original_x_shape)
        return dx

class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None  # loss
        self.y = None     # output of softmax
        self.t = None     # teacher data (one-hot vector)

    def forward(self, x, t):
        self.t = t
        self.y = self.softmax(x)
        self.loss = self.cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        dx = (self.y - self.t) / batch_size
        return dx

    def softmax(self, x):
        x = x - np.max(x, axis=-1, keepdims=True)  # guard against overflow
        return np.exp(x) / np.sum(np.exp(x), axis=-1, keepdims=True)

    def cross_entropy_error(self, y, t):
        if y.ndim == 1:
            t = t.reshape(1, t.size)
            y = y.reshape(1, y.size)

        # if the teacher data is one-hot, convert it to label indices
        if t.size == y.size:
            t = t.argmax(axis=1)

        batch_size = y.shape[0]
        return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size

class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        # True for elements <= 0, False otherwise
        self.mask = (x <= 0)
        # zero out the elements where the mask is True, keep the rest
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        dout[self.mask] = 0
        dx = dout
        return dx

class MyMnist:
    def __init__(self):
        pass

    def load_mnist(self):
        data_files = self.download_mnist()
        # convert to numpy arrays
        dataset = {}
        dataset['train_img']   = self.load_img(  data_files['train_img'])
        dataset['train_label'] = self.load_label(data_files['train_label'])
        dataset['test_img']    = self.load_img(  data_files['test_img'])
        dataset['test_label']  = self.load_label(data_files['test_label'])

        for key in ('train_img', 'test_img'):
            dataset[key] = dataset[key].astype(np.float32)
            dataset[key] /= 255.0

        for key in ('train_label', 'test_label'):
            dataset[key] = self.change_one_hot_label(dataset[key])

        return (dataset['train_img'],
                dataset['train_label'],
                dataset['test_img'],
                dataset['test_label'])

    def change_one_hot_label(self, X):
        T = np.zeros((X.size, 10))
        for idx, row in enumerate(T):
            row[X[idx]] = 1
        return T

    def download_mnist(self):
        url_base = 'http://yann.lecun.com/exdb/mnist/'
        key_file = {'train_img'  :'train-images-idx3-ubyte.gz',
                    'train_label':'train-labels-idx1-ubyte.gz',
                    'test_img'   :'t10k-images-idx3-ubyte.gz',
                    'test_label' :'t10k-labels-idx1-ubyte.gz'}
        data_files = {}
        dataset_dir = os.path.dirname(os.path.abspath(__file__))

        for data_name, file_name in key_file.items():
            req_url = url_base + file_name
            file_path = dataset_dir + "/" + file_name

            request = urllib.request.Request(req_url)
            response = urllib.request.urlopen(request).read()
            with open(file_path, mode='wb') as f:
                f.write(response)
            data_files[data_name] = file_path
        return data_files

    def load_img(self, file_path):
        img_size = 784  # = 28*28
        with gzip.open(file_path, 'rb') as f:
            data = np.frombuffer(f.read(), np.uint8, offset=16)
        data = data.reshape(-1, img_size)
        return data

    def load_label(self, file_path):
        with gzip.open(file_path, 'rb') as f:
            labels = np.frombuffer(f.read(), np.uint8, offset=8)
        return labels

class MyPlot:
    def __init__(self):
        pass

    def disp_graph(self, train_acc_list, test_acc_list):
        x = np.arange(len(train_acc_list))
        plt.plot(x, train_acc_list, label='train acc')
        plt.plot(x, test_acc_list, label='test acc', linestyle='--')
        plt.xlabel("epochs")
        plt.ylabel("accuracy")
        plt.ylim(0, 1.0)
        plt.legend(loc='lower right')
        plt.show()

if __name__ == '__main__':
    main()
Running the above prints the following (train accuracy and test accuracy, once per epoch):
(dl_scratch) C:\Users\end0t\tmp\deep-learning-from-scratch\ch05>python foo5.py
0.14275 0.1407
0.9061833333333333 0.9092
0.9242833333333333 0.9257
0.9384166666666667 0.9379
0.9472333333333334 0.9462
0.9537 0.9511
0.9568166666666666 0.9557
0.9632333333333334 0.9602
0.96755 0.9632
0.9692 0.965
0.97145 0.9673
0.9744 0.9684
0.9755333333333334 0.9698
0.9776 0.9712
0.9781 0.9707
0.9797666666666667 0.9707
0.9784 0.9721
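Since TwoLayerNet keeps both numerical_gradient() and gradient(), the two can be compared directly, which is the same kind of gradient check the book performs in ch05. The following is a minimal sketch of such a check; it assumes the script above was saved as foo5.py (the file name shown in the prompt), and uses a tiny random batch instead of MNIST to stay self-contained.

import numpy as np
from foo5 import TwoLayerNet  # assumes the script above is foo5.py

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# a small random batch stands in for MNIST (3 samples, one-hot labels)
x_batch = np.random.rand(3, 784)
t_batch = np.eye(10)[np.random.randint(0, 10, 3)]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# mean absolute difference per parameter; if backpropagation is
# implemented correctly this should be vanishingly small (around 1e-10)
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))

The numerical side is slow (two forward passes per parameter, and W1 alone has 784*50 of them), which is exactly why it is commented out in the training loop and used only for verification.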