GitHub - oreilly-japan/deep-learning-from-scratch: 『ゼロから作る Deep Learning』(O'Reilly Japan, 2016)
This is a transcription of pp. 156-163 of 「ゼロから作るDeep Learning ①」 (Deep Learning from Scratch ①: the theory and implementation of deep learning, learned with Python).
In the earlier entry below, the gradients were computed by numerical differentiation; this time they are computed with backpropagation (誤差逆伝播法) instead.
Neural network training with numpy for python - end0tknr's kipple - web写経開発
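As a quick illustration of the difference (a minimal sketch, not from the book; the toy function f is made up for this example): numerical differentiation approximates each partial derivative with a central difference, which costs two forward passes per parameter, while backpropagation yields all gradients analytically from a single backward pass.

import numpy as np

# f(w) = sum(w**2), whose analytic gradient is 2*w
w = np.array([1.0, -2.0, 3.0])
f = lambda w: np.sum(w ** 2)

h = 1e-4
num_grad = np.zeros_like(w)
for i in range(w.size):              # one central difference per parameter
    tmp = w[i]
    w[i] = tmp + h; fxh1 = f(w)      # f(w+h)
    w[i] = tmp - h; fxh2 = f(w)      # f(w-h)
    num_grad[i] = (fxh1 - fxh2) / (2 * h)
    w[i] = tmp                       # restore the original value

print(num_grad)  # approx. [ 2. -4.  6.]
print(2 * w)     # analytic gradient [ 2. -4.  6.]

With that contrast in mind, here is the full script, which trains a two-layer network on MNIST using the backpropagation gradients: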
# coding: utf-8
import gzip
import matplotlib.pyplot as plt
import numpy as np
import os
import urllib.request
from collections import OrderedDict

def main():
    # download the MNIST data
    mymnist = MyMnist()
    (x_train, t_train, x_test, t_test) = mymnist.load_mnist()

    network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

    iters_num = 10000   # number of iterations
    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.1

    train_loss_list = []
    train_acc_list = []
    test_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)

    for i in range(iters_num):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # gradients
        #grad = network.numerical_gradient(x_batch, t_batch)
        grad = network.gradient(x_batch, t_batch)

        # update the parameters
        for key in ('W1', 'b1', 'W2', 'b2'):
            network.params[key] -= learning_rate * grad[key]

        loss = network.loss(x_batch, t_batch)
        train_loss_list.append(loss)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            test_acc = network.accuracy(x_test, t_test)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            print(train_acc, test_acc)

    my_plot = MyPlot()
    my_plot.disp_graph(train_acc_list, test_acc_list)

class TwoLayerNet:
    def __init__(self,
                 input_size,             # number of neurons in the input layer
                 hidden_size,            # ditto for the hidden layer
                 output_size,            # ditto for the output layer
                 weight_init_std=0.01):
        # initialize the weights
        self.params = {}
        self.params['W1'] = \
            weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = \
            weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # build the Affine and ReLU layers
        self.layers = OrderedDict()  # ordered dictionary
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        # Softmax-with-Loss layer used as the output layer
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    # x: input data, t: teacher (label) data
    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x: input data, t: teacher (label) data
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = self._numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = self._numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = self._numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = self._numerical_gradient(loss_W, self.params['b2'])
        return grads

    def _numerical_gradient(self, f, x):
        h = 1e-4  # 0.0001
        grad = np.zeros_like(x)

        it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
        while not it.finished:
            idx = it.multi_index
            tmp_val = x[idx]
            x[idx] = tmp_val + h
            fxh1 = f(x)  # f(x+h)

            x[idx] = tmp_val - h
            fxh2 = f(x)  # f(x-h)
            grad[idx] = (fxh1 - fxh2) / (2 * h)

            x[idx] = tmp_val  # restore the original value
            it.iternext()
        return grad

    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # collect the gradients
        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db
        return grads

class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.original_x_shape = None
        self.dW = None
        self.db = None

    def forward(self, x):
        self.original_x_shape = x.shape
        x = x.reshape(x.shape[0], -1)
        self.x = x
        out = np.dot(self.x, self.W) + self.b
        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)  # W.T is numpy's transpose
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        # the leading asterisk unpacks the shape tuple into arguments
        dx = dx.reshape(*self.original_x_shape)
        return dx

class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None  # loss
        self.y = None     # output of softmax
        self.t = None     # teacher data (one-hot vector)

    def forward(self, x, t):
        self.t = t
        self.y = self.softmax(x)
        self.loss = self.cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        dx = (self.y - self.t) / batch_size
        return dx

    def softmax(self, x):
        x = x - np.max(x, axis=-1, keepdims=True)  # guard against overflow
        return np.exp(x) / np.sum(np.exp(x), axis=-1, keepdims=True)

    def cross_entropy_error(self, y, t):
        if y.ndim == 1:
            t = t.reshape(1, t.size)
            y = y.reshape(1, y.size)

        # if the teacher data is one-hot, convert it to label indices
        if t.size == y.size:
            t = t.argmax(axis=1)

        batch_size = y.shape[0]
        return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size

class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        # True for elements <= 0, False otherwise
        self.mask = (x <= 0)
        # zero out the elements where the mask is True, keep the rest
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        dout[self.mask] = 0
        dx = dout
        return dx

class MyMnist:
    def __init__(self):
        pass

    def load_mnist(self):
        data_files = self.download_mnist()
        # convert to numpy arrays
        dataset = {}
        dataset['train_img']   = self.load_img(  data_files['train_img'])
        dataset['train_label'] = self.load_label(data_files['train_label'])
        dataset['test_img']    = self.load_img(  data_files['test_img'])
        dataset['test_label']  = self.load_label(data_files['test_label'])

        for key in ('train_img', 'test_img'):
            dataset[key] = dataset[key].astype(np.float32)
            dataset[key] /= 255.0

        for key in ('train_label', 'test_label'):
            dataset[key] = self.change_one_hot_label(dataset[key])

        return (dataset['train_img'],
                dataset['train_label'],
                dataset['test_img'],
                dataset['test_label'])

    def change_one_hot_label(self, X):
        T = np.zeros((X.size, 10))
        for idx, row in enumerate(T):
            row[X[idx]] = 1
        return T

    def download_mnist(self):
        url_base = 'http://yann.lecun.com/exdb/mnist/'
        key_file = {'train_img'  :'train-images-idx3-ubyte.gz',
                    'train_label':'train-labels-idx1-ubyte.gz',
                    'test_img'   :'t10k-images-idx3-ubyte.gz',
                    'test_label' :'t10k-labels-idx1-ubyte.gz'}
        data_files = {}
        dataset_dir = os.path.dirname(os.path.abspath(__file__))

        for data_name, file_name in key_file.items():
            req_url = url_base + file_name
            file_path = dataset_dir + "/" + file_name

            request = urllib.request.Request(req_url)
            response = urllib.request.urlopen(request).read()
            with open(file_path, mode='wb') as f:
                f.write(response)
            data_files[data_name] = file_path
        return data_files

    def load_img(self, file_path):
        img_size = 784  # = 28*28
        with gzip.open(file_path, 'rb') as f:
            data = np.frombuffer(f.read(), np.uint8, offset=16)
        data = data.reshape(-1, img_size)
        return data

    def load_label(self, file_path):
        with gzip.open(file_path, 'rb') as f:
            labels = np.frombuffer(f.read(), np.uint8, offset=8)
        return labels

class MyPlot:
    def __init__(self):
        pass

    def disp_graph(self, train_acc_list, test_acc_list):
        x = np.arange(len(train_acc_list))
        plt.plot(x, train_acc_list, label='train acc')
        plt.plot(x, test_acc_list, label='test acc', linestyle='--')
        plt.xlabel("epochs")
        plt.ylabel("accuracy")
        plt.ylim(0, 1.0)
        plt.legend(loc='lower right')
        plt.show()

if __name__ == '__main__':
    main()
Running the above prints the following (train accuracy and test accuracy, once per epoch):
(dl_scratch) C:\Users\end0t\tmp\deep-learning-from-scratch\ch05>python foo5.py
0.14275 0.1407
0.9061833333333333 0.9092
0.9242833333333333 0.9257
0.9384166666666667 0.9379
0.9472333333333334 0.9462
0.9537 0.9511
0.9568166666666666 0.9557
0.9632333333333334 0.9602
0.96755 0.9632
0.9692 0.965
0.97145 0.9673
0.9744 0.9684
0.9755333333333334 0.9698
0.9776 0.9712
0.9781 0.9707
0.9797666666666667 0.9707
0.9784 0.9721
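Since TwoLayerNet keeps both numerical_gradient() and gradient(), the two can be compared directly, which is the same kind of gradient check the book performs in ch05. The following is a minimal sketch of such a check; it assumes the script above was saved as foo5.py (the file name shown in the prompt), and uses a tiny random batch instead of MNIST to stay self-contained.

import numpy as np
from foo5 import TwoLayerNet  # assumes the script above is foo5.py

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# a small random batch stands in for MNIST (3 samples, one-hot labels)
x_batch = np.random.rand(3, 784)
t_batch = np.eye(10)[np.random.randint(0, 10, 3)]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# mean absolute difference per parameter; if backpropagation is
# implemented correctly this should be vanishingly small (around 1e-10)
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))

The numerical side is slow (two forward passes per parameter, and W1 alone has 784*50 of them), which is exactly why it is commented out in the training loop and used only for verification.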