先日のエントリの続きで、O'Reilly「ゼロから作る Deep Learning」5章の写経です。
誤差逆伝播法による2層ニューラルネットワークモデル
pythonコードだけでは分かりづらいので、図示してみました
2層ニューラルネットワークに対する誤差逆伝播法 - python
#!/usr/local/python3/bin/python3
# coding: utf-8
"""Train a two-layer neural network on MNIST with error backpropagation.

The network is Affine -> ReLU -> Affine -> Softmax-with-cross-entropy.
Running this file as a script downloads MNIST on first use, trains the
network with mini-batch SGD, prints the parameters and accuracies once per
epoch, and saves the accuracy curves to ``train_neuralnet.png``.
"""
try:
    import urllib.request
except ImportError:
    raise ImportError('You should use Python 3.x')
import os.path
import gzip
import pickle
import os
import sys
sys.path.append(os.pardir)  # so that files in the parent directory can be imported
import numpy as np
from collections import OrderedDict

## https://github.com/oreilly-japan/deep-learning-from-scratch/tree/master/ch05

MNIST_DATASET_DIR = os.path.dirname(os.path.abspath(__file__))
MNIST_SAVE_FILE = MNIST_DATASET_DIR + "/mnist.pkl"
# NOTE(review): plain HTTP with no checksum verification; confirm this host
# is still reachable for automated downloads or switch to a trusted mirror.
MNIST_URL_BASE = 'http://yann.lecun.com/exdb/mnist/'
MNIST_GZ_FILES = {
    'train_img': 'train-images-idx3-ubyte.gz',
    'train_label': 'train-labels-idx1-ubyte.gz',
    'test_img': 't10k-images-idx3-ubyte.gz',
    'test_label': 't10k-labels-idx1-ubyte.gz',
}
MNIST_IMG_SIZE = 784  # 28 * 28 pixels per image

# Added inside log() so that a probability of exactly 0 yields a large finite
# loss instead of -inf.
_LOG_EPSILON = 1e-7


######################################################
def main():
    """Train the network on MNIST and save the accuracy plot."""
    # Imported here (before training starts) instead of at module level so
    # that the network classes can be imported without matplotlib installed.
    import matplotlib.pyplot as plt

    # Load MNIST; the first run downloads it from yann.lecun.com.
    # x_train: training images, t_train: training labels
    # x_test : test images,     t_test : test labels
    (x_train, t_train), (x_test, t_test) = \
        load_mnist(normalize=True, one_hot_label=True)

    # 784 = 28*28 input pixels, 10 = digits 0-9
    network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

    iters_num = 10000  # number of mini-batch iterations
    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.1

    train_loss_list = []
    train_acc_list = []
    test_acc_list = []

    # Integer division keeps the `i % iter_per_epoch == 0` test exact even
    # when train_size is not a multiple of batch_size.
    iter_per_epoch = max(train_size // batch_size, 1)

    for i in range(iters_num):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Compute gradients by backpropagation.
        grad = network.gradient(x_batch, t_batch)

        # Update in place: the Affine layers hold references to these same
        # arrays, so rebinding params[key] would leave the layers stale.
        for key in ('W1', 'b1', 'W2', 'b2'):
            network.params[key] -= learning_rate * grad[key]

        loss = network.loss(x_batch, t_batch)
        train_loss_list.append(loss)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            test_acc = network.accuracy(x_test, t_test)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)

            print("W1:", network.params['W1'])
            print("b1:", network.params['b1'])
            print("W2:", network.params['W2'])
            print("b2:", network.params['b2'])
            print("ACCURACY OF TRAIN/TEST:", train_acc, "/", test_acc)

    # Draw the accuracy curves.
    x = np.arange(len(train_acc_list))
    plt.plot(x, train_acc_list, label='train acc')
    plt.plot(x, test_acc_list, label='test acc', linestyle='--')
    plt.xlabel("epochs")
    plt.ylabel("accuracy")
    plt.ylim(0, 1.0)
    plt.legend(loc='lower right')
    plt.savefig('train_neuralnet.png')


######################################################
class TwoLayerNet:
    """Two-layer fully connected network trained by backpropagation.

    Attributes:
        params: dict with keys 'W1', 'b1', 'W2', 'b2' holding the weights and
            biases. The Affine layers share these array objects.
        layers: ordered mapping of the forward layers
            (Affine1 -> Relu1 -> Affine2).
        lastLayer: the SoftmaxWithLoss layer producing the scalar loss.
    """

    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=0.01):
        """Create the parameters and layers.

        Args:
            input_size: number of input features.
            hidden_size: number of hidden units.
            output_size: number of output classes.
            weight_init_std: scale applied to the standard-normal initial
                weights.
        """
        # Initialise weights (scaled Gaussian) and biases (zeros).
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # Build the layers; insertion order is the forward order.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Return the output of the last Affine layer (before softmax) for x."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return the cross-entropy loss for inputs x and teacher labels t."""
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of x whose predicted class matches t.

        t may be one-hot (2-D) or a 1-D array of class indices.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        return np.sum(y == t) / float(x.shape[0])

    def gradient(self, x, t):
        """Return the loss gradients for every parameter via backpropagation.

        Returns:
            dict with keys 'W1', 'b1', 'W2', 'b2'.
        """
        # Forward pass: caches the intermediate values each layer needs.
        self.loss(x, t)

        # Backward pass: walk the layers in reverse order.
        dout = 1
        dout = self.lastLayer.backward(dout)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients stored on the Affine layers.
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
        return grads


######################################################
class SoftmaxWithLoss:
    """Softmax followed by cross-entropy loss, as a single layer."""

    def __init__(self):
        self.loss = None
        self.y = None  # softmax output
        self.t = None  # teacher labels

    def forward(self, x, t):
        """Apply softmax to x and return the cross-entropy loss against t."""
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the forward input.

        dout is accepted for interface symmetry with the other layers; it is
        not applied to the result.
        """
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:
            # Teacher labels are one-hot vectors.
            dx = (self.y - self.t) / batch_size
        else:
            # Teacher labels are class indices.
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size
        return dx


######################################################
class Affine:
    """Fully connected layer computing ``x . W + b``."""

    def __init__(self, W, b):
        self.W = W
        self.b = b

        self.x = None
        self.original_x_shape = None
        # Gradients of the weight and bias, filled in by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Flatten x to 2-D (first axis kept) and return ``x . W + b``."""
        # Tensor support: remember the shape so backward() can restore it.
        self.original_x_shape = x.shape
        x = x.reshape(x.shape[0], -1)
        self.x = x

        return np.dot(self.x, self.W) + self.b

    def backward(self, dout):
        """Store dW and db, and return the gradient with respect to the input."""
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)

        # Restore the shape of the original input (tensor support).
        return dx.reshape(*self.original_x_shape)


######################################################
class Relu:
    """Rectified linear unit layer."""

    def __init__(self):
        self.mask = None  # True where the forward input was <= 0

    def forward(self, x):
        """Return a copy of x with non-positive entries set to 0."""
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return dout with entries zeroed where the forward input was <= 0.

        Works on a copy so the caller's dout array is left unmodified.
        """
        dx = dout.copy()
        dx[self.mask] = 0
        return dx


######################################################
def softmax(x):
    """Return the softmax of x along its last axis.

    Accepts a 1-D vector or a 2-D batch (one sample per row).
    """
    # Subtracting the maximum guards against overflow in exp().
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=-1, keepdims=True)


def cross_entropy_error(y, t):
    """Return the mean cross-entropy between predictions y and labels t.

    Args:
        y: predicted probabilities, 1-D (single sample) or 2-D (batch).
        t: teacher labels, either one-hot (same size as y) or class indices.
    """
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # One-hot teacher labels are converted to the index of the correct class.
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    picked = y[np.arange(batch_size), t]
    return -np.sum(np.log(picked + _LOG_EPSILON)) / batch_size


######################################################
def load_mnist(normalize=True, flatten=True, one_hot_label=False):
    """Load the MNIST dataset, downloading and caching it on first use.

    Args:
        normalize: scale the image pixel values to 0.0-1.0.
        flatten: keep each image as a flat 784-element row; when False the
            images are reshaped to (N, 1, 28, 28).
        one_hot_label: return the labels as one-hot arrays.

    Returns:
        (train images, train labels), (test images, test labels)
    """
    if not os.path.exists(MNIST_SAVE_FILE):
        init_mnist()

    # NOTE: pickle.load can execute arbitrary code. This file is a local
    # cache written by init_mnist(); never substitute an untrusted file.
    with open(MNIST_SAVE_FILE, 'rb') as f:
        dataset = pickle.load(f)

    if normalize:
        for key in ('train_img', 'test_img'):
            dataset[key] = dataset[key].astype(np.float32)
            dataset[key] /= 255.0

    if one_hot_label:
        dataset['train_label'] = _change_one_hot_label(dataset['train_label'])
        dataset['test_label'] = _change_one_hot_label(dataset['test_label'])

    if not flatten:
        for key in ('train_img', 'test_img'):
            dataset[key] = dataset[key].reshape(-1, 1, 28, 28)

    return (dataset['train_img'], dataset['train_label']), \
           (dataset['test_img'], dataset['test_label'])


def init_mnist():
    """Download the MNIST archives and cache them as one pickle file."""
    download_mnist()
    dataset = {}
    dataset['train_img'] = load_mnist_img(MNIST_GZ_FILES['train_img'])
    dataset['test_img'] = load_mnist_img(MNIST_GZ_FILES['test_img'])
    dataset['train_label'] = load_mnist_label(MNIST_GZ_FILES['train_label'])
    dataset['test_label'] = load_mnist_label(MNIST_GZ_FILES['test_label'])

    with open(MNIST_SAVE_FILE, 'wb') as f:
        pickle.dump(dataset, f, -1)


def download_mnist():
    """Download every MNIST archive that is not already present locally."""
    for file_name in MNIST_GZ_FILES.values():
        file_path = MNIST_DATASET_DIR + "/" + file_name
        if os.path.exists(file_path):
            continue
        print("download", MNIST_URL_BASE + file_name, "to", MNIST_DATASET_DIR)

        # Download to a temporary name and rename on success, so that an
        # interrupted transfer is never mistaken for a complete archive.
        tmp_path = file_path + ".part"
        try:
            urllib.request.urlretrieve(MNIST_URL_BASE + file_name, tmp_path)
            os.replace(tmp_path, file_path)
        finally:
            if os.path.exists(tmp_path):
                os.remove(tmp_path)


def load_mnist_label(file_name):
    """Return the labels stored in a gzipped MNIST label file as uint8."""
    file_path = MNIST_DATASET_DIR + "/" + file_name
    with gzip.open(file_path, 'rb') as f:
        # offset=8 skips the IDX header of a label file: a 4-byte magic
        # number followed by a 4-byte item count.
        labels = np.frombuffer(f.read(), np.uint8, offset=8)
    return labels


def load_mnist_img(file_name):
    """Return the images of a gzipped MNIST image file, one row per image."""
    file_path = MNIST_DATASET_DIR + "/" + file_name
    with gzip.open(file_path, 'rb') as f:
        # offset=16 skips the IDX header of an image file: 4-byte magic
        # number, image count, row count and column count (4 bytes each).
        data = np.frombuffer(f.read(), np.uint8, offset=16)
    # reshape(-1, ...) lets numpy infer the number of images.
    return data.reshape(-1, MNIST_IMG_SIZE)


def _change_one_hot_label(X):
    """Convert a 1-D array of class indices (0-9) to an (N, 10) one-hot array."""
    T = np.zeros((X.size, 10))
    T[np.arange(X.size), X] = 1
    return T


if __name__ == '__main__':
    main()
↑こう書くと↓こう表示されます