またも、O'Reilly「ゼロから作る Deep Learning」4章にある勾配降下法の写経です。
今回は、2層ニューラルネットワークに対して実行します。
mnist, ミニバッチ, 数値微分, 勾配降下, 損失関数 等、 ディープラーニングに関する基本が揃っています。
まだまだ、全くゼロから自分でソースコードを書くことはできませんが…
#!/usr/local/python3/bin/python3
# coding: utf-8
"""Train a two-layer neural network on MNIST with mini-batch gradient descent.

The script downloads MNIST on first use, caches it as a pickle next to this
file, trains a 784-50-10 sigmoid/softmax network and saves the accuracy curve
to ``train_neuralnet.png``.

The MNIST loader and ``TwoLayerNet`` are inlined here rather than imported
from ``dataset.mnist`` / ``two_layer_net``; they follow
https://github.com/oreilly-japan/deep-learning-from-scratch/tree/master/ch04
"""
try:
    import urllib.request
except ImportError:
    raise ImportError('You should use Python 3.x')
import gzip
import os
import os.path
import pickle
import sys

sys.path.append(os.pardir)  # allow importing modules from the parent directory

import numpy as np

MNIST_DATASET_DIR = os.path.dirname(os.path.abspath(__file__))
MNIST_SAVE_FILE = MNIST_DATASET_DIR + "/mnist.pkl"
MNIST_URL_BASE = 'http://yann.lecun.com/exdb/mnist/'
MNIST_GZ_FILES = {
    'train_img': 'train-images-idx3-ubyte.gz',
    'train_label': 'train-labels-idx1-ubyte.gz',
    'test_img': 't10k-images-idx3-ubyte.gz',
    'test_label': 't10k-labels-idx1-ubyte.gz',
}
MNIST_IMG_SIZE = 784  # 28 * 28 pixels per image


def main():
    """Train the network, print per-epoch accuracy and save the plot."""
    # Imported here so the numeric helpers in this module stay importable on
    # machines without a plotting backend.
    import matplotlib.pyplot as plt

    # Load MNIST; the first run downloads it from yann.lecun.com.
    # x_*: images, t_*: one-hot labels.
    (x_train, t_train), (x_test, t_test) = load_mnist(
        normalize=True, one_hot_label=True)

    # 784 = 28 * 28 input pixels, 10 = digit classes 0-9.
    network = TwoLayerNet(
        input_size=MNIST_IMG_SIZE, hidden_size=50, output_size=10)

    iters_num = 10000  # number of mini-batch updates
    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.1

    train_loss_list = []
    train_acc_list = []
    test_acc_list = []

    # Integer division keeps the "once per epoch" check below exact even
    # when train_size is not a multiple of batch_size.
    iter_per_epoch = max(train_size // batch_size, 1)

    for i in range(iters_num):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Backpropagation; network.numerical_gradient(x_batch, t_batch)
        # gives the same result but is far slower.
        grad = network.gradient(x_batch, t_batch)

        # Plain gradient-descent parameter update.
        for key in ('W1', 'b1', 'W2', 'b2'):
            network.params[key] -= learning_rate * grad[key]

        train_loss_list.append(network.loss(x_batch, t_batch))

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            test_acc = network.accuracy(x_test, t_test)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            print("train acc, test acc | "
                  + str(train_acc) + ", " + str(test_acc))

    # Plot the accuracy recorded at each epoch.
    x = np.arange(len(train_acc_list))
    plt.plot(x, train_acc_list, label='train acc')
    plt.plot(x, test_acc_list, label='test acc', linestyle='--')
    plt.xlabel("epochs")
    plt.ylabel("accuracy")
    plt.ylim(0, 1.0)
    plt.legend(loc='lower right')
    plt.savefig('train_neuralnet.png')


def load_mnist(normalize=True, flatten=True, one_hot_label=False):
    """Load the MNIST dataset, building the local cache if necessary.

    Args:
        normalize: scale pixel values to the range 0.0-1.0 (float32).
        flatten: keep each image as a flat vector of 784 values; when False
            the images are reshaped to (N, 1, 28, 28).
        one_hot_label: return labels as one-hot rows instead of class indices.

    Returns:
        (train images, train labels), (test images, test labels)
    """
    if not os.path.exists(MNIST_SAVE_FILE):
        init_mnist()

    # NOTE: pickle must only be used on trusted files; this cache is the one
    # written by init_mnist() next to this script.
    with open(MNIST_SAVE_FILE, 'rb') as f:
        dataset = pickle.load(f)

    if normalize:
        for key in ('train_img', 'test_img'):
            dataset[key] = dataset[key].astype(np.float32)
            dataset[key] /= 255.0

    if one_hot_label:
        for key in ('train_label', 'test_label'):
            dataset[key] = _change_one_hot_label(dataset[key])

    if not flatten:
        for key in ('train_img', 'test_img'):
            dataset[key] = dataset[key].reshape(-1, 1, 28, 28)

    return ((dataset['train_img'], dataset['train_label']),
            (dataset['test_img'], dataset['test_label']))


def init_mnist():
    """Download MNIST if needed and cache it as a pickle in MNIST_SAVE_FILE."""
    download_mnist()
    dataset = {
        'train_img': load_mnist_img(MNIST_GZ_FILES['train_img']),
        'test_img': load_mnist_img(MNIST_GZ_FILES['test_img']),
        'train_label': load_mnist_label(MNIST_GZ_FILES['train_label']),
        'test_label': load_mnist_label(MNIST_GZ_FILES['test_label']),
    }
    with open(MNIST_SAVE_FILE, 'wb') as f:
        pickle.dump(dataset, f, -1)  # -1 selects the highest pickle protocol


def download_mnist():
    """Fetch every MNIST archive that is not already in MNIST_DATASET_DIR."""
    for file_name in MNIST_GZ_FILES.values():
        file_path = os.path.join(MNIST_DATASET_DIR, file_name)
        if os.path.exists(file_path):
            continue
        print("download", MNIST_URL_BASE + file_name, "to", MNIST_DATASET_DIR)
        urllib.request.urlretrieve(MNIST_URL_BASE + file_name, file_path)


def load_mnist_label(file_name):
    """Read a gzipped MNIST label file into a 1-D uint8 array."""
    file_path = os.path.join(MNIST_DATASET_DIR, file_name)
    with gzip.open(file_path, 'rb') as f:
        # The IDX label file starts with an 8-byte header (magic number and
        # item count); offset=8 skips it so only the label bytes remain.
        labels = np.frombuffer(f.read(), np.uint8, offset=8)
    return labels


def load_mnist_img(file_name):
    """Read a gzipped MNIST image file into an (N, 784) uint8 array."""
    file_path = os.path.join(MNIST_DATASET_DIR, file_name)
    with gzip.open(file_path, 'rb') as f:
        # The IDX image file starts with a 16-byte header (magic number,
        # image count, rows, columns); offset=16 skips it.
        data = np.frombuffer(f.read(), np.uint8, offset=16)
    # One row per image, MNIST_IMG_SIZE pixels each.
    return data.reshape(-1, MNIST_IMG_SIZE)


def _change_one_hot_label(X):
    """Convert a 1-D array of class indices (0-9) to one-hot rows."""
    T = np.zeros((X.size, 10))
    T[np.arange(X.size), X] = 1
    return T


class TwoLayerNet:
    """Fully connected network: input -> sigmoid hidden layer -> softmax.

    Parameters live in ``self.params`` under the keys 'W1', 'b1', 'W2', 'b2'.
    """

    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=0.01):
        # Weights start as small Gaussian noise, biases as zeros.
        self.params = {}
        self.params['W1'] = weight_init_std * \
            np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * \
            np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Return the softmax class probabilities for input batch x."""
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        return softmax(a2)

    def loss(self, x, t):
        """Return the cross-entropy loss for inputs x and labels t."""
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of x classified as one-hot t says."""
        y = np.argmax(self.predict(x), axis=1)
        t = np.argmax(t, axis=1)
        return np.sum(y == t) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return the loss gradients by central differences (slow).

        Useful as a reference to check ``gradient``.
        """
        # The argument is ignored: the module-level numerical_gradient
        # perturbs the parameter array in place, and self.loss reads it
        # back through self.params.
        loss_W = lambda W: self.loss(x, t)

        return {
            key: numerical_gradient(loss_W, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }

    def gradient(self, x, t):
        """Return the loss gradients by backpropagation.

        Expects t as one-hot rows with the same shape as the network output.
        """
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}

        batch_num = x.shape[0]

        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        # backward
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        da1 = np.dot(dy, W2.T)
        dz1 = sigmoid_grad(a1) * da1
        grads['W1'] = np.dot(x.T, dz1)
        grads['b1'] = np.sum(dz1, axis=0)

        return grads


def numerical_gradient(f, x):
    """Return the central-difference gradient of f at x.

    x is perturbed in place one element at a time and restored afterwards.
    """
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]

        x[idx] = float(tmp_val) + h
        fxh1 = f(x)  # f(x+h)

        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x-h)

        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val  # restore the original value
        it.iternext()

    return grad


def sigmoid(x):
    """Return the logistic sigmoid of x, element-wise."""
    return 1 / (1 + np.exp(-x))


def sigmoid_grad(x):
    """Return the derivative of the sigmoid evaluated at x."""
    return (1.0 - sigmoid(x)) * sigmoid(x)


def relu(x):
    """Return max(0, x), element-wise."""
    return np.maximum(0, x)


def relu_grad(x):
    """Return the ReLU derivative: 1 where x >= 0, otherwise 0."""
    # zeros_like builds an array with the shape of x; np.zeros(x) would
    # wrongly treat the data itself as a shape.
    grad = np.zeros_like(x)
    grad[x >= 0] = 1
    return grad


def softmax(x):
    """Return the softmax of x (row-wise when x is 2-D)."""
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis=0)  # guard against overflow in exp
        exp_x = np.exp(x)
        return (exp_x / np.sum(exp_x, axis=0)).T

    x = x - np.max(x)  # guard against overflow in exp
    exp_x = np.exp(x)
    return exp_x / np.sum(exp_x)


def cross_entropy_error(y, t):
    """Return the mean cross-entropy between predictions y and labels t.

    t may be class indices or one-hot rows with the same shape as y.
    """
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # One-hot labels are converted to the index of the correct class.
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    # The small constant keeps log() finite when a probability is exactly 0.
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size


def softmax_loss(X, t):
    """Return the cross-entropy loss of softmax(X) against labels t."""
    y = softmax(X)
    return cross_entropy_error(y, t)


if __name__ == '__main__':
    main()
↑こう書くと↓こう表示されます