%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.python.client import device_lib
[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 incarnation: 9527881224689490097
 xla_global_id: -1,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 14353956864
 locality {
   bus_id: 1
   links {
 incarnation: 4360946406964860349
 physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 4060 Ti, pci bus id: 0000:01:00.0, compute capability: 8.9"
 xla_global_id: 416903419]
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]



# Fetch the MNIST dataset bundled with Keras and unpack its two splits.
mnist = tf.keras.datasets.mnist
train_split, test_split = mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split
# display(type(train_images), type(train_labels), type(test_images), type(test_labels))
(60000, 28, 28)
(10000, 28, 28)
# Divide by 255 to rescale the pixel values, then store them as float32.
train_images = (train_images / 255.0).astype(np.float32)
test_images = (test_images / 255.0).astype(np.float32)

# Preview the first 25 training images on a 5x5 grid.
plt.figure(figsize=(10, 10))
for position, digit in enumerate(train_images[:25], start=1):
    plt.subplot(5, 5, position)
    plt.imshow(digit, cmap=plt.cm.binary)
# Flatten each 28x28 image, pass it through two 16-unit ReLU layers and
# finish with a 10-way softmax.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax'),
])
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

# The model must be compiled before fit()/evaluate() can be called.
# The labels are integer class ids and the model already outputs
# probabilities, hence the sparse categorical cross-entropy loss.
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()
Model: "sequential"
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
 dense (Dense)               (None, 16)                12560     
 dense_1 (Dense)             (None, 16)                272       
 dense_2 (Dense)             (None, 10)                170       
Total params: 13002 (50.79 KB)
Trainable params: 13002 (50.79 KB)
Non-trainable params: 0 (0.00 Byte)


# Train the Keras model for five passes over the training set.
model.fit(x=train_images, y=train_labels, epochs=5)
Epoch 1/5
1875/1875 [==============================] - 3s 1ms/step - loss: 0.9580 - accuracy: 0.7203
Epoch 2/5
1875/1875 [==============================] - 3s 1ms/step - loss: 0.3690 - accuracy: 0.8946
Epoch 3/5
1875/1875 [==============================] - 3s 1ms/step - loss: 0.3095 - accuracy: 0.9115
Epoch 4/5
1875/1875 [==============================] - 3s 1ms/step - loss: 0.2762 - accuracy: 0.9208
Epoch 5/5
1875/1875 [==============================] - 3s 1ms/step - loss: 0.2527 - accuracy: 0.9270
<keras.src.callbacks.History at 0x7fe5d81b56f0>



# Measure loss and accuracy on the held-out test split.
evaluation = model.evaluate(test_images, test_labels, verbose=2)
test_loss, test_acc = evaluation
print('\nTest accuracy:', test_acc)
313/313 - 0s - loss: 0.2371 - accuracy: 0.9308 - 468ms/epoch - 1ms/step

Test accuracy: 0.9308000206947327


def plot_image(ax, i, predictions_array, true_label, img):
    """Draw one image and caption it with the prediction.

    The caption reads "<predicted> <confidence>% (<true>)" and is blue when
    the prediction matches the true label, red otherwise.

    Args:
        ax (matplotlib.axes.Axes): Axes to draw on.
        i (int): Index of the sample inside ``true_label`` and ``img``.
        predictions_array (np.ndarray): Prediction scores for sample ``i``.
        true_label (np.ndarray): True labels, indexed by ``i``.
        img (np.ndarray): Images, indexed by ``i``.
    """
    true_label, img = true_label[i], img[i]
    # "binary" is the registered name of the colormap plt.cm.binary.
    ax.imshow(img, cmap="binary")

    predicted_label = np.argmax(predictions_array)

    # Blue for a correct prediction, red for a wrong one.
    color = 'blue' if predicted_label == true_label else 'red'

    ax.set_xlabel(f"{predicted_label} {100*np.max(predictions_array):2.0f}% ({true_label})", color=color)

def plot_value_array(ax, i, predictions_array, true_label):
    """Draw the prediction scores of one sample as a bar chart.

    All bars are grey except the predicted class (red) and the true class
    (blue). When both coincide the bar ends up blue, the same colour
    convention as ``plot_image``.

    Args:
        ax (matplotlib.axes.Axes): Axes to draw on.
        i (int): Index of the sample inside ``true_label``.
        predictions_array (np.ndarray): Prediction scores for sample ``i``,
            one entry per class.
        true_label (np.ndarray): True labels, indexed by ``i``.
    """
    true_label = true_label[i]
    # One bar per class; the class count follows the score vector.
    thisplot = ax.bar(range(len(predictions_array)), predictions_array, color="#777777")
    ax.set_ylim([0, 1])
    predicted_label = np.argmax(predictions_array)

    # Colour the prediction first so the true label wins when they coincide.
    thisplot[predicted_label].set_color('red')
    thisplot[true_label].set_color('blue')

# Class scores for every test image.
predictions = model.predict(test_images)

# Grid layout: every sample occupies an (image, bar chart) pair of columns.
num_rows = 5
num_cols = 3
num_images = num_rows * num_cols
random_indices = np.random.choice(len(test_images), num_images, replace=False)

fig, axes = plt.subplots(num_rows, 2 * num_cols, figsize=(2 * 2 * num_cols, 2 * num_rows))

for i, idx in enumerate(random_indices):
    row, col = divmod(i, num_cols)
    scores = predictions[idx]
    plot_image(axes[row, 2 * col], idx, scores, test_labels, test_images)
    plot_value_array(axes[row, 2 * col + 1], idx, scores, test_labels)

313/313 [==============================] - 0s 738us/step
No description has been provided for this image


class SimpleNeuralNetworkNumpy:
    """Fully connected 784-16-16-10 classifier that handles one sample at a time.

    Both hidden layers use ReLU, the output layer uses softmax and the loss
    is the cross-entropy. Every vector is 1-D; there is no batch dimension.
    """

    def __init__(self):
        self.parameters = self.initialize_parameters()

    def initialize_parameters(self):
        """Create the initial weights and biases.

        Weights are drawn from a normal distribution with standard deviation
        0.1, biases start at zero; everything is stored as float32.

        Returns:
            dict[str, np.ndarray]: Weights ``W1``-``W3`` and biases
            ``b1``-``b3`` of the three layers.
        """
        # 16 x 784 matrix.
        W1 = np.random.normal(size=(16, 784), scale=0.1).astype(np.float32)
        # Vector of length 16.
        b1 = np.zeros((16,), dtype=np.float32)
        # 16 x 16 matrix.
        W2 = np.random.normal(size=(16, 16), scale=0.1).astype(np.float32)
        # Vector of length 16.
        b2 = np.zeros((16,), dtype=np.float32)
        # 10 x 16 matrix.
        W3 = np.random.normal(size=(10, 16), scale=0.1).astype(np.float32)
        # Vector of length 10.
        b3 = np.zeros((10,), dtype=np.float32)
        return {"W1": W1, "b1": b1, "W2": W2, "b2": b2, "W3": W3, "b3": b3}

    def forward_pass(self, x):
        """Run one sample through the network.

        Args:
            x (np.ndarray): Input vector of length 784.

        Returns:
            tuple[np.ndarray, np.ndarray, np.ndarray]: Activations of the
            two hidden layers (length 16 each) and the softmax output
            (length 10).
        """
        z1 = np.matmul(self.parameters["W1"], x) + self.parameters["b1"]
        a1 = np.maximum(z1, 0)  # ReLU
        z2 = np.matmul(self.parameters["W2"], a1) + self.parameters["b2"]
        a2 = np.maximum(z2, 0)  # ReLU
        z3 = np.matmul(self.parameters["W3"], a2) + self.parameters["b3"]
        # Subtracting the maximum keeps exp() from overflowing and does not
        # change the softmax result.
        logits = z3 - np.max(z3)
        e_x = np.exp(logits)
        a3 = e_x / np.sum(e_x)  # softmax
        return a1, a2, a3

    def compute_loss(self, a3, y):
        """Compute the cross-entropy between prediction and target.

        Args:
            a3 (np.ndarray): Output-layer probabilities (length 10).
            y (np.ndarray): One-hot target (length 10).

        Returns:
            float: Cross-entropy loss.
        """
        # A probability of exactly 0 would give log(0) = -inf, and
        # 0 * -inf = NaN for the classes whose target is 0. Clipping leaves
        # ordinary probabilities untouched.
        return -np.sum(y * np.log(np.clip(a3, 1e-12, None)))

    def backward_pass(self, x, y, a1, a2, a3):
        """Back-propagate the loss gradient through all three layers.

        Args:
            x (np.ndarray): Input vector (784).
            y (np.ndarray): One-hot target (10).
            a1 (np.ndarray): Output of hidden layer 1 (16).
            a2 (np.ndarray): Output of hidden layer 2 (16).
            a3 (np.ndarray): Output of the output layer (10).

        Returns:
            dict[str, np.ndarray]: Gradients keyed ``dW1``..``dW3`` and
            ``db1``..``db3``, shaped like the matching parameters.
        """
        # Output layer: softmax combined with cross-entropy gives a3 - y.
        dZ3 = a3 - y
        dW3 = np.outer(dZ3, a2)  # outer product, (10, 16)
        db3 = dZ3
        # Hidden layer 2: the ReLU passes gradient only where it was active.
        dA2 = np.matmul(self.parameters["W3"].T, dZ3)
        dZ2 = dA2 * np.greater(a2, 0).astype(np.float32)
        dW2 = np.outer(dZ2, a1)  # outer product, (16, 16)
        db2 = dZ2
        # Hidden layer 1.
        dA1 = np.matmul(self.parameters["W2"].T, dZ2)
        dZ1 = dA1 * np.greater(a1, 0).astype(np.float32)
        dW1 = np.outer(dZ1, x)  # outer product, (16, 784)
        db1 = dZ1
        return {"dW3": dW3, "db3": db3, "dW2": dW2, "db2": db2, "dW1": dW1, "db1": db1}

    def update_parameters(self, gradients, learning_rate=0.001):
        """Apply one plain gradient-descent step in place.

        Args:
            gradients (dict[str, np.ndarray]): Gradients as returned by
                ``backward_pass``.
            learning_rate (float, optional): Step size.
        """
        for key in self.parameters:
            self.parameters[key] -= learning_rate * gradients[f"d{key}"]

    def compute_accuracy(self, x, correct_label):
        """Check whether one sample is classified correctly.

        Args:
            x (np.ndarray): Input vector (784).
            correct_label (int): True class index.

        Returns:
            float: 1.0 when the most probable class equals
            ``correct_label``, otherwise 0.0.
        """
        _, _, a3 = self.forward_pass(x)
        prediction = np.argmax(a3)
        return float(prediction == correct_label)


# Use the first training image as a single (input, one-hot target) pair.
sample_input = train_images[0].flatten()
sample_label = np.eye(10)[train_labels[0]]
model = SimpleNeuralNetworkNumpy()

# Forward pass with the freshly initialised parameters; show every layer.
activations = model.forward_pass(sample_input)
a1, a2, a3 = activations
display(*activations)
loss_before = model.compute_loss(a3, sample_label)
print(f"更新前の損失 : {loss_before:.4f}")

# One gradient-descent step: back-propagate, then update the parameters.
gradients = model.backward_pass(sample_input, sample_label, a1, a2, a3)
model.update_parameters(gradients, learning_rate=0.1)

# Forward pass again to see how the loss changed.
a1, a2, a3 = model.forward_pass(sample_input)
loss_after = model.compute_loss(a3, sample_label)
print(f"更新後の損失 : {loss_after:.4f}")
array([0.        , 0.        , 0.        , 1.9048121 , 0.        ,
       0.        , 0.        , 0.        , 1.3222114 , 0.3456423 ,
       0.50311697, 0.7671288 , 0.        , 0.13859352, 0.        ,
       0.10609262], dtype=float32)
array([0.06325205, 0.        , 0.12483986, 0.        , 0.49925205,
       0.        , 0.        , 0.24345718, 0.02728038, 0.        ,
       0.        , 0.        , 0.306083  , 0.15014657, 0.13507931,
       0.        ], dtype=float32)
array([0.09766851, 0.08993653, 0.11433795, 0.0870287 , 0.10512269,
       0.09292487, 0.10992279, 0.10692065, 0.1011243 , 0.09501296],
更新前の損失 : 2.3760
更新後の損失 : 2.1678


EPOCHS = 1000

model = SimpleNeuralNetworkNumpy()

# Repeat the gradient step on the same single sample and report the loss
# every 100 iterations.
for epoch in range(EPOCHS):
    a1, a2, a3 = model.forward_pass(sample_input)
    loss = model.compute_loss(a3, sample_label)
    gradients = model.backward_pass(sample_input, sample_label, a1, a2, a3)
    model.update_parameters(gradients, learning_rate=0.01)
    due_for_report = (epoch + 1) % 100 == 0
    if due_for_report:
        print(f"epoch {epoch+1:3d}, loss {loss:.4f}")
epoch 100, loss 0.0101
epoch 200, loss 0.0034
epoch 300, loss 0.0019
epoch 400, loss 0.0013
epoch 500, loss 0.0010
epoch 600, loss 0.0008
epoch 700, loss 0.0006
epoch 800, loss 0.0005
epoch 900, loss 0.0005
epoch 1000, loss 0.0004


# Show the predictions for the first few training images, each as an
# (image, score bar chart) pair.
num_rows = 5
num_cols = 3
num_images = num_rows * num_cols

fig, axes = plt.subplots(num_rows, 2 * num_cols, figsize=(2 * 2 * num_cols, 2 * num_rows))

for idx in range(num_images):
    row, col = divmod(idx, num_cols)
    x = train_images[idx].reshape(784)  # training data on purpose
    a3 = model.forward_pass(x)[-1]
    plot_image(axes[row, 2 * col], idx, a3, train_labels, train_images)
    plot_value_array(axes[row, 2 * col + 1], idx, a3, train_labels)
No description has been provided for this image


# Passes over the full training set. The value 1000 used for the
# single-sample experiment is far too large for 60000 updates per epoch;
# the recorded run of this cell shows five epochs.
EPOCHS = 5

model = SimpleNeuralNetworkNumpy()

for epoch in range(EPOCHS):
    total_loss = 0.0
    total_accuracy = 0.0
    for x, correct_label in zip(train_images, train_labels):
        x = x.reshape(784)
        # One-hot encode the integer label.
        y = np.zeros((10,), dtype=np.float32)
        y[correct_label] = 1.0
        # Forward pass.
        a1, a2, a3 = model.forward_pass(x)
        # Loss.
        loss = model.compute_loss(a3, y)
        # Back-propagation.
        gradients = model.backward_pass(x, y, a1, a2, a3)
        # Parameters are updated after every single sample, so the
        # learning rate is kept small.
        model.update_parameters(gradients, learning_rate=0.001)
        total_loss += loss
        # Accuracy is measured after the update, with a second forward pass.
        total_accuracy += model.compute_accuracy(x, correct_label)
    print(f"Epoch: {epoch + 1:2d}: loss: {total_loss / len(train_images):.3f}, accuracy: {total_accuracy / len(train_images):.3f}")
Epoch:  1: loss: 0.809, accuracy: 0.777
Epoch:  2: loss: 0.344, accuracy: 0.932
Epoch:  3: loss: 0.277, accuracy: 0.949
Epoch:  4: loss: 0.233, accuracy: 0.961
Epoch:  5: loss: 0.204, accuracy: 0.968
# Accuracy on the test split: average the per-sample 0/1 scores.
total_accuracy = sum(
    model.compute_accuracy(x.reshape(784), y)
    for x, y in zip(test_images, test_labels)
)

print(f"Test accuracy: {total_accuracy / len(test_images)}")
Test accuracy: 0.9365
# Show predictions for randomly chosen test images, each as an
# (image, score bar chart) pair.
num_rows = 5
num_cols = 3
num_images = num_rows * num_cols
random_indices = np.random.choice(len(test_images), num_images, replace=False)

fig, axes = plt.subplots(num_rows, 2 * num_cols, figsize=(2 * 2 * num_cols, 2 * num_rows))

for i, idx in enumerate(random_indices):
    row, col = divmod(i, num_cols)
    x = test_images[idx].reshape(784)
    a3 = model.forward_pass(x)[-1]
    plot_image(axes[row, 2 * col], idx, a3, test_labels, test_images)
    plot_value_array(axes[row, 2 * col + 1], idx, a3, test_labels)
No description has been provided for this image


class SimpleNeuralNetworkNumpyBatch:
    """Fully connected 784-16-16-10 classifier that processes mini-batches.

    Every sample is a column vector, so batched arrays have the shape
    ``(batch_size, n, 1)``; ``np.matmul`` then broadcasts the weight
    matrices over the leading batch axis.
    """

    def __init__(self):
        self.parameters = self.initialize_parameters()

    def initialize_parameters(self):
        """Create the initial weights and biases.

        Weights are drawn from a normal distribution with standard deviation
        0.1, biases start at zero; everything is stored as float32.

        Returns:
            dict[str, np.ndarray]: Weights ``W1``-``W3`` and column-vector
            biases ``b1``-``b3`` of the three layers.
        """
        # 16 x 784 matrix.
        W1 = np.random.normal(size=(16, 784), scale=0.1).astype(np.float32)
        # Column vector of length 16.
        b1 = np.zeros((16, 1), dtype=np.float32)
        # 16 x 16 matrix.
        W2 = np.random.normal(size=(16, 16), scale=0.1).astype(np.float32)
        # Column vector of length 16.
        b2 = np.zeros((16, 1), dtype=np.float32)
        # 10 x 16 matrix.
        W3 = np.random.normal(size=(10, 16), scale=0.1).astype(np.float32)
        # Column vector of length 10.
        b3 = np.zeros((10, 1), dtype=np.float32)
        return {"W1": W1, "b1": b1, "W2": W2, "b2": b2, "W3": W3, "b3": b3}

    def forward_pass(self, X):
        """Run a batch through the network.

        Args:
            X (np.ndarray): Input batch of shape (batch_size, 784, 1).

        Returns:
            tuple[np.ndarray, np.ndarray, np.ndarray]: Activations of the
            two hidden layers, (batch_size, 16, 1) each, and the softmax
            output, (batch_size, 10, 1).
        """
        Z1 = np.matmul(self.parameters["W1"], X) + self.parameters["b1"]
        A1 = np.maximum(Z1, 0)  # ReLU
        Z2 = np.matmul(self.parameters["W2"], A1) + self.parameters["b2"]
        A2 = np.maximum(Z2, 0)  # ReLU
        Z3 = np.matmul(self.parameters["W3"], A2) + self.parameters["b3"]
        # Axis 1 is the class axis. Subtracting the per-sample maximum keeps
        # exp() from overflowing without changing the softmax result.
        logits = Z3 - np.max(Z3, axis=1, keepdims=True)
        e_x = np.exp(logits)
        A3 = e_x / np.sum(e_x, axis=1, keepdims=True)  # softmax
        return A1, A2, A3

    def compute_loss(self, A3, Y):
        """Compute the mean cross-entropy of a batch.

        Args:
            A3 (np.ndarray): Output probabilities, (batch_size, 10, 1).
            Y (np.ndarray): One-hot targets, (batch_size, 10, 1).

        Returns:
            float: Cross-entropy averaged over the batch.
        """
        m = Y.shape[0]
        # A probability of exactly 0 would give log(0) = -inf, and
        # 0 * -inf = NaN for the classes whose target is 0. Clipping leaves
        # ordinary probabilities untouched.
        return -np.sum(Y * np.log(np.clip(A3, 1e-12, None))) / m

    def backward_pass(self, X, Y, A1, A2, A3):
        """Back-propagate the loss gradient for every sample of a batch.

        The returned gradients are per sample (leading batch axis); they are
        averaged later, when the parameters are updated.

        Args:
            X (np.ndarray): Input batch, (batch_size, 784, 1).
            Y (np.ndarray): One-hot targets, (batch_size, 10, 1).
            A1 (np.ndarray): Output of hidden layer 1, (batch_size, 16, 1).
            A2 (np.ndarray): Output of hidden layer 2, (batch_size, 16, 1).
            A3 (np.ndarray): Output of the output layer, (batch_size, 10, 1).

        Returns:
            dict[str, np.ndarray]: Per-sample gradients keyed ``dW1``..``dW3``
            and ``db1``..``db3``; each has the shape of the matching
            parameter with a leading batch axis.
        """
        # Batch size.
        m = X.shape[0]
        # Output layer: softmax combined with cross-entropy gives A3 - Y.
        dZ3 = A3 - Y
        # (m, 10, 1) * (m, 1, 16) broadcasts to the per-sample outer product.
        dW3 = dZ3 * A2.reshape(m, 1, -1)
        db3 = dZ3
        # Hidden layer 2: the ReLU passes gradient only where it was active.
        dA2 = np.matmul(self.parameters["W3"].T, dZ3)
        dZ2 = dA2 * np.greater(A2, 0).astype(np.float32)
        dW2 = dZ2 * A1.reshape(m, 1, -1)
        db2 = dZ2
        # Hidden layer 1.
        dA1 = np.matmul(self.parameters["W2"].T, dZ2)
        dZ1 = dA1 * np.greater(A1, 0).astype(np.float32)
        dW1 = dZ1 * X.reshape(m, 1, -1)
        db1 = dZ1
        return {"dW3": dW3, "db3": db3, "dW2": dW2, "db2": db2, "dW1": dW1, "db1": db1}

    def update_parameters(self, gradients, learning_rate=0.1):
        """Apply one gradient-descent step using the batch-mean gradient.

        Args:
            gradients (dict[str, np.ndarray]): Per-sample gradients as
                returned by ``backward_pass``.
            learning_rate (float, optional): Step size.
        """
        for key in self.parameters:
            # Average over the batch axis before stepping.
            grad_mean = np.mean(gradients[f"d{key}"], axis=0)
            self.parameters[key] -= learning_rate * grad_mean

    def compute_accuracy(self, X, correct_labels):
        """Compute the fraction of correctly classified samples.

        Args:
            X (np.ndarray): Input batch, (batch_size, 784, 1).
            correct_labels (np.ndarray): Integer class labels (not one-hot),
                one per sample.

        Returns:
            float: Accuracy over the batch.
        """
        _, _, A3 = self.forward_pass(X)
        # argmax over the class axis gives (batch_size, 1); the labels are
        # reshaped to the same shape so the comparison is element-wise.
        predictions = np.argmax(A3, axis=1)
        accuracy = np.mean(predictions == correct_labels.reshape(-1, 1))
        return accuracy
# NOTE(review): the original BATCH_SIZE is not visible in this file. 32 is
# an assumption (it matches the 1875 steps per epoch of the Keras run,
# 60000 / 32) -- confirm.
BATCH_SIZE = 32
# The recorded run of this cell shows ten epochs.
EPOCHS = 10

model = SimpleNeuralNetworkNumpyBatch()

# Rows of the identity matrix are the one-hot vectors; build it once.
one_hot_table = np.eye(10)

for epoch in range(EPOCHS):
    total_loss = 0.0
    total_accuracy = 0.0
    batches = 0

    # Samples beyond the last full batch are skipped.
    for batch in range(len(train_images) // BATCH_SIZE):
        batch_images = train_images[batch * BATCH_SIZE:(batch + 1) * BATCH_SIZE]
        # One column vector per image: (batch, 784, 1).
        X = np.reshape(batch_images, [-1, 784, 1])
        batch_labels = train_labels[batch * BATCH_SIZE:(batch + 1) * BATCH_SIZE]
        # Convert the integer labels to one-hot column vectors.
        Y = one_hot_table[batch_labels].astype(np.float32)[..., np.newaxis]

        A1, A2, A3 = model.forward_pass(X)
        loss = model.compute_loss(A3, Y)
        gradients = model.backward_pass(X, Y, A1, A2, A3)
        model.update_parameters(gradients, learning_rate=0.01)

        # Accuracy is measured after the update, with a second forward pass.
        accuracy = model.compute_accuracy(X, batch_labels)

        total_loss += loss
        total_accuracy += accuracy
        batches += 1
    avg_loss = total_loss / batches
    avg_accuracy = total_accuracy / batches
    print(f"Epoch: {epoch + 1:2d}, Loss: {avg_loss:.3f}, Accuracy: {avg_accuracy:.3f}")
Epoch:  1, Loss: 1.469, Accuracy: 0.520
Epoch:  2, Loss: 0.572, Accuracy: 0.844
Epoch:  3, Loss: 0.433, Accuracy: 0.887
Epoch:  4, Loss: 0.384, Accuracy: 0.902
Epoch:  5, Loss: 0.352, Accuracy: 0.910
Epoch:  6, Loss: 0.329, Accuracy: 0.917
Epoch:  7, Loss: 0.310, Accuracy: 0.921
Epoch:  8, Loss: 0.294, Accuracy: 0.926
Epoch:  9, Loss: 0.280, Accuracy: 0.930
Epoch: 10, Loss: 0.267, Accuracy: 0.933
# Evaluate the trained model on the whole test split in a single batch.
reshaped_test_images = test_images.reshape(-1, 784, 1)
test_accuracy = model.compute_accuracy(reshaped_test_images, test_labels)
print(f"Test accuracy: {test_accuracy}")
Test accuracy: 0.9201
# Plot the predictions: take the softmax output (last element of the
# forward pass) and drop its trailing length-1 axis.
forward_outputs = model.forward_pass(reshaped_test_images)
predictions = forward_outputs[-1][..., 0]
num_rows = 5
num_cols = 3
num_images = num_rows * num_cols
random_indices = np.random.choice(len(test_images), num_images, replace=False)

fig, axes = plt.subplots(num_rows, 2 * num_cols, figsize=(2 * 2 * num_cols, 2 * num_rows))

for i, idx in enumerate(random_indices):
    row, col = divmod(i, num_cols)
    scores = predictions[idx]
    plot_image(axes[row, 2 * col], idx, scores, test_labels, test_images)
    plot_value_array(axes[row, 2 * col + 1], idx, scores, test_labels)

No description has been provided for this image



class SimpleNeuralNetworkNumpyAdam(SimpleNeuralNetworkNumpyBatch):
    """Mini-batch network whose parameters can be updated with Adam."""

    def __init__(self):
        super().__init__()  # Initialise the base class (creates the parameters).
        # The moment estimates and the step counter must exist before the
        # first call to update_parameters_with_adam.
        self.initialize_adam_parameters()

    def initialize_adam_parameters(self):
        """Reset the Adam state: zero moment estimates and step counter."""
        self.m = {key: np.zeros_like(val) for key, val in self.parameters.items()}
        self.v = {key: np.zeros_like(val) for key, val in self.parameters.items()}
        self.t = 0  # time step

    def update_parameters_with_adam(self, gradients, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        """Apply one Adam step in place, using the batch-mean gradient.

        Args:
            gradients (dict[str, np.ndarray]): Per-sample gradients keyed
                ``d<parameter name>``, with a leading batch axis.
            learning_rate (float, optional): Step size.
            beta1 (float, optional): Decay rate of the first moment.
            beta2 (float, optional): Decay rate of the second moment.
            epsilon (float, optional): Small constant for numerical stability.
        """
        self.t += 1
        for key in self.parameters:
            # Average the gradient over the batch axis.
            grad_mean = np.mean(gradients[f"d{key}"], axis=0)

            # Update the moment estimates.
            self.m[key] = beta1 * self.m[key] + (1 - beta1) * grad_mean
            self.v[key] = beta2 * self.v[key] + (1 - beta2) * np.square(grad_mean)

            # Bias-correct the moments; both start at zero.
            m_corrected = self.m[key] / (1 - beta1 ** self.t)
            v_corrected = self.v[key] / (1 - beta2 ** self.t)

            # Update the parameter.
            self.parameters[key] -= learning_rate * m_corrected / (np.sqrt(v_corrected) + epsilon)
# NOTE(review): the original BATCH_SIZE is not visible in this file. 32 is
# an assumption (it matches the 1875 steps per epoch of the Keras run,
# 60000 / 32) -- confirm.
BATCH_SIZE = 32
# The recorded run of this cell shows ten epochs.
EPOCHS = 10

model = SimpleNeuralNetworkNumpyAdam()
# Start from zeroed Adam moment estimates and step counter.
model.initialize_adam_parameters()

# Rows of the identity matrix are the one-hot vectors; build it once.
one_hot_table = np.eye(10)

for epoch in range(EPOCHS):
    total_loss = 0.0
    total_accuracy = 0.0
    batches = 0

    # Samples beyond the last full batch are skipped.
    for batch in range(len(train_images) // BATCH_SIZE):
        batch_images = train_images[batch * BATCH_SIZE:(batch + 1) * BATCH_SIZE]
        # One column vector per image: (batch, 784, 1).
        X = np.reshape(batch_images, [-1, 784, 1])
        batch_labels = train_labels[batch * BATCH_SIZE:(batch + 1) * BATCH_SIZE]
        # Convert the integer labels to one-hot column vectors.
        Y = one_hot_table[batch_labels].astype(np.float32)[..., np.newaxis]

        A1, A2, A3 = model.forward_pass(X)
        loss = model.compute_loss(A3, Y)
        gradients = model.backward_pass(X, Y, A1, A2, A3)
        # Without this step the gradients are discarded and the model
        # never learns.
        model.update_parameters_with_adam(gradients)

        # Accuracy is measured after the update, with a second forward pass.
        accuracy = model.compute_accuracy(X, batch_labels)

        total_loss += loss
        total_accuracy += accuracy
        batches += 1
    avg_loss = total_loss / batches
    avg_accuracy = total_accuracy / batches
    print(f"Epoch: {epoch + 1:2d}, Loss: {avg_loss:.3f}, Accuracy: {avg_accuracy:.3f}")
Epoch:  1, Loss: 0.520, Accuracy: 0.848
Epoch:  2, Loss: 0.263, Accuracy: 0.927
Epoch:  3, Loss: 0.221, Accuracy: 0.939
Epoch:  4, Loss: 0.196, Accuracy: 0.945
Epoch:  5, Loss: 0.178, Accuracy: 0.951
Epoch:  6, Loss: 0.163, Accuracy: 0.956
Epoch:  7, Loss: 0.152, Accuracy: 0.958
Epoch:  8, Loss: 0.143, Accuracy: 0.961
Epoch:  9, Loss: 0.136, Accuracy: 0.963
Epoch: 10, Loss: 0.131, Accuracy: 0.965
# Evaluate the Adam-trained model on the whole test split in a single batch.
reshaped_test_images = test_images.reshape(-1, 784, 1)
test_accuracy = model.compute_accuracy(reshaped_test_images, test_labels)
print(f"Test accuracy: {test_accuracy}")
Test accuracy: 0.9444
