
Deep Learning Experiment: Handwritten Digit Recognition with Softmax

Related background for this article: AI遮天传 DL-回归与分类_老师我作业忘带了的博客-CSDN博客

The MNIST Dataset

The MNIST handwritten digit dataset is an image classification dataset widely used in machine learning. It contains 60,000 training samples and 10,000 test samples. The digits have been size-normalized and centered in fixed-size images. Each sample is a 784-dimensional vector (784×1), flattened from the original 28×28 grayscale image. The digits in MNIST range from 0 to 9; some examples are shown below. Note: never use any information about the test samples, in any form, during training.

[Figure: sample MNIST digit images]
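
For concreteness, here is a minimal sketch (assuming only NumPy; the images array below is synthetic, standing in for decoded MNIST data) of how a 28×28 grayscale image becomes the 784-dimensional input vector:

import numpy as np

# a stand-in batch of five grayscale images, shape (5, 28, 28), values in [0, 255]
images = np.random.randint(0, 256, size=(5, 28, 28), dtype=np.uint8)

# flatten each image row by row into a 784-dimensional vector
flat = images.reshape(len(images), 28 * 28)
print(flat.shape)  # (5, 784)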

Code Listing

  • data/ folder: holds the MNIST dataset. Download the data and unzip it into this folder (the expected layout is shown after this list). Download link: MNIST handwritten digit database, Yann LeCun, Corinna Cortes and Chris Burges
  • solver.py: implements the training and testing pipeline;
  • dataloader.py: implements the data loader used to prepare data for training and testing;
  • visualize.py: implements the plot_loss_and_acc function, which plots the loss and accuracy curves;
  • optimizer.py: implements an SGD optimizer with momentum, used to perform the parameter updates;
  • loss.py: implements softmax_cross_entropy_loss, covering both the loss computation and its gradient;
  • runner.ipynb: the notebook that runs training and testing once all the code is completed.
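
After unzipping, the dataloader.py listing below builds file paths of the form '{kind}-images-idx3-ubyte', so data/ is expected to contain the four uncompressed IDX files directly:

data/
├── train-images-idx3-ubyte
├── train-labels-idx1-ubyte
├── t10k-images-idx3-ubyte
└── t10k-labels-idx1-ubyte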

Requirements

  1. Record the training and test accuracy, and plot the training loss and accuracy curves;
  2. Compare the results with and without momentum; discuss the differences in terms of training time, convergence, accuracy, and so on (a sketch of such a comparison run follows this list);
  3. Tune other hyperparameters such as the learning rate and batch size, observe how they affect classification performance, and write the observations and new results down in the report.
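
For requirement 2, a sketch of one way to produce the momentum comparison, reusing the Solver class and plot_loss_and_acc from the listings below (the cfg keys match those in solver.py; only 'momentum' changes between the two runs):

from solver import Solver
from visualize import plot_loss_and_acc

base_cfg = {
    'data_root': 'data',
    'max_epoch': 10,
    'batch_size': 100,
    'learning_rate': 0.1,
    'momentum': 0.9,
    'display_freq': 50,
    'activation_function': 'relu',
}

# identical settings except for the momentum coefficient
loss_m, acc_m = Solver(dict(base_cfg, momentum=0.9)).train()
loss_0, acc_0 = Solver(dict(base_cfg, momentum=0.0)).train()

plot_loss_and_acc({
    'momentum=0.9': [loss_m, acc_m],
    'momentum=0.0': [loss_0, acc_0],
})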

The results of a run are as follows:

[Figures: console output of a training run and the resulting loss and accuracy curves]

The code is as follows:

solver.py

import numpy as np

from layers import FCLayer
from dataloader import build_dataloader
from network import Network
from optimizer import SGD
from loss import SoftmaxCrossEntropyLoss
from visualize import plot_loss_and_acc


class Solver(object):

    def __init__(self, cfg):
        self.cfg = cfg

        # build dataloaders
        train_loader, val_loader, test_loader = self.build_loader(cfg)
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader

        # build model
        self.model = self.build_model(cfg)

        # build optimizer
        self.optimizer = self.build_optimizer(self.model, cfg)

        # build evaluation criterion
        self.criterion = SoftmaxCrossEntropyLoss()

    @staticmethod
    def build_loader(cfg):
        train_loader = build_dataloader(
            cfg['data_root'], cfg['max_epoch'], cfg['batch_size'], shuffle=True, mode='train')
        val_loader = build_dataloader(
            cfg['data_root'], 1, cfg['batch_size'], shuffle=False, mode='val')
        test_loader = build_dataloader(
            cfg['data_root'], 1, cfg['batch_size'], shuffle=False, mode='test')
        return train_loader, val_loader, test_loader

    @staticmethod
    def build_model(cfg):
        # a single fully connected layer followed by softmax cross-entropy;
        # note that cfg['activation_function'] is not used by this one-layer model
        model = Network()
        model.add(FCLayer(784, 10))
        return model

    @staticmethod
    def build_optimizer(model, cfg):
        return SGD(model, cfg['learning_rate'], cfg['momentum'])

    def train(self):
        max_epoch = self.cfg['max_epoch']
        epoch_train_loss, epoch_train_acc = [], []
        for epoch in range(max_epoch):
            iteration_train_loss, iteration_train_acc = [], []
            for iteration, (images, labels) in enumerate(self.train_loader):
                # forward pass
                logits = self.model.forward(images)
                loss, acc = self.criterion.forward(logits, labels)

                # backward pass
                delta = self.criterion.backward()
                self.model.backward(delta)

                # update the model weights
                self.optimizer.step()

                # record loss and accuracy
                iteration_train_loss.append(loss)
                iteration_train_acc.append(acc)

                # display iteration training info
                if iteration % self.cfg['display_freq'] == 0:
                    print("Epoch [{}][{}]\t Batch [{}][{}]\t Training Loss {:.4f}\t Accuracy {:.4f}".format(
                        epoch, max_epoch, iteration, len(self.train_loader), loss, acc))

            avg_train_loss, avg_train_acc = np.mean(iteration_train_loss), np.mean(iteration_train_acc)
            epoch_train_loss.append(avg_train_loss)
            epoch_train_acc.append(avg_train_acc)

            # validate
            avg_val_loss, avg_val_acc = self.validate()

            # display epoch training info
            print('\nEpoch [{}]\t Average training loss {:.4f}\t Average training accuracy {:.4f}'.format(
                epoch, avg_train_loss, avg_train_acc))
            # display epoch validation info
            print('Epoch [{}]\t Average validation loss {:.4f}\t Average validation accuracy {:.4f}\n'.format(
                epoch, avg_val_loss, avg_val_acc))

        return epoch_train_loss, epoch_train_acc

    def validate(self):
        logits_set, labels_set = [], []
        for images, labels in self.val_loader:
            logits = self.model.forward(images)
            logits_set.append(logits)
            labels_set.append(labels)
        logits = np.concatenate(logits_set)
        labels = np.concatenate(labels_set)
        loss, acc = self.criterion.forward(logits, labels)
        return loss, acc

    def test(self):
        logits_set, labels_set = [], []
        for images, labels in self.test_loader:
            logits = self.model.forward(images)
            logits_set.append(logits)
            labels_set.append(labels)
        logits = np.concatenate(logits_set)
        labels = np.concatenate(labels_set)
        loss, acc = self.criterion.forward(logits, labels)
        return loss, acc


if __name__ == '__main__':
    # You can modify the hyperparameters yourself.
    relu_cfg = {
        'data_root': 'data',
        'max_epoch': 10,
        'batch_size': 100,
        'learning_rate': 0.1,
        'momentum': 0.9,
        'display_freq': 50,
        'activation_function': 'relu',
    }
    runner = Solver(relu_cfg)
    relu_loss, relu_acc = runner.train()
    test_loss, test_acc = runner.test()
    print('Final test accuracy {:.4f}\n'.format(test_acc))

    # You can modify the hyperparameters yourself.
    sigmoid_cfg = {
        'data_root': 'data',
        'max_epoch': 10,
        'batch_size': 100,
        'learning_rate': 0.1,
        'momentum': 0.9,
        'display_freq': 50,
        'activation_function': 'sigmoid',
    }
    runner = Solver(sigmoid_cfg)
    sigmoid_loss, sigmoid_acc = runner.train()
    test_loss, test_acc = runner.test()
    print('Final test accuracy {:.4f}\n'.format(test_acc))

    plot_loss_and_acc({
        "relu": [relu_loss, relu_acc],
        "sigmoid": [sigmoid_loss, sigmoid_acc],
    })

dataloader.py

import os
import struct

import numpy as np


class Dataset(object):

    def __init__(self, data_root, mode='train', num_classes=10):
        assert mode in ['train', 'val', 'test']

        # load images and labels from the IDX files
        kind = {'train': 'train', 'val': 'train', 'test': 't10k'}[mode]
        labels_path = os.path.join(data_root, '{}-labels-idx1-ubyte'.format(kind))
        images_path = os.path.join(data_root, '{}-images-idx3-ubyte'.format(kind))
        with open(labels_path, 'rb') as lbpath:
            magic, n = struct.unpack('>II', lbpath.read(8))
            labels = np.fromfile(lbpath, dtype=np.uint8)
        with open(images_path, 'rb') as imgpath:
            magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
            images = np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), 784)

        if mode == 'train':
            # training images and labels
            self.images = images[:55000]  # shape: (55000, 784)
            self.labels = labels[:55000]  # shape: (55000,)
        elif mode == 'val':
            # validation images and labels
            self.images = images[55000:]  # shape: (5000, 784)
            self.labels = labels[55000:]  # shape: (5000,)
        else:
            # test images and labels
            self.images = images  # shape: (10000, 784)
            self.labels = labels  # shape: (10000,)

        self.num_classes = num_classes

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        # normalize from [0, 255] to [0.0, 1.0], then subtract the per-image mean
        image = image / 255.0
        image = image - np.mean(image)
        return image, label


class IterationBatchSampler(object):

    def __init__(self, dataset, max_epoch, batch_size=2, shuffle=True):
        # max_epoch is accepted for interface compatibility but not used here
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle

    def prepare_epoch_indices(self):
        indices = np.arange(len(self.dataset))
        if self.shuffle:
            np.random.shuffle(indices)
        # round up so a smaller final batch is kept; np.array_split tolerates
        # an index count that is not an exact multiple of the batch size
        num_iteration = len(indices) // self.batch_size + int(len(indices) % self.batch_size > 0)
        self.batch_indices = np.array_split(indices, num_iteration)

    def __iter__(self):
        return iter(self.batch_indices)

    def __len__(self):
        return len(self.batch_indices)


class Dataloader(object):

    def __init__(self, dataset, sampler):
        self.dataset = dataset
        self.sampler = sampler

    def __iter__(self):
        self.sampler.prepare_epoch_indices()
        for batch_indices in self.sampler:
            batch_images = []
            batch_labels = []
            for idx in batch_indices:
                img, label = self.dataset[idx]
                batch_images.append(img)
                batch_labels.append(label)
            batch_images = np.stack(batch_images)
            batch_labels = np.stack(batch_labels)
            yield batch_images, batch_labels

    def __len__(self):
        return len(self.sampler)


def build_dataloader(data_root, max_epoch, batch_size, shuffle=False, mode='train'):
    dataset = Dataset(data_root, mode)
    sampler = IterationBatchSampler(dataset, max_epoch, batch_size, shuffle)
    data_loader = Dataloader(dataset, sampler)
    return data_loader
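
A quick sanity check of the loader (a sketch; the shapes follow from the train/val split in Dataset above):

loader = build_dataloader('data', max_epoch=1, batch_size=100, shuffle=True, mode='train')
images, labels = next(iter(loader))   # starting iteration prepares the epoch indices
print(images.shape, labels.shape)     # (100, 784) (100,)
print(len(loader))                    # 550 iterations: 55000 training samples / batch size 100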

loss.py

import numpy as np

# a small number to prevent dividing by zero, maybe useful for you
EPS = 1e-11


class SoftmaxCrossEntropyLoss(object):

    def forward(self, logits, labels):
        """
        Inputs: (minibatch)
        - logits: forward results from the last FCLayer, shape (batch_size, 10)
        - labels: the ground-truth labels, shape (batch_size,)
        """
        ############################################################################
        # TODO: Put your code here
        # Calculate the average accuracy and loss over the minibatch
        # Return the loss and acc, which will be used in solver.py
        # Hint: you may need to save some arrays for backward
        self.one_hot_labels = np.zeros_like(logits)
        self.one_hot_labels[np.arange(len(logits)), labels] = 1
        self.prob = np.exp(logits) / (EPS + np.exp(logits).sum(axis=1, keepdims=True))

        # calculate the accuracy
        preds = np.argmax(self.prob, axis=1)  # argmax of self.prob equals argmax of logits
        acc = np.mean(preds == labels)

        # calculate the loss
        loss = np.sum(-self.one_hot_labels * np.log(self.prob + EPS), axis=1)
        loss = np.mean(loss)
        ############################################################################
        return loss, acc

    def backward(self):
        ############################################################################
        # TODO: Put your code here
        # Calculate and return the gradient (it has the same shape as logits)
        return self.prob - self.one_hot_labels
        ############################################################################
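
The one-line backward() above follows from the standard softmax cross-entropy derivation. For a single sample with logits $z$, probabilities $p$, and one-hot labels $y$, using $\partial \log(\sum_j e^{z_j}) / \partial z_i = p_i$:

$$
p_i = \frac{e^{z_i}}{\sum_j e^{z_j}}, \qquad
L = -\sum_i y_i \log p_i, \qquad
\frac{\partial L}{\partial z_i} = p_i - y_i
$$

so the gradient with respect to the logits is exactly self.prob - self.one_hot_labels. Note that the 1/batch_size factor from the mean in forward() is not applied here; presumably it is absorbed downstream (layers.py is not shown in this post) or folded into the effective learning rate.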

network.py

class Network(object):

    def __init__(self):
        self.layerList = []
        self.numLayer = 0

    def add(self, layer):
        self.numLayer += 1
        self.layerList.append(layer)

    def forward(self, x):
        # forward layer by layer
        for i in range(self.numLayer):
            x = self.layerList[i].forward(x)
        return x

    def backward(self, delta):
        # backward layer by layer, in reversed order
        for i in reversed(range(self.numLayer)):
            delta = self.layerList[i].backward(delta)
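
The FCLayer imported in solver.py lives in layers.py, which this post does not include. For reference, a minimal sketch consistent with how the layer is used above (a trainable flag checked by the optimizer, forward/backward methods, and grad_W/grad_b consumed by SGD.step()) could look like this; the weight initialization and the 1/batch_size gradient averaging are assumptions, not the original file:

import numpy as np

class FCLayer(object):
    def __init__(self, num_input, num_output):
        self.trainable = True  # checked by SGD.step()
        # small random initialization (assumed; the original scheme is not shown)
        self.W = 0.01 * np.random.randn(num_input, num_output)
        self.b = np.zeros(num_output)

    def forward(self, x):
        self.input = x  # cache the input for the backward pass
        return x.dot(self.W) + self.b

    def backward(self, delta):
        batch_size = len(self.input)
        # average the parameter gradients over the batch (assumed convention)
        self.grad_W = self.input.T.dot(delta) / batch_size
        self.grad_b = delta.mean(axis=0)
        # propagate the error signal to the previous layer
        return delta.dot(self.W.T)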

optimizer.py

import numpy as np


class SGD(object):

    def __init__(self, model, learning_rate, momentum=0.0):
        self.model = model
        self.learning_rate = learning_rate
        self.momentum = momentum

    def step(self):
        """One backpropagation step: update the weights layer by layer."""
        layers = self.model.layerList
        for layer in layers:
            if layer.trainable:
                ############################################################################
                # TODO: Put your code here
                # Calculate diff_W and diff_b using layer.grad_W and layer.grad_b.
                # You need to add momentum to this.

                # weight update with momentum; the velocity buffers are created
                # lazily on the first step and applied to both W and b
                if not hasattr(layer, 'diff_W'):
                    layer.diff_W = 0.0
                    layer.diff_b = 0.0
                layer.diff_W = layer.grad_W + self.momentum * layer.diff_W
                layer.diff_b = layer.grad_b + self.momentum * layer.diff_b
                layer.W += -self.learning_rate * layer.diff_W
                layer.b += -self.learning_rate * layer.diff_b

                # # weight update without momentum
                # layer.W += -self.learning_rate * layer.grad_W
                # layer.b += -self.learning_rate * layer.grad_b
                ############################################################################
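
The update implemented in step() is the classic heavy-ball rule. Writing $g_t$ for the current gradient (grad_W or grad_b), $v_t$ for the accumulated velocity (diff_W / diff_b), $\mu$ for the momentum coefficient, and $\eta$ for the learning rate:

$$
v_t = g_t + \mu\, v_{t-1}, \qquad \theta_t = \theta_{t-1} - \eta\, v_t
$$

With $\mu = 0$ this reduces to plain SGD (the commented-out variant), which is exactly the comparison requirement 2 asks for.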

visualize.py

import matplotlib.pyplot as plt
import numpy as np


def plot_loss_and_acc(loss_and_acc_dict):
    # visualize the loss curves
    plt.figure()
    min_loss, max_loss = 100.0, 0.0
    for key, (loss_list, acc_list) in loss_and_acc_dict.items():
        min_loss = min(min_loss, min(loss_list))
        max_loss = max(max_loss, max(loss_list))
        num_epoch = len(loss_list)
        plt.plot(range(1, 1 + num_epoch), loss_list, '-s', label=key)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.xticks(range(0, num_epoch + 1, 2))
    plt.axis([0, num_epoch + 1, min_loss - 0.1, max_loss + 0.1])
    plt.show()

    # visualize the accuracy curves
    plt.figure()
    min_acc, max_acc = 1.0, 0.0
    for key, (loss_list, acc_list) in loss_and_acc_dict.items():
        min_acc = min(min_acc, min(acc_list))
        max_acc = max(max_acc, max(acc_list))
        num_epoch = len(acc_list)
        plt.plot(range(1, 1 + num_epoch), acc_list, '-s', label=key)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.xticks(range(0, num_epoch + 1, 2))
    plt.axis([0, num_epoch + 1, min_acc, 1.0])
    plt.show()

Reposted from: https://blog.csdn.net/suic009/article/details/125904176
Copyright belongs to the original author, 老师我作业忘带了. In case of infringement, please contact us for removal.
