

AlexNet Model and Code Explained

The AlexNet architecture proposed by Alex Krizhevsky in 2012 ignited the wave of neural-network applications: it won the 2012 ImageNet image-recognition competition (ILSVRC 2012) and established CNNs as the core model for image classification.

The highlights of this network:
(1) It was the first to use GPUs to accelerate network training.
(2) It uses the ReLU activation function instead of the traditional Sigmoid and Tanh activations.
(3) It uses LRN (Local Response Normalization).
(4) It applies Dropout in the first two fully connected layers to randomly deactivate neurons and reduce overfitting (see the short sketch below).
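
As a quick orientation, here is a minimal PyTorch sketch (added for this write-up, not taken from the post's code) of how these ingredients map onto library modules; the LRN hyperparameters are the values reported for AlexNet and are shown only for illustration:

import torch
import torch.nn as nn

relu = nn.ReLU(inplace=True)                                       # ReLU instead of Sigmoid/Tanh
lrn = nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0)   # LRN with AlexNet's reported values
dropout = nn.Dropout(p=0.5)                                        # random neuron drop used before the FC layers

x = torch.randn(1, 96, 55, 55)   # a dummy Conv1-sized feature map
y = dropout(lrn(relu(x)))        # chaining them preserves the tensor shape
print(y.shape)                   # torch.Size([1, 96, 55, 55])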

Model composition

  • Input layer
  • 5 convolutional layers
  • 3 fully connected layers

Input layer: a 224 × 224 image with 3 channels; after preprocessing it effectively becomes 227 × 227 × 3.

Layer 1: convolutional layer (convolution + pooling)

Conv1

Input: input_size = [224, 224, 3]
Convolution:
kernels = 48 * 2 = 96 kernels (48 on each of the two GPU branches)
kernel_size = 11
padding = [1, 2] (one row/column of zeros on the top and left, two on the bottom and right)
stride = 4
Output: output_size = [55, 55, 96]

output=\frac{W-F+2P}{S}+1=\frac{224-11+(1+2)}{4}+1=55
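
To make these numbers easy to verify, here is a small helper (a sketch added for this write-up, not part of the original code); it uses symmetric padding together with floor division, which is what the PyTorch layers in model.py below do, and it reproduces the same sizes:

def conv_output_size(w, f, p, s):
    # output = floor((W - F + 2P) / S) + 1
    return (w - f + 2 * p) // s + 1

print(conv_output_size(224, 11, 2, 4))  # Conv1:    55
print(conv_output_size(55, 3, 0, 2))    # Maxpool1: 27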

Maxpool1

  • Input: input_size = [55, 55, 96]
  • Pooling (changes only the spatial size, not the channel depth):
    - kernel_size = 3
    - padding = 0
    - stride = 2
  • Output: output_size = [27, 27, 96]

output=\frac{W-F+2P}{S}+1=\frac{55-3+0}{2}+1=27

Conv2

  • Output: output_size = [27, 27, 256]

output=\frac{W-F+2P}{S}+1=\frac{27-5+(2+2)}{1}+1=27

Maxpool2

  • Output: output_size = [13, 13, 256]

output=\frac{W-F+2P}{S}+1=\frac{27-3}{2}+1=13

Conv3

  • Output: output_size = [13, 13, 384]

output=\frac{W-F+2P}{S}+1=\frac{13-3+(1+1)}{1}+1=13

Conv4

  • Output: output_size = [13, 13, 384]

output=\frac{W-F+2P}{S}+1=\frac{13-3+(1+1)}{1}+1=13

Conv5

  • Output: output_size = [13, 13, 256]

output=\frac{W-F+2P}{S}+1=\frac{13-3+(1+1)}{1}+1=13

Maxpool3

  • Output: output_size = [6, 6, 256]

output=\frac{W-F+2P}{S}+1=\frac{13-3+0}{2}+1=6

FC1, FC2, FC3

Maxpool3 → (6 × 6 × 256) → FC1 → 4096 → FC2 → 4096 → FC3 → 1000
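
A quick arithmetic check of the flattened size feeding FC1 (a worked example added here, not from the original post):

print(6 * 6 * 256)  # 9216 inputs to FC1 in the full two-branch AlexNet
print(6 * 6 * 128)  # 4608, the flattened size actually used by the halved model.py below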

Code:

1. model.py

Note: the implementation below uses only half the channels of the full network described above (48, 128, 192, 192, 128 feature maps and 2048-unit fully connected layers instead of 96, 256, 384, 384, 256 and 4096), so the sizes in the code comments differ accordingly.

import torch.nn as nn
import torch


class AlexNet(nn.Module):
    def __init__(self, num_classes=1000, init_weights=False):
        super(AlexNet, self).__init__()
        # use nn.Sequential() to pack the layers into one module and keep the code compact
        self.features = nn.Sequential(  # convolutional layers extract image features
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),  # input[3, 224, 224]  output[48, 55, 55]
            nn.ReLU(inplace=True),                                  # modify the tensor in place to save memory
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[48, 27, 27]
            nn.Conv2d(48, 128, kernel_size=5, padding=2),           # output[128, 27, 27]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 13, 13]
            nn.Conv2d(128, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),          # output[128, 13, 13]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 6, 6]
        )
        self.classifier = nn.Sequential(  # fully connected layers classify the image
            nn.Dropout(p=0.5),            # Dropout randomly deactivates neurons, default ratio 0.5
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    # forward pass
    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, start_dim=1)  # flatten before feeding the fully connected layers
        x = self.classifier(x)
        return x

    # weight initialization; in practice PyTorch initializes weights automatically when the network is built
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):                          # convolutional layer
                nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                        nonlinearity='relu')      # Kaiming (He) normal initialization
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)                  # initialize the bias to 0
            elif isinstance(m, nn.Linear):                        # fully connected layer
                nn.init.normal_(m.weight, 0, 0.01)                # normal-distribution initialization
                nn.init.constant_(m.bias, 0)                      # initialize the bias to 0
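
A minimal smoke test (added here, not part of the original post) confirms the shapes annotated in the comments above; it assumes model.py is importable from the current directory:

import torch
from model import AlexNet

net = AlexNet(num_classes=5, init_weights=True)
x = torch.randn(1, 3, 224, 224)   # one dummy RGB image
with torch.no_grad():
    y = net(x)
print(y.shape)                    # torch.Size([1, 5])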

2. train.py

import os
import sys
import json

import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from tqdm import tqdm

from model import AlexNet


def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),  # cannot 224, must (224, 224)
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))
    # test_data_iter = iter(validate_loader)
    # test_image, test_label = test_data_iter.next()
    #
    # def imshow(img):
    #     img = img / 2 + 0.5  # unnormalize
    #     npimg = img.numpy()
    #     plt.imshow(np.transpose(npimg, (1, 2, 0)))
    #     plt.show()
    #
    # print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
    # imshow(utils.make_grid(test_image))

    net = AlexNet(num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    # pata = list(net.parameters())
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    epochs = 10
    save_path = './AlexNet.pth'
    best_acc = 0.0
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


if __name__ == '__main__':
    main()
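
train.py relies on datasets.ImageFolder, which derives class labels from sub-folder names, so the flower data set must already be split into a layout along the following lines (illustrative; the class folders match the mapping commented in the code):

data_set/flower_data/
├── train/
│   ├── daisy/
│   ├── dandelion/
│   ├── roses/
│   ├── sunflower/
│   └── tulips/
└── val/
    ├── daisy/
    ├── dandelion/
    ├── roses/
    ├── sunflower/
    └── tulips/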

3. predict.py

import torch
from model import AlexNet
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
import json

# image preprocessing
data_transform = transforms.Compose(
    [transforms.Resize((224, 224)),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# load image (assumes rose.jpg is in the working directory)
img = Image.open("rose.jpg")
plt.imshow(img)
# [N, C, H, W]
img = data_transform(img)
# expand batch dimension
img = torch.unsqueeze(img, dim=0)

# read class_indict (written by train.py)
try:
    json_file = open('./class_indices.json', 'r')
    class_indict = json.load(json_file)
except Exception as e:
    print(e)
    exit(-1)

# create model
model = AlexNet(num_classes=5)
# load model weights
model_weight_path = "./AlexNet.pth"
model.load_state_dict(torch.load(model_weight_path))

# switch to eval mode (disables Dropout)
model.eval()
with torch.no_grad():
    # predict class
    output = torch.squeeze(model(img))  # squeeze out the batch dimension
    predict = torch.softmax(output, dim=0)
    predict_cla = torch.argmax(predict).numpy()
print(class_indict[str(predict_cla)], predict[predict_cla].item())
plt.show()

This article is reposted from: https://blog.csdn.net/weixin_42457110/article/details/124980914
Copyright belongs to the original author 工藤新三. In case of infringement, please contact us for removal.
