基于CIFAR-10的Pytorch深度学习模板构建

基于CIFAR-10的Pytorch深度学习模板构建

构建了一个基于CIFAR-10数据集的pytorch版本深度学习baseline,便于以后更快的迁移到其它深度学习任务中去。代码详情请参看GitHub,如有错误,请指正。

数据集加载


  1. 首先需要从CIFAR-10官网下载打包好的数据集

    CIFAR-10数据集官网:http://www.cs.toronto.edu/~kriz/cifar.html

    • 下载对应Python版本的数据集文件并解压

    • 根据官网上的Python加载方式加载数据集

1
2
3
4
5
6
7
8
def unpickle(file):
    """Load one CIFAR-10 batch file and return its contents as a dict.

    The official CIFAR-10 archives are pickled with bytes keys, so the
    returned dict is keyed by e.g. b'data', b'labels', b'filenames'.
    """
    import pickle
    with open(file, 'rb') as fo:
        # Renamed from `dict` to avoid shadowing the builtin; decoding with
        # encoding='bytes' matches the official CIFAR-10 loading recipe.
        batch = pickle.load(fo, encoding='bytes')
    return batch

# Path to the first training batch on the author's machine; point this at
# wherever the extracted cifar-10-batches-py directory actually lives.
file = '/Users/morvan/Downloads/cifar-10-batches-py/data_batch_1'
# Prints the available keys of one decoded batch (b'batch_label',
# b'labels', b'data', b'filenames').
print(unpickle(file).keys())

输出为:

1
dict_keys([b'batch_label', b'labels', b'data', b'filenames'])

经过以上解码加载之后,对返回值dict,分别通过dict[b'labels']和dict[b'data']获取batch1中的标签和数据,同理可以依次获得batch2-5以及test_batch的标签和数据。

dict[b'labels']返回一个长度为10000的列表,每个元素取值范围为0-9,分别对应10个类别的标签。

dict[b'data']返回一个[10000, 3072]的矩阵,存储每张样本图片的RGB像素值。前1024(32x32)是R通道的像素数据,接下来依次是G、B通道各1024个像素值。

  1. 完成训练和测试集的样本的解压加载之后,开始第二步:构造数据集加载器类。这个类有三个主要的成员函数,分别是:
    1. __init__()用于初始化数据集路径和定义一些数据增强Pipeline。
    2. __getitem__()用于依据索引获取对应数据增强后的样本(data+label)。
    3. __len__()返回样本集的样本数据量。

具体数据集加载器类构造如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
class Cifar10_Dataset(data.Dataset):
    """Dataset over one or more raw CIFAR-10 batch files.

    Each batch file is decoded with ``unpickle`` and the per-file arrays
    are concatenated, so passing all five training batches yields a
    (50000, 3072) data matrix and a 50000-element label list.
    """

    def __init__(self, data_roots, trans):
        """
        Args:
            data_roots: iterable of paths to CIFAR-10 batch files.
            trans: torchvision transform pipeline applied per sample.
        """
        self.transforms = trans
        rows = []
        labels = []
        for data_root in data_roots:
            # Load each batch file exactly once (the original called
            # unpickle twice per file, deserializing every batch twice).
            # Also avoids naming a local `data`, which shadowed the
            # torch.utils.data module.
            batch = unpickle(data_root)
            rows.append(batch[b'data'])
            labels.extend(batch[b'labels'])
        # shape -> (N, 3072), N = 10000 * number of batch files
        self.all_data = np.vstack(rows)
        # length-N list of int labels in [0, 9]
        self.all_label = labels

    def __getitem__(self, idx):
        """Return (transformed_image, label) for sample ``idx``."""
        # Row layout is 1024 R, then 1024 G, then 1024 B values: reshape to
        # CHW and reorder to HWC so PIL can build an RGB image from it.
        sample = np.reshape(self.all_data[idx], (3, 32, 32)).transpose((1, 2, 0))
        img = Image.fromarray(sample)
        img = self.transforms(img)  # tensor of shape 3x32x32 after ToTensor
        return img, self.all_label[idx]

    def __len__(self):
        """Number of samples across all loaded batch files."""
        return len(self.all_label)


if __name__ == '__main__':
    # Smoke test: build a dataset from a single batch file and print the
    # shape of one transformed sample (expected: torch.Size([3, 32, 32])).
    batch_path = '/Users/morvan/Downloads/cifar-10-batches-py/data_batch_1'
    trans = T.Compose([
        T.RandomCrop(32),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
    ])
    dataset = Cifar10_Dataset([batch_path], trans)
    sample_img, _ = dataset[10]
    print(sample_img.shape)

至此数据集加载器类构造完成。

网络模型搭建


网络模型为ResNet18,这里直接使用Pytorch官方提供的版本(手动更改最后全连接层的节点数为class_num=10)。

  1. ResNet18
1
2
3
4
5
6
7
8
9
10
class ResNet18(nn.Module):
    """Torchvision ResNet-18 with its classifier head resized for CIFAR-10."""

    def __init__(self, num_classes):
        super(ResNet18, self).__init__()
        backbone = M.resnet18(pretrained=False)
        # Replace the ImageNet 1000-way fully-connected layer with a
        # num_classes-way head so the network fits CIFAR-10.
        in_features = backbone.fc.in_features
        backbone.fc = nn.Linear(in_features, num_classes)
        self.resnet = backbone

    def forward(self, x):
        return self.resnet(x)

工具函数及超参数设置


构造了两个工具函数calculation_accuracy()和training_process_visualization(),分别用于计算准确率和训练过程的可视化。

  1. calculation_accuracy()
1
2
3
4
5
6
7
8
import numpy as np
def calculation_accuracy(pred, label):
    """Fraction of samples whose argmax prediction matches the label.

    Args:
        pred: (batch, num_classes) tensor of scores/logits.
        label: (batch,) tensor of integer class labels.
    """
    # .cpu() is a no-op on tensors already on the CPU, so this works the
    # same with or without GPU acceleration.
    scores = pred.cpu().detach().numpy()
    predicted = np.argmax(scores, axis=1)
    hits = np.sum(predicted == label.cpu().numpy())
    return hits / label.size(0)
  1. training_process_visualization()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import matplotlib.pyplot as plt
def training_process_visualization(data):
    """Plot and save the training curves, then display all figures.

    Expects a dict with keys 'train_loss', 'train_acc' (per-step series)
    and 'test_acc' (per-epoch series); writes one PNG per curve.
    """
    curves = [
        # (figure no., series, title, y-label, x-label, output file)
        # NOTE(review): the 'trian' spelling is kept from the original output.
        (1, data['train_loss'], 'trian_loss', 'trian loss', 'step', 'train_loss.png'),
        (2, data['train_acc'], 'train_acc', 'train acc', 'step', 'train_acc.png'),
        (3, data['test_acc'], 'test_acc', 'test acc', 'epoch', 'test_acc.png'),
    ]
    for fig_no, series, title, ylabel, xlabel, fname in curves:
        plt.figure(fig_no)
        plt.plot(range(len(series)), series)
        plt.title(title)
        plt.ylabel(ylabel)
        plt.xlabel(xlabel)
        plt.savefig(fname)
    plt.show()
  1. config.py

这个文件一般用来存储模型训练过程中的学习率,数据增强,batch size,损失函数等超参数。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
from torchvision import transforms as T
class Config():
    """Hyper-parameters and augmentation pipelines for CIFAR-10 training."""
    # Paths to the five training batches and the single test batch.
    train_roots = ['F:/DeepL/cifar-10-batches-py/data_batch_' + str(i)
                   for i in range(1, 6)]
    test_roots = ['F:/DeepL/cifar-10-batches-py/test_batch']
    net = 'Res'        # network architecture tag
    num_classes = 10   # number of CIFAR-10 classes
    nw = 0             # DataLoader workers (0: multi-worker loading is unreliable on Windows)
    wd = 0.001         # weight decay
    m = 0.9            # SGD momentum
    bs = 512           # batch size
    epochs = 20
    lr = 0.001
    # Training-time augmentation: random crop and small rotation, then
    # normalization with ImageNet channel statistics.
    train_trans = T.Compose([
        T.RandomCrop(32),
        T.RandomRotation((-10, 10)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225])])
    # Test-time pipeline: tensor conversion and normalization only.
    test_trans = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225])])

训练和测试


训练步骤

  1. 初始化超参数实例
  2. 定义数据集加载器实例
  3. 定义模型、优化器、损失函数、学习率调整器
  4. 用于保存训练过程中的损失和准确率(可以不用)
  5. 通过两层for循环开始迭代训练
    1. 取数据(net.train())
    2. 优化器梯度置零
    3. 前向传播预测
    4. 计算损失并反向传播
    5. 学习率调整
    6. 每个epoch测试一次(net.eval())
  6. 根据记录的数据可视化训练过程

完整的train.py如下所示

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import torch.nn as nn
import torch as t
from tqdm._tqdm import trange
from torch.optim.lr_scheduler import StepLR
from torch.utils import data

from dataset.my_dataset import Cifar10_Dataset
from utils.config import Config
from utils.tools import calculation_accuracy, training_process_visualization
from model.my_net import ResNet18


# Hyper-parameter instance — all knobs live in Config.
opt = Config()

# Training and test dataset loaders.
trainset = Cifar10_Dataset(data_roots=opt.train_roots, trans=opt.train_trans)
trainset_loader = data.DataLoader(trainset, opt.bs, num_workers=opt.nw, shuffle=True)
testset = Cifar10_Dataset(data_roots=opt.test_roots, trans=opt.test_trans)
testset_loader = data.DataLoader(testset, opt.bs, num_workers=opt.nw)

# Model, optimizer, loss function and learning-rate scheduler.
net = ResNet18(opt.num_classes)
optimizer = t.optim.SGD(net.parameters(), lr=opt.lr, momentum=opt.m, weight_decay=opt.wd)
loss_func = nn.CrossEntropyLoss()
lr_adjust = StepLR(optimizer, step_size=30, gamma=0.1)

# Per-step training loss/accuracy and per-epoch test accuracy.
train_loss = []
train_acc = []
test_acc = []

if t.cuda.is_available():
    net.cuda()
    loss_func.cuda()

# trange gives a progress bar with timing over the epoch loop.
for epoch in trange(1, opt.epochs + 1):
    net.train()
    # Loop variables renamed from `data`/`label`: the original `data`
    # shadowed the torch.utils.data module imported above.
    for i, (inputs, labels) in enumerate(trainset_loader):
        if t.cuda.is_available():
            inputs, labels = inputs.cuda(), labels.cuda()

        optimizer.zero_grad()
        pred = net(inputs)

        loss = loss_func(pred, labels)
        acc = calculation_accuracy(pred, labels)
        loss.backward()
        optimizer.step()

        # .cpu() is a no-op on CPU tensors, so this is safe either way.
        train_loss.append(loss.cpu().detach().numpy())
        train_acc.append(acc)
    lr_adjust.step()

    # Evaluate once per epoch. no_grad() stops autograd from recording the
    # forward passes — the original kept the computation graph alive while
    # stacking every test prediction, wasting memory for no benefit.
    net.eval()
    with t.no_grad():
        for j, (inputs, labels) in enumerate(testset_loader):
            if t.cuda.is_available():
                inputs, labels = inputs.cuda(), labels.cuda()
            test_pred = net(inputs)

            all_pred = test_pred if j == 0 else t.vstack((all_pred, test_pred))
            all_label = labels if j == 0 else t.cat((all_label, labels))

    acc = calculation_accuracy(all_pred, all_label)
    test_acc.append(acc)

# Visualize the recorded curves (name avoids rebinding the `data` module).
history = {'train_loss': train_loss, 'train_acc': train_acc, 'test_acc': test_acc}
training_process_visualization(history)

Post author: jasonyang
Copyright Notice: All articles in this blog are licensed under CC BY-NC-SA 3.0 unless stating additionally. 转载请注明出处。