首页 > 编程语言 > CV算法复现（分类算法1/6）：LeNet5

CV算法复现（分类算法1/6）：LeNet5

2020-12-30 20:31:48 作者：互联网

致谢：霹雳吧啦Wz：https://space.bilibili.com/18161609

1 本次要点

1 本次要点

1.1 Python库语法

PIL 和 numpy 中维度顺序：H*W*C
enumerate() 函数用于将一个可遍历的数据对象(如列表、元组或字符串)组合为一个索引序列，同时列出数据和数据下标，一般用在 for 循环当中。如：for i, x in enumerate(['a', 'b']) 会依次得到 (0, 'a')、(1, 'b')。
with：上下文管理器。with 语句适用于对资源进行访问的场合，相当于 try…except…finally，确保使用过程中不管是否发生异常都会执行必要的“清理”操作，释放资源，比如文件使用后自动关闭、线程中锁的自动获取和释放等。
super()：解决多层继承中可能出现的一些问题。使用多继承时，一般要用此函数。

1.2 Pytorch框架语法

pytorch 中 tensor 维度顺序：C*H*W
optimizer.zero_grad()：每计算一次batch后，要将历史梯度清零，防止累加。
item()：取出只含一个元素的张量中的数值，返回普通的 Python 数值（可直接参与累加、打印等计算）。
with torch.no_grad()：在该上下文中不跟踪梯度（不构建计算图），节省内存和计算量。

2 环境

win10，GPU 1060 3G
pytorch 1.4
Python 3.6

3 网络结构

4 代码结构

model.py
utils.py
train.py
test.py
data（存放cifar数据集：需要解压，不能更改压缩包名字）
1.jpg（测试图）

4.1 model.py

import torch.nn as nn
import torch.nn.functional as F

class LeNet(nn.Module):
    """LeNet-5 style CNN for 3-channel 32x32 images and 10 output classes.

    Layout: conv(5x5) -> ReLU -> maxpool(2) -> conv(5x5) -> ReLU -> maxpool(2)
    -> fc(400, 120) -> ReLU -> fc(120, 84) -> ReLU -> fc(84, 10).
    """

    def __init__(self):
        # Initialize nn.Module first so the layers assigned below get registered.
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        # The fully connected part takes the flattened 16 x 5 x 5 feature maps.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images.

        Args:
            x: float tensor of shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, 10).

        Raises:
            RuntimeError: if the input's spatial size does not produce
                16 * 5 * 5 features per sample (raised by ``fc1``).
        """
        x = F.relu(self.conv1(x))  # (3, 32, 32) -> (6, 28, 28)
        x = self.pool1(x)          # -> (6, 14, 14)
        x = F.relu(self.conv2(x))  # -> (16, 10, 10)
        x = self.pool2(x)          # -> (16, 5, 5)
        # Flatten per sample while keeping the batch dimension explicit.
        # view(-1, 400) would silently re-batch a wrongly sized input whenever
        # its total element count happens to be divisible by 400.
        x = x.view(x.size(0), -1)  # -> (400,)
        x = F.relu(self.fc1(x))    # -> (120,)
        x = F.relu(self.fc2(x))    # -> (84,)
        x = self.fc3(x)            # -> (10,)
        return x

# # 测试网络输入输出维度是否写对
# import torch
# input1 = torch.rand([2, 3, 32, 32]) #B C H W
# print(input1)

# model = LeNet()
# print(model)

# output = model(input1)
# print(output)

4.2 utils.py

import torchvision.transforms as transforms

# Preprocessing applied to the training images.
transform_train = transforms.Compose(
    [transforms.ToTensor(),  # to a tensor in C*H*W order, values scaled to [0, 1]
     # Then standardize each channel with mean 0.5 and std 0.5.
     # The closing bracket must stay outside any trailing comment, otherwise
     # the list is never closed and the module fails to parse.
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

# Preprocessing for a single test image: resize to the 32x32 network input,
# convert to a tensor, then standardize each channel with mean 0.5 / std 0.5.
transform_test = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-10 class names, looked up by predicted class index (0-9).
# Every entry needs its own comma: adjacent string literals are concatenated,
# which would merge two names and shift all following labels by one.
clases_cifar10 = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

4.3 train.py

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms

import matplotlib.pyplot as plt 
import numpy as np 

from utils import clases_cifar10, transform_train
from model import LeNet


# 1 Load the training set (50k images), preprocess it and serve it as
#   shuffled mini-batches.
train_data = torchvision.datasets.CIFAR10(root='M:/CV_data/cifar-10/', train=True,
                                        download=False, transform=transform_train)
# On Windows num_workers has to be 0.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=32,
                                            shuffle=True, num_workers=0)

# 2 Load the validation set (10k images), preprocess it and serve it,
#   unshuffled, as one single batch.
val_data = torchvision.datasets.CIFAR10(root='M:/CV_data/cifar-10/', train=False,
                                        download=False, transform=transform_train)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=10000,
                                            shuffle=False, num_workers=0)
# The loader yields exactly one batch, so a single next() call fetches the
# whole validation set. The built-in next() is used because the Python 2
# style `.next()` method is not available on newer DataLoader iterators.
val_data_iter = iter(val_loader)
val_images, val_labels = next(val_data_iter)


# 3 Build the model, the loss function and the optimizer.
net = LeNet()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# 4 Train for 5 epochs, reporting every 500 mini-batches.
for epoch in range(5):
    running_loss = 0.0  # training loss accumulated since the last report

    for step, data in enumerate(train_loader):
        inputs, labels = data

        # Gradients accumulate across backward() calls, so clear them
        # before processing each batch.
        optimizer.zero_grad()

        # forward -> loss -> backward -> parameter update
        outputs = net(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        # item() extracts the plain number held by the one-element loss tensor.
        running_loss += loss.item()

        # Only report on every 500th step.
        if (step + 1) % 500 != 0:
            continue

        # Validation pass: no gradient tracking, which saves memory and compute.
        with torch.no_grad():
            outputs = net(val_images)  # [batch=10000, 10]
            _, predict_y = torch.max(outputs, dim=1)
            correct = torch.eq(predict_y, val_labels).sum().item()
            accuracy = correct / val_labels.size(0)

        print('[%d, %5d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, step + 1, running_loss / 500, accuracy))
        running_loss = 0.0

# 5 Save the trained weights.
print("finished training")
save_path = './Lenet.pth'
torch.save(net.state_dict(), save_path)

输出

4.4 test.py

import torch
from PIL import Image
from model import LeNet
from utils import clases_cifar10, transform_test

# Rebuild the network and load the trained weights from 'Lenet.pth'.
net = LeNet()
net.load_state_dict(torch.load('Lenet.pth'))
net.eval()  # standard practice for inference

# Force three channels: a grayscale or RGBA file would not match the
# 3-channel Normalize in transform_test or the network's first conv layer.
im = Image.open('1.jpg').convert('RGB')
im = transform_test(im)  # [c, h, w]
im = torch.unsqueeze(im, dim=0)  # [n, c, h, w]

# no_grad is optional for a single image but saves memory and compute,
# which matters when testing large batches.
with torch.no_grad():
    outputs = net(im)  # the class with the largest score is the prediction
    predict = torch.softmax(outputs, dim=1)  # turn scores into probabilities
    print(predict)
    # .item() returns a plain Python int, avoiding the deprecated `.data`
    # access and the int() conversion of a one-element array.
    max_index = torch.max(predict, dim=1)[1].item()
    print(clases_cifar10[max_index])  # print the matching class label

输出

标签：LeNet5,nn,train,self,torch,算法,val,import,CV
来源： https://blog.csdn.net/weixin_42118657/article/details/111410650