PaddlePaddle (飞桨) Tells Me Which Little Bee Is the Most Hardworking
Author: Internet
Who is the most hardworking little bee?
Bees are born to gather nectar, and they are diligent about it: a bee is either out collecting, just back from collecting, or in the hive depositing what it gathered, and it keeps this up for its whole life. Supposedly a hardworking bee's wings are dusted with pollen, so let's take a look.
My own eyes certainly can't make out the pollen on their wings and legs, so let's hand the job over to a machine. That's what machine learning is for.
import os
import zipfile
import random
import json
import paddle
import sys
import numpy as np
from PIL import Image
from PIL import ImageEnhance
import matplotlib.pyplot as plt
%matplotlib inline
1. Data Preparation
- (1) Unzip the original dataset
- (2) Split it into a training set and a validation set by a given ratio
- (3) Shuffle the samples and build the data list (a short sketch follows the unzip cell below)
- (4) Build data providers for the training and validation sets
# Comment these lines out once the archive has already been extracted
# !unzip -q data/data71008/花粉数据集archive.zip -d dataset
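A word on step (3) of the list above. The BeeDataset class below reads pollen_data.csv in file order and then slices the head and tail into validation and training sets, so if the CSV happened to be sorted by label the split would be skewed. Purely as an illustration (my own sketch, not from the original project), shuffling the rows once with a fixed seed before handing them to the dataset could look like this:
import random

def load_shuffled_rows(csv_path='dataset/PollenDataset/pollen_data.csv', seed=42):
    rows = []
    with open(csv_path) as f:
        next(f)                      # skip the CSV header line
        for line in f:
            info = line.strip().split(',')
            if len(info) >= 3:
                rows.append([info[1].strip(), info[2].strip()])   # [filename, label]
    random.seed(seed)                # fixed seed keeps the train/val split reproducible
    random.shuffle(rows)
    return rows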
import paddle
import paddle.vision.transforms as T
import numpy as np
from PIL import Image
class BeeDataset(paddle.io.Dataset):
"""
    Two-class bee dataset (carrying pollen or not)
"""
def __init__(self,mode='train',rate=0.2):
"""
        Initialization: read the data list and split it into train/validation by rate
"""
self.all_data = []
self.data = []
with open('dataset/PollenDataset/pollen_data.csv') as f:
next(f)
for line in f.readlines():
info = line.strip().split(',')
                if len(info) >= 3:
self.all_data.append([info[1].strip(), info[2].strip()])
self.transforms = T.Compose([
            T.Resize((64,64)), # resize to (h, w) = (64, 64); the raw images are roughly 180x300
            T.ToTensor(), # convert to tensor: HWC => CHW, values scaled to [0, 1]
T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
if mode=='train':
self.data=self.all_data[int(len(self.all_data)*rate):len(self.all_data)]
else:
self.data=self.all_data[0:int(len(self.all_data)*rate)]
def get_origin_data(self):
return self.data
def __getitem__(self, index):
"""
        Return a single sample (image, label) by index
"""
image_file, label = self.data[index]
image_file=os.path.join('dataset/PollenDataset/images',image_file)
image = Image.open(image_file)
if image.mode != 'RGB':
image = image.convert('RGB')
image = self.transforms(image)
return image, np.array(label, dtype='int64')
def __len__(self):
"""
        Return the total number of samples
"""
return len(self.data)
bee=BeeDataset()
train_dataset=BeeDataset(mode='train',rate=0.3)
test_dataset=BeeDataset(mode='test',rate=0.3)
print('train_data len: {}, test_data len:{}'.format(len(train_dataset), len(test_dataset)))
train_data len: 500, test_data len:214
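Step (4) of the preparation list mentioned data providers. model.fit below accepts these Dataset objects directly and builds its own loaders, so nothing more is strictly needed; just as an illustration (my own sketch, not part of the original post), wrapping them in paddle.io.DataLoader by hand would look like this:
train_loader = paddle.io.DataLoader(train_dataset, batch_size=256, shuffle=True)
test_loader = paddle.io.DataLoader(test_dataset, batch_size=256, shuffle=False)
for images, labels in train_loader:
    print(images.shape, labels.shape)   # e.g. [256, 3, 64, 64] and [256]
    break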
2. Model Configuration
Time to brush up on convolutions. No need to wheel in a ready-made heavyweight model; let's just hand-write a small network.
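A quick shape check before reading the code: for a 64x64 input, a 3x3 convolution with stride 1 produces 62x62 feature maps, a second 3x3 convolution produces 60x60, and the 2x2 max pool with stride 2 halves that to 30x30. With 128 output channels, flattening gives 128*30*30 = 115,200 features, which is exactly the in_features of the final Linear layer below.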
# Define the convolutional network
import paddle.nn as nn
import paddle.nn.functional as F
from visualdl import LogWriter
class MyCNN(nn.Layer):
def __init__(self):
super(MyCNN,self).__init__()
        self.hidden1 = nn.Conv2D(in_channels=3,    # number of input channels
                                 out_channels=64,  # number of filters
                                 kernel_size=3,    # kernel size
                                 stride=1)         # stride
        self.hidden2 = nn.Conv2D(in_channels=64,
                                 out_channels=128,
                                 kernel_size=3,
                                 stride=1)
        self.hidden3 = nn.MaxPool2D(kernel_size=2, # pooling kernel size
                                    stride=2)      # pooling stride
self.hidden4 = nn.Linear(in_features=128*30*30,out_features=2)
    # Forward pass of the network
def forward(self,input):
        x = self.hidden1(input)
        # Log the first three channels of a few conv1 feature maps to VisualDL
        # (note this runs on every forward pass). add_image expects an HWC uint8
        # image, so transpose the CHW slice, rescale it to 0-255, and cap the loop
        # at the actual batch size.
        with LogWriter(logdir="./chk_points/conv1/") as writer:
            for i in range(min(10, x.shape[0])):
                myimg = x[i][0:3].numpy().transpose(1, 2, 0)   # CHW -> HWC
                myimg = ((myimg - myimg.min()) / (myimg.ptp() + 1e-6) * 255).astype('uint8')
                writer.add_image(tag='conv1', img=myimg, step=i)
        x = F.relu(x)
# print(x.shape)
x = self.hidden2(x)
x=F.relu(x)
# print(x.shape)
x = self.hidden3(x)
# print(x.shape)
        # How do the conv feature maps become input to the fully connected layer?
        # The conv output has shape [N, C, H, W]; before the Linear layer each sample
        # is flattened into a vector of length K = C*H*W, so a mini-batch becomes an N x K matrix.
x = paddle.reshape(x, shape=[-1, 128*30*30])
x = self.hidden4(x)
        out = F.softmax(x)  # note: the CrossEntropyLoss used later applies softmax itself, so returning raw logits would also work
return out
3. Model Training && 4. Model Evaluation
import paddle
from paddle import Model
myCNN=MyCNN()
model= Model(myCNN)
model.summary((1,3, 64, 64))
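Working the numbers out by hand as a cross-check on the summary:
- Conv2D 3->64, 3x3 kernels: 3*3*3*64 + 64 = 1,792 parameters, output 64 x 62 x 62
- Conv2D 64->128, 3x3 kernels: 3*3*64*128 + 128 = 73,856 parameters, output 128 x 60 x 60
- MaxPool2D 2x2, stride 2: no parameters, output 128 x 30 x 30
- Linear 115,200 -> 2: 115200*2 + 2 = 230,402 parameters
- Total: about 306,050 trainable parameters, almost all of them in the Linear layer.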
# Training configuration
model.prepare(optimizer=paddle.optimizer.Adam(learning_rate=0.00001,parameters=model.parameters()), # optimizer
              loss=paddle.nn.CrossEntropyLoss(),  # loss function
              metrics=paddle.metric.Accuracy())   # evaluation metric
# VisualDL callback for training visualization
visualdl = paddle.callbacks.VisualDL(log_dir='visualdl_log')
# Launch end-to-end training
model.fit(train_dataset,            # training dataset
          # test_dataset,           # evaluation dataset (optional, see the note below)
          epochs=100,               # total number of training epochs
          batch_size=256,           # batch size
          shuffle=True,             # shuffle the samples each epoch
          verbose=1,                # logging verbosity
          save_dir='./chk_points/', # directory for per-epoch checkpoints
          callbacks=[visualdl])     # callbacks to use
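The commented-out second argument is where an evaluation set would go: paddle.Model.fit also accepts eval_data, in which case validation loss and accuracy are reported each epoch, for example:
model.fit(train_dataset, test_dataset, epochs=100, batch_size=256,
          shuffle=True, verbose=1, save_dir='./chk_points/', callbacks=[visualdl])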
### Saving the model
Save the trained model so it can be evaluated and tested later.
model.evaluate(eval_data=test_dataset, verbose=2)
model.save('model_save_dir')
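To reuse the saved weights later (in a fresh session, say), the high-level API can load them back. A minimal sketch, assuming the MyCNN definition above is still available:
restored = Model(MyCNN())
restored.load('model_save_dir')   # reads model_save_dir.pdparams / .pdopt written by model.save above
restored.prepare(loss=paddle.nn.CrossEntropyLoss(), metrics=paddle.metric.Accuracy())
restored.evaluate(eval_data=test_dataset, verbose=2)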
5. Model Prediction
print('Number of test samples: {}'.format(len(test_dataset)))
# Run prediction. model.predict returns one list per model output; with the default
# batch_size of 1 here, result[0][idx] is the softmax output for sample idx.
result = model.predict(test_dataset)
# Label mapping
LABEL_MAP = ['lazy little bee', 'hardworking little bee']
# A few sample indices to display (defined here but unused; the loop below prints every sample)
indexs = [2, 38, 56, 92, 100, 101]
for idx in range(len(test_dataset)):
    predict_label = np.argmax(result[0][idx])
    real_label = test_dataset[idx][1]
    print('Sample ID: {}, ground truth: {}, prediction: {}'.format(idx, LABEL_MAP[real_label], LABEL_MAP[predict_label]))
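Since we already have per-sample predictions, overall test accuracy can also be recomputed directly from them (my own addition, just as a cross-check against model.evaluate above):
correct = sum(int(np.argmax(result[0][i]) == int(test_dataset[i][1])) for i in range(len(test_dataset)))
print('test accuracy: {:.3f}'.format(correct / len(test_dataset)))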
origin_data = test_dataset.get_origin_data()
print(len(origin_data))
# Define a helper for plotting
from PIL import Image
import matplotlib.font_manager as font_manager
fontpath = 'SIMHEI.TTF'
font = font_manager.FontProperties(fname=fontpath, size=10)
def show_img(img, predict):
    plt.figure()
    plt.title(predict, fontproperties=font)
    plt.imshow(img, cmap=plt.cm.binary)
    plt.show()
# Show a sampled subset
for i in range(10):
    img_path = 'dataset/PollenDataset/images/' + origin_data[i][0]
    real_label = int(origin_data[i][1])
    predict_label = int(np.argmax(result[0][i]))
    img = Image.open(img_path)
    title = 'Sample ID: {}, ground truth: {}, prediction: {}'.format(i, LABEL_MAP[real_label], LABEL_MAP[predict_label])
    show_img(img, title)
Well, looks like the little bees are all pretty hardworking after all, haha.
https://aistudio.baidu.com/aistudio/projectdetail/1549057
Source: https://blog.csdn.net/livingbody/article/details/114683052