计算机视觉-街景符号识别(3):构建模型
作者:互联网
# Street-view character recognition: build and train a small multi-head CNN.
#
# The script loads the training images and their digit labels, wraps them in
# a Dataset/DataLoader, defines a CNN with parallel classification heads (one
# per character position), trains it for 10 epochs and saves the model.
import glob
import json
import os
import shutil
import sys

import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset


class SVHNDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs for multi-digit recognition.

    Each label is returned as a tensor of exactly 5 class indices: the digit
    sequence is truncated to 5 entries and padded on the right with class
    ``10`` when it is shorter.

    Args:
        img_path: Sequence of image file paths.
        img_label: Sequence of digit sequences, one per image, in the same
            order as ``img_path``.
        transform: Optional callable applied to the PIL image.
    """

    def __init__(self, img_path, img_label, transform=None):
        self.img_path = img_path
        self.img_label = img_label
        self.transform = transform

    def __getitem__(self, index):
        """Return the (optionally transformed) RGB image and its padded label."""
        img = Image.open(self.img_path[index]).convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        # In the original SVHN, class 10 represents the digit 0; here the
        # value 10 is used as the padding class for missing positions.
        # np.int was removed in NumPy 1.24, so use an explicit dtype.
        lbl = np.array(self.img_label[index], dtype=np.int64)
        lbl = list(lbl) + (5 - len(lbl)) * [10]
        return img, torch.from_numpy(np.array(lbl[:5]))

    def __len__(self):
        """Return the number of images."""
        return len(self.img_path)


train_path = glob.glob('./mchar_train/*.png')
train_path.sort()

# Use a context manager so the JSON file handle is closed promptly.
with open('train.json') as label_file:
    train_json = json.load(label_file)

# NOTE(review): labels are paired with the sorted image paths purely by
# position (JSON key order) — confirm both orders agree for this dataset.
train_label = [train_json[x]['label'] for x in train_json]

train_loader = torch.utils.data.DataLoader(
    SVHNDataset(train_path, train_label,
                transforms.Compose([
                    transforms.Resize((64, 128)),
                    transforms.ColorJitter(0.3, 0.3, 0.2),
                    transforms.RandomRotation(5),
                    transforms.ToTensor(),
                    transforms.Normalize([0.485, 0.456, 0.406],
                                         [0.229, 0.224, 0.225])
                ])),
    batch_size=10,  # samples per batch
    # NOTE(review): training data is not shuffled; consider shuffle=True.
    shuffle=False,  # whether to shuffle the sample order
    num_workers=0,  # number of loader workers
)

torch.manual_seed(0)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True


# Model definition
class SVHN_Model1(nn.Module):
    """Small CNN with six parallel 11-way classification heads.

    For a 3x64x128 input the convolutional stack yields a 32x3x7 feature
    map, which is flattened and fed to every head. ``forward`` returns the
    six head outputs as a tuple; the training loop below only uses the
    first five (one per label position).
    """

    def __init__(self):
        super(SVHN_Model1, self).__init__()
        # CNN feature-extraction module
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2)),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2)),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # One linear head per output; 11 classes = digits 0-9 plus padding.
        self.fc1 = nn.Linear(32*3*7, 11)
        self.fc2 = nn.Linear(32*3*7, 11)
        self.fc3 = nn.Linear(32*3*7, 11)
        self.fc4 = nn.Linear(32*3*7, 11)
        self.fc5 = nn.Linear(32*3*7, 11)
        self.fc6 = nn.Linear(32*3*7, 11)

    def forward(self, img):
        """Return the six head logits for a batch of images."""
        feat = self.cnn(img)
        # Flatten everything except the batch dimension.
        feat = feat.view(feat.shape[0], -1)
        c1 = self.fc1(feat)
        c2 = self.fc2(feat)
        c3 = self.fc3(feat)
        c4 = self.fc4(feat)
        c5 = self.fc5(feat)
        c6 = self.fc6(feat)
        return c1, c2, c3, c4, c5, c6


model = SVHN_Model1()
# Prefer the GPU, but fall back to CPU so the script still runs without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Loss function
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)  # move the loss to the selected device
model = model.to(device)  # move the model to the selected device

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), 0.005)

loss_plot, c0_plot = [], []

# Train for 10 epochs
for epoch in range(10):
    for data in train_loader:
        img, target = data
        img = img.to(device)  # move the input images to the device
        target = target.to(device)  # move the labels to the device

        # The sixth head output (c5) is not part of the loss.
        c0, c1, c2, c3, c4, c5 = model(img)
        loss = criterion(c0, target[:, 0].long()) + \
            criterion(c1, target[:, 1].long()) + \
            criterion(c2, target[:, 2].long()) + \
            criterion(c3, target[:, 3].long()) + \
            criterion(c4, target[:, 4].long())
        loss /= 5

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_plot.append(loss.item())
        # Per-batch accuracy of the first character position.
        c0_plot.append((c0.argmax(1) == target[:, 0]).sum().item()*1.0 / c0.shape[0])

    print(epoch)

# NOTE(review): this pickles the whole model object; saving
# model.state_dict() is the more portable option if loaders can be updated.
torch.save(model, "moxing")
标签:img,nn,街景,self,torch,train,import,视觉,识别
来源: https://www.cnblogs.com/zhaoyids/p/15835475.html