
FCN semantic segmentation of a handbag dataset with PyTorch 1.2

Posted: 2020-05-05 14:38:29


Contents:
- Preface
- Code
- Results analysis: loss curve; pixel accuracy (PA) curve; labels vs. segmentation results
- Summary

Preface

As a newcomer who has only just started with deep learning, this was my first time writing FCN code for semantic segmentation, and the process was a real headache: other people's code makes perfect sense when I read it, but I get lost the moment I write my own. This post simply records my own experience.

The handbag dataset is explained in detail at the link below; this post only modifies the original code according to my own understanding.

Original reference link: /u014453898/article/details/92080859

Code

Custom dataset code (BagData.py)

import os
import cv2
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from onehot import onehot

class BagDataset(Dataset):
    def __init__(self, mode):
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])])
        self.img = os.listdir('last')
        # 60% / 20% / 20% split into train / val / test
        if mode == 'train':
            self.img = self.img[:int(0.6 * len(self.img))]
        elif mode == 'val':
            self.img = self.img[int(0.6 * len(self.img)):int(0.8 * len(self.img))]
        else:
            self.img = self.img[int(0.8 * len(self.img)):]

    def __len__(self):
        return len(self.img)

    def __getitem__(self, idx):
        img_name = self.img[idx]
        imgA = cv2.imread('last/' + img_name)
        imgA = cv2.resize(imgA, (160, 160))
        imgB = cv2.imread('last_msk/' + img_name, 0)  # mask, read as grayscale
        imgB = cv2.resize(imgB, (160, 160))
        imgB = imgB / 255                             # map {0, 255} to {0, 1}
        imgB = imgB.astype('uint8')
        imgB = onehot(imgB, 2)                        # (H, W) -> (H, W, 2)
        imgB = imgB.swapaxes(0, 2).swapaxes(1, 2)     # (H, W, 2) -> (2, H, W)
        imgB = torch.FloatTensor(imgB)
        imgA = self.transform(imgA)
        return imgA, imgB

train_db = BagDataset(mode='train')
val_db = BagDataset(mode='val')
test_db = BagDataset(mode='test')
train_loader = DataLoader(train_db, batch_size=4, shuffle=True, num_workers=4)
val_loader = DataLoader(val_db, batch_size=4, shuffle=True, num_workers=4)
test_loader = DataLoader(test_db, batch_size=4, shuffle=True, num_workers=4)

if __name__ == '__main__':
    for batch in train_loader:
        break
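As a quick sanity check (assuming the last/ and last_msk/ folders from the original tutorial sit in the working directory, and that the file above is saved as BagData.py), one batch can be inspected like this:

from BagData import train_loader

imgA, imgB = next(iter(train_loader))
print(imgA.shape)  # torch.Size([4, 3, 160, 160]) -- normalized image batch
print(imgB.shape)  # torch.Size([4, 2, 160, 160]) -- one-hot mask batch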

The onehot function (onehot.py)

import numpy as np

def onehot(data, n):
    # One-hot encode an integer label map: (H, W) -> (H, W, n)
    buf = np.zeros(data.shape + (n,))
    # pixel i with label v sets channel v, i.e. flat index n*i + v
    nmsk = np.arange(data.size) * n + data.ravel()
    buf.ravel()[nmsk] = 1
    return buf
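To make the encoding concrete, here is a tiny example (a made-up 2×2 mask, not taken from the dataset): each pixel's label v becomes a vector with a 1 in channel v.

import numpy as np
from onehot import onehot

mask = np.array([[0, 1],
                 [1, 0]])        # toy 2x2 binary mask
print(onehot(mask, 2).shape)     # (2, 2, 2) -- last axis is the class channel
print(onehot(mask, 2)[0, 0])     # [1. 0.]  -- label 0 -> channel 0
print(onehot(mask, 2)[0, 1])     # [0. 1.]  -- label 1 -> channel 1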

FCN model code (Myfcn.py)

import torch
import torch.nn as nn
from torchvision import models

class FCN32s(nn.Module):
    def __init__(self, n_class):
        super(FCN32s, self).__init__()
        self.n_class = n_class
        self.feature = models.vgg16(pretrained=True).features
        # padding=100 (as in the original FCN) keeps the feature map large enough to crop from later
        self.feature[0] = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=100)
        self.module = nn.Sequential(
            nn.Conv2d(512, 4096, kernel_size=7),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
            nn.Conv2d(4096, 4096, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Dropout2d())
        self.classifier = nn.Conv2d(4096, self.n_class, kernel_size=1)
        self.deconv1 = nn.ConvTranspose2d(self.n_class, self.n_class,
                                          kernel_size=64, stride=32, bias=False)

    def forward(self, x):
        h = x                              # keep the input so we can crop back to its size
        x = self.feature(x)
        x = self.module(x)
        score = self.classifier(x)
        upsample = self.deconv1(score)     # 32x upsampling in a single step
        # crop to the input size (the offset 19 follows from the padding=100 trick)
        upsample = upsample[:, :, 19:19 + h.size()[2], 19:19 + h.size()[3]].contiguous()
        return upsample


class FCN16s(nn.Module):
    def __init__(self, n_class):
        super(FCN16s, self).__init__()
        self.n_class = n_class
        self.feature = list(models.vgg16(pretrained=True).features)
        self.feature[0] = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=100)
        self.feature1 = nn.Sequential(*self.feature[:24])   # up to and including pool4
        self.feature2 = nn.Sequential(*self.feature[24:])   # pool4 -> pool5
        self.module = nn.Sequential(
            nn.Conv2d(512, 4096, kernel_size=7),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
            nn.Conv2d(4096, 4096, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Dropout2d())
        self.classifier1 = nn.Conv2d(4096, self.n_class, kernel_size=1)
        self.classifier2 = nn.Conv2d(512, self.n_class, kernel_size=1)
        self.deconv1 = nn.ConvTranspose2d(self.n_class, self.n_class,
                                          kernel_size=4, stride=2, bias=False)
        self.deconv2 = nn.ConvTranspose2d(self.n_class, self.n_class,
                                          kernel_size=32, stride=16, bias=False)
        self.bn = nn.BatchNorm2d(self.n_class)

    def forward(self, x):
        num = self.feature1(x)             # output after pool4
        num1 = self.feature2(num)          # output after pool5
        num2 = self.module(num1)
        score1 = self.classifier1(num2)    # scores from the pool5 branch
        score2 = self.classifier2(num)     # scores from the pool4 branch
        upsample1 = self.bn(self.deconv1(score1))   # 2x upsampling of the pool5 scores
        score2 = score2[:, :, 5:5 + upsample1.size()[2], 5:5 + upsample1.size()[3]]
        upsample1 += score2                # fuse pool4 with the upsampled pool5 scores
        upsample = self.bn(self.deconv2(upsample1)) # 16x upsampling back toward the input size
        upsample = upsample[:, :, 27:27 + x.size()[2], 27:27 + x.size()[3]].contiguous()
        return upsample


class FCN8s(nn.Module):
    def __init__(self, n_class):
        super(FCN8s, self).__init__()
        self.n_class = n_class
        self.feature = list(models.vgg16(pretrained=True).features)
        self.feature[0] = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=100)
        self.feature1 = nn.Sequential(*self.feature[:17])    # up to and including pool3
        self.feature2 = nn.Sequential(*self.feature[17:24])  # pool3 -> pool4
        self.feature3 = nn.Sequential(*self.feature[24:])    # pool4 -> pool5
        self.module = nn.Sequential(
            nn.Conv2d(512, 4096, kernel_size=7),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.5),
            nn.Conv2d(4096, 4096, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.5))
        self.classifier1 = nn.Conv2d(4096, self.n_class, kernel_size=1)
        self.classifier2 = nn.Conv2d(512, self.n_class, kernel_size=1)
        self.classifier3 = nn.Conv2d(256, self.n_class, kernel_size=1)
        self.deconv1 = nn.ConvTranspose2d(self.n_class, self.n_class,
                                          kernel_size=4, stride=2, bias=False)
        self.deconv2 = nn.ConvTranspose2d(self.n_class, self.n_class,
                                          kernel_size=4, stride=2, bias=False)
        self.deconv3 = nn.ConvTranspose2d(self.n_class, self.n_class,
                                          kernel_size=16, stride=8, bias=False)
        self.bn = nn.BatchNorm2d(self.n_class)

    def forward(self, x):
        num = self.feature1(x)             # output after pool3
        num1 = self.feature2(num)          # output after pool4
        num2 = self.feature3(num1)         # output after pool5
        num3 = self.module(num2)
        score1 = self.classifier1(num3)    # scores from the pool5 branch
        score2 = self.classifier2(num1)    # scores from the pool4 branch
        score3 = self.classifier3(num)     # scores from the pool3 branch
        upsample1 = self.bn(self.deconv1(score1))   # 2x upsampling of the pool5 scores
        score2 = score2[:, :, 5:5 + upsample1.size()[2], 5:5 + upsample1.size()[3]]
        upsample1 += score2                # fuse with the pool4 scores
        upsample2 = self.bn(self.deconv2(upsample1)) # 2x upsampling of the fused result
        score3 = score3[:, :, 9:9 + upsample2.size()[2], 9:9 + upsample2.size()[3]]
        upsample2 += score3                # fuse with the pool3 scores
        upsample = self.bn(self.deconv3(upsample2)) # 8x upsampling back toward the input size
        upsample = upsample[:, :, 31:31 + x.size()[2], 31:31 + x.size()[3]].contiguous()
        return upsample


if __name__ == '__main__':
    x = torch.rand(3, 3, 224, 224)
    fcn = FCN8s(20)
    out = fcn(x)
    print(out.shape)   # expect torch.Size([3, 20, 224, 224])
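A quick shape check, assuming the models above are saved as Myfcn.py (pretrained VGG16 weights are downloaded on first use): every variant should return a score map with the same spatial size as its input, here the 160×160 used by the dataset.

import torch
from Myfcn import FCN32s, FCN16s, FCN8s

x = torch.rand(2, 3, 160, 160)
for Net in (FCN32s, FCN16s, FCN8s):
    net = Net(n_class=2).eval()            # eval mode so BatchNorm uses running stats
    with torch.no_grad():
        print(Net.__name__, net(x).shape)  # expect torch.Size([2, 2, 160, 160])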

Training script (main function)

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import visdom
from BagData import train_loader, val_loader, test_loader
from Myfcn import FCN8s

device = torch.device('cuda')
# torch.manual_seed(1234)

def evaluate(model, loader):
    # pixel accuracy (PA): correctly classified pixels / total pixels
    model.eval()
    correct, total = 0, 0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        with torch.no_grad():
            logits = model(x)
            output = torch.sigmoid(logits)
        pred = output.argmax(dim=1)    # predicted class per pixel
        y = y.argmax(dim=1)            # class index from the one-hot target
        correct += torch.eq(pred, y).sum().float().item()
        total += y.numel()             # robust to a smaller final batch
    acc = correct / total
    print('acc:', acc)
    return acc

def main():
    vis = visdom.Visdom()
    fcn_model = FCN8s(n_class=2).to(device)
    optimizer = optim.SGD(fcn_model.parameters(), lr=1e-2, momentum=0.7)
    criterion = nn.BCELoss().to(device)
    best_acc, best_epoch = 0, 0
    global_step = 0
    vis.line([0], [-1], win='loss', opts=dict(title='loss'))
    vis.line([0], [-1], win='val_acc', opts=dict(title='val_acc'))
    for epoch in range(100):
        fcn_model.train()
        for step, (x, y) in enumerate(train_loader):
            x, y = x.to(device), y.to(device)
            logits = fcn_model(x)
            output = torch.sigmoid(logits)   # BCELoss expects probabilities
            loss = criterion(output, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if step % 20 == 0:
                print('epoch {}, {}/{}, loss is {}'.format(
                    epoch, step, len(train_loader), loss.item()))
                output_np = np.argmax(output.detach().cpu().numpy(), axis=1)
                y_np = np.argmax(y.cpu().numpy(), axis=1)
                vis.close(win='pred')
                vis.close(win='label')
                vis.images(output_np[:, None, :, :], win='pred', opts=dict(title='pred'))
                vis.images(y_np[:, None, :, :], win='label', opts=dict(title='label'))
                vis.line([loss.item()], [global_step], win='loss', update='append')
            global_step += 1
        # validate after every epoch and keep the best checkpoint
        val_acc = evaluate(fcn_model, val_loader)
        if val_acc > best_acc:
            best_epoch = epoch
            best_acc = val_acc
            torch.save(fcn_model.state_dict(), 'best.mdl')
        vis.line([val_acc], [global_step], win='val_acc', update='append')
    print('best acc:', best_acc, 'best epoch:', best_epoch)
    fcn_model.load_state_dict(torch.load('best.mdl'))
    print('loaded from ckpt!')
    test_acc = evaluate(fcn_model, test_loader)
    print('test acc:', test_acc)

if __name__ == '__main__':
    main()
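Once training has produced best.mdl, a minimal inference sketch might look like the following ('test.jpg' and 'pred.png' are placeholder file names); it reuses the same normalization as the training transform.

import cv2
import torch
from torchvision import transforms
from Myfcn import FCN8s

device = torch.device('cuda')
model = FCN8s(n_class=2).to(device)
model.load_state_dict(torch.load('best.mdl'))
model.eval()

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])])

img = cv2.imread('test.jpg')               # placeholder input image
img = cv2.resize(img, (160, 160))
x = transform(img).unsqueeze(0).to(device)
with torch.no_grad():
    pred = torch.sigmoid(model(x)).argmax(dim=1)   # (1, 160, 160) mask of 0/1
cv2.imwrite('pred.png', pred[0].cpu().numpy().astype('uint8') * 255)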

Results analysis

From the final output, the best pixel accuracy (PA, the fraction of pixels classified correctly) on the validation set is 0.879546875, reached at epoch 20; the pixel accuracy on the test set is 0.8733546875.

Loss curve
Pixel accuracy (PA) curve
Labels vs. segmentation results

Summary

By fusing features across layers, FCN attends to both global semantic information and local position information, and recovers each pixel's class from abstract features. It extends image-level classification down to pixel-level classification, successfully turning a network originally built for image classification into one for image segmentation.

Although FCN can recover the class of each pixel during segmentation, two problems remain:

① as the image passes through pooling layers, the resolution of the feature maps keeps dropping, and some pixels' spatial position information is lost;

② the segmentation process does not effectively take image context into account and cannot fully exploit the rich spatial position information, so the use of local and global features is unbalanced.

FCN does not solve these two problems effectively, which leaves the segmentation results coarse and the boundaries discontinuous. Follow-up improvements could refine the convolution structure or add a conditional random field (CRF), as sketched below.
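As one concrete pointer for the CRF direction: the third-party pydensecrf package implements the dense CRF commonly used for this kind of post-processing. A rough sketch under stated assumptions — probs is a (2, H, W) per-class probability map from the network and img is the original (H, W, 3) uint8 image; the parameter values are illustrative, not tuned for this dataset, and should be checked against the library's documentation.

import numpy as np
import pydensecrf.densecrf as dcrf
from pydensecrf.utils import unary_from_softmax

def crf_refine(img, probs, n_iters=5):
    # probs: (2, H, W) class probabilities; img: (H, W, 3) uint8
    h, w = img.shape[:2]
    d = dcrf.DenseCRF2D(w, h, 2)
    d.setUnaryEnergy(unary_from_softmax(probs.astype(np.float32)))
    d.addPairwiseGaussian(sxy=3, compat=3)        # location-only smoothness term
    d.addPairwiseBilateral(sxy=80, srgb=13,
                           rgbim=np.ascontiguousarray(img),
                           compat=10)             # color-aware appearance term
    q = d.inference(n_iters)
    return np.argmax(np.array(q).reshape(2, h, w), axis=0)  # refined 0/1 mask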
