利用神经网络对图像分类(对29种不同云层图像分类-pytorch)
原标题:利用神经网络对图像分类(对29种不同云层图像分类-pytorch)
原文来自:CSDN 原文链接:https://blog.csdn.net/qq_43487391/article/details/102933578
参加了一个气象比赛,记录一下训练过程
数据集是在比赛官网上下载的;点这里
说明一下,数据集大部分应该是比赛主办方在网上爬的,所以下载不了数据集也没关系,自己写个爬虫程序下载一下一样的。
我用的是pytorch框架写的,主要是用了个残差网络进行训练,具体代码如下:
# -*- encoding:utf-8 -*-
"""Train a ResNet-152 classifier on 29 cloud-type image classes (PyTorch)."""
import torch
import os
from torchvision.models.resnet import resnet152
from torch.utils.data import Dataset, DataLoader
import PIL.Image as Image
import cv2
import torch.nn as nn
import argparse
import numpy as np
from torchvision.transforms.transforms import Compose, Resize, ToTensor, Normalize
from utils import plot_image, plot_curve, one_hot
# from tensorboardX import SummaryWriter

# Make CUDA errors surface at the failing call instead of asynchronously.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"


def parse():
    """Parse command-line training options."""
    arg = argparse.ArgumentParser(description="jiao du fen lei")
    arg.add_argument("--train_csv", type=str, help="please input train csv")
    arg.add_argument("--val_csv", type=str, help="please input val csv")
    arg.add_argument("--h", type=int, default=256, help="image h")
    arg.add_argument("--w", type=int, default=256, help="image w")
    arg.add_argument("--lr", type=float, default=0.001, help="learning")
    arg.add_argument("--batchsize", type=int, default=64)
    arg.add_argument("--seed", type=int, default=42)
    return arg.parse_args()


class fenleidata(Dataset):
    """Image-classification dataset over a list of file paths and labels.

    labels may be strings read from the CSV; they are converted to int
    lazily in __getitem__.
    """

    def __init__(self, imagepaths, labels, transfrom=None):
        super(fenleidata, self).__init__()
        self.imagepaths = imagepaths  # list of absolute image file paths
        self.labels = labels          # parallel list of label strings/ints
        self.transfrom = transfrom    # torchvision transform pipeline or None

    def __getitem__(self, index):
        image = Image.open(self.imagepaths[index])
        # Dataset mixes RGB and greyscale files; normalise everything to RGB.
        image = image.convert('RGB')
        if self.transfrom:
            image = self.transfrom(image)
        label = int(self.labels[index])
        return image, label

    def __len__(self):
        return len(self.imagepaths)


def _read_csv(csv_path, image_dir):
    """Read a "filename,label" CSV; return (image paths, label strings).

    Replaces the previously duplicated train/val parsing loops; the file is
    opened with a context manager so it is closed even on error, and blank
    lines (e.g. a trailing newline) are skipped instead of crashing.
    """
    imagepaths, labels = [], []
    with open(csv_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            fields = line.split(',')
            imagepaths.append(os.path.join(image_dir, fields[0]))
            labels.append(fields[1])
    return imagepaths, labels


def main():
    """Full training loop: load CSVs, build ResNet-152, train, validate, save best."""
    opt = parse()
    # Fix all RNG seeds for reproducibility.
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    torch.cuda.manual_seed_all(opt.seed)
    # write = SummaryWriter()

    # CSV format: one "filename,label" pair per line, e.g. "1.jpg,0".
    # NOTE(review): val images are also resolved against the train directory
    # in the original code — confirm that is intended.
    path = '你的路径/train/'
    train_imagepaths, train_labels = _read_csv(opt.train_csv, path)
    val_imagepaths, val_labels = _read_csv(opt.val_csv, path)

    train_tranfrom = Compose([Resize((opt.h, opt.w)), ToTensor(),
                              Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    val_tranfrom = Compose([Resize((opt.h, opt.w)), ToTensor(),
                            Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    CUDA = torch.cuda.is_available()
    net = resnet152(pretrained=False, num_classes=29)
    # Adaptive pooling lets the network accept inputs other than 224x224.
    net.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    if CUDA:
        net.cuda()
    critem = nn.CrossEntropyLoss()
    if CUDA:
        critem = critem.cuda()
    optim = torch.optim.Adam(net.parameters(), lr=opt.lr)  # could swap for SGD

    trainfenlei = fenleidata(train_imagepaths, train_labels, train_tranfrom)
    valfenlei = fenleidata(val_imagepaths, val_labels, val_tranfrom)
    traindataset = DataLoader(trainfenlei, batch_size=opt.batchsize,
                              shuffle=True, pin_memory=True)
    valdatset = DataLoader(valfenlei, batch_size=8)

    ############### train ###############
    best_acc = 0.
    train_loss = []
    epochs = 100
    for i in range(epochs):
        net.train()
        for j, (image, label) in enumerate(traindataset):
            if CUDA:
                image = image.cuda()
                label = label.cuda()
            out = net(image)
            optim.zero_grad()
            loss = critem(out, label)
            loss.backward()
            optim.step()
            train_loss.append(loss.item())
            print('epoch:{},iter:{},loss:{}'.format(i + 1, j + 1, float(loss)))
            # write.add_scalar('scalar/loss', float(loss), len(traindataset)*i+j)

        if i % 5 == 0:
            net.eval()
            correct = 0.
            total = 0
            with torch.no_grad():
                for image, label in valdatset:
                    if CUDA:
                        image = image.cuda()
                        label = label.cuda()
                    out = net(image)
                    _, predict = out.topk(1, 1)
                    predict = predict.t()
                    # Accumulate raw correct counts and sample counts.
                    # Averaging per-sample (instead of per-batch, as before)
                    # removes the bias from a smaller final batch.
                    correct += float(torch.sum(predict.eq(label)))
                    total += label.shape[0]
            acc = correct / max(total, 1)
            print("epoch{},val_acc: {}".format(i + 1, float(acc)))
            # write.add_scalar('scalar/acc', float(acc), i)
            if acc > best_acc:
                best_acc = acc
                # Persist only the weights that achieved the best val accuracy.
                torch.save(net.state_dict(), 'best.pth')

    plot_curve(train_loss)


if __name__ == "__main__":
    main()
里面有个用来loss可视化的模块,放在utils.py
import torch


def plot_curve(data):
    """Plot a sequence of scalar values (e.g. training loss) against step index."""
    # Lazy import: matplotlib was previously a hard import-time dependency of
    # this module even for callers that only need one_hot; importing it here
    # keeps utils importable on headless machines.
    from matplotlib import pyplot as plt
    _ = plt.figure()
    plt.plot(range(len(data)), data, color='blue')
    plt.legend(['value'], loc='upper right')
    plt.xlabel('step')
    plt.ylabel('value')
    plt.show()


def plot_image(img, label, name):
    """Show the first six images of a batch with their labels.

    Undoes a mean/std normalisation (std 0.3081, mean 0.1307 — the usual
    MNIST constants) before display.
    """
    from matplotlib import pyplot as plt  # lazy import, see plot_curve
    _ = plt.figure()
    for i in range(6):
        plt.subplot(2, 3, i + 1)
        plt.tight_layout()
        plt.imshow(img[i][0] * 0.3081 + 0.1307, cmap='gray', interpolation='none')
        plt.title("{}: {}".format(name, label[i].item()))
        plt.xticks([])
        plt.yticks([])
    plt.show()


def one_hot(label, depth=10):
    """Convert integer class labels of shape (N,) to an (N, depth) one-hot float tensor."""
    out = torch.zeros(label.size(0), depth)
    idx = torch.LongTensor(label).view(-1, 1)
    # Write 1 at each row's class index; all other entries stay 0.
    out.scatter_(dim=1, index=idx, value=1)
    return out
因为主办方已经给了train的csv文件了,接下来只要分val的csv文件,这里很简单不详细说了
之后输入 csv 文件路径就可以进行训练了。训练完成后会得到一个 best.pth 文件,如下所示:
这个best.pth是网络训练出来保存的参数,也是使得验证集准确度最高时保存的参数
训练的loss曲线如下:
之后用这个保存的参数进行测试,测试集用的主办方的,测试代码如下:
# -*- encoding:utf-8 -*-
"""Run the trained ResNet-152 (best.pth) over a test directory and write a CSV."""
import torch
from torchvision.models.resnet import resnet152
import torch.nn as nn
import os
import PIL.Image as Image
from torchvision.transforms.transforms import Compose, Resize, ToTensor, Normalize
import pandas as pd


def main():
    """Classify every image in the test directory and save results to knn_res1.csv."""
    # Rebuild the exact training architecture, then load the saved weights.
    net = resnet152(pretrained=False, num_classes=29)
    net.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    net.load_state_dict(torch.load('你的路径/best.pth'))
    CUDA = torch.cuda.is_available()
    if CUDA:
        net.cuda()

    # Same preprocessing as training. Building the pipeline ONCE here (instead
    # of constructing Resize/ToTensor/Normalize anew for every image, as the
    # original did) hoists loop-invariant work out of the loop.
    # ToTensor converts PIL HWC -> CHW float and divides by 255;
    # Normalize then applies per-channel mean/std 0.5.
    transform = Compose([
        Resize((256, 256)),
        ToTensor(),
        Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    path = '/home/mllabs/hl/qixiang/test/'
    results = []
    net.eval()
    with torch.no_grad():  # inference only: skip autograd bookkeeping
        for img_name in os.listdir(path):
            img = Image.open(os.path.join(path, img_name))
            img = img.convert('RGB')  # test set mixes RGB and greyscale files
            img = transform(img).unsqueeze(0)  # add batch dimension
            if CUDA:
                img = img.cuda()
            output = net(img)
            _, predict = output.topk(1, 1)  # index of the highest logit
            pred = int(predict)
            print('图片{}的分类结果为{}'.format(img_name, pred))
            results.append([img_name, pred])

    pd.DataFrame(results).to_csv("knn_res1.csv")


if __name__ == "__main__":
    main()
把生成的csv文件上传上去就可以检测结果啦,因为只用到一个残差网络resnet152,对于简单的分类还可以,复杂的多标签的分类就要多花点功夫试下别的网络。
目前最好的分数是0.59,我测试的结果在0.42左右,虽然排不上名,但加入多标签还是有很大空间往上爬滴!
免责声明:本文来自互联网新闻客户端自媒体,不代表本网的观点和立场。
合作及投稿邮箱:E-mail:editor@tusaishared.com
热门资源
Python 爬虫(二)...
所谓爬虫就是模拟客户端发送网络请求,获取网络响...
TensorFlow从1到2...
原文第四篇中,我们介绍了官方的入门案例MNIST,功...
TensorFlow从1到2...
“回归”这个词,既是Regression算法的名称,也代表...
机器学习中的熵、...
熵 (entropy) 这一词最初来源于热力学。1948年,克...
TensorFlow2.0(10...
前面的博客中我们说过,在加载数据和预处理数据时...
智能在线
400-630-6780
聆听.建议反馈
E-mail: support@tusaishared.com