torch 学习笔记(7)——DataLoader 操作,以 LeNet 为例(详解+储备知识补充)python 储备知识补充
OS 操作补充
(1) os.path.abspath(__file__) & os.path.dirname()
(2) os.walk(filedir)
(3) lambda 匿名函数 & endswith() & filter() & list()
# --- DataLoader mechanism, explained ---
import os

# os.path.abspath(__file__): absolute path of the current script file.
# os.path.dirname(path): drop the last path component, keeping the directory.
BASE_DIR = os.path.abspath(__file__)
print(BASE_DIR)
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
print(BASE_DIR)
# NOTE(review): "datat_dir" looks like a typo of "dataset_dir"; the name is
# kept because later snippets in these notes reuse it.
datat_dir = os.path.join(BASE_DIR, "data", "RMB_data")
import os

# os.walk(top) yields one (root, dirs, files) triple per directory visited:
#   root  - path of the directory currently being walked (a str)
#   dirs  - list of sub-directory names directly inside root
#   files - list of file names directly inside root
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
datat_dir = os.path.abspath(os.path.join(BASE_DIR, "data", "RMB_data"))  # dataset to be split
print(datat_dir)
# BUGFIX: os.walk() returns a generator, so the original
# `root, dirs, files = os.walk(datat_dir)` raised at runtime; iterate instead.
for root, dirs, files in os.walk(datat_dir):
    print(root, type(root))
    print(dirs, type(dirs))
    print(files, type(files))
import os
import random
import shutil

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
datat_dir = os.path.abspath(os.path.join(BASE_DIR, "data", "RMB_data"))  # dataset to be split
print(datat_dir)

# BUGFIX: removed the stray `root, dirs, files = os.walk(...)` statement —
# os.walk() returns a generator, so unpacking it directly raised at runtime.
for root, dirs, files in os.walk(datat_dir):
    # Visit each class sub-folder under the dataset root.
    for sub_dir in dirs:
        imgs = os.listdir(os.path.join(root, sub_dir))  # entries of this class folder
        # lambda x: x.endswith('.jpg') is an anonymous predicate: keep only
        # names ending in '.jpg'. filter() drops non-matching entries and
        # list() materialises the surviving ones into a list.
        imgs = list(filter(lambda x: x.endswith('.jpg'), imgs))
        print(imgs)
案例:数据集划分
# data:搜集(Img, Label)、划分(train:训练模型、valid:验证模型是否过拟合、test:测试模型性能)、读取(DataLoader)、预处理(transforms)
# DataLoader 包括 Sampler 和 DataSet,其中 Sampler 生成 index;DataSet 用于根据 index 读取 Img、Label
# torch.utils.data.DataLoader 是用来构建可迭代的数据装载器
# 参数:dataset:Dataset 类,决定数据从哪读取及如何读取;batch_size:batch 大小;num_workers:是否多进程读取数据;
# shuffle:每个 epoch 是否乱序;drop_last:样本数不能被 batch_size 整除时,是否舍弃最后一批数据
# epoch:所有训练样本都输入到模型中;iteration:一批样本输入到模型中
# batch_size:batch 大小,决定 epoch 中含有多少个 iteration
# torch.utils.data.Dataset 抽象类,所有自定义的 Dataset 需要继承它,并复写 __getitem__(),getitem 用于接收一个 index,返回一个 sample
import os
import random
import shutil

BASE_DIR = os.path.dirname(os.path.abspath(__file__))  # folder containing this .py file


def makedir(new_dir):
    """Create directory new_dir (including parents) if it does not exist yet."""
    # BUGFIX: corrupted source read `if not ists(new_dir)`; the intended call
    # is os.path.exists.
    if not os.path.exists(new_dir):
        os.makedirs(new_dir)


if __name__ == '__main__':
    dataset_dir = os.path.abspath(os.path.join(BASE_DIR, "data", "RMB_data"))  # dataset to be split
    split_dir = os.path.abspath(os.path.join(BASE_DIR, "data", "rmb_split"))   # root of the split output
    train_dir = os.path.join(split_dir, "train")  # training-set path
    valid_dir = os.path.join(split_dir, "valid")  # validation-set path
    test_dir = os.path.join(split_dir, "test")    # test-set path

    train_pct = 0.8  # fraction of images used for training
    valid_pct = 0.1  # fraction used for validation
    test_pct = 0.1   # fraction used for test

    # Walk every class sub-folder under the source dataset.
    for root, dirs, files in os.walk(dataset_dir):
        for sub_dir in dirs:
            # Keep only the .jpg images of this class, then shuffle before splitting.
            imgs = os.listdir(os.path.join(root, sub_dir))
            imgs = list(filter(lambda x: x.endswith('.jpg'), imgs))
            random.shuffle(imgs)
            img_count = len(imgs)  # list length drives the split points below

            # Warn and abort when a class folder contains no images at all.
            if img_count == 0:
                print("{}目录下,无图片,请检查".format(os.path.join(root, sub_dir)))
                import sys
                # BUGFIX: the exit call after the warning was lost in the
                # corrupted source (only the `import sys` survived).
                sys.exit(0)

            train_point = int(img_count * train_pct)                # images [0, train_point) -> train
            valid_point = int(img_count * (train_pct + valid_pct))  # [train_point, valid_point) -> valid, rest -> test

            # Route each image to its destination folder by index.
            for i in range(img_count):
                if i < train_point:
                    out_dir = os.path.join(train_dir, sub_dir)
                elif i < valid_point:
                    out_dir = os.path.join(valid_dir, sub_dir)
                else:
                    # BUGFIX: corrupted source read `el:`; intended `else:`.
                    out_dir = os.path.join(test_dir, sub_dir)

                makedir(out_dir)  # create the output folder on demand
                target_path = os.path.join(out_dir, imgs[i])
                src_path = os.path.join(dataset_dir, sub_dir, imgs[i])
                # BUGFIX: the actual copy call was missing from the corrupted
                # source even though the comment announced it.
                shutil.copy(src_path, target_path)

            # Report how this class was divided.
            print('Class:{}, train:{}, valid:{}, test:{}'.format(sub_dir, train_point,
                                                                 valid_point - train_point,
                                                                 img_count - valid_point))
模型搭建
lenet.py
import torch.nn as nn
import torch.nn.functional as F
class LeNet(nn.Module):
    """Classic LeNet-style CNN for 3-channel 32x32 inputs.

    Args:
        classes: number of output classes (size of the final linear layer).
    """

    def __init__(self, classes):
        # BUGFIX: corrupted source used the reserved word `class` as the
        # parameter name and had lost the two conv layer definitions that
        # forward() relies on; restored (in: 3ch, conv1->6ch, conv2->16ch,
        # consistent with fc1 expecting 16*5*5 features).
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, classes)

    def forward(self, x):
        """Run conv->pool twice, flatten, then three fully-connected layers."""
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        out = out.view(out.size(0), -1)  # flatten to (batch, 16*5*5)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)  # raw logits, no softmax here
        return out

    def initialize_weights(self):
        """Xavier-init conv weights, N(0, 0.1) linear weights, zero all biases."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight.data, 0, 0.1)
                m.bias.data.zero_()
class LeNet2(nn.Module):
    """Same LeNet architecture expressed with nn.Sequential containers.

    Args:
        classes: number of output classes.
    """

    def __init__(self, classes):
        # BUGFIX: corrupted source used the reserved word `class` as the
        # parameter name and `lf` for `self`.
        super(LeNet2, self).__init__()
        # Convolutional feature extractor: two conv->relu->pool stages.
        self.features = nn.Sequential(
            nn.Conv2d(3, 6, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        # Classifier head on the flattened 16*5*5 feature map.
        self.classifier = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, classes)
        )

    def forward(self, x):
        """Extract features, flatten, classify; returns raw logits."""
        # BUGFIX: the `def forward(self, x):` line itself was replaced by
        # injected junk text in the corrupted source; restored.
        x = self.features(x)
        x = x.view(x.size()[0], -1)  # flatten to (batch, 16*5*5)
        x = self.classifier(x)
        return x
class LeNet_bn(nn.Module):
    """LeNet variant with batch normalisation after each conv and after fc1.

    Args:
        classes: number of output classes.
    """

    def __init__(self, classes):
        # BUGFIX: corrupted source used the reserved word `class` as the
        # parameter name and had lost the conv1/conv2 definitions that
        # forward() explicitly calls; restored to match bn1 (6 features)
        # and bn2 (16 features).
        super(LeNet_bn, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.bn1 = nn.BatchNorm2d(num_features=6)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.bn2 = nn.BatchNorm2d(num_features=16)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.bn3 = nn.BatchNorm1d(num_features=120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, classes)

    def forward(self, x):
        """conv->bn->relu->pool twice, flatten, fc->bn->relu, fc->relu, fc."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)
        out = F.max_pool2d(out, 2)
        out = self.conv2(out)
        out = self.bn2(out)
        out = F.relu(out)
        out = F.max_pool2d(out, 2)
        out = out.view(out.size(0), -1)  # flatten to (batch, 16*5*5)
        out = self.fc1(out)
        out = self.bn3(out)
        out = F.relu(out)
        out = F.relu(self.fc2(out))
        out = self.fc3(out)  # raw logits
        return out

    def initialize_weights(self):
        """Xavier-init conv weights, N(0, 1) linear weights, zero all biases."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight.data, 0, 1)
                m.bias.data.zero_()
common_tools.py