From data preprocessing to the simplest 3D CNN: binary classification of MRI images with 5-fold cross-validation (PyTorch)
Preface
Starting from data preprocessing, this post builds the simplest possible 3D CNN based on LeNet, computes the metrics commonly used for medical image classification (AUC, ACC, Spe, Sen), and uses 5-fold cross-validation to improve these estimates, implementing binary classification of 3D MRI images.
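For reference, the metrics computed later in utils.py are defined as ACC = (TP+TN)/(TP+TN+FP+FN), Sen = TP/(TP+FN), Spe = TN/(TN+FP), and AUC is the area under the ROC curve of the predicted scores.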
I. Converting nii images to npy format
First, convert the nii images to npy format so they are easier to feed into the network.
import nibabel as nib
import os
import numpy as np
from skimage.transform import resize
import pandas as pd

def mkdir(path):
    if not os.path.exists(path):
        os.makedirs(path)

img_path = 'E:\TSC\deep_learing_need\data for paper\FLAIR3'  # folder with the nii files
save_path = 'E:\TSC\deep_learing_need\data for paper\FLAIR3_npy'  # folder for the npy files
mkdir(save_path)
# file naming example: FLAIR3_<pid>.nii
label_pd = pd.read_excel('E:\TSC\deep_learing_need\clinical_features.xlsx', sheet_name='label')  # the label excel holds each case's pid and label, e.g. pid: 100, label: 0
for img_name in os.listdir(img_path):
    net_data = []
    pid = img_name.split('_')[1].split('.')[0]
    print(pid)
    print(img_name)
    label = label_pd[label_pd['pid'] == int(pid)]['label']
    print(os.path.join(img_path, img_name))
    img_data = nib.load(os.path.join(img_path, img_name))
    img = img_data.get_fdata()
    img = resize(img, (128, 128, 128), order=0)  # resize every volume to the same (h, w, c) shape so it can be fed to the network; change to fit your own dataset
    # img = nib.load(os.path.join(img_path, img_name)).get_fdata()  # loading in one step
    img = np.array(img)
    # normalization to [0, 1]
    if np.min(img) < np.max(img):
        img = img - np.min(img)
        img = img / np.max(img)
    if int(label.values[0]) == 1:
        label_data = 1
    else:
        label_data = 0
    net_data.append([img, label_data])
    np.save(os.path.join(save_path, pid), net_data)  # save as <pid>.npy
print('Done!')
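To confirm the conversion worked, it helps to load one saved file back and inspect it. A minimal check, assuming a case with pid 100 exists (the pid used in the label-sheet example above):

sample = np.load(os.path.join(save_path, '100.npy'), allow_pickle=True)  # '100' is just the example pid
img, label = sample[0]
print(img.shape, img.min(), img.max(), label)  # expected: (128, 128, 128) 0.0 1.0 and a 0/1 label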
II. Loading the data
1. Loading the data, Dataset.py:
import torch

# Define a GetLoader class that inherits from torch.utils.data.Dataset and overrides __getitem__() and __len__()
class GetLoader(torch.utils.data.Dataset):
    # store the data and labels
    def __init__(self, data_root, data_label):
        self.data = data_root
        self.label = data_label

    # index is produced when the DataLoader splits the data into batches; return the data together with its label
    def __getitem__(self, index):
        data = self.data[index]
        labels = self.label[index]
        return torch.tensor(data).float(), torch.tensor(labels).float()

    # return the dataset length so the DataLoader knows how to split it; without it the DataLoader would be lost
    def __len__(self):
        return len(self.data)
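As a quick usage sketch (with toy shapes rather than the real 128x128x128 volumes), GetLoader plugs straight into a standard PyTorch DataLoader:

import numpy as np
from torch.utils.data import DataLoader
from Dataset import GetLoader

dummy_imgs = np.random.rand(4, 1, 16, 16, 16).astype(np.float32)  # stand-in for the (N, 1, 128, 128, 128) arrays
dummy_labels = np.array([0, 1, 0, 1], dtype=np.float32)
loader = DataLoader(GetLoader(dummy_imgs, dummy_labels), batch_size=2, shuffle=True)
for x, y in loader:
    print(x.shape, y.shape)  # torch.Size([2, 1, 16, 16, 16]) torch.Size([2])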
2. Some helper functions, utils.py:
from skimage import transform, exposure
from sklearn import model_selection, preprocessing, metrics, feature_selection
import os
import numpy as np
import random
import torch

# load the npy data and labels
def load_npy_data(data_dir, split):
    datanp = []  # images
    truenp = []  # labels
    for file in os.listdir(data_dir):
        data = np.load(os.path.join(data_dir, file), allow_pickle=True)
        # data[0][0] = resize(data[0][0], (224, 224, 224))
        if split == 'train':
            data_sug = transform.rotate(data[0][0], 60)  # rotate by 60 degrees without changing the size
            data_sug2 = exposure.adjust_gamma(data[0][0], gamma=0.5)  # brighten
            datanp.append(data_sug)
            truenp.append(data[0][1])
            datanp.append(data_sug2)
            truenp.append(data[0][1])
        datanp.append(data[0][0])
        truenp.append(data[0][1])
    datanp = np.array(datanp)  # numpy arrays have .shape, plain lists do not, so convert with np.array first
    datanp = np.expand_dims(datanp, axis=4)   # add a channel dim: (N, 128, 128, 128) -> (N, 128, 128, 128, 1)
    datanp = datanp.transpose(0, 4, 1, 2, 3)  # -> (N, 1, 128, 128, 128), channel-first for Conv3d
    truenp = np.array(truenp)
    print(datanp.shape, truenp.shape)
    print(np.min(datanp), np.max(datanp), np.mean(datanp), np.median(datanp))
    return datanp, truenp
# fix the random seeds for reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True

def _init_fn(worker_id):
    np.random.seed(12 + worker_id)
# compute the classification metrics
def calculate(score, label, th):
    score = np.array(score)
    label = np.array(label)
    pred = np.zeros_like(label)
    pred[score >= th] = 1
    pred[score < th] = 0
    TP = len(pred[(pred > 0.5) & (label > 0.5)])
    FN = len(pred[(pred < 0.5) & (label > 0.5)])
    TN = len(pred[(pred < 0.5) & (label < 0.5)])
    FP = len(pred[(pred > 0.5) & (label < 0.5)])
    AUC = metrics.roc_auc_score(label, score)
    result = {'AUC': AUC, 'acc': (TP + TN) / (TP + TN + FP + FN), 'sen': (TP) / (TP + FN + 0.0001),
              'spe': (TN) / (TN + FP + 0.0001)}
    # print('acc', (TP + TN), (TP + TN + FP + FN), 'spe', (TN), (TN + FP), 'sen', (TP), (TP + FN))
    return result

def mkdir(path):
    if not os.path.exists(path):
        os.makedirs(path)
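A small sanity check on calculate() with toy scores and labels (threshold 0.5): perfectly separated scores should give AUC and acc of 1.0, with sen/spe just below 1.0 because of the 0.0001 smoothing term in the denominators:

scores = [0.9, 0.8, 0.6, 0.3, 0.2, 0.1]
labels = [1, 1, 1, 0, 0, 0]
print(calculate(scores, labels, 0.5))
# {'AUC': 1.0, 'acc': 1.0, 'sen': 0.99997, 'spe': 0.99997}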
III. Building the model: model.py
import torch.nn as nn
import torch.nn.functional as F

class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        # NOTE: the conv layer definitions were missing from the original listing; stride-2 3x3x3
        # convolutions are one assumption that matches the 32*8*8*8 input expected by fc1
        self.conv1 = nn.Conv3d(1, 16, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv3d(16, 32, kernel_size=3, stride=2, padding=1)
        self.pool1 = nn.MaxPool3d(2, 2)
        self.pool2 = nn.MaxPool3d(2, 2)
        self.fc1 = nn.Linear(32*8*8*8, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 1)

    def forward(self, x):
        x = F.relu(self.conv1(x))   # input (1, 128, 128, 128) -> (16, 64, 64, 64)
        x = self.pool1(x)           # -> (16, 32, 32, 32)
        x = F.relu(self.conv2(x))   # -> (32, 16, 16, 16)
        x = self.pool2(x)           # -> (32, 8, 8, 8)
        x = x.view(-1, 32*8*8*8)    # flatten to (32*8*8*8)
        x = F.relu(self.fc1(x))     # -> (120)
        x = F.relu(self.fc2(x))     # -> (84)
        x = self.fc3(x)             # -> (1), a single logit for binary classification
        return x
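A quick shape check (using the reconstructed conv layers above, which are an assumption) confirms that a batch of single-channel 128x128x128 volumes comes out as one logit per case:

import torch
from model import LeNet

net = LeNet()
dummy = torch.randn(2, 1, 128, 128, 128)  # (batch, channel, D, H, W)
print(net(dummy).shape)  # torch.Size([2, 1])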
IV. Training: train.py
Next comes the training.
from model import LeNet
from skimage import transform, exposure
from sklearn import model_selection, preprocessing, metrics, feature_selection
import os
import time
import numpy as np
import pandas as pd
import torch
from torch.utils import data as torch_data
from torch.nn import functional as torch_functional
from Dataset import GetLoader
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from utils import mkdir, load_npy_data, calculate, _init_fn, set_seed

set_seed(12)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_path = '/home/mist/cloud/T1_sag_npy/train'
model_path = 'cloud/model_save_t1_sag'
model_floder = 'model_t1_sag_10.26_lenet'
save_path = os.path.join(model_path, model_floder)
mkdir(save_path)
datanp_train, truenp_train = load_npy_data(train_path, '1')
# wrap the arrays with GetLoader, which returns a Dataset object holding the data and labels
train_data_retriever2 = GetLoader(datanp_train, truenp_train)
class Trainer:
    def __init__(
        self,
        model,
        device,
        optimizer,
        criterion
    ):
        self.model = model
        self.device = device
        self.optimizer = optimizer
        self.criterion = criterion
        self.best_valid_score = 0  # np.inf
        self.n_patience = 0
        self.lastmodel = None

    def fit(self, epochs, train_loader, valid_loader, modility, save_path, patience, fold):
        best_auc = 0
        for n_epoch in range(1, epochs + 1):
            self.info_message("EPOCH: {}", n_epoch)
            train_loss, train_auc, train_time, rst_train = self.train_epoch(train_loader)
            valid_loss, valid_auc, valid_time, rst_val = self.valid_epoch(valid_loader)
            self.info_message(
                "[Epoch Train: {}] loss: {:.4f}, auc: {:.4f}, time: {:.2f} s",
                n_epoch, train_loss, train_auc, train_time
            )
            self.info_message(
                "[Epoch Valid: {}] loss: {:.4f}, auc: {:.4f}, time: {:.2f} s",
                n_epoch, valid_loss, valid_auc, valid_time
            )
            # if True:
            # if self.best_valid_score > valid_loss:
            if self.best_valid_score < valid_auc and n_epoch > 20:
                self.save_model(n_epoch, modility, save_path, valid_loss, valid_auc, fold)
                self.info_message(
                    "auc increased from {:.4f} to {:.4f}. Saved model to '{}'",
                    self.best_valid_score, valid_auc, self.lastmodel
                )
                self.best_valid_score = valid_auc
                self.n_patience = 0
                final_rst_train = rst_train
                final_rst_val = rst_val
            else:
                self.n_patience += 1
                if self.n_patience >= patience:
                    self.info_message("\nValid auc didn't improve in the last {} epochs.", patience)
                    break
        all_rst = [final_rst_train, final_rst_val]
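The listing above is cut off before the cross-validation loop, but since StratifiedKFold is already imported, the 5-fold split typically looks roughly like the sketch below. This is not the original post's exact code: the batch size, learning rate, epoch count and the 't1_sag' tag are placeholder values, and it assumes the remaining Trainer methods (train_epoch, valid_epoch, save_model, info_message) exist as referenced above.

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=12)
for fold, (train_idx, val_idx) in enumerate(skf.split(datanp_train, truenp_train)):
    train_loader = torch_data.DataLoader(GetLoader(datanp_train[train_idx], truenp_train[train_idx]),
                                         batch_size=4, shuffle=True, worker_init_fn=_init_fn)
    valid_loader = torch_data.DataLoader(GetLoader(datanp_train[val_idx], truenp_train[val_idx]),
                                         batch_size=4, shuffle=False)
    model = LeNet().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    criterion = torch_functional.binary_cross_entropy_with_logits  # single-logit binary output
    trainer = Trainer(model, device, optimizer, criterion)
    trainer.fit(100, train_loader, valid_loader, 't1_sag', save_path, patience=20, fold=fold)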