深度学习模型融合stacking
当你的深度学习模型变得很多时，选一个确定的模型也是一个头痛的问题。或者你可以把它们都用起来，进行模型融合。我主要使用 stacking 和 blend 方法。先把代码贴出来，大家可以看一下。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve
from sklearn.model_selection import train_test_split

from sklearn.metrics import roc_auc_score
from sklearn.svm import SVC,LinearSVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

# One seed shared by the data splits and every estimator that accepts
# `random_state`, so repeated runs give the same results.
SEED = 222
np.random.seed(SEED)

# Raw data set, loaded once at import time. get_train_test() reads the
# `cand_pty_affiliation` column from it as the label source.
df = pd.read_csv('input.csv')
19
def get_train_test():  # data preparation
    """Build the feature matrix and label vector from `df` and split them.

    Reads the module-level DataFrame `df`. The label is 1 where
    `cand_pty_affiliation == "REP"` and 0 otherwise; the remaining columns
    are one-hot encoded and columns with zero standard deviation are dropped.

    Returns:
        The 4-element result of `train_test_split(x, y, ...)`, i.e.
        (xtrain, xtest, ytrain, ytest), with 95% of the rows held out as the
        test part and the split seeded by `SEED`.
    """
    # Boolean mask multiplied by 1 gives an integer 0/1 target.
    y = 1 * (df.cand_pty_affiliation == "REP")
    x = df.drop(['cand_pty_affiliation'], axis=1)
    x = pd.get_dummies(x, sparse=True)
    # Constant columns carry no information for any of the models.
    x.drop(x.columns[x.std() == 0], axis=1, inplace=True)
    return train_test_split(x, y, test_size=0.95, random_state=SEED)
27
def get_models():  # model definitions
    """Create a fresh, unfitted set of base learners.

    Returns:
        dict mapping a display name to an unfitted scikit-learn classifier.
        Insertion order is significant: predict_ba_learners() writes one
        prediction column per entry, in this order.
    """
    nb = GaussianNB()
    # probability=True is required so that SVC exposes predict_proba().
    svc = SVC(C=100, probability=True)
    knn = KNeighborsClassifier(n_neighbors=3)
    lr = LogisticRegression(C=100, random_state=SEED)
    nn = MLPClassifier((80, 10), early_stopping=False, random_state=SEED)
    gb = GradientBoostingClassifier(n_estimators=100, random_state=SEED)
    rf = RandomForestClassifier(n_estimators=1, max_depth=3, random_state=SEED)

    models = {
        'svm': svc,
        'knn': knn,
        'naive bayes': nb,
        'mlp-nn': nn,
        'random forest': rf,
        'gbm': gb,
        'logistic': lr,
    }
    return models
46
def train_ba_learnres(ba_learners, inp, out, verbo=True):  # train the base models
    """Fit every base learner in place on the same data.

    Args:
        ba_learners: dict mapping name -> estimator exposing `fit(X, y)`.
        inp: training features, passed unchanged to each `fit`.
        out: training labels, passed unchanged to each `fit`.
        verbo: when true, print progress for each model.

    Returns:
        None. The estimators in `ba_learners` are fitted in place.
    """
    if verbo:
        print("fitting models.")
    for name, m in ba_learners.items():
        if verbo:
            print("%s..." % name, end="", flush=False)
        m.fit(inp, out)
        if verbo:
            print("done")
53
def predict_ba_learners(pred_ba_learners, inp, verbo=True):  # base-learner outputs become the meta-learner's features
    """Compute the meta-feature matrix from fitted base learners.

    Args:
        pred_ba_learners: dict mapping name -> fitted estimator exposing
            `predict_proba(X)` that returns at least two columns.
        inp: feature matrix; must expose `.shape[0]` (number of rows).
        verbo: when true, print progress for each model.

    Returns:
        numpy array of shape (n_rows, n_learners). Column i holds column 1 of
        the i-th learner's `predict_proba` output, in dict iteration order.
    """
    p = np.zeros((inp.shape[0], len(pred_ba_learners)))
    if verbo:
        print("Generating base learner predictions.")
    for i, (name, m) in enumerate(pred_ba_learners.items()):
        if verbo:
            print("%s..." % name, end="", flush=False)
        p_ = m.predict_proba(inp)
        # Keep only the second probability column (index 1).
        p[:, i] = p_[:, 1]
        if verbo:
            print("done")
    return p
63
def enmble_predict(ba_learners, meta_learner, inp, verbo=True):  # predict with the fitted ensemble
    """Run the two-stage ensemble prediction.

    Args:
        ba_learners: dict of fitted base learners.
        meta_learner: fitted estimator exposing `predict_proba`.
        inp: feature matrix to predict on.
        verbo: forwarded to predict_ba_learners().

    Returns:
        Tuple (p_pred, p): the base-learner prediction matrix, and column 1
        of the meta learner's `predict_proba` output on that matrix.
    """
    # New data must first pass through the base learners to become meta features.
    p_pred = predict_ba_learners(ba_learners, inp, verbo=verbo)
    return p_pred, meta_learner.predict_proba(p_pred)[:, 1]
67
def ennmble_by_blend():  # blend ensembling
    """Fit and score a blend ensemble using a single hold-out split.

    Reads the module-level names `xtrain`, `ytrain`, `xtest`, `ytest`,
    `ba_learners` and `meta_learner`, and fits `ba_learners` and
    `meta_learner` in place. Prints the ensemble's ROC-AUC on the test data.
    """
    # Split the training data in two halves: one for the base learners,
    # one for the meta learner.
    xtrain_ba, xpred_ba, ytrain_ba, ypred_ba = train_test_split(
        xtrain, ytrain, test_size=0.5, random_state=SEED
    )

    # Fit the base models on the first half.
    train_ba_learnres(ba_learners, xtrain_ba, ytrain_ba)

    # Their predictions on the second half are the meta learner's features.
    p_ba = predict_ba_learners(ba_learners, xpred_ba)
    meta_learner.fit(p_ba, ypred_ba)

    # Score the full two-stage pipeline on the test data.
    p_pred, p = enmble_predict(ba_learners, meta_learner, xtest)
    print("\nEnsemble ROC-AUC score: %.3f" % roc_auc_score(ytest, p))
79
80
from sklearn.base import clone


def stacking(ba_learners, meta_learner, X, y, generator):  # stacking ensembling
    """Fit a stacked ensemble using cross-validated base-learner predictions.

    Args:
        ba_learners: dict mapping name -> unfitted estimator; fitted in place
            on all of (X, y).
        meta_learner: estimator fitted in place on the cross-validated
            base-learner predictions.
        X: feature array; indexed as `X[idx, :]`, so it must support
            positional array indexing (e.g. a numpy array).
        y: label array, indexed as `y[idx]`.
        generator: object whose `split(X)` yields (train_idx, test_idx)
            pairs, e.g. a scikit-learn KFold.

    Returns:
        Tuple (ba_learners, meta_learner), both fitted.
    """
    # The final base learners see all the data; these are used at predict time.
    print("Fitting final base learners...", end="")
    train_ba_learnres(ba_learners, X, y, verbo=False)
    print("done")

    print("Generating cross-validated predictions...")
    cv_preds, cv_y = [], []
    for i, (train_idx, test_idx) in enumerate(generator.split(X)):
        fold_xtrain, fold_ytrain = X[train_idx, :], y[train_idx]
        fold_xtest, fold_ytest = X[test_idx, :], y[test_idx]

        # Fresh unfitted copies per fold, so the final learners fitted above
        # are left untouched.
        fold_ba_learners = {name: clone(model)
                            for name, model in ba_learners.items()}
        train_ba_learnres(fold_ba_learners, fold_xtrain, fold_ytrain, verbo=False)
        fold_P_ba = predict_ba_learners(fold_ba_learners, fold_xtest, verbo=False)

        cv_preds.append(fold_P_ba)
        cv_y.append(fold_ytest)

        print("Fold %i done" % (i + 1))
    print("CV-predictions done")

    # Stack per-fold blocks; rows of cv_preds and cv_y are appended in the
    # same fold order, so they stay aligned.
    cv_preds = np.vstack(cv_preds)
    cv_y = np.hstack(cv_y)

    print("Fitting meta learner...", end="")
    meta_learner.fit(cv_preds, cv_y)
    print("done")

    return ba_learners, meta_learner
111
def enmble_by_stack():
    """Fit and score a stacked ensemble with 2-fold cross-validation.

    Reads the module-level names `xtrain`, `ytrain`, `xtest`, `ytest` and
    `meta_learner`. Works on a fresh set of base learners and a clone of
    `meta_learner`, then prints the ensemble's ROC-AUC on the test data.
    """
    from sklearn.model_selection import KFold

    # stacking() indexes positionally, so pass the underlying arrays (.values).
    cv_ba_learners, cv_meta_learner = stacking(
        get_models(), clone(meta_learner), xtrain.values, ytrain.values, KFold(2))
    P_pred, p = enmble_predict(cv_ba_learners, cv_meta_learner, xtest, verbo=False)
    print("\nEnsemble ROC-AUC score: %.3f" % roc_auc_score(ytest, p))
118
def plot_roc_curve(ytest, p_ba_learners, p_enmble, labels, ens_label):
    """Plot ROC curves for every base learner and for the ensemble.

    Args:
        ytest: true labels.
        p_ba_learners: 2-D array with one column of scores per base learner.
        p_enmble: scores produced by the ensemble.
        labels: legend labels, one per column of `p_ba_learners`.
        ens_label: legend label for the ensemble curve.
    """
    plt.figure(figsize=(10, 8))
    # Diagonal reference line.
    plt.plot([0, 1], [0, 1], 'k--')

    # One colour per base learner plus one (index 0) for the ensemble.
    cm = [plt.cm.rainbow(i)
          for i in np.linspace(0, 1.0, p_ba_learners.shape[1] + 1)]

    for i in range(p_ba_learners.shape[1]):
        p = p_ba_learners[:, i]
        fpr, tpr, _ = roc_curve(ytest, p)
        plt.plot(fpr, tpr, label=labels[i], c=cm[i + 1])

    fpr, tpr, _ = roc_curve(ytest, p_enmble)
    plt.plot(fpr, tpr, label=ens_label, c=cm[0])

    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('ROC curve')
    plt.legend(frameon=False)
    plt.show()
135
from mlens.ensemble import SuperLearner


def u_pack():
    """Fit and score an ensemble built with mlens' SuperLearner.

    Reads the module-level names `xtrain`, `ytrain`, `xtest`, `ytest`,
    `ba_learners` and `meta_learner`. Prints the ROC-AUC on the test data.
    """
    sl = SuperLearner(
        folds=10, random_state=SEED, verbose=2,
        # backend="multiprocessing"
    )
    # Add the base learners and the meta learner
    sl.add(list(ba_learners.values()), proba=True)
    sl.add_meta(meta_learner, proba=True)
    # Train the ensemble
    sl.fit(xtrain, ytrain)
    # Predict the test set
    p_sl = sl.predict_proba(xtest)

    # Score using column 1 of the predicted probabilities.
    print("\nSuper Learner ROC-AUC score: %.3f" % roc_auc_score(ytest, p_sl[:, 1]))
151
if __name__ == "__main__":
    # These names are module-level globals; the ensembling functions read
    # them directly instead of taking them as arguments.
    xtrain, xtest, ytrain, ytest = get_train_test()
    ba_learners = get_models()

    meta_learner = GradientBoostingClassifier(
        n_estimators=1000,
        loss="exponential",
        max_depth=4,
        subsample=0.5,
        learning_rate=0.005,
        random_state=SEED
    )

    # Choose one ensembling strategy by uncommenting it:
    # ennmble_by_blend()  # ensemble via blending
    # enmble_by_stack()  # ensemble via stacking
    u_pack()  # ensemble via the library implementation