sklearn的模型训练与预测
目录
sklearn的模型训练与预测
sklearn是强大的Python机器学习工具,支持丰富的机器学习算法和数据预处理,在学术界和企业中应用广泛。下面是sklearn的代码编写流程和各种算法的使用示例(以分类为例)。
分类任务流程三步走
1. 创建模型对象
2. 训练
3. 预测与性能评价
xgboost算法分类
import numpy as np
import json
import math
import time
import os
import random
from sklearn.model_selection import train_test_split
from sklearn import metrics
def main():
    """Train and evaluate an XGBoost classifier using the three-step flow.

    The steps are: (1) create the model object, (2) train it, and
    (3) predict on the held-out split and print a classification report,
    a confusion matrix and the total elapsed time.

    The raw-data loading was omitted from the original listing: ``d`` must
    be supplied by the reader as an object exposing ``data`` and ``labels``
    (presumably the feature matrix and the class labels -- confirm against
    your own data source) before this function can run.
    """
    time_begin = time.time()

    # Raw data (loading omitted in the original listing)
    data = d.data
    labels = d.labels

    # Standardize the data
    from sklearn.preprocessing import StandardScaler
    data = StandardScaler().fit_transform(data)
    # Hold out 30% of the samples for evaluation
    x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.3)

    # 1. Create the model object
    from xgboost import XGBClassifier
    clf = XGBClassifier(
        learning_rate=0.1,
        n_estimators=1000,  # number of trees: build xgboost with 1000 trees
        max_depth=6,  # depth of each tree
        min_child_weight=1,  # minimum weight of a leaf node
        gamma=0.,  # coefficient on the number of leaves in the penalty term
        subsample=0.8,  # randomly pick 80% of the samples to build each tree
        colsample_bytree=0.8,  # randomly pick 80% of the features to build each tree
        objective='multi:softmax',  # loss function
        scale_pos_weight=1,  # addresses class imbalance
        random_state=27,  # random seed
    )

    # 2. Train
    # NOTE(review): xgboost >= 2.0 no longer accepts eval_metric /
    # early_stopping_rounds in fit(); on those versions pass them to the
    # XGBClassifier constructor instead -- confirm against the installed version.
    clf = clf.fit(
        x_train,
        y_train,
        eval_set=[(x_test, y_test)],
        eval_metric="mlogloss",
        early_stopping_rounds=10,
        verbose=True,
    )

    # 3. Predict and evaluate
    # Print arrays in full instead of NumPy's truncated summary form
    np.set_printoptions(threshold=np.inf)
    predicted = clf.predict(x_test)
    predicted = np.array(predicted)
    print(metrics.classification_report(y_test, predicted))
    print(metrics.confusion_matrix(y_test, predicted))

    time_end = time.time()
    print("total time is ", time_end-time_begin)
# Program entry point: run the example only when executed as a script
if __name__ == "__main__":
    main()
随机森林算法分类
n_estimators是随机森林的一个重要调优参数,表示树的个数。
import numpy as np
import json
import math
import time
import os
import random
from sklearn.model_selection import train_test_split
from sklearn import metrics
def main():
    """Train and evaluate a random-forest classifier using the three-step flow.

    The steps are: (1) create the model object, (2) train it, and
    (3) predict on the held-out split and print a classification report,
    a confusion matrix and the total elapsed time.

    The raw-data loading was omitted from the original listing: ``d`` must
    be supplied by the reader as an object exposing ``data`` and ``labels``
    (presumably the feature matrix and the class labels -- confirm against
    your own data source) before this function can run.
    """
    time_begin = time.time()

    # Raw data (loading omitted in the original listing)
    data = d.data
    labels = d.labels

    # Standardize the data
    from sklearn.preprocessing import StandardScaler
    data = StandardScaler().fit_transform(data)
    # Hold out 30% of the samples for evaluation
    x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.3)

    # 1. Create the model object
    # Import the estimator explicitly: a bare `import sklearn` does not
    # guarantee that the `sklearn.ensemble` submodule is loaded.
    from sklearn.ensemble import RandomForestClassifier
    clf = RandomForestClassifier(n_estimators=100)

    # 2. Train
    # RandomForestClassifier.fit() takes only the training data (plus an
    # optional sample_weight); the eval_set / early-stopping arguments from
    # the XGBoost example are not accepted here and would raise TypeError.
    clf = clf.fit(x_train, y_train)

    # 3. Predict and evaluate
    # Print arrays in full instead of NumPy's truncated summary form
    np.set_printoptions(threshold=np.inf)
    predicted = clf.predict(x_test)
    predicted = np.array(predicted)
    print(metrics.classification_report(y_test, predicted))
    print(metrics.confusion_matrix(y_test, predicted))

    time_end = time.time()
    print("total time is ", time_end-time_begin)
# Program entry point: run the example only when executed as a script
if __name__ == "__main__":
    main()