利用 xgboost 进行多元时间序列预测,并利用 2019 年 CCF 的数据集进行测试。
主要思路是把数据划分成很多窗口,这里以 1320*4 条数据为一个窗口:
因为每个省都有 60 个车型,一共 22 个省,每个月就是 60*22=1320 条数据;要预测未来 4 个月,所以窗口长度取 4 个月。
import pandas as pd

# Load the two training tables.
train_sales = pd.read_csv('C:\\Train\\train_sales_data.csv', header=0)
# NOTE(review): the listing this script came from spelled the file
# 'train_arch_data.csv', which looks like a corrupted 'train_search_data.csv'
# -- confirm against the files on disk.
train_search = pd.read_csv('C:\\Train\\train_search_data.csv', header=0)

# Join the tables on region code, car model and month. The merge leaves the
# two `province` columns suffixed as province_x / province_y; both are dropped
# so they never reach the feature matrix.
data = train_sales.merge(
    train_search,
    on=["adcode", "model", "regYear", "regMonth"],
    how='inner',
)
data = data.drop(['province_x', 'province_y'], axis=1)
print(data)
import copy

# One-hot encode every categorical column. Each column is expanded on its own
# so the dummy columns keep the bare category values as their names, and the
# first level is dropped (drop_first=True).
categoricals = ['model', 'adcode', 'bodyType']
for feature in categoricals:
    df = pd.get_dummies(data[feature], drop_first=True)
    data = pd.concat([data, df], axis=1)
    data.drop(columns=feature, inplace=True)
print(data.head())
# Rows 1320*20 .. 1320*24 are the ones fed to the final forecast further down.
print(data.iloc[1320*20:1320*24, :].values)
def to_supervid(data, rows_per_month=1320, horizon=4, n_months=24, target_col=2):
    """Turn the row-ordered frame into (features, target) arrays.

    The target for a feature row is the value of column ``target_col`` found
    ``rows_per_month * horizon`` rows further down, so the last ``horizon``
    months of the first ``n_months`` months have no target and are left out of
    the features.

    Args:
        data: DataFrame whose rows are ordered month by month, with
            ``rows_per_month`` rows per month (assumed; not checked here).
        rows_per_month: number of rows that make up one month.
        horizon: how many months ahead the target lies.
        n_months: number of leading months of ``data`` to use.
        target_col: positional index of the target column.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays: ``x`` holds all columns of rows
        ``[0, rows_per_month * (n_months - horizon))`` and ``y`` holds column
        ``target_col`` of rows ``[rows_per_month * horizon,
        rows_per_month * n_months)``.
    """
    offset = rows_per_month * horizon
    last_row = rows_per_month * n_months
    x = data.iloc[0:last_row - offset, :].values
    y = data.iloc[offset:last_row, target_col].values
    return x, y
# Build the supervised arrays and report their shapes.
data_x, data_y = to_supervid(data)
print(data_x.shape, data_y.shape, sep='\n')

# Rows [0, 1320*16) are used for training; rows [1320*16, 26400) are held out.
train_x, train_y = data_x[:1320*16], data_y[:1320*16]
test_x, test_y = data_x[1320*16:26400], data_y[1320*16:26400]
print('-----------test_x------------', test_x, sep='\n')
from math import sqrt

import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost as xgb
from hyperopt import STATUS_OK, STATUS_RUNNING, fmin, hp, tpe, space_eval, partial
from matplotlib import pyplot
from numpy import array
from numpy import isnan
from numpy import nan
from numpy import split
from numpy.random import seed
from pandas import read_csv
from pandas import to_numeric
from sklearn.metrics import explained_variance_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
# Wrap both splits in XGBoost's DMatrix container, with labels attached.
print("---------DMatrix----------")
dtrain = xgb.DMatrix(data=train_x, label=train_y)
dvalid = xgb.DMatrix(data=test_x, label=test_y)

# Training constants. SEED is the default random state of optimize();
# VALID_SIZE is not referenced anywhere in the code shown here.
SEED, VALID_SIZE = 314159265, 0.25
def model_run(params):
    """Train one XGBoost model with ``params`` and score it on the hold-out split.

    This is the objective function that ``optimize`` passes to hyperopt's
    ``fmin``.

    Args:
        params: dict of XGBoost training parameters. It must also contain
            ``'n_estimators'``, which is used as the number of boosting rounds.

    Returns:
        dict with ``'loss'`` (the value ``get_score`` returns for the hold-out
        predictions), ``'status'`` and ``'stats_running'``.
    """
    print("")
    print("Training with params: ")
    print(params)
    # hyperopt's quniform yields floats, so cast before using it as a count.
    num_boost_round = int(params['n_estimators'])
    print("watchlist")
    watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
    print("")
    gbm = xgb.train(params, dtrain, num_boost_round, evals=watchlist, verbose_eval=True)
    print("")
    # NOTE(review): `ntree_limit` is the legacy way to cap the trees used for
    # prediction; newer XGBoost releases use `iteration_range` instead --
    # confirm against the installed version.
    check = gbm.predict(xgb.DMatrix(test_x), ntree_limit=gbm.best_iteration+1)
    print("explained_")
    # get_score expects (prediction, ground truth): the first argument is
    # rounded and the second one supplies the normalising mean.
    score = get_score(check, test_y)
    print("pr...")
    print('Check error value: {:.6f}'.format(score))
    return {
        'loss': score,
        'status': STATUS_OK,
        'stats_running': STATUS_RUNNING
    }
def optimize(random_state=SEED, max_evals=2000):
    """Search the XGBoost hyperparameter space with hyperopt and return the best point.

    Args:
        random_state: value stored under the ``'seed'`` training parameter.
        max_evals: number of ``model_run`` evaluations ``fmin`` may perform.

    Returns:
        dict of the best parameters found, resolved through ``space_eval`` so
        that ``hp.choice`` entries hold actual values rather than indices.
    """
    space = {
        # Tunable parameters.
        'n_estimators': hp.quniform('n_estimators', 20, 60, 1),
        'eta': hp.quniform('eta', 0.02, 0.4, 0.02),
        'max_depth': hp.choice('max_depth', np.arange(1, 20, dtype=int)),
        'min_child_weight': hp.quniform('min_child_weight', 1, 6, 1),
        'subsample': hp.quniform('subsample', 0.5, 1, 0.05),
        'gamma': hp.quniform('gamma', 0.5, 1, 0.05),
        'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1, 0.05),
        # Fixed parameters.
        # NOTE(review): 'reg:linear' and 'silent' are legacy XGBoost names;
        # newer releases spell them 'reg:squarederror' and 'verbosity' --
        # confirm against the installed version before changing them.
        'eval_metric': 'rmse',
        'objective': 'reg:linear',
        'nthread': 4,
        'booster': 'gbtree',
        'tree_method': 'exact',
        'silent': 1,
        'seed': random_state
    }
    print("---------开始训练参数----------")
    best = fmin(model_run, space, algo=tpe.suggest, max_evals=max_evals)
    # fmin reports hp.choice parameters as indices; space_eval maps the result
    # back onto the real parameter values.
    best_params = space_eval(space, best)
    print("BEST PARAMETERS: " + str(best_params))
    return best_params
# Scoring function.
def get_score(pre, real, n_models=60, n_provinces=22, n_months=4):
    """Return the normalised RMSE of a forecast, averaged over car models.

    Both inputs are flat sequences laid out month by month. Inside a month the
    values form ``n_models`` consecutive runs of ``n_provinces`` values, so the
    value for (month ``j``, model ``i``, position ``k``) sits at index
    ``n_models * n_provinces * j + n_provinces * i + k``. For every model the
    RMSE over all of its values is divided by the mean of its true values, and
    the ratios are averaged.

    Each model is scored on its own values only. The earlier version kept
    appending to shared lists, so model ``i`` was scored together with models
    ``0 .. i-1``.

    Args:
        pre: predicted values; rounded to the nearest integer before scoring.
        real: true values, same layout as ``pre``.
        n_models: number of runs per month.
        n_provinces: number of values per run.
        n_months: number of months covered.

    Returns:
        Mean over models of ``rmse / mean(real)`` (lower is better).

    Raises:
        ValueError: if either input has fewer than
            ``n_models * n_provinces * n_months`` values.
    """
    pre = np.asarray(pre).round().astype(int)
    real = np.asarray(real)
    n_rows = n_models * n_provinces * n_months
    if pre.shape[0] < n_rows or real.shape[0] < n_rows:
        raise ValueError(
            "get_score needs at least {} values, got {} predictions and {} "
            "targets".format(n_rows, pre.shape[0], real.shape[0])
        )
    # Axis 0 = month, axis 1 = model, axis 2 = position inside the model's run.
    cube_shape = (n_months, n_models, n_provinces)
    pre_cube = pre[:n_rows].reshape(cube_shape)
    real_cube = real[:n_rows].reshape(cube_shape)
    rmse = np.sqrt(np.mean((pre_cube - real_cube) ** 2, axis=(0, 2)))
    nrmse = rmse / real_cube.mean(axis=(0, 2))
    return nrmse.sum() / n_models
print("---------开始优化参数----------")
best_params = optimize()
print("---------优化完成----------")
print(best_params)
# Train the final model with the best parameters found.
print(best_params)
print("---------正式训练模型----------")
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
# Up to 180 boosting rounds; training stops early once the last watchlist
# entry ('eval') has not improved for 50 rounds.
model_gbm = xgb.train(best_params, dtrain, 180, evals=watchlist,
                      early_stopping_rounds=50, verbose_eval=True)
print("---------正式预测模型----------")
print("Predict ")
# Forecast from the last four months of feature rows.
# NOTE(review): `ntree_limit` is the legacy way to cap the trees used for
# prediction; newer XGBoost releases use `iteration_range` instead -- confirm
# against the installed version.
test_prediction = model_gbm.predict(
    xgb.DMatrix(data.iloc[1320*20:1320*24, :].values),
    ntree_limit=model_gbm.best_iteration+1,
)
print("---------预测完成----------")
print(test_prediction)
print("---------预测完成----------")
print(best_params)
print(test_prediction.shape)
# Round the predictions to whole numbers.
test_prediction = test_prediction.round().astype(int)
# Write the rounded forecasts to disk, one value per line.
# NOTE(review): the listing this script came from had only the directory
# 'C:\\' here (the file name was lost), which cannot be opened for writing.
# The name below is a placeholder -- point it at the real output file.
with open('C:\\Train\\xgb_prediction.txt', 'w', encoding='utf-8') as f:
    f.writelines(str(value) + '\n' for value in test_prediction[:1320*4])
print("持久化完成")
# Score the final model on the hold-out split.
# NOTE(review): `ntree_limit` is the legacy way to cap the trees used for
# prediction; newer XGBoost releases use `iteration_range` instead -- confirm
# against the installed version.
test_prediction1 = model_gbm.predict(xgb.DMatrix(test_x), ntree_limit=model_gbm.best_iteration+1)
test_prediction1 = test_prediction1.round().astype(int)
# get_score expects (prediction, ground truth); the truth supplies the
# normalising mean.
score = get_score(test_prediction1, test_y)
# Report 1 - error, so that a higher number is better.
print(1-score)
效果虽然不咋地,但是提供了一个比较简单的思路,排名 400+。大佬们可以在这个基础上进行改进,也许可以提高很多。本人渣渣代码。