首页 > 英文翻译

利用xgboost进行多元时间序列预测

更新时间:2023-05-20 08:22:28 阅读：评论：0

利用xgboost进行多元时间序列预测：利用2019年CCF的数据集进行测试。

主要思路是把数据划分成很多窗口，这里以1320*4条数据为一个窗口。

因为每个省都有60个车型，一共22个省，所以每个月共有60×22=1320条数据；要预测未来4个月，所以窗口大小取4个月（1320*4）。

import pandas as pd

# Load the two input tables from the local training directory.
train_sales = pd.read_csv('C:\\Train\\train_sales_data.csv', header=0)
# NOTE(review): other identifiers in this script have lost characters (e.g.
# `ed` for `seed`), so this file name may be truncated as well — confirm the
# real name of the second CSV before running.
train_arch = pd.read_csv('C:\\Train\\train_arch_data.csv', header=0)

# Inner-join the two tables on region code, car model, year and month.
# The `train_sales.merge` receiver had been lost from this statement, which
# left an invalid parenthesised expression.  The keys are passed as a list
# because pandas documents `on` as "label or list".
data = train_sales.merge(
    train_arch,
    on=["adcode", "model", "regYear", "regMonth"],
    how='inner',
)

# `province_x` / `province_y` are the suffixed duplicates pandas produces when
# both frames carry a same-named non-key column; neither is used as a feature.
data = data.drop(['province_x', 'province_y'], axis=1)

print(data)

import copy

# Label-valued columns that are replaced by indicator (one-hot) columns.
categoricals = ['model', 'adcode', 'bodyType']

for feature in categoricals:
    # drop_first=True drops one indicator per feature so the remaining
    # columns are not perfectly collinear.
    df = pd.get_dummies(data[feature], drop_first=True)
    # Append the indicator columns on the right, then remove the raw column.
    data = pd.concat([data, df], axis=1)
    data.drop(columns=feature, inplace=True)

print(data.head())

# Rows 1320*20 .. 1320*24: the slice that is later fed to the trained model
# to produce the final forecast.
print(data.iloc[1320*20:1320*24, :].values)

def to_supervid(data, rows_per_month=1320, horizon=4, n_months=24, target_col=2):
    """Turn the row-ordered frame into (features, shifted target) arrays.

    The features are the first ``n_months - horizon`` blocks of
    ``rows_per_month`` rows; the target is column ``target_col`` taken
    ``horizon`` blocks later, so row ``k`` of ``x`` is paired with the target
    value of row ``k + rows_per_month * horizon``.

    Args:
        data: pandas DataFrame.  Assumes rows are ordered so that each
            consecutive run of ``rows_per_month`` rows is one month — TODO
            confirm against the loading code.
        rows_per_month: Number of rows in one block (default 1320).
        horizon: Number of blocks the target is shifted ahead (default 4).
        n_months: Total number of blocks used from ``data`` (default 24).
        target_col: Positional index of the target column (default 2).

    Returns:
        Tuple ``(x, y)`` of numpy arrays with the same number of rows.
    """
    n_feature_rows = rows_per_month * (n_months - horizon)
    x = data.iloc[0:n_feature_rows, :].values
    y = data.iloc[rows_per_month * horizon:rows_per_month * n_months, target_col].values
    return x, y

# Build the supervised (features, target) arrays and report their shapes.
data_x, data_y = to_supervid(data)
print(data_x.shape, data_y.shape, sep='\n')

# The first 16 blocks of 1320 rows are used for training; the following four
# blocks (up to row 1320*20 == 26400) are held out for validation.
train_x, train_y = data_x[:1320*16], data_y[:1320*16]
test_x, test_y = data_x[1320*16:1320*20], data_y[1320*16:1320*20]

print('-----------test_x------------')
print(test_x)

from math import sqrt

import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost as xgb
from hyperopt import STATUS_OK, STATUS_RUNNING, fmin, hp, tpe, space_eval, partial
from matplotlib import pyplot
from numpy import array
from numpy import isnan
from numpy import nan
from numpy import split
from numpy.random import seed
from pandas import read_csv
from pandas import to_numeric
from sklearn.metrics import explained_variance_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

print("---------DMatrix----------")

# Wrap the training and validation arrays (features + label) in DMatrix
# objects, in that order.
dtrain, dvalid = (
    xgb.DMatrix(features, label=target)
    for features, target in ((train_x, train_y), (test_x, test_y))
)

# Training parameters.
SEED, VALID_SIZE = 314159265, 0.25

def model_run(params):
    """Train one booster with ``params`` and return its validation loss.

    This is the objective passed to hyperopt's ``fmin``.  It reads the
    module-level ``dtrain``, ``dvalid``, ``test_x`` and ``test_y``.

    Args:
        params: Dict of XGBoost parameters; must contain ``'n_estimators'``,
            which is used as the number of boosting rounds.

    Returns:
        Dict with ``'loss'`` (the value from ``get_score``), ``'status'`` and
        ``'stats_running'``.
    """
    print("")
    print("Training with params: ")
    print(params)

    # hp.quniform yields floats, while the round count must be an int.
    num_boost_round = int(params['n_estimators'])

    print("watchlist")
    watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
    print("")

    gbm = xgb.train(params, dtrain, num_boost_round, evals=watchlist, verbose_eval=True)
    print("")

    # NOTE(review): `ntree_limit` belongs to the older XGBoost API that the
    # rest of this script targets — confirm against the installed version.
    check = gbm.predict(xgb.DMatrix(test_x), ntree_limit=gbm.best_iteration+1)

    print("explained_")
    # get_score is declared as get_score(pre, real): predictions go first so
    # that they are the values rounded and the true values are the divisor.
    score = get_score(check, test_y)

    print("pr...")
    print('Check error value: {:.6f}'.format(score))

    return {
        'loss': score,
        'status': STATUS_OK,
        'stats_running': STATUS_RUNNING
    }

def optimize(random_state=SEED, max_evals=2000):
    """Search the XGBoost hyperparameter space with hyperopt's TPE.

    Given the search space defined below and the scoring function
    ``model_run``, find the parameter set with the lowest loss.

    Args:
        random_state: Value stored under the ``'seed'`` key of the parameters.
        max_evals: Number of ``model_run`` evaluations ``fmin`` performs
            (default 2000, the value that was previously hard-coded).

    Returns:
        Dict of the best parameters, resolved with ``space_eval`` so that
        ``hp.choice`` entries hold actual values rather than indices.
    """
    space = {
        'n_estimators': hp.quniform('n_estimators', 20, 60, 1),
        'eta': hp.quniform('eta', 0.02, 0.4, 0.02),
        'max_depth': hp.choice('max_depth', np.arange(1, 20, dtype=int)),
        'min_child_weight': hp.quniform('min_child_weight', 1, 6, 1),
        'subsample': hp.quniform('subsample', 0.5, 1, 0.05),
        'gamma': hp.quniform('gamma', 0.5, 1, 0.05),
        'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1, 0.05),
        'eval_metric': 'rmse',
        # NOTE(review): 'reg:linear' and 'silent' are spellings from older
        # XGBoost releases — confirm they are accepted by the installed version.
        'objective': 'reg:linear',
        'nthread': 4,
        'booster': 'gbtree',
        'tree_method': 'exact',
        'silent': 1,
        'seed': random_state
    }

    print("---------开始训练参数----------")
    best = fmin(model_run, space, algo=tpe.suggest, max_evals=max_evals)

    # Map the raw fmin result back onto the search space to get real values.
    best_params = space_eval(space, best)
    print("BEST PARAMETERS: " + str(best_params))
    return best_params

##定义计分函数

def get_score(pre, real, n_models=60, n_provinces=22, horizon=4):
    """Return the mean normalised RMSE over ``n_models`` groups of rows.

    For group ``i`` the function takes, from each of the ``horizon`` monthly
    blocks of ``n_models * n_provinces`` rows, the ``n_provinces`` rows starting
    at offset ``n_provinces * i``.  It computes the RMSE of the rounded
    predictions against the true values over those rows and divides it by the
    mean of the true values.  The result is the average over all groups, so
    lower is better.

    The per-group buffers are created inside the group loop.  Previously they
    were created once, so group ``i`` was scored on the data of groups
    ``0..i`` together.

    Args:
        pre: Predicted values (array-like); rounded to the nearest integer.
        real: True values (array-like), same length and order as ``pre``.
            Assumes each monthly block is laid out as ``n_models`` runs of
            ``n_provinces`` rows — TODO confirm against the row order of the
            loaded data.
        n_models: Number of groups scored (default 60).
        n_provinces: Rows per group inside one monthly block (default 22).
        horizon: Number of monthly blocks (default 4).

    Returns:
        Float, the mean of the per-group normalised RMSE values.
    """
    pre = np.asarray(pre).round().astype(int)
    real = np.asarray(real)
    rows_per_month = n_models * n_provinces

    temp = []
    for i in range(n_models):
        pre_t = []
        real_t = []
        for j in range(horizon):
            start = rows_per_month * j + n_provinces * i
            stop = start + n_provinces
            pre_t.append(pre[start:stop])
            real_t.append(real[start:stop])
        pre_block = np.array(pre_t, dtype=float)
        real_block = np.array(real_t, dtype=float)
        # Square root of the mean squared error over every value in the group.
        rmse = np.sqrt(np.mean((pre_block - real_block) ** 2))
        temp.append(rmse / np.mean(real_block))

    return float(sum(temp) / n_models)

print("---------开始优化参数----------")
best_params = optimize()
print("---------优化完成----------")
print(best_params)

# Train the final model with the best parameters found above.
print(best_params)
print("---------正式训练模型----------")
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
# Up to 180 rounds; training stops early after 50 rounds without improvement
# on the last entry of the watchlist.
model_gbm = xgb.train(best_params, dtrain, 180, evals=watchlist,
                      early_stopping_rounds=50, verbose_eval=True)

print("---------正式预测模型----------")
print("Predict ")
# Rows 1320*20 .. 1320*24 are the rows to_supervid leaves out of its feature
# array; they are used here as inputs for the final forecast.
test_prediction = model_gbm.predict(
    xgb.DMatrix(data.iloc[1320*20:1320*24, :].values),
    ntree_limit=model_gbm.best_iteration+1,
)
print("---------预测完成----------")
print(test_prediction)
print("---------预测完成----------")
print(best_params)
print(test_prediction.shape)

# Round the predictions to whole numbers.
test_prediction = test_prediction.round().astype(int)

# Persist the rounded predictions, one value per line.
# NOTE(review): the path below has no file name, so opening it for writing
# will fail — supply the real output file name.
# The `with` block guarantees the file is closed even if a write fails.
with open('C:\\', 'w') as f:
    f.writelines(str(value) + '\n' for value in test_prediction[:1320*4])

print("持久化完成")

# Score the final model on the held-out validation block.
test_prediction1 = model_gbm.predict(xgb.DMatrix(test_x), ntree_limit=model_gbm.best_iteration+1)
test_prediction1 = test_prediction1.round().astype(int)

# get_score is declared as get_score(pre, real): predictions first, truth second.
score = get_score(test_prediction1, test_y)

# Print 1 - score, so a higher value is better.
print(1-score)

效果虽然不咋地，但是提供了一个比较简单的思路，排名400+。大佬们可以在这个的基础上进行改进，也许可以提高很多。本人渣渣代码。

本文发布于:2023-05-20 08:22:28，感谢您对本站的认可！

本文链接：https://www.wtabcd.cn/fanwen/fan/90/115567.html

上一篇：二手车交易价格预测：建模调参

下一篇：mothur操作流程命令

标签：参数训练数据思路培训预测

留言与评论（共有 0 条评论）