import os
from collections import defaultdict

import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeClassifier

# Location of the data files. Paths without non-ASCII characters are preferred.
# NOTE(review): the literal is kept character-for-character (as a raw string so
# the backslashes are not read as escape sequences). It embeds typographic
# quotes and a CSV file name, which looks like a paste error -- confirm the
# real data directory before running.
dataFolder = r"’G:\Python36\leagues_NBA_2014_games_games.csv’"
data_filename = os.path.join(
    dataFolder, "basketball", "leagues_NBA_2014_games_games.csv"
)

# Load the game log, merging the first two CSV columns into a single parsed
# date column while reading.
# NOTE: the nested-list form of parse_dates is deprecated in recent pandas
# releases; it is kept here because the rest of the script expects the merged
# column layout it produces.
with open(data_filename) as data_file:
    datat = pd.read_csv(data_file, parse_dates=[[0, 1]])

# The first rows can be inspected with datat.iloc[:5].

# Give the columns readable names.
datat.columns = [
    "Date",
    "Visitor Team",
    "VisitorPts",
    "Home Team",
    "HomePts",
    "OT?",
    "Notes",
]

# Sort by date and rebuild a 0..n-1 index. DataFrame.sort() no longer exists
# in pandas (gone since 0.20), so sort_values() is used.
datat.sort_values(by=["Date"], ascending=True, inplace=True)
datat = datat.reset_index(drop=True)
# Per-team result of the most recent game seen so far. A defaultdict returns
# the factory's default (0 for int) for teams that have not played yet.
won_last = defaultdict(int)

# Prediction target: True when the home team outscored the visitors.
datat["HomeWin"] = datat["VisitorPts"] < datat["HomePts"]
y_true = datat["HomeWin"].values

# Two features: did each side win its previous game? The columns are built in
# one pass over the rows (in their current order) and assigned once, instead
# of writing a modified copy of every row back into the frame.
home_last_win = []
visitor_last_win = []
for home_team, visitor_team, home_win in zip(
    datat["Home Team"], datat["Visitor Team"], datat["HomeWin"]
):
    # Record what was known *before* this game ...
    home_last_win.append(int(won_last[home_team]))
    visitor_last_win.append(int(won_last[visitor_team]))
    # ... then store this game's outcome for the next time either team appears.
    won_last[home_team] = int(home_win)
    won_last[visitor_team] = int(not home_win)

datat["HomeLastWin"] = home_last_win
datat["VisitorLastWin"] = visitor_last_win
# Baseline model: a decision tree that only sees the two "won last game"
# flags. random_state is fixed at 14.
clf = DecisionTreeClassifier(random_state=14)
X_previouswins = datat[["HomeLastWin", "VisitorLastWin"]].values

# Evaluate with 3-fold cross-validation and report the mean accuracy.
scores = cross_val_score(clf, X_previouswins, y_true, scoring="accuracy", cv=3)
print("初次使用决策树Accuracy: {0:.1f}%".format(np.mean(scores) * 100))
# Load the 2013 standings table; the first line of the file is skipped.
standings_filename = os.path.join(
    dataFolder, "basketball", "leagues_NBA_2013_standings_expanded-standings.csv"
)
with open(standings_filename) as standings_file:
    standings = pd.read_csv(standings_file, skiprows=[0])

# Team -> "Rk" value, built once instead of filtering the table for every
# game. The first occurrence of a team wins, matching the previous
# `.values[0]` lookup. A team missing from the table raises KeyError.
rank_by_team = (
    standings.drop_duplicates(subset="Team").set_index("Team")["Rk"].to_dict()
)

# Game-log names that must be looked up under a different name in the
# standings table (presumably the team's name in the 2013 file -- confirm).
standings_team_aliases = {"New Orleans Pelicans": "New Orleans Hornets"}

home_ranks_higher = []
for home_team, visitor_team in zip(datat["Home Team"], datat["Visitor Team"]):
    home_rank = rank_by_team[standings_team_aliases.get(home_team, home_team)]
    visitor_rank = rank_by_team[
        standings_team_aliases.get(visitor_team, visitor_team)
    ]
    # NOTE(review): the flag is 1 when the home team's "Rk" value is the
    # larger one. If Rk 1 denotes the best team, that means the home team is
    # ranked *worse*; the comparison is kept as written -- confirm the
    # intended direction.
    home_ranks_higher.append(int(home_rank > visitor_rank))

datat["HomeTeamRanksHigher"] = home_ranks_higher
# Second model: the two last-win flags plus the standings comparison.
X_homehigher = datat[
    ["HomeLastWin", "VisitorLastWin", "HomeTeamRanksHigher"]
].values

# Fresh decision tree, scored with 3-fold cross-validation.
clf = DecisionTreeClassifier(random_state=14)
scores = cross_val_score(clf, X_homehigher, y_true, scoring="accuracy", cv=3)
print("增加新特征值Accuracy: {0:.1f}%".format(np.mean(scores) * 100))
# Winner of the previous meeting between each pair of teams. The key is the
# alphabetically sorted pair, so it is the same regardless of who hosts; pairs
# that have not met yet yield the int default 0, which never equals a name.
last_match_winner = defaultdict(int)

# New feature: 1 when the home team won the previous meeting of this pair.
# The column is built in one pass and assigned once.
home_team_won_last = []
for home_team, visitor_team, home_win in zip(
    datat["Home Team"], datat["Visitor Team"], datat["HomeWin"]
):
    teams = tuple(sorted([home_team, visitor_team]))
    home_team_won_last.append(1 if last_match_winner[teams] == home_team else 0)
    # Remember this game's winner for the pair's next meeting.
    last_match_winner[teams] = home_team if home_win else visitor_team

datat["HomeTeamWonLast"] = home_team_won_last

# Third model: all four hand-built features, 3-fold cross-validation.
X_lastwinner = datat[
    ["HomeLastWin", "VisitorLastWin", "HomeTeamRanksHigher", "HomeTeamWonLast"]
].values
clf = DecisionTreeClassifier(random_state=14)
scores = cross_val_score(clf, X_lastwinner, y_true, scoring="accuracy", cv=3)
print("再次增加特征Accuracy: {0:.1f}%".format(np.mean(scores) * 100))
# Turn team names into integer labels. The encoder is fitted on the home-team
# column and then applied to both columns, so every visiting team must also
# appear at least once as a home team.
encoding = LabelEncoder()
encoding.fit(datat["Home Team"].values)
home_teams = encoding.transform(datat["Home Team"].values)
visitor_teams = encoding.transform(datat["Visitor Team"].values)

# One row per game: [home label, visitor label].
X_teams = np.vstack([home_teams, visitor_teams]).T

# One-hot encode the labels so the tree does not treat them as ordered
# numbers. toarray() yields a plain ndarray; todense() would return an
# np.matrix, which current scikit-learn estimators reject.
onehot = OneHotEncoder(categories="auto")
onehot.fit(X_teams)
X_teams_expanded = onehot.transform(X_teams).toarray()

# Decision tree on the team-identity features, 3-fold cross-validation.
clf = DecisionTreeClassifier(random_state=14)
scores = cross_val_score(clf, X_teams_expanded, y_true, scoring="accuracy", cv=3)
print("整合新数据Accuracy: {0:.1f}%".format(np.mean(scores) * 100))
本文发布于:2023-08-02 18:53:12,感谢您对本站的认可!
本文链接:https://www.wtabcd.cn/fanwen/fan/89/1105759.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论) |