forked from kordjamshidi/CMPS-3240-6240-Fall2017
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata reader
113 lines (94 loc) · 4.97 KB
/
data reader
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import pandas as pd
# Load the match results. The code below reads the FTHG, FTAG, HomeTeam and
# AwayTeam columns of this frame and addresses rows by the default 0..n-1 index.
# NOTE(review): hard-coded absolute path into one user's Desktop folder --
# consider making the location configurable.
data = pd.read_csv("/Users/tplusd/Desktop/2016-2017.csv")
def result(row):
    """Classify a match from the home side's point of view.

    Compares the home score ``row['FTHG']`` with the away score
    ``row['FTAG']``.

    Returns:
        'Home Win', 'Home Loss' or 'Draw'; None when none of the three
        comparisons holds (for instance when a score is NaN).
    """
    home_goals, away_goals = row['FTHG'], row['FTAG']
    if home_goals > away_goals:
        return 'Home Win'
    if home_goals < away_goals:
        return 'Home Loss'
    if home_goals == away_goals:
        return 'Draw'
    return None
# Label every match with its outcome; axis=1 hands result() one row at a time.
data['Result'] = data.apply(result, axis=1)
def result2(row):
    """Label a match by its goal margin, seen from the home side.

    The margin is ``row['FTHG'] - row['FTAG']``. Margins of exactly one or
    two goals get their own label, anything from three goals up is grouped,
    and equal scores are a draw.

    Returns:
        One of the seven label strings, or None when no rule matches
        (for instance when a score is NaN).
    """
    home_goals = row['FTHG']
    away_goals = row['FTAG']
    margin = home_goals - away_goals

    # Labels for the margins that are matched exactly.
    narrow_labels = {
        1: 'One goal win',
        2: 'Two goals win',
        -1: 'One goal loss',
        -2: 'Two goals loss',
    }
    label = narrow_labels.get(margin)
    if label is not None:
        return label

    if margin >= 3:
        return 'Three or more goals win'
    if margin <= -3:
        return 'Three or more goals loss'
    if home_goals == away_goals:
        return 'Draw'
    return None
# Add the goal-margin label for every match (one result2() call per row).
data['Advanced result'] = data.apply(result2, axis=1)
def Home_point(row):
    """Return the points the home side takes from the match in ``row``.

    Looks at ``row['Result']``: 3 for 'Home Win', 1 for 'Draw', 0 for
    'Home Loss'. Any other value yields None.
    """
    points_by_result = {
        'Home Loss': 0,
        'Home Win': 3,
        'Draw': 1,
    }
    return points_by_result.get(row['Result'])
# Points earned by the home side in each match.
data['Home Point'] = data.apply(Home_point, axis=1)
def Away_point(row):
    """Return the points the away side takes from the match in ``row``.

    Looks at ``row['Result']``: 3 for 'Home Loss', 1 for 'Draw', 0 for
    'Home Win'. Any other value yields None.
    """
    points_by_result = {
        'Home Loss': 3,
        'Home Win': 0,
        'Draw': 1,
    }
    return points_by_result.get(row['Result'])
# Points earned by the away side in each match.
data['Away Point'] = data.apply(Away_point, axis=1)
# Number the rounds: rows are taken in file order and every consecutive group
# of MATCHES_PER_ROUND rows forms one round, counted from 1.
# (10 matches per round presumably reflects a 20-team league -- confirm.)
#
# The column is built in one step as integers. The previous version created
# it as "" and then overwrote each cell with an int through .loc, which mixes
# str and int in one column (pandas versions that infer a dedicated string
# dtype reject such writes), costs one indexed write per row, and went through
# float division (int(i / 10)) instead of integer division.
# Values are assigned by position, which matches the old label-based writes
# for the default 0..n-1 index produced by read_csv.
MATCHES_PER_ROUND = 10
data['Round'] = [
    row_number // MATCHES_PER_ROUND + 1
    for row_number in range(len(data))
]
# Give every match a streak of 0 for both sides; rows that the streak
# computation below does not reach keep this value.
#
# Fix: this step used to also run  data['Advanced result'] = ""  which wiped
# the goal-margin labels produced earlier by result2(), even though that
# column is kept in the final output. The reset has been removed.
# The zeros are assigned directly instead of creating "" placeholders and
# overwriting them cell by cell.
data['Home Streak'] = 0
data['Away Streak'] = 0
# Recent-form ("streak") features: for each match, the points each side
# collected in its earlier matches within the look-back window.
STREAK_FIRST_ROW = 70     # rows before this one keep a streak of 0
STREAK_ROUND_WINDOW = 7   # count matches played at most this many rounds earlier


def _points_in_match(team, home_team, away_team, outcome):
    """Return the points `team` took from one match (0 if it did not play in it)."""
    if team == home_team:
        return {'Home Win': 3, 'Draw': 1}.get(outcome, 0)
    if team == away_team:
        return {'Home Loss': 3, 'Draw': 1}.get(outcome, 0)
    return 0


def _recent_points(frame, team_column,
                   first_row=STREAK_FIRST_ROW, window=STREAK_ROUND_WINDOW):
    """Return, per row, the points the team in `team_column` earned recently.

    For every row from `first_row` on, all earlier rows whose 'Round' is at
    most `window` rounds behind are scanned, and the points the team earned
    in them (as home or as away side) are summed. Rows before `first_row`
    get 0.

    Args:
        frame: match table with 'HomeTeam', 'AwayTeam', 'Result' and 'Round'.
        team_column: 'HomeTeam' or 'AwayTeam' -- whose form to compute.
        first_row: first row position that receives a computed streak.
        window: maximum round difference for an earlier match to count.

    Returns:
        A list of ints, one per row of `frame`, in row order.
    """
    # Pull the columns out once; plain lists avoid a .loc lookup per comparison.
    home_teams = frame['HomeTeam'].tolist()
    away_teams = frame['AwayTeam'].tolist()
    outcomes = frame['Result'].tolist()
    rounds = frame['Round'].tolist()
    teams = frame[team_column].tolist()

    totals = [0] * len(frame)
    for current in range(first_row, len(frame)):
        team = teams[current]
        # Fix: scan *all* earlier rows. The previous range(0, i - 1) stopped
        # one row short and silently ignored the match right before this one.
        totals[current] = sum(
            _points_in_match(team, home_teams[earlier],
                             away_teams[earlier], outcomes[earlier])
            for earlier in range(current)
            if rounds[current] - rounds[earlier] <= window
        )
    return totals


data['Home Streak'] = _recent_points(data, 'HomeTeam')
data['Away Streak'] = _recent_points(data, 'AwayTeam')
# Drop every raw input column; only the derived features are carried forward.
columns_keep = [
    'HomeTeam',
    'AwayTeam',
    'Result',
    'Home Point',
    'Away Point',
    'Home Streak',
    'Away Streak',
    'Round',
    'Advanced result',
]
data = data.loc[:, columns_keep]
# Attach each side's ELO rank from the separate ranking file.
rank = pd.read_csv("/Users/tplusd/Desktop/2016-2017 ranking.csv")
# 'Unnamed: 0' is presumably the header-less first column holding team names.
rank = rank.rename(columns={'Unnamed: 0': 'Team'})

# Left joins keep every match even when a team is missing from the ranking.
join1 = data.merge(rank, how='left', left_on='HomeTeam', right_on='Team')
join2 = join1.merge(rank, how='left', left_on='AwayTeam', right_on='Team')

# Both merges bring in the same ranking columns, so the second merge suffixes
# the clashing names: _x is the home-side copy, _y the away-side copy.
# The rank column's name in the file starts with a tab character.
join2 = join2.rename(columns={
    '\tELO rank_x': 'Home ELO rank',
    '\tELO rank_y': 'Away ELO rank',
})

columns_keep = [
    'HomeTeam',
    'AwayTeam',
    'Home ELO rank',
    'Away ELO rank',
    'Result',
    'Home Point',
    'Away Point',
    'Home Streak',
    'Away Streak',
    'Round',
    'Advanced result',
]
join2 = join2[columns_keep]
# Bare expression: shows the frame when run as a notebook cell, no effect in a script.
join2