Summary
In brief: I take historical results from the last four seasons (2012-2015) and calculate the average number of goals each team scores and concedes. These averages are compared with the league average to derive an attacking strength and a defensive strength for every team, which are then turned into goal-expectancy figures for a given fixture. The goal expectancies are fed into the Poisson distribution to obtain the probability of every scoreline when the two teams face each other. The resulting probability matrix can then be used to calculate the probability of a draw and of a win or a loss for either team.
Loading Packages
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Home Team Data Grouping
# Per-club summaries of matches played at home (one row per value of `home`).

# Goals scored by each club in its home matches.
data_hf <- summarise(
  group_by(main_data, home),
  hgoal = sum(hgoal)
)

# Goals conceded by each club in its home matches (goals scored by the visitor).
data_ha <- summarise(
  group_by(main_data, home),
  vgoal = sum(vgoal)
)

# Number of home matches per club. The count column is called `vgoal` even
# though it holds a row count; later code reads it by position, not by name.
data_h_matches <- summarise(
  group_by(main_data, home),
  vgoal = n()
)
Home Team Table
# One row per home club: club, home matches played, goals scored, goals conceded.
home_table <- as.data.frame(
  cbind(data_hf[, 1], data_h_matches[, 2], data_hf[, 2], data_ha[, 2])
)

# Per-match averages: goals scored and goals conceded, each divided by the
# number of home matches played.
mean_goals_hf_af <- as.data.frame(
  cbind(
    home_table[[3]] / home_table[[2]],
    home_table[[4]] / home_table[[2]]
  )
)

# Totals and per-match averages side by side, with readable column headers.
main_home_table <- as.data.frame(cbind(home_table, mean_goals_hf_af))
names_for_home_data <- c("Club","Matches Played","Goals for","Goals against","Mean goals for","Mean goals against")
names(main_home_table) <- names_for_home_data

main_home_table
# Column means of main_home_table[, 2:6] are not computed here (disabled):
# average_home_values <- colMeans(main_home_table[, 2:6])
Visitor Team Data Grouping
# Per-club summaries of matches played away (one row per value of `visitor`).

# Goals scored by each club in its away matches.
data_af <- summarise(
  group_by(main_data, visitor),
  vgoal = sum(vgoal)
)

# Goals conceded by each club in its away matches (goals scored by the host).
data_aa <- summarise(
  group_by(main_data, visitor),
  hgoal = sum(hgoal)
)

# Number of away matches per club. The count column is called `vgoal` even
# though it holds a row count; later code reads it by position, not by name.
data_a_matches <- summarise(
  group_by(main_data, visitor),
  vgoal = n()
)
Visitor Team Table
# One row per visiting club: club, away matches played, goals scored away,
# goals conceded away.
# The club column must come from the visitor grouping (data_af), not from the
# home grouping (data_ha): every other column here is ordered by `visitor`, so
# taking the names from a table ordered by `home` would mislabel rows whenever
# the two groupings differ in membership or order.
visitor_table <- cbind(
  data_af[, 1],
  data_a_matches[, 2],
  data_af[, 2],
  data_aa[, 2]
) %>%
  as.data.frame()

# Per-match averages: goals scored and goals conceded, each divided by the
# number of away matches played.
mean_goals_af_aa <- cbind(
  visitor_table[, 3] / visitor_table[, 2],
  visitor_table[, 4] / visitor_table[, 2]
) %>%
  as.data.frame()

# Totals and per-match averages side by side, with readable column headers.
main_visitor_table <- cbind(visitor_table, mean_goals_af_aa) %>%
  as.data.frame()
names_for_visitor_data <- c("Club","Matches Played","Goals for","Goals against","Mean goals for","Mean goals against")
names(main_visitor_table) <- names_for_visitor_data

main_visitor_table
# Column means of main_visitor_table[, 2:6] are not computed here (disabled):
# average_visitor_values <- colMeans(main_visitor_table[, 2:6])
Attacking - Defensive Strength Variable Creation
# Averages across clubs of every numeric column (matches, totals, per-match
# means), computed separately for the home and the visitor tables.
average_home_values <- colMeans(main_home_table[2:6])
average_visitor_values <- colMeans(main_visitor_table[2:6])

# A strength is a club's own per-match mean divided by the average of that
# mean across all clubs, rounded to two decimals. The "attacking" vectors use
# goals scored; the "defensive" vectors use goals conceded.
attacking_str_home <- round(
  main_home_table[["Mean goals for"]] / average_home_values[["Mean goals for"]],
  digits = 2
)
defensive_str_home <- round(
  main_home_table[["Mean goals against"]] / average_home_values[["Mean goals against"]],
  digits = 2
)

attacking_str_visitor <- round(
  main_visitor_table[["Mean goals for"]] / average_visitor_values[["Mean goals for"]],
  digits = 2
)
defensive_str_visitor <- round(
  main_visitor_table[["Mean goals against"]] / average_visitor_values[["Mean goals against"]],
  digits = 2
)
Attacking - Defensive Strength Table
# Combined strength table: one row per club with its home and visitor
# attacking/defensive strengths.
# Built with data.frame() rather than cbind(): cbind() on plain vectors first
# forms an atomic matrix, which turns every strength into text when the club
# column is character (or replaces a factor club column by its integer codes).
# data.frame() keeps the club as text and the strengths as numbers.
# NOTE(review): rows are paired by position, which assumes the home and visitor
# tables list the same clubs in the same order - confirm for other data sets.
stopifnot(
  length(attacking_str_home) == nrow(main_home_table),
  length(defensive_str_home) == nrow(main_home_table),
  length(attacking_str_visitor) == nrow(main_home_table),
  length(defensive_str_visitor) == nrow(main_home_table)
)
attack_def_table <- data.frame(
  as.character(main_home_table[, 1]),
  attacking_str_home,
  defensive_str_home,
  attacking_str_visitor,
  defensive_str_visitor,
  stringsAsFactors = FALSE
)
names(attack_def_table) <- c("Club","Home Attacking Strength","Home Defensive Strength","Visitor Attacking Strength","Visitor Defensive Strength")

attack_def_table
Desired Teams selected and goal expectancy metrics calculated for both teams
# Fixture to predict.
Home_Team <- "Bayern Munchen"
Visitor_Team <- "Borussia Dortmund"

# Locate each club's row up front. A misspelled or absent club would otherwise
# produce a zero-length goal expectancy that only fails later, in the
# probability step, with an unrelated-looking error.
home_row <- which(attack_def_table$Club == Home_Team)
visitor_row <- which(attack_def_table$Club == Visitor_Team)
if (length(home_row) != 1L || length(visitor_row) != 1L) {
  stop(
    "Home_Team and Visitor_Team must each match exactly one club in attack_def_table",
    call. = FALSE
  )
}

# as.numeric(as.character(.)) is kept so the lookup works whether the strength
# columns are stored as numbers, text or factors.

# Home goal expectancy = home attacking strength (home club)
#   x visitor defensive strength (visiting club) x average home goals per match.
Home_Team_GE_Matrix <-
  as.numeric(as.character(attack_def_table[home_row, "Home Attacking Strength"])) *
  as.numeric(as.character(attack_def_table[visitor_row, "Visitor Defensive Strength"])) *
  average_home_values[[4]]

# Visitor goal expectancy = visitor attacking strength (visiting club)
#   x home defensive strength (home club) x average away goals per match.
Visitor_Team_GE_Matrix <-
  as.numeric(as.character(attack_def_table[visitor_row, "Visitor Attacking Strength"])) *
  as.numeric(as.character(attack_def_table[home_row, "Home Defensive Strength"])) *
  average_visitor_values[[4]]
Probability Matrix 0-10 Goals (Rows denote Home Team Goals and columns denote Visitor Team Goals)
# Scoreline probabilities in percent (two decimals) for 0-10 goals per side.
# Rows are home-team goals, columns are visitor-team goals. Each cell is the
# product of the two Poisson probabilities, i.e. the two goal counts are
# treated as independent. outer() builds the whole grid in one vectorised step
# instead of growing a data frame one cell at a time inside a nested loop.
goal_range <- 0:10
prob_matrix <- as.data.frame(
  round(
    outer(
      dpois(goal_range, Home_Team_GE_Matrix),
      dpois(goal_range, Visitor_Team_GE_Matrix)
    ) * 100,
    2
  )
)
row.names(prob_matrix) <- goal_range
colnames(prob_matrix) <- goal_range
prob_matrix
LS0tDQp0aXRsZTogIkZvb3RiYWxsIEdvYWxzIFByZWRpY3Rpb24gTW9kZWwiDQpvdXRwdXQ6DQogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQNCiAgcGRmX2RvY3VtZW50OiBkZWZhdWx0DQotLS0NCg0KI1N1bW1hcnkNCiMjIyNBcyBhIHF1aWNrIHN1bW1hcnksIHdoYXQgSSBhbSBnb2luZyB0byBkbyBpcyB0YWtlIGhpc3RvcmljYWwgcmVzdWx0cyAobGFzdCA0IHllYXJzLCAyMDEyLTIwMTUgZGF0YSkgdG8gY2FsY3VsYXRlIHRoZSBudW1iZXIgb2YgZ29hbHMgdGVhbXMgc2NvcmUgYW5kIGNvbmNlZGUuIFRoZXNlIGF2ZXJhZ2VzIGFyZSBjb21wYXJlZCB0byB0aGUgbGVhZ3VlIGF2ZXJhZ2UgYW5kIHVzZWQgdG8gY3JlYXRlIHZhbHVlcyBmb3IgYXR0YWNraW5nIHN0cmVuZ3RoIGFuZCBkZWZlbnNpdmUgc3RyZW5ndGggZm9yIGV2ZXJ5IHRlYW0sIHdoaWNoIGFyZSB0aGVuIHR1cm5lZCBpbnRvIGdvYWwgZXhwZWN0YXRpb24gZmlndXJlcy4gVGhpcyBtZXRyaWMgaXMgcHV0IGludG8gYSBQb2lzc29uIERpc3RyaWJ1dGlvbiBmb3JtdWxhIHdoaWNoIHdvcmtzIG91dCB0aGUgcHJvYmFiaWxpdHkgb2YgZXZlcnkgcmVzdWx0IHdoZW4gdHdvIHRlYW1zIGZhY2UgZWFjaCBvdGhlciBhbmQgYSBwcm9iYWJpbGl0eSBtYXRyaXggaXMgY3JlYXRlZCB3aGljaCBjYW4gZnVydGhlciBiZSB1c2VkIHRvIGNhbGN1bGF0ZSBkcmF3IHByb2JhYmlsaXR5IGFuZCB3aW4vbG9zZSBwcm9iYWJpbGl0eSBvZiBib3RoIHRlYW1zLg0KDQojIyNMb2FkaW5nIFBhY2thZ2VzDQpgYGB7cn0NCmxpYnJhcnkoZHBseXIpDQpgYGANCg0KDQojIyNEYXRhIEV4dHJhY3Rpb24NCmBgYHtyfQ0Kc2FtcGxlX2RhdGE8LWxvYWQoIkM6XFxVc2Vyc1xcYW5zaGlcXERvd25sb2Fkc1xcZ2VybWFueS5yZGEiKSAgI2Zyb20gZ2l0aHViIGh0dHBzOi8vZ2l0aHViLmNvbS9qYWxhcGljL2VuZ3NvY2NlcmRhdGEvYmxvYi9tYXN0ZXIvZGF0YS9nZXJtYW55LnJkYQ0KDQpnZXJtYW55X2RhdGE8LWFzLmRhdGEuZnJhbWUoZ2VybWFueSkgICAgIyMjIyMjIyMgZGF0YSB3aXRoIDE2MTIwIHJvd3MgZnJvbSAxOTYzLTIwMTYNCg0KbWFpbl9kYXRhPC1nZXJtYW55X2RhdGEgJT4lIGZpbHRlcihTZWFzb24+MjAxMSkgICAjIGRhdGEgd2l0aCAxMjI0IHJvd3MgMjAxMi0yMDE1DQptYWluX2RhdGENCmBgYA0KDQojIyNIb21lIFRlYW0gRGF0YSBHcm91cGluZw0KYGBge3J9DQpkYXRhX2hmPC1tYWluX2RhdGEgJT4lIGdyb3VwX2J5KGhvbWUpICU+JSBzdW1tYXJpc2UoaGdvYWw9c3VtKGhnb2FsKSkNCmRhdGFfaGE8LW1haW5fZGF0YSAlPiUgZ3JvdXBfYnkoaG9tZSkgJT4lIHN1bW1hcmlzZSh2Z29hbD1zdW0odmdvYWwpKQ0KZGF0YV9oX21hdGNoZXM8LW1haW5fZGF0YSAlPiUgZ3JvdXBfYnkoaG9tZSkgJT4lIHN1bW1hcmlzZSh2Z29hbD1uKCkpDQpgYGANCg0KIyMjSG9tZSBUZWFtIFRhYmxlDQpgYGB7cn0NCg0KaG9tZV90YWJsZTwtY2JpbmQoZGF0YV9oZlssMV0sZGF0YV9oX21hdGNoZXNbLDJd
LGRhdGFfaGZbLDJdLGRhdGFfaGFbLDJdKSAlPiUgYXMuZGF0YS5mcmFtZSgpDQoNCm1lYW5fZ29hbHNfaGZfYWY8LWNiaW5kKGhvbWVfdGFibGVbLDNdL2hvbWVfdGFibGVbLDJdLGhvbWVfdGFibGVbLDRdL2hvbWVfdGFibGVbLDJdKSAlPiUgYXMuZGF0YS5mcmFtZSgpDQptYWluX2hvbWVfdGFibGU8LWNiaW5kKGhvbWVfdGFibGUsbWVhbl9nb2Fsc19oZl9hZikgJT4lIGFzLmRhdGEuZnJhbWUoKQ0KDQpuYW1lc19mb3JfaG9tZV9kYXRhPC1jKCJDbHViIiwiTWF0Y2hlcyBQbGF5ZWQiLCJHb2FscyBmb3IiLCJHb2FscyBhZ2FpbnN0IiwiTWVhbiBnb2FscyBmb3IiLCJNZWFuIGdvYWxzIGFnYWluc3QiKQ0KbmFtZXMobWFpbl9ob21lX3RhYmxlKTwtbmFtZXNfZm9yX2hvbWVfZGF0YQ0KDQptYWluX2hvbWVfdGFibGUNCg0KDQpgYGANCg0KIyMjSG9tZSBUZWFtIFRhYmxlIHdpdGggVG90YWwgYW5kIEF2ZXJhZ2UgZmlndXJlcyBhdCBlbmQNCmBgYHtyfQ0KbWFpbl9ob21lX3RhYmxlX3RvdDwtcmJpbmQobWFpbl9ob21lX3RhYmxlLGMoIlRvdGFsIixjb2xTdW1zKG1haW5faG9tZV90YWJsZVssMjo2XSkpLGMoIkF2ZXJhZ2UiLGFzLm51bWVyaWMoY29sTWVhbnMobWFpbl9ob21lX3RhYmxlWywyOjZdKSkpKSAlPiUgYXMuZGF0YS5mcmFtZSgpDQptYWluX2hvbWVfdGFibGVfdG90DQpgYGANCg0KDQoNCiMjI1Zpc2l0b3IgVGVhbSBEYXRhIEdyb3VwaW5nDQpgYGB7cn0NCg0KZGF0YV9hZjwtbWFpbl9kYXRhICU+JSBncm91cF9ieSh2aXNpdG9yKSAlPiUgc3VtbWFyaXNlKHZnb2FsPXN1bSh2Z29hbCkpDQpkYXRhX2FhPC1tYWluX2RhdGEgJT4lIGdyb3VwX2J5KHZpc2l0b3IpICU+JSBzdW1tYXJpc2UoaGdvYWw9c3VtKGhnb2FsKSkNCmRhdGFfYV9tYXRjaGVzPC1tYWluX2RhdGEgJT4lIGdyb3VwX2J5KHZpc2l0b3IpICU+JSBzdW1tYXJpc2UodmdvYWw9bigpKQ0KDQpgYGANCg0KIyMjVmlzaXRvciBUZWFtIFRhYmxlDQpgYGB7cn0NCg0KdmlzaXRvcl90YWJsZTwtY2JpbmQoZGF0YV9oYVssMV0sZGF0YV9hX21hdGNoZXNbLDJdLGRhdGFfYWZbLDJdLGRhdGFfYWFbLDJdKSAlPiUgYXMuZGF0YS5mcmFtZSgpDQoNCm1lYW5fZ29hbHNfYWZfYWE8LWNiaW5kKHZpc2l0b3JfdGFibGVbLDNdL3Zpc2l0b3JfdGFibGVbLDJdLHZpc2l0b3JfdGFibGVbLDRdL3Zpc2l0b3JfdGFibGVbLDJdKSAlPiUgYXMuZGF0YS5mcmFtZSgpDQptYWluX3Zpc2l0b3JfdGFibGU8LWNiaW5kKHZpc2l0b3JfdGFibGUsbWVhbl9nb2Fsc19hZl9hYSkgJT4lIGFzLmRhdGEuZnJhbWUoKQ0KDQpuYW1lc19mb3JfdmlzaXRvcl9kYXRhPC1jKCJDbHViIiwiTWF0Y2hlcyBQbGF5ZWQiLCJHb2FscyBmb3IiLCJHb2FscyBhZ2FpbnN0IiwiTWVhbiBnb2FscyBmb3IiLCJNZWFuIGdvYWxzIGFnYWluc3QiKQ0KbmFtZXMobWFpbl92aXNpdG9yX3RhYmxlKTwtbmFtZXNfZm9yX3Zpc2l0b3JfZGF0YQ0KDQptYWluX3Zpc2l0b3JfdGFibGUNCg0KYGBgDQoNCiMjI1Zpc2l0b3IgVGVh
bSBUYWJsZSB3aXRoIFRvdGFsIGFuZCBBdmVyYWdlIGZpZ3VyZXMgYXQgZW5kDQpgYGB7cn0NCm1haW5fdmlzaXRvcl90YWJsZV90b3Q8LXJiaW5kKG1haW5fdmlzaXRvcl90YWJsZSxjKCJUb3RhbCIsY29sU3VtcyhtYWluX3Zpc2l0b3JfdGFibGVbLDI6Nl0pKSxjKCJBdmVyYWdlIiwoY29sTWVhbnMobWFpbl92aXNpdG9yX3RhYmxlWywyOjZdKSkpKSAlPiUgYXMuZGF0YS5mcmFtZSgpDQoNCg0KbWFpbl92aXNpdG9yX3RhYmxlX3RvdA0KYGBgDQoNCiMjIyBBdHRhY2tpbmcgLSBEZWZlbnNpdmUgU3RyZW5ndGggVmFyaWFibGUgQ3JlYXRpb24NCmBgYHtyfQ0KYXZlcmFnZV9ob21lX3ZhbHVlczwtY29sTWVhbnMobWFpbl9ob21lX3RhYmxlWywyOjZdKQ0KYXZlcmFnZV92aXNpdG9yX3ZhbHVlczwtY29sTWVhbnMobWFpbl92aXNpdG9yX3RhYmxlWywyOjZdKQ0KDQphdHRhY2tpbmdfc3RyX2hvbWU8LXJvdW5kKG1haW5faG9tZV90YWJsZVssNV0vYXZlcmFnZV9ob21lX3ZhbHVlc1tbNF1dLDIpDQpkZWZlbnNpdmVfc3RyX2hvbWU8LXJvdW5kKG1haW5faG9tZV90YWJsZVssNl0vYXZlcmFnZV9ob21lX3ZhbHVlc1tbNV1dLDIpDQoNCmF0dGFja2luZ19zdHJfdmlzaXRvcjwtcm91bmQobWFpbl92aXNpdG9yX3RhYmxlWyw1XS9hdmVyYWdlX3Zpc2l0b3JfdmFsdWVzW1s0XV0sMikNCmRlZmVuc2l2ZV9zdHJfdmlzaXRvcjwtcm91bmQobWFpbl92aXNpdG9yX3RhYmxlWyw2XS9hdmVyYWdlX3Zpc2l0b3JfdmFsdWVzW1s1XV0sMikNCmBgYA0KDQojIyNBdHRhY2tpbmcgLSBEZWZlbnNpdmUgU3RyZW5ndGggVGFibGUNCg0KYGBge3J9DQphdHRhY2tfZGVmX3RhYmxlPC1jYmluZChtYWluX2hvbWVfdGFibGVbLDFdLGF0dGFja2luZ19zdHJfaG9tZSxkZWZlbnNpdmVfc3RyX2hvbWUsYXR0YWNraW5nX3N0cl92aXNpdG9yLGRlZmVuc2l2ZV9zdHJfdmlzaXRvcikgJT4lIGFzLmRhdGEuZnJhbWUoKQ0KbmFtZXMoYXR0YWNrX2RlZl90YWJsZSk8LWMoIkNsdWIiLCJIb21lIEF0dGFja2luZyBTdHJlbmd0aCIsIkhvbWUgRGVmZW5zaXZlIFN0cmVuZ3RoIiwiVmlzaXRvciBBdHRhY2tpbmcgU3RyZW5ndGgiLCJWaXNpdG9yIERlZmVuc2l2ZSBTdHJlbmd0aCIpDQoNCmF0dGFja19kZWZfdGFibGUNCmBgYA0KDQojIyNEZXNpcmVkIFRlYW1zIHNlbGVjdGVkIGFuZCBnb2FsIGV4cGVjdGFuY3kgbWV0cmljcyBjYWxjdWxhdGVkIGZvciBib3RoIHRlYW1zDQoNCmBgYHtyfQ0KSG9tZV9UZWFtPC0iQmF5ZXJuIE11bmNoZW4iDQpWaXNpdG9yX1RlYW08LSJCb3J1c3NpYSBEb3J0bXVuZCINCg0KDQpIb21lX1RlYW1fR0VfTWF0cml4PC1hcy5udW1lcmljKGFzLmNoYXJhY3RlcihhdHRhY2tfZGVmX3RhYmxlW2F0dGFja19kZWZfdGFibGUkQ2x1Yj09SG9tZV9UZWFtLDJdKSkqYXMubnVtZXJpYyhhcy5jaGFyYWN0ZXIoYXR0YWNrX2RlZl90YWJsZVthdHRhY2tfZGVmX3RhYmxlJENsdWI9PVZpc2l0b3JfVGVhbSw1XSkpKmF2ZXJhZ2VfaG9tZV92YWx1ZXNbWzRdXSAj
SG9tZSBUZWFtIEdvYWwgRXhwZWN0YW5jeT0gSG9tZSBhdHRhY2tpbmcgc3RyZW5ndGggeCBBd2F5IGRlZmVuc2l2ZSBzdHJlbmd0aCB4IEF2ZXJhZ2UgZ29hbHMgaG9tZQ0KDQpWaXNpdG9yX1RlYW1fR0VfTWF0cml4PC1hcy5udW1lcmljKGFzLmNoYXJhY3RlcihhdHRhY2tfZGVmX3RhYmxlW2F0dGFja19kZWZfdGFibGUkQ2x1Yj09VmlzaXRvcl9UZWFtLDRdKSkqYXMubnVtZXJpYyhhcy5jaGFyYWN0ZXIoYXR0YWNrX2RlZl90YWJsZVthdHRhY2tfZGVmX3RhYmxlJENsdWI9PUhvbWVfVGVhbSwzXSkpKmF2ZXJhZ2VfdmlzaXRvcl92YWx1ZXNbWzRdXSAjQXdheSBUZWFtIEdvYWwgRXhwZWN0YW5jeT0gQXdheSBhdHRhY2tpbmcgc3RyZW5ndGggeCBIb21lIGRlZmVuc2l2ZSBzdHJlbmd0aCB4IEF2ZXJhZ2UgZ29hbHMgYXdheQ0KDQpgYGANCg0KIyMjUHJvYmFiaWxpdHkgTWF0cml4IDAtMTAgR29hbHMgKFJvd3MgZGVub3RlIEhvbWUgVGVhbSBHb2FscyBhbmQgY29sdW1ucyBkZW5vdGUgVmlzaXRvciBUZWFtIEdvYWxzKSANCmBgYHtyfQ0KcHJvYl9tYXRyaXg8LWRhdGEuZnJhbWUoKQ0KDQpmb3IgKGkgaW4gMDoxMCApeyAgICAgICMgaT0gSG9tZSBUZWFtIEdvYWxzDQogIA0KICBmb3IgKGogaW4gMDoxMCl7ICAgICAgIyBqPSBWaXNpdG9yIFRlYW0gR29hbHMNCiAgICANCiAgICBwcm9iX21hdHJpeFtpKzEsaisxXTwtcm91bmQoZHBvaXMoaSxIb21lX1RlYW1fR0VfTWF0cml4KSpkcG9pcyhqLFZpc2l0b3JfVGVhbV9HRV9NYXRyaXgpKjEwMCwyKQ0KICAgIA0KICB9DQp9DQoNCnJvdy5uYW1lcyhwcm9iX21hdHJpeCk8LTA6MTANCmNvbG5hbWVzKHByb2JfbWF0cml4KTwtMDoxMA0KcHJvYl9tYXRyaXgNCmBgYA0KDQo=