Summary

In brief, I take historical results (the last four seasons, 2012-2015) and calculate the average number of goals each team scores and concedes per match. These averages are compared with the league average to give every team an attacking strength and a defensive strength, which are then turned into goal expectation figures for a given fixture. The goal expectations are fed into the Poisson distribution formula, which gives the probability of every scoreline when the two teams face each other. The result is a probability matrix that can then be used to calculate the probability of a draw and of a win or loss for each team.

Loading Packages

library(dplyr)

Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

Data Extraction

# Load the German league results published by the engsoccerdata project:
# https://github.com/jalapic/engsoccerdata/blob/master/data/germany.rda
# load() restores the saved object(s) into the workspace and returns their
# names, so `sample_data` holds object names rather than the match data.
sample_data <- load("C:\\Users\\anshi\\Downloads\\germany.rda")

# Complete history as a plain data frame (16120 rows, 1963-2016).
germany_data <- as.data.frame(germany)

# Restrict to seasons after 2011 (1224 rows, 2012-2015) and display them.
main_data <- filter(germany_data, Season > 2011)
main_data

Home Team Data Grouping

# Per-club totals for home fixtures. All three summaries are grouped by
# `home`, so they list the clubs in the same order and can later be combined
# by position.

# Goals scored by each club in its home matches.
data_hf <- main_data %>%
  group_by(home) %>%
  summarise(hgoal = sum(hgoal))

# Goals conceded by each club in its home matches (the visitors' goals).
data_ha <- main_data %>%
  group_by(home) %>%
  summarise(vgoal = sum(vgoal))

# Number of home matches played by each club. The count column is named
# `matches` rather than `vgoal`: it is not a goal total, and a second `vgoal`
# column would create a duplicate name once the summaries are bound together.
data_h_matches <- main_data %>%
  group_by(home) %>%
  summarise(matches = n())

Home Team Table

# Readable labels for the six columns of the finished home table.
names_for_home_data <- c(
  "Club", "Matches Played", "Goals for", "Goals against",
  "Mean goals for", "Mean goals against"
)

# Raw home table: club, home matches played, goals scored, goals conceded.
home_table <- as.data.frame(
  cbind(data_hf[, 1], data_h_matches[, 2], data_hf[, 2], data_ha[, 2])
)

# Per-match averages: goals scored and goals conceded, each divided by the
# number of home matches played.
mean_goals_hf_af <- as.data.frame(
  cbind(
    home_table[, 3] / home_table[, 2],
    home_table[, 4] / home_table[, 2]
  )
)

# Totals and averages side by side, relabelled and displayed.
main_home_table <- as.data.frame(cbind(home_table, mean_goals_hf_af))
names(main_home_table) <- names_for_home_data
main_home_table
# The column averages of this table are computed further down, where the
# attacking and defensive strengths are derived.

Home Team Table with Total and Average figures at end

# Home table with a "Total" row and an "Average" row appended.
#
# The summary rows are built as one-row data frames rather than character
# vectors: c("Total", <numbers>) coerces every value to text, and binding
# such a vector onto the table turns all of its numeric columns into
# character strings.
home_numeric_cols <- main_home_table[, 2:6]
home_total_row <- data.frame(
  Club = "Total",
  t(colSums(home_numeric_cols)),
  check.names = FALSE  # keep labels such as "Matches Played" unmangled
)
home_average_row <- data.frame(
  Club = "Average",
  t(colMeans(home_numeric_cols)),
  check.names = FALSE
)

# rbind() on data frames matches columns by name, so the numeric columns of
# the combined table stay numeric.
main_home_table_tot <- rbind(main_home_table, home_total_row, home_average_row)
main_home_table_tot

Visitor Team Data Grouping

# Per-club totals for away fixtures. All three summaries are grouped by
# `visitor`, so they list the clubs in the same order and can later be
# combined by position.

# Goals scored by each club in its away matches.
data_af <- main_data %>%
  group_by(visitor) %>%
  summarise(vgoal = sum(vgoal))

# Goals conceded by each club in its away matches (the hosts' goals).
data_aa <- main_data %>%
  group_by(visitor) %>%
  summarise(hgoal = sum(hgoal))

# Number of away matches played by each club. The count column is named
# `matches` rather than `vgoal`: it is not a goal total, and a second `vgoal`
# column would create a duplicate name once the summaries are bound together.
data_a_matches <- main_data %>%
  group_by(visitor) %>%
  summarise(matches = n())

Visitor Team Table

# Raw visitor table: club, away matches played, goals scored away, goals
# conceded away.
# The club column comes from the visitor-grouped summary (`data_af`), not the
# home-grouped `data_ha`, so each name is guaranteed to belong to the same
# group as the figures next to it.
visitor_table <- as.data.frame(
  cbind(data_af[, 1], data_a_matches[, 2], data_af[, 2], data_aa[, 2])
)

# Per-match averages: goals scored and goals conceded, each divided by the
# number of away matches played.
mean_goals_af_aa <- as.data.frame(
  cbind(
    visitor_table[, 3] / visitor_table[, 2],
    visitor_table[, 4] / visitor_table[, 2]
  )
)

# Totals and averages side by side, relabelled and displayed.
main_visitor_table <- as.data.frame(cbind(visitor_table, mean_goals_af_aa))
names_for_visitor_data <- c(
  "Club", "Matches Played", "Goals for", "Goals against",
  "Mean goals for", "Mean goals against"
)
names(main_visitor_table) <- names_for_visitor_data
main_visitor_table

Visitor Team Table with Total and Average figures at end

# Visitor table with a "Total" row and an "Average" row appended.
#
# The summary rows are built as one-row data frames rather than character
# vectors: c("Total", <numbers>) coerces every value to text, and binding
# such a vector onto the table turns all of its numeric columns into
# character strings.
visitor_numeric_cols <- main_visitor_table[, 2:6]
visitor_total_row <- data.frame(
  Club = "Total",
  t(colSums(visitor_numeric_cols)),
  check.names = FALSE  # keep labels such as "Matches Played" unmangled
)
visitor_average_row <- data.frame(
  Club = "Average",
  t(colMeans(visitor_numeric_cols)),
  check.names = FALSE
)

# rbind() on data frames matches columns by name, so the numeric columns of
# the combined table stay numeric.
main_visitor_table_tot <- rbind(
  main_visitor_table,
  visitor_total_row,
  visitor_average_row
)
main_visitor_table_tot

Attacking - Defensive Strength Variable Creation

# Column averages across clubs for the five numeric columns of each table
# (matches played, goals for/against, mean goals for/against).
# NOTE(review): these are unweighted means of the per-club figures, not
# total goals divided by total matches - confirm that this is intended.
average_home_values <- colMeans(main_home_table[, 2:6])
average_visitor_values <- colMeans(main_visitor_table[, 2:6])

# Strength = a club's per-match mean divided by the average of that mean
# across clubs, rounded to two decimals.
# NOTE(review): the rounded values are what the later goal-expectancy step
# reads - confirm the loss of precision is acceptable.
attacking_str_home <- round(
  main_home_table[["Mean goals for"]] /
    average_home_values[["Mean goals for"]],
  2
)
defensive_str_home <- round(
  main_home_table[["Mean goals against"]] /
    average_home_values[["Mean goals against"]],
  2
)

attacking_str_visitor <- round(
  main_visitor_table[["Mean goals for"]] /
    average_visitor_values[["Mean goals for"]],
  2
)
defensive_str_visitor <- round(
  main_visitor_table[["Mean goals against"]] /
    average_visitor_values[["Mean goals against"]],
  2
)

Attacking - Defensive Strength Table

# One row per club with its four strength ratings.
#
# The table is built with data.frame() instead of cbind(): cbind() on a
# character vector of club names plus numeric vectors produces a character
# matrix, which stores every strength as text. data.frame() keeps the club
# names as text and the strengths as numbers.
# NOTE(review): home and visitor strengths are paired by row position, which
# assumes both tables list the same clubs in the same order - confirm.
attack_def_table <- data.frame(
  main_home_table[, 1],
  attacking_str_home,
  defensive_str_home,
  attacking_str_visitor,
  defensive_str_visitor,
  stringsAsFactors = FALSE
)
names(attack_def_table) <- c(
  "Club",
  "Home Attacking Strength",
  "Home Defensive Strength",
  "Visitor Attacking Strength",
  "Visitor Defensive Strength"
)
attack_def_table

Desired Teams selected and goal expectancy metrics calculated for both teams

# Fixture to model.
Home_Team <- "Bayern Munchen"
Visitor_Team <- "Borussia Dortmund"

# Locate each club's row and fail fast if a name is misspelled or absent.
# Without this check an unknown club yields a zero-length expectancy that
# only fails later, with an unrelated-looking error, when the probability
# matrix is filled.
home_team_row <- which(attack_def_table$Club == Home_Team)
visitor_team_row <- which(attack_def_table$Club == Visitor_Team)
if (length(home_team_row) != 1L) {
  stop(
    "Home team must match exactly one club in attack_def_table: ",
    Home_Team,
    call. = FALSE
  )
}
if (length(visitor_team_row) != 1L) {
  stop(
    "Visitor team must match exactly one club in attack_def_table: ",
    Visitor_Team,
    call. = FALSE
  )
}

# Despite the "_Matrix" suffix, both results are single numbers.
# as.numeric(as.character(.)) is kept so the lookup works whether the
# strength columns are stored as numbers, text or factors.

# Home goal expectancy = home attacking strength (column 2) of the home team
# x visitor defensive strength (column 5) of the visiting team
# x average home goals per match.
Home_Team_GE_Matrix <-
  as.numeric(as.character(attack_def_table[home_team_row, 2])) *
  as.numeric(as.character(attack_def_table[visitor_team_row, 5])) *
  average_home_values[[4]]

# Visitor goal expectancy = visitor attacking strength (column 4) of the
# visiting team x home defensive strength (column 3) of the home team
# x average away goals per match.
Visitor_Team_GE_Matrix <-
  as.numeric(as.character(attack_def_table[visitor_team_row, 4])) *
  as.numeric(as.character(attack_def_table[home_team_row, 3])) *
  average_visitor_values[[4]]

Probability Matrix 0-10 Goals (Rows denote Home Team Goals and columns denote Visitor Team Goals)

# Scoreline probabilities (in percent, two decimals) for 0-10 goals per side.
# Rows are home-team goals, columns are visitor-team goals.
#
# The two Poisson goal distributions are combined with outer(), whose [i, j]
# entry is P(home scores i - 1) * P(visitor scores j - 1). This replaces
# growing an empty data frame one cell at a time inside a nested loop and
# produces the same values.
home_goal_prob <- dpois(0:10, Home_Team_GE_Matrix)
visitor_goal_prob <- dpois(0:10, Visitor_Team_GE_Matrix)

prob_matrix <- as.data.frame(
  round(outer(home_goal_prob, visitor_goal_prob) * 100, 2)
)

# Label rows and columns with the goal counts they stand for.
row.names(prob_matrix) <- 0:10
colnames(prob_matrix) <- 0:10
prob_matrix
LS0tDQp0aXRsZTogIkZvb3RiYWxsIEdvYWxzIFByZWRpY3Rpb24gTW9kZWwiDQpvdXRwdXQ6DQogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQNCiAgcGRmX2RvY3VtZW50OiBkZWZhdWx0DQotLS0NCg0KI1N1bW1hcnkNCiMjIyNBcyBhIHF1aWNrIHN1bW1hcnksIHdoYXQgSSBhbSBnb2luZyB0byBkbyBpcyB0YWtlIGhpc3RvcmljYWwgcmVzdWx0cyAobGFzdCA0IHllYXJzLCAyMDEyLTIwMTUgZGF0YSkgdG8gY2FsY3VsYXRlIHRoZSBudW1iZXIgb2YgZ29hbHMgdGVhbXMgc2NvcmUgYW5kIGNvbmNlZGUuIFRoZXNlIGF2ZXJhZ2VzIGFyZSBjb21wYXJlZCB0byB0aGUgbGVhZ3VlIGF2ZXJhZ2UgYW5kIHVzZWQgdG8gY3JlYXRlIHZhbHVlcyBmb3IgYXR0YWNraW5nIHN0cmVuZ3RoIGFuZCBkZWZlbnNpdmUgc3RyZW5ndGggZm9yIGV2ZXJ5IHRlYW0sIHdoaWNoIGFyZSB0aGVuIHR1cm5lZCBpbnRvIGdvYWwgZXhwZWN0YXRpb24gZmlndXJlcy4gVGhpcyBtZXRyaWMgaXMgcHV0IGludG8gYSBQb2lzc29uIERpc3RyaWJ1dGlvbiBmb3JtdWxhIHdoaWNoIHdvcmtzIG91dCB0aGUgcHJvYmFiaWxpdHkgb2YgZXZlcnkgcmVzdWx0IHdoZW4gdHdvIHRlYW1zIGZhY2UgZWFjaCBvdGhlciBhbmQgYSBwcm9iYWJpbGl0eSBtYXRyaXggaXMgY3JlYXRlZCB3aGljaCBjYW4gZnVydGhlciBiZSB1c2VkIHRvIGNhbGN1bGF0ZSBkcmF3IHByb2JhYmlsaXR5IGFuZCB3aW4vbG9zZSBwcm9iYWJpbGl0eSBvZiBib3RoIHRlYW1zLg0KDQojIyNMb2FkaW5nIFBhY2thZ2VzDQpgYGB7cn0NCmxpYnJhcnkoZHBseXIpDQpgYGANCg0KDQojIyNEYXRhIEV4dHJhY3Rpb24NCmBgYHtyfQ0Kc2FtcGxlX2RhdGE8LWxvYWQoIkM6XFxVc2Vyc1xcYW5zaGlcXERvd25sb2Fkc1xcZ2VybWFueS5yZGEiKSAgI2Zyb20gZ2l0aHViIGh0dHBzOi8vZ2l0aHViLmNvbS9qYWxhcGljL2VuZ3NvY2NlcmRhdGEvYmxvYi9tYXN0ZXIvZGF0YS9nZXJtYW55LnJkYQ0KDQpnZXJtYW55X2RhdGE8LWFzLmRhdGEuZnJhbWUoZ2VybWFueSkgICAgIyMjIyMjIyMgZGF0YSB3aXRoIDE2MTIwIHJvd3MgZnJvbSAxOTYzLTIwMTYNCg0KbWFpbl9kYXRhPC1nZXJtYW55X2RhdGEgJT4lIGZpbHRlcihTZWFzb24+MjAxMSkgICAjIGRhdGEgd2l0aCAxMjI0IHJvd3MgMjAxMi0yMDE1DQptYWluX2RhdGENCmBgYA0KDQojIyNIb21lIFRlYW0gRGF0YSBHcm91cGluZw0KYGBge3J9DQpkYXRhX2hmPC1tYWluX2RhdGEgJT4lIGdyb3VwX2J5KGhvbWUpICU+JSBzdW1tYXJpc2UoaGdvYWw9c3VtKGhnb2FsKSkNCmRhdGFfaGE8LW1haW5fZGF0YSAlPiUgZ3JvdXBfYnkoaG9tZSkgJT4lIHN1bW1hcmlzZSh2Z29hbD1zdW0odmdvYWwpKQ0KZGF0YV9oX21hdGNoZXM8LW1haW5fZGF0YSAlPiUgZ3JvdXBfYnkoaG9tZSkgJT4lIHN1bW1hcmlzZSh2Z29hbD1uKCkpDQpgYGANCg0KIyMjSG9tZSBUZWFtIFRhYmxlDQpgYGB7cn0NCg0KaG9tZV90YWJsZTwtY2JpbmQoZGF0YV9oZlssMV0sZGF0YV9oX21hdGNoZXNbLDJd
LGRhdGFfaGZbLDJdLGRhdGFfaGFbLDJdKSAlPiUgYXMuZGF0YS5mcmFtZSgpDQoNCm1lYW5fZ29hbHNfaGZfYWY8LWNiaW5kKGhvbWVfdGFibGVbLDNdL2hvbWVfdGFibGVbLDJdLGhvbWVfdGFibGVbLDRdL2hvbWVfdGFibGVbLDJdKSAlPiUgYXMuZGF0YS5mcmFtZSgpDQptYWluX2hvbWVfdGFibGU8LWNiaW5kKGhvbWVfdGFibGUsbWVhbl9nb2Fsc19oZl9hZikgJT4lIGFzLmRhdGEuZnJhbWUoKQ0KDQpuYW1lc19mb3JfaG9tZV9kYXRhPC1jKCJDbHViIiwiTWF0Y2hlcyBQbGF5ZWQiLCJHb2FscyBmb3IiLCJHb2FscyBhZ2FpbnN0IiwiTWVhbiBnb2FscyBmb3IiLCJNZWFuIGdvYWxzIGFnYWluc3QiKQ0KbmFtZXMobWFpbl9ob21lX3RhYmxlKTwtbmFtZXNfZm9yX2hvbWVfZGF0YQ0KDQptYWluX2hvbWVfdGFibGUNCg0KDQpgYGANCg0KIyMjSG9tZSBUZWFtIFRhYmxlIHdpdGggVG90YWwgYW5kIEF2ZXJhZ2UgZmlndXJlcyBhdCBlbmQNCmBgYHtyfQ0KbWFpbl9ob21lX3RhYmxlX3RvdDwtcmJpbmQobWFpbl9ob21lX3RhYmxlLGMoIlRvdGFsIixjb2xTdW1zKG1haW5faG9tZV90YWJsZVssMjo2XSkpLGMoIkF2ZXJhZ2UiLGFzLm51bWVyaWMoY29sTWVhbnMobWFpbl9ob21lX3RhYmxlWywyOjZdKSkpKSAlPiUgYXMuZGF0YS5mcmFtZSgpDQptYWluX2hvbWVfdGFibGVfdG90DQpgYGANCg0KDQoNCiMjI1Zpc2l0b3IgVGVhbSBEYXRhIEdyb3VwaW5nDQpgYGB7cn0NCg0KZGF0YV9hZjwtbWFpbl9kYXRhICU+JSBncm91cF9ieSh2aXNpdG9yKSAlPiUgc3VtbWFyaXNlKHZnb2FsPXN1bSh2Z29hbCkpDQpkYXRhX2FhPC1tYWluX2RhdGEgJT4lIGdyb3VwX2J5KHZpc2l0b3IpICU+JSBzdW1tYXJpc2UoaGdvYWw9c3VtKGhnb2FsKSkNCmRhdGFfYV9tYXRjaGVzPC1tYWluX2RhdGEgJT4lIGdyb3VwX2J5KHZpc2l0b3IpICU+JSBzdW1tYXJpc2UodmdvYWw9bigpKQ0KDQpgYGANCg0KIyMjVmlzaXRvciBUZWFtIFRhYmxlDQpgYGB7cn0NCg0KdmlzaXRvcl90YWJsZTwtY2JpbmQoZGF0YV9oYVssMV0sZGF0YV9hX21hdGNoZXNbLDJdLGRhdGFfYWZbLDJdLGRhdGFfYWFbLDJdKSAlPiUgYXMuZGF0YS5mcmFtZSgpDQoNCm1lYW5fZ29hbHNfYWZfYWE8LWNiaW5kKHZpc2l0b3JfdGFibGVbLDNdL3Zpc2l0b3JfdGFibGVbLDJdLHZpc2l0b3JfdGFibGVbLDRdL3Zpc2l0b3JfdGFibGVbLDJdKSAlPiUgYXMuZGF0YS5mcmFtZSgpDQptYWluX3Zpc2l0b3JfdGFibGU8LWNiaW5kKHZpc2l0b3JfdGFibGUsbWVhbl9nb2Fsc19hZl9hYSkgJT4lIGFzLmRhdGEuZnJhbWUoKQ0KDQpuYW1lc19mb3JfdmlzaXRvcl9kYXRhPC1jKCJDbHViIiwiTWF0Y2hlcyBQbGF5ZWQiLCJHb2FscyBmb3IiLCJHb2FscyBhZ2FpbnN0IiwiTWVhbiBnb2FscyBmb3IiLCJNZWFuIGdvYWxzIGFnYWluc3QiKQ0KbmFtZXMobWFpbl92aXNpdG9yX3RhYmxlKTwtbmFtZXNfZm9yX3Zpc2l0b3JfZGF0YQ0KDQptYWluX3Zpc2l0b3JfdGFibGUNCg0KYGBgDQoNCiMjI1Zpc2l0b3IgVGVh
bSBUYWJsZSB3aXRoIFRvdGFsIGFuZCBBdmVyYWdlIGZpZ3VyZXMgYXQgZW5kDQpgYGB7cn0NCm1haW5fdmlzaXRvcl90YWJsZV90b3Q8LXJiaW5kKG1haW5fdmlzaXRvcl90YWJsZSxjKCJUb3RhbCIsY29sU3VtcyhtYWluX3Zpc2l0b3JfdGFibGVbLDI6Nl0pKSxjKCJBdmVyYWdlIiwoY29sTWVhbnMobWFpbl92aXNpdG9yX3RhYmxlWywyOjZdKSkpKSAlPiUgYXMuZGF0YS5mcmFtZSgpDQoNCg0KbWFpbl92aXNpdG9yX3RhYmxlX3RvdA0KYGBgDQoNCiMjIyBBdHRhY2tpbmcgLSBEZWZlbnNpdmUgU3RyZW5ndGggVmFyaWFibGUgQ3JlYXRpb24NCmBgYHtyfQ0KYXZlcmFnZV9ob21lX3ZhbHVlczwtY29sTWVhbnMobWFpbl9ob21lX3RhYmxlWywyOjZdKQ0KYXZlcmFnZV92aXNpdG9yX3ZhbHVlczwtY29sTWVhbnMobWFpbl92aXNpdG9yX3RhYmxlWywyOjZdKQ0KDQphdHRhY2tpbmdfc3RyX2hvbWU8LXJvdW5kKG1haW5faG9tZV90YWJsZVssNV0vYXZlcmFnZV9ob21lX3ZhbHVlc1tbNF1dLDIpDQpkZWZlbnNpdmVfc3RyX2hvbWU8LXJvdW5kKG1haW5faG9tZV90YWJsZVssNl0vYXZlcmFnZV9ob21lX3ZhbHVlc1tbNV1dLDIpDQoNCmF0dGFja2luZ19zdHJfdmlzaXRvcjwtcm91bmQobWFpbl92aXNpdG9yX3RhYmxlWyw1XS9hdmVyYWdlX3Zpc2l0b3JfdmFsdWVzW1s0XV0sMikNCmRlZmVuc2l2ZV9zdHJfdmlzaXRvcjwtcm91bmQobWFpbl92aXNpdG9yX3RhYmxlWyw2XS9hdmVyYWdlX3Zpc2l0b3JfdmFsdWVzW1s1XV0sMikNCmBgYA0KDQojIyNBdHRhY2tpbmcgLSBEZWZlbnNpdmUgU3RyZW5ndGggVGFibGUNCg0KYGBge3J9DQphdHRhY2tfZGVmX3RhYmxlPC1jYmluZChtYWluX2hvbWVfdGFibGVbLDFdLGF0dGFja2luZ19zdHJfaG9tZSxkZWZlbnNpdmVfc3RyX2hvbWUsYXR0YWNraW5nX3N0cl92aXNpdG9yLGRlZmVuc2l2ZV9zdHJfdmlzaXRvcikgJT4lIGFzLmRhdGEuZnJhbWUoKQ0KbmFtZXMoYXR0YWNrX2RlZl90YWJsZSk8LWMoIkNsdWIiLCJIb21lIEF0dGFja2luZyBTdHJlbmd0aCIsIkhvbWUgRGVmZW5zaXZlIFN0cmVuZ3RoIiwiVmlzaXRvciBBdHRhY2tpbmcgU3RyZW5ndGgiLCJWaXNpdG9yIERlZmVuc2l2ZSBTdHJlbmd0aCIpDQoNCmF0dGFja19kZWZfdGFibGUNCmBgYA0KDQojIyNEZXNpcmVkIFRlYW1zIHNlbGVjdGVkIGFuZCBnb2FsIGV4cGVjdGFuY3kgbWV0cmljcyBjYWxjdWxhdGVkIGZvciBib3RoIHRlYW1zDQoNCmBgYHtyfQ0KSG9tZV9UZWFtPC0iQmF5ZXJuIE11bmNoZW4iDQpWaXNpdG9yX1RlYW08LSJCb3J1c3NpYSBEb3J0bXVuZCINCg0KDQpIb21lX1RlYW1fR0VfTWF0cml4PC1hcy5udW1lcmljKGFzLmNoYXJhY3RlcihhdHRhY2tfZGVmX3RhYmxlW2F0dGFja19kZWZfdGFibGUkQ2x1Yj09SG9tZV9UZWFtLDJdKSkqYXMubnVtZXJpYyhhcy5jaGFyYWN0ZXIoYXR0YWNrX2RlZl90YWJsZVthdHRhY2tfZGVmX3RhYmxlJENsdWI9PVZpc2l0b3JfVGVhbSw1XSkpKmF2ZXJhZ2VfaG9tZV92YWx1ZXNbWzRdXSAj
SG9tZSBUZWFtIEdvYWwgRXhwZWN0YW5jeT0gSG9tZSBhdHRhY2tpbmcgc3RyZW5ndGggeCBBd2F5IGRlZmVuc2l2ZSBzdHJlbmd0aCB4IEF2ZXJhZ2UgZ29hbHMgaG9tZQ0KDQpWaXNpdG9yX1RlYW1fR0VfTWF0cml4PC1hcy5udW1lcmljKGFzLmNoYXJhY3RlcihhdHRhY2tfZGVmX3RhYmxlW2F0dGFja19kZWZfdGFibGUkQ2x1Yj09VmlzaXRvcl9UZWFtLDRdKSkqYXMubnVtZXJpYyhhcy5jaGFyYWN0ZXIoYXR0YWNrX2RlZl90YWJsZVthdHRhY2tfZGVmX3RhYmxlJENsdWI9PUhvbWVfVGVhbSwzXSkpKmF2ZXJhZ2VfdmlzaXRvcl92YWx1ZXNbWzRdXSAjQXdheSBUZWFtIEdvYWwgRXhwZWN0YW5jeT0gQXdheSBhdHRhY2tpbmcgc3RyZW5ndGggeCBIb21lIGRlZmVuc2l2ZSBzdHJlbmd0aCB4IEF2ZXJhZ2UgZ29hbHMgYXdheQ0KDQpgYGANCg0KIyMjUHJvYmFiaWxpdHkgTWF0cml4IDAtMTAgR29hbHMgKFJvd3MgZGVub3RlIEhvbWUgVGVhbSBHb2FscyBhbmQgY29sdW1ucyBkZW5vdGUgVmlzaXRvciBUZWFtIEdvYWxzKSANCmBgYHtyfQ0KcHJvYl9tYXRyaXg8LWRhdGEuZnJhbWUoKQ0KDQpmb3IgKGkgaW4gMDoxMCApeyAgICAgICMgaT0gSG9tZSBUZWFtIEdvYWxzDQogIA0KICBmb3IgKGogaW4gMDoxMCl7ICAgICAgIyBqPSBWaXNpdG9yIFRlYW0gR29hbHMNCiAgICANCiAgICBwcm9iX21hdHJpeFtpKzEsaisxXTwtcm91bmQoZHBvaXMoaSxIb21lX1RlYW1fR0VfTWF0cml4KSpkcG9pcyhqLFZpc2l0b3JfVGVhbV9HRV9NYXRyaXgpKjEwMCwyKQ0KICAgIA0KICB9DQp9DQoNCnJvdy5uYW1lcyhwcm9iX21hdHJpeCk8LTA6MTANCmNvbG5hbWVzKHByb2JfbWF0cml4KTwtMDoxMA0KcHJvYl9tYXRyaXgNCmBgYA0KDQo=