# Attach vcd, which ships the Bundesliga match results
# (run install.packages("vcd") first if the package is not installed)
library(vcd)
# Load the dataset into the workspace and preview its first five matches
data("Bundesliga", package = "vcd")
head(Bundesliga, n = 5L)
##              HomeTeam             AwayTeam HomeGoals AwayGoals Round Year
## 1       Werder Bremen    Borussia Dortmund         3         2     1 1963
## 2   Hertha BSC Berlin      1. FC Nuernberg         1         1     1 1963
## 3   Preussen Muenster         Hamburger SV         1         1     1 1963
## 4 Eintracht Frankfurt 1. FC Kaiserslautern         1         1     1 1963
## 5       Karlsruher SC       Meidericher SV         1         4     1 1963
##                  Date
## 1 1963-08-24 10:30:00
## 2 1963-08-24 10:30:00
## 3 1963-08-24 10:30:00
## 4 1963-08-24 10:30:00
## 5 1963-08-24 10:30:00

Background

This dataset comprises soccer scores from the German top-division soccer league, the Bundesliga, from its beginning in the 1963/64 season up to the 2008/09 season. The Bundesliga features 18 teams, where each team plays every other team twice: once at home (at their own stadium), and once away (at the other team’s stadium). At the end of the season, the bottom two teams are automatically relegated to the lower 2. Bundesliga division, and the top two teams from the 2. Bundesliga are automatically promoted to the Bundesliga. The third-from-the-bottom team in the Bundesliga has to play a two-legged match (one home and one away) with the third-placed team in the 2. Bundesliga to determine who will be included in the top division the following season. The top three teams from the Bundesliga are also automatically selected for participation in the multinational UEFA Champions League tournament, where top teams from soccer leagues in Europe compete for the trophy, starting with a group stage, followed by a knockout-style tournament to determine the eventual winner. The fourth team, meanwhile, has to participate in a two-legged tie with a similarly placed team from a different league (for instance, the English Premier League, or La Liga, which are the top divisions in England and Spain, respectively) to qualify for the Champions League.

Historical Success

We are interested in finding the most successful club in top-division German soccer. To do this, we first add a HomeResult column to the dataset and populate it with the result of each match from the home team's perspective (win, loss, or draw).

# Label every match from the home side's point of view.
# sign() of the goal difference is -1 (loss), 0 (draw) or 1 (win);
# adding 2 turns that into an index into the label vector.
Bundesliga$HomeResult <- with(
  Bundesliga,
  c("Loss", "Draw", "Win")[sign(HomeGoals - AwayGoals) + 2]
)
# Overall tally of home draws, losses and wins
table(Bundesliga[["HomeResult"]])
## 
## Draw Loss  Win 
## 3634 3103 7281

Historical results suggest there may be some home-field advantage. Between 1963 and 2009, the home team won 7281 games, lost only 3103, and drew 3634. Next, we check which team has been the most successful at its home stadium.

# Cross-tabulate home results (columns) by home club (rows)
table(Bundesliga[["HomeTeam"]], Bundesliga[["HomeResult"]])
##                            
##                             Draw Loss Win
##   1. FC Kaiserslautern       177  121 414
##   1. FC Koeln                147  149 382
##   1. FC Nuernberg            137  120 200
##   1. FC Saarbruecken          32   29  22
##   1. FSV Mainz 05             14   16  21
##   Alemannia Aachen            17   20  31
##   Arminia Bielefeld           73   95 121
##   Bayer Leverkusen           139   98 275
##   Bayer Uerdingen             61   59 101
##   Bayern Muenchen            131   72 547
##   Blau-Weiss 90 Berlin         8    7   2
##   Borussia Dortmund          182  128 402
##   Borussia Moenchengladbach  194  132 373
##   Borussia Neunkirchen        11   16  22
##   Darmstadt 98                10   14  10
##   Dynamo Dresden              24   19  27
##   Eintracht Braunschweig      89   66 181
##   Eintracht Frankfurt        168  149 378
##   Energie Cottbus             16   44  42
##   FC Homburg                  17   16  18
##   FC St. Pauli                39   41  39
##   Fortuna Duesseldorf        112   92 172
##   Fortuna Koeln                6    5   6
##   Hamburger SV               188  144 448
##   Hannover 96                108   97 150
##   Hansa Rostock               57   65  84
##   Hertha BSC Berlin          119   92 261
##   Karlsruher SC              134   97 175
##   KFC Uerdingen                6    7   4
##   Kickers Offenbach           26   32  61
##   Meidericher SV              13    7  27
##   MSV Duisburg               137  114 176
##   Preussen Muenster            4    6   5
##   Rot-Weiss Essen             43   30  46
##   Rot-Weiss Oberhausen        18   19  31
##   SC Freiburg                 45   55  70
##   Schalke 04                 194  140 361
##   SpVgg Unterhaching          11    6  17
##   SSV Ulm                      4    6   7
##   Stuttgarter Kickers          8   17  11
##   Tasmania Berlin              3   12   2
##   Tennis Borussia Berlin      10   14  10
##   TSV 1860 Muenchen           76   93 167
##   VfB Leipzig                  9    6   2
##   VfB Stuttgart              157  140 449
##   VfL Bochum                 153  153 257
##   VfL Wolfsburg               49   47 108
##   Waldhof Mannheim            41   27  51
##   Wattenscheid 09             22   22  26
##   Werder Bremen              176  127 460
##   Wuppertaler SV              14   17  20
##   1899 Hoffenheim              5    3   9

The table shows that Bayern München (listed as "Bayern Muenchen" in the data), a Munich-based club, is historically the most successful at home, winning a total of 547 games at the Grünwalder Stadion, the Olympiastadion, and the Allianz Arena combined.

library(dplyr) # for data manipulation
# Share of home fixtures each club has won over all seasons:
# count the "Win" rows per club and divide by that club's home matches.
winPercentages <- Bundesliga %>%
  group_by(HomeTeam) %>%
  summarise(winPerc = sum(HomeResult %in% "Win") / n())
winPercentages
## # A tibble: 52 x 2
##    HomeTeam             winPerc
##    <fct>                  <dbl>
##  1 1. FC Kaiserslautern   0.581
##  2 1. FC Koeln            0.563
##  3 1. FC Nuernberg        0.438
##  4 1. FC Saarbruecken     0.265
##  5 1. FSV Mainz 05        0.412
##  6 Alemannia Aachen       0.456
##  7 Arminia Bielefeld      0.419
##  8 Bayer Leverkusen       0.537
##  9 Bayer Uerdingen        0.457
## 10 Bayern Muenchen        0.729
## # … with 42 more rows
# Next, sort the clubs from highest to lowest home win percentage.
# The column is referenced by its bare name so arrange() resolves it
# through data masking; `winPercentages$winPerc` would instead pull the
# vector from the global object, which silently gives wrong results as
# soon as the input is grouped, filtered, or piped from another step.
winPercentages <- arrange(winPercentages, desc(winPerc))
winPercentages
## # A tibble: 52 x 2
##    HomeTeam             winPerc
##    <fct>                  <dbl>
##  1 Bayern Muenchen        0.729
##  2 Werder Bremen          0.603
##  3 VfB Stuttgart          0.602
##  4 1. FC Kaiserslautern   0.581
##  5 Meidericher SV         0.574
##  6 Hamburger SV           0.574
##  7 Borussia Dortmund      0.565
##  8 1. FC Koeln            0.563
##  9 Hertha BSC Berlin      0.553
## 10 Eintracht Frankfurt    0.544
## # … with 42 more rows

Bayern München also leads the Bundesliga in terms of at-home winning percentage, having won 72.9 percent of their home games.

# At-home winning percentage for each club in each season:
# one row per (HomeTeam, Year) pair, wins divided by home matches played.
winPercByYear <- Bundesliga %>%
  group_by(HomeTeam, Year) %>%
  summarise(winPerc = sum(HomeResult %in% "Win") / n())
winPercByYear
## # A tibble: 826 x 3
## # Groups:   HomeTeam [52]
##    HomeTeam              Year winPerc
##    <fct>                <int>   <dbl>
##  1 1. FC Kaiserslautern  1963   0.467
##  2 1. FC Kaiserslautern  1964   0.6  
##  3 1. FC Kaiserslautern  1965   0.412
##  4 1. FC Kaiserslautern  1966   0.588
##  5 1. FC Kaiserslautern  1967   0.353
##  6 1. FC Kaiserslautern  1968   0.588
##  7 1. FC Kaiserslautern  1969   0.529
##  8 1. FC Kaiserslautern  1970   0.765
##  9 1. FC Kaiserslautern  1971   0.647
## 10 1. FC Kaiserslautern  1972   0.588
## # … with 816 more rows