This is the R Markdown notebook behind the Premier League Statistics Dashboard, which covers every team that played in the Premier League over the last 12 seasons (2006-2007 to 2017-2018). For the result, visit the dashboard here: Premier League Dashboard

In this project, the dataset was obtained from Opta, the official data collector for the Premier League. Let’s read the dataset.

# Load the season-by-team statistics table from the project's data folder.
pl <- read.csv(file = file.path("PLData", "stats.csv"))

Next, inspect the dataset to see which preprocessing steps are needed. Start with a glimpse of its structure:

# Print a compact overview of `pl`: row/column counts, then one line per
# column showing its type and first few values.
glimpse(pl)
Rows: 240
Columns: 42
$ team                 <chr> "Manchester United", "Chelsea", "Liverpool", "...
$ wins                 <dbl> 28, 24, 20, 19, 17, 16, 16, 15, 15, 14, 12, 12...
$ losses               <dbl> 5, 3, 10, 8, 12, 14, 15, 16, 10, 12, 16, 21, 1...
$ goals                <dbl> 83, 64, 57, 63, 57, 47, 52, 52, 52, 45, 44, 35...
$ total_yel_card       <dbl> 60, 62, 44, 59, 48, 84, 38, 77, 65, 48, 64, 85...
$ total_red_card       <dbl> 1, 4, 0, 3, 3, 4, 3, 6, 2, 1, 2, 2, 1, 4, 1, 2...
$ total_scoring_att    <dbl> 698, 636, 668, 638, 520, 404, 419, 478, 465, 5...
$ ontarget_scoring_att <dbl> 256, 216, 214, 226, 184, 120, 132, 155, 153, 1...
$ hit_woodwork         <dbl> 21, 14, 15, 19, 6, 7, 8, 5, 9, 9, 7, 8, 9, 5, ...
$ att_hd_goal          <dbl> 12, 16, 8, 10, 5, 10, 15, 12, 9, 13, 7, 3, 5, ...
$ att_pen_goal         <dbl> 5, 3, 6, 10, 6, 6, 3, 5, 8, 2, 4, 1, 6, 2, 5, ...
$ att_freekick_goal    <dbl> 1, 6, 1, 3, 2, 0, 0, 3, 2, 0, 1, 2, 1, 0, 0, 2...
$ att_ibox_goal        <dbl> 72, 41, 46, 53, 44, 38, 42, 43, 44, 32, 37, 27...
$ att_obox_goal        <dbl> 11, 23, 11, 10, 13, 9, 10, 9, 8, 13, 7, 8, 9, ...
$ goal_fastbreak       <dbl> 9, 6, 3, 7, 9, 3, 3, 2, 3, 2, 3, 3, 6, 5, 6, 2...
$ total_offside        <dbl> 80, 127, 120, 111, 149, 95, 87, 142, 81, 141, ...
$ clean_sheet          <dbl> 16, 22, 20, 12, 6, 12, 13, 8, 14, 12, 9, 9, 13...
$ goals_conceded       <dbl> 27, 24, 27, 35, 54, 52, 47, 54, 36, 42, 49, 59...
$ saves                <dbl> 2, 4, 1, 6, 11, 13, 4, 22, 5, 14, 18, 4, 1, 0,...
$ outfielder_block     <dbl> 81, 74, 65, 73, 128, 101, 95, 114, 120, 96, 11...
$ interception         <dbl> 254, 292, 246, 214, 276, 235, 277, 282, 303, 2...
$ total_tackle         <dbl> 890, 982, 969, 998, 995, 811, 855, 864, 860, 9...
$ last_man_tackle      <dbl> 1, 0, 2, 1, 2, 0, 0, 2, 1, 1, 1, 1, 0, 1, 0, 2...
$ total_clearance      <dbl> 1222, 1206, 1115, 1202, 1412, 1037, 1022, 1095...
$ head_clearance       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ own_goals            <dbl> 1, 1, 0, 1, 2, 3, 3, 2, 3, 0, 1, 2, 1, 2, 2, 2...
$ penalty_conceded     <dbl> 5, 3, 1, 3, 7, 8, 6, 11, 4, 8, 7, 4, 4, 3, 2, ...
$ pen_goals_conceded   <dbl> 3, 2, 1, 3, 6, 6, 6, 6, 2, 6, 7, 4, 3, 2, 2, 7...
$ total_pass           <dbl> 18723, 16759, 17154, 18458, 14914, 12400, 1123...
$ total_through_ball   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ total_long_balls     <dbl> 2397, 2270, 2800, 2045, 2408, 2403, 2378, 2544...
$ backward_pass        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ total_cross          <dbl> 918, 897, 1107, 873, 796, 942, 1067, 911, 867,...
$ corner_taken         <dbl> 258, 231, 282, 278, 181, 217, 280, 205, 208, 2...
$ touches              <dbl> 25686, 24010, 24150, 25592, 22200, 18932, 1801...
$ big_chance_missed    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ clearance_off_line   <dbl> 1, 2, 1, 1, 2, 6, 2, 8, 2, 4, 5, 5, 5, 4, 9, 8...
$ dispossessed         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ penalty_save         <dbl> 2, 1, 0, 0, 0, 2, 0, 5, 1, 2, 0, 0, 0, 1, 0, 1...
$ total_high_claim     <dbl> 37, 74, 51, 88, 51, 44, 63, 56, 83, 79, 78, 64...
$ punches              <dbl> 25, 22, 27, 27, 24, 21, 21, 25, 15, 30, 37, 25...
$ season               <chr> "2006-2007", "2006-2007", "2006-2007", "2006-2...

Pre-Processing Data

From the information above, we can see that some values are missing, although every column already has an appropriate data type. Let’s count the missing values in each column:

# Count the missing values in every column of `pl`.
# vapply() pins the result to exactly one integer per column, so the return
# type cannot silently change the way it can with sapply().
missed <- vapply(pl, function(column) sum(is.na(column)), integer(1))
missed
                team                 wins               losses 
                   0                    0                    0 
               goals       total_yel_card       total_red_card 
                   0                    0                    0 
   total_scoring_att ontarget_scoring_att         hit_woodwork 
                   0                    0                    0 
         att_hd_goal         att_pen_goal    att_freekick_goal 
                   0                    0                    0 
       att_ibox_goal        att_obox_goal       goal_fastbreak 
                   0                    0                    0 
       total_offside          clean_sheet       goals_conceded 
                   0                    0                    0 
               saves     outfielder_block         interception 
                  20                    0                    0 
        total_tackle      last_man_tackle      total_clearance 
                   0                    0                    0 
      head_clearance            own_goals     penalty_conceded 
                  20                    0                    0 
  pen_goals_conceded           total_pass   total_through_ball 
                   0                    0                   20 
    total_long_balls        backward_pass          total_cross 
                   0                   80                    0 
        corner_taken              touches    big_chance_missed 
                   0                    0                   80 
  clearance_off_line         dispossessed         penalty_save 
                   0                   20                    0 
    total_high_claim              punches               season 
                   0                    0                    0 

Missing Values Treatment

From the information above, the variables that contains missing values are :

  1. head_clearance
  2. saves
  3. total_through_ball
  4. backward_pass
  5. big_chance_missed
  6. dispossessed

Let’s impute the missing values in each of those columns with the mean of that column.

# Columns that contain missing values and are filled with their column mean.
impute_cols <- c(
  "head_clearance", "saves", "total_through_ball",
  "backward_pass", "big_chance_missed", "dispossessed"
)

# Replace every NA with the mean of the observed values of the same column.
# `na.rm = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned.
pl[impute_cols] <- lapply(pl[impute_cols], function(column) {
  replace(column, is.na(column), mean(column, na.rm = TRUE))
})

let’s check again for the missing values for each features

# Re-count the NAs per column; every count should be 0 after imputation.
colSums(is.na(pl))
                team                 wins               losses 
                   0                    0                    0 
               goals       total_yel_card       total_red_card 
                   0                    0                    0 
   total_scoring_att ontarget_scoring_att         hit_woodwork 
                   0                    0                    0 
         att_hd_goal         att_pen_goal    att_freekick_goal 
                   0                    0                    0 
       att_ibox_goal        att_obox_goal       goal_fastbreak 
                   0                    0                    0 
       total_offside          clean_sheet       goals_conceded 
                   0                    0                    0 
               saves     outfielder_block         interception 
                   0                    0                    0 
        total_tackle      last_man_tackle      total_clearance 
                   0                    0                    0 
      head_clearance            own_goals     penalty_conceded 
                   0                    0                    0 
  pen_goals_conceded           total_pass   total_through_ball 
                   0                    0                    0 
    total_long_balls        backward_pass          total_cross 
                   0                    0                    0 
        corner_taken              touches    big_chance_missed 
                   0                    0                    0 
  clearance_off_line         dispossessed         penalty_save 
                   0                    0                    0 
    total_high_claim              punches               season 
                   0                    0                    0 

Data Wrangling

Let’s explore the dataset further. The standings for each season can be derived from each team’s wins, losses and draws, so let’s compute every team’s points, goal difference and number of draws for each season (each team plays 38 matches per season).

# Derived season totals. Matches that were neither won nor lost out of the 38
# played are draws; a win is worth 3 points and a draw 1.
pl$pts <- with(pl, (wins * 3) + (38 - wins - losses))
pl$gd <- with(pl, goals - goals_conceded)
pl$draw <- with(pl, 38 - wins - losses)

Create Insights

Standings Table

To get the Premier League standings for every season, sort the teams within each season by points and then by goal difference, following the Premier League ranking rules.

# Build the league table for every season.
# Ordering keys: points, then goal difference, then goals scored. Goals scored
# is the final key so that clubs level on both points and goal difference are
# not left in arbitrary file order.
pl_db <- pl %>%
  arrange(season, desc(pts), desc(gd), desc(goals)) %>%
  group_by(season) %>%
  # Rows are already in table order inside each season, so a row's position
  # within its group is the club's final league rank.
  mutate(rank = row_number()) %>%
  ungroup()

# Keep only the columns shown in the standings table and preview the top rows.
standings <- pl_db[, c("team", "wins", "losses", "draw", "pts", "gd", "rank", "season")]
kable(head(standings, 10))
team wins losses draw pts gd rank season
Manchester United 28 5 5 89 56 1 2006-2007
Chelsea 24 3 11 83 40 2 2006-2007
Liverpool 20 10 8 68 30 3 2006-2007
Arsenal 19 8 11 68 28 4 2006-2007
Tottenham Hotspur 17 12 9 60 3 5 2006-2007
Everton 15 10 13 58 16 6 2006-2007
Bolton Wanderers 16 14 8 56 -5 7 2006-2007
Reading 16 15 7 55 5 8 2006-2007
Portsmouth 14 12 12 54 3 9 2006-2007
Blackburn Rovers 15 16 7 52 -2 10 2006-2007

Plot

# Season-by-season league position for the selected clubs.
# The `text` aesthetic is not drawn by ggplot2; it only supplies the tooltip
# shown by plotly.
st <- standings %>%
  filter(team %in% c("Arsenal", "Chelsea")) %>%
  ggplot(aes(season, rank, color = team, group = team,
             text = glue("Rank : {rank}
                                Points : {pts}
                                Goal_Diff : {gd}")
  )) +
  geom_line(show.legend = FALSE) +
  labs(title = "STANDINGS",
       x = "Season",
       y = "Rank") +
  geom_point(shape = 18, show.legend = FALSE) +
  # Rank 1 belongs at the top of the chart. `limits` is written in full
  # instead of relying on partial matching of `lim`.
  scale_y_reverse(limits = c(20, 1)) +
  theme(
    axis.text.x = element_text(face = "bold", color = "white",
                               size = 10, angle = 25),
    axis.text.y = element_text(face = "bold", color = "white",
                               size = 10),
    axis.title.x = element_text(color = "white", size = 14, face = "bold"),
    axis.title.y = element_text(color = "white", size = 14, face = "bold"),
    plot.title = element_text(hjust = 0.5, face = "bold", color = "white"),
    panel.background = element_rect(fill = "#BFD5E3", colour = "#6D9EC1",
                                    size = 2, linetype = "solid"),
    panel.grid.major = element_line(size = 0.5, linetype = "solid",
                                    colour = "white"),
    panel.grid.minor = element_line(size = 0.25, linetype = "solid",
                                    colour = "white"),
    plot.background = element_rect(fill = "#242c34")
  )

# Convert to an interactive widget that shows only the custom tooltip text.
# NOTE(review): legend x = 100 is far outside the usual 0-1 paper coordinates;
# confirm the intended legend position.
ggplotly(st, tooltip = "text") %>%
  layout(legend = list(
    x = 100,
    y = 0.5,
    font = list(
      family = "sans-serif",
      size = 10,
      color = "#000"),
    bgcolor = "#E2E2E2",
    bordercolor = "#FFFFFF",
    borderwidth = 2))

Offensive scores and the top ten bar with select input based on season

We could know the offensive score of each teams based on the features in this datasets, we want to analyze based on :

  1. goals
  2. total_scoring_att <- total shots
  3. ontarget_scoring_att <- shots on target
  4. total_cross <- cross inside the third box
  5. att_hd_goal <- goal by header
  6. touches <- the player touches on the ball
  7. total_pass <- total pass

From these features we can create the following accuracy parameters:

  1. goals
  2. xGoal <- how often a shot on target becomes a goal
  3. xShots <- how often a shot becomes a shot on target
  4. xHdgoal <- how often a cross becomes a headed goal
  5. xPlay <- how often a touch becomes a pass

We quantify these five parameters and take their average to produce an offensive rating.

# Derive the attacking ratios (each scaled by 10) and fold them into a single
# offensive rating per team and season.
pl_db <- pl_db %>%
  mutate(
    xGoal = round(goals / ontarget_scoring_att * 10, 2),              # goals per shot on target
    xShots = round(ontarget_scoring_att / total_scoring_att * 10, 2), # shots on target per shot
    xHdgoal = round(att_hd_goal / total_cross * 10, 2),               # headed goals per cross
    xPlay = round(total_pass / touches * 10, 2),                      # passes per touch
    goalrating = round(goals / 10, 2),
    # Unweighted mean of the five components above.
    nons_rating_off = rowMeans(cbind(xGoal, xShots, xHdgoal, xPlay, goalrating)),
    # NOTE(review): 5.385 is an unexplained hard-coded divisor (possibly a
    # previously observed maximum of nons_rating_off) - confirm its origin.
    offensive_rating = round(nons_rating_off * 9 / 5.385, 2)
  )

# Columns used by the offensive-score views, followed by a preview.
offensive_score <- pl_db %>%
  dplyr::select(team, goals, xGoal, xShots, xHdgoal, xPlay,
                offensive_rating, season, rank)
kable(head(offensive_score, 10))
team goals xGoal xShots xHdgoal xPlay offensive_rating season rank
Manchester United 83 3.24 3.67 0.13 7.29 7.56 2006-2007 1
Chelsea 64 2.96 3.40 0.18 6.98 6.66 2006-2007 2
Liverpool 57 2.66 3.20 0.07 7.10 6.26 2006-2007 3
Arsenal 63 2.79 3.54 0.11 7.21 6.67 2006-2007 4
Tottenham Hotspur 57 3.10 3.54 0.06 6.72 6.39 2006-2007 5
Everton 52 3.40 3.29 0.10 6.38 6.14 2006-2007 6
Bolton Wanderers 47 3.92 2.97 0.11 6.55 6.10 2006-2007 7
Reading 52 3.94 3.15 0.14 6.23 6.24 2006-2007 8
Portsmouth 45 2.42 3.54 0.17 6.51 5.73 2006-2007 9
Blackburn Rovers 52 3.35 3.24 0.13 6.74 6.24 2006-2007 10

Plotting

# Dark-background styling for the offensive-score chart, collected up front so
# the plot expression below stays short.
op_theme <- theme(
  axis.text.x = element_text(face = "bold", color = "white", size = 10, angle = 25),
  axis.text.y = element_text(face = "bold", color = "white", size = 10),
  axis.title.x = element_text(color = "white", size = 12, face = "bold"),
  axis.title.y = element_text(color = "white", size = 12, face = "bold"),
  plot.title = element_text(hjust = 0.5, face = "bold", color = "white"),
  panel.grid.major = element_line(size = 2.5, linetype = "solid", colour = "white"),
  panel.grid.minor = element_line(size = 2.5, linetype = "solid", colour = "white"),
  plot.background = element_rect(fill = "#242c34")
)

# Rows for the clubs being compared.
op_data <- filter(offensive_score, team %in% c("Arsenal", "Chelsea"))

# Offensive rating per season, one line per club; `text` feeds the tooltip.
op <- ggplot(
  op_data,
  aes(season, offensive_rating, color = team, group = team,
      text = glue("Rank_League : {rank}
                         Goals : {goals}
                         xGoal : {xGoal}
                         xShots : {xShots}
                         ")
  )
) +
  geom_line(show.legend = FALSE) +
  geom_point(shape = 18, show.legend = FALSE) +
  labs(title = "OFFENSIVE SCORE", x = "Season", y = "Rating") +
  op_theme

# Interactive version showing only the custom tooltip text.
layout(
  ggplotly(op, tooltip = "text"),
  legend = list(
    x = 100,
    y = 0.5,
    font = list(family = "sans-serif", size = 10, color = "#000"),
    bgcolor = "#E2E2E2",
    bordercolor = "#FFFFFF",
    borderwidth = 2
  )
)

Defensive Score and the top ten bar with select input based on season

The defensive statistics describe how each team dealt with its opponents in each season. The features that can be used are:

  1. clean_sheet
  2. goals_conceded
  3. saves
  4. outfielder_block
  5. interception
  6. total_tackle
  7. total_clearance
# Scale a count to 0-10 relative to the largest value in the whole table.
to_ten <- function(v) round(v / max(v) * 10, 2)

# Component ratings for the defensive score. Goals conceded is inverted so
# that conceding fewer goals yields a higher rating.
pl_db <- pl_db %>%
  mutate(
    clean_sheet_rating = to_ten(clean_sheet),
    goals_conceded_rating = round((1 - (goals_conceded / max(goals_conceded))) * 10, 2),
    saves_rating = to_ten(saves),
    outfielder_block_rating = to_ten(outfielder_block),
    interception_rating = to_ten(interception),
    total_tackle_rating = to_ten(total_tackle),
    total_clearance_rating = to_ten(total_clearance),
    # Unweighted mean of the seven component ratings.
    nons_rating_def = rowMeans(cbind(
      clean_sheet_rating, goals_conceded_rating, saves_rating,
      outfielder_block_rating, interception_rating,
      total_tackle_rating, total_clearance_rating
    )),
    # Rescale so the best row in the table scores exactly 10.
    defensive_rating = round(nons_rating_def * 10 / max(nons_rating_def), 2)
  )

# Columns used by the defensive-score views, followed by a preview.
defensive_score <- pl_db %>%
  dplyr::select(team, goals_conceded, defensive_rating, clean_sheet_rating,
                goals_conceded_rating, saves_rating, outfielder_block_rating,
                interception_rating, total_tackle_rating,
                total_clearance_rating, season, rank)
kable(head(defensive_score, 10))
team goals_conceded defensive_rating clean_sheet_rating goals_conceded_rating saves_rating outfielder_block_rating interception_rating total_tackle_rating total_clearance_rating season rank
Manchester United 27 7.18 6.67 6.97 0.11 3.60 2.91 8.17 5.50 2006-2007 1
Chelsea 24 8.00 9.17 7.30 0.23 3.29 3.35 9.02 5.43 2006-2007 2
Liverpool 27 7.40 8.33 6.97 0.06 2.89 2.82 8.90 5.02 2006-2007 3
Arsenal 35 6.70 5.00 6.07 0.34 3.24 2.45 9.16 5.41 2006-2007 4
Tottenham Hotspur 54 6.65 2.50 3.93 0.62 5.69 3.17 9.14 6.36 2006-2007 5
Everton 36 7.41 5.83 5.96 0.28 5.33 3.47 7.90 6.24 2006-2007 6
Bolton Wanderers 52 6.18 5.00 4.16 0.74 4.49 2.69 7.45 4.67 2006-2007 7
Reading 47 6.39 5.42 4.72 0.23 4.22 3.18 7.85 4.60 2006-2007 8
Portsmouth 42 6.73 5.00 5.28 0.80 4.27 2.52 8.71 5.24 2006-2007 9
Blackburn Rovers 54 6.28 3.33 3.93 1.25 5.07 3.23 7.93 4.93 2006-2007 10

Plot

# Defensive rating per season, one line per selected club. The `text`
# aesthetic only supplies the plotly tooltip.
dp <- defensive_score %>%
  filter(team %in% c("Arsenal", "Chelsea")) %>%
  ggplot(aes(x = season, y = defensive_rating, group = team, color = team,
             text = glue("Rank_League : {rank}
                                Intercep_Rate : {interception_rating}
                                Clean_Sheet_Rate : {clean_sheet_rating}"))) +
  geom_line(show.legend = FALSE) +
  geom_point(shape = 18, show.legend = FALSE) +
  labs(title = "DEFENSIVE SCORE", x = "Season", y = "Rating") +
  theme(
    plot.background = element_rect(fill = "#242c34"),
    plot.title = element_text(hjust = 0.5, face = "bold", color = "white"),
    axis.title.x = element_text(color = "white", size = 12, face = "bold"),
    axis.title.y = element_text(color = "white", size = 12, face = "bold"),
    axis.text.x = element_text(face = "bold", color = "white", size = 10, angle = 25),
    axis.text.y = element_text(face = "bold", color = "white", size = 10),
    panel.grid.major = element_line(size = 2.5, linetype = "solid", colour = "white"),
    panel.grid.minor = element_line(size = 2.5, linetype = "solid", colour = "white")
  )

# Interactive widget with the legend styling applied.
dp_widget <- ggplotly(dp, tooltip = "text")
layout(
  dp_widget,
  legend = list(
    x = 100,
    y = 0.5,
    font = list(family = "sans-serif", size = 10, color = "#000"),
    bgcolor = "#E2E2E2",
    bordercolor = "#FFFFFF",
    borderwidth = 2
  )
)

make another table to show top 10

for capstone

Overall Performance

Create the summary for value box

# Number of top-six finishes per club, keeping the six clubs with the most.
# A single count per team replaces the former count-by-(rank, team) followed
# by a second aggregation, which produced the same totals in two steps.
big6 <- pl_db %>%
  filter(rank %in% 1:6) %>%
  count(team) %>%
  rename(total = n) %>%
  arrange(desc(total)) %>%
  head(6)

# Number of first-place finishes per club (columns: rank, team, n).
success <- pl_db %>%
  filter(rank == 1) %>%
  count(rank, team) %>%
  arrange(desc(n))

# Total wins per club across all seasons in the data, top six only.
wincounts <- pl_db %>%
  group_by(team) %>%
  summarise(countwins = sum(wins)) %>%
  arrange(desc(countwins)) %>%
  head(6)

Offense Performance

Create the goals summary from 12 seasons

# Average goals scored per season for each club, restricted to clubs with
# more than four seasons in the data; the 20 highest averages are kept and
# ranked from best (1) downwards.
bestof <- pl_db %>%
  group_by(team) %>%
  summarise(Total_Goals = sum(goals), Count_of_Season = n()) %>%
  filter(Count_of_Season > 4) %>%
  mutate(Average_Goals = Total_Goals / Count_of_Season) %>%
  arrange(desc(Average_Goals)) %>%
  head(20) %>%
  mutate(rank_avg_goals = rank(-Average_Goals, ties.method = "first"))

# Same rows, lowest average first.
worstof <- arrange(bestof, Average_Goals)

Plot All Offense

# Horizontal bar chart of the ten clubs with the highest average goals per
# season; bars are ordered and shaded by that average.
top_avg_goals <- filter(bestof, rank_avg_goals %in% 1:10)

pgoalseasonall <- ggplot(
  top_avg_goals,
  aes(x = Average_Goals, y = reorder(team, Average_Goals), fill = Average_Goals,
      text = glue("Avg_Goals : {Average_Goals}"))
) +
  geom_col(show.legend = FALSE) +
  scale_fill_gradient(low = "pink", high = "#D81B60", na.value = NA) +
  labs(x = "Goals", y = "Team") +
  theme(
    axis.text.x = element_text(face = "bold", color = "black", size = 10, angle = 25),
    axis.text.y = element_text(face = "bold", color = "black", size = 10),
    axis.title.x = element_text(color = "black", size = 12, face = "bold"),
    axis.title.y = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "bold", color = "black"),
    panel.grid.major = element_line(size = 2.5, linetype = "solid", colour = "white"),
    panel.grid.minor = element_line(size = 2.5, linetype = "solid", colour = "white"),
    plot.background = element_rect(fill = "#ffffff")
  )

# Interactive version without a legend.
layout(ggplotly(pgoalseasonall, tooltip = "text"), showlegend = F)

Create Each Season Goals Summary

# Rank the teams inside every season by goals scored (1 = most goals); ties
# are broken by row order.
pl_db_goals <- pl %>%
  arrange(season, desc(goals)) %>%
  group_by(season) %>%
  mutate(rank_in_goals = row_number(desc(goals))) %>%
  ungroup()

# Best-first view: only the columns needed for the goal charts.
pl_goals_season_best <- pl_db_goals %>%
  dplyr::select(team, goals, rank_in_goals, season)

# Worst-first view of the same rows.
pl_goals_season_worst <- arrange(pl_goals_season_best, season, desc(rank_in_goals))

Plot Offense

# Ten highest-scoring teams of the 2017-2018 season as a horizontal bar chart.
best_scorers <- filter(
  pl_goals_season_best,
  season %in% c("2017-2018"),
  rank_in_goals %in% 1:10
)

pgoalseasonbest <- ggplot(
  best_scorers,
  aes(x = goals, y = reorder(team, goals), fill = goals,
      text = glue("Goals : {goals}"))
) +
  geom_col(show.legend = FALSE) +
  scale_fill_gradient(low = "yellow", high = "red", na.value = NA) +
  labs(title = "GOALS SCORED", x = "Goals", y = "Team") +
  theme(
    axis.text.x = element_text(face = "bold", color = "white", size = 10, angle = 25),
    axis.text.y = element_text(face = "bold", color = "white", size = 10),
    axis.title.x = element_text(color = "white", size = 12, face = "bold"),
    axis.title.y = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "bold", color = "white"),
    panel.grid.major = element_line(size = 2.5, linetype = "solid", colour = "white"),
    panel.grid.minor = element_line(size = 2.5, linetype = "solid", colour = "white"),
    plot.background = element_rect(fill = "#242c34")
  )

# Interactive version without a legend.
layout(ggplotly(pgoalseasonbest, tooltip = "text"), showlegend = F)
# Ten lowest-scoring teams of the 2006-2007 season as a horizontal bar chart.
# Ranks 11-20 are the bottom ten of a 20-team season; starting at 10 would
# draw eleven bars and repeat a team that already belongs to the top ten.
pgoalseasonworst <- pl_goals_season_worst %>%
  filter(season %in% c("2006-2007"), rank_in_goals %in% 11:20) %>%
  ggplot(aes(goals, y = reorder(team, -goals), fill = goals,
             text = glue("Goals : {goals}")
  )) +
  labs(title = "GOALS SCORED",
       x = "Goals",
       y = "Team") +
  geom_col(show.legend = FALSE) +
  theme(
    axis.text.x = element_text(face = "bold", color = "white",
                               size = 10, angle = 25),
    axis.text.y = element_text(face = "bold", color = "white",
                               size = 10),
    axis.title.x = element_text(color = "white", size = 12, face = "bold"),
    axis.title.y = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "bold", color = "white"),
    panel.grid.major = element_line(size = 2.5, linetype = "solid",
                                    colour = "white"),
    panel.grid.minor = element_line(size = 2.5, linetype = "solid",
                                    colour = "white"),
    plot.background = element_rect(fill = "#242c34")
  ) +
  scale_fill_gradient(low = "yellow", high = "red", na.value = NA)

# Interactive version without a legend.
ggplotly(pgoalseasonworst, tooltip = "text") %>%
  layout(showlegend = FALSE)

Defensive Performance

Create the goals_conceded summary from 12 seasons

# Average goals conceded per season for each club, restricted to clubs with
# more than four seasons in the data; the 20 lowest averages are kept and
# ranked from best defence (1) downwards.
bestdf <- pl_db %>%
  group_by(team) %>%
  summarise(Total_Goals_Conceded = sum(goals_conceded), Count_of_Season = n()) %>%
  filter(Count_of_Season > 4) %>%
  mutate(Average_Goals_Conceded = Total_Goals_Conceded / Count_of_Season) %>%
  arrange(Average_Goals_Conceded) %>%
  head(20) %>%
  mutate(rank_avg_goals_conceded = rank(Average_Goals_Conceded, ties.method = "first"))

# Same rows, highest average conceded first.
worstdf <- arrange(bestdf, desc(Average_Goals_Conceded))

bestdf
# A tibble: 20 x 5
   team     Total_Goals_Conc~ Count_of_Season Average_Goals_C~ rank_avg_goals_c~
   <chr>                <dbl>           <int>            <dbl>             <int>
 1 Manches~               386              12             32.2                 1
 2 Chelsea                407              12             33.9                 2
 3 Manches~               470              12             39.2                 3
 4 Liverpo~               472              12             39.3                 4
 5 Arsenal                481              12             40.1                 5
 6 Everton                526              12             43.8                 6
 7 Tottenh~               535              12             44.6                 7
 8 Southam~               284               6             47.3                 8
 9 Stoke C~               525              10             52.5                 9
10 Crystal~               268               5             53.6                10
11 Swansea~               383               7             54.7                11
12 Fulham                 439               8             54.9                12
13 Aston V~               554              10             55.4                13
14 West Ha~               624              11             56.7                14
15 West Br~               512               9             56.9                15
16 Sunderl~               569              10             56.9                16
17 Newcast~               581              10             58.1                17
18 Blackbu~               354               6             59                  18
19 Bolton ~               359               6             59.8                19
20 Wigan A~               430               7             61.4                20

Plot All Defense

# Horizontal bar chart of the ten clubs with the lowest average goals conceded
# per season; bars are shaded by that average.
top_avg_conceded <- filter(bestdf, rank_avg_goals_conceded %in% 1:10)

pgoalconcededseasonall <- ggplot(
  top_avg_conceded,
  aes(x = Average_Goals_Conceded,
      y = reorder(team, -Average_Goals_Conceded),
      fill = Average_Goals_Conceded,
      text = glue("Avg_Goals_Conceded/Season : {Average_Goals_Conceded}"))
) +
  geom_col(show.legend = FALSE) +
  scale_fill_gradient(low = "pink", high = "#D81B60", na.value = NA) +
  labs(x = "Goals", y = "Team") +
  theme(
    axis.text.x = element_text(face = "bold", color = "black", size = 10, angle = 0),
    axis.text.y = element_text(face = "bold", color = "black", size = 10),
    axis.title.x = element_text(color = "black", size = 12, face = "bold"),
    axis.title.y = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "bold", color = "black"),
    panel.grid.major = element_line(size = 2.5, linetype = "solid", colour = "white"),
    panel.grid.minor = element_line(size = 2.5, linetype = "solid", colour = "white"),
    plot.background = element_rect(fill = "#ffffff")
  )

# Interactive version without a legend.
layout(ggplotly(pgoalconcededseasonall, tooltip = "text"), showlegend = F)

Create Each Season Goals_Conceded Summary

# Rank the teams inside every season by goals conceded (1 = fewest conceded);
# ties are broken by row order.
pl_db_goals_conceded <- pl %>%
  arrange(season, goals_conceded) %>%
  group_by(season) %>%
  mutate(rank_in_goals_conceded = row_number(goals_conceded)) %>%
  ungroup()

# Best-first view: only the columns needed for the goals-conceded charts.
pl_goals_conceded_season_best <- pl_db_goals_conceded %>%
  dplyr::select(team, goals_conceded, rank_in_goals_conceded, season)

# Worst-first view of the same rows.
pl_goals_conceded_season_worst <- arrange(
  pl_goals_conceded_season_best,
  season, desc(rank_in_goals_conceded)
)

pl_goals_conceded_season_best
# A tibble: 240 x 4
   team              goals_conceded rank_in_goals_conceded season   
   <chr>                      <dbl>                  <int> <chr>    
 1 Chelsea                       24                      1 2006-2007
 2 Manchester United             27                      2 2006-2007
 3 Liverpool                     27                      3 2006-2007
 4 Arsenal                       35                      4 2006-2007
 5 Everton                       36                      5 2006-2007
 6 Aston Villa                   41                      6 2006-2007
 7 Portsmouth                    42                      7 2006-2007
 8 Manchester City               44                      8 2006-2007
 9 Reading                       47                      9 2006-2007
10 Newcastle United              47                     10 2006-2007
# ... with 230 more rows

Plot Defense

# Ten teams that conceded the fewest goals in the 2017-2018 season.
best_defences <- filter(
  pl_goals_conceded_season_best,
  season %in% c("2017-2018"),
  rank_in_goals_conceded %in% 1:10
)

pgoalconcededseasonbest <- ggplot(
  best_defences,
  aes(x = goals_conceded, y = reorder(team, -goals_conceded), fill = goals_conceded,
      text = glue("Goals_Conceded : {goals_conceded}"))
) +
  geom_col(show.legend = FALSE) +
  scale_fill_gradient(low = "pink", high = "#D81B60", na.value = NA) +
  labs(x = "Goals Conceded", y = "Team") +
  theme(
    axis.text.x = element_text(face = "bold", color = "black", size = 10, angle = 0),
    axis.text.y = element_text(face = "bold", color = "black", size = 10),
    axis.title.x = element_text(color = "black", size = 12, face = "bold"),
    axis.title.y = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "bold", color = "white"),
    panel.grid.major = element_line(size = 2.5, linetype = "solid", colour = "white"),
    panel.grid.minor = element_line(size = 2.5, linetype = "solid", colour = "white"),
    plot.background = element_rect(fill = "#ffffff")
  )

# Interactive version without a legend.
layout(ggplotly(pgoalconcededseasonbest, tooltip = "text"), showlegend = F)
# Bar chart of the teams ranked 10-20 on goals conceded in the 2006-2007
# season, then converted to an interactive plotly widget.
# NOTE(review): 10:20 spans eleven ranks, whereas the "best" plot uses ten
# (1:10) -- confirm whether 11:20 was intended.
pgoalconcededseasonworst <- pl_goals_conceded_season_worst %>%
  filter(season %in% "2006-2007", rank_in_goals_conceded %in% 10:20) %>%
  ggplot(aes(
    x = goals_conceded,
    # Team axis is ordered by goals conceded.
    y = reorder(team, goals_conceded),
    fill = goals_conceded,
    # Hover text picked up by ggplotly(tooltip = "text") below.
    text = glue("Goals_Conceded : {goals_conceded}")
  )) +
  labs(x = "Goals Conceded", y = "Team") +
  geom_col(show.legend = FALSE) +
  theme(
    axis.text.x = element_text(
      face = "bold", color = "black", size = 10, angle = 0
    ),
    axis.text.y = element_text(face = "bold", color = "black", size = 10),
    axis.title.x = element_text(color = "black", size = 12, face = "bold"),
    # The y title set in labs() is intentionally hidden here.
    axis.title.y = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "bold", color = "white"),
    panel.grid.major = element_line(
      size = 2.5, linetype = "solid", colour = "white"
    ),
    panel.grid.minor = element_line(
      size = 2.5, linetype = "solid", colour = "white"
    ),
    plot.background = element_rect(fill = "#ffffff")
  ) +
  scale_fill_gradient(low = "pink", high = "#D81B60", na.value = NA)

# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
ggplotly(pgoalconcededseasonworst, tooltip = "text") %>%
  layout(showlegend = FALSE)

Offensive Performance Trend

# Goals scored per season for the selected clubs: one line per team, with
# goals, league rank and offensive rating shown in the hover text.
goalstrend <- ggplot(
  data = filter(pl_db, team %in% c("Arsenal", "Chelsea")),
  mapping = aes(
    x = season,
    y = goals,
    colour = team,
    group = team,
                   text = glue("Goals : {goals}
                                Rank_League : {rank}
                                Offensive_Rating : {offensive_rating}
                                ")
  )
) +
  geom_line(show.legend = FALSE) +
  labs(title = "GOALS TREND", y = "Goals") +
  geom_point(shape = 18, show.legend = FALSE) +
  theme(
    plot.background = element_rect(fill = "#242c34"),
    plot.title = element_text(hjust = 0.5, face = "bold", color = "white"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(color = "white", size = 12, face = "bold"),
    axis.text.x = element_text(
      face = "bold", color = "white", size = 10, angle = 25
    ),
    axis.text.y = element_text(face = "bold", color = "white", size = 10),
    panel.grid.major = element_line(
      size = 2.5, linetype = "solid", colour = "white"
    ),
    panel.grid.minor = element_line(
      size = 2.5, linetype = "solid", colour = "white"
    )
  )

# Interactive version; the legend box is anchored at x = 0, y = 1.
layout(
  ggplotly(goalstrend, tooltip = "text"),
  legend = list(
    x = 0,
    y = 1,
    font = list(family = "sans-serif", size = 10, color = "#000"),
    bgcolor = "#E2E2E2",
    bordercolor = "#E2E2E2",
    borderwidth = 2
  )
)

Defensive Performance Trend

# Goals conceded per season for the selected clubs: one line per team, with
# goals conceded, league rank and defensive rating shown in the hover text.
goalsconcededtrend <- ggplot(
  data = filter(pl_db, team %in% c("Arsenal", "Chelsea")),
  mapping = aes(
    x = season,
    y = goals_conceded,
    colour = team,
    group = team,
                   text = glue("Goals_Concede : {goals_conceded}
                                Rank_League : {rank}
                                Defensive_Rating : {defensive_rating}
                                ")
  )
) +
  geom_line(show.legend = FALSE) +
  labs(title = "GOALS CONCEDED TREND", y = "Goals") +
  geom_point(shape = 18, show.legend = FALSE) +
  theme(
    plot.background = element_rect(fill = "#242c34"),
    plot.title = element_text(hjust = 0.5, face = "bold", color = "white"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(color = "white", size = 12, face = "bold"),
    axis.text.x = element_text(
      face = "bold", color = "white", size = 10, angle = 25
    ),
    axis.text.y = element_text(face = "bold", color = "white", size = 10),
    panel.grid.major = element_line(
      size = 2.5, linetype = "solid", colour = "white"
    ),
    panel.grid.minor = element_line(
      size = 2.5, linetype = "solid", colour = "white"
    )
  )

# Interactive version; the legend box is anchored at x = 0, y = 1.
layout(
  ggplotly(goalsconcededtrend, tooltip = "text"),
  legend = list(
    x = 0,
    y = 1,
    font = list(family = "sans-serif", size = 10, color = "#000"),
    bgcolor = "#E2E2E2",
    bordercolor = "#E2E2E2",
    borderwidth = 2
  )
)

Trivia

Analysis by time of year

# Load the match results file and preview its columns.
l_ha <- read.csv(file = "PLData/resultsnew.csv")
l_ha %>% glimpse()
Rows: 10,424
Columns: 23
$ Season   <chr> "1993-94", "1993-94", "1993-94", "1993-94", "1993-94", "19...
$ Date     <chr> "1993-08-14 00:00:00", "1993-08-14 00:00:00", "1993-08-14 ...
$ HomeTeam <chr> "Arsenal", "Aston Villa", "Chelsea", "Liverpool", "Man Cit...
$ AwayTeam <chr> "Coventry", "QPR", "Blackburn", "Sheffield Weds", "Leeds",...
$ FTHG     <int> 0, 4, 1, 2, 1, 0, 0, 3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 1...
$ FTAG     <int> 3, 1, 2, 0, 1, 1, 3, 1, 2, 2, 2, 1, 0, 0, 0, 1, 3, 1, 0, 3...
$ FTR      <chr> "A", "H", "A", "H", "D", "A", "A", "H", "A", "A", "A", "A"...
$ HTHG     <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ HTAG     <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ HTR      <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""...
$ Referee  <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""...
$ HS       <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ AS       <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ HST      <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ AST      <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ HC       <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ AC       <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ HF       <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ AF       <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ HY       <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ AY       <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ HR       <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ AR       <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
# Keep the core result columns and derive the calendar month and year of
# each match. Columns are picked by name (the first seven shown by glimpse)
# so this step does not depend on the column order of the CSV.
pl_ha <- l_ha[, c(
  "Season", "Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG", "FTR"
)]
pl_ha$Date <- as_datetime(pl_ha$Date)
# TRUE/FALSE are spelled out: `T`/`F` are ordinary, reassignable variables.
pl_ha$Month <- month(pl_ha$Date, label = TRUE, abbr = FALSE)
pl_ha$Year <- year(pl_ha$Date)

# Restrict to matches played in calendar years 2006-2018, then drop the
# 2005-06 and 2018-19 seasons (presumably because only part of each falls
# inside that window -- confirm).
pl_ha <- pl_ha %>%
  filter(
    Year %in% 2006:2018,
    Season != "2005-06",
    Season != "2018-19"
  )

# Total goals scored at home, per team and calendar month. The result
# remains grouped by HomeTeam.
hg <- summarise(
  group_by(pl_ha, HomeTeam, Month),
  total_home_goals = sum(FTHG)
)

hg
# A tibble: 389 x 3
# Groups:   HomeTeam [39]
   HomeTeam Month     total_home_goals
   <chr>    <ord>                <int>
 1 Arsenal  January                 50
 2 Arsenal  February                49
 3 Arsenal  March                   34
 4 Arsenal  April                   57
 5 Arsenal  May                     41
 6 Arsenal  August                  28
 7 Arsenal  September               53
 8 Arsenal  October                 55
 9 Arsenal  November                44
10 Arsenal  December                68
# ... with 379 more rows
# Total goals scored away, per team and calendar month. The result
# remains grouped by AwayTeam.
ag <- summarise(
  group_by(pl_ha, AwayTeam, Month),
  total_away_goals = sum(FTAG)
)

ag
# A tibble: 390 x 3
# Groups:   AwayTeam [39]
   AwayTeam Month     total_away_goals
   <chr>    <ord>                <int>
 1 Arsenal  January                 40
 2 Arsenal  February                25
 3 Arsenal  March                   36
 4 Arsenal  April                   45
 5 Arsenal  May                     29
 6 Arsenal  August                  25
 7 Arsenal  September               41
 8 Arsenal  October                 44
 9 Arsenal  November                34
10 Arsenal  December                63
# ... with 380 more rows
# Combine home and away monthly totals per team. The full join keeps
# team/month pairs present on only one side; their missing total is filled
# with 0 so the sum below is defined. Only the two goal columns are filled
# (and stay integer) -- the key columns are never overwritten.
join <- hg %>%
  full_join(ag, by = c("HomeTeam" = "AwayTeam", "Month" = "Month")) %>%
  mutate(
    total_home_goals = coalesce(total_home_goals, 0L),
    total_away_goals = coalesce(total_away_goals, 0L)
  )

# Home + away goals per team and month.
pl_aghg <- join %>%
  mutate(total_goals = total_home_goals + total_away_goals)

Plot

# Bar chart of one team's total goals (home + away) per calendar month,
# then converted to an interactive plotly widget.
# NOTE(review): "Coba" looks like a placeholder title, and it is drawn in
# white on a white plot background -- confirm the intended title.
plotpl_aghg <- pl_aghg %>%
  filter(HomeTeam == "Arsenal") %>%
  ggplot(aes(
    x = total_goals,
    y = Month,
    # The constant fill on geom_col() below takes precedence over this
    # mapping.
    fill = total_goals,
    # Hover text picked up by ggplotly(tooltip = "text") below.
    text = glue("goals : {total_goals}")
  )) +
  # The y axis shows months, so it is labelled "Month" (was "Team").
  labs(title = "Coba", x = "Goals", y = "Month") +
  geom_col(show.legend = FALSE, fill = "pink") +
  theme(
    axis.text.x = element_text(
      face = "bold", color = "black", size = 10, angle = 0
    ),
    axis.text.y = element_text(face = "bold", color = "black", size = 10),
    axis.title.x = element_text(color = "black", size = 12, face = "bold"),
    axis.title.y = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "bold", color = "white"),
    panel.grid.major = element_line(
      size = 2.5, linetype = "solid", colour = "white"
    ),
    panel.grid.minor = element_line(
      size = 2.5, linetype = "solid", colour = "white"
    ),
    plot.background = element_rect(fill = "#ffffff")
  )

# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
ggplotly(plotpl_aghg, tooltip = "text") %>%
  layout(showlegend = FALSE)

Highest-scoring team in each month

# For every month, find the largest team total and join back to pl_aghg to
# recover the team that achieved it. distinct() keeps the first matching
# row when several teams share a month's maximum.
best_pl_aghg <- distinct(
  inner_join(
    summarise(group_by(pl_aghg, Month), max = max(total_goals)),
    pl_aghg,
    by = c("max" = "total_goals", "Month" = "Month")
  ),
  Month,
  .keep_all = TRUE
)

Plot

# Row index for the monthly leaders. Derived from the actual row count
# instead of a hard-coded 1:10, which fails whenever the number of months
# in the data is not exactly ten.
best_pl_aghg$number <- seq_len(nrow(best_pl_aghg))

# Bar chart of the highest team goal total in each month, labelled with the
# team that reached it, then converted to an interactive plotly widget.
plot_best_pl_aghg <- best_pl_aghg %>%
  ggplot(aes(
    x = max,
    # Month axis is ordered by that month's maximum.
    y = reorder(Month, max),
    fill = max,
    # Hover text picked up by ggplotly(tooltip = "text") below.
    text = glue("max : {max}")
  )) +
  # The y axis shows months, so it is labelled "Month" (was "Team").
  labs(x = "Goals", y = "Month") +
  geom_col(show.legend = FALSE) +
  theme(
    axis.text.x = element_text(
      face = "bold", color = "black", size = 10, angle = 0
    ),
    axis.text.y = element_text(face = "bold", color = "black", size = 10),
    axis.title.x = element_text(color = "black", size = 12, face = "bold"),
    axis.title.y = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "bold", color = "white"),
    panel.grid.major = element_line(
      size = 2.5, linetype = "solid", colour = "white"
    ),
    panel.grid.minor = element_line(
      size = 2.5, linetype = "solid", colour = "white"
    ),
    plot.background = element_rect(fill = "#ffffff")
  ) +
  scale_fill_gradient(low = "pink", high = "#D81B60", na.value = NA) +
  geom_text(aes(label = HomeTeam), colour = "black", fontface = "bold") +
  # Fixed x range; values outside 0-150 would not be drawn.
  xlim(0, 150)

# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
ggplotly(plot_best_pl_aghg, tooltip = "text") %>%
  layout(showlegend = FALSE)

Summary of Stats

# First eight columns of pl_db; the summary below reads team,
# total_scoring_att, ontarget_scoring_att and goals from them.
ot <- pl_db[, 1:8]

# Per-team shooting totals and per-season averages, keeping the ten teams
# with the highest average shots on target (ties broken by average shots).
# Averages divide by the number of rows each team has instead of a fixed 12,
# so teams that appear in fewer seasons are not understated.
# Assumes one row per team per season -- TODO confirm.
otp <- ot %>%
  group_by(team) %>%
  summarise(
    shots = sum(total_scoring_att),
    on_target = sum(ontarget_scoring_att),
    goals = sum(goals),
    seasons = n()
  ) %>%
  mutate(
    average_season_shots = shots / seasons,
    average_season_ontarget = on_target / seasons,
    average_season_goals = goals / seasons
  ) %>%
  arrange(desc(average_season_ontarget), desc(average_season_shots)) %>%
  head(10)

Plot

# Scatter plot of average shots on target (x) against average shots (y) per
# season for the teams in otp, with each point labelled by team name.
otpp <- otp %>%
  ggplot(aes(x = average_season_ontarget, y = average_season_shots)) +
  labs(
    title = "Most Effective Offense",
    x = "Average Shots On Target per Season",
    y = "Average Shots per Season"
  ) +
  geom_point(show.legend = FALSE, color = "red", alpha = 0.5, size = 2) +
  theme(
    axis.text.x = element_text(
      face = "bold", color = "white", size = 10, angle = 0
    ),
    axis.text.y = element_text(face = "bold", color = "white", size = 10),
    axis.title.x = element_text(color = "white", size = 12, face = "bold"),
    axis.title.y = element_text(color = "white", size = 12, face = "bold"),
    plot.title = element_text(hjust = 0.5, face = "bold", color = "white"),
    panel.grid.major = element_line(
      size = 2.5, linetype = "solid", colour = "white"
    ),
    panel.grid.minor = element_line(
      size = 2.5, linetype = "solid", colour = "white"
    ),
    plot.background = element_rect(fill = "#242c34")
  ) +
  # Labels are nudged upwards so they do not sit on top of the points.
  geom_text(aes(label = team), colour = "black", nudge_y = 7, size = 2)

# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
ggplotly(otpp) %>%
  layout(showlegend = FALSE)
# Per-team totals and per-season averages of goals scored and conceded,
# keeping the ten teams with the highest average goals scored (ties broken
# by average goals conceded, descending).
# Averages divide by the number of rows each team has instead of a fixed 12,
# so teams that appear in fewer seasons are not understated.
# Assumes one row per team per season -- TODO confirm.
dfp <- pl_db %>%
  group_by(team) %>%
  summarise(
    goals_scored = sum(goals),
    conceded = sum(goals_conceded),
    seasons = n()
  ) %>%
  mutate(
    average_goals_scored = goals_scored / seasons,
    average_goals_conceded = conceded / seasons
  ) %>%
  arrange(desc(average_goals_scored), desc(average_goals_conceded)) %>%
  head(10)
# Scatter plot of average goals scored (x) against average goals conceded
# (y) per season for the teams in dfp, with each point labelled by team.
ofpp <- dfp %>%
  ggplot(aes(x = average_goals_scored, y = average_goals_conceded)) +
  labs(
    title = "Most Effective Team Performance",
    subtitle = "Based on Goals and Conceded Goals",
    x = "Average Goals per Season",
    y = "Average Goals Conceded per Season"
  ) +
  geom_point(show.legend = FALSE, color = "red", alpha = 0.5, size = 2) +
  theme(
    axis.text.x = element_text(
      face = "bold", color = "white", size = 10, angle = 0
    ),
    axis.text.y = element_text(face = "bold", color = "white", size = 10),
    axis.title.x = element_text(color = "white", size = 12, face = "bold"),
    axis.title.y = element_text(color = "white", size = 12, face = "bold"),
    plot.title = element_text(hjust = 0.5, face = "bold", color = "white"),
    plot.subtitle = element_text(
      hjust = 0.5, face = "bold", color = "white"
    ),
    panel.grid.major = element_line(
      size = 2.5, linetype = "solid", colour = "white"
    ),
    panel.grid.minor = element_line(
      size = 2.5, linetype = "solid", colour = "white"
    ),
    plot.background = element_rect(fill = "#242c34")
  ) +
  # Labels are nudged upwards so they do not sit on top of the points.
  geom_text(aes(label = team), colour = "black", nudge_y = 0.5, size = 2)

# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
ggplotly(ofpp) %>%
  layout(showlegend = FALSE)

Summary of each season

The maximum of features