Dataset: English Premier League

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Importing the data set

# Load the match data from a fixed local CSV path into `data`, then list its
# column names.
data <- read.csv(file = "C:/Downloads/final_dataset.csv")
names(data)
##  [1] "X"             "Date"          "HomeTeam"      "AwayTeam"     
##  [5] "FTHG"          "FTAG"          "FTR"           "HTGS"         
##  [9] "ATGS"          "HTGC"          "ATGC"          "HTP"          
## [13] "ATP"           "HM1"           "HM2"           "HM3"          
## [17] "HM4"           "HM5"           "AM1"           "AM2"          
## [21] "AM3"           "AM4"           "AM5"           "MW"           
## [25] "HTFormPtsStr"  "ATFormPtsStr"  "HTFormPts"     "ATFormPts"    
## [29] "HTWinStreak3"  "HTWinStreak5"  "HTLossStreak3" "HTLossStreak5"
## [33] "ATWinStreak3"  "ATWinStreak5"  "ATLossStreak3" "ATLossStreak5"
## [37] "HTGD"          "ATGD"          "DiffPts"       "DiffFormPts"

Using group_by() to find the average goals scored by each home team

# Mean of FTHG for every HomeTeam, one row per team.
# The result column keeps the name Total_Goals even though it stores a mean.
summarise(
  group_by(data, HomeTeam),
  Total_Goals = mean(FTHG)
)
## # A tibble: 44 × 2
##    HomeTeam    Total_Goals
##    <chr>             <dbl>
##  1 Arsenal            2.21
##  2 Aston Villa        1.24
##  3 Birmingham         1.22
##  4 Blackburn          1.38
##  5 Blackpool          1.58
##  6 Bolton             1.33
##  7 Bournemouth        1.47
##  8 Bradford           1.05
##  9 Brighton           1.26
## 10 Burnley            1.07
## # ℹ 34 more rows

Visualization of average goals scored by each home team

library(ggplot2)

# Bar chart with one bar per home team; bar height is that team's mean FTHG.
ggplot(
  summarise(group_by(data, HomeTeam), Avg_Goals = mean(FTHG)),
  aes(x = HomeTeam, y = Avg_Goals)
) +
  geom_col(fill = "brown") + # geom_col() is geom_bar(stat = "identity")
  labs(
    title = "Average Goals Scored by Home Teams",
    x = "Home Team",
    y = "Average Goals (FTHG)"
  ) +
  theme_minimal() +
  # Rotate the team names on the x axis by 50 degrees, right-aligned.
  theme(axis.text.x = element_text(angle = 50, hjust = 1))

Using group_by() to find the average goals scored by each away team

# Mean of FTAG for every AwayTeam, one row per team.
# The result column keeps the name Total_Goals even though it stores a mean.
summarise(
  group_by(data, AwayTeam),
  Total_Goals = mean(FTAG)
)
## # A tibble: 44 × 2
##    AwayTeam    Total_Goals
##    <chr>             <dbl>
##  1 Arsenal           1.64 
##  2 Aston Villa       1.12 
##  3 Birmingham        0.835
##  4 Blackburn         1.10 
##  5 Blackpool         1.32 
##  6 Bolton            1.04 
##  7 Bournemouth       1.07 
##  8 Bradford          0.526
##  9 Brighton          0.526
## 10 Burnley           0.842
## # ℹ 34 more rows

Visualization of average goals scored by each away team

library(ggplot2)
# Bar chart with one bar per away team; bar height is that team's mean FTAG.
ggplot(
  summarise(group_by(data, AwayTeam), Avg_Goals = mean(FTAG)),
  aes(x = AwayTeam, y = Avg_Goals)
) +
  geom_col(fill = "red") + # geom_col() is geom_bar(stat = "identity")
  labs(
    title = "Average Goals Scored by Away Teams",
    x = " Away Team",
    y = "Average Goals (FTAG)"
  ) +
  theme_minimal() +
  # Rotate the team names on the x axis by 50 degrees, right-aligned.
  theme(axis.text.x = element_text(angle = 50, hjust = 1))

Using group_by() to find the mean of shots on target by home team (note: the code below averages the HTGC column for both summaries)

# Per-HomeTeam summary with two mean columns, one row per team.
# NOTE(review): both columns are computed as mean(HTGC), so they are always
# equal, and HTGC is not obviously a shots column -- confirm which source
# columns were intended for "shots" and "shots on target".
home_team_shots_summary <- summarise(
  group_by(data, HomeTeam),
  Mean_HomeTeamShots = mean(HTGC),
  Mean_HomeTeamShotsOnTarget = mean(HTGC)
)

Visualization of shots on target by home team

library(ggplot2)

# Overlay two bar layers per home team from home_team_shots_summary.
# NOTE(review): both layers use fully opaque fills, so wherever the two
# columns hold equal values the chocolate bars hide the blue ones.
ggplot(home_team_shots_summary, aes(x = HomeTeam)) +
  geom_col(aes(y = Mean_HomeTeamShots), fill = "blue", alpha = 1) +
  geom_col(
    aes(y = Mean_HomeTeamShotsOnTarget),
    fill = "chocolate",
    alpha = 1
  ) +
  labs(x = "Home Team", y = "Mean Shots") +
  ggtitle("Mean of shots on Target by HomeTeam") +
  theme_minimal() +
  # Rotate the team names on the x axis by 50 degrees, right-aligned.
  theme(axis.text.x = element_text(angle = 50, hjust = 1))

Using group_by() to find the mean goals scored by each team at home and away

# Mean of FTHG over the rows where each team appears as HomeTeam.
mean_home_goals <- summarise(
  group_by(data, HomeTeam),
  Mean_Goals_Scored_Home = mean(FTHG)
)

# Mean of FTAG over the rows where each team appears as AwayTeam.
mean_away_goals <- summarise(
  group_by(data, AwayTeam),
  Mean_Goals_Scored_Away = mean(FTAG)
)

# Put each team's home and away means in one row. The join matches HomeTeam
# (left table) against AwayTeam (right table) and keeps unmatched teams from
# either side; the surviving key column is then renamed to Team.
combined_data <- full_join(
  mean_home_goals,
  mean_away_goals,
  by = join_by(HomeTeam == AwayTeam)
) %>%
  rename(Team = HomeTeam)

Visualization of mean goals scored by each team at home and away

library(ggplot2)
# Grouped bar chart of each team's mean goals scored at home vs. away.
#
# Reads combined_data (columns Team, Mean_Goals_Scored_Home,
# Mean_Goals_Scored_Away) and draws two bars per team.
#
# The previous version drew two independent bar layers, each with
# position = "dodge". Dodging only separates bars within a single layer, so
# the away bars were painted directly over the home bars. Reshaping to one
# row per team/venue and mapping the venue to `fill` lets a single layer
# place the two bars side by side.
combined_data %>%
  pivot_longer(
    cols = c(Mean_Goals_Scored_Home, Mean_Goals_Scored_Away),
    names_to = "Venue",
    values_to = "Mean_Goals"
  ) %>%
  mutate(
    # Fix the level order so Home is always drawn (and listed) before Away.
    Venue = factor(
      Venue,
      levels = c("Mean_Goals_Scored_Home", "Mean_Goals_Scored_Away"),
      labels = c("Home", "Away")
    )
  ) %>%
  ggplot(aes(x = Team, y = Mean_Goals, fill = Venue)) +
  geom_col(position = "dodge", alpha = 0.8) +
  # Keep the original colours: brown for home, blue for away.
  scale_fill_manual(values = c(Home = "brown", Away = "blue")) +
  labs(x = "Team", y = "Mean Goals Scored", fill = NULL) +
  ggtitle("Mean Goals Scored by Home and Away by Team") +
  theme_minimal() +
  # Rotate the team names on the x axis by 50 degrees, right-aligned.
  theme(axis.text.x = element_text(angle = 50, hjust = 1))