# Dataset: English Premier League
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the CSV into `data` and print its column names.
# NOTE(review): the path below is machine-specific -- confirm before re-running.
data <- read.csv(file = "C:/Downloads/final_dataset.csv")
names(data)
## [1] "X" "Date" "HomeTeam" "AwayTeam"
## [5] "FTHG" "FTAG" "FTR" "HTGS"
## [9] "ATGS" "HTGC" "ATGC" "HTP"
## [13] "ATP" "HM1" "HM2" "HM3"
## [17] "HM4" "HM5" "AM1" "AM2"
## [21] "AM3" "AM4" "AM5" "MW"
## [25] "HTFormPtsStr" "ATFormPtsStr" "HTFormPts" "ATFormPts"
## [29] "HTWinStreak3" "HTWinStreak5" "HTLossStreak3" "HTLossStreak5"
## [33] "ATWinStreak3" "ATWinStreak5" "ATLossStreak3" "ATLossStreak5"
## [37] "HTGD" "ATGD" "DiffPts" "DiffFormPts"
# Mean FTHG per home team, one row per HomeTeam.
# The result column holds an average, so it is named Mean_Goals_Scored_Home
# rather than "Total_Goals".
data %>%
  group_by(HomeTeam) %>%
  summarize(Mean_Goals_Scored_Home = mean(FTHG))
## # A tibble: 44 × 2
## HomeTeam Mean_Goals_Scored_Home
## <chr> <dbl>
## 1 Arsenal 2.21
## 2 Aston Villa 1.24
## 3 Birmingham 1.22
## 4 Blackburn 1.38
## 5 Blackpool 1.58
## 6 Bolton 1.33
## 7 Bournemouth 1.47
## 8 Bradford 1.05
## 9 Brighton 1.26
## 10 Burnley 1.07
## # ℹ 34 more rows
library(ggplot2)
# Bar chart: one brown bar per home team, bar height = mean FTHG.
ggplot(
  summarise(group_by(data, HomeTeam), avg_goals = mean(FTHG)),
  aes(x = HomeTeam, y = avg_goals)
) +
  geom_col(fill = "brown") +
  labs(
    title = "Average Goals Scored by Home Teams",
    x = "Home Team",
    y = "Average Goals (FTHG)"
  ) +
  theme_minimal() +
  # Tilt the team names so the x-axis labels do not overlap.
  theme(axis.text.x = element_text(angle = 50, hjust = 1))
# Mean FTAG per away team, one row per AwayTeam.
# The result column holds an average, so it is named Mean_Goals_Scored_Away
# rather than "Total_Goals".
data %>%
  group_by(AwayTeam) %>%
  summarize(Mean_Goals_Scored_Away = mean(FTAG))
## # A tibble: 44 × 2
## AwayTeam Mean_Goals_Scored_Away
## <chr> <dbl>
## 1 Arsenal 1.64
## 2 Aston Villa 1.12
## 3 Birmingham 0.835
## 4 Blackburn 1.10
## 5 Blackpool 1.32
## 6 Bolton 1.04
## 7 Bournemouth 1.07
## 8 Bradford 0.526
## 9 Brighton 0.526
## 10 Burnley 0.842
## # ℹ 34 more rows
library(ggplot2)
# Bar chart: one red bar per away team, bar height = mean FTAG.
data %>%
  group_by(AwayTeam) %>%
  summarize(Mean_Goals_Scored_Away = mean(FTAG)) %>%
  ggplot(aes(x = AwayTeam, y = Mean_Goals_Scored_Away)) +
  # geom_col() plots the precomputed means as-is.
  geom_col(fill = "red") +
  # Axis label no longer carries a stray leading space.
  labs(x = "Away Team", y = "Average Goals (FTAG)") +
  ggtitle("Average Goals Scored by Away Teams") +
  theme_minimal() +
  # Tilt the team names so the x-axis labels do not overlap.
  theme(axis.text.x = element_text(angle = 50, hjust = 1))
# Per-home-team mean of the HTGC column.
# NOTE(review): both summary columns are computed from HTGC, so they are always
# identical, and neither measures shots despite its name -- the loaded data has
# no shot-count columns (see the column listing printed after the CSV is read).
# The object and column names are kept unchanged so that any later code that
# refers to them keeps working. HTGC is presumably "home team goals conceded";
# confirm against the dataset's documentation.
home_team_shots_summary <- data %>%
  group_by(HomeTeam) %>%
  summarize(
    Mean_HomeTeamShots = mean(HTGC),
    Mean_HomeTeamShotsOnTarget = mean(HTGC)
  )
library(ggplot2)
# Only one bar layer is drawn: a second fully opaque layer of identical values
# would sit exactly on top of the first and hide it. The labels name the column
# that is actually plotted instead of claiming shots.
ggplot(
  home_team_shots_summary,
  aes(x = HomeTeam, y = Mean_HomeTeamShotsOnTarget)
) +
  geom_col(fill = "chocolate") +
  labs(
    title = "Mean HTGC by Home Team",
    y = "Mean HTGC",
    x = "Home Team"
  ) +
  theme_minimal() +
  # Tilt the team names so the x-axis labels do not overlap.
  theme(axis.text.x = element_text(angle = 50, hjust = 1))
# Mean goals scored by each team when playing at home (FTHG) ...
mean_home_goals <- data %>%
  group_by(HomeTeam) %>%
  summarize(Mean_Goals_Scored_Home = mean(FTHG))
# ... and when playing away (FTAG).
mean_away_goals <- data %>%
  group_by(AwayTeam) %>%
  summarize(Mean_Goals_Scored_Away = mean(FTAG))
# One row per team with both means. full_join keeps a team that appears on only
# one side; its missing mean is NA. The key column is renamed by name rather
# than by position, so a change in column order cannot mislabel the data.
combined_data <- mean_home_goals %>%
  full_join(mean_away_goals, by = c("HomeTeam" = "AwayTeam")) %>%
  rename(Team = HomeTeam)
library(ggplot2)
# position = "dodge" only separates bars that belong to the same layer, so the
# two means are reshaped to long form and drawn in a single layer with a fill
# legend; two separate layers would simply be painted on top of each other.
combined_data %>%
  pivot_longer(
    cols = c(Mean_Goals_Scored_Home, Mean_Goals_Scored_Away),
    names_to = "Venue",
    names_prefix = "Mean_Goals_Scored_",
    values_to = "Mean_Goals"
  ) %>%
  # Fix the level order so "Home" is always drawn (and listed) first.
  mutate(Venue = factor(Venue, levels = c("Home", "Away"))) %>%
  ggplot(aes(x = Team, y = Mean_Goals, fill = Venue)) +
  geom_col(position = "dodge", alpha = 0.8) +
  scale_fill_manual(values = c(Home = "brown", Away = "blue")) +
  labs(x = "Team", y = "Mean Goals Scored", fill = "Venue") +
  ggtitle("Mean Goals Scored by Home and Away by Team") +
  theme_minimal() +
  # Tilt the team names so the x-axis labels do not overlap.
  theme(axis.text.x = element_text(angle = 50, hjust = 1))