Dataset: English Premier League
library(tidyverse) #importing the required libraries
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the match dataset from disk. The path has to match the file name
# exactly; stray whitespace inside the quotes would point at a different file.
data <- read.csv("C:/Downloads/final_dataset.csv")

# Per-column overview: quantiles for numeric columns, length/class for
# character columns.
summary(data)
## X Date HomeTeam AwayTeam
## Min. : 0 Length:6840 Length:6840 Length:6840
## 1st Qu.:1710 Class :character Class :character Class :character
## Median :3420 Mode :character Mode :character Mode :character
## Mean :3420
## 3rd Qu.:5129
## Max. :6839
## FTHG FTAG FTR HTGS
## Min. :0.000 Min. :0.00 Length:6840 Min. : 0.00
## 1st Qu.:1.000 1st Qu.:0.00 Class :character 1st Qu.: 11.00
## Median :1.000 Median :1.00 Mode :character Median : 23.00
## Mean :1.527 Mean :1.13 Mean : 24.42
## 3rd Qu.:2.000 3rd Qu.:2.00 3rd Qu.: 35.00
## Max. :9.000 Max. :7.00 Max. :102.00
## ATGS HTGC ATGC HTP
## Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. :0.0000
## 1st Qu.: 11.00 1st Qu.:11.0 1st Qu.:11.00 1st Qu.:0.8889
## Median : 23.00 Median :23.0 Median :23.00 Median :1.1724
## Mean : 24.51 Mean :24.5 Mean :24.35 Mean :1.2090
## 3rd Qu.: 35.00 3rd Qu.:36.0 3rd Qu.:36.00 3rd Qu.:1.5556
## Max. :105.00 Max. :85.0 Max. :82.00 Max. :2.7368
## ATP HM1 HM2 HM3
## Min. :0.0000 Length:6840 Length:6840 Length:6840
## 1st Qu.:0.9062 Class :character Class :character Class :character
## Median :1.1923 Mode :character Mode :character Mode :character
## Mean :1.2268
## 3rd Qu.:1.5625
## Max. :2.7619
## HM4 HM5 AM1 AM2
## Length:6840 Length:6840 Length:6840 Length:6840
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## AM3 AM4 AM5 MW
## Length:6840 Length:6840 Length:6840 Min. : 1.0
## Class :character Class :character Class :character 1st Qu.:10.0
## Mode :character Mode :character Mode :character Median :19.5
## Mean :19.5
## 3rd Qu.:29.0
## Max. :38.0
## HTFormPtsStr ATFormPtsStr HTFormPts ATFormPts
## Length:6840 Length:6840 Min. : 0.000 Min. : 0.000
## Class :character Class :character 1st Qu.: 4.000 1st Qu.: 4.000
## Mode :character Mode :character Median : 6.000 Median : 6.000
## Mean : 6.243 Mean : 6.414
## 3rd Qu.: 9.000 3rd Qu.: 9.000
## Max. :15.000 Max. :15.000
## HTWinStreak3 HTWinStreak5 HTLossStreak3 HTLossStreak5
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.06228 Mean :0.01798 Mean :0.0576 Mean :0.01433
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## ATWinStreak3 ATWinStreak5 ATLossStreak3 ATLossStreak5
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06287 Mean :0.01652 Mean :0.05102 Mean :0.01023
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## HTGD ATGD DiffPts DiffFormPts
## Min. :-3.00000 Min. :-3.33333 Min. :-2.36364 Min. :-2.25000
## 1st Qu.:-0.50000 1st Qu.:-0.46429 1st Qu.:-0.45161 1st Qu.:-0.17647
## Median :-0.07692 Median :-0.05000 Median : 0.00000 Median : 0.00000
## Mean :-0.00969 Mean : 0.01454 Mean :-0.01775 Mean :-0.01809
## 3rd Qu.: 0.40000 3rd Qu.: 0.42105 3rd Qu.: 0.42857 3rd Qu.: 0.15385
## Max. : 4.00000 Max. : 3.50000 Max. : 2.28571 Max. : 2.25000
# Column names of the dataset
names(data)
## [1] "X" "Date" "HomeTeam" "AwayTeam"
## [5] "FTHG" "FTAG" "FTR" "HTGS"
## [9] "ATGS" "HTGC" "ATGC" "HTP"
## [13] "ATP" "HM1" "HM2" "HM3"
## [17] "HM4" "HM5" "AM1" "AM2"
## [21] "AM3" "AM4" "AM5" "MW"
## [25] "HTFormPtsStr" "ATFormPtsStr" "HTFormPts" "ATFormPts"
## [29] "HTWinStreak3" "HTWinStreak5" "HTLossStreak3" "HTLossStreak5"
## [33] "ATWinStreak3" "ATWinStreak5" "ATLossStreak3" "ATLossStreak5"
## [37] "HTGD" "ATGD" "DiffPts" "DiffFormPts"
Dataset Information: English Premier League dataset
The dataset contains 40 columns (including the row-index column X).
The main columns include the home team (HomeTeam), away team (AwayTeam), date (Date), full-time home goals (FTHG), full-time away goals (FTAG), etc.
The goal is to observe and analyze the English Premier League dataset and to predict winners in different categories.
library(ggplot2)

# Team names and point totals for this chart are typed in by hand here;
# they are not computed from the match data loaded earlier.
teams <- c(
  "Liverpool", "Manchester City", "Manchester United", "Chelsea", "Arsenal"
)
points <- c(99, 98, 85, 78, 68)

# Keep the chart data in its own object so that the dataset stored in `data`
# is not overwritten by this plot.
team_points <- data.frame(Team = teams, Points = points)

# One bar per team, bar height taken directly from the Points column
# (geom_col() is the identity-stat bar geom).
ggplot(team_points, aes(x = Team, y = Points, fill = Team)) +
  geom_col() +
  labs(
    title = "English Premier League Team Performance",
    x = "Team",
    y = "Total Points"
  ) +
  theme_minimal() +
  # Tilt the team labels so the longer names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Hand-entered goal tallies for the same five teams, drawn with base graphics.
goals_scored <- c(85, 95, 75, 70, 65)
data <- data.frame(
  Team = c(
    "Liverpool", "Manchester City", "Manchester United", "Chelsea", "Arsenal"
  ),
  GoalsScored = goals_scored
)

# Upper y-axis limit: tallest bar plus 10 units of headroom.
y_upper <- max(data[["GoalsScored"]]) + 10

barplot(
  data[["GoalsScored"]],
  names.arg = data[["Team"]],
  main = "Goals Scored by English Premier League Teams",
  xlab = "Team",
  ylab = "Goals Scored",
  col = "red",
  ylim = c(0, y_upper)
)