Introduction to Statistics Assignment 2

Unpredictable Elements in Cricket

Sukhpreet Singh Sethi (s3856149) & Leon Lanvin Lobo (s3894164)

Last updated: 28 May, 2023

Introduction

Problem Statement

Data

Descriptive Statistics and Visualisation

# Read the IPL match-level data and keep only the columns used in the analysis.
df <- read_csv("IPL_Matches_2008_2022.csv")
data <- df[, c("TossWinner", "WinningTeam", "Team1", "Team2", "TossDecision", "Season")]

# Keep only matches where the toss winner can be compared with the match
# winner (the comparison is not NA), then flag the outcome:
# tossanalysis = 1 when the toss winner also won the match, 0 otherwise.
data <- data[!is.na(data$TossWinner == data$WinningTeam), ]
data$tossanalysis <- as.numeric(data$TossWinner == data$WinningTeam)

# homeanalysis = 1 when Team1 won, 0 when Team2 won, -1 when neither matches.
# Team1 is the side labelled "home" in the later sections.
data$homeanalysis <- ifelse(
  (data$Team2 == data$WinningTeam) %in% TRUE, 0,
  ifelse((data$Team1 == data$WinningTeam) %in% TRUE, 1, -1)
)

Descriptive Statistics Cont 1.

# Preview the first six rows of the prepared match data.
knitr::kable(head(data))
TossWinner WinningTeam Team1 Team2 TossDecision Season tossanalysis homeanalysis
Rajasthan Royals Gujarat Titans Rajasthan Royals Gujarat Titans bat 2022 0 0
Rajasthan Royals Rajasthan Royals Royal Challengers Bangalore Rajasthan Royals field 2022 1 0
Lucknow Super Giants Royal Challengers Bangalore Royal Challengers Bangalore Lucknow Super Giants field 2022 0 1
Gujarat Titans Gujarat Titans Rajasthan Royals Gujarat Titans field 2022 1 0
Sunrisers Hyderabad Punjab Kings Sunrisers Hyderabad Punjab Kings bat 2022 0 0
Mumbai Indians Mumbai Indians Delhi Capitals Mumbai Indians field 2022 1 0

Descriptive Statistics Cont 2.

# Per-season counts of matches by toss outcome (1 = toss winner won the match,
# 0 = toss winner lost). as.data.frame(table(...)) yields one row per
# (tossanalysis, Season) combination with its count in `Freq`.
grouped_df <- data[, c("tossanalysis", "Season")]
tdf <- table(grouped_df)
tdf <- as.data.frame(tdf)
toss_winner_won <- tdf$Freq[tdf$tossanalysis == 1]
toss_winner_lost <- tdf$Freq[tdf$tossanalysis == 0]

# Season labels as they appear in the data, and a running season index
# derived from them (rather than a hard-coded 1:15) so the summary still
# builds if the input file covers a different number of seasons.
Year <- unique(tdf$Season)
Season <- seq_along(Year)

# Per-season counts of matches by home/away outcome (1 = Team1 won,
# 0 = Team2 won).
hgrouped_df <- data[, c("homeanalysis", "Season")]
hdf <- table(hgrouped_df)
hdf <- as.data.frame(hdf)
home_team_won <- hdf$Freq[hdf$homeanalysis == 1]
away_team_won <- hdf$Freq[hdf$homeanalysis == 0]

# One row per season with all four counts side by side.
ddf <- data.frame(
  Season, Year,
  toss_winner_won, toss_winner_lost,
  home_team_won, away_team_won
)
knitr::kable(head(ddf))
Season Year toss_winner_won toss_winner_lost home_team_won away_team_won
1 2007/08 28 30 30 28
2 2009 33 24 31 26
3 2009/10 31 29 33 27
4 2011 38 34 39 33
5 2012 33 41 33 41
6 2013 36 40 54 22

Descriptive Statistics Cont 3.

# Per-season number of matches the toss winner lost (red) and won (green).
plot(
  x = Season, y = ddf$toss_winner_lost,
  type = "o", col = "red",
  main = "Toss Analysis", xlab = "Seasons", ylab = "Number of Matches",
  ylim = c(20, 50)
)
lines(x = Season, y = ddf$toss_winner_won, type = "o", col = "green")
legend(x = "topright", legend = c("Loss", "Win"), col = c("red", "green"), lty = 1)

- The graph above shows, for each season, how many matches the toss winner went on to win (green) versus lose (red).

Descriptive Statistics Cont 4.

# Per-season number of matches won by the away team (red) and home team (green).
plot(
  x = Season, y = ddf$away_team_won,
  type = "o", col = "red",
  main = "Home Ground Analysis", xlab = "Seasons", ylab = "Number of Matches",
  ylim = c(20, 60)
)
lines(x = Season, y = ddf$home_team_won, type = "o", col = "green")
legend(
  x = "topright",
  legend = c("Away team won", "Home team won"),
  col = c("red", "green"),
  lty = 1
)

- The graph above shows, for each season, how many matches were won by the home team (green) versus the away team (red).

Descriptive Statistics Cont 5.

# Summary statistics of the per-season count of matches won by the toss winner.
table1 <- ddf %>%
  summarise(
    Min = min(toss_winner_won, na.rm = TRUE),
    Q1 = quantile(toss_winner_won, probs = .25, na.rm = TRUE),
    Median = median(toss_winner_won, na.rm = TRUE),
    Q3 = quantile(toss_winner_won, probs = .75, na.rm = TRUE),
    Max = max(toss_winner_won, na.rm = TRUE),
    Mean = mean(toss_winner_won, na.rm = TRUE),
    SD = sd(toss_winner_won, na.rm = TRUE)
  )
knitr::kable(table1)
Min Q1 Median Q3 Max Mean SD
25 30.5 33 35.5 38 32.6 3.621365
# Summary statistics of the per-season count of matches lost by the toss winner.
table2 <- ddf %>%
  summarise(
    Min = min(toss_winner_lost, na.rm = TRUE),
    Q1 = quantile(toss_winner_lost, probs = .25, na.rm = TRUE),
    Median = median(toss_winner_lost, na.rm = TRUE),
    Q3 = quantile(toss_winner_lost, probs = .75, na.rm = TRUE),
    Max = max(toss_winner_lost, na.rm = TRUE),
    Mean = mean(toss_winner_lost, na.rm = TRUE),
    SD = sd(toss_winner_lost, na.rm = TRUE)
  )
knitr::kable(table2)
Min Q1 Median Q3 Max Mean SD
23 25.5 29 34.5 41 30.46667 5.853774

Hypothesis Testing-1

# Paired t-test across seasons: matches won vs. lost by the toss winner.
# Two-sided alternative at the 95% confidence level.
pttest <- t.test(
  x = ddf$toss_winner_won,
  y = ddf$toss_winner_lost,
  alternative = "two.sided",
  paired = TRUE,
  conf.level = 0.95
)
print(pttest)
## 
##  Paired t-test
## 
## data:  ddf$toss_winner_won and ddf$toss_winner_lost
## t = 1.211, df = 14, p-value = 0.246
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -1.645080  5.911746
## sample estimates:
## mean difference 
##        2.133333

Discussion on Hypothesis Testing-1

Hypothesis Testing-2

par(mfrow = c(1, 1))

# Count matches by (who won the match) x (whether that team won the toss).
# Team1 is treated as the home team and Team2 as the away team.
# The trailing numbers are the counts recorded when this analysis was run.
# home team won the toss and the match
htw <- nrow(data[(data$WinningTeam == data$Team1 & data$TossWinner == data$Team1), ]) # 203
# home team lost the toss and won the match
hlw <- nrow(data[(data$WinningTeam == data$Team1 & data$TossWinner == data$Team2), ]) # 277
# away team won the toss and the match
aww <- nrow(data[(data$WinningTeam == data$Team2 & data$TossWinner == data$Team2), ]) # 286
# away team lost the toss and won the match
alw <- nrow(data[(data$WinningTeam == data$Team2 & data$TossWinner == data$Team1), ]) # 180

# 2x2 contingency table, filled column-wise: rows are the winner's toss
# outcome, columns are which side won the match. The dimnames label the bar
# groups and supply the legend text; they do not affect the chi-squared test.
table <- matrix(
  c(htw, hlw, aww, alw),
  nrow = 2,
  dimnames = list(
    c("Won Toss", "Lost Toss"),
    c("Home Team", "Away Team")
  )
)

# Column proportions: within each winning side, the share of wins that came
# after winning vs. losing the toss.
table2 <- table %>% prop.table(margin = 2)

# The bars show proportions, so the y-axis is labelled accordingly. The legend
# is drawn once, explicitly, from the row labels.
barplot(
  table2,
  main = "Home Team Wins vs Away Team Wins",
  xlab = "Team 1 vs Team 2",
  ylab = "Proportion of matches won",
  ylim = c(0, 0.7),
  beside = TRUE,
  col = c("green", "red")
)
legend("topright", legend = rownames(table2), fill = c("green", "red"), title = "Legend")

- The bar plot above shows the impact of the toss on the result, separately for matches won by the home team and matches won by the away team.

Hypothesis Testing-2 Cont.

# Chi-squared test on the 2x2 table of match winner (home/away) by
# the winner's toss outcome (won/lost).
result <- chisq.test(x = table)
print(result)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table
## X-squared = 33.717, df = 1, p-value = 6.374e-09

Discussion on Hypothesis Testing-2

Hypothesis Testing-3

# Per-season counts for every (tossanalysis, homeanalysis) combination.
# The result is stored under its own name so the season summary `ddf`
# used by the earlier plots and tests is not overwritten.
ggrouped_df <- data[, c("tossanalysis", "homeanalysis", "Season")]
toss_home_df <- as.data.frame(table(ggrouped_df))

# won toss and match on home ground (one count per season)
hww <- toss_home_df$Freq[(toss_home_df$homeanalysis == 1 & toss_home_df$tossanalysis == 1)]
# won toss and match on away ground (one count per season)
aww <- toss_home_df$Freq[(toss_home_df$homeanalysis == 0 & toss_home_df$tossanalysis == 1)]

# Two-sample t-test (unpaired), one-sided: is the mean number of
# toss-and-match wins at home less than the mean away?
pttest <- t.test(hww, aww,
  paired = FALSE,
  alternative = "less",
  conf.level = .95
)
pttest
## 
##  Welch Two Sample t-test
## 
## data:  hww and aww
## t = -2.3932, df = 27.645, p-value = 0.01187
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##       -Inf -1.598338
## sample estimates:
## mean of x mean of y 
##  13.53333  19.06667

Discussion on Hypothesis Testing-3

References