Sukhpreet Singh Sethi (s3856149) & Leon Lanvin Lobo (s3894164)
Last updated: 28 May, 2023
# Load the IPL match records and keep only the columns this analysis uses.
df <- read_csv("IPL_Matches_2008_2022.csv")
data <- df[, c(
  "TossWinner", "WinningTeam", "Team1", "Team2", "TossDecision", "Season"
)]

# tossanalysis: 1 when the toss winner also won the match, 0 when it did not,
# and -1 when the comparison is NA (e.g. no winning team recorded).
data$tossanalysis <- as.numeric(data$TossWinner == data$WinningTeam)
data$tossanalysis[is.na(data$tossanalysis)] <- -1

# Keep only the rows flagged 0 or 1, i.e. drop the -1 sentinel rows.
data <- data[data$tossanalysis %in% c(0, 1), ]

# homeanalysis: start every row at -1, then flag matches won by Team1 with 1.
data$homeanalysis <- rep(-1, nrow(data))
data$homeanalysis[which(data$Team1 == data$WinningTeam)] <- 1
data$homeanalysis[data$Team2 == data$WinningTeam] <- 0

| TossWinner | WinningTeam | Team1 | Team2 | TossDecision | Season | tossanalysis | homeanalysis |
|---|---|---|---|---|---|---|---|
| Rajasthan Royals | Gujarat Titans | Rajasthan Royals | Gujarat Titans | bat | 2022 | 0 | 0 |
| Rajasthan Royals | Rajasthan Royals | Royal Challengers Bangalore | Rajasthan Royals | field | 2022 | 1 | 0 |
| Lucknow Super Giants | Royal Challengers Bangalore | Royal Challengers Bangalore | Lucknow Super Giants | field | 2022 | 0 | 1 |
| Gujarat Titans | Gujarat Titans | Rajasthan Royals | Gujarat Titans | field | 2022 | 1 | 0 |
| Sunrisers Hyderabad | Punjab Kings | Sunrisers Hyderabad | Punjab Kings | bat | 2022 | 0 | 0 |
| Mumbai Indians | Mumbai Indians | Delhi Capitals | Mumbai Indians | field | 2022 | 1 | 0 |
# Build a per-season summary: how often the toss winner won or lost, and how
# often Team1 ("home") or Team2 ("away") won.

# Toss outcome counts per season. as.data.frame() on the two-way table yields
# one row per (tossanalysis, Season) combination with its count in Freq.
grouped_df <- data[, c("tossanalysis", "Season")]
tdf <- table(grouped_df)
tdf <- as.data.frame(tdf)
toss_winner_won <- tdf$Freq[tdf$tossanalysis == 1]
toss_winner_lost <- tdf$Freq[tdf$tossanalysis == 0]

# Season labels exactly as they appear in the data, in table (level) order.
Year <- unique(tdf$Season)

# Sequential season index used as the x axis of the plots. Derived from the
# seasons actually present instead of a hard-coded 1:15, so the data.frame()
# call below keeps working if the dataset gains or loses a season.
Season <- seq_along(Year)

# Home/away outcome counts per season, built the same way.
hgrouped_df <- data[, c("homeanalysis", "Season")]
hdf <- table(hgrouped_df)
hdf <- as.data.frame(hdf)
home_team_won <- hdf$Freq[hdf$homeanalysis == 1]
away_team_won <- hdf$Freq[hdf$homeanalysis == 0]

# One row per season.
ddf <- data.frame(
  Season, Year,
  toss_winner_won, toss_winner_lost,
  home_team_won, away_team_won
)
knitr::kable(head(ddf))

| Season | Year | toss_winner_won | toss_winner_lost | home_team_won | away_team_won |
|---|---|---|---|---|---|
| 1 | 2007/08 | 28 | 30 | 30 | 28 |
| 2 | 2009 | 33 | 24 | 31 | 26 |
| 3 | 2009/10 | 31 | 29 | 33 | 27 |
| 4 | 2011 | 38 | 34 | 39 | 33 |
| 5 | 2012 | 33 | 41 | 33 | 41 |
| 6 | 2013 | 36 | 40 | 54 | 22 |
# Toss analysis: per-season number of matches the toss winner lost (red)
# and won (green), plotted against the season index.
plot(
  x = Season,
  y = ddf$toss_winner_lost,
  type = "o",
  col = "red",
  ylim = c(20, 50),
  main = "Toss Analysis",
  xlab = "Seasons",
  ylab = "Number of Matches"
)
lines(x = Season, y = ddf$toss_winner_won, type = "o", col = "green")
legend(
  x = "topright",
  legend = c("Loss", "Win"),
  col = c("red", "green"),
  lty = 1
)
- The graph above shows the season-by-season trend in how often the toss
winner went on to win or lose the match.
# Home ground analysis: per-season number of matches won by the away team
# (red) and by the home team (green), plotted against the season index.
plot(
  x = Season,
  y = ddf$away_team_won,
  type = "o",
  col = "red",
  ylim = c(20, 60),
  main = "Home Ground Analysis",
  xlab = "Seasons",
  ylab = "Number of Matches"
)
lines(x = Season, y = ddf$home_team_won, type = "o", col = "green")
legend(
  x = "topright",
  legend = c("Away team won", "Home team won"),
  col = c("red", "green"),
  lty = 1
)
- The graph above shows the season-by-season trend in how often the home
team or the away team won the match.
# Descriptive statistics (min, quartiles, max, mean, SD) of the per-season
# number of matches won by the toss winner.
table1 <- summarise(
  ddf,
  Min = min(toss_winner_won, na.rm = TRUE),
  Q1 = quantile(toss_winner_won, probs = 0.25, na.rm = TRUE),
  Median = median(toss_winner_won, na.rm = TRUE),
  Q3 = quantile(toss_winner_won, probs = 0.75, na.rm = TRUE),
  Max = max(toss_winner_won, na.rm = TRUE),
  Mean = mean(toss_winner_won, na.rm = TRUE),
  SD = sd(toss_winner_won, na.rm = TRUE)
)
knitr::kable(table1)

| Min | Q1 | Median | Q3 | Max | Mean | SD |
|---|---|---|---|---|---|---|
| 25 | 30.5 | 33 | 35.5 | 38 | 32.6 | 3.621365 |
# Descriptive statistics (min, quartiles, max, mean, SD) of the per-season
# number of matches lost by the toss winner.
table2 <- summarise(
  ddf,
  Min = min(toss_winner_lost, na.rm = TRUE),
  Q1 = quantile(toss_winner_lost, probs = 0.25, na.rm = TRUE),
  Median = median(toss_winner_lost, na.rm = TRUE),
  Q3 = quantile(toss_winner_lost, probs = 0.75, na.rm = TRUE),
  Max = max(toss_winner_lost, na.rm = TRUE),
  Mean = mean(toss_winner_lost, na.rm = TRUE),
  SD = sd(toss_winner_lost, na.rm = TRUE)
)
knitr::kable(table2)

| Min | Q1 | Median | Q3 | Max | Mean | SD |
|---|---|---|---|---|---|---|
| 23 | 25.5 | 29 | 34.5 | 41 | 30.46667 | 5.853774 |
# Two-sided paired t-test (95% confidence) comparing, season by season, the
# number of matches the toss winner won with the number it lost.
pttest <- t.test(
  x = ddf$toss_winner_won,
  y = ddf$toss_winner_lost,
  alternative = "two.sided",
  paired = TRUE,
  conf.level = 0.95
)
pttest
##
## Paired t-test
##
## data: ddf$toss_winner_won and ddf$toss_winner_lost
## t = 1.211, df = 14, p-value = 0.246
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -1.645080 5.911746
## sample estimates:
## mean difference
## 2.133333
# Cross-tabulate the toss outcome against which side won, then plot the
# column proportions as a grouped bar chart.
# Team1 is treated as the home side and Team2 as the away side, as in the
# homeanalysis column. NOTE(review): confirm the dataset orders teams that way.
# The trailing numbers are the counts recorded by the original author.
par(mfrow = c(1, 1))

# home team won the toss and the match
htw <- sum(
  data$WinningTeam == data$Team1 & data$TossWinner == data$Team1,
  na.rm = TRUE
) # 203
# home team lost the toss and won the match
hlw <- sum(
  data$WinningTeam == data$Team1 & data$TossWinner == data$Team2,
  na.rm = TRUE
) # 277
# away team won the toss and the match
aww <- sum(
  data$WinningTeam == data$Team2 & data$TossWinner == data$Team2,
  na.rm = TRUE
) # 286
# away team lost the toss and won the match
alw <- sum(
  data$WinningTeam == data$Team2 & data$TossWinner == data$Team1,
  na.rm = TRUE
) # 180

# 2 x 2 matrix filled column-wise.
#   rows:    match winner won the toss / match winner lost the toss
#   columns: home team won / away team won
# The names `table` and `table2` are kept unchanged in case other chunks
# refer to them; note that `table` shadows the base function as a variable.
table <- matrix(c(htw, hlw, aww, alw), nrow = 2)

# Proportions within each column (margin = 2), so each bar group sums to 1.
table2 <- prop.table(table, margin = 2)

# Grouped bars. The matrix has no dimnames, so the group labels are passed
# explicitly and the legend is drawn by the separate legend() call below.
barplot(
  table2,
  beside = TRUE,
  names.arg = c("Home team won", "Away team won"),
  col = c("green", "red"),
  ylim = c(0, 0.7),
  main = "Home Team Wins vs Away Team Wins",
  xlab = "Team 1 vs Team 2",
  ylab = "Proportion of matches won"
)
legend(
  "topright",
  legend = c("Won Toss", "Lost Toss"),
  fill = c("green", "red"),
  title = "Legend"
)
- The bar plot above shows the impact of the toss on the result,
separately for matches won by the home team and by the away team.
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table
## X-squared = 33.717, df = 1, p-value = 6.374e-09
# Count matches per (tossanalysis, homeanalysis, Season) combination.
# Note: this reassigns `ddf`, replacing the per-season summary frame.
ggrouped_df <- data[, c("tossanalysis", "homeanalysis", "Season")]
ddf <- as.data.frame(table(ggrouped_df))

# Per-season counts where the toss winner won the match and Team1 (home) won.
hww <- with(ddf, Freq[tossanalysis == 1 & homeanalysis == 1])
# Per-season counts where the toss winner won the match and Team2 (away) won.
aww <- with(ddf, Freq[tossanalysis == 1 & homeanalysis == 0])

# One-sided unpaired t-test at 95% confidence; with paired = FALSE and the
# default var.equal this is Welch's test. The alternative is that the mean
# of hww is less than the mean of aww.
pttest <- t.test(
  x = hww,
  y = aww,
  alternative = "less",
  paired = FALSE,
  conf.level = 0.95
)
pttest
##
## Welch Two Sample t-test
##
## data: hww and aww
## t = -2.3932, df = 27.645, p-value = 0.01187
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -1.598338
## sample estimates:
## mean of x mean of y
## 13.53333 19.06667