Introduction

We will use a dataset obtained from Kaggle. It contains two tables, “deliveries” (ball-by-ball records) and “matches” (one row per match).

For suggestions and inquiries:

Let’s head to the analysis.

Loading the packages

library(dplyr)
library(ggplot2)
library(ggthemes)
library(kableExtra)

Import

You can find the dataset at: https://www.kaggle.com/nowke9/ipldata

library(readr)
# Load the two CSV files: ball-by-ball deliveries and match-level results.
# NOTE(review): both paths are absolute and machine-specific.
ball <- readr::read_csv(file = "R:/Datasets/IPL/deliveries.csv")
matches <- readr::read_csv(file = "R:/Datasets/IPL/matches.csv")

Head of the dataset

# Preview the first rows of the delivery data as a full-width HTML table
# wrapped in an 800px-wide scroll box.
ball %>%
  head() %>%
  kable(format = "html", digits = 2, row.names = TRUE) %>%
  kable_styling(font_size = 15, full_width = TRUE) %>%
  scroll_box(width = "800px")
match_id inning batting_team bowling_team over ball batsman non_striker bowler is_super_over wide_runs bye_runs legbye_runs noball_runs penalty_runs batsman_runs extra_runs total_runs player_dismissed dismissal_kind fielder
1 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 1 DA Warner S Dhawan TS Mills 0 0 0 0 0 0 0 0 0 NA NA NA
2 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 2 DA Warner S Dhawan TS Mills 0 0 0 0 0 0 0 0 0 NA NA NA
3 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 3 DA Warner S Dhawan TS Mills 0 0 0 0 0 0 4 0 4 NA NA NA
4 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 4 DA Warner S Dhawan TS Mills 0 0 0 0 0 0 0 0 0 NA NA NA
5 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 5 DA Warner S Dhawan TS Mills 0 2 0 0 0 0 0 2 2 NA NA NA
6 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 6 S Dhawan DA Warner TS Mills 0 0 0 0 0 0 0 0 0 NA NA NA
# Preview the first rows of the match data as a full-width HTML table
# wrapped in an 800px-wide scroll box.
matches %>%
  head() %>%
  kable(format = "html", digits = 2, row.names = TRUE) %>%
  kable_styling(font_size = 15, full_width = TRUE) %>%
  scroll_box(width = "800px")
id season city date team1 team2 toss_winner toss_decision result dl_applied winner win_by_runs win_by_wickets player_of_match venue umpire1 umpire2 umpire3
1 1 2017 Hyderabad 2017-04-05 Sunrisers Hyderabad Royal Challengers Bangalore Royal Challengers Bangalore field normal 0 Sunrisers Hyderabad 35 0 Yuvraj Singh Rajiv Gandhi International Stadium, Uppal AY Dandekar NJ Llong NA
2 2 2017 Pune 2017-04-06 Mumbai Indians Rising Pune Supergiant Rising Pune Supergiant field normal 0 Rising Pune Supergiant 0 7 SPD Smith Maharashtra Cricket Association Stadium A Nand Kishore S Ravi NA
3 3 2017 Rajkot 2017-04-07 Gujarat Lions Kolkata Knight Riders Kolkata Knight Riders field normal 0 Kolkata Knight Riders 0 10 CA Lynn Saurashtra Cricket Association Stadium Nitin Menon CK Nandan NA
4 4 2017 Indore 2017-04-08 Rising Pune Supergiant Kings XI Punjab Kings XI Punjab field normal 0 Kings XI Punjab 0 6 GJ Maxwell Holkar Cricket Stadium AK Chaudhary C Shamshuddin NA
5 5 2017 Bangalore 2017-04-08 Royal Challengers Bangalore Delhi Daredevils Royal Challengers Bangalore bat normal 0 Royal Challengers Bangalore 15 0 KM Jadhav M Chinnaswamy Stadium NA NA NA
6 6 2017 Hyderabad 2017-04-09 Gujarat Lions Sunrisers Hyderabad Sunrisers Hyderabad field normal 0 Sunrisers Hyderabad 0 9 Rashid Khan Rajiv Gandhi International Stadium, Uppal A Deshmukh NJ Llong NA

Individual Records

Most number of runs

# Total batsman_runs per batsman; show the 15 largest totals.
ball %>%
  group_by(batsman) %>%
  summarise(Runs = sum(batsman_runs)) %>%
  arrange(desc(Runs)) %>%
  head(15) %>%
  kable(format = "html", digits = 2, row.names = TRUE) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    font_size = 15,
    full_width = TRUE
  ) %>%
  scroll_box(height = "300px")
## `summarise()` ungrouping output (override with `.groups` argument)
batsman Runs
1 V Kohli 5434
2 SK Raina 5415
3 RG Sharma 4914
4 DA Warner 4741
5 S Dhawan 4632
6 CH Gayle 4560
7 MS Dhoni 4477
8 RV Uthappa 4446
9 AB de Villiers 4428
10 G Gambhir 4223
11 AM Rahane 3850
12 KD Karthik 3688
13 SR Watson 3614
14 AT Rayudu 3326
15 YK Pathan 3241

Most wickets

# Wickets per bowler; show the 15 largest totals.
# Only dismissal kinds that are credited to the bowler are counted. Excluding
# just "run out" would also credit "retired hurt" and "obstructing the field"
# (both occur in this data) to whoever was bowling.
# Re-knit to refresh the rendered table; totals can shift slightly.
ball %>%
  filter(dismissal_kind %in% c("bowled", "caught", "caught and bowled",
                               "hit wicket", "lbw", "stumped")) %>%
  group_by(bowler) %>%
  summarise(wickets = n()) %>%
  arrange(desc(wickets)) %>%
  head(15) %>%
  kable(digits = 2, format = "html", row.names = TRUE,
        col.names = c("bowler", "wickets")) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = TRUE,
    font_size = 15
  ) %>%
  scroll_box(height = "300px")
## `summarise()` ungrouping output (override with `.groups` argument)
bowler wickets
1 SL Malinga 170
2 A Mishra 156
3 Harbhajan Singh 150
4 PP Chawla 149
5 DJ Bravo 147
6 B Kumar 133
7 R Ashwin 125
8 SP Narine 122
9 UT Yadav 119
10 RA Jadeja 108
11 A Nehra 106
12 R Vinay Kumar 105
13 Z Khan 103
14 YS Chahal 100
15 DW Steyn 96

Highest score by a batsman in a match

# Ten largest per-match run totals by a single batsman.
# Rank is a factor 1..10 in descending-runs order; the plot uses it so that
# repeated batsman names still get one bar each.
top_scores <- ball %>%
  group_by(batsman, match_id) %>%
  summarise(runs = sum(batsman_runs)) %>%
  ungroup() %>%
  arrange(desc(runs)) %>%
  select(batsman, runs) %>%
  head(10) %>%
  mutate(Rank = as.factor(1:10))
## `summarise()` regrouping output by 'batsman' (override with `.groups` argument)
# Horizontal lollipop chart of top_scores: thin bar plus a point per Rank,
# ordered by runs. The axis tick labels are replaced by the batsman names,
# taken in the same ascending-runs order as the reordered Rank levels.
ggplot(top_scores, aes(x = reorder(Rank, runs), y = runs)) +
  geom_col(width = 0.03, alpha = 0.3) +
  geom_point(size = 7, colour = "pink", alpha = 0.4, shape = 20, stroke = 5) +
  geom_text(aes(x = Rank, y = runs, label = runs)) +
  scale_x_discrete(labels = top_scores$batsman[order(top_scores$runs)]) +
  coord_flip() +
  labs(
    x = "",
    y = "Runs",
    title = "Highest runs scored by a player in a match",
    subtitle = ""
  ) +
  theme_classic() +
  theme(
    axis.text = element_text(size = 12, face = "bold"),
    title = element_text(size = 16)
  )

Bowlers with 5 or more wickets

# Number of 5+ wicket hauls per bowler, cross-tabulated by haul size.
# - Only dismissal kinds credited to the bowler are counted ("run out",
#   "retired hurt" and "obstructing the field" are excluded).
# - Column headers are derived from the haul sizes actually present, so the
#   table still renders if the set of sizes is not exactly {5, 6}.
# Re-knit to refresh the rendered table.
ball %>%
  filter(dismissal_kind %in% c("bowled", "caught", "caught and bowled",
                               "hit wicket", "lbw", "stumped")) %>%
  group_by(bowler, match_id) %>%
  summarise(wickets = n()) %>%
  ungroup() %>%
  filter(wickets >= 5) %>%
  select(bowler, wickets) %>%
  table() %>%
  {
    # `.` is the bowler x wickets contingency table at this point.
    kable(., digits = 2, format = "html", row.names = TRUE,
          col.names = paste(colnames(.), "Wickets"))
  } %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = TRUE,
    font_size = 15
  ) %>%
  scroll_box(height = "300px")
## `summarise()` regrouping output by 'bowler' (override with `.groups` argument)
5 Wickets 6 Wickets
A Joseph 0 1
A Kumble 1 0
A Mishra 1 0
A Zampa 0 1
AD Mascarenhas 1 0
AJ Tye 1 0
AS Rajpoot 1 0
B Kumar 1 0
Harbhajan Singh 1 0
I Sharma 1 0
JD Unadkat 2 0
JP Faulkner 2 0
L Balaji 1 0
MM Patel 1 0
RA Jadeja 1 0
SL Malinga 1 0
Sohail Tanvir 0 1
SP Narine 1 0

Most dismissed batsman by a bowler

# Bowler/batsman pairs with the most dismissals (top 10).
# Only dismissal kinds credited to the bowler are counted; excluding just
# "run out" would also count "retired hurt" and "obstructing the field".
# Re-knit to refresh the rendered table.
ball %>%
  filter(dismissal_kind %in% c("bowled", "caught", "caught and bowled",
                               "hit wicket", "lbw", "stumped")) %>%
  group_by(bowler, batsman) %>%
  summarise(total_dismissed = n()) %>%
  ungroup() %>%
  arrange(desc(total_dismissed)) %>%
  head(10) %>%
  kable(digits = 2, format = "html", row.names = TRUE) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = TRUE,
    font_size = 15
  ) %>%
  scroll_box(height = "300px")
## `summarise()` regrouping output by 'bowler' (override with `.groups` argument)
bowler batsman total_dismissed
1 Z Khan MS Dhoni 7
2 A Mishra RG Sharma 6
3 A Nehra V Kohli 6
4 B Kumar AM Rahane 6
5 B Kumar PA Patel 6
6 MM Sharma AT Rayudu 6
7 PP Chawla AT Rayudu 6
8 PP Ojha MS Dhoni 6
9 R Ashwin RV Uthappa 6
10 R Vinay Kumar RG Sharma 6

Batsman with most Runs against a single bowler

# Batsman/bowler pairs with the most runs scored off that bowler (top 10).
# Runs are the sum of batsman_runs. The earlier sum(table(batsman_runs))
# returned the number of deliveries in the group, not the runs scored.
# Re-knit to refresh the rendered table.
ball %>%
  group_by(batsman, bowler) %>%
  summarise(runs = sum(batsman_runs)) %>%
  ungroup() %>%
  arrange(desc(runs)) %>%
  head(10) %>%
  kable(digits = 2, format = "html", row.names = TRUE) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = TRUE,
    font_size = 15
  ) %>%
  scroll_box(height = "300px")
## `summarise()` regrouping output by 'batsman' (override with `.groups` argument)
batsman bowler runs
1 SK Raina Harbhajan Singh 125
2 S Dhawan Harbhajan Singh 123
3 V Kohli R Ashwin 119
4 RG Sharma PP Chawla 113
5 RG Sharma SP Narine 111
6 CH Gayle Harbhajan Singh 103
7 V Kohli A Mishra 103
8 SK Raina PP Chawla 102
9 MS Dhoni PP Ojha 100
10 V Kohli RA Jadeja 99

Batsmen with the most dismissals of each kind

# For every dismissal kind, the dismissed player with the highest count.
# Rows with a missing player or kind are removed before the per-kind
# maximum is taken.
ball %>%
  group_by(player_dismissed, dismissal_kind) %>%
  summarise(total = sum(!is.na(dismissal_kind))) %>%
  arrange(desc(total)) %>%
  na.omit() %>%
  group_by(dismissal_kind) %>%
  slice(which.max(total)) %>%
  kable(format = "html", digits = 2, row.names = TRUE) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    font_size = 15,
    full_width = TRUE
  ) %>%
  scroll_box(height = "300px")
## `summarise()` regrouping output by 'player_dismissed' (override with `.groups` argument)
player_dismissed dismissal_kind total
1 SR Watson bowled 32
2 SK Raina caught 112
3 SK Raina caught and bowled 7
4 DA Warner hit wicket 1
5 BB McCullum lbw 16
6 A Mishra obstructing the field 1
7 Harbhajan Singh retired hurt 2
8 G Gambhir run out 16
9 SK Raina stumped 8

Bowlers with the most dismissals of each kind

# For every dismissal kind, the bowler who was bowling for the most such
# dismissals. Rows with a missing kind are removed before the per-kind
# maximum is taken.
ball %>%
  group_by(bowler, dismissal_kind) %>%
  summarise(total = sum(!is.na(dismissal_kind))) %>%
  arrange(desc(total)) %>%
  na.omit() %>%
  group_by(dismissal_kind) %>%
  slice(which.max(total)) %>%
  kable(format = "html", digits = 2, row.names = TRUE) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    font_size = 15,
    full_width = TRUE
  ) %>%
  scroll_box(height = "300px")
## `summarise()` regrouping output by 'bowler' (override with `.groups` argument)
bowler dismissal_kind total
1 SL Malinga bowled 63
2 DJ Bravo caught 113
3 Harbhajan Singh caught and bowled 10
4 S Sreesanth hit wicket 2
5 PP Chawla lbw 16
6 K Ahmed obstructing the field 1
7 BB Sran retired hurt 1
8 R Vinay Kumar run out 22
9 A Mishra stumped 26

Most Maiden Overs in IPL 2008-19

# Count maiden overs per bowler; result has the bowler in column 1 and the
# count in `Freq`, sorted by descending count.
# An over is treated as a maiden only when:
#   - it is not part of a super over,
#   - the bowler delivered at least six legal balls in it (no wide and no
#     no-ball), so overs cut short are not counted, and
#   - no runs were charged to the bowler: batsman runs, wides and no-balls.
#     Byes, leg-byes and penalty runs are not held against the bowler.
# Grouping includes `inning` so overs with the same number in different
# innings of a match are never merged.
maidens <- ball %>%
  filter(is_super_over == 0) %>%
  group_by(match_id, inning, over, bowler) %>%
  summarise(
    legal_balls = sum(wide_runs == 0 & noball_runs == 0),
    runs = sum(batsman_runs + wide_runs + noball_runs)
  ) %>%
  ungroup() %>%
  filter(legal_balls >= 6, runs == 0) %>%
  select(bowler) %>%
  table() %>%
  as.data.frame() %>%
  arrange(desc(Freq))
## `summarise()` regrouping output by 'bowler', 'match_id' (override with `.groups` argument)
# Give the two columns display-friendly names, then draw a horizontal bar
# chart of every bowler with more than five maiden overs. Counts are printed
# next to the bars, so the numeric axis text is hidden.
names(maidens) <- c("Bowler", "No")
maidens %>%
  filter(No > 5) %>%
  ggplot(aes(x = reorder(Bowler, No), y = No)) +
  geom_col(width = 0.54) +
  geom_text(aes(x = Bowler, y = No, label = No), hjust = -0.1) +
  coord_flip() +
  labs(x = "", y = "", title = "Most Maiden Overs in IPL (2008-19)") +
  theme_minimal() +
  theme(axis.text.x = element_blank())

Highest run scorer of a team

# For each batting team, the batsman with the largest run total for that team.
ball %>%
  group_by(batting_team, batsman) %>%
  summarise(runs = sum(batsman_runs)) %>%
  arrange(desc(runs)) %>%
  group_by(batting_team) %>%
  slice(which.max(runs)) %>%
  kable(
    format = "html",
    digits = 2,
    row.names = TRUE,
    col.names = c("team", "batsman", "runs")
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    font_size = 15,
    full_width = TRUE
  )
## `summarise()` regrouping output by 'batting_team' (override with `.groups` argument)
team batsman runs
1 Chennai Super Kings SK Raina 4574
2 Deccan Chargers AC Gilchrist 1220
3 Delhi Capitals S Dhawan 542
4 Delhi Daredevils V Sehwag 2174
5 Gujarat Lions SK Raina 841
6 Kings XI Punjab SE Marsh 2489
7 Kochi Tuskers Kerala BB McCullum 357
8 Kolkata Knight Riders G Gambhir 3035
9 Mumbai Indians RG Sharma 3744
10 Pune Warriors RV Uthappa 1103
11 Rajasthan Royals AM Rahane 2840
12 Rising Pune Supergiant SPD Smith 472
13 Rising Pune Supergiants AM Rahane 480
14 Royal Challengers Bangalore V Kohli 5434
15 Sunrisers Hyderabad DA Warner 3306

Highest Wicket Taker of a team

# For each bowling team, the bowler with the most wickets for that team.
# Only dismissal kinds credited to the bowler are counted; excluding just
# "run out" would also count "retired hurt" and "obstructing the field".
# Re-knit to refresh the rendered table.
ball %>%
  filter(dismissal_kind %in% c("bowled", "caught", "caught and bowled",
                               "hit wicket", "lbw", "stumped")) %>%
  group_by(bowling_team, bowler) %>%
  summarise(wickets = n()) %>%
  arrange(desc(wickets)) %>%
  group_by(bowling_team) %>%
  slice(which.max(wickets)) %>%
  ungroup() %>%
  kable(digits = 2, format = "html", row.names = TRUE,
        col.names = c("team", "bowler", "wickets")) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = TRUE,
    font_size = 15
  ) %>%
  scroll_box(height = "300px")
## `summarise()` regrouping output by 'bowling_team' (override with `.groups` argument)
team bowler wickets
1 Chennai Super Kings DJ Bravo 104
2 Deccan Chargers PP Ojha 60
3 Delhi Capitals K Rabada 25
4 Delhi Daredevils A Mishra 86
5 Gujarat Lions DS Kulkarni 21
6 Kings XI Punjab PP Chawla 84
7 Kochi Tuskers Kerala RP Singh 13
8 Kolkata Knight Riders SP Narine 122
9 Mumbai Indians SL Malinga 170
10 Pune Warriors R Sharma 34
11 Rajasthan Royals SK Trivedi 65
12 Rising Pune Supergiant JD Unadkat 24
13 Rising Pune Supergiants A Zampa 12
14 Royal Challengers Bangalore YS Chahal 100
15 Sunrisers Hyderabad B Kumar 109

Players with most catches, runouts and stumping

# Fielder with the most catches, run outs and stumpings (one row per kind).
# Deliveries without a recorded fielder are removed BEFORE the per-kind
# maximum is taken. Dropping them afterwards would discard a whole kind
# whenever its largest group happened to be the missing-fielder group.
# The kinds are restricted to the three fielding dismissals in the heading.
ball %>%
  filter(
    dismissal_kind %in% c("caught", "run out", "stumped"),
    !is.na(fielder)
  ) %>%
  group_by(dismissal_kind, fielder) %>%
  summarise(total = n()) %>%
  arrange(desc(total)) %>%
  group_by(dismissal_kind) %>%
  slice(which.max(total)) %>%
  ungroup() %>%
  kable(digits = 2, format = "html", row.names = TRUE) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = TRUE,
    font_size = 15
  )
## `summarise()` regrouping output by 'dismissal_kind' (override with `.groups` argument)
dismissal_kind fielder total
1 caught KD Karthik 109
2 run out MS Dhoni 23
3 stumped MS Dhoni 38

Most number of Man of the Match Awards

# Ten players with the most player-of-the-match awards, as a horizontal
# lollipop chart.
# The tally uses count() with explicit column names. The earlier
# table() %>% data.frame() version referred to the player column as `.`,
# a name that only appears under some R versions when table() is piped.
# Matches without an award are skipped, as table() also ignores NA.
matches %>%
  filter(!is.na(player_of_match)) %>%
  count(player_of_match, name = "awards", sort = TRUE) %>%
  head(10) %>%
  ggplot(aes(x = reorder(player_of_match, awards), y = awards)) +
  geom_bar(stat = "identity", width = 0.03, alpha = 0.3) +
  geom_point(size = 7, color = "pink", alpha = 0.4, shape = 20, stroke = 5) +
  geom_text(aes(label = awards)) +
  theme_classic() +
  coord_flip() +
  labs(x = "", y = "", title = "Most number of MoM awards")

Highest total individual runs against a team (aggregate)

# For each bowling team, the batsman with the largest run aggregate
# scored against it.
ball %>%
  group_by(bowling_team, batsman) %>%
  summarise(total_runs = sum(batsman_runs)) %>%
  arrange(desc(total_runs)) %>%
  na.omit() %>%
  group_by(bowling_team) %>%
  slice(which.max(total_runs)) %>%
  kable(format = "html", digits = 2, row.names = TRUE) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    font_size = 15,
    full_width = TRUE
  ) %>%
  scroll_box(height = "300px")
## `summarise()` regrouping output by 'bowling_team' (override with `.groups` argument)
bowling_team batsman total_runs
1 Chennai Super Kings V Kohli 749
2 Deccan Chargers R Dravid 339
3 Delhi Capitals AD Russell 118
4 Delhi Daredevils V Kohli 763
5 Gujarat Lions DA Warner 336
6 Kings XI Punjab DA Warner 833
7 Kochi Tuskers Kerala SR Tendulkar 100
8 Kolkata Knight Riders DA Warner 835
9 Mumbai Indians SK Raina 824
10 Pune Warriors CH Gayle 383
11 Rajasthan Royals SK Raina 620
12 Rising Pune Supergiant PA Patel 108
13 Rising Pune Supergiants V Kohli 188
14 Royal Challengers Bangalore MS Dhoni 808
15 Sunrisers Hyderabad SR Watson 531

Highest individual wickets against a team (aggregate)

# For each batting team, the bowler who has taken the most wickets against it.
# Only dismissal kinds credited to the bowler are counted; excluding just
# "run out" would also count "retired hurt" and "obstructing the field".
# Re-knit to refresh the rendered table.
ball %>%
  filter(dismissal_kind %in% c("bowled", "caught", "caught and bowled",
                               "hit wicket", "lbw", "stumped")) %>%
  group_by(batting_team, bowler) %>%
  summarise(wickets = n()) %>%
  arrange(desc(wickets)) %>%
  group_by(batting_team) %>%
  slice(which.max(wickets)) %>%
  ungroup() %>%
  kable(digits = 2, format = "html", row.names = TRUE) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = TRUE,
    font_size = 15
  ) %>%
  scroll_box(height = "300px")
## `summarise()` regrouping output by 'batting_team' (override with `.groups` argument)
batting_team bowler wickets
1 Chennai Super Kings SL Malinga 31
2 Deccan Chargers SL Malinga 19
3 Delhi Capitals B Kumar 6
4 Delhi Daredevils Harbhajan Singh 21
5 Gujarat Lions B Kumar 12
6 Kings XI Punjab UT Yadav 29
7 Kochi Tuskers Kerala I Sharma 5
8 Kolkata Knight Riders B Kumar 27
9 Mumbai Indians DJ Bravo 28
10 Pune Warriors SL Malinga 12
11 Rajasthan Royals A Mishra 30
12 Rising Pune Supergiant AJ Tye 5
13 Rising Pune Supergiants SR Watson 5
14 Royal Challengers Bangalore A Nehra 23
15 Sunrisers Hyderabad DJ Bravo 17

Team Records

IPL Winners

# Season champions: within each season take the match with the largest id
# and report its winner.
# NOTE(review): this assumes the final is the highest-id match of its
# season - confirm against the dataset.
matches %>%
  group_by(season) %>%
  slice(which.max(id)) %>%
  select(season, winner) %>%
  kable(format = "html", digits = 2, row.names = TRUE) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    font_size = 15,
    full_width = TRUE
  ) %>%
  scroll_box(height = "300px")
season winner
1 2008 Rajasthan Royals
2 2009 Deccan Chargers
3 2010 Chennai Super Kings
4 2011 Chennai Super Kings
5 2012 Kolkata Knight Riders
6 2013 Mumbai Indians
7 2014 Kolkata Knight Riders
8 2015 Mumbai Indians
9 2016 Sunrisers Hyderabad
10 2017 Mumbai Indians
11 2018 Chennai Super Kings
12 2019 Mumbai Indians

Decision at Toss

# Tabulate toss decisions, add each decision's share in percent, and draw a
# pie chart labelled with the rounded percentages.
# NOTE(review): the legend text and colours are hard-coded and rely on the
# table listing "bat" before "field".
toss <- as.data.frame(table(matches$toss_decision))
toss$percentage <- (toss$Freq / sum(toss$Freq)) * 100
pie(
  x = toss$Freq,
  labels = round(toss$percentage),
  col = c("red", "blue"),
  main = "Toss Decision (%)"
)
legend(
  x = "topright",
  legend = c("bat", "field"),
  fill = c("red", "blue"),
  cex = 0.8
)

Teams winning by highest margin of runs

# Ten matches won by the largest run margin, with a Rank factor (1..10 in
# descending-margin order) used by the plot to keep one bar per match.
team <- matches %>%
  arrange(desc(win_by_runs)) %>%
  select(winner, win_by_runs) %>%
  head(10) %>%
  mutate(Rank = as.factor(1:10))
  
# Horizontal lollipop chart of the largest winning margins. Bars are ordered
# by margin via Rank; tick labels are replaced by the winning team names in
# the same ascending-margin order.
ggplot(team, aes(x = reorder(Rank, win_by_runs), y = win_by_runs)) +
  geom_col(width = 0.03, alpha = 0.3) +
  geom_point(size = 7, colour = "cyan", alpha = 0.4, shape = 20, stroke = 5) +
  geom_text(aes(x = Rank, y = win_by_runs, label = win_by_runs)) +
  scale_x_discrete(labels = team$winner[order(team$win_by_runs)]) +
  coord_flip() +
  labs(x = "", y = "", title = "Highest win by runs", subtitle = "") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 12, face = "bold"),
    title = element_text(size = 16)
  )

Teams scoring highest runs in a match

# Ten largest team totals in a single match (sum of total_runs per match and
# batting team), with a Rank factor 1..10 in descending-runs order.
team2 <- ball %>%
  group_by(match_id, batting_team) %>%
  summarise(runs = sum(total_runs)) %>%
  ungroup() %>%
  arrange(desc(runs)) %>%
  head(10) %>%
  mutate(Rank = as.factor(1:10))
## `summarise()` regrouping output by 'match_id' (override with `.groups` argument)
# Horizontal lollipop chart of the highest team totals. Bars are ordered by
# runs via Rank; tick labels are replaced by the batting team names in the
# same ascending-runs order.
ggplot(team2, aes(x = reorder(Rank, runs), y = runs)) +
  geom_col(width = 0.03, alpha = 0.3) +
  geom_point(size = 7, colour = "cyan", alpha = 0.4, shape = 20, stroke = 5) +
  geom_text(aes(x = Rank, y = runs, label = runs)) +
  scale_x_discrete(labels = team2$batting_team[order(team2$runs)]) +
  coord_flip() +
  labs(
    x = "",
    y = "",
    title = "Highest runs by a team in a match",
    subtitle = ""
  ) +
  theme_classic() +
  theme(
    axis.text = element_text(size = 12, face = "bold"),
    title = element_text(size = 16)
  )