An Analysis of the Women’s Premier League 2023

Deepak Varughese
2023-06-13

Introduction

This is an analysis of player performances in the cricket tournament Women’s Premier League played in India in March 2023. This data analysis is submitted as part of the Data Visualization Capstone Project offered by JHU.

First, we import the data from Cricsheet and take a quick look at what is available. The data is available at https://cricsheet.org/matches/ and has been downloaded to the project folder. We can see that the data has 4,999 rows and 22 columns; the data type of each column is shown next to its name.

# Read the ball-by-ball CSV stored in the project folder into `data`.
# NOTE(review): import(), here() and glimpse() are presumably attached by a
# setup chunk that is not shown in this document (rio, here, dplyr) - confirm.
data <- here("all_matches.csv") %>%
  import()

# Print every column with its type and first few values.
data %>%
  glimpse()
Rows: 4,999
Columns: 22
$ match_id               <int> 1358929, 1358929, 1358929, 1358929, 1…
$ season                 <chr> "2022/23", "2022/23", "2022/23", "202…
$ start_date             <IDate> 2023-03-04, 2023-03-04, 2023-03-04,…
$ venue                  <chr> "Dr DY Patil Sports Academy, Mumbai",…
$ innings                <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ ball                   <dbl> 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 1.1, 1.…
$ batting_team           <chr> "Mumbai Indians", "Mumbai Indians", "…
$ bowling_team           <chr> "Gujarat Giants", "Gujarat Giants", "…
$ striker                <chr> "YH Bhatia", "YH Bhatia", "YH Bhatia"…
$ non_striker            <chr> "HK Matthews", "HK Matthews", "HK Mat…
$ bowler                 <chr> "A Gardner", "A Gardner", "A Gardner"…
$ runs_off_bat           <int> 0, 0, 0, 0, 1, 1, 6, 4, 0, 1, 0, 0, 0…
$ extras                 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
$ wides                  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ noballs                <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ byes                   <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ legbyes                <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ penalty                <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ wicket_type            <chr> "", "", "", "", "", "", "", "", "", "…
$ player_dismissed       <chr> "", "", "", "", "", "", "", "", "", "…
$ other_wicket_type      <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ other_player_dismissed <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N…

Batting Analysis

In the first section we analyse the batting performances of individual players. We will look at 1. The most runs in the tournament 2. The highest strike rates in the tournament (with a minimum of 16 balls faced)

##  Data Prep Batting
# Collapse the ball-by-ball rows into one summary row per batter (striker).
batting_data <- data %>%
  group_by(striker) %>%
  summarise(
    # Runs credited to the batter; extras are not part of runs_off_bat.
    total_runs = sum(runs_off_bat),
    # Number of rows (deliveries) recorded with this player on strike.
    # NOTE(review): n() also counts rows where `wides` is not NA. Wides are
    # not normally counted as balls faced, so strike rates may be slightly
    # understated - confirm whether those rows should be excluded.
    total_balls = n(),
    # Number of distinct matches in which the player was on strike.
    no_of_innings = n_distinct(match_id)
  ) %>%
  # Strike rate: runs scored per 100 deliveries.
  mutate(strike_rate = total_runs / total_balls * 100)


Viz 1. A gt table (because even tables need to look good)

# Top five run-scorers as a gt object, with a team logo per row.
# The team names and logo URLs are hard-coded by position, so they must stay
# in the same order as the rows returned by arrange() + head().
most_runs_gt <- batting_data %>%
  arrange(desc(total_runs)) %>%
  head(5) %>%
  mutate(
    # `team` is not displayed (it is dropped by select() below); it records
    # which team each logo URL belongs to.
    team = c(
      "Delhi Capitals",
      "Mumbai Indians",
      "UP Warriorz",
      "Mumbai Indians",
      "Mumbai Indians"
    ),
    logo = c(
      "https://logowik.com/content/uploads/images/delhi-capitals3041.jpg",
      "https://1000logos.net/wp-content/uploads/2022/08/Mumbai-Indians-Logo.png",
      "https://upload.wikimedia.org/wikipedia/en/thumb/a/a2/UP_Warriorz_Logo.webp/301px-UP_Warriorz_Logo.webp.png",
      "https://1000logos.net/wp-content/uploads/2022/08/Mumbai-Indians-Logo.png",
      "https://1000logos.net/wp-content/uploads/2022/08/Mumbai-Indians-Logo.png"
    )
  ) %>%
  select(striker, logo, total_runs, total_balls, no_of_innings, strike_rate) %>%
  gt() %>%
  # Render the URL column as images
  gt_img_rows(columns = logo, height = 20)

# Add the title, number formatting, readable column labels and theme.
most_runs_gt_table <- most_runs_gt %>%
  tab_header(title = "Top Scorers of WPL 2023") %>%
  # Bare column name: `columns = vars(...)` is deprecated in gt
  fmt_number(columns = strike_rate, decimals = 2) %>%
  cols_label(
    striker = "Batter",
    total_runs = "Runs",
    total_balls = "Balls",
    no_of_innings = "Innings",
    strike_rate = "SR",
    logo = ""
  ) %>%
  gt_theme_538()

most_runs_gt_table
Top Scorers of WPL 2023
Batter Runs Balls Innings SR
MM Lanning 345 260 9 132.69
NR Sciver 332 239 10 138.91
TM McGrath 302 196 8 154.08
H Kaur 281 214 9 131.31
HK Matthews 271 221 10 122.62

Viz 2. A horizontal bar chart with players on the vertical axis and runs scored on the horizontal axis

## Visualization for most runs

# Horizontal bar chart of the ten highest run-scorers; bars are ordered by
# runs, and coord_flip() puts the players on the vertical axis.
most_runs_viz <- batting_data %>%
  arrange(desc(total_runs)) %>%
  head(n = 10) %>%
  ggplot(aes(x = reorder(striker, total_runs), y = total_runs)) +
  geom_col() +
  coord_flip() +
  labs(title = "Most Runs in WPL 2023") +
  theme_economist() +
  # Strip the background, grid, border, axis titles and ticks
  theme(
    plot.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank()
  )


most_runs_viz

Viz 3. A gt table of players with highest strike rate

# Strike Rate

# Five highest strike rates as a gt object, with a team logo per row.
# The team names and logo URLs are hard-coded by position, so they must stay
# in the same order as the rows returned by arrange() + head().
strike_rate_gt <- batting_data %>%
  # Apply the 16-ball minimum stated in the introduction to this section, so
  # that a handful of deliveries cannot top the ranking.
  filter(total_balls >= 16) %>%
  arrange(desc(strike_rate)) %>%
  head(5) %>%
  mutate(
    # `team` is not displayed (it is dropped by select() below); it records
    # which team each logo URL belongs to.
    team = c(
      "Delhi Capitals",
      "Gujarat Giants",
      "RCB",
      "UP Warriorz",
      "Delhi Capitals"
    ),
    logo = c(
      "https://logowik.com/content/uploads/images/delhi-capitals3041.jpg",
      "https://telugu.mykhel.com/img/1200x60x675/2023/02/gujaratgiants-1677427143.jpg",
      "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRhQeM6pkJFIuz04WddU0ZSoxziK4Z2MQqclMSdeE-J&s",
      "https://upload.wikimedia.org/wikipedia/en/thumb/a/a2/UP_Warriorz_Logo.webp/301px-UP_Warriorz_Logo.webp.png",
      "https://logowik.com/content/uploads/images/delhi-capitals3041.jpg"
    )
  ) %>%
  select(striker, logo, total_runs, total_balls, no_of_innings, strike_rate) %>%
  gt() %>%
  # Render the URL column as images
  gt_img_rows(columns = logo, height = 20)


# Add the title, number formatting, readable column labels and theme.
strike_rate_gt_table <- strike_rate_gt %>%
  tab_header(title = "Highest SR of WPL 2023") %>%
  # Bare column name: `columns = vars(...)` is deprecated in gt
  fmt_number(columns = strike_rate, decimals = 2) %>%
  cols_label(
    striker = "Batter",
    total_runs = "Runs",
    total_balls = "Balls",
    no_of_innings = "Innings",
    strike_rate = "SR",
    logo = ""
  ) %>%
  gt_theme_538()


strike_rate_gt_table
Highest SR of WPL 2023
Batter Runs Balls Innings SR
Shafali Verma 252 144 9 175.00
SIR Dunkley 121 70 6 172.86
SFM Devine 266 156 8 170.51
GM Harris 230 144 5 159.72
RP Yadav 38 24 3 158.33

Viz 4. A horizontal bar chart of the players with the highest strike rates, with strike rate on the horizontal axis

## Strike Rate Viz

# Horizontal bar chart of the ten highest strike rates; bars are ordered by
# strike rate, and coord_flip() puts the players on the vertical axis.
sr_viz <- batting_data %>%
  # Apply the 16-ball minimum stated in the introduction to this section, so
  # that a handful of deliveries cannot top the ranking.
  filter(total_balls >= 16) %>%
  arrange(desc(strike_rate)) %>%
  head(10) %>%
  ggplot(aes(x = reorder(striker, strike_rate), y = strike_rate)) +
  geom_col() +
  coord_flip() +
  theme_economist() +
  # Strip the background, grid, border, axis titles and ticks
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank()
  ) +
  ggtitle("Highest Strike Rate in WPL 2023")

sr_viz

Viz 5. A Scatter Plot showing Runs Scored vs Strike Rate

# Scatter plot of runs scored against strike rate for batters with at least
# 50 deliveries, labelling and highlighting those who combined volume
# (> 250 runs) with speed (strike rate > 140).
runs_vs_sr <- batting_data %>%
  filter(total_balls >= 50) %>%
  ggplot(aes(x = total_runs, y = strike_rate)) +
  geom_point() +
  geom_text_repel(aes(label = striker), size = 2.5) +
  # Row-wise predicate: every row is already one player's tournament summary,
  # so no aggregate such as max() is needed. An aggregate evaluated over rows
  # that are not grouped by player would give one value for the whole data
  # set and could not tell players apart.
  gghighlight(total_runs > 250 & strike_rate > 140) +
  theme_economist() +
  # Strip the background, grid, border and ticks; axis titles are kept
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank()
  ) +
  xlab("Runs Scored") +
  ylab("Strike Rate") +
  ggtitle("Runs Scored vs Strike Rate in WPL 2023")


runs_vs_sr

Bowling Analysis

For this we will look at

  1. The highest wicket-takers in the tournament
  2. The most economical bowlers in the tournament.
# Collapse the ball-by-ball rows into one summary row per bowler.
bowling_data <- data %>%
  mutate(
    # Runs charged to the bowler on this delivery: runs off the bat plus
    # wides and no-balls (NA means none). Byes and leg byes are left out.
    runs_conceded = rowSums(
      select(., runs_off_bat, wides, noballs),
      na.rm = TRUE
    ),
    # TRUE when the dismissal is one of the types counted for the bowler.
    # %in% returns FALSE for "" and NA, so deliveries without a wicket, run
    # outs and retirements need no separate branch and are never NA.
    # NOTE(review): "hit wicket" is not in this list - confirm whether it
    # occurs in the data and should be counted for the bowler.
    is_bowler_wicket = wicket_type %in%
      c("bowled", "caught", "caught and bowled", "lbw", "stumped")
  ) %>%
  group_by(bowler) %>%
  summarize(
    wickets = sum(is_bowler_wicket),
    total_runs_conceded = sum(runs_conceded),
    # NOTE(review): n() counts every row, including wides and no-balls,
    # which are not legal deliveries - confirm whether they should be
    # excluded from the ball count used for the economy rate.
    total_balls_bowled = n(),
    no_of_innings = n_distinct(match_id),
    # Economy rate: runs conceded per six deliveries
    economy = (total_runs_conceded / total_balls_bowled) * 6
  )

Viz 6 : A gt table of the highest wicket-takers in the tournament

# Top five wicket-takers as a gt object, with a team logo per row.
# The team names and logo URLs are hard-coded by position, so they must stay
# in the same order as the rows returned by arrange() + head().
# NOTE(review): bowlers tied on wickets are kept in whatever order arrange()
# leaves them - confirm the labels still match if the data changes.
wickets_gt_object <- bowling_data %>%
  arrange(desc(wickets)) %>%
  head(5) %>%
  mutate(
    # `team` is not displayed (it is dropped by select() below); it records
    # which team each logo URL belongs to.
    team = c(
      "Mumbai Indians",
      "UP Warriorz",
      "Mumbai Indians",
      "Mumbai Indians",
      "Mumbai Indians"
    ),
    logo = c(
      "https://1000logos.net/wp-content/uploads/2022/08/Mumbai-Indians-Logo.png",
      "https://upload.wikimedia.org/wikipedia/en/thumb/a/a2/UP_Warriorz_Logo.webp/301px-UP_Warriorz_Logo.webp.png",
      "https://1000logos.net/wp-content/uploads/2022/08/Mumbai-Indians-Logo.png",
      "https://1000logos.net/wp-content/uploads/2022/08/Mumbai-Indians-Logo.png",
      "https://1000logos.net/wp-content/uploads/2022/08/Mumbai-Indians-Logo.png"
    )
  ) %>%
  select(
    bowler, logo, no_of_innings, wickets,
    total_runs_conceded, total_balls_bowled, economy
  ) %>%
  gt() %>%
  # Render the URL column as images
  gt_img_rows(columns = logo, height = 20)


# Add the title, number formatting, readable column labels and theme.
wickets_gt_table <- wickets_gt_object %>%
  tab_header(title = "Most Wickets in WPL 2023") %>%
  # Bare column name: `columns = vars(...)` is deprecated in gt
  fmt_number(columns = economy, decimals = 2) %>%
  cols_label(
    bowler = "Bowler",
    logo = "",
    no_of_innings = "Innings",
    wickets = "Wickets",
    total_runs_conceded = "Runs",
    total_balls_bowled = "Balls",
    economy = "Economy"
  ) %>%
  gt_theme_538()

wickets_gt_table
Most Wickets in WPL 2023
Bowler Innings Wickets Runs Balls Economy
HK Matthews 10 16 202 209 5.80
S Ecclestone 9 16 235 216 6.53
AC Kerr 10 15 211 196 6.46
IECM Wong 10 15 210 202 6.24
S Ishaque 10 15 244 211 6.94

Viz 7 : A lollipop chart of the ten highest wicket-takers

# Lollipop chart (a point plus a segment from zero) of the ten highest
# wicket-takers, drawn horizontally via coord_flip().
wickets_dataviz <- bowling_data %>%
  arrange(desc(wickets)) %>%
  head(n = 10) %>%
  # Order the bowlers by wickets once, so both layers share the same x values
  mutate(bowler = reorder(bowler, wickets)) %>%
  ggplot(aes(x = bowler, y = wickets)) +
  geom_point() +
  # Stem of each lollipop: from 0 up to the bowler's wicket count
  geom_segment(aes(xend = bowler, y = 0, yend = wickets)) +
  coord_flip() +
  labs(title = "Most Wickets in WPL 2023") +
  theme_economist() +
  # Strip the background, grid, border, axis titles and ticks
  theme(
    plot.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank()
  )

wickets_dataviz

Viz 8 : A gt table describing the players with the best economy

# Five lowest economy rates among bowlers with at least 30 deliveries, as a
# gt object with a team logo per row.
# The team names and logo URLs are hard-coded by position, so they must stay
# in the same order as the rows returned by arrange() + head().
econ_gt_object <- bowling_data %>%
  filter(total_balls_bowled >= 30) %>%
  arrange(economy) %>%
  head(5) %>%
  mutate(
    # `team` is not displayed (it is dropped by select() below); it records
    # which team each logo URL belongs to.
    team = c(
      "Delhi Capitals",
      "Mumbai Indians",
      "Mumbai Indians",
      "Mumbai Indians",
      "Delhi Capitals"
    ),
    logo = c(
      "https://logowik.com/content/uploads/images/delhi-capitals3041.jpg",
      "https://1000logos.net/wp-content/uploads/2022/08/Mumbai-Indians-Logo.png",
      "https://1000logos.net/wp-content/uploads/2022/08/Mumbai-Indians-Logo.png",
      "https://1000logos.net/wp-content/uploads/2022/08/Mumbai-Indians-Logo.png",
      "https://logowik.com/content/uploads/images/delhi-capitals3041.jpg"
    )
  ) %>%
  select(
    bowler, logo, no_of_innings, wickets,
    total_runs_conceded, total_balls_bowled, economy
  ) %>%
  gt() %>%
  # Render the URL column as images
  gt_img_rows(columns = logo, height = 20)


# Add the title, number formatting, readable column labels and theme.
econ_chart <- econ_gt_object %>%
  tab_header(title = "Best Economy in WPL 2023 (Min 30 Balls)") %>%
  # Bare column name: `columns = vars(...)` is deprecated in gt
  fmt_number(columns = economy, decimals = 2) %>%
  cols_label(
    bowler = "Bowler",
    logo = "",
    no_of_innings = "Innings",
    wickets = "Wickets",
    total_runs_conceded = "Runs",
    total_balls_bowled = "Balls",
    economy = "Economy"
  ) %>%
  gt_theme_538()


econ_chart
Best Economy in WPL 2023 (Min 30 Balls)
Bowler Innings Wickets Runs Balls Economy
M Kapp 9 9 206 221 5.59
HK Matthews 10 16 202 209 5.80
IECM Wong 10 15 210 202 6.24
AC Kerr 10 15 211 196 6.46
S Pandey 9 10 211 196 6.46

Viz 9 : Economy Rate vs Wickets Taken, to identify the bowlers who were both economical and among the leading wicket-takers.

## Wickets vs Economy

# Scatter plot of wickets taken against economy rate for bowlers with at
# least 75 deliveries, labelling and highlighting those with more than 12
# wickets.
wickets_vs_econ <- bowling_data %>%
  filter(total_balls_bowled >= 75) %>%
  ggplot(aes(x = wickets, y = economy)) +
  geom_point() +
  geom_text_repel(aes(label = bowler), size = 2.5) +
  # Row-wise predicate: every row is already one bowler's tournament summary,
  # so no aggregate such as max() is needed. An aggregate evaluated over rows
  # that are not grouped by bowler would give one value for the whole data
  # set and could not tell bowlers apart.
  gghighlight(wickets > 12) +
  theme_economist() +
  # Strip the background, grid, border and ticks; axis titles are kept
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank()
  ) +
  xlab("Wickets Taken") +
  ylab("Economy Rate") +
  ggtitle("Wickets Taken vs Economy Rates in WPL 2023")


wickets_vs_econ