Introduction

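The Indian Premier League (IPL) is a professional Twenty20 cricket league in India. This project uses match-level and ball-by-ball IPL data to explore batting, bowling, scoring, and toss trends through a series of static and interactive visualizations.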

Data

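This project uses two CSV files. deliveries.csv is the ball-by-ball table (260,920 rows, 17 columns): each row records one delivery, including the batter, bowler, runs scored, extras, and any dismissal. matches.csv is the match-level table (1,095 rows, 20 columns): each row records one match, including the season, venue, teams, toss, and result. The two tables are linked through match_id in deliveries.csv and id in matches.csv.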

# Load the ball-by-ball (deliveries) and match-level (matches) IPL tables from
# the working directory.
# `show_col_types = FALSE` silences readr's column-specification message so it
# does not clutter the rendered report; the glimpse() calls that follow show
# the column names and types instead.
deliveries <- read_csv("deliveries.csv", show_col_types = FALSE)
matches <- read_csv("matches.csv", show_col_types = FALSE)
# Preview the column names, types, and first values of the deliveries table.
deliveries %>% glimpse()
## Rows: 260,920
## Columns: 17
## $ match_id         <dbl> 335982, 335982, 335982, 335982, 335982, 335982, 33598…
## $ inning           <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ batting_team     <chr> "Kolkata Knight Riders", "Kolkata Knight Riders", "Ko…
## $ bowling_team     <chr> "Royal Challengers Bangalore", "Royal Challengers Ban…
## $ over             <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,…
## $ ball             <dbl> 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5,…
## $ batter           <chr> "SC Ganguly", "BB McCullum", "BB McCullum", "BB McCul…
## $ bowler           <chr> "P Kumar", "P Kumar", "P Kumar", "P Kumar", "P Kumar"…
## $ non_striker      <chr> "BB McCullum", "SC Ganguly", "SC Ganguly", "SC Gangul…
## $ batsman_runs     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 6, 4, 0, 0, 0, 0, 4, 1,…
## $ extra_runs       <dbl> 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,…
## $ total_runs       <dbl> 1, 0, 1, 0, 0, 0, 1, 0, 4, 4, 6, 4, 0, 0, 0, 1, 4, 1,…
## $ extras_type      <chr> "legbyes", NA, "wides", NA, NA, NA, "legbyes", NA, NA…
## $ is_wicket        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ player_dismissed <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ dismissal_kind   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ fielder          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Preview the column names, types, and first values of the matches table.
matches %>% glimpse()
## Rows: 1,095
## Columns: 20
## $ id              <dbl> 335982, 335983, 335984, 335985, 335986, 335987, 335988…
## $ season          <chr> "2007/08", "2007/08", "2007/08", "2007/08", "2007/08",…
## $ city            <chr> "Bangalore", "Chandigarh", "Delhi", "Mumbai", "Kolkata…
## $ date            <date> 2008-04-18, 2008-04-19, 2008-04-19, 2008-04-20, 2008-…
## $ match_type      <chr> "League", "League", "League", "League", "League", "Lea…
## $ player_of_match <chr> "BB McCullum", "MEK Hussey", "MF Maharoof", "MV Bouche…
## $ venue           <chr> "M Chinnaswamy Stadium", "Punjab Cricket Association S…
## $ team1           <chr> "Royal Challengers Bangalore", "Kings XI Punjab", "Del…
## $ team2           <chr> "Kolkata Knight Riders", "Chennai Super Kings", "Rajas…
## $ toss_winner     <chr> "Royal Challengers Bangalore", "Chennai Super Kings", …
## $ toss_decision   <chr> "field", "bat", "bat", "bat", "bat", "bat", "bat", "fi…
## $ winner          <chr> "Kolkata Knight Riders", "Chennai Super Kings", "Delhi…
## $ result          <chr> "runs", "runs", "wickets", "wickets", "wickets", "wick…
## $ result_margin   <dbl> 140, 33, 9, 5, 5, 6, 9, 6, 3, 66, 7, 9, 10, 4, 13, 7, …
## $ target_runs     <dbl> 223, 241, 130, 166, 111, 167, 143, 209, 215, 183, 136,…
## $ target_overs    <dbl> 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20…
## $ super_over      <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N",…
## $ method          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ umpire1         <chr> "Asad Rauf", "MR Benson", "Aleem Dar", "SJ Davis", "BF…
## $ umpire2         <chr> "RE Koertzen", "SL Shastri", "GA Pratapkumar", "DJ Har…

Visualization 1: Top 10 Batsmen by Total Runs (Bar Chart)

Description

This bar chart shows the top 10 batsmen in IPL history based on the total number of runs scored. It highlights the most successful run-scorers and allows easy comparison of overall batting performance.

# Ten batters with the most runs off the bat, highest first.
# count() with `wt` sums `batsman_runs` per batter (ignoring NA) and
# `sort = TRUE` orders the totals in descending order.
top_batsmen <- deliveries %>%
  count(batter, wt = batsman_runs, name = "total_runs", sort = TRUE) %>%
  slice_head(n = 10)

# Horizontal bar chart; reorder() sorts the bars by run total.
top_batsmen %>%
  ggplot(aes(x = reorder(batter, total_runs), y = total_runs)) +
  geom_col(fill = "steelblue") +
  labs(
    x = "Batsman",
    y = "Total Runs",
    title = "Top 10 IPL Batsmen by Total Runs"
  ) +
  coord_flip() +
  theme_minimal()

Visualization 2: Top 10 Bowlers by Total Wickets (Bar Chart)

Description

This bar chart displays the top 10 bowlers in IPL history by total wickets taken.
It helps identify the most impactful bowlers across all seasons.

# Dismissal kinds that are not credited to the bowler under standard cricket
# scoring (run-outs and retirements belong to no bowler).
# NOTE(review): these labels are assumed to match the values stored in
# `dismissal_kind`; confirm with `count(deliveries, dismissal_kind)`.
non_bowler_dismissals <- c(
  "run out", "retired hurt", "retired out", "obstructing the field"
)

# Ten bowlers with the most wickets credited to them, highest first.
# Counting every `is_wicket == 1` row would also credit the bowler with
# run-outs and retirements that happened during their over, so those kinds are
# filtered out. `%in%` never returns NA, so a wicket row with a missing
# `dismissal_kind` is still counted.
top_bowlers <- deliveries %>%
  filter(is_wicket == 1, !dismissal_kind %in% non_bowler_dismissals) %>%
  group_by(bowler) %>%
  summarise(total_wickets = n()) %>%
  arrange(desc(total_wickets)) %>%
  slice_head(n = 10)

# Horizontal bar chart; reorder() sorts the bars by wicket total.
ggplot(top_bowlers, aes(x = reorder(bowler, total_wickets), y = total_wickets)) +
  geom_col(fill = "darkred") +
  coord_flip() +
  labs(
    title = "Top 10 IPL Bowlers by Total Wickets",
    x = "Bowler",
    y = "Total Wickets"
  ) +
  theme_minimal()

Visualization 3: Virat Kohli’s Runs per Season (Line Plot)

Description

This line plot shows Virat Kohli’s total runs scored in each IPL season.
It illustrates changes in performance over time and highlights consistency and peak seasons.

# Runs off the bat scored by V Kohli in each season.
# The batter filter runs before the join, and only `season` is brought in from
# `matches`, so the join touches one player's deliveries instead of widening
# the whole deliveries table with every match column. The result is the same
# as joining first and filtering afterwards.
kohli_season <- deliveries %>%
  filter(batter == "V Kohli") %>%
  inner_join(select(matches, id, season), by = c("match_id" = "id")) %>%
  group_by(season) %>%
  summarise(season_runs = sum(batsman_runs, na.rm = TRUE))

# `season` is a character column, so `group = 1` is needed for geom_line() to
# connect the points across the discrete x axis.
ggplot(kohli_season, aes(x = season, y = season_runs, group = 1)) +
  geom_line(color = "blue") +
  geom_point(color = "blue") +
  labs(
    title = "Virat Kohli: Runs Scored per IPL Season",
    x = "Season",
    y = "Total Runs"
  ) +
  theme_minimal()

Visualization 4: Total IPL Runs per Season (Line Plot)

Description

This visualization shows the total number of runs scored across all matches in each IPL season.
It provides insight into how scoring trends have changed over time in the league.

# Total runs (bat plus extras) across all deliveries in each season.
# Each delivery is matched to its season through match_id = id.
season_runs <- inner_join(
  deliveries,
  matches,
  by = c("match_id" = "id")
) %>%
  group_by(season) %>%
  summarise(total_runs = sum(total_runs, na.rm = TRUE))

# `group = 1` joins the points into a single line across the discrete
# season axis.
season_runs %>%
  ggplot(aes(x = season, y = total_runs, group = 1)) +
  geom_point(color = "darkgreen") +
  geom_line(color = "darkgreen") +
  labs(
    x = "Season",
    y = "Total Runs",
    title = "Total Runs Scored per IPL Season"
  ) +
  theme_minimal()

Visualization 5: Distribution of Runs per Innings (Box Plot)

Description

This box plot shows the distribution of runs scored per innings across all IPL matches. It highlights the median performance, spread, and presence of high-scoring innings.

# Total runs for each regulation innings of each match.
# NOTE(review): innings numbered above 2 are presumed to be super-over
# innings; they last a handful of balls and would appear as spurious very low
# totals, so they are excluded. Confirm with `count(deliveries, inning)`.
# `.groups = "drop"` returns an ungrouped tibble and silences the summarise()
# grouping message.
innings_runs <- deliveries %>%
  filter(inning <= 2) %>%
  group_by(match_id, inning) %>%
  summarise(runs = sum(total_runs, na.rm = TRUE), .groups = "drop")

# Single box plot of innings totals. There is no x variable, so the numeric
# x-axis text and ticks carry no meaning and are hidden.
ggplot(innings_runs, aes(y = runs)) +
  geom_boxplot(fill = "orange") +
  labs(
    title = "Distribution of Runs per Innings in IPL",
    y = "Runs per Innings",
    x = ""
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

Visualization 6: Batter Strike Rate vs Average (Scatter Plot)

Description

This scatter plot compares batting average and strike rate for IPL batsmen with more than 20 dismissals.
It helps identify aggressive versus consistent batters.

# Dismissals per player, keyed on the player who was actually out.
# Summing `is_wicket` by `batter` would charge every wicket to the striker,
# even when the dismissed player (for example in a run-out) was the
# non-striker.
dismissals <- deliveries %>%
  filter(is_wicket == 1, !is.na(player_dismissed)) %>%
  count(player_dismissed, name = "outs")

# Per-batter runs, balls faced, dismissals, average, and strike rate, limited
# to batters with more than 20 dismissals so the averages are stable.
batting_stats <- deliveries %>%
  group_by(batter) %>%
  summarise(
    runs = sum(batsman_runs, na.rm = TRUE),
    # Wides do not count as a ball faced by the batter; every other delivery
    # (including those with no extras, where extras_type is NA) does.
    balls = sum(is.na(extras_type) | extras_type != "wides")
  ) %>%
  # Batters who were never dismissed drop out here; the filter below would
  # remove them anyway.
  inner_join(dismissals, by = c("batter" = "player_dismissed")) %>%
  filter(outs > 20) %>%
  mutate(
    average = runs / outs,
    strike_rate = (runs / balls) * 100
  )

ggplot(batting_stats, aes(x = average, y = strike_rate)) +
  geom_point(alpha = 0.5, color = "purple") +
  labs(
    title = "Batting Average vs Strike Rate (IPL)",
    x = "Batting Average",
    y = "Strike Rate"
  ) +
  theme_minimal()

Visualization 7: Toss Decision vs Match Wins (Bar Chart)

Description

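This bar chart shows, for each toss decision (bat or field), the number of matches in which the team that won the toss also went on to win the match.
Because these are raw counts rather than win rates, they also reflect how often each decision is chosen; they offer a first look at whether toss strategy relates to match outcomes.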

# For each toss decision, count the matches in which the toss winner also won
# the match. Comparisons that evaluate to NA (a missing toss winner or match
# winner) are dropped from the count by `na.rm = TRUE`.
toss_results <- matches %>%
  group_by(toss_decision) %>%
  summarise(wins = sum(toss_winner == winner, na.rm = TRUE))

toss_results %>%
  ggplot(aes(x = toss_decision, y = wins)) +
  geom_col(fill = "darkcyan") +
  labs(
    x = "Toss Decision",
    y = "Number of Wins",
    title = "Match Wins Based on Toss Decision"
  ) +
  theme_minimal()

Visualization 8 (REQUIRED): Interactive Plot – Top Batsmen Runs

Description

This interactive bar chart allows users to hover over bars to see exact run totals.
Interactivity improves engagement and allows more precise exploration of player performance.

# Static horizontal bar chart of the top run-scorers, built from the
# `top_batsmen` table computed earlier in the report.
p <- top_batsmen %>%
  ggplot(aes(x = reorder(batter, total_runs), y = total_runs)) +
  geom_col(fill = "steelblue") +
  labs(
    x = "Batsman",
    y = "Total Runs",
    title = "Top 10 IPL Batsmen by Total Runs (Interactive)"
  ) +
  coord_flip() +
  theme_minimal()

# Convert the ggplot object into an interactive plotly widget.
ggplotly(p)