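The Indian Premier League (IPL) is a professional Twenty20 cricket league. This project explores IPL data to identify the leading batsmen and bowlers, track scoring trends across seasons, and examine how toss decisions relate to match outcomes.
The analysis uses two files: matches.csv, with one row per match (1,095 rows, 20 columns covering season, venue, teams, toss and result), and deliveries.csv, with one row per ball bowled (260,920 rows, 17 columns covering batter, bowler, runs and dismissal details). The two are linked by the match id.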
# Load the ball-by-ball data (one row per delivery) and the match-level data.
# `show_col_types = FALSE` silences readr's column-specification message so it
# does not clutter the rendered report; the parsed column types are displayed
# by the glimpse() calls that follow.
deliveries <- read_csv("deliveries.csv", show_col_types = FALSE)
matches <- read_csv("matches.csv", show_col_types = FALSE)
# Print a transposed preview of the ball-by-ball table: one line per column
# with its type and first few values.
deliveries %>% glimpse()
## Rows: 260,920
## Columns: 17
## $ match_id <dbl> 335982, 335982, 335982, 335982, 335982, 335982, 33598…
## $ inning <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ batting_team <chr> "Kolkata Knight Riders", "Kolkata Knight Riders", "Ko…
## $ bowling_team <chr> "Royal Challengers Bangalore", "Royal Challengers Ban…
## $ over <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,…
## $ ball <dbl> 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5,…
## $ batter <chr> "SC Ganguly", "BB McCullum", "BB McCullum", "BB McCul…
## $ bowler <chr> "P Kumar", "P Kumar", "P Kumar", "P Kumar", "P Kumar"…
## $ non_striker <chr> "BB McCullum", "SC Ganguly", "SC Ganguly", "SC Gangul…
## $ batsman_runs <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 6, 4, 0, 0, 0, 0, 4, 1,…
## $ extra_runs <dbl> 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,…
## $ total_runs <dbl> 1, 0, 1, 0, 0, 0, 1, 0, 4, 4, 6, 4, 0, 0, 0, 1, 4, 1,…
## $ extras_type <chr> "legbyes", NA, "wides", NA, NA, NA, "legbyes", NA, NA…
## $ is_wicket <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ player_dismissed <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ dismissal_kind <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ fielder <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Print a transposed preview of the match-level table: one line per column
# with its type and first few values.
matches %>% glimpse()
## Rows: 1,095
## Columns: 20
## $ id <dbl> 335982, 335983, 335984, 335985, 335986, 335987, 335988…
## $ season <chr> "2007/08", "2007/08", "2007/08", "2007/08", "2007/08",…
## $ city <chr> "Bangalore", "Chandigarh", "Delhi", "Mumbai", "Kolkata…
## $ date <date> 2008-04-18, 2008-04-19, 2008-04-19, 2008-04-20, 2008-…
## $ match_type <chr> "League", "League", "League", "League", "League", "Lea…
## $ player_of_match <chr> "BB McCullum", "MEK Hussey", "MF Maharoof", "MV Bouche…
## $ venue <chr> "M Chinnaswamy Stadium", "Punjab Cricket Association S…
## $ team1 <chr> "Royal Challengers Bangalore", "Kings XI Punjab", "Del…
## $ team2 <chr> "Kolkata Knight Riders", "Chennai Super Kings", "Rajas…
## $ toss_winner <chr> "Royal Challengers Bangalore", "Chennai Super Kings", …
## $ toss_decision <chr> "field", "bat", "bat", "bat", "bat", "bat", "bat", "fi…
## $ winner <chr> "Kolkata Knight Riders", "Chennai Super Kings", "Delhi…
## $ result <chr> "runs", "runs", "wickets", "wickets", "wickets", "wick…
## $ result_margin <dbl> 140, 33, 9, 5, 5, 6, 9, 6, 3, 66, 7, 9, 10, 4, 13, 7, …
## $ target_runs <dbl> 223, 241, 130, 166, 111, 167, 143, 209, 215, 183, 136,…
## $ target_overs <dbl> 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20…
## $ super_over <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N",…
## $ method <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ umpire1 <chr> "Asad Rauf", "MR Benson", "Aleem Dar", "SJ Davis", "BF…
## $ umpire2 <chr> "RE Koertzen", "SL Shastri", "GA Pratapkumar", "DJ Har…
This bar chart shows the top 10 batsmen in IPL history based on the total number of runs scored. It highlights the most successful run-scorers and allows easy comparison of overall batting performance.
# Total runs off the bat per batter, keeping the ten highest totals.
top_batsmen <- deliveries %>%
  group_by(batter) %>%
  summarise(total_runs = sum(batsman_runs, na.rm = TRUE)) %>%
  arrange(desc(total_runs)) %>%
  head(n = 10)

# Horizontal bar chart; reorder() sorts the bars by run total so the
# highest scorer appears at the top after the flip.
top_batsmen %>%
  ggplot(aes(reorder(batter, total_runs), total_runs)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(title = "Top 10 IPL Batsmen by Total Runs", x = "Batsman", y = "Total Runs") +
  theme_minimal()
This bar chart displays the top 10 bowlers in IPL history by total
wickets taken.
It helps identify the most impactful bowlers across all seasons.
# Wickets per bowler, keeping the ten highest totals.
#
# Not every dismissal is credited to the bowler: run outs, retirements and
# obstructing the field are excluded so that `total_wickets` reflects bowler
# wickets only. `%in%` is used (rather than `!=`) because it never returns NA,
# so rows with a missing `dismissal_kind` cannot silently drop out of the
# filter for the wrong reason.
top_bowlers <- deliveries %>%
  filter(
    is_wicket == 1,
    !dismissal_kind %in% c(
      "run out", "retired hurt", "retired out", "obstructing the field"
    )
  ) %>%
  group_by(bowler) %>%
  summarise(total_wickets = n()) %>%
  arrange(desc(total_wickets)) %>%
  slice_head(n = 10)

# Horizontal bar chart; reorder() sorts the bars by wicket count.
ggplot(top_bowlers, aes(x = reorder(bowler, total_wickets), y = total_wickets)) +
  geom_col(fill = "darkred") +
  coord_flip() +
  labs(
    title = "Top 10 IPL Bowlers by Total Wickets",
    x = "Bowler",
    y = "Total Wickets"
  ) +
  theme_minimal()
This line plot shows Virat Kohli’s total runs scored in each IPL
season.
It illustrates changes in performance over time and highlights
consistency and peak seasons.
# Runs off the bat by V Kohli in each season.
#
# The batter filter runs before the join so that only his deliveries are
# matched against `matches`, and only the `id` and `season` columns are
# brought across; the result is the same as joining everything first, without
# building a 260k-row, 36-column intermediate table.
kohli_season <- deliveries %>%
  filter(batter == "V Kohli") %>%
  inner_join(select(matches, id, season), by = c("match_id" = "id")) %>%
  group_by(season) %>%
  summarise(season_runs = sum(batsman_runs, na.rm = TRUE))

# `season` is a character column, so `group = 1` is needed for geom_line()
# to connect the points across the discrete x axis.
ggplot(kohli_season, aes(x = season, y = season_runs, group = 1)) +
  geom_line(color = "blue") +
  geom_point(color = "blue") +
  labs(
    title = "Virat Kohli: Runs Scored per IPL Season",
    x = "Season",
    y = "Total Runs"
  ) +
  theme_minimal()
This visualization shows the total number of runs scored across all
matches in each IPL season.
It provides insight into how scoring trends have changed over time in
the league.
# Attach each delivery's season via the match id, then total all runs
# (bat + extras) per season.
season_runs <- deliveries %>%
  inner_join(select(matches, id, season), by = c("match_id" = "id")) %>%
  group_by(season) %>%
  summarise(total_runs = sum(total_runs, na.rm = TRUE))

# Line + points over the discrete season axis; `group = 1` joins the
# points into a single line.
season_runs %>%
  ggplot(aes(season, total_runs, group = 1)) +
  geom_line(colour = "darkgreen") +
  geom_point(colour = "darkgreen") +
  labs(title = "Total Runs Scored per IPL Season", x = "Season", y = "Total Runs") +
  theme_minimal()
This box plot shows the distribution of runs scored per innings across all IPL matches. It highlights the median performance, spread, and presence of high-scoring innings.
# Total runs (bat + extras) for each innings of each match.
#
# Only innings 1 and 2 are kept: super-over innings, which this dataset is
# expected to record as inning 3 and above (TODO confirm against the data),
# last a single over and would otherwise appear as spurious low-scoring
# "innings" in the distribution.
# `.groups = "drop"` returns an ungrouped tibble and avoids the summarise()
# regrouping message in the rendered report.
innings_runs <- deliveries %>%
  filter(inning <= 2) %>%
  group_by(match_id, inning) %>%
  summarise(runs = sum(total_runs, na.rm = TRUE), .groups = "drop")

# Single box plot of the per-innings totals.
ggplot(innings_runs, aes(y = runs)) +
  geom_boxplot(fill = "orange") +
  labs(
    title = "Distribution of Runs per Innings in IPL",
    y = "Runs per Innings",
    x = ""
  ) +
  theme_minimal()
This scatter plot compares batting average and strike rate for IPL
batsmen who have been dismissed more than 20 times.
It helps distinguish aggressive batters (high strike rate) from consistent ones (high average).
# Batting average and strike rate per batter, restricted to batters with
# more than 20 dismissals so the averages are not dominated by small samples.
#
# Dismissals are counted from `player_dismissed`, not from `is_wicket` on the
# striker's rows: on a run out the player dismissed can be the non-striker,
# so summing `is_wicket` by `batter` would charge the wicket to the wrong
# player. "retired hurt" is excluded because it does not count as being out.
# Wides are excluded from balls faced because they are not counted against
# the batter; `%in%` is used so that the NA `extras_type` on ordinary
# deliveries is treated as "not a wide" instead of propagating NA.
batting_stats <- deliveries %>%
  group_by(batter) %>%
  summarise(
    runs = sum(batsman_runs, na.rm = TRUE),
    balls = sum(!extras_type %in% "wides"),
    .groups = "drop"
  ) %>%
  inner_join(
    deliveries %>%
      filter(
        is_wicket == 1,
        !is.na(player_dismissed),
        !dismissal_kind %in% "retired hurt"
      ) %>%
      count(player_dismissed, name = "outs"),
    by = c("batter" = "player_dismissed")
  ) %>%
  filter(outs > 20) %>%
  mutate(
    average = runs / outs,
    strike_rate = (runs / balls) * 100
  )

# One point per batter: average on x, strike rate on y.
ggplot(batting_stats, aes(x = average, y = strike_rate)) +
  geom_point(alpha = 0.5, color = "purple") +
  labs(
    title = "Batting Average vs Strike Rate (IPL)",
    x = "Batting Average",
    y = "Strike Rate"
  ) +
  theme_minimal()
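This bar chart shows how many matches were won by the team that won the
toss, split by its toss decision (bat or field).
The bars are raw counts, so the more frequently chosen decision will tend to show more wins; compare them
with how often each decision was made before drawing conclusions about toss strategy.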
# For each toss decision, count the matches in which the toss winner also
# won the match. Matches where the comparison is NA (e.g. no recorded winner)
# are ignored by `na.rm = TRUE`.
toss_results <- matches %>%
  group_by(toss_decision) %>%
  summarise(wins = sum(toss_winner == winner, na.rm = TRUE))

# One bar per toss decision.
toss_results %>%
  ggplot(aes(toss_decision, wins)) +
  geom_col(fill = "darkcyan") +
  labs(title = "Match Wins Based on Toss Decision", x = "Toss Decision", y = "Number of Wins") +
  theme_minimal()
This interactive bar chart allows users to hover over bars to see
exact run totals.
Interactivity improves engagement and allows more precise exploration of
player performance.
# Build the same top-10 batsmen bar chart as a static ggplot object first...
p <- top_batsmen %>%
  ggplot(aes(reorder(batter, total_runs), total_runs)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(title = "Top 10 IPL Batsmen by Total Runs (Interactive)", x = "Batsman", y = "Total Runs") +
  theme_minimal()

# ...then convert it to an interactive plotly widget with hover tooltips.
ggplotly(p)