The Indian Premier League (IPL) is one of the most popular cricket tournaments in the world. This project explores patterns and insights from IPL matches using data visualization techniques in R.
The project includes multiple visualization types such as bar charts, line charts, heatmaps, histograms, boxplots, scatter plots, and interactive graphics.
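The objectives of this analysis are to identify the cities and venues that host the most matches, compare how often each team wins, track how toss decisions change from season to season, examine the distribution of result margins, and highlight the players with the most Player of the Match awards.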
library(tidyverse)
library(ggplot2)
library(plotly)
library(lubridate)
library(viridis)
library(reshape2)
library(scales)
# Load the raw match records, parse the match date, and derive the season
# year from it.
ipl <- read.csv("IPL Matches.csv") %>%
  mutate(
    date = as.Date(date),
    year = year(date)
  )

# Keep only the matches that have a recorded winner.
ipl_clean <- filter(ipl, !is.na(winner))

# Print the structure of the cleaned data for a quick sanity check.
str(ipl_clean)
## 'data.frame': 812 obs. of 18 variables:
## $ id : int 335982 335983 335984 335985 335986 335987 335988 335989 335990 335991 ...
## $ city : chr "Bangalore" "Chandigarh" "Delhi" "Mumbai" ...
## $ date : Date, format: "2008-04-18" "2008-04-19" ...
## $ player_of_match: chr "BB McCullum" "MEK Hussey" "MF Maharoof" "MV Boucher" ...
## $ venue : chr "M Chinnaswamy Stadium" "Punjab Cricket Association Stadium, Mohali" "Feroz Shah Kotla" "Wankhede Stadium" ...
## $ neutral_venue : int 0 0 0 0 0 0 0 0 0 0 ...
## $ team1 : chr "Royal Challengers Bangalore" "Kings XI Punjab" "Delhi Daredevils" "Mumbai Indians" ...
## $ team2 : chr "Kolkata Knight Riders" "Chennai Super Kings" "Rajasthan Royals" "Royal Challengers Bangalore" ...
## $ toss_winner : chr "Royal Challengers Bangalore" "Chennai Super Kings" "Rajasthan Royals" "Mumbai Indians" ...
## $ toss_decision : chr "field" "bat" "bat" "bat" ...
## $ winner : chr "Kolkata Knight Riders" "Chennai Super Kings" "Delhi Daredevils" "Royal Challengers Bangalore" ...
## $ result : chr "runs" "runs" "wickets" "wickets" ...
## $ result_margin : int 140 33 9 5 5 6 9 6 3 66 ...
## $ eliminator : chr "N" "N" "N" "N" ...
## $ method : chr NA NA NA NA ...
## $ umpire1 : chr "Asad Rauf" "MR Benson" "Aleem Dar" "SJ Davis" ...
## $ umpire2 : chr "RE Koertzen" "SL Shastri" "GA Pratapkumar" "DJ Harper" ...
## $ year : num 2008 2008 2008 2008 2008 ...
# Count matches per host city and keep the ten busiest cities.
# Rows with a missing city are dropped first so that NA cannot show up as a
# "city" in the ranking (the venue boxplot applies the same rule to venue).
city_matches <- ipl_clean %>%
  filter(!is.na(city)) %>%
  count(city, sort = TRUE) %>%
  # slice_max() replaces the superseded top_n(); ties at the cut-off are kept.
  slice_max(n, n = 10)

# Horizontal bar chart: cities ordered by match count, bar colour encodes the
# same count.
p1 <- ggplot(
  city_matches,
  aes(x = reorder(city, n), y = n, fill = n)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 Cities Hosting IPL Matches",
    x = "City",
    y = "Number of Matches"
  ) +
  theme_minimal() +
  scale_fill_viridis_c()

p1
# Tally wins per team, most successful first.
team_wins <- count(ipl_clean, winner, sort = TRUE)

# Horizontal bar chart of total wins; bars are ordered by win count and the
# fill colour encodes the same count.
p2 <- ggplot(data = team_wins) +
  geom_col(mapping = aes(x = reorder(winner, n), y = n, fill = n)) +
  scale_fill_viridis_c() +
  theme_bw() +
  coord_flip() +
  ggtitle("Most Successful IPL Teams") +
  xlab("Team") +
  ylab("Total Wins")

p2
# Number of matches per season for each toss decision.
toss_trend <- ipl_clean %>%
  count(year, toss_decision, name = "matches")

# One line per toss decision across seasons.
p3 <- ggplot(
  toss_trend,
  aes(
    x = year,
    y = matches,
    color = toss_decision,
    group = toss_decision
  )
) +
  # `linewidth` is the line-thickness argument since ggplot2 3.4.0; passing
  # `size` to a line geom is deprecated and emits a warning.
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  labs(
    title = "Toss Decision Trends Over the Years",
    x = "Year",
    y = "Number of Matches",
    color = "Toss Decision"
  ) +
  theme_minimal()

p3
# Histogram of winning margins.
# `result_margin` is expressed in the unit named by `result` (the data holds
# both "runs" and "wickets"), so the two are shown in separate panels with
# their own axes instead of being pooled on a single scale.
# Matches without a margin are dropped explicitly rather than being removed
# by ggplot2 with a warning.
p4 <- ggplot(
  filter(ipl_clean, !is.na(result_margin)),
  aes(x = result_margin)
) +
  geom_histogram(
    bins = 30,
    fill = "steelblue",
    color = "white"
  ) +
  facet_wrap(~ result, scales = "free") +
  labs(
    title = "Distribution of Match Result Margins",
    x = "Result Margin",
    y = "Frequency"
  ) +
  theme_minimal()

p4
# Count Player of the Match awards per player and keep the ten most awarded.
# Matches with no recorded award are dropped so NA cannot rank as a player.
player_awards <- ipl_clean %>%
  filter(!is.na(player_of_match)) %>%
  count(player_of_match, sort = TRUE) %>%
  # slice_max() replaces the superseded top_n(); ties at the cut-off are kept.
  slice_max(n, n = 10)

# Horizontal bar chart: players ordered by award count, bar colour encodes
# the same count.
p5 <- ggplot(
  player_awards,
  aes(
    x = reorder(player_of_match, n),
    y = n,
    fill = n
  )
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top Players of the Match Awards",
    x = "Player",
    y = "Awards Won"
  ) +
  theme_light() +
  scale_fill_viridis_c()

p5
# Result margins for venues that hosted at least 10 matches.
venue_margin <- ipl_clean %>%
  filter(!is.na(venue)) %>%
  group_by(venue) %>%
  filter(n() >= 10) %>%
  # Drop the grouping so the result is a plain data frame for later use.
  ungroup() %>%
  # A missing margin would make the per-venue median NA and break the
  # ordering of the venues below, so such rows are removed after the
  # 10-match threshold has been applied.
  filter(!is.na(result_margin))

# Horizontal boxplots, venues ordered by their median margin.
# NOTE(review): result_margin mixes margins in runs and in wickets (see the
# `result` column); consider splitting by `result` -- confirm intent.
p6 <- ggplot(
  venue_margin,
  aes(
    x = reorder(venue, result_margin, median),
    y = result_margin
  )
) +
  geom_boxplot(fill = "orange") +
  coord_flip() +
  labs(
    title = "Distribution of Result Margins by Venue",
    x = "Venue",
    y = "Result Margin"
  ) +
  theme_minimal()

p6
# Number of matches for every (team1, winner) combination.
heat_data <- count(ipl_clean, team1, winner)

# Tile heatmap: team1 on the x axis, winning team on the y axis, colour by
# match count. The x labels are rotated so the team names do not overlap.
p7 <- ggplot(data = heat_data) +
  geom_tile(mapping = aes(x = team1, y = winner, fill = n)) +
  scale_fill_viridis_c() +
  ggtitle("Team vs Winner Heatmap") +
  xlab("Team 1") +
  ylab("Winning Team") +
  labs(fill = "Matches") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90))

p7
# Bar chart of team wins prepared for plotly. The `text` aesthetic carries
# the tooltip content and is kept in the plot-level mapping.
interactive_plot <- team_wins %>%
  ggplot(aes(
    x = reorder(winner, n),
    y = n,
    fill = n,
    text = paste("Team:", winner, "<br>Total Wins:", n)
  )) +
  geom_col() +
  theme_minimal() +
  coord_flip() +
  ggtitle("Interactive IPL Team Wins") +
  xlab("Team") +
  ylab("Wins")

# Convert to an interactive widget that shows only the custom tooltip text.
interactive_plot %>%
  ggplotly(tooltip = "text")
This project explored IPL match data using multiple visualization techniques. The analysis revealed patterns in team performances, venue characteristics, toss strategies, and player achievements.
The interactive chart lets readers hover over each bar to see a team's exact win total, which allows closer exploration of the dataset than the static plots alone.
Overall, data visualization provides powerful insights into sports analytics and helps uncover trends hidden within large datasets.