library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Read the two IPL input tables from the current working directory.
# `matches` is later expected to carry an `id` column, and `deliveries`
# the `match_id` and `total_runs` columns used further down the script.
matches <- read.csv(file = "matches.csv")
deliveries <- read.csv(file = "deliveries.csv")
# Sum the `total_runs` column of `deliveries` within each `match_id`,
# producing one row per match.
# `.groups = "drop"` makes the ungrouped result explicit instead of relying
# on summarise()'s default, which also silences its informational message.
total_runs_by_match <- deliveries %>%
  group_by(match_id) %>%
  summarise(total_runs = sum(total_runs), .groups = "drop")
# Expose the match identifier under the name `match_id` (copied from `id`)
# and attach each match's summed runs. A left join keeps every row of
# `matches`, so matches without a row in `total_runs_by_match` get NA.
matches <- matches %>%
  mutate(match_id = id) %>%
  left_join(total_runs_by_match, by = "match_id")
# Find the match (or matches, if tied) with the largest `total_runs`.
# After the left join, a match with no delivery rows has NA here. Without
# na.rm = TRUE a single NA would make max() return NA, the comparison would
# be NA for every row, and filter() would return an empty table.
# filter() keeps only rows where the condition is TRUE, so NA rows are dropped.
highest_score_match <- matches %>%
  filter(total_runs == max(total_runs, na.rm = TRUE))
# Extract the highest score as a single value. Tied matches all share the
# same total, so unique() collapses them instead of repeating the number.
highest_score <- unique(highest_score_match$total_runs)
# Plot how `total_runs` is distributed across matches as a 20-bin histogram
# (blue bars with black outlines) on the minimal theme.
ggplot(data = matches, mapping = aes(x = total_runs)) +
  geom_histogram(
    bins = 20,
    fill = "blue",
    color = "black"
  ) +
  labs(
    title = "IPL Score Distribution",
    x = "Total Runs",
    y = "Frequency"
  ) +
  theme_minimal()

# Report the highest score on the console. The pieces are joined with
# cat()'s default single-space separator, spelled out here for clarity.
cat("Highest score in IPL:", highest_score, "\n", sep = " ")
## Highest score in IPL: 469