task9-updated.R

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)

# Read the two IPL source tables from the working directory:
# one row per match, and one row per delivery bowled.
matches <- read.csv(file = "matches.csv")
deliveries <- read.csv(file = "deliveries.csv")

# Aggregate the delivery-level data: sum `total_runs` over every delivery
# that shares a `match_id`, giving one row per match.
total_runs_by_match <- summarise(
  group_by(deliveries, match_id),
  total_runs = sum(total_runs)
)

## `summarise()` ungrouping output (override with `.groups` argument)

# Copy `id` into a `match_id` key column, then attach each match's
# total runs. A left join keeps every row of `matches`, including any
# match that has no entry in `total_runs_by_match`.
matches <- matches %>%
  mutate(match_id = id) %>%
  left_join(total_runs_by_match, by = "match_id")

# Extract the highest match total as a single value.
# `total_runs` comes from a left join, so it is NA for any match without
# delivery records. Without `na.rm = TRUE` a single NA would make max()
# return NA, and the filter below would then select no rows at all.
highest_score <- max(matches$total_runs, na.rm = TRUE)

# Find the match(es) with the highest score. Several rows may be returned
# when matches tie; `highest_score` itself stays a scalar either way.
highest_score_match <- matches %>%
  filter(total_runs == highest_score)

# Draw the distribution of per-match run totals as a 20-bin histogram.
# Left as a bare top-level expression so the plot is auto-printed.
ggplot(data = matches, mapping = aes(x = total_runs)) +
  geom_histogram(bins = 20, colour = "black", fill = "blue") +
  theme_minimal() +
  labs(
    x = "Total Runs",
    y = "Frequency",
    title = "IPL Score Distribution"
  )

# Report the highest match total on the console.
cat(
  "Highest score in IPL:",
  highest_score,
  "\n",
  sep = " "
)

## Highest score in IPL: 469

task9-updated.R

rstudio

2024-12-17