library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
library(methods)
library(readr)
# Read the movie data from disk; the column-type message is suppressed
Movie <- read_csv(file = "Movie.csv", show_col_types = FALSE)
# Normalise every column name to lowercase so the names below match
names(Movie) <- tolower(names(Movie))
# Names of the columns the analysis works on
score_col <- "score"
box_office_col <- "box_office"
genre_col <- "genre"
# S4 class and display method for the analysis results
# S4 container for the results of one analysis run.
#   summary        - any object (slot type "ANY")
#   avg_box_office - a data.frame
#   plot           - a list
setClass(
  Class = "MovieAnalysis",
  slots = c(
    summary = "ANY",
    avg_box_office = "data.frame",
    plot = "list"
  )
)
# Display method for MovieAnalysis objects.
# Prints the `summary` slot and the `avg_box_office` slot, each under its own
# heading, followed by a hint on how to reach the stored plots.
# Each heading ends with a newline so the printed values start on their own
# line instead of being appended to the heading text.
setMethod("show", "MovieAnalysis", function(object) {
  cat("Movie Analysis Summary\n")
  print(object@summary)
  cat("\nAverage Box Office by Genre\n")
  print(object@avg_box_office)
  cat("\nPlots are stored in object@plot as a list. Use results$data@plot$<name> to view.\n")
  invisible(NULL)
})
#The main analysis function
#' Summarise scores, average box office per genre, and build plots.
#'
#' @param df Data frame containing the three columns named below.
#' @param score_col Name (string) of the score column.
#' @param box_office_col Name (string) of the box-office column.
#' @param genre_col Name (string) of the genre column.
#' @param actions Character vector selecting what to compute; recognised
#'   values are "summary", "avg_box_office" and "plots".
#' @param save_plots If TRUE and plots are requested, every plot is written
#'   to "<plot name>.png" with ggsave().
#' @return A list of class "movie_results" whose `data` element is a
#'   "MovieAnalysis" object. The object is also printed via show() as a
#'   side effect. Stops with an error if a required column is missing.
analyze_movies <- function(df, score_col, box_office_col, genre_col,
                           actions = c("summary", "avg_box_office", "plots"),
                           save_plots = FALSE) {
  required_cols <- c(score_col, box_office_col, genre_col)
  missing_cols <- setdiff(required_cols, colnames(df))
  if (length(missing_cols) > 0) {
    stop("Missing columns: ", paste(missing_cols, collapse = ", "),
         call. = FALSE)
  }

  want_avg <- "avg_box_office" %in% actions
  want_plots <- "plots" %in% actions
  avg_col <- paste0("avg_", box_office_col)

  # Computing the summary
  summary_res <- if ("summary" %in% actions) summary(df[[score_col]]) else NULL

  # Average box office per genre. The bar chart needs these means too, so
  # they are computed whenever either the table or the plots are requested.
  genre_means <- NULL
  if (want_avg || want_plots) {
    # Aggregate over a small explicit frame so the formula uses plain names
    # whatever the real column names are; rows with NA are dropped by the
    # formula interface's default na.action.
    grouped <- data.frame(value = df[[box_office_col]],
                          group = df[[genre_col]])
    genre_means <- aggregate(value ~ group, data = grouped, FUN = mean)
    colnames(genre_means) <- c(genre_col, avg_col)
  }
  # The S4 slot is typed "data.frame", so an empty frame (not NULL) stands in
  # when the averages were not requested.
  avg_box_office_res <- if (want_avg) genre_means else data.frame()

  # Generating the plots
  plots <- list()
  if (want_plots) {
    # aes_string() is deprecated; the .data pronoun looks columns up by name
    # and also copes with non-syntactic names. Labels are set explicitly so
    # axis and legend titles stay the plain column names.
    plots$scatter <- ggplot(
      df,
      aes(x = .data[[score_col]],
          y = .data[[box_office_col]],
          color = .data[[genre_col]])
    ) +
      geom_point(size = 3, alpha = 0.7) +
      theme_minimal(base_size = 14) +
      labs(title = "Scores vs Box Office by Genre",
           x = score_col,
           y = paste0(box_office_col, " (millions)"),
           color = genre_col)

    plots$hist_scores <- ggplot(df, aes(x = .data[[score_col]])) +
      geom_histogram(binwidth = 5, fill = "skyblue", color = "black") +
      theme_minimal(base_size = 14) +
      labs(title = "Distribution of Scores", x = score_col)

    plots$box_office_by_genre <- ggplot(
      df,
      aes(x = .data[[genre_col]], y = .data[[box_office_col]])
    ) +
      geom_boxplot(fill = "lightgreen") +
      theme_minimal(base_size = 14) +
      labs(title = "Box Office by Genre", x = genre_col, y = box_office_col)

    plots$avg_box_office_bar <- ggplot(
      genre_means,
      aes(x = .data[[genre_col]], y = .data[[avg_col]])
    ) +
      geom_col(fill = "orange") +
      theme_minimal(base_size = 14) +
      labs(title = "Average Box Office by Genre", x = genre_col, y = avg_col)

    if (save_plots) {
      for (nm in names(plots)) {
        ggsave(filename = paste0(nm, ".png"), plot = plots[[nm]],
               width = 8, height = 6)
      }
    }
  }

  # Bundle everything into the S4 result object and print it
  analysis <- new("MovieAnalysis",
                  summary = summary_res,
                  avg_box_office = avg_box_office_res,
                  plot = plots)
  results <- structure(list(data = analysis), class = "movie_results")
  show(analysis)
  results
}
# Analysis: run with the default actions, without writing plot files
results <- analyze_movies(
  df = Movie,
  score_col = score_col,
  box_office_col = box_office_col,
  genre_col = genre_col,
  save_plots = FALSE
)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Movie Analysis Summary Min. 1st Qu. Median Mean 3rd Qu. Max.
## 29.20 45.70 55.25 55.98 64.92 94.60
## Average Box Office by Genre genre avg_box_office
## 1 action 89.968667
## 2 adventure 88.710500
## 3 animated 93.551000
## 4 comedy 58.636051
## 5 documentary 4.577667
## 6 drama 49.414000
## 7 fantasy 169.526667
## 8 horror 39.747455
## 9 musical 4.929000
## 10 rom-comedy 37.867846
## 11 sci-fi 39.438333
## 12 suspense 32.227300
## 13 western 42.615500
##
## Plots are stored in object@plot as a list. Use results$data@plot$<name> to view.
# Plots: draw each stored ggplot object in turn
print(results$data@plot[["scatter"]])

print(results$data@plot[["hist_scores"]])

print(results$data@plot[["box_office_by_genre"]])

print(results$data@plot[["avg_box_office_bar"]])

