library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
library(methods)
library(readr)

# Read the movie data from the working directory (column-type message silenced)
Movie <- readr::read_csv(file = "Movie.csv", show_col_types = FALSE)

# Normalise the header to lowercase so the column lookups below are
# insensitive to the capitalisation used in the CSV
names(Movie) <- tolower(names(Movie))

# Names of the columns the analysis works on
genre_col <- "genre"
box_office_col <- "box_office"
score_col <- "score"

# S4 class and show() method used to hold and display the analysis results
# Formal (S4) container for the output of one analysis run:
#   summary        - any object (slot type "ANY")
#   avg_box_office - a data.frame
#   plot           - a list
setClass(
  "MovieAnalysis",
  slots = c(
    summary = "ANY",
    avg_box_office = "data.frame",
    plot = "list"
  )
)

# Console display for MovieAnalysis objects.
#
# Prints the stored summary and the per-genre average table, each under its
# own heading, followed by a hint on where the plots live. Every heading ends
# with a newline so the first line of the printed object is not glued to the
# heading (which would misalign the column headers). Sections that hold
# nothing (NULL summary, zero-row table) are reported as "(not computed)".
# Returns NULL invisibly, as is conventional for show methods.
setMethod("show", "MovieAnalysis", function(object) {
  cat("Movie Analysis Summary\n")
  if (is.null(object@summary)) {
    cat("(not computed)\n")
  } else {
    print(object@summary)
  }

  cat("\nAverage Box Office by Genre\n")
  if (NROW(object@avg_box_office) == 0L) {
    cat("(not computed)\n")
  } else {
    print(object@avg_box_office)
  }

  cat("\nPlots are stored in object@plot as a list. Use results$data@plot$<name> to view.\n")
  invisible(NULL)
})

#The main analysis function 
#' Summarise a movie data set and build a set of ggplot2 plots.
#'
#' @param df Data frame holding the columns named by the next three arguments.
#' @param score_col Name (string) of the score column.
#' @param box_office_col Name (string) of the box-office column.
#' @param genre_col Name (string) of the genre column.
#' @param actions Character vector choosing what to compute; any of
#'   "summary", "avg_box_office" and "plots".
#' @param save_plots If TRUE, every generated plot is written to
#'   "<plot name>.png" (8 x 6) in the working directory.
#' @return A list of class "movie_results" whose `data` element is a
#'   "MovieAnalysis" object. As a side effect the object is printed with
#'   show(). Parts that were not requested are stored as NULL (summary),
#'   an empty data.frame (avg_box_office) or an empty list (plot).
analyze_movies <- function(df, score_col, box_office_col, genre_col,
                           actions = c("summary", "avg_box_office", "plots"),
                           save_plots = FALSE) {

  required_cols <- c(score_col, box_office_col, genre_col)
  missing_cols <- setdiff(required_cols, colnames(df))
  if (length(missing_cols) > 0) {
    stop("Missing columns: ", paste(missing_cols, collapse = ", "),
         call. = FALSE)
  }

  want_summary <- "summary" %in% actions
  want_avg <- "avg_box_office" %in% actions
  want_plots <- "plots" %in% actions

  # Computing the summary
  summary_res <- if (want_summary) summary(df[[score_col]]) else NULL

  # Average box office per genre. The bar chart below is drawn from this
  # table, so it is also computed when only the plots were requested.
  avg_col <- paste0("avg_", box_office_col)
  avg_tbl <- NULL
  if (want_avg || want_plots) {
    avg_tbl <- aggregate(df[[box_office_col]] ~ df[[genre_col]], FUN = mean)
    colnames(avg_tbl) <- c(genre_col, avg_col)
  }

  # Generating the plots. Columns are mapped with the `.data` pronoun instead
  # of the deprecated aes_string(); axis/legend titles are set explicitly so
  # they show the plain column names.
  plots <- list()
  if (want_plots) {

    plots$scatter <- ggplot(df, aes(x = .data[[score_col]],
                                    y = .data[[box_office_col]],
                                    color = .data[[genre_col]])) +
      geom_point(size = 3, alpha = 0.7) +
      theme_minimal(base_size = 14) +
      labs(title = "Scores vs Box Office by Genre",
           x = score_col,
           y = paste0(box_office_col, " (millions)"),
           color = genre_col)

    plots$hist_scores <- ggplot(df, aes(x = .data[[score_col]])) +
      geom_histogram(binwidth = 5, fill = "skyblue", color = "black") +
      theme_minimal(base_size = 14) +
      labs(title = "Distribution of Scores", x = score_col)

    plots$box_office_by_genre <- ggplot(df, aes(x = .data[[genre_col]],
                                                y = .data[[box_office_col]])) +
      geom_boxplot(fill = "lightgreen") +
      theme_minimal(base_size = 14) +
      labs(title = "Box Office by Genre", x = genre_col, y = box_office_col)

    plots$avg_box_office_bar <- ggplot(avg_tbl, aes(x = .data[[genre_col]],
                                                    y = .data[[avg_col]])) +
      geom_col(fill = "orange") +
      theme_minimal(base_size = 14) +
      labs(title = "Average Box Office by Genre", x = genre_col, y = avg_col)

    if (save_plots) {
      for (nm in names(plots)) {
        ggsave(filename = paste0(nm, ".png"), plot = plots[[nm]],
               width = 8, height = 6)
      }
    }
  }

  # The avg_box_office slot is typed "data.frame", so NULL is not accepted
  # there; an empty data.frame stands for "not requested".
  avg_box_office_res <- if (want_avg) avg_tbl else data.frame()

  analysis <- new("MovieAnalysis",
                  summary = summary_res,
                  avg_box_office = avg_box_office_res,
                  plot = plots)

  results <- list(data = analysis)
  class(results) <- "movie_results"

  # Printing summary and averages
  show(analysis)

  results
}

# Run every analysis step on the loaded data; plots are kept in memory only
results <- analyze_movies(
  df = Movie,
  score_col = score_col,
  box_office_col = box_office_col,
  genre_col = genre_col,
  save_plots = FALSE
)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Movie Analysis Summary   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   29.20   45.70   55.25   55.98   64.92   94.60 
## Average Box Office by Genre          genre avg_box_office
## 1       action      89.968667
## 2    adventure      88.710500
## 3     animated      93.551000
## 4       comedy      58.636051
## 5  documentary       4.577667
## 6        drama      49.414000
## 7      fantasy     169.526667
## 8       horror      39.747455
## 9      musical       4.929000
## 10  rom-comedy      37.867846
## 11      sci-fi      39.438333
## 12    suspense      32.227300
## 13     western      42.615500
## 
## Plots are stored in object@plot as a list. Use results$data@plot$<name> to view.
# Plots
# Each bare expression below pulls one plot out of the `plot` slot of the
# MovieAnalysis object stored in results$data. Nothing here calls print()
# explicitly, so drawing relies on top-level auto-printing (e.g. the console
# or a knitted report).
# NOTE(review): presumably nothing is drawn when this file is run through
# source() with its default settings - confirm how the script is executed.
results$data@plot$scatter

results$data@plot$hist_scores

results$data@plot$box_office_by_genre

results$data@plot$avg_box_office_bar