# Load required libraries
library(ggplot2movies)
library(dplyr)

# Load the movie dataset
# data() places the `movies` data frame from ggplot2movies in the workspace.
data(movies)

# 1. Range of Years of Production
# Earliest and latest production year in the dataset, ignoring missing years.
oldest_movie <- min(movies$year, na.rm = TRUE)
newest_movie <- max(movies$year, na.rm = TRUE)
cat("Oldest Movie:", oldest_movie, "\n")
cat("Newest Movie:", newest_movie, "\n")

# 2. Proportion of Movies with Budget and Top 5 Most Expensive Movies
# A movie "has a budget" when its `budget` value is not NA.
movies_with_budget <- sum(!is.na(movies$budget))
movies_without_budget <- sum(is.na(movies$budget))
proportion_with_budget <- movies_with_budget / nrow(movies)
proportion_without_budget <- movies_without_budget / nrow(movies)

cat("Proportion with Budget:", proportion_with_budget, "\n")
cat("Proportion without Budget:", proportion_without_budget, "\n")

# order() sorts NA last by default, so movies without a budget never reach
# the top five.
top_expensive_movies <- head(movies[order(-movies$budget), ], 5)
cat("Top 5 Most Expensive Movies:\n")
print(top_expensive_movies)

# 3. Top 5 Longest Movies
# Sort by descending running time (`length`) and keep the first five rows.
top_longest_movies <- head(movies[order(-movies$length), ], 5)
cat("Top 5 Longest Movies:\n")
print(top_longest_movies)

# 4. Shortest and Longest Short Movies
# Short films are flagged by the binary `Short` genre column. Filtering on
# `mpaa == "R"` would select R-rated films instead, which is not what this
# section reports. which() drops any NA comparisons rather than yielding
# all-NA rows.
short_movies <- movies[which(movies$Short == 1), ]
shortest_short_movie <- min(short_movies$length, na.rm = TRUE)
longest_short_movie <- max(short_movies$length, na.rm = TRUE)

cat("Shortest Short Movie:", shortest_short_movie, "minutes\n")
cat("Longest Short Movie:", longest_short_movie, "minutes\n")

# 5. Number of Movies in Each Genre (Bar Plot)
# Each genre column is a 0/1 indicator, so the column sum is the number of
# movies tagged with that genre. The result is a named numeric vector that
# later sections reuse for the list of genre names.
genre_counts <- colSums(
  movies[, c(
    "Action", "Animation", "Comedy", "Drama",
    "Documentary", "Romance", "Short"
  )]
)
barplot(
  genre_counts,
  names.arg = names(genre_counts),
  xlab = "Genre",
  ylab = "Number of Movies"
)

# 6. Average Rating of Movies Within Each Genre (Bar Plot)
# For every genre named in `genre_counts`, average the rating of the movies
# flagged with that genre. `[[` extracts the indicator column as a plain
# vector, so the row mask is one-dimensional for data frames and tibbles
# alike. vapply() guarantees a named numeric vector.
genre_avg_ratings <- vapply(
  names(genre_counts),
  function(genre) {
    mean(movies$rating[which(movies[[genre]] == 1)], na.rm = TRUE)
  },
  numeric(1)
)
barplot(
  genre_avg_ratings,
  names.arg = names(genre_avg_ratings),
  xlab = "Genre",
  ylab = "Average Rating"
)

# 7. Average Rating of Movies Within Each Genre (2000-2005)
# Same as the overall per-genre average, restricted to movies produced from
# 2000 through 2005 inclusive. The year mask does not depend on the genre,
# so it is computed once outside the per-genre function.
in_2000_2005 <- movies$year >= 2000 & movies$year <= 2005
genre_avg_ratings_2000_2005 <- vapply(
  names(genre_counts),
  function(genre) {
    mean(
      movies$rating[which(in_2000_2005 & movies[[genre]] == 1)],
      na.rm = TRUE
    )
  },
  numeric(1)
)
barplot(
  genre_avg_ratings_2000_2005,
  names.arg = names(genre_avg_ratings_2000_2005),
  xlab = "Genre",
  ylab = "Average Rating (2000-2005)"
)

# 8. Number of Movies in Each Genre by Year (Line Plot)
# Sum the 0/1 genre indicators per production year, from 1990 onwards.
movies$year <- as.numeric(movies$year)
movies_sub <- movies[movies$year >= 1990, ]
genre_counts_by_year <- aggregate(
  . ~ year,
  data = movies_sub[, c(
    "year", "Action", "Animation", "Comedy",
    "Drama", "Documentary", "Romance"
  )],
  FUN = sum
)

# One colour per plotted genre; the first entry sets up the axes.
genre_colours <- c(
  Action = "red", Animation = "blue", Comedy = "green",
  Drama = "purple", Documentary = "orange", Romance = "pink"
)

# Scale the y-axis to all genres. Scaling to Action alone clips any genre
# whose yearly count exceeds Action's maximum.
count_range <- range(unlist(genre_counts_by_year[names(genre_colours)]))

plot(
  genre_counts_by_year$year, genre_counts_by_year$Action,
  type = "l", col = genre_colours[["Action"]], ylim = count_range,
  xlab = "Year", ylab = "Number of Movies"
)
for (genre in names(genre_colours)[-1]) {
  lines(
    genre_counts_by_year$year, genre_counts_by_year[[genre]],
    col = genre_colours[[genre]]
  )
}
legend(
  "topleft",
  legend = names(genre_colours),
  col = unname(genre_colours),
  lty = 1
)

# 9. Custom Questions

# Question 1: Highest Rated Movies by Genre
#
# Prints one line per genre named in the global `genre_counts`, giving the
# title and rating of the top-rated movie in the global `movies` table that
# is flagged with that genre. which.max() returns the first maximum, so ties
# resolve to the earliest row. Called for its printed output; returns NULL
# invisibly so a top-level call does not auto-print a list of NULLs.
highest_rated_by_genre <- function() {
  for (genre in names(genre_counts)) {
    # Subset once, then pick the best row inside that subset.
    genre_movies <- movies[which(movies[[genre]] == 1), ]
    top_movie <- genre_movies[which.max(genre_movies$rating), ]
    cat(
      "Genre:", genre,
      "- Movie:", top_movie$title,
      "- Rating:", top_movie$rating, "\n"
    )
  }
  invisible(NULL)
}

# Question 2: Highest Rated Movie of 1997
#
# Prints the title and rating of the top-rated movie in the global `movies`
# table whose `year` is 1997. which.max() returns the first maximum, so ties
# resolve to the earliest row. Returns NULL invisibly.
highest_rated_movie_1997 <- function() {
  # Subset once; which() drops rows whose year is NA.
  movies_1997 <- movies[which(movies$year == 1997), ]
  top_movie_1997 <- movies_1997[which.max(movies_1997$rating), ]
  cat(
    "Highest Rated Movie of 1997:", top_movie_1997$title,
    "- Rating:", top_movie_1997$rating, "\n"
  )
  invisible(NULL)
}

# Question 3: Year with the Highest Average Rating
#
# Averages `rating` per `year` over the global `movies` table and prints the
# year with the largest mean together with that mean. which.max() returns
# the first maximum, so ties resolve to the earliest year. Returns NULL
# invisibly.
year_with_highest_avg_rating <- function() {
  # The local name differs from the function name to avoid shadowing it.
  avg_rating_by_year <- aggregate(
    rating ~ year,
    data = movies,
    FUN = mean,
    na.rm = TRUE
  )
  best_year <- avg_rating_by_year[which.max(avg_rating_by_year$rating), ]
  cat(
    "Year with the highest average rating:", best_year$year,
    "- Average Rating:", best_year$rating, "\n"
  )
  invisible(NULL)
}

# Call the functions for custom questions
# Each heading ends with a newline so the answer starts on its own line.
cat("Question 1: Highest Rated Movies by Genre\n")
highest_rated_by_genre()

cat("Question 2: Highest Rated Movie of 1997\n")
highest_rated_movie_1997()

cat("Question 3: Year with the Highest Average Rating\n")
year_with_highest_avg_rating()

# Render the R Markdown report
rmd_path <- "C:/Users/Patrick/OneDrive/Documents/movie_analysis.Rmd"

# Skip rendering while knitr is already running.
# NOTE(review): if this code lives inside movie_analysis.Rmd itself, an
# unconditional render() would re-enter the same document recursively;
# confirm where this script is meant to run.
if (!isTRUE(getOption("knitr.in.progress"))) {
  # First pass: no output_format given, so rmarkdown chooses the format.
  rmarkdown::render(rmd_path)

  # Second pass: render the same source as PDF. This re-runs the document;
  # it does not convert the output of the first pass.
  rmarkdown::render(rmd_path, output_format = "pdf_document")
}