library(ggplot2movies)
library(dplyr)
# Load the `movies` data set into the workspace.
data(movies)

# Earliest and latest release year present in the data; missing years are
# ignored.
oldest_movie <- min(movies$year, na.rm = TRUE)
newest_movie <- max(movies$year, na.rm = TRUE)

# Terminate each line with a newline so consecutive outputs do not run
# together.
cat("Oldest Movie:", oldest_movie, "\n")
cat("Newest Movie:", newest_movie, "\n")
# Count movies with and without a recorded budget and express both counts as
# a share of all rows in `movies`.
has_budget <- !is.na(movies$budget)
movies_with_budget <- sum(has_budget)
movies_without_budget <- sum(!has_budget)
proportion_with_budget <- movies_with_budget / nrow(movies)
proportion_without_budget <- movies_without_budget / nrow(movies)

cat("Proportion with Budget:", proportion_with_budget, "\n")
cat("Proportion without Budget:", proportion_without_budget, "\n")
# Five rows with the largest budget. `order()` places NA budgets last, so
# movies without a budget never reach the top of the list.
budget_rank <- order(-movies$budget)
top_expensive_movies <- head(movies[budget_rank, ], 5)
cat("Top 5 Most Expensive Movies:\n")
print(top_expensive_movies)
# Five rows with the greatest running length (sorted descending by `length`).
length_rank <- order(-movies$length)
top_longest_movies <- head(movies[length_rank, ], 5)
cat("Top 5 Longest Movies:\n")
print(top_longest_movies)
# Shortest and longest running time among short films.
# The previous filter selected rows with mpaa == "R" (R-rated films), which
# does not match the "Short Movie" labels below; the `Short` genre flag is
# used instead. `which()` drops any NA comparisons rather than producing
# all-NA rows.
short_movies <- movies[which(movies$Short == 1), ]
shortest_short_movie <- min(short_movies$length, na.rm = TRUE)
longest_short_movie <- max(short_movies$length, na.rm = TRUE)

cat("Shortest Short Movie:", shortest_short_movie, "minutes\n")
cat("Longest Short Movie:", longest_short_movie, "minutes\n")
# Number of movies flagged for each genre: the genre columns are summed
# column-wise, so each total is the count of rows carrying that flag
# (assuming 0/1 indicators, as the `== 1` tests elsewhere in this script
# imply).
genre_counts <- colSums(
  movies[, c(
    "Action", "Animation", "Comedy", "Drama",
    "Documentary", "Romance", "Short"
  )]
)
barplot(
  genre_counts,
  names.arg = names(genre_counts),
  xlab = "Genre",
  ylab = "Number of Movies"
)
# Mean rating of the movies flagged for each genre in `genre_counts`.
# `[[` extracts the flag column as a plain vector whether `movies` is a
# data.frame or a tibble, and `vapply()` guarantees one number per genre.
genre_avg_ratings <- vapply(
  names(genre_counts),
  function(genre) {
    mean(movies$rating[movies[[genre]] == 1], na.rm = TRUE)
  },
  numeric(1)
)
barplot(
  genre_avg_ratings,
  names.arg = names(genre_avg_ratings),
  xlab = "Genre",
  ylab = "Average Rating"
)
# Mean rating per genre, restricted to movies released from 2000 to 2005
# inclusive. The year mask does not depend on the genre, so it is computed
# once outside the per-genre function.
released_2000_2005 <- movies$year >= 2000 & movies$year <= 2005
genre_avg_ratings_2000_2005 <- vapply(
  names(genre_counts),
  function(genre) {
    in_scope <- released_2000_2005 & movies[[genre]] == 1
    mean(movies$rating[in_scope], na.rm = TRUE)
  },
  numeric(1)
)
barplot(
  genre_avg_ratings_2000_2005,
  names.arg = names(genre_avg_ratings_2000_2005),
  xlab = "Genre",
  ylab = "Average Rating (2000-2005)"
)
# Yearly number of movies per genre from 1990 onwards, drawn as one line per
# genre.
movies$year <- as.numeric(movies$year)
movies_sub <- movies[movies$year >= 1990, ]
genre_counts_by_year <- aggregate(
  . ~ year,
  data = movies_sub[, c(
    "year", "Action", "Animation", "Comedy",
    "Drama", "Documentary", "Romance"
  )],
  FUN = sum
)

trend_genres <- c(
  "Action", "Animation", "Comedy", "Drama", "Documentary", "Romance"
)
trend_colours <- c("red", "blue", "green", "purple", "orange", "pink")

# Scale the y-axis to every series, not only the first one drawn; otherwise
# genres with more movies than Action are clipped off the top of the plot.
plot(
  genre_counts_by_year$year,
  genre_counts_by_year[[trend_genres[1]]],
  type = "l",
  col = trend_colours[1],
  ylim = range(unlist(genre_counts_by_year[trend_genres])),
  xlab = "Year",
  ylab = "Number of Movies"
)
for (i in seq_along(trend_genres)[-1]) {
  lines(
    genre_counts_by_year$year,
    genre_counts_by_year[[trend_genres[i]]],
    col = trend_colours[i]
  )
}
# Six unlabelled lines are unreadable without a key.
legend("topleft", legend = trend_genres, col = trend_colours, lty = 1)
#' Print the highest-rated movie of each genre
#'
#' For every genre, selects the rows whose genre flag equals 1 and prints the
#' title and rating of the row with the maximum rating (the first one on
#' ties, per `which.max()`).
#'
#' @param data Data frame with `title`, `rating` and one flag column per
#'   genre. Defaults to the global `movies`.
#' @param genres Character vector of genre column names. Defaults to the
#'   names of the global `genre_counts`.
#' @return `NULL`, invisibly; called for its printed output.
highest_rated_by_genre <- function(data = movies,
                                   genres = names(genre_counts)) {
  # A plain loop is used because only the printing side effect matters;
  # mapping `cat()` with sapply() returned a list of NULLs that was
  # auto-printed after the real output.
  for (genre in genres) {
    # `[[` yields a plain vector for both data.frames and tibbles.
    genre_movies <- data[which(data[[genre]] == 1), ]
    top_movie <- genre_movies[which.max(genre_movies$rating), ]
    if (nrow(top_movie) == 0) {
      next # no rated movie carries this genre flag
    }
    cat(
      "Genre:", genre, "- Movie:", top_movie$title,
      "- Rating:", top_movie$rating, "\n"
    )
  }
  invisible(NULL)
}
#' Print the highest-rated movie released in 1997
#'
#' Filters the rows whose `year` equals 1997 and prints the title and rating
#' of the one with the maximum rating (the first one on ties).
#'
#' @param data Data frame with `title`, `year` and `rating` columns.
#'   Defaults to the global `movies`.
#' @return `NULL`, invisibly; called for its printed output.
highest_rated_movie_1997 <- function(data = movies) {
  # Filter once and reuse; `which()` drops NA years instead of producing
  # all-NA rows.
  movies_1997 <- data[which(data$year == 1997), ]
  top_movie_1997 <- movies_1997[which.max(movies_1997$rating), ]
  cat(
    "Highest Rated Movie of 1997:", top_movie_1997$title,
    "- Rating:", top_movie_1997$rating, "\n"
  )
  invisible(NULL)
}
#' Print the year with the highest mean rating
#'
#' Averages `rating` within each `year` and prints the year whose mean is
#' largest (the first one on ties), together with that mean.
#'
#' @param data Data frame with `year` and `rating` columns. Defaults to the
#'   global `movies`.
#' @return `NULL`, invisibly; called for its printed output.
year_with_highest_avg_rating <- function(data = movies) {
  # Named differently from the function itself to avoid shadowing it.
  avg_rating_by_year <- aggregate(
    rating ~ year,
    data = data,
    FUN = mean,
    na.rm = TRUE
  )
  max_avg_rating_year <-
    avg_rating_by_year[which.max(avg_rating_by_year$rating), ]
  cat(
    "Year with the highest average rating:", max_avg_rating_year$year,
    "- Average Rating:", max_avg_rating_year$rating, "\n"
  )
  invisible(NULL)
}
# Run the three question reports in order, each under its own heading.
# Each heading ends with a newline so the answer starts on a fresh line.
cat("Question 1: Highest Rated Movies by Genre\n")
highest_rated_by_genre()

cat("Question 2: Highest Rated Movie of 1997\n")
highest_rated_movie_1997()

cat("Question 3: Year with the Highest Average Rating\n")
year_with_highest_avg_rating()
# Render the R Markdown report twice: once with the output format declared
# in the document itself, and once explicitly as PDF.
# NOTE(review): the path is absolute and machine-specific; confirm it exists
# wherever this script runs.
report_path <- "C:/Users/Patrick/OneDrive/Documents/movie_analysis.Rmd"
rmarkdown::render(report_path)
rmarkdown::render(report_path, output_format = "pdf_document")