# Load the packages used throughout this script: dplyr supplies the data verbs
# and the %>% pipe, readr supplies read_csv().
library(dplyr)
library(readr)

# Read the movies dataset from the public gist (requires network access).
movies <- read_csv("https://gist.githubusercontent.com/tiangechen/b68782efa49a16edaf07dc2cdaa855ea/raw/0c794a9717f18b094eabab2cd6a6b9a226903577/movies.csv")
# 1. rename(): (4 points)
# Rename the "Film" column to "movie_title" and "Year" to "release_year".
q1 <- movies %>%
  rename(movie_title = Film, release_year = Year)
head(q1)
# 2. select(): (4 points)
# Create a new dataframe with only the columns: movie_title, release_year,
# Genre, Profitability,
q2 <- q1 %>%
  select(movie_title, release_year, Genre, Profitability)
head(q2)
# 3. filter(): (4 points)
# Filter the dataset to include only movies released after 2000 with a
# Rotten Tomatoes % higher than 80.
# The filter starts from q1 (not q2) because q2 no longer carries the
# `Rotten Tomatoes %` column. That name contains a space and a "%", so it is
# non-syntactic and must be wrapped in backticks.
q3 <- q1 %>%
  filter(release_year > 2000, `Rotten Tomatoes %` > 80)
head(q3)
# 4. mutate(): (4 points)
# Add a new column called "Profitability_millions" that converts the
# Profitability to millions of dollars.

# Clean the inputs first: remove any "$" and "," characters from
# `Worldwide Gross` so it can be converted to numeric, and coerce
# Profitability to numeric as well.
q3_Cleaned <- q3 %>%
  mutate(
    `Worldwide Gross` = as.numeric(gsub("[$,]", "", `Worldwide Gross`)),
    Profitability = as.numeric(Profitability)
  )

# Profitability_millions is computed as Profitability times the (numeric)
# worldwide gross.
q4 <- q3_Cleaned %>%
  mutate(Profitability_millions = Profitability * `Worldwide Gross`)
head(q4)
# 5. arrange(): (3 points)
# Sort the filtered dataset by Rotten Tomatoes % in descending order, and then
# by Profitability in descending order.
# NOTE(review): the commented-out line below references `four`, which is not
# defined anywhere in the visible script; q5 is the version that runs.
# five <- four %>% arrange(desc(`Rotten Tomatoes %`), desc(Profitability_millions))
q5 <- q4 %>%
  arrange(desc(`Rotten Tomatoes %`), desc(Profitability_millions))
head(q5)
# 6. Combining functions: (3 points)
# Use the pipe operator (%>%) to chain these operations together, starting
# with the original dataset and ending with a final dataframe that
# incorporates all the above transformations.
q6 <- movies %>%
  rename(movie_title = Film, release_year = Year) %>%
  # Keep `Rotten Tomatoes %` and `Worldwide Gross` as well, because the later
  # mutate/filter/arrange steps read them.
  select(
    movie_title, release_year, Genre, Profitability,
    `Rotten Tomatoes %`, `Worldwide Gross`
  ) %>%
  # Remove "$" and "," before the numeric conversion of the gross.
  mutate(
    `Worldwide Gross` = as.numeric(gsub("[$,]", "", `Worldwide Gross`)),
    Profitability = as.numeric(Profitability)
  ) %>%
  mutate(Profitability_millions = Profitability * `Worldwide Gross`) %>%
  filter(release_year > 2000, `Rotten Tomatoes %` > 80) %>%
  arrange(desc(`Rotten Tomatoes %`), desc(Profitability_millions))

# head() must be its own statement: written on the same line as the pipeline
# it would be parsed as a call on the result of arrange().
head(q6)
# 7. Interpret question 6 (1 point)
# From the resulting data, are the best movies the most popular?
# "Movies with the highest Rotten Tomatoes scores, like WALL-E and Midnight in
# Paris, are often among the most critically acclaimed, but they don't always
# generate the highest profits. In this dataset, films such as Waitress, which
# has a lower Rotten Tomatoes score, demonstrate significantly greater
# profitability, highlighting that critical praise does not always align with
# commercial success."
# EXTRA CREDIT (4 points)
# Create a summary data-frame that shows the average rating and
# Profitability_millions for movies by Genre.
# Hint: You'll need to use group_by() and summarize().

# Start again from the full dataset (no year/score filter) and rebuild the
# numeric columns that the summary needs.
XTRA_cleaned <- movies %>%
  rename(movie_title = Film, release_year = Year) %>%
  mutate(
    `Worldwide Gross` = as.numeric(gsub("[$,]", "", `Worldwide Gross`)),
    Profitability = as.numeric(Profitability)
  ) %>%
  mutate(Profitability_millions = Profitability * `Worldwide Gross`)

# One row per Genre; na.rm = TRUE keeps missing values from turning a whole
# genre's mean into NA. The rating used here is `Rotten Tomatoes %`.
XTRA <- XTRA_cleaned %>%
  group_by(Genre) %>%
  summarize(
    average_rating = mean(`Rotten Tomatoes %`, na.rm = TRUE),
    average_profitability_millions = mean(Profitability_millions, na.rm = TRUE)
  )
print(head(XTRA))