# Untitled.knit

# Setup ----
# dplyr provides the data-manipulation verbs and the %>% pipe used below;
# readr provides read_csv().
library(dplyr)
library(readr)

# Load the movies dataset from a GitHub gist (requires network access).
movies <- read_csv(
  "https://gist.githubusercontent.com/tiangechen/b68782efa49a16edaf07dc2cdaa855ea/raw/0c794a9717f18b094eabab2cd6a6b9a226903577/movies.csv"
)

# 1. rename(): (4 points)
# Rename the "Film" column to "movie_title" and "Year" to "release_year".

# Rename Film -> movie_title and Year -> release_year; all other columns are
# carried through unchanged.
q1 <- movies %>%
  rename(movie_title = Film, release_year = Year)

# Preview the first rows to confirm the new column names.
head(q1)

# 2. select(): (4 points)
# Create a new dataframe with only the columns: movie_title, release_year,
# Genre, Profitability.

# Keep only the four requested columns from the renamed data.
q2 <- q1 %>%
  select(movie_title, release_year, Genre, Profitability)

# Preview the reduced data frame.
head(q2)

# 3. filter(): (4 points)
# Filter the dataset to include only movies released after 2000 with a
# Rotten Tomatoes % higher than 80.

# Filter from q1 rather than q2: q2 no longer carries the `Rotten Tomatoes %`
# column. The column name contains spaces and a percent sign, so it must be
# wrapped in backticks to be used as a name.
q3 <- q1 %>%
  filter(release_year > 2000, `Rotten Tomatoes %` > 80)

# Preview the filtered rows.
head(q3)

# 4. mutate(): (4 points)
# Add a new column called "Profitability_millions" that converts the
# Profitability to millions of dollars.

# Make both inputs numeric before doing arithmetic on them:
# - strip "$" and "," from `Worldwide Gross`, then convert it to a number;
# - coerce Profitability to numeric as well.
# `Worldwide Gross` contains a space, so it needs backticks.
q3_Cleaned <- q3 %>%
  mutate(
    `Worldwide Gross` = as.numeric(gsub("[$,]", "", `Worldwide Gross`)),
    Profitability = as.numeric(Profitability)
  )

# Add Profitability_millions as Profitability times the cleaned worldwide gross.
# NOTE(review): confirm this product is the intended "millions of dollars"
# conversion for the assignment.
q4 <- q3_Cleaned %>%
  mutate(Profitability_millions = Profitability * `Worldwide Gross`)

# Preview the result, including the new column.
head(q4)

# 5. arrange(): (3 points)
# Sort the filtered dataset by Rotten Tomatoes % in descending order, and then
# by Profitability in descending order.
# Earlier draft, kept for reference (it refers to an object named `four`):
# five <- four %>% arrange(desc(`Rotten Tomatoes %`), desc(Profitability_millions))

# Sort by Rotten Tomatoes score, highest first; ties are broken by
# Profitability_millions, also highest first.
q5 <- q4 %>%
  arrange(desc(`Rotten Tomatoes %`), desc(Profitability_millions))

# Preview the top of the sorted data.
head(q5)

# 6. Combining functions: (3 points)
# Use the pipe operator (%>%) to chain these operations together, starting with
# the original dataset and ending with a final dataframe that incorporates all
# the above transformations.

# Full pipeline from the raw data: rename -> select -> clean -> derive ->
# filter -> sort. `Rotten Tomatoes %` and `Worldwide Gross` are kept in the
# select() step because later steps need them.
q6 <- movies %>%
  rename(movie_title = Film, release_year = Year) %>%
  select(
    movie_title, release_year, Genre, Profitability,
    `Rotten Tomatoes %`, `Worldwide Gross`
  ) %>%
  # Strip "$" and "," so the gross can be parsed as a number.
  mutate(
    `Worldwide Gross` = as.numeric(gsub("[$,]", "", `Worldwide Gross`)),
    Profitability = as.numeric(Profitability)
  ) %>%
  mutate(Profitability_millions = Profitability * `Worldwide Gross`) %>%
  filter(release_year > 2000, `Rotten Tomatoes %` > 80) %>%
  arrange(desc(`Rotten Tomatoes %`), desc(Profitability_millions))

# Preview the final result; this must be a separate statement from the
# pipeline above, after q6 has been assigned.
head(q6)

# 7. Interpret question 6 (1 point)
# From the resulting data, are the best movies the most popular?
#
# Answer: Movies with the highest Rotten Tomatoes scores, like WALL-E and
# Midnight in Paris, are often among the most critically acclaimed, but they
# don't always generate the highest profits. In this dataset, films such as
# Waitress, which has a lower Rotten Tomatoes score, demonstrate significantly
# greater profitability, highlighting that critical praise does not always
# align with commercial success.

# EXTRA CREDIT (4 points)
# Create a summary data frame that shows the average rating and
# Profitability_millions for movies by Genre.
# Hint: You'll need to use group_by() and summarize().

# Prepare the full (unfiltered) dataset for the per-genre summary: rename the
# columns, make the money columns numeric, and derive Profitability_millions.
XTRA_cleaned <- movies %>%
  rename(movie_title = Film, release_year = Year) %>%
  # Strip "$" and "," so the gross can be parsed as a number.
  mutate(
    `Worldwide Gross` = as.numeric(gsub("[$,]", "", `Worldwide Gross`)),
    Profitability = as.numeric(Profitability)
  ) %>%
  mutate(Profitability_millions = Profitability * `Worldwide Gross`)

# One row per Genre with the mean Rotten Tomatoes score and the mean
# Profitability_millions; na.rm = TRUE drops missing values from each mean.
XTRA <- XTRA_cleaned %>%
  group_by(Genre) %>%
  summarize(
    average_rating = mean(`Rotten Tomatoes %`, na.rm = TRUE),
    average_profitability_millions = mean(Profitability_millions, na.rm = TRUE)
  )

# Show the first rows of the summary.
print(head(XTRA))