library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
# Load the movies dataset
movies <- read_csv("https://gist.githubusercontent.com/tiangechen/b68782efa49a16edaf07dc2cdaa855ea/raw/0c794a9717f18b094eabab2cd6a6b9a226903577/movies.csv")
## Rows: 77 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Film, Genre, Lead Studio, Worldwide Gross
## dbl (4): Audience score %, Profitability, Rotten Tomatoes %, Year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
1. rename(): (4 points)
Rename the “Film” column to “movie_title” and “Year” to
“release_year”.
q1 <- movies %>%
rename(movie_title = Film, release_year = Year)
head (q1 , 3)
## # A tibble: 3 × 8
## movie_title Genre `Lead Studio` `Audience score %` Profitability
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Zack and Miri Make a Por… Roma… The Weinstei… 70 1.75
## 2 Youth in Revolt Come… The Weinstei… 52 1.09
## 3 You Will Meet a Tall Dar… Come… Independent 35 1.21
## # ℹ 3 more variables: `Rotten Tomatoes %` <dbl>, `Worldwide Gross` <chr>,
## # release_year <dbl>
2. select(): (4 points)
Create a new dataframe with only the columns: movie_title,
release_year, Genre, Profitability,
q2 <- q1 %>%
select(movie_title, release_year, Genre, Profitability)
head(q2)
## # A tibble: 6 × 4
## movie_title release_year Genre Profitability
## <chr> <dbl> <chr> <dbl>
## 1 Zack and Miri Make a Porno 2008 Romance 1.75
## 2 Youth in Revolt 2010 Comedy 1.09
## 3 You Will Meet a Tall Dark Stranger 2010 Comedy 1.21
## 4 When in Rome 2010 Comedy 0
## 5 What Happens in Vegas 2008 Comedy 6.27
## 6 Water For Elephants 2011 Drama 3.08
3. filter(): (4 points)
Filter the dataset to include only movies released after 2000 with a
Rotten Tomatoes % higher than 80.
q3 <- q1 %>%
select(movie_title, release_year, Genre, Profitability , `Rotten Tomatoes %`) %>%
filter(release_year >2000,
`Rotten Tomatoes %` > 80)
head(q3)
## # A tibble: 6 × 5
## movie_title release_year Genre Profitability `Rotten Tomatoes %`
## <chr> <dbl> <chr> <dbl> <dbl>
## 1 WALL-E 2008 Animati… 2.90 96
## 2 Waitress 2007 Romance 11.1 89
## 3 Tangled 2010 Animati… 1.37 89
## 4 Rachel Getting Married 2008 Drama 1.38 85
## 5 My Week with Marilyn 2011 Drama 0.826 83
## 6 Midnight in Paris 2011 Romence 8.74 93
4. mutate(): (4 points)
Add a new column called “Profitability_millions” that converts the
Profitability to millions of dollars.
q4 <- q3 %>%
mutate(Profitability_millions = Profitability*1000000)
head(q4)
## # A tibble: 6 × 6
## movie_title release_year Genre Profitability `Rotten Tomatoes %`
## <chr> <dbl> <chr> <dbl> <dbl>
## 1 WALL-E 2008 Animati… 2.90 96
## 2 Waitress 2007 Romance 11.1 89
## 3 Tangled 2010 Animati… 1.37 89
## 4 Rachel Getting Married 2008 Drama 1.38 85
## 5 My Week with Marilyn 2011 Drama 0.826 83
## 6 Midnight in Paris 2011 Romence 8.74 93
## # ℹ 1 more variable: Profitability_millions <dbl>
5. arrange(): (3 points)
Sort the filtered dataset by Rotten Tomatoes % in descending order,
and then by Profitability in descending order.
five <- four %>% arrange(desc(Rotten Tomatoes %) ,
desc(Profitability_millions))
q5 <- q4 %>%
arrange(desc(`Rotten Tomatoes %`) , desc(Profitability_millions))
head(q5)
## # A tibble: 6 × 6
## movie_title release_year Genre Profitability `Rotten Tomatoes %`
## <chr> <dbl> <chr> <dbl> <dbl>
## 1 WALL-E 2008 Animation 2.90 96
## 2 Midnight in Paris 2011 Romence 8.74 93
## 3 Enchanted 2007 Comedy 4.01 93
## 4 Knocked Up 2007 Comedy 6.64 91
## 5 Waitress 2007 Romance 11.1 89
## 6 A Serious Man 2009 Drama 4.38 89
## # ℹ 1 more variable: Profitability_millions <dbl>
6. Combining functions: (3 points)
Use the pipe operator (%>%) to chain these operations
together,
7. Interpret question 6 (1 point)
From the resulting data, are the best movies the most popular?
After analyzing the data between profitability, and Rotton Tomatoes
scores, there is no strong correlation between the best and the most
popular movies
.
.