# Default options for every R chunk in the knitted document:
# echo the source code and syntax-highlight it.
knitr::opts_chunk$set(echo = TRUE, highlight = TRUE)

## Setup ====
# suppress scientific notation
options(scipen = 100)

# Install pacman if it is missing, then attach it.
# `require()` only attaches a package that is already installed, so on a
# fresh machine the install branch used to leave pacman unattached and the
# p_load() call below failed. Attaching explicitly covers both cases.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman", repos = "http://cran.us.r-project.org")
}
library(pacman)

# install (if needed) and load the packages we'll need
p_load(tidyverse, ggthemes, magrittr, lubridate)
# tidyverse - a large suite of packages that work together
# ggthemes - smaller add-on for tidyverse graphics package, ggplot2
# magrittr - needed for piping
# lubridate - needed for dealing with dates

# verify packages
# remove # in front of library if needed
# library()
# Run the companion R script in full. The mojo_* and bls_* helpers called
# below are not defined in this file, so presumably they come from it.
source("HW4_Ari_Cohen.R")

# Tidy each yearly mojo_*.csv file with mojo_tidy(), one object per file.
# The "## Warning" lines are the warnings recorded when the document was knit.
movies_2018 <- mojo_tidy(in_data = "mojo_2018.csv")
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

movies_2019 <- mojo_tidy(in_data = "mojo_2019.csv")
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

movies_2020 <- mojo_tidy(in_data = "mojo_2020.csv")
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

# 2021 data runs through 2021-10-03 (see the file name)
movies_20211003 <- mojo_tidy(in_data = "mojo_20211003.csv")
# Stack the 2019, 2020 and 2021 (through 2021-10-03) data sets into one.
# movies_2018 is intentionally NOT included here: this object is named
# 2019_2021 and is plotted below under the "2019 - 2021" title. The stack
# that includes 2018 is built separately as movies_2018_2021.
movies_2019_2021 <- bind_rows(
  movies_20211003,
  movies_2020,
  movies_2019
) |>
  glimpse()
# Recorded glimpse() output. The row count line is omitted because it was
# captured when 2018 was still part of this stack.
## Columns: 11
## $ date <date> 2021-10-03, 2021-10-02, 2021-10-01, 2021-09-30, 2021-0…
## $ month <ord> Oct, Oct, Oct, Sep, Sep, Sep, Sep, Sep, Sep, Sep, Sep, …
## $ day <ord> Sun, Sat, Fri, Thu, Wed, Tue, Mon, Sun, Sat, Fri, Thu, …
## $ day_num <int> 276, 275, 274, 273, 272, 271, 270, 269, 268, 267, 266, …
## $ num_releases <int> 19, 20, 20, 29, 29, 29, 29, 29, 29, 29, 32, 32, 32, 31,…
## $ num1_release <chr> "Venom: Let There Be Carnage", "Venom: Let There Be Car…
## $ num1_gross <dbl> 21210000, 31600000, 37290000, 782384, 961808, 1175926, …
## $ top10_gross <dbl> 30396547, 44964330, 46917120, 1937236, 2405897, 3065418…
## $ num1_pct_gross <dbl> 69.77766, 70.27793, 79.48058, 40.38661, 39.97711, 38.36…
## $ pct_chg_day <dbl> -32.4, -4.2, 2321.9, -19.5, -21.5, 27.7, -71.3, -40.5, …
## $ pct_chg_wk <dbl> 263.1, 219.7, 357.1, -25.4, -14.6, -16.2, -16.5, -19.4,…
# addition for Lecture 11
# Stack all four data sets (2018 through 2021-10-03), preview the combined
# table, then keep only rows dated on or before 2021-08-30.
# NOTE(review): August has 31 days, so this cutoff drops 2021-08-31. The
# output file name uses the same date, so it may be intentional - confirm.
movies_2018_2021 <- bind_rows(
  movies_20211003,
  movies_2020,
  movies_2019,
  movies_2018
) |>
  glimpse() |>
  filter(date <= as.Date("2021-08-30"))

# Save the filtered daily data (the file name has no extension).
write_csv(movies_2018_2021, "movie_gross_by_day_2018-2021_08_30")
# Recorded glimpse() output (taken before the date filter is applied):
## Rows: 1,365
## Columns: 11
## $ date <date> 2021-10-03, 2021-10-02, 2021-10-01, 2021-09-30, 2021-0…
## $ month <ord> Oct, Oct, Oct, Sep, Sep, Sep, Sep, Sep, Sep, Sep, Sep, …
## $ day <ord> Sun, Sat, Fri, Thu, Wed, Tue, Mon, Sun, Sat, Fri, Thu, …
## $ day_num <int> 276, 275, 274, 273, 272, 271, 270, 269, 268, 267, 266, …
## $ num_releases <int> 19, 20, 20, 29, 29, 29, 29, 29, 29, 29, 32, 32, 32, 31,…
## $ num1_release <chr> "Venom: Let There Be Carnage", "Venom: Let There Be Car…
## $ num1_gross <dbl> 21210000, 31600000, 37290000, 782384, 961808, 1175926, …
## $ top10_gross <dbl> 30396547, 44964330, 46917120, 1937236, 2405897, 3065418…
## $ num1_pct_gross <dbl> 69.77766, 70.27793, 79.48058, 40.38661, 39.97711, 38.36…
## $ pct_chg_day <dbl> -32.4, -4.2, 2321.9, -19.5, -21.5, 27.7, -71.3, -40.5, …
## $ pct_chg_wk <dbl> 263.1, 219.7, 357.1, -25.4, -14.6, -16.2, -16.5, -19.4,…
# Plot a single year (2019) with mojo_plot(); naming the object on its own
# line displays it.
gross_2019 <- mojo_plot(in_data = movies_2019, yr = "2019")
gross_2019

# Same function, applied to the stacked 2019 - 2021 data.
gross_2019_2021 <- mojo_plot(in_data = movies_2019_2021, yr = "2019 - 2021")
gross_2019_2021

# mojo_plot_longer() on the stacked 2018 - 2021 data, passing an increment
# of "3 months".
gross_2018_2021 <- mojo_plot_longer(
  in_data = movies_2018_2021,
  yrs = "2018 - 2021",
  increment = "3 months"
)
gross_2018_2021
# Summarise the stacked movie data with mojo_smry(), then drop September and
# October of 2021: BLS data was only available through August of that year
# when this was written.
movies_mnth_smry <- movies_2018_2021 |>
  mojo_smry() |>
  filter(!(year == 2021 & month %in% c("Sep", "Oct")))
## `summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
# Import the BLS CPI file directly: skip the first 11 lines and suppress the
# column-type message, then reshape to one row per year/month.
cpi <- read_csv("cpi_urban.csv", skip = 11, show_col_types = FALSE) |>
  # keep only the columns from Year through Dec
  select(Year:Dec) |>
  # restrict to the time frame of interest
  filter(Year >= 2018) |>
  # one observation per row: month names move into `month`,
  # their values into `cpi_urban`
  pivot_longer(cols = Jan:Dec, names_to = "month", values_to = "cpi_urban") |>
  # drop months with no value (data hasn't been reported yet)
  filter(!is.na(cpi_urban))
# Tidy each BLS series with bls_tidy(). The arguments give the file, the
# number of leading lines to skip, the first year to keep and the name of
# the value column. Straight quotes are required here: typographic (curly)
# quotes are not valid R string delimiters.
cpi_urban <- bls_tidy(
  in_data = "cpi_urban.csv", skip_rows = 11,
  start_yr = 2018, var_nm = "cpi_urban"
)
unemp <- bls_tidy(
  in_data = "unemp.csv", skip_rows = 11,
  start_yr = 2018, var_nm = "unemp"
)
imports <- bls_tidy(
  in_data = "imports.csv", skip_rows = 10,
  start_yr = 2018, var_nm = "imports"
)
exports <- bls_tidy(
  in_data = "exports.csv", skip_rows = 10,
  start_yr = 2018, var_nm = "exports"
)

# Join the four series on Year/month in a single pipeline. Year is renamed
# to lower-case `year` only after the last join, because every join keys on
# the original `Year` column.
bls_data <- cpi_urban |>
  full_join(unemp, by = c("Year", "month")) |>
  full_join(imports, by = c("Year", "month")) |>
  full_join(exports, by = c("Year", "month")) |>
  rename(year = Year)
# Combine the BLS series with the monthly movie summary on year/month, add a
# first-of-month `date` column, preview the result and write it to CSV.
movies_bls <- full_join(
  bls_data,
  movies_mnth_smry,
  by = c("year", "month")
) |>
  # Build an unambiguous "YYYY-Mon-01" string (e.g. "2018-Jan-01") so ymd()
  # does not have to guess where the year ends and the month name begins.
  mutate(
    date = paste(year, month, "01", sep = "-"),
    date = ymd(date)
  ) |>
  relocate(date, .before = year) |>
  glimpse() |>
  write_csv("HW4_Ari_Cohen_tidy_data.csv")