# TP Session ID: bua455f21

# set the default options applied to every R chunk in this document
# (TRUE is spelled out because T is an ordinary variable that can be reassigned)
knitr::opts_chunk$set(echo = TRUE, highlight = TRUE)

## Setup ====

# suppress scientific notation
options(scipen = 100)

# install pacman on first use, then attach it; require() returns FALSE when
# the package is missing, so without the library() call a fresh install would
# leave pacman unattached and the p_load() call below would fail
if (!require("pacman")) {
  install.packages("pacman", repos = "https://cran.us.r-project.org")
  library(pacman)
}

## Loading required package: pacman

# load (installing if necessary) the packages we'll need
p_load(tidyverse, ggthemes, magrittr, lubridate)

# tidyverse - a large suite of packages that work together
# ggthemes - smaller add-on for tidyverse graphics package, ggplot2
# magrittr - needed for piping
# lubridate - needed for dealing with dates

# verify packages 
# remove # in front of library if needed
# library()

# run the full R script file first; mojo_tidy() and the other helpers called
# below are not defined in this file (presumably they come from this script)
source("HW4_Ari_Cohen.R")

# call mojo_tidy() once per input file; each result is kept in its own object

movies_2018 <- mojo_tidy(in_data = "mojo_2018.csv")

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

movies_2019 <- mojo_tidy(in_data = "mojo_2019.csv")

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

movies_2020 <- mojo_tidy(in_data = "mojo_2020.csv")

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

movies_20211003 <- mojo_tidy(in_data = "mojo_20211003.csv")


# use bind_rows to stack the 2019, 2020 and 2021 datasets into one
# movies_2018 is intentionally left out: this object is named and plotted as
# "2019 - 2021", and the four-year stack is built separately as
# movies_2018_2021
movies_2019_2021 <- bind_rows(
  movies_20211003,
  movies_2020,
  movies_2019
) |>
  glimpse()

## Rows: 1,365
## Columns: 11
## $ date           <date> 2021-10-03, 2021-10-02, 2021-10-01, 2021-09-30, 2021-0…
## $ month          <ord> Oct, Oct, Oct, Sep, Sep, Sep, Sep, Sep, Sep, Sep, Sep, …
## $ day            <ord> Sun, Sat, Fri, Thu, Wed, Tue, Mon, Sun, Sat, Fri, Thu, …
## $ day_num        <int> 276, 275, 274, 273, 272, 271, 270, 269, 268, 267, 266, …
## $ num_releases   <int> 19, 20, 20, 29, 29, 29, 29, 29, 29, 29, 32, 32, 32, 31,…
## $ num1_release   <chr> "Venom: Let There Be Carnage", "Venom: Let There Be Car…
## $ num1_gross     <dbl> 21210000, 31600000, 37290000, 782384, 961808, 1175926, …
## $ top10_gross    <dbl> 30396547, 44964330, 46917120, 1937236, 2405897, 3065418…
## $ num1_pct_gross <dbl> 69.77766, 70.27793, 79.48058, 40.38661, 39.97711, 38.36…
## $ pct_chg_day    <dbl> -32.4, -4.2, 2321.9, -19.5, -21.5, 27.7, -71.3, -40.5, …
## $ pct_chg_wk     <dbl> 263.1, 219.7, 357.1, -25.4, -14.6, -16.2, -16.5, -19.4,…

# addition for Lecture 11
# stack all four datasets, print a glimpse of the full stack, keep only rows
# dated on or before 2021-08-30, and write the filtered rows to disk
# glimpse() runs before filter(), so the printed row count is for the
# unfiltered stack
# NOTE(review): this cutoff also drops 2021-08-31 -- confirm that is intended
movies_2018_2021 <- bind_rows(
  movies_20211003,
  movies_2020,
  movies_2019,
  movies_2018
) |>
  glimpse() |>
  filter(date <= "2021-08-30") |>
  write_csv("movie_gross_by_day_2018-2021_08_30")

## Rows: 1,365
## Columns: 11
## $ date           <date> 2021-10-03, 2021-10-02, 2021-10-01, 2021-09-30, 2021-0…
## $ month          <ord> Oct, Oct, Oct, Sep, Sep, Sep, Sep, Sep, Sep, Sep, Sep, …
## $ day            <ord> Sun, Sat, Fri, Thu, Wed, Tue, Mon, Sun, Sat, Fri, Thu, …
## $ day_num        <int> 276, 275, 274, 273, 272, 271, 270, 269, 268, 267, 266, …
## $ num_releases   <int> 19, 20, 20, 29, 29, 29, 29, 29, 29, 29, 32, 32, 32, 31,…
## $ num1_release   <chr> "Venom: Let There Be Carnage", "Venom: Let There Be Car…
## $ num1_gross     <dbl> 21210000, 31600000, 37290000, 782384, 961808, 1175926, …
## $ top10_gross    <dbl> 30396547, 44964330, 46917120, 1937236, 2405897, 3065418…
## $ num1_pct_gross <dbl> 69.77766, 70.27793, 79.48058, 40.38661, 39.97711, 38.36…
## $ pct_chg_day    <dbl> -32.4, -4.2, 2321.9, -19.5, -21.5, 27.7, -71.3, -40.5, …
## $ pct_chg_wk     <dbl> 263.1, 219.7, 357.1, -25.4, -14.6, -16.2, -16.5, -19.4,…

# plot a single year of data; the outer parentheses print the result as well
# as saving it
(gross_2019 <- mojo_plot(in_data = movies_2019, yr = "2019"))

# reuse the same function on the stacked multi-year dataset
(gross_2019_2021 <- mojo_plot(in_data = movies_2019_2021, yr = "2019 - 2021"))

# run mojo_plot_longer on the 2018 - 2021 stack
(gross_2018_2021 <- mojo_plot_longer(
  in_data = movies_2018_2021,
  yrs = "2018 - 2021",
  increment = "3 months"
))

# summarize the data by month, then drop Sep and Oct of 2021
# (BLS data is only available until Aug of that year)
movies_mnth_smry <- movies_2018_2021 |>
  mojo_smry() |>
  filter(!(year == 2021 & month %in% c("Sep", "Oct")))

## `summarise()` has grouped output by 'year'. You can override using the `.groups` argument.

# import the BLS data, skipping the first 11 rows and suppressing the
# column-type message, then reshape it in four steps:
#   1. keep only the Year through Dec columns
#   2. keep the time frame of choice (2018 onward)
#   3. stretch the month columns out so there is one observation per row
#   4. remove rows with NAs (data hasn't been reported yet)
cpi <- read_csv("cpi_urban.csv", skip = 11, show_col_types = FALSE) |>
  select(Year:Dec) |>
  filter(Year >= 2018) |>
  pivot_longer(cols = Jan:Dec, names_to = "month", values_to = "cpi_urban") |>
  filter(!is.na(cpi_urban))

# call new function for cpi_urban dataset
cpi_urban <- bls_tidy(
  in_data = "cpi_urban.csv",
  skip_rows = 11,
  start_yr = 2018,
  var_nm = "cpi_urban"
)

# call new function for unemployment dataset
unemp <- bls_tidy(
  in_data = "unemp.csv",
  skip_rows = 11,
  start_yr = 2018,
  var_nm = "unemp"
)

# call new function for imports dataset
imports <- bls_tidy(
  in_data = "imports.csv",
  skip_rows = 10,
  start_yr = 2018,
  var_nm = "imports"
)

# call new function for exports dataset
exports <- bls_tidy(
  in_data = "exports.csv",
  skip_rows = 10,
  start_yr = 2018,
  var_nm = "exports"
)

# in HW 4 create 2 more function calls (imports and exports, above)

# join the four BLS datasets on Year and month; Year is renamed to year only
# once, after the last join, because every join in this chain is keyed on
# "Year" and the later join with the movie summary is keyed on "year"
bls_data <- full_join(cpi_urban, unemp, by = c("Year", "month")) |>
  full_join(imports, by = c("Year", "month")) |>
  full_join(exports, by = c("Year", "month")) |>
  rename("year" = "Year")

# join the BLS data with the monthly movie summary on year and month, build a
# date column from year and month using "_01" as the day part, move it in
# front of year, print a glimpse, and write the result to disk
movies_bls <- bls_data |>
  full_join(movies_mnth_smry, by = c("year", "month")) |>
  mutate(date = ymd(paste0(year, month, "_01"))) |>
  relocate(date, .before = year) |>
  glimpse() |>
  write_csv("HW4_Ari_Cohen_tidy_data.csv")

# Lecture 9 and 10

# Ari Cohen

# 10/15/2021

# TP Session ID: bua455f21

# call new function for cpi_urban dataset

# call new function for unemployment dataset

# call new function for cpi_urban dataset

# call new function for unemployment dataset

# in HW 4 create 2 more function calls