Author

Mark Suksunpantep

Published

September 15, 2024

Setup

Code
```{r}
#| label: setup

# Set the default options applied to every R chunk in this document:
# echo the source code and syntax-highlight it.
knitr::opts_chunk$set(
  echo = TRUE,
  highlight = TRUE
)

# Strongly penalize scientific notation so numbers print in fixed notation.
options(scipen = 100)

# Install the helper package (pacman) only if it is not already available.
# require() is used on purpose here: it returns FALSE (instead of raising an
# error) when the package is missing, and that FALSE triggers the install.
if (!require("pacman")) {
  install.packages("pacman", repos = "http://lib.stat.cmu.edu/R/CRAN/")
}
```
Loading required package: pacman
Code
```{r}
# Install (when missing) and attach every package used in this document.
# pacman itself goes first in the call, followed by the remaining packages.
pacman::p_load(
  pacman,
  tidyverse,
  gridExtra,
  magrittr,
  kableExtra
)

# List the packages pacman reports as loaded
# (comment this call out in finished documents).
pacman::p_loaded()
```
 [1] "kableExtra" "magrittr"   "gridExtra"  "lubridate"  "forcats"   
 [6] "stringr"    "dplyr"      "purrr"      "readr"      "tidyr"     
[11] "tibble"     "ggplot2"    "tidyverse"  "pacman"    
  • Create a new chunk and include setup code from instructions here.

  • This is Chunk 1.

HW 3 - Part 1

Chunk 2: Import and Modify Categorical Variables

Code
```{r}
#| label: import-and-modify-categorical-variables

# Import the box office data set.
# show_col_types = FALSE suppresses the column-specification message that
# read_csv() would otherwise print on import.
mojo_23 <- read_csv(
  "data/Box_Office_Mojo_Week3_HW3.csv",
  show_col_types = FALSE
) |>
  glimpse()

# Use select() with ! to exclude (drop) the num1 text variable.
mojo_23_mod <- mojo_23 |>
  select(!num1)

# Create the factor variables monthF and wkdayF.
#   levels: fixes the order of the categories (the default is alphabetical).
#           month.abb is base R's built-in vector "Jan", "Feb", ..., "Dec".
#   labels: controls how the values appear in data, tables and plots.
mojo_23_mod <- mojo_23_mod |>
  mutate(
    monthF = factor(month, levels = month.abb),
    wkdayF = factor(
      day,
      levels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"),
      labels = c("M", "T", "W", "Th", "F", "Sa", "Su")
    )
  ) |>
  glimpse()

# Create the factor variable quartF from the numeric quarter (1 to 4),
# following the same pattern as above, with readable labels.
mojo_23_mod <- mojo_23_mod |>
  mutate(
    quartF = factor(
      quart,
      levels = c(1, 2, 3, 4),
      labels = c("1st Qtr", "2nd Qtr", "3rd Qtr", "4th Qtr")
    )
  ) |>
  glimpse()
```
Rows: 365
Columns: 8
$ date         <date> 2023-12-31, 2023-12-30, 2023-12-29, 2023-12-28, 2023-12-…
$ month        <chr> "Dec", "Dec", "Dec", "Dec", "Dec", "Dec", "Dec", "Dec", "…
$ day          <chr> "Sun", "Sat", "Fri", "Thu", "Wed", "Tue", "Mon", "Sun", "…
$ quart        <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
$ top10gross   <dbl> 23078184, 40050370, 37348409, 33261609, 33892628, 4178886…
$ num1gross    <dbl> 5208897, 8637841, 8630268, 7988504, 8135639, 8970413, 181…
$ num_releases <dbl> 43, 44, 44, 46, 45, 45, 44, 40, 41, 41, 40, 40, 39, 39, 4…
$ num1         <chr> "Wonka", "Wonka", "Wonka", "Wonka", "Wonka", "Wonka", "Th…
Rows: 365
Columns: 9
$ date         <date> 2023-12-31, 2023-12-30, 2023-12-29, 2023-12-28, 2023-12-…
$ month        <chr> "Dec", "Dec", "Dec", "Dec", "Dec", "Dec", "Dec", "Dec", "…
$ day          <chr> "Sun", "Sat", "Fri", "Thu", "Wed", "Tue", "Mon", "Sun", "…
$ quart        <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
$ top10gross   <dbl> 23078184, 40050370, 37348409, 33261609, 33892628, 4178886…
$ num1gross    <dbl> 5208897, 8637841, 8630268, 7988504, 8135639, 8970413, 181…
$ num_releases <dbl> 43, 44, 44, 46, 45, 45, 44, 40, 41, 41, 40, 40, 39, 39, 4…
$ monthF       <fct> Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, De…
$ wkdayF       <fct> Su, Sa, F, Th, W, T, M, Su, Sa, F, Th, W, T, M, Su, Sa, F…
Rows: 365
Columns: 10
$ date         <date> 2023-12-31, 2023-12-30, 2023-12-29, 2023-12-28, 2023-12-…
$ month        <chr> "Dec", "Dec", "Dec", "Dec", "Dec", "Dec", "Dec", "Dec", "…
$ day          <chr> "Sun", "Sat", "Fri", "Thu", "Wed", "Tue", "Mon", "Sun", "…
$ quart        <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
$ top10gross   <dbl> 23078184, 40050370, 37348409, 33261609, 33892628, 4178886…
$ num1gross    <dbl> 5208897, 8637841, 8630268, 7988504, 8135639, 8970413, 181…
$ num_releases <dbl> 43, 44, 44, 46, 45, 45, 44, 40, 41, 41, 40, 40, 39, 39, 4…
$ monthF       <fct> Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, De…
$ wkdayF       <fct> Su, Sa, F, Th, W, T, M, Su, Sa, F, Th, W, T, M, Su, Sa, F…
$ quartF       <fct> 4th Qtr, 4th Qtr, 4th Qtr, 4th Qtr, 4th Qtr, 4th Qtr, 4th…

HW 3 - Part 2

Chunk 3: Modify and Create Numerical Variables

Code
```{r}
#| label: modify-and-create-numerical-variables

# Rescale top10gross and num1gross by dividing by 1 million, then round the
# results to 2 decimal places. The rescaled copies get an "M" suffix; the
# original columns are kept unchanged.
mojo_23_mod <- mojo_23_mod |>
  mutate(
    top10grossM = (top10gross / 1000000) |> round(2),
    num1grossM = (num1gross / 1000000) |> round(2)
  ) |>
  glimpse()

# Convert the number of releases to an integer variable with as.integer().
# Calculate num1_pct = num1gross / top10gross * 100, i.e. the number 1 gross
# as a percentage of the top 10 gross, rounded to 2 decimal places.
mojo_23_mod <- mojo_23_mod |>
  mutate(
    num_releases = as.integer(num_releases),
    num1_pct = (num1gross / top10gross * 100) |> round(2)
  ) |>
  glimpse()
```
Rows: 365
Columns: 12
$ date         <date> 2023-12-31, 2023-12-30, 2023-12-29, 2023-12-28, 2023-12-…
$ month        <chr> "Dec", "Dec", "Dec", "Dec", "Dec", "Dec", "Dec", "Dec", "…
$ day          <chr> "Sun", "Sat", "Fri", "Thu", "Wed", "Tue", "Mon", "Sun", "…
$ quart        <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
$ top10gross   <dbl> 23078184, 40050370, 37348409, 33261609, 33892628, 4178886…
$ num1gross    <dbl> 5208897, 8637841, 8630268, 7988504, 8135639, 8970413, 181…
$ num_releases <dbl> 43, 44, 44, 46, 45, 45, 44, 40, 41, 41, 40, 40, 39, 39, 4…
$ monthF       <fct> Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, De…
$ wkdayF       <fct> Su, Sa, F, Th, W, T, M, Su, Sa, F, Th, W, T, M, Su, Sa, F…
$ quartF       <fct> 4th Qtr, 4th Qtr, 4th Qtr, 4th Qtr, 4th Qtr, 4th Qtr, 4th…
$ top10grossM  <dbl> 23.08, 40.05, 37.35, 33.26, 33.89, 41.79, 58.55, 17.02, 2…
$ num1grossM   <dbl> 5.21, 8.64, 8.63, 7.99, 8.14, 8.97, 18.15, 5.00, 9.00, 13…
Rows: 365
Columns: 13
$ date         <date> 2023-12-31, 2023-12-30, 2023-12-29, 2023-12-28, 2023-12-…
$ month        <chr> "Dec", "Dec", "Dec", "Dec", "Dec", "Dec", "Dec", "Dec", "…
$ day          <chr> "Sun", "Sat", "Fri", "Thu", "Wed", "Tue", "Mon", "Sun", "…
$ quart        <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
$ top10gross   <dbl> 23078184, 40050370, 37348409, 33261609, 33892628, 4178886…
$ num1gross    <dbl> 5208897, 8637841, 8630268, 7988504, 8135639, 8970413, 181…
$ num_releases <int> 43, 44, 44, 46, 45, 45, 44, 40, 41, 41, 40, 40, 39, 39, 4…
$ monthF       <fct> Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, De…
$ wkdayF       <fct> Su, Sa, F, Th, W, T, M, Su, Sa, F, Th, W, T, M, Su, Sa, F…
$ quartF       <fct> 4th Qtr, 4th Qtr, 4th Qtr, 4th Qtr, 4th Qtr, 4th Qtr, 4th…
$ top10grossM  <dbl> 23.08, 40.05, 37.35, 33.26, 33.89, 41.79, 58.55, 17.02, 2…
$ num1grossM   <dbl> 5.21, 8.64, 8.63, 7.99, 8.14, 8.97, 18.15, 5.00, 9.00, 13…
$ num1_pct     <dbl> 22.57, 21.57, 23.11, 24.02, 24.00, 21.47, 31.00, 29.38, 3…

HW 3 - Part 3

Chunk 4: Group and Summarize Data

Code
```{r}
#| label: group-and-summarize-data

# Group the data by quartF and wkdayF and calculate these summary statistics
# for each weekday within each quarter:
#   max_num          maximum number of releases (num_releases)
#   mean_num1grossM  mean of the number 1 gross (num1grossM), rounded to 2
#                    decimal places
#
# .groups = "drop" returns an ungrouped result. Without it, summarize() keeps
# the output grouped by quartF and prints an informational message; dropping
# the grouping avoids it silently carrying over into later steps.
mojo_qtr_smry <- mojo_23_mod |>
  select(quartF, wkdayF, num_releases, num1grossM) |>
  group_by(quartF, wkdayF) |>
  summarize(
    max_num = max(num_releases, na.rm = TRUE),
    mean_num1grossM = mean(num1grossM, na.rm = TRUE) |> round(2),
    .groups = "drop"
  )
```
`summarise()` has grouped output by 'quartF'. You can override using the
`.groups` argument.

HW 3 - Part 4

Chunk 5: Reshape Data to Create a Table

Code
```{r}
#| label: reshape-data-to-create-a-table

# Reshape mojo_qtr_smry to a wide format to create a summary table.
# The table will have:
#   1 column showing the 4 quarters (renamed from quartF to Qtr)
#   1 column for each day of the week, holding mean_num1grossM
#   4 rows, 1 for each quarter of the year
mojo_qtr_wide <- mojo_qtr_smry |>
  pivot_wider(
    id_cols = quartF,
    names_from = wkdayF,
    values_from = mean_num1grossM
  ) |>
  rename(Qtr = quartF)

# Present the summary table mojo_qtr_wide with kable().
# More complete table formatting is covered in the Week 3 lecture notes.
mojo_qtr_wide |>
  knitr::kable()
```
Qtr M T W Th F Sa Su
1st Qtr 5.23 4.17 2.71 2.53 14.57 15.29 11.63
2nd Qtr 8.13 7.83 7.70 6.79 27.85 27.14 21.00
3rd Qtr 7.13 7.47 6.41 5.38 16.87 15.87 13.92
4th Qtr 3.57 3.55 2.78 2.89 16.23 13.09 8.64

HW 3 - Part 5

Chunk 6: Reshaping and Plotting Data

Code
```{r reshaping and plotting data}
# Convert the wide summary table back to long format for plotting.
# The long data keeps Qtr and gains:
#   Day              one row per weekday column (M through Su)
#   mean_num1grossM  the mean value for that quarter and weekday
mojo_qtr_long <- pivot_longer(
  mojo_qtr_wide,
  cols = M:Su,
  names_to = "Day",
  values_to = "mean_num1grossM"
) |>
  glimpse()

# Turn Day into a factor so the weekdays keep Monday-to-Sunday order
# (the levels argument fixes the order) instead of sorting alphabetically.
mojo_qtr_long <- mojo_qtr_long |>
  mutate(
    Day = factor(
      Day,
      levels = c("M", "T", "W", "Th", "F", "Sa", "Su")
    )
  ) |>
  glimpse()

# Basic barplot: quarters on the x axis, mean gross on the y axis and one
# side-by-side (dodged) bar per weekday. stat = "identity" plots the values
# as given rather than counting rows.
mojo_qtr_barplot <- ggplot(mojo_qtr_long) +
  geom_bar(
    aes(x = Qtr, y = mean_num1grossM, fill = Day),
    stat = "identity",
    position = "dodge"
  ) +
  theme_classic()

# Display the basic barplot.
mojo_qtr_barplot

# Formatted barplot: add axis labels, title and caption, switch to the
# Spectral fill palette, and place the legend on a single row at the bottom.
# The result is displayed but not stored.
mojo_qtr_barplot +
  labs(
    x = "",
    y = "Mean Daily Gross ($M)",
    title = "Mean Daily Gross of Top Film by Quarter and Day of Week",
    caption = "Data Source: www.boxofficemojo.com"
  ) +
  scale_fill_brewer(palette = "Spectral") +
  guides(fill = guide_legend(nrow = 1)) +
  theme(legend.position = "bottom")

# Code to export final barplot:
#   By default, this code exports the last ggplot created
#   Edit code by replacing firstname and lastname with your
#     first and last names, respectively.
# ggsave("img/HW3_Barplot_firstname_lastname.png", height=4, width=6)
```
Rows: 28
Columns: 3
Groups: Qtr [4]
$ Qtr             <fct> 1st Qtr, 1st Qtr, 1st Qtr, 1st Qtr, 1st Qtr, 1st Qtr, …
$ Day             <chr> "M", "T", "W", "Th", "F", "Sa", "Su", "M", "T", "W", "…
$ mean_num1grossM <dbl> 5.23, 4.17, 2.71, 2.53, 14.57, 15.29, 11.63, 8.13, 7.8…
Rows: 28
Columns: 3
Groups: Qtr [4]
$ Qtr             <fct> 1st Qtr, 1st Qtr, 1st Qtr, 1st Qtr, 1st Qtr, 1st Qtr, …
$ Day             <fct> M, T, W, Th, F, Sa, Su, M, T, W, Th, F, Sa, Su, M, T, …
$ mean_num1grossM <dbl> 5.23, 4.17, 2.71, 2.53, 14.57, 15.29, 11.63, 8.13, 7.8…