Load Packages

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.1     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   1.0.0
## v tidyr   1.1.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(vip)        
## 
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
## 
##     vi
library(fastshap)   
## Warning: package 'fastshap' was built under R version 4.0.3
## 
## Attaching package: 'fastshap'
## The following object is masked from 'package:vip':
## 
##     gen_friedman
## The following object is masked from 'package:dplyr':
## 
##     explain
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(ISLR)
## Warning: package 'ISLR' was built under R version 4.0.3
library(tree)
## Warning: package 'tree' was built under R version 4.0.3
## Registered S3 method overwritten by 'tree':
##   method     from
##   print.tree cli
library(imputeTS)
## Warning: package 'imputeTS' was built under R version 4.0.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

Load data, convert the date column to Date class, and extract month, day, and year

# Read the daily births CSV, standardise the column names, parse the
# month/day/year date strings into Date values, and derive the month, day
# and year components as character columns from the parsed date.
births.df <- read_csv("C:\\Wake Forest\\BAN7070\\Week 2\\Daily_Birth.csv") %>%
  clean_names() %>%
  mutate(
    date = as.Date(date, format = "%m/%d/%Y"),
    month = format(date, "%m"),
    day = format(date, "%d"),
    year = format(date, "%Y")
  )
## Parsed with column specification:
## cols(
##   Date = col_character(),
##   Num_Births = col_double()
## )
# Interactive reference lookup: open the help page for as.Date.
help("as.Date")
## starting httpd help server ...
##  done
# Preview the first six rows of the transformed daily data.
head(births.df, n = 6)
## # A tibble: 6 x 5
##   date       num_births month day   year 
##   <date>          <dbl> <chr> <chr> <chr>
## 1 1977-01-01        208 01    01    1977 
## 2 1977-01-02        241 01    02    1977 
## 3 1977-01-03        274 01    03    1977 
## 4 1977-01-04        256 01    04    1977 
## 5 1977-01-05        294 01    05    1977 
## 6 1977-01-06        281 01    06    1977

Aggregate

# Total the daily births within each year/month combination.
# With dplyr's default handling the result remains grouped by year.
month.df <- summarise(
  group_by(births.df, year, month),
  sum_units = sum(num_births)
)
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
# Preview the first six rows of the monthly totals.
head(month.df, n = 6)
## # A tibble: 6 x 3
## # Groups:   year [1]
##   year  month sum_units
##   <chr> <chr>     <dbl>
## 1 1977  01         8000
## 2 1977  02         7446
## 3 1977  03         8682
## 4 1977  04         8477
## 5 1977  05         8683
## 6 1977  06         8163
# I chose to index the data by month within each year in order to understand the seasonality within each year. We can see from the plot that there is a dip in births in the winter and a rise in births that peaks in the spring, coming down marginally but holding through the summer months.

# I chose to aggregate to the total births in each month to make that seasonal picture clearer.

Create ts object for aggregated data and plot

# Build a monthly time series (frequency = 12, starting January 1977) from
# the aggregated birth totals and plot it.
#
# Only the sum_units column is passed to ts(). Handing ts() the whole data
# frame coerces every column to a numeric matrix, which turns the year and
# month key columns into extra series and produces a multi-panel plot
# instead of the single births series.
# NOTE(review): this relies on month.df being ordered by year then month with
# no missing months, as the grouped summary preview suggests -- confirm.
birth.month.ts <- ts(month.df$sum_units, start = c(1977, 1), frequency = 12)
plot(birth.month.ts)