Load Packages
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.1 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 1.0.0
## v tidyr 1.1.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(vip)
##
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
##
## vi
library(fastshap)
## Warning: package 'fastshap' was built under R version 4.0.3
##
## Attaching package: 'fastshap'
## The following object is masked from 'package:vip':
##
## gen_friedman
## The following object is masked from 'package:dplyr':
##
## explain
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(ISLR)
## Warning: package 'ISLR' was built under R version 4.0.3
library(tree)
## Warning: package 'tree' was built under R version 4.0.3
## Registered S3 method overwritten by 'tree':
## method from
## print.tree cli
library(imputeTS)
## Warning: package 'imputeTS' was built under R version 4.0.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
Aggregate
# Sum `num_births` within each (year, month) pair of `births.df`.
# `.groups = "drop"` returns an ungrouped tibble: without it the result stays
# grouped by `year`, so any later dplyr verb applied to `month.df` would be
# evaluated per year, and summarize() emits a regrouping message.
month.df <- births.df %>%
  group_by(year, month) %>%
  summarize(sum_units = sum(num_births), .groups = "drop")
# Preview the first rows to check the aggregation.
head(month.df)
## # A tibble: 6 x 3
## year month sum_units
## <chr> <chr> <dbl>
## 1 1977 01 8000
## 2 1977 02 7446
## 3 1977 03 8682
## 4 1977 04 8477
## 5 1977 05 8683
## 6 1977 06 8163
# I indexed the series by month within each year in order to understand seasonality within each year. We can see from the plot that there is a dip in births in the winter, followed by a rise that peaks in the spring and then comes down marginally but holds through the summer months.
# I aggregated to a total for each month to make that seasonal picture clearer.
Create ts object for aggregated data and plot
# Build a monthly time series (frequency = 12, first observation January 1977)
# from the aggregated totals only. ts() coerces a data frame to a numeric
# matrix, so passing all of `month.df` would also turn the character key
# columns `year` and `month` into series of their own, and plot() would draw
# them as extra panels next to the births series.
# NOTE(review): assumes `month.df` has one row per month in chronological
# order with no gaps, since ts() assigns dates purely by position -- confirm.
birth.month.ts <- ts(month.df$sum_units, start = c(1977, 1), frequency = 12)
# Plot the monthly totals to inspect trend and within-year seasonality.
plot(birth.month.ts)
