## 'data.frame': 5067 obs. of 5 variables:
## $ region : Factor w/ 15 levels "buffalo_rochester",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ type : Factor w/ 2 levels "conventional",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ date : Factor w/ 169 levels "2015-01-04","2015-01-11",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ price : num 1.4 1.54 1.52 1.5 1.33 1.36 1.44 1.41 1.43 1.35 ...
## $ quantity: num 116 106 107 114 154 ...
##                  region               type             date
##  buffalo_rochester   : 338   conventional:2535   Min.   :2015-01-04
##  harrisburg_scranton : 338   organic     :2532   1st Qu.:2015-10-25
##  hartford_springfield: 338                       Median :2016-08-14
##  indianapolis        : 338                       Mean   :2016-08-13
##  los_angeles         : 338                       3rd Qu.:2017-06-04
##  louisville          : 338                       Max.   :2018-03-25
##  (Other)             :3039
##      price          quantity
##  Min.   :0.460   Min.   :   0.380
##  1st Qu.:1.110   1st Qu.:   8.992
##  Median :1.400   Median :  78.873
##  Mean   :1.436   Mean   : 359.717
##  3rd Qu.:1.730   3rd Qu.: 336.317
##  Max.   :2.950   Max.   :5470.227
##
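The structure and summary above were presumably produced with str() and summary(); the loading code is not shown, so the packages and file name in the sketch below are assumptions. Note that str() reports date as a factor (consistent with read.csv()), while summary() and the later tibbles treat it as a Date, so the column was presumably converted in between, e.g. with as.Date().

# Presumed setup (package list and file name are assumptions, not shown in the original)
library(tidyverse)    # dplyr, ggplot2, purrr
library(lubridate)    # week()
library(forecast)     # auto.arima(), ggseasonplot(), autoplot() methods
library(tseries)      # adf.test()
library(hrbrthemes)   # scale_y_comma()

data <- read.csv("avocado.csv")                  # hypothetical file name
str(data)                                        # date is still a factor here

data <- data %>% mutate(date = as.Date(date))    # presumed conversion before summary()
summary(data)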
data %>%
group_by(region) %>%
summarise(
start_date = min(date),
end_date = max(date)
) %>%
mutate(
total_week = difftime(end_date, start_date, units="weeks")
)

## # A tibble: 15 x 4
## region start_date end_date total_week
## <fct> <date> <date> <drtn>
## 1 buffalo_rochester 2015-01-04 2018-03-25 168 weeks
## 2 harrisburg_scranton 2015-01-04 2018-03-25 168 weeks
## 3 hartford_springfield 2015-01-04 2018-03-25 168 weeks
## 4 indianapolis 2015-01-04 2018-03-25 168 weeks
## 5 los_angeles 2015-01-04 2018-03-25 168 weeks
## 6 louisville 2015-01-04 2018-03-25 168 weeks
## 7 new_york 2015-01-04 2018-03-25 168 weeks
## 8 orlando 2015-01-04 2018-03-25 168 weeks
## 9 philadelphia 2015-01-04 2018-03-25 168 weeks
## 10 phoenix_tucson 2015-01-04 2018-03-25 168 weeks
## 11 plains 2015-01-04 2018-03-25 168 weeks
## 12 roanoke 2015-01-04 2018-03-25 168 weeks
## 13 san_diego 2015-01-04 2018-03-25 168 weeks
## 14 spokane 2015-01-04 2018-03-25 168 weeks
## 15 west_tex_new_mexico 2015-01-04 2018-03-25 168 weeks
la_conventional <- data %>%
filter(region == 'los_angeles', type == 'conventional')
head(la_conventional)

## region type date price quantity
## 1 los_angeles conventional 2015-01-04 0.85 2682.160
## 2 los_angeles conventional 2015-01-11 0.85 2713.700
## 3 los_angeles conventional 2015-01-18 0.89 2800.680
## 4 los_angeles conventional 2015-01-25 0.96 2329.987
## 5 los_angeles conventional 2015-02-01 0.74 4031.949
## 6 los_angeles conventional 2015-02-08 0.90 2641.033
ggplot(la_conventional, aes(x = date, y = quantity)) +
geom_line() +
scale_y_comma() +
labs(
title = "Weekly Sales Demand",
x = NULL,
y = NULL
)+
theme_minimal()

# Latest year
data_test <- la_conventional %>%
filter(row_number() > (n()-52))
# The rest
data_train <- la_conventional %>%
filter(row_number() <= (n()-52))
ggplot(la_conventional, aes(x=date, y=quantity)) +
geom_line(data = data_train, col='black') +
geom_line(data = data_test, col = 'darkgrey') +
scale_y_comma() +
labs(
title = "Weekly Sales Demand",
x = NULL,
y = NULL
)+
theme_minimal()

Classical Decomposition
data_train %>%
pull(quantity) %>%
ts(start = c(2015,1), frequency = 52) %>%
decompose() %>%
autoplot() +
theme_minimal()

Loess Decomposition
data_train %>%
pull(quantity) %>%
ts(start = c(2015,1), frequency = 52) %>%
stl(s.window = "periodic") %>%
autoplot() +
theme_minimal()

data_train %>%
pull(quantity) %>%
ts(start = c(2015,1), frequency = 52) %>%
ggseasonplot() +
scale_y_comma() +
labs(
title = "Seasonality Figure",
x = NULL,
y = NULL
)+
theme_minimal()

data_train %>%
pull(quantity) %>%
ts(start = c(2015,1), frequency = 52) %>%
decompose() %>%
pluck(5) %>%
data.frame(figure=.) %>%
ggplot(aes(x=seq(1,nrow(.)), y=figure)) +
geom_line() +
scale_y_comma() +
labs(
title = "Seasonality Figure",
x = NULL,
y = NULL
)+
theme_minimal()

## Warning: The chosen seasonal unit root test encountered an error when testing for the second difference.
## From stl(): series is not periodic or has less than two periods
## 1 seasonal differences will be used. Consider using a different unit root test.
## Series: ts_train
## ARIMA(1,1,1)(0,1,0)[52]
##
## Coefficients:
## ar1 ma1
## 0.2195 -0.8920
## s.e. 0.1494 0.0803
##
## sigma^2 estimated as 293446: log likelihood=-493.26
## AIC=992.52 AICc=992.92 BIC=998.99
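The seasonal ARIMA output above refers to ts_train, an object not defined in the code shown. Presumably the training demand was converted to a weekly ts and passed to auto.arima(), which also produces the seasonal unit root warning above; a minimal sketch, assuming exactly that:

# Presumed construction of ts_train and the seasonal ARIMA fit (not shown above)
ts_train <- data_train %>%
  pull(quantity) %>%
  ts(start = c(2015, 1), frequency = 52)

auto.arima(ts_train)   # reports ARIMA(1,1,1)(0,1,0)[52] as in the output above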
Adjusting for seasonality:
seasonality <- data_train %>%
pull(quantity) %>%
ts(start = c(2015,1), frequency = 52) %>%
decompose() %>%
pluck(2)
seasonal_adjusted <- ts_train - seasonality
autoplot(seasonal_adjusted) +
scale_y_comma() +
labs(
title = "Seasonally Adjusted Time Series",
x = NULL,
y = NULL
)+
theme_minimal()

##
## Augmented Dickey-Fuller Test
##
## data: seasonal_adjusted
## Dickey-Fuller = -3.0814, Lag order = 4, p-value = 0.1276
## alternative hypothesis: stationary
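This Augmented Dickey-Fuller test on the seasonally adjusted series matches the output of tseries::adf.test(); with p = 0.1276 the null of non-stationarity is not rejected at the 5% level, which is consistent with the single difference (d = 1) chosen by the ARIMA fit below. The call was presumably:

# Presumed stationarity check on the seasonally adjusted series
adf.test(seasonal_adjusted)   # p-value = 0.1276: fail to reject non-stationarity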
## Series: seasonal_adjusted
## ARIMA(1,1,1)
##
## Coefficients:
## ar1 ma1
## 0.2068 -0.9104
## s.e. 0.1083 0.0523
##
## sigma^2 estimated as 153623: log likelihood=-856.93
## AIC=1719.87 AICc=1720.08 BIC=1728.13
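The ARIMA(1,1,1) fit above presumably defines model_arima, the object used below for the residual check and the forecast. auto.arima() on the seasonally adjusted series is the most likely call (Arima(seasonal_adjusted, order = c(1, 1, 1)) would give the same model); a sketch under that assumption:

# Presumed non-seasonal ARIMA fit on the seasonally adjusted series
model_arima <- auto.arima(seasonal_adjusted)
model_arima   # printed model shown above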
##
## Box-Ljung test
##
## data: model_arima$residuals
## X-squared = 15.642, df = 13, p-value = 0.269
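The Ljung-Box test (p = 0.27) suggests the residuals of model_arima behave like white noise, so the model has captured the autocorrelation structure. The exact call is not shown; df = 13 is consistent with lag = 15 and fitdf = 2 for the two estimated coefficients (ar1, ma1), so the test was presumably something like:

# Presumed residual diagnostic; lag and fitdf are inferred from df = 13 in the output above
Box.test(model_arima$residuals, lag = 15, fitdf = 2, type = "Ljung-Box")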
data_forecast <- forecast(model_arima, h=52) %>%
as.data.frame() %>%
bind_cols(data_test) %>%
mutate(
weekly_year = week(date),
`Seasonal` = `Point Forecast` + seasonality[weekly_year],
`Upper` = `Hi 95` + seasonality[weekly_year],
`Lower` = `Lo 95` + seasonality[weekly_year]
)
ggplot(la_conventional, aes(x=date, y=quantity)) +
geom_line(data = data_forecast, aes(y=`Seasonal`), col='hotpink') +
geom_line(data = data_train, col='black') +
geom_line(data = data_test, col = 'darkgrey') +
geom_ribbon(data = data_forecast, aes(ymax=Upper, ymin=Lower), fill='lightpink', alpha=0.4) +
scale_y_comma() +
labs(
title = "Weekly Sales Demand",
x = NULL,
y = NULL
)+
theme_minimal()

## [1] 0.1776002
## [1] 616.3419
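The two unlabelled numbers above are presumably error metrics for the re-seasonalised forecast against the held-out year: a MAPE of about 0.178 (17.8%) and an RMSE of about 616 in the same units as quantity. That interpretation is an assumption; a sketch of how such values could be computed:

# Presumed accuracy metrics for the seasonal forecast against data_test (interpretation assumed)
errors <- data_forecast$Seasonal - data_test$quantity

mean(abs(errors) / data_test$quantity)   # MAPE as a fraction, ~0.178
sqrt(mean(errors^2))                     # RMSE, ~616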