[Video]
# Read the data from Excel into R
mydata <- read_excel("exercise1.xlsx")
## New names:
## * `` -> ...1
# Look at the first few lines of mydata
head(mydata)
## # A tibble: 6 x 4
## ...1 Sales AdBudget GDP
## <chr> <dbl> <dbl> <dbl>
## 1 Mar-81 1020. 659. 252.
## 2 Jun-81 889. 589 291.
## 3 Sep-81 795 512. 291.
## 4 Dec-81 1004. 614. 292.
## 5 Mar-82 1058. 647. 279.
## 6 Jun-82 944. 602 254
# Create a ts object called myts
myts <- ts(mydata[, 2:4], start = c(1981, 1), frequency = 4)
# Plot the data with facetting
autoplot(myts, facets = TRUE)
# Plot the data without facetting
autoplot(myts, facets = FALSE)
# Plot the three series
autoplot(gold)
autoplot(woolyrnq)
autoplot(gas)
# Find the outlier in the gold series
goldoutlier <- which.max(gold)
# Look at the seasonal frequencies of the three series
frequency(gold)
## [1] 1
frequency(woolyrnq)
## [1] 4
frequency(gas)
## [1] 12
# Load the fpp2 package
library(fpp2)
# Create plots of the a10 data
autoplot(a10)
ggseasonplot(a10)
# Produce a polar coordinate season plot for the a10 data
ggseasonplot(a10, polar = TRUE)
# Restrict the ausbeer data to start in 1992
beer <- window(ausbeer, start = 1992)
# Make plots of the beer data
autoplot(beer)
ggsubseriesplot(beer)
[Video]
# Create an autoplot of the oil data
autoplot(oil)
# Create a lag plot of the oil data
gglagplot(oil)
# Create an ACF plot of the oil data
ggAcf(oil)
# Plots of annual sunspot numbers
autoplot(sunspot.year)
ggAcf(sunspot.year)
# Save the lag corresponding to maximum autocorrelation
maxlag_sunspot <- 1
# Plot the traffic on the Hyndsight blog
autoplot(hyndsight)
ggAcf(hyndsight)
# Save the lag corresponding to maximum autocorrelation
maxlag_hyndsight <- 7
Now that you have seen ACF plots for various time series, you should be able to identify characteristics of the time series from the ACF plot alone.
Match the ACF plots shown (A-D) to their corresponding time plots (1-4).
[Video]
# Plot the original series
autoplot(goog)
# Plot the differenced series
autoplot(diff(goog))
# ACF of the differenced series
ggAcf(diff(goog))
# Ljung-Box test of the differenced series
Box.test(diff(goog), lag = 10, type = "Ljung")
##
## Box-Ljung test
##
## data: diff(goog)
## X-squared = 13.123, df = 10, p-value = 0.2169
[Video]
# Use naive() to forecast the goog series
fcgoog <- naive(goog, h = 20)
# Plot and summarize the forecasts
autoplot(fcgoog)
summary(fcgoog)
##
## Forecast method: Naive method
##
## Model Information:
## Call: naive(y = goog, h = 20)
##
## Residual sd: 8.7285
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.4212612 8.734286 5.829407 0.06253998 0.9741428 1 0.03871446
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 1001 813.67 802.4765 824.8634 796.5511 830.7889
## 1002 813.67 797.8401 829.4999 789.4602 837.8797
## 1003 813.67 794.2824 833.0576 784.0192 843.3208
## 1004 813.67 791.2831 836.0569 779.4322 847.9078
## 1005 813.67 788.6407 838.6993 775.3910 851.9490
## 1006 813.67 786.2518 841.0882 771.7374 855.6025
## 1007 813.67 784.0549 843.2850 768.3777 858.9623
## 1008 813.67 782.0102 845.3298 765.2505 862.0895
## 1009 813.67 780.0897 847.2503 762.3133 865.0266
## 1010 813.67 778.2732 849.0667 759.5353 867.8047
## 1011 813.67 776.5456 850.7944 756.8931 870.4469
## 1012 813.67 774.8948 852.4452 754.3684 872.9715
## 1013 813.67 773.3115 854.0285 751.9470 875.3930
## 1014 813.67 771.7880 855.5520 749.6170 877.7230
## 1015 813.67 770.3180 857.0220 747.3688 879.9711
## 1016 813.67 768.8962 858.4437 745.1944 882.1455
## 1017 813.67 767.5183 859.8217 743.0870 884.2530
## 1018 813.67 766.1802 861.1597 741.0407 886.2993
## 1019 813.67 764.8789 862.4610 739.0505 888.2895
## 1020 813.67 763.6114 863.7286 737.1120 890.2280
# Use snaive() to forecast the ausbeer series
fcbeer <- snaive(ausbeer, h = 16)
# Plot and summarize the forecasts
autoplot(fcbeer)
summary(fcbeer)
##
## Forecast method: Seasonal naive method
##
## Model Information:
## Call: snaive(y = ausbeer, h = 16)
##
## Residual sd: 19.1207
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 3.098131 19.32591 15.50935 0.838741 3.69567 1 0.01093868
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2010 Q3 419 394.2329 443.7671 381.1219 456.8781
## 2010 Q4 488 463.2329 512.7671 450.1219 525.8781
## 2011 Q1 414 389.2329 438.7671 376.1219 451.8781
## 2011 Q2 374 349.2329 398.7671 336.1219 411.8781
## 2011 Q3 419 383.9740 454.0260 365.4323 472.5677
## 2011 Q4 488 452.9740 523.0260 434.4323 541.5677
## 2012 Q1 414 378.9740 449.0260 360.4323 467.5677
## 2012 Q2 374 338.9740 409.0260 320.4323 427.5677
## 2012 Q3 419 376.1020 461.8980 353.3932 484.6068
## 2012 Q4 488 445.1020 530.8980 422.3932 553.6068
## 2013 Q1 414 371.1020 456.8980 348.3932 479.6068
## 2013 Q2 374 331.1020 416.8980 308.3932 439.6068
## 2013 Q3 419 369.4657 468.5343 343.2438 494.7562
## 2013 Q4 488 438.4657 537.5343 412.2438 563.7562
## 2014 Q1 414 364.4657 463.5343 338.2438 489.7562
## 2014 Q2 374 324.4657 423.5343 298.2438 449.7562
[Video]
# Check the residuals from the naive forecasts applied to the goog series
goog %>% naive() %>% checkresiduals()
##
## Ljung-Box test
##
## data: Residuals from Naive method
## Q* = 13.123, df = 10, p-value = 0.2169
##
## Model df: 0. Total lags used: 10
# Do they look like white noise (TRUE or FALSE)
googwn <- TRUE
# Check the residuals from the seasonal naive forecasts applied to the ausbeer series
ausbeer %>% snaive() %>% checkresiduals()
##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 60.535, df = 8, p-value = 3.661e-10
##
## Model df: 0. Total lags used: 8
# Do they look like white noise (TRUE or FALSE)
beerwn <- FALSE
[Video]
# Create the training data as train
train <- subset(gold, end = 1000)
# Compute naive forecasts and save to naive_fc
naive_fc <- naive(train, h = 108)
# Compute mean forecasts and save to mean_fc
mean_fc <- meanf(train, h = 108)
# Use accuracy() to compute RMSE statistics
accuracy(naive_fc, gold)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1079897 6.358087 3.20366 0.0201449 0.8050646 1.014334
## Test set -6.5383495 15.842361 13.63835 -1.7462269 3.4287888 4.318139
## ACF1 Theil's U
## Training set -0.3086638 NA
## Test set 0.9793153 5.335899
accuracy(mean_fc, gold)
## ME RMSE MAE MPE MAPE MASE
## Training set -4.239671e-15 59.17809 53.63397 -2.390227 14.230224 16.981449
## Test set 1.319363e+01 19.55255 15.66875 3.138577 3.783133 4.960998
## ACF1 Theil's U
## Training set 0.9907254 NA
## Test set 0.9793153 6.123788
# Assign one of the two forecasts as bestforecasts
bestforecasts <- naive_fc
# Create three training series omitting the last 1, 2, and 3 years
train1 <- window(vn[, "Melbourne"], end = c(2014, 4))
## Warning in window.default(x, ...): 'end' value not changed
train2 <- window(vn[, "Melbourne"], end = c(2013, 4))
## Warning in window.default(x, ...): 'end' value not changed
train3 <- window(vn[, "Melbourne"], end = c(2012, 4))
## Warning in window.default(x, ...): 'end' value not changed
# Produce forecasts using snaive()
fc1 <- snaive(train1, h = 4)
fc2 <- snaive(train2, h = 4)
fc3 <- snaive(train3, h = 4)
# Use accuracy() to compare the MAPE of each series
# accuracy(fc1, vn[, "Melbourne"])["Test set", "MAPE"]
# accuracy(fc2, vn[, "Melbourne"])["Test set", "MAPE"]
# accuracy(fc3, vn[, "Melbourne"])["Test set", "MAPE"]
[Video]
# Compute cross-validated errors for up to 8 steps ahead
e <- tsCV(goog, forecastfunction = naive, h = 8)
# Compute the MSE values and remove missing values
mse <- colMeans(e^2, na.rm = TRUE)
# Plot the MSE values against the forecast horizon
data.frame(h = 1:8, MSE = mse) %>%
ggplot(aes(x = h, y = MSE)) + geom_point()
[Video]
# Use ses() to forecast the next 10 years of winning times
fc <- ses(marathon, h = 10)
# Use summary() to see the model parameters
summary(fc)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = marathon, h = 10)
##
## Smoothing parameters:
## alpha = 0.3457
##
## Initial states:
## l = 167.1741
##
## sigma: 5.519
##
## AIC AICc BIC
## 988.4474 988.6543 996.8099
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.8874349 5.472771 3.826294 -0.7097395 2.637644 0.8925685
## ACF1
## Training set -0.01211236
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2017 130.3563 123.2835 137.4292 119.5394 141.1733
## 2018 130.3563 122.8727 137.8399 118.9111 141.8015
## 2019 130.3563 122.4833 138.2293 118.3156 142.3970
## 2020 130.3563 122.1123 138.6003 117.7482 142.9644
## 2021 130.3563 121.7573 138.9553 117.2053 143.5074
## 2022 130.3563 121.4164 139.2963 116.6839 144.0288
## 2023 130.3563 121.0880 139.6247 116.1816 144.5310
## 2024 130.3563 120.7708 139.9418 115.6966 145.0161
## 2025 130.3563 120.4639 140.2488 115.2271 145.4856
## 2026 130.3563 120.1661 140.5466 114.7717 145.9409
# Use autoplot() to plot the forecasts
autoplot(fc)
# Add the one-step forecasts for the training data to the plot
autoplot(fc) + autolayer(fitted(fc))
# Create a training set using subset()
train <- subset(marathon, end = length(marathon) - 20)
# Compute SES and naive forecasts, save to fcses and fcnaive
fcses <- ses(train, h = 20)
fcnaive <- naive(train, h = 20)
# Calculate forecast accuracy measures
accuracy(fcses, marathon)
## ME RMSE MAE MPE MAPE MASE
## Training set -1.0851741 5.863790 4.155948 -0.8603998 2.827993 0.8990906
## Test set 0.4574579 2.493971 1.894237 0.3171919 1.463862 0.4097960
## ACF1 Theil's U
## Training set -0.01595953 NA
## Test set -0.12556096 0.6870735
accuracy(fcnaive, marathon)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.4638047 6.904742 4.622391 -0.4086317 3.123559 1.0000000
## Test set 0.2266667 2.462113 1.846667 0.1388780 1.429608 0.3995047
## ACF1 Theil's U
## Training set -0.3589323 NA
## Test set -0.1255610 0.6799062
# Save the best forecasts as fcbest
fcbest <- fcnaive
[Video]
# Produce 10 year forecasts of austa using holt()
fcholt <- holt(austa, h = 10)
# Look at fitted model using summary()
summary(fcholt)
##
## Forecast method: Holt's method
##
## Model Information:
## Holt's method
##
## Call:
## holt(y = austa, h = 10)
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.0085
##
## Initial states:
## l = 0.656
## b = 0.1706
##
## sigma: 0.1952
##
## AIC AICc BIC
## 17.14959 19.14959 25.06719
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.00372838 0.1840662 0.1611085 -1.222083 5.990319 0.7907078
## ACF1
## Training set 0.2457733
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2016 7.030683 6.780483 7.280882 6.648036 7.413330
## 2017 7.202446 6.847114 7.557778 6.659013 7.745879
## 2018 7.374209 6.937169 7.811249 6.705814 8.042604
## 2019 7.545972 7.039179 8.052765 6.770899 8.321045
## 2020 7.717736 7.148723 8.286748 6.847506 8.587965
## 2021 7.889499 7.263543 8.515455 6.932181 8.846816
## 2022 8.061262 7.382302 8.740222 7.022882 9.099642
## 2023 8.233025 7.504136 8.961915 7.118285 9.347766
## 2024 8.404788 7.628444 9.181133 7.217472 9.592105
## 2025 8.576552 7.754792 9.398311 7.319779 9.833324
# Plot the forecasts
autoplot(fcholt)
# Check that the residuals look like white noise
checkresiduals(fcholt)
##
## Ljung-Box test
##
## data: Residuals from Holt's method
## Q* = 4.8886, df = 3, p-value = 0.1801
##
## Model df: 4. Total lags used: 7
[Video]
# Plot the data
autoplot(a10)
# Produce 3 year forecasts
fc <- hw(a10, seasonal = "multiplicative", h = 36)
# Check if residuals look like white noise
checkresiduals(fc)
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' multiplicative method
## Q* = 75.764, df = 8, p-value = 3.467e-13
##
## Model df: 16. Total lags used: 24
whitenoise <- FALSE
# Plot forecasts
autoplot(fc)
# Create training data with subset()
train <- subset(hyndsight, end = length(hyndsight) - 28)
# Holt-Winters additive forecasts as fchw
fchw <- hw(train, seasonal = "additive", h = 28)
# Seasonal naive forecasts as fcsn
fcsn <- snaive(train, h = 28)
# Find better forecasts with accuracy()
accuracy(fchw, hyndsight)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -3.976241 228.2440 165.0244 -2.407211 13.9955 0.7492131 0.1900853
## Test set -3.999460 201.7656 152.9584 -3.218292 10.5558 0.6944332 0.3013328
## Theil's U
## Training set NA
## Test set 0.4868701
accuracy(fcsn, hyndsight)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 10.50 310.3282 220.2636 -2.1239387 18.01077 1.0000000 0.4255730
## Test set 0.25 202.7610 160.4643 -0.6888732 10.25880 0.7285101 0.3089795
## Theil's U
## Training set NA
## Test set 0.450266
# Plot the better forecasts
autoplot(fchw)
[Video]
# Fit ETS model to austa in fitaus
fitaus <- ets(austa)
# Check residuals
checkresiduals(fitaus)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,A,N)
## Q* = 4.8886, df = 3, p-value = 0.1801
##
## Model df: 4. Total lags used: 7
# Plot forecasts
autoplot(forecast(fitaus))
# Repeat for hyndsight data in fiths
fiths <- ets(hyndsight)
checkresiduals(fiths)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,A)
## Q* = 68.616, df = 5, p-value = 1.988e-13
##
## Model df: 9. Total lags used: 14
autoplot(forecast(fiths))
# Which model(s) fails test? (TRUE or FALSE)
fitausfail <- FALSE
fithsfail <- TRUE
# Function to return ETS forecasts
fets <- function(y, h) {
forecast(ets(y), h = h)
}
# Apply tsCV() for both methods
# e1 <- tsCV(cement, fets, h = 4)
# e2 <- tsCV(cement, snaive, h = 4)
# Compute MSE of resulting errors (watch out for missing values)
# mean(e1^2, na.rm = TRUE)
# mean(e2^2, na.rm = TRUE)
# Copy the best forecast MSE
# bestmse <- mean(e2^2, na.rm = TRUE)
Look at this series of plots and guess which is the appropriate ETS model for each plot. Recall from the video:
# Trend = {N,A,Ad}
# Seasonal = {N,A,M}
# Error = {A,M}
The simplest approach is to look for which time series and models are seasonal.
# Plot the lynx series
autoplot(lynx)
# Use ets() to model the lynx series
fit <- ets(lynx)
# Use summary() to look at model and parameters
summary(fit)
## ETS(M,N,N)
##
## Call:
## ets(y = lynx)
##
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l = 2372.8047
##
## sigma: 0.9594
##
## AIC AICc BIC
## 2058.138 2058.356 2066.346
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 8.975647 1198.452 842.0649 -52.12968 101.3686 1.013488 0.3677583
# Plot 20-year forecasts of the lynx series
fit %>% forecast(h = 20) %>% autoplot()
[Video]
# Plot the series
autoplot(a10)
# Try four values of lambda in Box-Cox transformations
a10 %>% BoxCox(lambda = 0.0) %>% autoplot()
a10 %>% BoxCox(lambda = 0.1) %>% autoplot()
a10 %>% BoxCox(lambda = 0.2) %>% autoplot()
a10 %>% BoxCox(lambda = 0.3) %>% autoplot()
# Compare with BoxCox.lambda()
BoxCox.lambda(a10)
## [1] 0.1313326
# Plot the US female murder rate
autoplot(wmurders)
# Plot the differenced murder rate
autoplot(diff(wmurders))
# Plot the ACF of the differenced murder rate
ggAcf(diff(wmurders))
# Plot the data
autoplot(h02)
# Take logs and seasonal differences of h02
difflogh02 <- diff(log(h02), lag = 12)
# Plot difflogh02
autoplot(difflogh02)
# Take another difference and plot
ddifflogh02 <- diff(difflogh02)
autoplot(ddifflogh02)
# Plot ACF of ddifflogh02
ggAcf(ddifflogh02)
[Video]
# Fit an automatic ARIMA model to the austa series
fit <- auto.arima(austa)
# Check that the residuals look like white noise
checkresiduals(fit)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1) with drift
## Q* = 2.297, df = 5, p-value = 0.8067
##
## Model df: 2. Total lags used: 7
residualsok <- TRUE
# Summarize the model
summary(fit)
## Series: austa
## ARIMA(0,1,1) with drift
##
## Coefficients:
## ma1 drift
## 0.3006 0.1735
## s.e. 0.1647 0.0390
##
## sigma^2 estimated as 0.03376: log likelihood=10.62
## AIC=-15.24 AICc=-14.46 BIC=-10.57
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0008313383 0.1759116 0.1520309 -1.069983 5.513269 0.7461559
## ACF1
## Training set -0.000571993
# Find the AICc value and the number of differences used
AICc <- -14.46
d <- 1
# Plot forecasts of fit
fit %>% forecast(h = 10) %>% autoplot()
# Plot forecasts from an ARIMA(0,1,1) model with no drift
austa %>% Arima(order = c(0,1,1), include.constant = FALSE) %>% forecast() %>% autoplot()
# Plot forecasts from an ARIMA(2,1,3) model with drift
austa %>% Arima(order = c(2,1,3), include.constant = TRUE) %>% forecast() %>% autoplot()
# Plot forecasts from an ARIMA(0,0,1) model with a constant
austa %>% Arima(order = c(0,0,1), include.constant = TRUE) %>% forecast() %>% autoplot()
# Plot forecasts from an ARIMA(0,2,1) model with no constant
austa %>% Arima(order = c(0,2,1), include.constant = FALSE) %>% forecast() %>% autoplot()
# Set up forecast functions for ETS and ARIMA models
fets <- function(x, h) {
forecast(ets(x), h = h)
}
farima <- function(x, h) {
forecast(auto.arima(x), h = h)
}
# Compute CV errors for ETS on austa as e1
e1 <- tsCV(austa, fets, h = 1)
# Compute CV errors for ARIMA on austa as e2
e2 <- tsCV(austa, farima, h = 1)
# Find MSE of each model class
mean(e1^2, na.rm = TRUE)
## [1] 0.05623684
mean(e2^2, na.rm = TRUE)
## [1] 0.04336277
# Plot 10-year forecasts using the best model class
austa %>% farima(h = 10) %>% autoplot()
[Video]
# Check that the logged h02 data have stable variance
h02 %>% log() %>% autoplot()
# Fit a seasonal ARIMA model to h02 with lambda = 0
fit <- auto.arima(h02, lambda = 0)
# Summarize the fitted model
summary(fit)
## Series: h02
## ARIMA(2,1,1)(0,1,2)[12]
## Box Cox transformation: lambda= 0
##
## Coefficients:
## ar1 ar2 ma1 sma1 sma2
## -1.1358 -0.5753 0.3683 -0.5318 -0.1817
## s.e. 0.1608 0.0965 0.1884 0.0838 0.0881
##
## sigma^2 estimated as 0.004278: log likelihood=248.25
## AIC=-484.51 AICc=-484.05 BIC=-465
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.003931805 0.0501571 0.03629816 -0.5323365 4.611253 0.5987988
## ACF1
## Training set -0.003740269
# Record the amount of lag-1 differencing and seasonal differencing used
d <- 1
D <- 1
# Plot 2-year forecasts
fit %>% forecast(h = 24) %>% autoplot()
# Find an ARIMA model for euretail
fit1 <- auto.arima(euretail)
# Don't use a stepwise search
fit2 <- auto.arima(euretail, stepwise = FALSE)
# AICc of better model
AICc <- 68.39
# Compute 2-year forecasts from better model
fit2 %>% forecast(h = 8) %>% autoplot()
# Use 20 years of the qcement data beginning in 1988
train <- window(qcement, start = 1988, end = c(2007, 4))
# Fit an ARIMA and an ETS model to the training data
fit1 <- auto.arima(train)
fit2 <- ets(train)
# Check that both models have white noise residuals
checkresiduals(fit1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,1)(2,1,1)[4] with drift
## Q* = 3.3058, df = 3, p-value = 0.3468
##
## Model df: 6. Total lags used: 9
checkresiduals(fit2)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,N,M)
## Q* = 6.3457, df = 3, p-value = 0.09595
##
## Model df: 6. Total lags used: 9
# Produce forecasts for each model
fc1 <- forecast(fit1, h = 25)
fc2 <- forecast(fit2, h = 25)
# Use accuracy() to find best model based on RMSE
accuracy(fc1, qcement)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.006205705 0.1001195 0.07988903 -0.6704455 4.372443 0.5458078
## Test set -0.158835253 0.1996098 0.16882205 -7.3332836 7.719241 1.1534049
## ACF1 Theil's U
## Training set -0.01133907 NA
## Test set 0.29170452 0.7282225
accuracy(fc2, qcement)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01406512 0.1022079 0.07958478 0.4938163 4.371823 0.5437292
## Test set -0.13495515 0.1838791 0.15395141 -6.2508975 6.986077 1.0518075
## ACF1 Theil's U
## Training set -0.03346295 NA
## Test set 0.53438371 0.680556
bettermodel <- fit2
[Video]
# Time plot of both variables
autoplot(advert, facets = TRUE)
# Fit ARIMA model
fit <- auto.arima(advert[, "sales"], xreg = advert[, "advert"], stationary = TRUE)
# Check model. Increase in sales for each unit increase in advertising
salesincrease <- coefficients(fit)[3]
# Forecast fit as fc
fc <- forecast(fit, xreg = rep(10, 6))
# Plot fc with x and y labels
autoplot(fc) + xlab("Month") + ylab("Sales")
# Time plots of demand and temperatures
# autoplot(elec[, c("Demand", "Temperature")], facets = TRUE)
# Matrix of regressors
# xreg <- cbind(MaxTemp = elec[, "Temperature"],
# MaxTempSq = elec[, "Temperature"]^2,
# Workday = elec[, "Workday"])
# Fit model
# fit <- auto.arima(elec[, "Demand"], xreg = xreg)
# Forecast fit one day ahead
# forecast(fit, xreg = cbind(20, 20^2, 1))
[Video]
# Set up harmonic regressors of order 13
harmonics <- fourier(gasoline, K = 13)
# Fit regression model with ARIMA errors
fit <- auto.arima(gasoline, xreg = harmonics, seasonal = FALSE)
# Forecasts next 3 years
newharmonics <- fourier(gasoline, K = 13, h = 156)
fc <- forecast(fit, xreg = newharmonics)
# Plot forecasts fc
autoplot(fc)
# Fit a harmonic regression using order 10 for each type of seasonality
fit <- tslm(taylor ~ fourier(taylor, K = c(10, 10)))
# Forecast 20 working days ahead
fc <- forecast(fit, newdata = data.frame(fourier(taylor, K = c(10, 10), h = 20 * 48)))
# Plot the forecasts
autoplot(fc)
# Check the residuals of fit
checkresiduals(fit)
##
## Breusch-Godfrey test for serial correlation of order up to 672
##
## data: Residuals from Linear regression model
## LM test = 3938.9, df = 672, p-value < 2.2e-16
# Plot the calls data
autoplot(calls)
# Set up the xreg matrix
xreg <- fourier(calls, K = c(10, 0))
# Fit a dynamic regression model
fit <- auto.arima(calls, xreg = xreg, seasonal = FALSE, stationary = TRUE)
# Check the residuals
checkresiduals(fit)
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(5,0,1) errors
## Q* = 6846.8, df = 1663, p-value < 2.2e-16
##
## Model df: 27. Total lags used: 1690
# Plot forecast for 10 working days ahead
fc <- forecast(fit, xreg = fourier(calls, c(10, 0), h = 1690))
autoplot(fc)
[Video]
# Plot the gas data
autoplot(gas)
# Fit a TBATS model to the gas data
fit <- tbats(gas)
# Forecast the series for the next 5 years
fc <- forecast(fit, h = 12 * 5)
# Plot the forecasts
autoplot(fc)
# Record the Box-Cox parameter and the order of the Fourier terms
lambda <- 0.082
K <- 5
[Video]
Michael is a hybrid thinker and doer—a byproduct of being a CliftonStrengths “Learner” over time. With 20+ years of engineering, design, and product experience, he helps organizations identify market needs, mobilize internal and external resources, and deliver delightful digital customer experiences that align with business goals. He has been entrusted with problem-solving for brands—ranging from Fortune 500 companies to early-stage startups to not-for-profit organizations.
Michael earned his BS in Computer Science from New York Institute of Technology and his MBA from the University of Maryland, College Park. He is also a candidate to receive his MS in Applied Analytics from Columbia University.
LinkedIn | Twitter | www.michaelmallari.com/data | www.columbia.edu/~mm5470