Question 1
The data set for question 1 is using Australian tourist visitors data set, it is a quarterly data set from 2000 to 2015.
Part A: Trend
austourists
## Qtr1 Qtr2 Qtr3 Qtr4
## 1999 30.05251 19.14850 25.31769 27.59144
## 2000 32.07646 23.48796 28.47594 35.12375
## 2001 36.83848 25.00702 30.72223 28.69376
## 2002 36.64099 23.82461 29.31168 31.77031
## 2003 35.17788 19.77524 29.60175 34.53884
## 2004 41.27360 26.65586 28.27986 35.19115
## 2005 42.20566 24.64917 32.66734 37.25735
## 2006 45.24246 29.35048 36.34421 41.78208
## 2007 49.27660 31.27540 37.85063 38.83704
## 2008 51.23690 31.83855 41.32342 42.79900
## 2009 55.70836 33.40714 42.31664 45.15712
## 2010 59.57608 34.83733 44.84168 46.97125
## 2011 60.01903 38.37118 46.97586 50.73380
## 2012 61.64687 39.29957 52.67121 54.33232
## 2013 66.83436 40.87119 51.82854 57.49191
## 2014 65.25147 43.06121 54.76076 59.83447
## 2015 73.25703 47.69662 61.09777 66.05576
head(austourists)
## Qtr1 Qtr2 Qtr3 Qtr4
## 1999 30.05251 19.14850 25.31769 27.59144
## 2000 32.07646 23.48796
plot(austourists)
autoplot(austourists)
The data is not stationary as it follows a clear upwards trend.
Part B & C: Holt Winter’s Additive Seasonality
library(fpp2)
aust <- window(austourists,start=2005)
fit1 <- hw(aust,seasonal="additive", n=4)
fit2 <- hw(aust,seasonal="multiplicative", n=4)
autoplot(aust) +
autolayer(fit1, series="HW additive forecasts", PI=FALSE) +
autolayer(fit2, series="HW multiplicative forecasts",
PI=FALSE) +
xlab("Year") +
ylab("Visitor nights (millions)") +
ggtitle("International visitors nights in Australia") +
guides(colour=guide_legend(title="Forecast"))
Forecast
summary(fit1)
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = aust, seasonal = "additive", n = 4)
##
## Smoothing parameters:
## alpha = 0.3063
## beta = 1e-04
## gamma = 0.4263
##
## Initial states:
## l = 32.2597
## b = 0.7014
## s = 1.3106 -1.6935 -9.3132 9.6962
##
## sigma: 1.9494
##
## AIC AICc BIC
## 234.4171 239.7112 250.4748
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.008115785 1.763305 1.374062 -0.2860248 2.973922 0.4502579
## ACF1
## Training set -0.06272507
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2016 Q1 76.09837 73.60011 78.59664 72.27761 79.91914
## 2016 Q2 51.60333 48.99039 54.21626 47.60718 55.59947
## 2016 Q3 63.96867 61.24582 66.69153 59.80443 68.13292
## 2016 Q4 68.37170 65.54313 71.20027 64.04578 72.69762
## 2017 Q1 78.90404 75.53440 82.27369 73.75061 84.05747
## 2017 Q2 54.40899 50.95325 57.86473 49.12389 59.69409
## 2017 Q3 66.77434 63.23454 70.31414 61.36069 72.18799
## 2017 Q4 71.17737 67.55541 74.79933 65.63806 76.71667
Part D: Moving Average
library(ggplot2)
library(quantmod)
library(dplyr)
myShare <- "IBM"
myStartDate <- '2020-01-01'
myEndDate <- Sys.Date()
df <- getSymbols(myShare, from = myStartDate, to = myEndDate, warnings = FALSE, auto.assign = FALSE)
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
df <- data.frame(df)
#Change the names in the data frame
names(df) <- c('Open', 'High', 'Low', 'Close','Volume','Adjusted')
# Convert the row names to a column
df$Date <- as.Date(rownames(df))
# Calculating 4 day moving average
df$MA4 <- TTR::SMA( df$Close, n = 4)
pl <- ggplot(df , aes(x = Date))
pl <- pl + geom_line(aes(y = Close, color = "Close"), group = 1)
pl <- pl + geom_line(aes(y = MA4, color = "MA4"),group = 1)
pl <- pl + labs(title ="4 period Moving Average")
pl
## Warning: Removed 3 row(s) containing missing values (geom_path).
Question 2
For the second question I will be using daily TESLA (TSLA) stock prices from January 1st, 2020 to October 1st, 2021
Part A: Trend
tesla <- getSymbols('TSLA', from = myStartDate, to = myEndDate, warnings = FALSE, auto.assign = FALSE)
head(tesla)
## TSLA.Open TSLA.High TSLA.Low TSLA.Close TSLA.Volume TSLA.Adjusted
## 2020-01-02 84.900 86.140 84.342 86.052 47660500 86.052
## 2020-01-03 88.100 90.800 87.384 88.602 88892500 88.602
## 2020-01-06 88.094 90.312 88.000 90.308 50665000 90.308
## 2020-01-07 92.280 94.326 90.672 93.812 89410500 93.812
## 2020-01-08 94.740 99.698 93.646 98.428 155721500 98.428
## 2020-01-09 99.420 99.760 94.574 96.268 142202000 96.268
chart_Series(tesla)
The data is not stationary as it follows an upwards trend and mean is not 0
Part B: Exponential Smoothing
se_model <- ses(tesla$TSLA.Close, h = 6)
summary(se_model)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = tesla$TSLA.Close, h = 6)
##
## Smoothing parameters:
## alpha = 0.9288
##
## Initial states:
## l = 86.1998
##
## sigma: 19.2639
##
## AIC AICc BIC
## 5311.432 5311.487 5323.706
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 1.678396 19.22027 13.19708 0.4108609 3.303592 0.9949489
## ACF1
## Training set -0.007101349
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 443 775.267 750.5793 799.9546 737.5104 813.0235
## 444 775.267 741.5725 808.9614 723.7357 826.7982
## 445 775.267 734.5097 816.0242 712.9341 837.5998
## 446 775.267 728.5017 822.0322 703.7457 846.7882
## 447 775.267 723.1822 827.3517 695.6101 854.9238
## 448 775.267 718.3577 832.1762 688.2318 862.3021
plot(se_model)
Part C: Holt’s Exponential Smoothing
holt_model <- holt(tesla$TSLA.Close, h = 6)
summary(holt_model)
##
## Forecast method: Holt's method
##
## Model Information:
## Holt's method
##
## Call:
## holt(y = tesla$TSLA.Close, h = 6)
##
## Smoothing parameters:
## alpha = 0.9206
## beta = 1e-04
##
## Initial states:
## l = 84.865
## b = 1.5656
##
## sigma: 19.2343
##
## AIC AICc BIC
## 5312.058 5312.195 5332.514
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.007641797 19.14703 13.11434 -0.1653969 3.292211 0.9887104
## ACF1
## Training set 0.001064577
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 443 776.9764 752.3267 801.6261 739.2779 814.6748
## 444 778.5416 745.0360 812.0473 727.2992 829.7841
## 445 780.1069 739.6376 820.5761 718.2145 841.9993
## 446 781.6722 735.2717 828.0726 710.7088 852.6356
## 447 783.2374 731.5812 834.8936 704.2361 862.2387
## 448 784.8027 728.3773 841.2281 698.5074 871.0979
plot(holt_model)
Part D: Double Moving Average
df2 <- data.frame(tesla)
#Change the names in the data frame
names(df2) <- c('Open', 'High', 'Low', 'Close','Volume','Adjusted')
# Convert the row names to a column
df2$Date <- as.Date(rownames(df2))
# Calculating 6 day moving average
df2$MA6 <- TTR::SMA(df2$Close, n = 6)
df2$DMA6 <- TTR::SMA( df2$MA6, n = 6)
pl2 <- ggplot(df2 , aes(x = Date))
pl2 <- pl2 + geom_line(aes(y = df2$Close, color = "Close"), group = 1)
pl2 <- pl2 + geom_line(aes(y = df2$MA6, color = "MA6"),group = 1)
pl2 <- pl2+ geom_line(aes(y=df2$DMA6, color = "DMA6"), group=1)
pl2 <- pl2 + labs(title ="6 Period Double Moving Average")
pl2
## Warning: Use of `df2$Close` is discouraged. Use `Close` instead.
## Warning: Use of `df2$MA6` is discouraged. Use `MA6` instead.
## Warning: Use of `df2$DMA6` is discouraged. Use `DMA6` instead.
## Warning: Removed 5 row(s) containing missing values (geom_path).
## Warning: Removed 10 row(s) containing missing values (geom_path).
Question 3
For question 3 I will be using US GDP per capita dataset from the WDI library. The data is from 1960 to 2020.
plot(us, ylab="Per Capita GDP", xlab="Year")
ƒrom the code run below – Auto correlation function (ACF) plot shows a cut off range for 2 lag periods.
The auto.arima() function is different from the manual form of AR models. The model selects the lag cut off range.
The PACF shows two spikes at the first and 12th lag and and the ACF shows a tapering pattern. An AR(2,2,1) model is indicated.
please follow the following link: https://online.stat.psu.edu/stat510/lesson/3/3.1
Can you help me interpret these results?
autoplot(us)
us %>% diff() %>% ggtsdisplay(main="")
ARIMA Output
- The auto ARIMA model selects 2 lags based on the ACF plot
- How do we interpret the accuracy of the model – MAPE, MAE, etc?
arima_model <- auto.arima(us)
accuracy(arima_model)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 105.3186 663.8716 428.7066 1.011964 1.889548 0.3863758 -0.06261455
Forecast Values for 6 periods
fore_arima = forecast::forecast(arima_model, h=6)
fore_arima
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2021 62727.80 61839.85 63615.76 61369.79 64085.81
## 2022 64127.68 62395.71 65859.65 61478.87 66776.50
## 2023 66298.82 63978.11 68619.54 62749.60 69848.05
## 2024 67909.40 65177.25 70641.55 63730.94 72087.87
## 2025 68885.68 65773.24 71998.12 64125.61 73645.74
## 2026 69771.12 66229.24 73313.00 64354.28 75187.96
plot(fore_arima)