library(ggplot2);library(ggthemes);library(gridExtra) # For plots
## Warning: package 'ggthemes' was built under R version 4.3.3
## Warning: package 'gridExtra' was built under R version 4.3.3
library(quantmod);library(xts);library(zoo) # For using xts class objects
## Warning: package 'quantmod' was built under R version 4.3.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.3.3
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(forecast) # Set of forecasting functions
## Warning: package 'forecast' was built under R version 4.3.3
library(fpp); library(fpp2) # Datasets from Forecasting text by Rob Hyndman
## Warning: package 'fpp' was built under R version 4.3.3
## Loading required package: fma
## Warning: package 'fma' was built under R version 4.3.3
## Loading required package: expsmooth
## Warning: package 'expsmooth' was built under R version 4.3.3
## Loading required package: lmtest
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: tseries
## Warning: package 'tseries' was built under R version 4.3.3
## Warning: package 'fpp2' was built under R version 4.3.3
##
## Attaching package: 'fpp2'
## The following objects are masked from 'package:fpp':
##
## ausair, ausbeer, austa, austourists, debitcards, departures,
## elecequip, euretail, guinearice, oil, sunspotarea, usmelec
library(tseries) # for a statistical test
library(dplyr) # Data wrangling
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## SECTION 1: TIME SERIES ANALYSIS ##
#Q1) What type of data structure is goog?
# Load the saved Alphabet price series from disk and inspect its class.
goog <- readRDS("goog23.rds")
class(goog)
## [1] "xts" "zoo"
#Q2) What was Alphabet's stock price for June, 2007?
# Date-string subsetting: pick the single row for June 2007.
goog["2007-06", ]
## price
## Jun 2007 13.0187
#Q3) Using the monthly stock price for Alphabet, what is the average stock price for the year 2007?
# Keep every row whose index falls in 2007, then average those prices.
prices_2007 <- goog["2007", ]
mean(prices_2007)
## [1] 13.66773
#Q4) How many months of data are included in this dataset?
# The series has one row per month, so the row count answers the question.
nrow(goog)
## [1] 203
#option 2
# Two cross-checks: count the months directly, and count the index entries.
nmonths(goog)
## [1] 203
month_index <- index(goog)
length(month_index)
## [1] 203
#Q5) With time-series data, the past is often a good predictor of the future. Let us see if this is true for our data. What is the correlation between Alphabet's stock price and one-month lagged stock price? You can use lag() to obtain a one-month lag for Alphabet's stock price. When computing correlation with cor(), be sure to set use='complete.obs'.
# Build the one-month lagged price series.
# stats::lag() is called explicitly: dplyr is attached after xts and masks
# lag(), and the xts startup banner warns that dplyr::lag() breaks
# lag(my_xts). The namespaced call dispatches to the xts method instead.
goog_lag <- stats::lag(goog, k = 1)
# use = "complete.obs" drops the rows where the lagged value is missing.
cor(goog, goog_lag, use = "complete.obs")
## price
## price 0.9930809
# Q6) In order to have access to a wider array of forecasting models, we will convert the data to a "ts" data type. Also, we will split the data into a train and test sample, using the train sample to estimate a model and the test sample to evaluate it. We will use data from Jan, 2007 to Dec, 2017 for the train sample and the rest for the test sample. The code below will convert goog to a “ts” object and split the data.
#How many months of data does train contain?
# Convert the series to a monthly "ts" object that starts in January 2007.
google <- ts(goog, start = c(2007, 1), frequency = 12)
# Train sample: Jan 2007 - Dec 2017. Test sample: Jan 2018 - Nov 2023.
train <- window(google, start = c(2007, 1), end = c(2017, 12))
test <- window(google, start = c(2018, 1), end = c(2023, 11))
# Number of observations in the train sample.
length(train)
## [1] 132
#Q7) Autocorrelation examines correlation of a variable and its lagged values. Construct a plot of autocorrelations for train using ggAcf() from the forecast package. Which lag has the strongest autocorrelation?
# Plot the autocorrelation function of the training series.
# forecast is already attached at the top of the script, so it is neither
# re-installed nor re-loaded here; calling install.packages() on an attached
# package only emits a "package is in use" warning.
ggAcf(train)

#graph explanation:
# Significance Bands: The blue dashed lines are typically set at ±1.96/√n (where n is the sample size) and represent the significance bounds for the ACF. If a bar extends beyond this boundary, the correlation for that lag is statistically significant.
# Strongest Autocorrelation: The lag with the strongest autocorrelation is identified by the tallest bar, i.e. the one extending furthest from the zero line. In this plot, the first lag (lag 1) has the tallest bar and extends well beyond the significance bounds, indicating it has the strongest (and statistically significant) positive autocorrelation.
## SECTION 2: Simple Forecasting Methods ##
#Q1) A very simple prediction, often the baseline in linear regression, is to use the average. Use the average to make a prediction for the stock price over the 71 months of the test sample. Let's call this average_model. What is the point forecast of the stock price for November 2023?
# Mean forecast over the 71-month test horizon, then print the forecast table.
average_model <- meanf(train, h = 71)
average_model
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2018 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Feb 2018 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Mar 2018 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Apr 2018 22.1933 7.17525 37.21135 -0.8720666 45.25867
## May 2018 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jun 2018 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jul 2018 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Aug 2018 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Sep 2018 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Oct 2018 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Nov 2018 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Dec 2018 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jan 2019 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Feb 2019 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Mar 2019 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Apr 2019 22.1933 7.17525 37.21135 -0.8720666 45.25867
## May 2019 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jun 2019 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jul 2019 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Aug 2019 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Sep 2019 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Oct 2019 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Nov 2019 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Dec 2019 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jan 2020 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Feb 2020 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Mar 2020 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Apr 2020 22.1933 7.17525 37.21135 -0.8720666 45.25867
## May 2020 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jun 2020 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jul 2020 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Aug 2020 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Sep 2020 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Oct 2020 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Nov 2020 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Dec 2020 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jan 2021 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Feb 2021 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Mar 2021 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Apr 2021 22.1933 7.17525 37.21135 -0.8720666 45.25867
## May 2021 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jun 2021 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jul 2021 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Aug 2021 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Sep 2021 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Oct 2021 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Nov 2021 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Dec 2021 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jan 2022 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Feb 2022 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Mar 2022 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Apr 2022 22.1933 7.17525 37.21135 -0.8720666 45.25867
## May 2022 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jun 2022 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jul 2022 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Aug 2022 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Sep 2022 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Oct 2022 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Nov 2022 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Dec 2022 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jan 2023 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Feb 2023 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Mar 2023 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Apr 2023 22.1933 7.17525 37.21135 -0.8720666 45.25867
## May 2023 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jun 2023 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jul 2023 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Aug 2023 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Sep 2023 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Oct 2023 22.1933 7.17525 37.21135 -0.8720666 45.25867
## Nov 2023 22.1933 7.17525 37.21135 -0.8720666 45.25867
# Point forecast from November 2023 onward (the last month of the horizon).
window(average_model$mean, start = c(2023, 11))
## Nov
## 2023 22.1933
#Q2) Let us examine the accuracy of the above prediction from average_model on the train sample. Specifically, what is the RMSE of the prediction in the train sample? Hint: Use accuracy() from library(forecast)
# Without held-out data, only the "Training set" row is reported.
accuracy(average_model)
## ME RMSE MAE MPE MAPE MASE
## Training set 5.344788e-16 11.57155 9.905325 -27.8625 53.83724 2.209192
## ACF1
## Training set 0.9629783
#Q3) What is the RMSE of the average_model on the test sample?
# Passing the full series adds a "Test set" row to the accuracy table.
accuracy(average_model, x = google)
## ME RMSE MAE MPE MAPE MASE
## Training set 5.344788e-16 11.57155 9.905325 -27.86250 53.83724 2.209192
## Test set 6.868414e+01 75.51904 68.684137 72.43341 72.43341 15.318673
## ACF1 Theil's U
## Training set 0.9629783 NA
## Test set 0.9511974 9.709656
#Q4) Next, let us examine another simple prediction, one that assumes the future will be the same as the last observation. Let’s call this naive_model. Use naive_model to construct a forecast for stock price over the next 71 months of the test sample. What is the point forecast of the stock price for November 2023?
# Naive forecast over the 71-month test horizon, then print the forecast table.
naive_model <- naive(train, h = 71)
naive_model
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2018 52.32 50.35829 54.28171 49.31982 55.32018
## Feb 2018 52.32 49.54572 55.09428 48.07711 56.56289
## Mar 2018 52.32 48.92222 55.71778 47.12354 57.51646
## Apr 2018 52.32 48.39658 56.24342 46.31965 58.32035
## May 2018 52.32 47.93348 56.70651 45.61140 59.02860
## Jun 2018 52.32 47.51481 57.12519 44.97110 59.66890
## Jul 2018 52.32 47.12980 57.51019 44.38228 60.25772
## Aug 2018 52.32 46.77145 57.86855 43.83422 60.80578
## Sep 2018 52.32 46.43487 58.20513 43.31947 61.32053
## Oct 2018 52.32 46.11653 58.52347 42.83261 61.80739
## Nov 2018 52.32 45.81375 58.82625 42.36954 62.27046
## Dec 2018 52.32 45.52444 59.11556 41.92709 62.71291
## Jan 2019 52.32 45.24696 59.39304 41.50271 63.13729
## Feb 2019 52.32 44.97996 59.66004 41.09437 63.54563
## Mar 2019 52.32 44.72233 59.91767 40.70037 63.93963
## Apr 2019 52.32 44.47316 60.16684 40.31930 64.32070
## May 2019 52.32 44.23167 60.40833 39.94996 64.69004
## Jun 2019 52.32 43.99717 60.64283 39.59133 65.04867
## Jul 2019 52.32 43.76911 60.87089 39.24254 65.39746
## Aug 2019 52.32 43.54697 61.09303 38.90281 65.73719
## Sep 2019 52.32 43.33032 61.30968 38.57147 66.06853
## Oct 2019 52.32 43.11877 61.52123 38.24793 66.39207
## Nov 2019 52.32 42.91197 61.72803 37.93166 66.70834
## Dec 2019 52.32 42.70963 61.93037 37.62220 67.01780
## Jan 2020 52.32 42.51145 62.12855 37.31912 67.32088
## Feb 2020 52.32 42.31721 62.32279 37.02205 67.61795
## Mar 2020 52.32 42.12666 62.51334 36.73063 67.90937
## Apr 2020 52.32 41.93961 62.70039 36.44456 68.19544
## May 2020 52.32 41.75587 62.88413 36.16356 68.47644
## Jun 2020 52.32 41.57528 63.06472 35.88736 68.75264
## Jul 2020 52.32 41.39766 63.24233 35.61573 69.02427
## Aug 2020 52.32 41.22290 63.41710 35.34844 69.29155
## Sep 2020 52.32 41.05084 63.58916 35.08530 69.55470
## Oct 2020 52.32 40.88137 63.75863 34.82612 69.81388
## Nov 2020 52.32 40.71437 63.92563 34.57072 70.06928
## Dec 2020 52.32 40.54974 64.09026 34.31895 70.32105
## Jan 2021 52.32 40.38739 64.25261 34.07065 70.56935
## Feb 2021 52.32 40.22721 64.41279 33.82568 70.81432
## Mar 2021 52.32 40.06913 64.57087 33.58391 71.05609
## Apr 2021 52.32 39.91306 64.72694 33.34522 71.29477
## May 2021 52.32 39.75893 64.88107 33.10950 71.53050
## Jun 2021 52.32 39.60667 65.03333 32.87664 71.76336
## Jul 2021 52.32 39.45621 65.18379 32.64653 71.99347
## Aug 2021 52.32 39.30749 65.33251 32.41909 72.22091
## Sep 2021 52.32 39.16045 65.47955 32.19421 72.44579
## Oct 2021 52.32 39.01504 65.62496 31.97182 72.66818
## Nov 2021 52.32 38.87120 65.76880 31.75183 72.88817
## Dec 2021 52.32 38.72888 65.91112 31.53418 73.10582
## Jan 2022 52.32 38.58804 66.05196 31.31877 73.32123
## Feb 2022 52.32 38.44862 66.19138 31.10556 73.53444
## Mar 2022 52.32 38.31059 66.32941 30.89446 73.74554
## Apr 2022 52.32 38.17391 66.46609 30.68543 73.95457
## May 2022 52.32 38.03854 66.60146 30.47839 74.16161
## Jun 2022 52.32 37.90444 66.73556 30.27330 74.36670
## Jul 2022 52.32 37.77157 66.86842 30.07010 74.56990
## Aug 2022 52.32 37.63991 67.00009 29.86874 74.77126
## Sep 2022 52.32 37.50942 67.13058 29.66917 74.97083
## Oct 2022 52.32 37.38007 67.25993 29.47134 75.16865
## Nov 2022 52.32 37.25183 67.38817 29.27522 75.36478
## Dec 2022 52.32 37.12467 67.51533 29.08074 75.55926
## Jan 2023 52.32 36.99856 67.64144 28.88788 75.75212
## Feb 2023 52.32 36.87349 67.76651 28.69660 75.94340
## Mar 2023 52.32 36.74942 67.89058 28.50685 76.13315
## Apr 2023 52.32 36.62633 68.01367 28.31860 76.32140
## May 2023 52.32 36.50419 68.13581 28.13181 76.50819
## Jun 2023 52.32 36.38300 68.25700 27.94646 76.69354
## Jul 2023 52.32 36.26272 68.37728 27.76251 76.87749
## Aug 2023 52.32 36.14333 68.49667 27.57992 77.06008
## Sep 2023 52.32 36.02482 68.61518 27.39867 77.24133
## Oct 2023 52.32 35.90716 68.73284 27.21873 77.42127
## Nov 2023 52.32 35.79034 68.84966 27.04007 77.59993
# Point forecast from November 2023 onward (the last month of the horizon).
window(naive_model$mean, start = c(2023, 11))
## Nov
## 2023 52.32
#Q5) What is the RMSE of the naive_model on the test sample?
# Passing the full series adds a "Test set" row to the accuracy table.
accuracy(naive_model, x = google)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.3040406 1.53073 1.155649 0.7780316 6.007544 0.2577452
## Test set 38.5574377 49.72214 38.634156 35.0126394 35.162368 8.6166038
## ACF1 Theil's U
## Training set -0.02859181 NA
## Test set 0.95119742 5.554911
## SECTION 3: Exponential Smoothing ##
#Q1) There are a number of exponential smoothing models that differ in how they handle errors, trend, and seasonality. Let us fit an exponential smoothing model using the following function:
#Q2) The trend for ets_model is
#Q3) What is AICc for ets_model?
# Fit an exponential smoothing model with the "AAA" specification
# (printed below as ETS(A,A,A)) and show the fitted parameters.
ets_aaa <- ets(train, model = "AAA")
ets_aaa
## ETS(A,A,A)
##
## Call:
## ets(y = train, model = "AAA")
##
## Smoothing parameters:
## alpha = 0.9989
## beta = 0.028
## gamma = 1e-04
##
## Initial states:
## l = 14.0029
## b = 0.1373
## s = 0.8312 0.6072 1.0255 -0.2578 -0.4842 -0.0997
## -1.0373 -0.1506 -0.3994 -0.2227 0.0444 0.1433
##
## sigma: 1.4714
##
## AIC AICc BIC
## 763.4382 768.8066 812.4458
#Q4) Do the residuals look like white noise? To answer this question, examine an Acf() or ggAcf() plot and the result of the Ljung-Box test.
# Residual diagnostics for the ETS fit: draws the diagnostic plots and prints
# the Ljung-Box test shown below.
checkresiduals(ets_aaa)

##
## Ljung-Box test
##
## data: Residuals from ETS(A,A,A)
## Q* = 23.157, df = 24, p-value = 0.5105
##
## Model df: 0. Total lags used: 24
## 1. Time Plot: A plot of residuals over time. You want to see no obvious patterns or trends in this plot. Residuals should look random and centered around zero.
## 2. ACF Plot: An Autocorrelation Function (ACF) plot of the residuals. For residuals to resemble white noise, you would expect most (if not all) autocorrelation coefficients to fall within the confidence interval (usually marked by blue dashed lines), indicating no significant autocorrelation at different lags.
# The ACF plot reveals that most autocorrelation values lie within the confidence bounds; however, there are a few spikes that exceed the bounds, suggesting some degree of autocorrelation at specific lags.
## 3. Histogram: A histogram of the residuals with a superimposed normal distribution curve. For white noise, you'd expect the residuals to be approximately normally distributed, with the histogram resembling the shape of the bell curve.
#The histogram with the overlaid density plot suggests that the distribution of residuals is somewhat skewed and not perfectly normal.
## 4. Ljung-Box Test: The output will usually include a Ljung-Box test statistic and p-value. The Ljung-Box test is a type of statistical test of whether any of a group of autocorrelations of a time series are different from zero. A high p-value (typically > 0.05) suggests that the residuals do not significantly deviate from white noise.
# To conclude that the residuals resemble white noise, you would want to see:
# No clear patterns or trends in the time plot of residuals.
# Autocorrelation coefficients within the confidence bounds in the ACF plot.
# A histogram of residuals that approximates a normal distribution.
# A Ljung-Box test with a p-value greater than a chosen significance level, often 0.05, indicating no significant autocorrelation.
# If your residuals meet these criteria, you can be reasonably confident that they resemble white noise, suggesting your model has adequately captured the information in the data, and what's left (the residuals) is essentially random.
#Considering these observations: on visual inspection alone, the residuals do not perfectly resemble white noise, due to the presence of a few significant spikes in the ACF plot and the skew in the distribution. However, the Ljung-Box test reported above (Q* = 23.157, df = 24, p-value = 0.5105) gives the more definitive statistical answer: the p-value is well above 0.05, so the null hypothesis of no autocorrelation in the residuals cannot be rejected.
#ANS) Based on the Ljung-Box test, the residuals resemble white noise
#Q5) Use ets_model to construct a forecast for stock price over the next 71 months of the test sample. What is the point forecast of the stock price for November 2023?
# Forecast the fitted ETS model over the 71-month test horizon and print it.
ets_aaa_forecast <- forecast(ets_aaa, h = 71)
ets_aaa_forecast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2018 52.17380 50.28809 54.05950 49.28986 55.05773
## Feb 2018 52.61698 49.91412 55.31984 48.48331 56.75065
## Mar 2018 52.89254 49.53659 56.24849 47.76006 58.02502
## Apr 2018 53.25833 49.32994 57.18672 47.25038 59.26628
## May 2018 54.04955 49.59752 58.50159 47.24076 60.85835
## Jun 2018 53.70498 48.76200 58.64796 46.14535 61.26461
## Jul 2018 55.18509 49.77444 60.59573 46.91022 63.45996
## Aug 2018 55.34279 49.48170 61.20388 46.37902 64.30656
## Sep 2018 56.11168 49.81322 62.41014 46.47902 65.74434
## Oct 2018 57.93731 51.21160 64.66301 47.65123 68.22339
## Nov 2018 58.06144 50.91640 65.20648 47.13404 68.98884
## Dec 2018 58.82797 51.26982 66.38613 47.26877 70.38717
## Jan 2019 58.68226 50.71582 66.64870 46.49864 70.86588
## Feb 2019 59.12544 50.75465 67.49624 46.32342 71.92746
## Mar 2019 59.40100 50.62887 68.17314 45.98518 72.81683
## Apr 2019 59.76679 50.59562 68.93797 45.74069 73.79290
## May 2019 60.55802 50.98953 70.12651 45.92428 75.19176
## Jun 2019 60.21345 50.24888 70.17801 44.97396 75.45294
## Jul 2019 61.69355 51.33374 72.05337 45.84958 77.53752
## Aug 2019 61.85126 51.09667 72.60584 45.40353 78.29898
## Sep 2019 62.62014 51.47096 73.76933 45.56894 79.67135
## Oct 2019 64.44577 52.90191 75.98964 46.79095 82.10059
## Nov 2019 64.56990 52.63105 76.50876 46.31100 82.82881
## Dec 2019 65.33644 53.00208 77.67079 46.47267 84.20020
## Jan 2020 65.19073 52.46014 77.92131 45.72097 84.66048
## Feb 2020 65.63391 52.50632 78.76150 45.55699 85.71082
## Mar 2020 65.90947 52.38392 79.43502 45.22392 86.59502
## Apr 2020 66.27526 52.35067 80.19985 44.97943 87.57109
## May 2020 67.06648 52.74166 81.39131 45.15856 88.97441
## Jun 2020 66.72191 51.99558 81.44824 44.19994 89.24388
## Jul 2020 68.20202 53.07282 83.33121 45.06391 91.34012
## Aug 2020 68.35972 52.82623 83.89321 44.60330 92.11614
## Sep 2020 69.12861 53.18932 85.06790 44.75157 93.50565
## Oct 2020 70.95424 54.60759 87.30088 45.95420 95.95428
## Nov 2020 71.07837 54.32275 87.83398 45.45287 96.70387
## Dec 2020 71.84490 54.67866 89.01114 45.59140 98.09840
## Jan 2021 71.69919 54.12056 89.27782 44.81500 98.58338
## Feb 2021 72.14237 54.14968 90.13507 44.62492 99.65983
## Mar 2021 72.41793 54.00940 90.82647 44.26451 100.57136
## Apr 2021 72.78372 53.95754 91.60991 43.99156 101.57589
## May 2021 73.57495 54.32929 92.82061 44.14125 103.00865
## Jun 2021 73.23038 53.56338 92.89737 43.15231 103.30845
## Jul 2021 74.71048 54.62028 94.80069 43.98516 105.43580
## Aug 2021 74.86819 54.35287 95.38350 43.49272 106.24365
## Sep 2021 75.63707 54.69473 96.57942 43.60852 107.66562
## Oct 2021 77.46270 56.09140 98.83400 44.77812 110.14729
## Nov 2021 77.58683 55.78463 99.38904 44.24324 110.93043
## Dec 2021 78.35337 56.11830 100.58843 44.34777 112.35896
## Jan 2022 78.20766 55.53771 100.87760 43.53697 112.87834
## Feb 2022 78.65084 55.54409 101.75759 43.31211 113.98957
## Mar 2022 78.92640 55.38085 102.47194 42.91659 114.93620
## Apr 2022 79.29219 55.30586 103.27852 42.60826 115.97612
## May 2022 80.08341 55.65429 104.51254 42.72230 117.44453
## Jun 2022 79.73884 54.86493 104.61276 41.69747 117.78021
## Jul 2022 81.21895 55.89823 106.53967 42.49425 119.94365
## Aug 2022 81.37665 55.60711 107.14619 41.96554 120.78776
## Sep 2022 82.14554 55.92517 108.36591 42.04494 122.24614
## Oct 2022 83.97117 57.29794 110.64439 43.17799 124.76435
## Nov 2022 84.09530 56.96720 111.22340 42.60645 125.58415
## Dec 2022 84.86183 57.27683 112.44683 42.67422 127.04944
## Jan 2023 84.71612 56.67216 112.76008 41.82658 127.60566
## Feb 2023 85.15930 56.65440 113.66421 41.56482 128.75379
## Mar 2023 85.43486 56.46700 114.40273 41.13233 129.73739
## Apr 2023 85.80066 56.36780 115.23351 40.78699 130.81432
## May 2023 86.59188 56.69202 116.49174 40.86399 132.31977
## Jun 2023 86.24731 55.87842 116.61619 39.80210 132.69251
## Jul 2023 87.72741 56.88748 118.56734 40.56181 134.89301
## Aug 2023 87.88512 56.57213 119.19810 39.99603 135.77420
## Sep 2023 88.65400 56.86594 120.44207 40.03836 137.26965
## Oct 2023 90.47963 58.21449 122.74478 41.13435 139.82492
## Nov 2023 90.60376 57.85952 123.34800 40.52577 140.68176
# Point forecast from November 2023 onward (the last month of the horizon).
window(ets_aaa_forecast$mean, start = c(2023, 11))
## Nov
## 2023 90.60376
#Q6) What is the RMSE of ets_model on the test sample?
# Passing the full series adds a "Test set" row to the accuracy table.
accuracy(ets_aaa_forecast, x = google)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1095535 1.379364 1.082399 0.09097052 5.898302 0.2414082
## Test set 19.8756330 30.394350 21.311753 16.00116802 18.597786 4.7531756
## ACF1 Theil's U
## Training set 0.04419854 NA
## Test set 0.93901599 3.211076
## SECTION 4: ARIMA ##
#Q1) Now, let’s use an ARIMA model to forecast stock price. Since, there are a large number of parameters with which to define the ARIMA model, use the auto.arima() function to automatically determine the best parameters. Set it up to do an exhaustive search by setting stepwise to F and approximation to F. For instance, if your dataset is called train, run
# auto.arima(train, approximation = F, stepwise = F)
#Call this auto_arima_model. How many ordinary autoregressive lag variables have been used in auto_arima_model?
# Exhaustive ARIMA search: stepwise selection and likelihood approximation are
# both switched off.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
auto_arima_model <- auto.arima(train, approximation = FALSE, stepwise = FALSE)
auto_arima_model
## Series: train
## ARIMA(0,2,1)(2,0,0)[12]
##
## Coefficients:
## ma1 sar1 sar2
## -0.9785 0.0045 0.2601
## s.e. 0.0204 0.0889 0.1010
##
## sigma^2 = 2.202: log likelihood = -236.49
## AIC=480.98 AICc=481.3 BIC=492.45
# ARIMA(p,d,q) model, where:
# p is the order (number of time lags) of the autoregressive model.
# d is the degree of differencing (the number of times the data have had past values subtracted).
# q is the order of the moving average model.
# The first set of parameters (0,2,1) refers to the non-seasonal part of the model:
# 0 indicates that there are zero ordinary autoregressive (AR) lag variables used in the non-seasonal component of the model.
# 2 indicates the degree of differencing in the non-seasonal component.
# 1 indicates that there is one moving average (MA) term in the non-seasonal component.
#The second set of parameters (2,0,0)[12] refers to the seasonal part of the model:
# 2 indicates that there are two seasonal AR terms.
# 0 indicates that there is no differencing in the seasonal component.
# 0 indicates that there are no seasonal MA terms.
# [12] indicates the seasonal period, which in this case is 12, suggesting a yearly seasonality in monthly data.
# Given this information, in your auto_arima_model, there are no ordinary autoregressive lag variables used in the non-seasonal part of the model (p=0).
# The model does include two seasonal autoregressive terms, but since your question specifically asks about "ordinary" autoregressive lag variables,
# the answer would be 0 for the non-seasonal component of the ARIMA model.
#Q2) What is the number of ordinary differences used in auto_arima_model?
#Ans: 2
#Q3) How many ordinary moving average lags have been used in auto_arima_model?
#Ans: 1
#Q4) How many seasonal autoregressive lag variables have been used in auto_arima_model?
#Ans: 2
#Q5) Do the residuals look like white noise? To answer this question, examine an Acf() or ggAcf() plot and the result of the Ljung-Box test.
# Residual diagnostics for the auto.arima fit: draws the diagnostic plots and
# prints the Ljung-Box test shown below.
checkresiduals(auto_arima_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,2,1)(2,0,0)[12]
## Q* = 30.77, df = 21, p-value = 0.07756
##
## Model df: 3. Total lags used: 24
#Ans: Resemble white noise (Ljung-Box p-value = 0.07756 > 0.05)
#Q6) Use auto_arima_model to construct a forecast for stock price over the next 71 months of the test sample. What is the point forecast of the stock price for November 2023?
# Forecast the auto.arima model over the 71-month test horizon and print it.
auto_arima_forecast <- forecast(auto_arima_model, h = 71)
auto_arima_forecast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2018 52.51485 50.61321 54.41650 49.60654 55.42316
## Feb 2018 52.32960 49.61110 55.04810 48.17201 56.48719
## Mar 2018 53.34111 49.97580 56.70643 48.19431 58.48792
## Apr 2018 53.07912 49.15166 57.00658 47.07259 59.08565
## May 2018 54.04425 49.60665 58.48185 47.25752 60.83097
## Jun 2018 53.86082 48.94850 58.77313 46.34808 61.37356
## Jul 2018 55.25959 49.89826 60.62093 47.06014 63.45904
## Aug 2018 55.63540 49.84447 61.42632 46.77894 64.49185
## Sep 2018 56.16946 49.96407 62.37485 46.67913 65.65979
## Oct 2018 56.67308 50.06525 63.28092 46.56727 66.77890
## Nov 2018 56.72594 49.72537 63.72652 46.01948 67.43241
## Dec 2018 57.30722 49.92183 64.69260 46.01225 68.60219
## Jan 2019 58.02931 50.26303 65.79560 46.15181 69.90682
## Feb 2019 58.76857 50.62695 66.91018 46.31704 71.22010
## Mar 2019 59.25214 50.73982 67.76446 46.23367 72.27061
## Apr 2019 60.64110 51.76193 69.52027 47.06159 74.22062
## May 2019 61.80794 52.56515 71.05074 47.67230 75.94358
## Jun 2019 61.47354 51.86979 71.07729 46.78587 76.16121
## Jul 2019 62.15940 52.19691 72.12190 46.92309 77.39572
## Aug 2019 62.67239 52.35297 72.99180 46.89021 78.45457
## Sep 2019 63.32850 52.65364 74.00335 47.00272 79.65428
## Oct 2019 64.47546 53.44635 75.50457 47.60789 81.34303
## Nov 2019 64.93419 53.55175 76.31664 47.52624 82.34214
## Dec 2019 65.65828 53.92319 77.39336 47.71101 83.60554
## Jan 2020 66.10865 53.89340 78.32390 47.42704 84.79026
## Feb 2020 66.46023 53.76902 79.15143 47.05070 85.86975
## Mar 2020 67.12197 53.95836 80.28557 46.98997 87.25397
## Apr 2020 67.45649 53.82346 81.08951 46.60658 88.30640
## May 2020 68.10922 54.00928 82.20916 46.54522 89.67322
## Jun 2020 68.45647 53.89168 83.02125 46.18155 90.73138
## Jul 2020 69.21985 54.19192 84.24778 46.23662 92.20308
## Aug 2020 69.71636 54.22667 85.20605 46.02692 93.40579
## Sep 2020 70.25467 54.30431 86.20504 45.86070 94.64865
## Oct 2020 70.78727 54.37707 87.19747 45.69003 95.88451
## Nov 2020 71.19953 54.33010 88.06896 45.39996 96.99910
## Dec 2020 71.75043 54.42217 89.07869 45.24915 98.25171
## Jan 2021 72.33674 54.54888 90.12460 45.13256 99.54092
## Feb 2021 72.92707 54.67968 91.17445 45.02010 100.83403
## Mar 2021 73.45227 54.74529 92.15926 44.84241 102.06213
## Apr 2021 74.21153 55.04474 93.37832 44.89846 103.52461
## May 2021 74.91443 55.28751 94.54136 44.89764 104.93123
## Jun 2021 75.22546 55.13795 95.31297 44.50427 105.94665
## Jul 2021 75.80374 55.25511 96.35237 44.37733 107.23016
## Aug 2021 76.33586 55.32547 97.34625 44.20325 108.46848
## Sep 2021 76.90540 55.43253 98.37826 44.06549 109.74531
## Oct 2021 77.60259 55.66645 99.53873 44.05417 111.15102
## Nov 2021 78.12022 55.71994 100.52050 43.86195 112.37849
## Dec 2021 78.70750 55.84215 101.57284 43.73796 113.67703
## Jan 2022 79.22373 55.86637 102.58109 43.50172 114.94574
## Feb 2022 79.71428 55.86406 103.56451 43.23851 116.19005
## Mar 2022 80.28523 55.94123 104.62922 43.05430 117.51615
## Apr 2022 80.77210 55.93335 105.61084 42.78451 118.75968
## May 2022 81.34149 56.00696 106.67603 42.59566 120.08732
## Jun 2022 81.82967 55.99825 107.66109 42.32392 121.33542
## Jul 2022 82.42729 56.09783 108.75674 42.15986 122.69471
## Aug 2022 82.95528 56.12660 109.78396 41.92436 123.98621
## Sep 2022 83.49432 56.16517 110.82346 41.69800 125.29063
## Oct 2022 84.03243 56.20155 111.86332 41.46877 126.59610
## Nov 2022 84.53845 56.20451 112.87239 41.20543 127.87147
## Dec 2022 85.08084 56.24249 113.91918 40.97639 129.18528
## Jan 2023 85.63212 56.28766 114.97657 40.75365 130.51058
## Feb 2023 86.18433 56.33236 116.03630 40.52968 131.83898
## Mar 2023 86.71996 56.35903 117.08089 40.28693 133.15299
## Apr 2023 87.31610 56.44475 118.18745 40.10244 134.52975
## May 2023 87.89794 56.51469 119.28120 39.90139 135.89449
## Jun 2023 88.37749 56.48082 120.27417 39.59574 137.15925
## Jul 2023 88.92705 56.51543 121.33867 39.35775 138.49635
## Aug 2023 89.46429 56.53616 122.39241 39.10507 139.82350
## Sep 2023 90.01131 56.56512 123.45749 38.85978 141.16283
## Oct 2023 90.59153 56.62570 124.55736 38.64527 142.53778
## Nov 2023 91.12490 56.63782 125.61197 38.38147 143.86833
#Q7) What is the RMSE of auto_arima_model on the test sample?
# Passing the full series adds a "Test set" row to the accuracy table.
accuracy(auto_arima_forecast, x = google)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1542287 1.455439 1.105696 0.4908096 5.870911 0.2466041
## Test set 19.1694470 29.684817 20.651638 15.3077909 17.968530 4.6059499
## ACF1 Theil's U
## Training set -0.03059774 NA
## Test set 0.93723326 3.128917
#Q8) Let us see if we can improve our ARIMA model by a variance stabilizing transformation. BoxCox.lambda() is a handy function for identifying the optimal value of lambda to stabilize variance. What is the optimal value of lambda?
# Compute the Box-Cox lambda once and reuse it, instead of calling
# BoxCox.lambda(train) separately for the model fit and for the answer.
lambda_opt <- BoxCox.lambda(train)
# Exhaustive search on the transformed series; FALSE is spelled out because
# F is a reassignable variable, not a reserved word.
auto.arima(
  train,
  approximation = FALSE,
  stepwise = FALSE,
  lambda = lambda_opt
)
## Series: train
## ARIMA(0,1,0)(2,0,0)[12] with drift
## Box Cox transformation: lambda= 0.5605232
##
## Coefficients:
## sar1 sar2 drift
## -0.0146 0.2236 0.0649
## s.e. 0.0874 0.0999 0.0401
##
## sigma^2 = 0.1482: log likelihood = -59.95
## AIC=127.89 AICc=128.21 BIC=139.4
# Optimal lambda (the answer to Q8).
lambda_opt
## [1] 0.5605232
#Q9) Rather than using auto.arima(), let us specify an ARIMA model. Call this arima_model. What is the AICc for arima_model?
# Hand-specified seasonal model, ARIMA(0,2,1)(3,1,0), fitted to the training
# series on the Box-Cox scale with lambda estimated from that same series.
arima_model = Arima(
  train,
  order    = c(0, 2, 1),
  seasonal = c(3, 1, 0),
  lambda   = BoxCox.lambda(train)
)
# Auto-print the fit to show coefficients and AIC / AICc / BIC.
arima_model
## Series: train
## ARIMA(0,2,1)(3,1,0)[12]
## Box Cox transformation: lambda= 0.5605232
##
## Coefficients:
## ma1 sar1 sar2 sar3
## -0.9996 -0.8493 -0.5164 -0.3566
## s.e. 0.0299 0.0906 0.1207 0.1056
##
## sigma^2 = 0.1617: log likelihood = -69.96
## AIC=149.92 AICc=150.45 BIC=163.77
#Q10) Examine the results of the Ljung-Box test (using the default of 24 lags) to see if the residuals resemble white noise.
# Residual diagnostics for arima_model. Per the output below, this reports a
# Ljung-Box test on the residuals using 24 lags and 4 model degrees of freedom.
checkresiduals(arima_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,2,1)(3,1,0)[12]
## Q* = 19.616, df = 20, p-value = 0.4822
##
## Model df: 4. Total lags used: 24
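#ANS: The residuals resemble white noise: the Ljung-Box p-value (0.4822) is well above 0.05, so we fail to reject the null hypothesis of no autocorrelation.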
#Q11) Use arima_model to construct a forecast for stock price over the next 71 months of the test sample. What is the point forecast of the stock price for November 2023?
# Forecast 71 monthly steps ahead from arima_model; per the printed table this
# spans Jan 2018 through Nov 2023.
arima_forecast = forecast(arima_model, h = 71)
# Auto-print point forecasts with the 80% and 95% interval bounds.
arima_forecast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2018 53.25728 50.32182 56.26563 48.79755 57.88750
## Feb 2018 53.48578 49.32634 57.79247 47.18457 60.13143
## Mar 2018 53.43773 48.33989 58.75880 45.73255 61.66510
## Apr 2018 52.46195 46.61500 58.61034 43.64330 61.98574
## May 2018 54.76884 48.08957 61.82682 44.70911 65.71449
## Jun 2018 53.92719 46.64619 61.66816 42.98080 65.94962
## Jul 2018 56.92262 48.84232 65.54145 44.78626 70.31884
## Aug 2018 57.13551 48.46110 66.43129 44.12488 71.59981
## Sep 2018 57.78854 48.51611 67.76597 43.89804 73.32843
## Oct 2018 59.26022 49.34624 69.96282 44.42328 75.94224
## Nov 2018 58.90682 48.51014 70.18007 43.36832 76.49623
## Dec 2018 59.40921 48.47817 71.30474 43.09014 77.98497
## Jan 2019 60.36662 48.73873 73.07336 43.02954 80.22806
## Feb 2019 61.33771 49.02865 74.84158 43.00731 82.46388
## Mar 2019 61.92997 48.99059 76.18268 42.68522 84.24795
## Apr 2019 62.76079 49.18121 77.77336 42.58707 86.28782
## May 2019 64.82761 50.48906 80.71679 43.54237 89.74158
## Jun 2019 63.12834 48.43978 79.49826 41.36309 88.82846
## Jul 2019 67.79276 52.06873 85.31029 44.49044 95.29236
## Aug 2019 68.06933 51.79347 86.26638 43.97687 96.65807
## Sep 2019 68.69031 51.83208 87.59803 43.76130 98.41607
## Oct 2019 72.96110 55.09694 92.99127 46.54208 104.44952
## Nov 2019 73.76775 55.30009 94.53179 46.48074 106.42947
## Dec 2019 75.16240 56.02409 96.72681 46.90435 109.09893
## Jan 2020 75.85174 55.84692 98.49646 46.35895 111.52377
## Feb 2020 75.54751 54.82402 99.13219 45.04969 112.74299
## Mar 2020 77.14901 55.47310 101.90166 45.28585 116.21470
## Apr 2020 76.68165 54.34805 102.32066 43.91028 117.19140
## May 2020 79.88375 56.38756 106.89755 45.42386 122.57899
## Jun 2020 77.89681 53.99265 105.56213 42.91791 121.68208
## Jul 2020 82.28256 57.03909 111.49671 45.34331 128.51874
## Aug 2020 82.75429 56.75602 112.95822 44.76121 130.59505
## Sep 2020 83.86775 57.02380 115.15123 44.68122 133.45006
## Oct 2020 86.20802 58.32818 118.75577 45.53426 137.81264
## Nov 2020 85.87657 57.38846 119.28152 44.37982 138.88787
## Dec 2020 87.20304 57.84543 121.71790 44.47942 142.00475
## Jan 2021 88.81861 58.35155 124.75995 44.53410 145.92437
## Feb 2021 90.17213 58.64558 127.49542 44.40582 149.51573
## Mar 2021 91.12454 58.61491 129.76021 43.99669 152.60193
## Apr 2021 93.38666 59.66685 133.55450 44.54600 157.33169
## May 2021 96.95729 61.79039 138.88602 46.03704 163.71723
## Jun 2021 94.72778 59.17337 137.41218 43.37668 162.78308
## Jul 2021 98.75652 61.66282 143.29562 45.18523 169.77102
## Aug 2021 99.52545 61.50218 145.34293 44.68386 172.62869
## Sep 2021 100.89724 61.83361 148.10187 44.61464 176.25504
## Oct 2021 105.00065 64.36573 154.09952 46.45212 183.38104
## Nov 2021 105.63435 64.09969 155.99358 45.86679 186.08013
## Dec 2021 107.44404 64.78030 159.28461 46.10202 190.29064
## Jan 2022 108.96931 64.83446 162.83700 45.61949 195.12868
## Feb 2022 109.72027 64.28749 165.45675 44.63536 198.95515
## Mar 2022 110.93827 64.12486 168.62737 43.99223 203.37741
## Apr 2022 111.15163 63.18921 170.58069 42.70890 206.47589
## May 2022 114.89069 65.01196 176.78827 43.75599 214.20244
## Jun 2022 113.32516 62.72270 176.57423 41.36422 214.93974
## Jul 2022 118.47886 65.63146 184.51559 43.31708 224.56664
## Aug 2022 119.29743 65.19788 187.19399 42.48932 228.45974
## Sep 2022 120.75415 65.26135 190.64887 42.08201 233.20196
## Oct 2022 124.28281 66.90511 196.64229 42.97993 240.72234
## Nov 2022 124.59793 66.10475 198.70651 41.87137 243.95136
## Dec 2022 126.29596 66.35856 202.46750 41.63402 249.03904
## Jan 2023 128.14692 66.53238 206.74414 41.25152 254.88318
## Feb 2023 129.57744 66.39740 210.50299 40.62718 260.16265
## Mar 2023 131.12581 66.35714 214.40841 40.08809 265.60590
## Apr 2023 132.72249 66.35819 218.37580 39.58997 271.12068
## May 2023 136.75641 68.15518 225.38456 40.52547 279.98598
## Jun 2023 134.75858 65.53000 224.87079 37.96154 280.57519
## Jul 2023 140.72976 68.72941 234.32711 39.99983 292.15182
## Aug 2023 141.76805 68.32924 237.61808 39.20458 296.94086
## Sep 2023 143.44556 68.39777 241.71397 38.78420 302.62151
## Oct 2023 148.56019 70.94570 250.14240 40.29723 313.09089
## Nov 2023 149.56638 70.52404 253.41277 39.49739 317.87316
#Q12) What is the RMSE of arima_model on the test sample?
# Error measures for the hand-specified ARIMA forecast scored against `google`;
# the printed table has one row for the training set and one for the test set.
accuracy(arima_forecast, x = google)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01875138 1.414883 1.038791 0.01475292 5.477284 0.2316822
## Test set -1.16572279 18.435041 13.723167 -3.91532499 13.862165 3.0606878
## ACF1 Theil's U
## Training set 0.009541535 NA
## Test set 0.920142748 2.235569