library(ggplot2);library(ggthemes);library(gridExtra)  # For plots 
## Warning: package 'ggthemes' was built under R version 4.3.3
## Warning: package 'gridExtra' was built under R version 4.3.3
library(quantmod);library(xts);library(zoo) # For using xts class objects
## Warning: package 'quantmod' was built under R version 4.3.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.3.3
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(forecast) # Set of forecasting functions
## Warning: package 'forecast' was built under R version 4.3.3
library(fpp); library(fpp2) # Datasets from Forecasting text by Rob Hyndman
## Warning: package 'fpp' was built under R version 4.3.3
## Loading required package: fma
## Warning: package 'fma' was built under R version 4.3.3
## Loading required package: expsmooth
## Warning: package 'expsmooth' was built under R version 4.3.3
## Loading required package: lmtest
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: tseries
## Warning: package 'tseries' was built under R version 4.3.3
## Warning: package 'fpp2' was built under R version 4.3.3
## 
## Attaching package: 'fpp2'
## The following objects are masked from 'package:fpp':
## 
##     ausair, ausbeer, austa, austourists, debitcards, departures,
##     elecequip, euretail, guinearice, oil, sunspotarea, usmelec
library(tseries) # for a statistical test
library(dplyr) # Data wrangling
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
## 
##     first, last
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## SECTION 1: TIME SERIES ANALYSIS ## 

#Q1) What type of data structure is goog? 

# Load the serialized monthly price series from disk and report its class.
goog <- readRDS("goog23.rds")
class(goog)
## [1] "xts" "zoo"
#Q2) What was Alphabet's stock price for June, 2007? 

# Date-string subsetting on the time index: keep only the June 2007 row.
goog["2007-06", ]
##            price
## Jun 2007 13.0187
#Q3) Using the monthly stock price for Alphabet, what is the average stock price for the year 2007?

# Average of all monthly prices whose index falls in calendar year 2007.
mean(goog["2007", ])
## [1] 13.66773
#Q4) How many months of data are included in this dataset? 

# One row per month, so the row count is the number of months of data.
nrow(goog)
## [1] 203
# Option 2: count the monthly periods, or measure the length of the time index.

nmonths(goog)
## [1] 203
length(zoo::index(goog))
## [1] 203
#Q5) With time-series data, the past is often a good predictor of the future. Let us see if this is true for our data. What is the correlation between Alphabet's stock price and one-month lagged stock price? You can use lag() to obtain a one-month lag for Alphabet's stock price. When computing correlation with cor(), be sure to set use='complete.obs'.

# dplyr is attached after xts, so a bare lag() resolves to dplyr::lag(), which
# the xts start-up warning says does not work correctly on xts objects.
# Calling stats::lag() explicitly dispatches to the xts method; k = 1 requests
# a one-period (one-month) lag.
goog_lag <- stats::lag(goog, k = 1)

# The first lagged value is NA, so restrict the correlation to complete pairs.
cor(goog, goog_lag, use = "complete.obs")
##           price
## price 0.9930809
# Q6) In order to have access to a wider array of forecasting models, we will convert the data to a "ts" data type. Also, we will split the data into a train and test sample, using the train sample to estimate a model and the test sample to evaluate it. We will use data from Jan, 2007 to Dec, 2017 for the train sample and the rest for the test sample. The code below will convert goog to a “ts” object and split the data.

#How many months of data does train contain? 

# Re-express the series as a monthly "ts" object beginning in January 2007.
google <- ts(goog, start = c(2007, 1), frequency = 12)

# Split by calendar window: train is Jan 2007 - Dec 2017, test is
# Jan 2018 - Nov 2023.
train <- window(google, start = c(2007, 1), end = c(2017, 12))
test <- window(google, start = c(2018, 1), end = c(2023, 11))

# Number of monthly observations available for model estimation.
length(train)
## [1] 132
#Q7) Autocorrelation examines correlation of a variable and its lagged values. Construct a plot of autocorrelations for train using ggAcf() from the forecast package. Which lag has the strongest autocorrelation?

# forecast is already attached by the library() calls at the top of the
# script, so it is neither reinstalled nor reloaded here: calling
# install.packages() on an in-use package only produces a warning and makes
# the script depend on network access.
# Plot the autocorrelation function of the training series.
ggAcf(train)

#graph explanation: 

# Significance Bands: The blue dashed lines are typically set at ±1.96/√n (where n is the sample size) and represent the significance bounds for the ACF. If a bar extends beyond this boundary, the correlation for that lag is statistically significant.
# Strongest Autocorrelation: The lag with the strongest autocorrelation is the one whose bar extends furthest from the zero line. In this plot, the first lag (lag 1) has the tallest bar; it extends well beyond the significance bounds, indicating a strong, statistically significant positive autocorrelation.
## SECTION 2: Simple Forecasting Methods ##

#Q1) A very simple prediction, often the baseline in linear regression, is to use the average. Use the average to make a prediction for the stock price over the 71 months of the test sample. Let's call this average_model. What is the point forecast of the stock price for November 2023?

# Baseline forecast: the training-sample mean, projected over the
# 71 months of the test sample.
average_model <- meanf(train, h = 71)
average_model
##          Point Forecast   Lo 80    Hi 80      Lo 95    Hi 95
## Jan 2018        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Feb 2018        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Mar 2018        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Apr 2018        22.1933 7.17525 37.21135 -0.8720666 45.25867
## May 2018        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jun 2018        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jul 2018        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Aug 2018        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Sep 2018        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Oct 2018        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Nov 2018        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Dec 2018        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jan 2019        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Feb 2019        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Mar 2019        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Apr 2019        22.1933 7.17525 37.21135 -0.8720666 45.25867
## May 2019        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jun 2019        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jul 2019        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Aug 2019        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Sep 2019        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Oct 2019        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Nov 2019        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Dec 2019        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jan 2020        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Feb 2020        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Mar 2020        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Apr 2020        22.1933 7.17525 37.21135 -0.8720666 45.25867
## May 2020        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jun 2020        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jul 2020        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Aug 2020        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Sep 2020        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Oct 2020        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Nov 2020        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Dec 2020        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jan 2021        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Feb 2021        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Mar 2021        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Apr 2021        22.1933 7.17525 37.21135 -0.8720666 45.25867
## May 2021        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jun 2021        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jul 2021        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Aug 2021        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Sep 2021        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Oct 2021        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Nov 2021        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Dec 2021        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jan 2022        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Feb 2022        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Mar 2022        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Apr 2022        22.1933 7.17525 37.21135 -0.8720666 45.25867
## May 2022        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jun 2022        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jul 2022        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Aug 2022        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Sep 2022        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Oct 2022        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Nov 2022        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Dec 2022        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jan 2023        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Feb 2023        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Mar 2023        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Apr 2023        22.1933 7.17525 37.21135 -0.8720666 45.25867
## May 2023        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jun 2023        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Jul 2023        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Aug 2023        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Sep 2023        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Oct 2023        22.1933 7.17525 37.21135 -0.8720666 45.25867
## Nov 2023        22.1933 7.17525 37.21135 -0.8720666 45.25867
# Keep only the point forecasts from November 2023 onward (the final month).
window(average_model$mean, start = c(2023, 11))
##          Nov
## 2023 22.1933
#Q2) Let us examine the accuracy of the above prediction from average_model on the train sample. Specifically, what is the RMSE of the prediction in the train sample? Hint: Use accuracy() from library(forecast)

# With no actuals supplied, only training-set error measures are reported.
accuracy(object = average_model)
##                        ME     RMSE      MAE      MPE     MAPE     MASE
## Training set 5.344788e-16 11.57155 9.905325 -27.8625 53.83724 2.209192
##                   ACF1
## Training set 0.9629783
#Q3) What is the RMSE of the average_model on the test sample?

# Supplying the full series as actuals adds a test-set row to the training row.
accuracy(object = average_model, x = google)
##                        ME     RMSE       MAE       MPE     MAPE      MASE
## Training set 5.344788e-16 11.57155  9.905325 -27.86250 53.83724  2.209192
## Test set     6.868414e+01 75.51904 68.684137  72.43341 72.43341 15.318673
##                   ACF1 Theil's U
## Training set 0.9629783        NA
## Test set     0.9511974  9.709656
#Q4) Next, let us examine another simple prediction, one that assumes the future will be the same as the last observation. Let’s call this naive_model. Use naive_model to construct a forecast for stock price over the next 71 months of the test sample. What is the point forecast of the stock price for November 2023?

# Naive forecast: carry the last training observation forward for 71 months.
naive_model <- naive(train, h = 71)
naive_model
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Jan 2018          52.32 50.35829 54.28171 49.31982 55.32018
## Feb 2018          52.32 49.54572 55.09428 48.07711 56.56289
## Mar 2018          52.32 48.92222 55.71778 47.12354 57.51646
## Apr 2018          52.32 48.39658 56.24342 46.31965 58.32035
## May 2018          52.32 47.93348 56.70651 45.61140 59.02860
## Jun 2018          52.32 47.51481 57.12519 44.97110 59.66890
## Jul 2018          52.32 47.12980 57.51019 44.38228 60.25772
## Aug 2018          52.32 46.77145 57.86855 43.83422 60.80578
## Sep 2018          52.32 46.43487 58.20513 43.31947 61.32053
## Oct 2018          52.32 46.11653 58.52347 42.83261 61.80739
## Nov 2018          52.32 45.81375 58.82625 42.36954 62.27046
## Dec 2018          52.32 45.52444 59.11556 41.92709 62.71291
## Jan 2019          52.32 45.24696 59.39304 41.50271 63.13729
## Feb 2019          52.32 44.97996 59.66004 41.09437 63.54563
## Mar 2019          52.32 44.72233 59.91767 40.70037 63.93963
## Apr 2019          52.32 44.47316 60.16684 40.31930 64.32070
## May 2019          52.32 44.23167 60.40833 39.94996 64.69004
## Jun 2019          52.32 43.99717 60.64283 39.59133 65.04867
## Jul 2019          52.32 43.76911 60.87089 39.24254 65.39746
## Aug 2019          52.32 43.54697 61.09303 38.90281 65.73719
## Sep 2019          52.32 43.33032 61.30968 38.57147 66.06853
## Oct 2019          52.32 43.11877 61.52123 38.24793 66.39207
## Nov 2019          52.32 42.91197 61.72803 37.93166 66.70834
## Dec 2019          52.32 42.70963 61.93037 37.62220 67.01780
## Jan 2020          52.32 42.51145 62.12855 37.31912 67.32088
## Feb 2020          52.32 42.31721 62.32279 37.02205 67.61795
## Mar 2020          52.32 42.12666 62.51334 36.73063 67.90937
## Apr 2020          52.32 41.93961 62.70039 36.44456 68.19544
## May 2020          52.32 41.75587 62.88413 36.16356 68.47644
## Jun 2020          52.32 41.57528 63.06472 35.88736 68.75264
## Jul 2020          52.32 41.39766 63.24233 35.61573 69.02427
## Aug 2020          52.32 41.22290 63.41710 35.34844 69.29155
## Sep 2020          52.32 41.05084 63.58916 35.08530 69.55470
## Oct 2020          52.32 40.88137 63.75863 34.82612 69.81388
## Nov 2020          52.32 40.71437 63.92563 34.57072 70.06928
## Dec 2020          52.32 40.54974 64.09026 34.31895 70.32105
## Jan 2021          52.32 40.38739 64.25261 34.07065 70.56935
## Feb 2021          52.32 40.22721 64.41279 33.82568 70.81432
## Mar 2021          52.32 40.06913 64.57087 33.58391 71.05609
## Apr 2021          52.32 39.91306 64.72694 33.34522 71.29477
## May 2021          52.32 39.75893 64.88107 33.10950 71.53050
## Jun 2021          52.32 39.60667 65.03333 32.87664 71.76336
## Jul 2021          52.32 39.45621 65.18379 32.64653 71.99347
## Aug 2021          52.32 39.30749 65.33251 32.41909 72.22091
## Sep 2021          52.32 39.16045 65.47955 32.19421 72.44579
## Oct 2021          52.32 39.01504 65.62496 31.97182 72.66818
## Nov 2021          52.32 38.87120 65.76880 31.75183 72.88817
## Dec 2021          52.32 38.72888 65.91112 31.53418 73.10582
## Jan 2022          52.32 38.58804 66.05196 31.31877 73.32123
## Feb 2022          52.32 38.44862 66.19138 31.10556 73.53444
## Mar 2022          52.32 38.31059 66.32941 30.89446 73.74554
## Apr 2022          52.32 38.17391 66.46609 30.68543 73.95457
## May 2022          52.32 38.03854 66.60146 30.47839 74.16161
## Jun 2022          52.32 37.90444 66.73556 30.27330 74.36670
## Jul 2022          52.32 37.77157 66.86842 30.07010 74.56990
## Aug 2022          52.32 37.63991 67.00009 29.86874 74.77126
## Sep 2022          52.32 37.50942 67.13058 29.66917 74.97083
## Oct 2022          52.32 37.38007 67.25993 29.47134 75.16865
## Nov 2022          52.32 37.25183 67.38817 29.27522 75.36478
## Dec 2022          52.32 37.12467 67.51533 29.08074 75.55926
## Jan 2023          52.32 36.99856 67.64144 28.88788 75.75212
## Feb 2023          52.32 36.87349 67.76651 28.69660 75.94340
## Mar 2023          52.32 36.74942 67.89058 28.50685 76.13315
## Apr 2023          52.32 36.62633 68.01367 28.31860 76.32140
## May 2023          52.32 36.50419 68.13581 28.13181 76.50819
## Jun 2023          52.32 36.38300 68.25700 27.94646 76.69354
## Jul 2023          52.32 36.26272 68.37728 27.76251 76.87749
## Aug 2023          52.32 36.14333 68.49667 27.57992 77.06008
## Sep 2023          52.32 36.02482 68.61518 27.39867 77.24133
## Oct 2023          52.32 35.90716 68.73284 27.21873 77.42127
## Nov 2023          52.32 35.79034 68.84966 27.04007 77.59993
# Keep only the point forecasts from November 2023 onward (the final month).
window(naive_model$mean, start = c(2023, 11))
##        Nov
## 2023 52.32
#Q5) What is the RMSE of the naive_model on the test sample?

# Training- and test-set error measures for the naive forecast.
accuracy(object = naive_model, x = google)
##                      ME     RMSE       MAE        MPE      MAPE      MASE
## Training set  0.3040406  1.53073  1.155649  0.7780316  6.007544 0.2577452
## Test set     38.5574377 49.72214 38.634156 35.0126394 35.162368 8.6166038
##                     ACF1 Theil's U
## Training set -0.02859181        NA
## Test set      0.95119742  5.554911
## SECTION 3: Exponential Smoothing ##

#Q1) There are a number of exponential smoothing models that differ in how they handle errors, trend, and seasonality. Let us fit an exponential smoothing model using the following function:

#Q2) The trend for ets_model is

#Q3)  What is AICc for ets_model?

# Exponential smoothing with additive error, additive trend and additive
# seasonality, as specified by the three-letter model code.
ets_aaa <- ets(train, model = "AAA")
ets_aaa
## ETS(A,A,A) 
## 
## Call:
##  ets(y = train, model = "AAA") 
## 
##   Smoothing parameters:
##     alpha = 0.9989 
##     beta  = 0.028 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 14.0029 
##     b = 0.1373 
##     s = 0.8312 0.6072 1.0255 -0.2578 -0.4842 -0.0997
##            -1.0373 -0.1506 -0.3994 -0.2227 0.0444 0.1433
## 
##   sigma:  1.4714
## 
##      AIC     AICc      BIC 
## 763.4382 768.8066 812.4458
#Q4) Do the residuals look like white noise? To answer this question, examine an Acf() or ggAcf() plot and the result of the Ljung-Box test.

# Residual diagnostics for the ETS fit: diagnostic plots plus a Ljung-Box test.
checkresiduals(object = ets_aaa)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,A,A)
## Q* = 23.157, df = 24, p-value = 0.5105
## 
## Model df: 0.   Total lags used: 24
## 1. Time Plot: A plot of residuals over time. You want to see no obvious patterns or trends in this plot. Residuals should look random and centered around zero.

## 2. ACF Plot: An Autocorrelation Function (ACF) plot of the residuals. For residuals to resemble white noise, you would expect most (if not all) autocorrelation coefficients to fall within the confidence interval (usually marked by blue dashed lines), indicating no significant autocorrelation at different lags.

# The ACF plot reveals that most autocorrelation values lie within the confidence bounds; however, there are a few spikes that exceed the bounds, suggesting some degree of autocorrelation at specific lags.

## 3. Histogram: A histogram of the residuals with a superimposed normal distribution curve. For white noise, you'd expect the residuals to be approximately normally distributed, with the histogram resembling the shape of the bell curve.

#The histogram with the overlaid density plot suggests that the distribution of residuals is somewhat skewed and not perfectly normal.

## 4. Ljung-Box Test: The output will usually include a Ljung-Box test statistic and p-value. The Ljung-Box test is a type of statistical test of whether any of a group of autocorrelations of a time series are different from zero. A high p-value (typically > 0.05) suggests that the residuals do not significantly deviate from white noise.

# To conclude that the residuals resemble white noise, you would want to see:

# No clear patterns or trends in the time plot of residuals.
# Autocorrelation coefficients within the confidence bounds in the ACF plot.
# A histogram of residuals that approximates a normal distribution.
# A Ljung-Box test with a p-value greater than a chosen significance level, often 0.05, indicating no significant autocorrelation.
# If your residuals meet these criteria, you can be reasonably confident that they resemble white noise, suggesting your model has adequately captured the information in the data, and what's left (the residuals) is essentially random.

#Considering these observations, the visual checks are not perfectly clean: there are a few spikes in the ACF plot and some skew in the distribution. However, the Ljung-Box test reported above gives the more definitive statistical answer: with Q* = 23.157 on 24 df and a p-value of 0.5105 (> 0.05), we fail to reject the null hypothesis of no autocorrelation, so the residuals are consistent with white noise.

#ANS) Residuals resemble white noise
#Q5) Use ets_model to construct a forecast for stock price over the next 71 months of the test sample. What is the point forecast of the stock price for November 2023? 

# Project the fitted ETS model across the 71 months of the test sample.
ets_aaa_forecast <- forecast(ets_aaa, h = 71)
ets_aaa_forecast
##          Point Forecast    Lo 80     Hi 80    Lo 95     Hi 95
## Jan 2018       52.17380 50.28809  54.05950 49.28986  55.05773
## Feb 2018       52.61698 49.91412  55.31984 48.48331  56.75065
## Mar 2018       52.89254 49.53659  56.24849 47.76006  58.02502
## Apr 2018       53.25833 49.32994  57.18672 47.25038  59.26628
## May 2018       54.04955 49.59752  58.50159 47.24076  60.85835
## Jun 2018       53.70498 48.76200  58.64796 46.14535  61.26461
## Jul 2018       55.18509 49.77444  60.59573 46.91022  63.45996
## Aug 2018       55.34279 49.48170  61.20388 46.37902  64.30656
## Sep 2018       56.11168 49.81322  62.41014 46.47902  65.74434
## Oct 2018       57.93731 51.21160  64.66301 47.65123  68.22339
## Nov 2018       58.06144 50.91640  65.20648 47.13404  68.98884
## Dec 2018       58.82797 51.26982  66.38613 47.26877  70.38717
## Jan 2019       58.68226 50.71582  66.64870 46.49864  70.86588
## Feb 2019       59.12544 50.75465  67.49624 46.32342  71.92746
## Mar 2019       59.40100 50.62887  68.17314 45.98518  72.81683
## Apr 2019       59.76679 50.59562  68.93797 45.74069  73.79290
## May 2019       60.55802 50.98953  70.12651 45.92428  75.19176
## Jun 2019       60.21345 50.24888  70.17801 44.97396  75.45294
## Jul 2019       61.69355 51.33374  72.05337 45.84958  77.53752
## Aug 2019       61.85126 51.09667  72.60584 45.40353  78.29898
## Sep 2019       62.62014 51.47096  73.76933 45.56894  79.67135
## Oct 2019       64.44577 52.90191  75.98964 46.79095  82.10059
## Nov 2019       64.56990 52.63105  76.50876 46.31100  82.82881
## Dec 2019       65.33644 53.00208  77.67079 46.47267  84.20020
## Jan 2020       65.19073 52.46014  77.92131 45.72097  84.66048
## Feb 2020       65.63391 52.50632  78.76150 45.55699  85.71082
## Mar 2020       65.90947 52.38392  79.43502 45.22392  86.59502
## Apr 2020       66.27526 52.35067  80.19985 44.97943  87.57109
## May 2020       67.06648 52.74166  81.39131 45.15856  88.97441
## Jun 2020       66.72191 51.99558  81.44824 44.19994  89.24388
## Jul 2020       68.20202 53.07282  83.33121 45.06391  91.34012
## Aug 2020       68.35972 52.82623  83.89321 44.60330  92.11614
## Sep 2020       69.12861 53.18932  85.06790 44.75157  93.50565
## Oct 2020       70.95424 54.60759  87.30088 45.95420  95.95428
## Nov 2020       71.07837 54.32275  87.83398 45.45287  96.70387
## Dec 2020       71.84490 54.67866  89.01114 45.59140  98.09840
## Jan 2021       71.69919 54.12056  89.27782 44.81500  98.58338
## Feb 2021       72.14237 54.14968  90.13507 44.62492  99.65983
## Mar 2021       72.41793 54.00940  90.82647 44.26451 100.57136
## Apr 2021       72.78372 53.95754  91.60991 43.99156 101.57589
## May 2021       73.57495 54.32929  92.82061 44.14125 103.00865
## Jun 2021       73.23038 53.56338  92.89737 43.15231 103.30845
## Jul 2021       74.71048 54.62028  94.80069 43.98516 105.43580
## Aug 2021       74.86819 54.35287  95.38350 43.49272 106.24365
## Sep 2021       75.63707 54.69473  96.57942 43.60852 107.66562
## Oct 2021       77.46270 56.09140  98.83400 44.77812 110.14729
## Nov 2021       77.58683 55.78463  99.38904 44.24324 110.93043
## Dec 2021       78.35337 56.11830 100.58843 44.34777 112.35896
## Jan 2022       78.20766 55.53771 100.87760 43.53697 112.87834
## Feb 2022       78.65084 55.54409 101.75759 43.31211 113.98957
## Mar 2022       78.92640 55.38085 102.47194 42.91659 114.93620
## Apr 2022       79.29219 55.30586 103.27852 42.60826 115.97612
## May 2022       80.08341 55.65429 104.51254 42.72230 117.44453
## Jun 2022       79.73884 54.86493 104.61276 41.69747 117.78021
## Jul 2022       81.21895 55.89823 106.53967 42.49425 119.94365
## Aug 2022       81.37665 55.60711 107.14619 41.96554 120.78776
## Sep 2022       82.14554 55.92517 108.36591 42.04494 122.24614
## Oct 2022       83.97117 57.29794 110.64439 43.17799 124.76435
## Nov 2022       84.09530 56.96720 111.22340 42.60645 125.58415
## Dec 2022       84.86183 57.27683 112.44683 42.67422 127.04944
## Jan 2023       84.71612 56.67216 112.76008 41.82658 127.60566
## Feb 2023       85.15930 56.65440 113.66421 41.56482 128.75379
## Mar 2023       85.43486 56.46700 114.40273 41.13233 129.73739
## Apr 2023       85.80066 56.36780 115.23351 40.78699 130.81432
## May 2023       86.59188 56.69202 116.49174 40.86399 132.31977
## Jun 2023       86.24731 55.87842 116.61619 39.80210 132.69251
## Jul 2023       87.72741 56.88748 118.56734 40.56181 134.89301
## Aug 2023       87.88512 56.57213 119.19810 39.99603 135.77420
## Sep 2023       88.65400 56.86594 120.44207 40.03836 137.26965
## Oct 2023       90.47963 58.21449 122.74478 41.13435 139.82492
## Nov 2023       90.60376 57.85952 123.34800 40.52577 140.68176
# Keep only the point forecasts from November 2023 onward (the final month).
window(ets_aaa_forecast$mean, start = c(2023, 11))
##           Nov
## 2023 90.60376
#Q6) What is the RMSE of ets_model on the test sample?

# Training- and test-set error measures for the ETS forecast.
accuracy(object = ets_aaa_forecast, x = google)
##                      ME      RMSE       MAE         MPE      MAPE      MASE
## Training set  0.1095535  1.379364  1.082399  0.09097052  5.898302 0.2414082
## Test set     19.8756330 30.394350 21.311753 16.00116802 18.597786 4.7531756
##                    ACF1 Theil's U
## Training set 0.04419854        NA
## Test set     0.93901599  3.211076
## SECTION 4: ARIMA ##

#Q1) Now, let’s use an ARIMA model to forecast stock price. Since, there are a large number of parameters with which to define the ARIMA model, use the auto.arima() function to automatically determine the best parameters. Set it up to do an exhaustive search by setting stepwise to F and approximation to F. For instance, if your dataset is called train, run

# auto.arima(train, approximation = F, stepwise = F)

#Call this auto_arima_model. How many ordinary autoregressive lag variables have been used in auto_arima_model?

# Exhaustive ARIMA order search: both the stepwise shortcut and the
# likelihood approximation are switched off. The reserved constant FALSE is
# used instead of F, because F is an ordinary variable that can be reassigned.
auto_arima_model <- auto.arima(train, approximation = FALSE, stepwise = FALSE)
auto_arima_model
## Series: train 
## ARIMA(0,2,1)(2,0,0)[12] 
## 
## Coefficients:
##           ma1    sar1    sar2
##       -0.9785  0.0045  0.2601
## s.e.   0.0204  0.0889  0.1010
## 
## sigma^2 = 2.202:  log likelihood = -236.49
## AIC=480.98   AICc=481.3   BIC=492.45
# ARIMA(p,d,q) model, where:
  
# p is the order (number of time lags) of the autoregressive model.
# d is the degree of differencing (the number of times the data have had past values subtracted).
# q is the order of the moving average model.

# The first set of parameters (0,2,1) refers to the non-seasonal part of the model:
  
# 0 indicates that there are zero ordinary autoregressive (AR) lag variables used in the non-seasonal component of the model.
# 2 indicates the degree of differencing in the non-seasonal component.
# 1 indicates that there is one moving average (MA) term in the non-seasonal component.

#The second set of parameters (2,0,0)[12] refers to the seasonal part of the model:
  
# 2 indicates that there are two seasonal AR terms.
# 0 indicates that there is no differencing in the seasonal component.
# 0 indicates that there are no seasonal MA terms.
# [12] indicates the seasonal period, which in this case is 12, suggesting a yearly seasonality in monthly data.

# Given this information, in your auto_arima_model, there are no ordinary autoregressive lag variables used in the non-seasonal part of the model (p=0). 
# The model does include two seasonal autoregressive terms, but since your question specifically asks about "ordinary" autoregressive lag variables, 
# the answer would be 0 for the non-seasonal component of the ARIMA model.
#Q2) What is the number of ordinary differences used in auto_arima_model? 

#Ans: 2

#Q3) How many ordinary moving average lags have been used in auto_arima_model?
#Ans: 1 

#Q4) How many seasonal autoregressive lag variables have been used in auto_arima_model?
#Ans: 2
#Q5) Do the residuals look like white noise? To answer this question, examine an Acf() or ggAcf() plot and the result of the Ljung-Box test.

# Residual diagnostics for the automatically selected ARIMA model.
checkresiduals(object = auto_arima_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,2,1)(2,0,0)[12]
## Q* = 30.77, df = 21, p-value = 0.07756
## 
## Model df: 3.   Total lags used: 24
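#Ans: The residuals resemble white noise (Ljung-Box p-value = 0.07756 > 0.05, so the null of no autocorrelation is not rejected).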
#Q6) Use auto_arima_model to construct a forecast for stock price over the next 71 months of the test sample. What is the point forecast of the stock price for November 2023?

# Project the selected ARIMA model across the 71 months of the test sample.
auto_arima_forecast <- forecast(auto_arima_model, h = 71)
auto_arima_forecast
##          Point Forecast    Lo 80     Hi 80    Lo 95     Hi 95
## Jan 2018       52.51485 50.61321  54.41650 49.60654  55.42316
## Feb 2018       52.32960 49.61110  55.04810 48.17201  56.48719
## Mar 2018       53.34111 49.97580  56.70643 48.19431  58.48792
## Apr 2018       53.07912 49.15166  57.00658 47.07259  59.08565
## May 2018       54.04425 49.60665  58.48185 47.25752  60.83097
## Jun 2018       53.86082 48.94850  58.77313 46.34808  61.37356
## Jul 2018       55.25959 49.89826  60.62093 47.06014  63.45904
## Aug 2018       55.63540 49.84447  61.42632 46.77894  64.49185
## Sep 2018       56.16946 49.96407  62.37485 46.67913  65.65979
## Oct 2018       56.67308 50.06525  63.28092 46.56727  66.77890
## Nov 2018       56.72594 49.72537  63.72652 46.01948  67.43241
## Dec 2018       57.30722 49.92183  64.69260 46.01225  68.60219
## Jan 2019       58.02931 50.26303  65.79560 46.15181  69.90682
## Feb 2019       58.76857 50.62695  66.91018 46.31704  71.22010
## Mar 2019       59.25214 50.73982  67.76446 46.23367  72.27061
## Apr 2019       60.64110 51.76193  69.52027 47.06159  74.22062
## May 2019       61.80794 52.56515  71.05074 47.67230  75.94358
## Jun 2019       61.47354 51.86979  71.07729 46.78587  76.16121
## Jul 2019       62.15940 52.19691  72.12190 46.92309  77.39572
## Aug 2019       62.67239 52.35297  72.99180 46.89021  78.45457
## Sep 2019       63.32850 52.65364  74.00335 47.00272  79.65428
## Oct 2019       64.47546 53.44635  75.50457 47.60789  81.34303
## Nov 2019       64.93419 53.55175  76.31664 47.52624  82.34214
## Dec 2019       65.65828 53.92319  77.39336 47.71101  83.60554
## Jan 2020       66.10865 53.89340  78.32390 47.42704  84.79026
## Feb 2020       66.46023 53.76902  79.15143 47.05070  85.86975
## Mar 2020       67.12197 53.95836  80.28557 46.98997  87.25397
## Apr 2020       67.45649 53.82346  81.08951 46.60658  88.30640
## May 2020       68.10922 54.00928  82.20916 46.54522  89.67322
## Jun 2020       68.45647 53.89168  83.02125 46.18155  90.73138
## Jul 2020       69.21985 54.19192  84.24778 46.23662  92.20308
## Aug 2020       69.71636 54.22667  85.20605 46.02692  93.40579
## Sep 2020       70.25467 54.30431  86.20504 45.86070  94.64865
## Oct 2020       70.78727 54.37707  87.19747 45.69003  95.88451
## Nov 2020       71.19953 54.33010  88.06896 45.39996  96.99910
## Dec 2020       71.75043 54.42217  89.07869 45.24915  98.25171
## Jan 2021       72.33674 54.54888  90.12460 45.13256  99.54092
## Feb 2021       72.92707 54.67968  91.17445 45.02010 100.83403
## Mar 2021       73.45227 54.74529  92.15926 44.84241 102.06213
## Apr 2021       74.21153 55.04474  93.37832 44.89846 103.52461
## May 2021       74.91443 55.28751  94.54136 44.89764 104.93123
## Jun 2021       75.22546 55.13795  95.31297 44.50427 105.94665
## Jul 2021       75.80374 55.25511  96.35237 44.37733 107.23016
## Aug 2021       76.33586 55.32547  97.34625 44.20325 108.46848
## Sep 2021       76.90540 55.43253  98.37826 44.06549 109.74531
## Oct 2021       77.60259 55.66645  99.53873 44.05417 111.15102
## Nov 2021       78.12022 55.71994 100.52050 43.86195 112.37849
## Dec 2021       78.70750 55.84215 101.57284 43.73796 113.67703
## Jan 2022       79.22373 55.86637 102.58109 43.50172 114.94574
## Feb 2022       79.71428 55.86406 103.56451 43.23851 116.19005
## Mar 2022       80.28523 55.94123 104.62922 43.05430 117.51615
## Apr 2022       80.77210 55.93335 105.61084 42.78451 118.75968
## May 2022       81.34149 56.00696 106.67603 42.59566 120.08732
## Jun 2022       81.82967 55.99825 107.66109 42.32392 121.33542
## Jul 2022       82.42729 56.09783 108.75674 42.15986 122.69471
## Aug 2022       82.95528 56.12660 109.78396 41.92436 123.98621
## Sep 2022       83.49432 56.16517 110.82346 41.69800 125.29063
## Oct 2022       84.03243 56.20155 111.86332 41.46877 126.59610
## Nov 2022       84.53845 56.20451 112.87239 41.20543 127.87147
## Dec 2022       85.08084 56.24249 113.91918 40.97639 129.18528
## Jan 2023       85.63212 56.28766 114.97657 40.75365 130.51058
## Feb 2023       86.18433 56.33236 116.03630 40.52968 131.83898
## Mar 2023       86.71996 56.35903 117.08089 40.28693 133.15299
## Apr 2023       87.31610 56.44475 118.18745 40.10244 134.52975
## May 2023       87.89794 56.51469 119.28120 39.90139 135.89449
## Jun 2023       88.37749 56.48082 120.27417 39.59574 137.15925
## Jul 2023       88.92705 56.51543 121.33867 39.35775 138.49635
## Aug 2023       89.46429 56.53616 122.39241 39.10507 139.82350
## Sep 2023       90.01131 56.56512 123.45749 38.85978 141.16283
## Oct 2023       90.59153 56.62570 124.55736 38.64527 142.53778
## Nov 2023       91.12490 56.63782 125.61197 38.38147 143.86833
#Q7) What is the RMSE of auto_arima_model on the test sample?

# Error measures for the auto.arima forecast, evaluated against `google`;
# the "Test set" row of the printed table answers the question above.
accuracy(auto_arima_forecast, x = google)
##                      ME      RMSE       MAE        MPE      MAPE      MASE
## Training set  0.1542287  1.455439  1.105696  0.4908096  5.870911 0.2466041
## Test set     19.1694470 29.684817 20.651638 15.3077909 17.968530 4.6059499
##                     ACF1 Theil's U
## Training set -0.03059774        NA
## Test set      0.93723326  3.128917
#Q8) Let us see if we can improve our ARIMA model by a variance stabilizing transformation. BoxCox.lambda() is a handy function for identifying the optimal value of lambda to stabilize variance. What is the optimal value of lambda?

# Re-run the automatic ARIMA search on the training series with a Box-Cox
# transformation whose lambda is estimated from that same series.
# `FALSE` is spelled out instead of `F`: `F` is an ordinary variable that can
# be reassigned elsewhere in a session, while `FALSE` is a reserved word.
auto.arima(
  train,
  approximation = FALSE,
  stepwise = FALSE,
  lambda = BoxCox.lambda(train)
)
## Series: train 
## ARIMA(0,1,0)(2,0,0)[12] with drift 
## Box Cox transformation: lambda= 0.5605232 
## 
## Coefficients:
##          sar1    sar2   drift
##       -0.0146  0.2236  0.0649
## s.e.   0.0874  0.0999  0.0401
## 
## sigma^2 = 0.1482:  log likelihood = -59.95
## AIC=127.89   AICc=128.21   BIC=139.4
# Print the Box-Cox lambda estimated for the training series on its own.
BoxCox.lambda(x = train)
## [1] 0.5605232
#Q9) Rather than using auto.arima(), let us specify an ARIMA model. Call this arima_model. What is the AICc for arima_model?

# Fit a hand-specified ARIMA(0,2,1)(3,1,0) model to the training series,
# using the Box-Cox lambda estimated from that same series.
arima_model <- Arima(
  train,
  order = c(0, 2, 1),
  seasonal = c(3, 1, 0),
  lambda = BoxCox.lambda(train)
)

# Print the fitted model (coefficients, sigma^2, AIC/AICc/BIC).
arima_model
## Series: train 
## ARIMA(0,2,1)(3,1,0)[12] 
## Box Cox transformation: lambda= 0.5605232 
## 
## Coefficients:
##           ma1     sar1     sar2     sar3
##       -0.9996  -0.8493  -0.5164  -0.3566
## s.e.   0.0299   0.0906   0.1207   0.1056
## 
## sigma^2 = 0.1617:  log likelihood = -69.96
## AIC=149.92   AICc=150.45   BIC=163.77
#Q10) Examine the results of Ljung-Box test (using the default of 24 lags) to see if the residuals resemble white noise.

# Residual diagnostics for the hand-specified model; the printed Ljung-Box
# test is what the question above asks about.
checkresiduals(object = arima_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,2,1)(3,1,0)[12]
## Q* = 19.616, df = 20, p-value = 0.4822
## 
## Model df: 4.   Total lags used: 24
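#ANS: The Ljung-Box p-value (0.4822) is above 0.05, so we fail to reject the null hypothesis of no autocorrelation; the residuals resemble white noise.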
#Q11) Use arima_model to construct a forecast for stock price over the next 71 months of the test sample. What is the point forecast of the stock price for November 2023?

# Forecast 71 months ahead with the hand-specified model, then print the
# table of point forecasts with their 80% and 95% interval bounds.
arima_forecast <- forecast(arima_model, h = 71)
arima_forecast
##          Point Forecast    Lo 80     Hi 80    Lo 95     Hi 95
## Jan 2018       53.25728 50.32182  56.26563 48.79755  57.88750
## Feb 2018       53.48578 49.32634  57.79247 47.18457  60.13143
## Mar 2018       53.43773 48.33989  58.75880 45.73255  61.66510
## Apr 2018       52.46195 46.61500  58.61034 43.64330  61.98574
## May 2018       54.76884 48.08957  61.82682 44.70911  65.71449
## Jun 2018       53.92719 46.64619  61.66816 42.98080  65.94962
## Jul 2018       56.92262 48.84232  65.54145 44.78626  70.31884
## Aug 2018       57.13551 48.46110  66.43129 44.12488  71.59981
## Sep 2018       57.78854 48.51611  67.76597 43.89804  73.32843
## Oct 2018       59.26022 49.34624  69.96282 44.42328  75.94224
## Nov 2018       58.90682 48.51014  70.18007 43.36832  76.49623
## Dec 2018       59.40921 48.47817  71.30474 43.09014  77.98497
## Jan 2019       60.36662 48.73873  73.07336 43.02954  80.22806
## Feb 2019       61.33771 49.02865  74.84158 43.00731  82.46388
## Mar 2019       61.92997 48.99059  76.18268 42.68522  84.24795
## Apr 2019       62.76079 49.18121  77.77336 42.58707  86.28782
## May 2019       64.82761 50.48906  80.71679 43.54237  89.74158
## Jun 2019       63.12834 48.43978  79.49826 41.36309  88.82846
## Jul 2019       67.79276 52.06873  85.31029 44.49044  95.29236
## Aug 2019       68.06933 51.79347  86.26638 43.97687  96.65807
## Sep 2019       68.69031 51.83208  87.59803 43.76130  98.41607
## Oct 2019       72.96110 55.09694  92.99127 46.54208 104.44952
## Nov 2019       73.76775 55.30009  94.53179 46.48074 106.42947
## Dec 2019       75.16240 56.02409  96.72681 46.90435 109.09893
## Jan 2020       75.85174 55.84692  98.49646 46.35895 111.52377
## Feb 2020       75.54751 54.82402  99.13219 45.04969 112.74299
## Mar 2020       77.14901 55.47310 101.90166 45.28585 116.21470
## Apr 2020       76.68165 54.34805 102.32066 43.91028 117.19140
## May 2020       79.88375 56.38756 106.89755 45.42386 122.57899
## Jun 2020       77.89681 53.99265 105.56213 42.91791 121.68208
## Jul 2020       82.28256 57.03909 111.49671 45.34331 128.51874
## Aug 2020       82.75429 56.75602 112.95822 44.76121 130.59505
## Sep 2020       83.86775 57.02380 115.15123 44.68122 133.45006
## Oct 2020       86.20802 58.32818 118.75577 45.53426 137.81264
## Nov 2020       85.87657 57.38846 119.28152 44.37982 138.88787
## Dec 2020       87.20304 57.84543 121.71790 44.47942 142.00475
## Jan 2021       88.81861 58.35155 124.75995 44.53410 145.92437
## Feb 2021       90.17213 58.64558 127.49542 44.40582 149.51573
## Mar 2021       91.12454 58.61491 129.76021 43.99669 152.60193
## Apr 2021       93.38666 59.66685 133.55450 44.54600 157.33169
## May 2021       96.95729 61.79039 138.88602 46.03704 163.71723
## Jun 2021       94.72778 59.17337 137.41218 43.37668 162.78308
## Jul 2021       98.75652 61.66282 143.29562 45.18523 169.77102
## Aug 2021       99.52545 61.50218 145.34293 44.68386 172.62869
## Sep 2021      100.89724 61.83361 148.10187 44.61464 176.25504
## Oct 2021      105.00065 64.36573 154.09952 46.45212 183.38104
## Nov 2021      105.63435 64.09969 155.99358 45.86679 186.08013
## Dec 2021      107.44404 64.78030 159.28461 46.10202 190.29064
## Jan 2022      108.96931 64.83446 162.83700 45.61949 195.12868
## Feb 2022      109.72027 64.28749 165.45675 44.63536 198.95515
## Mar 2022      110.93827 64.12486 168.62737 43.99223 203.37741
## Apr 2022      111.15163 63.18921 170.58069 42.70890 206.47589
## May 2022      114.89069 65.01196 176.78827 43.75599 214.20244
## Jun 2022      113.32516 62.72270 176.57423 41.36422 214.93974
## Jul 2022      118.47886 65.63146 184.51559 43.31708 224.56664
## Aug 2022      119.29743 65.19788 187.19399 42.48932 228.45974
## Sep 2022      120.75415 65.26135 190.64887 42.08201 233.20196
## Oct 2022      124.28281 66.90511 196.64229 42.97993 240.72234
## Nov 2022      124.59793 66.10475 198.70651 41.87137 243.95136
## Dec 2022      126.29596 66.35856 202.46750 41.63402 249.03904
## Jan 2023      128.14692 66.53238 206.74414 41.25152 254.88318
## Feb 2023      129.57744 66.39740 210.50299 40.62718 260.16265
## Mar 2023      131.12581 66.35714 214.40841 40.08809 265.60590
## Apr 2023      132.72249 66.35819 218.37580 39.58997 271.12068
## May 2023      136.75641 68.15518 225.38456 40.52547 279.98598
## Jun 2023      134.75858 65.53000 224.87079 37.96154 280.57519
## Jul 2023      140.72976 68.72941 234.32711 39.99983 292.15182
## Aug 2023      141.76805 68.32924 237.61808 39.20458 296.94086
## Sep 2023      143.44556 68.39777 241.71397 38.78420 302.62151
## Oct 2023      148.56019 70.94570 250.14240 40.29723 313.09089
## Nov 2023      149.56638 70.52404 253.41277 39.49739 317.87316
#Q12) What is the RMSE of arima_model on the test sample?

# Error measures for the hand-specified ARIMA forecast, evaluated against
# `google`; the "Test set" row of the printed table holds the test-sample RMSE.
accuracy(arima_forecast, x = google)
##                       ME      RMSE       MAE         MPE      MAPE      MASE
## Training set -0.01875138  1.414883  1.038791  0.01475292  5.477284 0.2316822
## Test set     -1.16572279 18.435041 13.723167 -3.91532499 13.862165 3.0606878
##                     ACF1 Theil's U
## Training set 0.009541535        NA
## Test set     0.920142748  2.235569