set.seed(100)
ar_3_sim1 = arima.sim(list(order =c(3,0,0), ar=c(0.7, 0.2,-0.5)), n = 200)
ar_3_sim2 = arima.sim(list(order =c(3,0,0), ar=c(-0.8, 0.1, 0.3)), n = 200)
par(mfrow=c(1,2))
plot.ts(ar_3_sim1)
plot.ts(ar_3_sim2)
acf(ar_3_sim1, lwd = 3)
acf(ar_3_sim2, lwd = 3)
pacf(ar_3_sim1, lwd = 3)
pacf(ar_3_sim2, lwd = 3)
### From both simulated models plot, we observe that ACF data goes to zero. For the PACF we observe a cut off around lag value of 3.
ma_3_sim1 = arima.sim(list(order =c(0,0,3), ma=c(0.9, 0.6,-0.1)), n = 200)
ma_3_sim2 = arima.sim(list(order =c(0,0,3), ma=c(0.1, 0.4, -0.4)), n = 200)
par(mfrow=c(1,2))
plot.ts(ma_3_sim1)
plot.ts(ma_3_sim2)
acf(ma_3_sim1, lwd = 3)
acf(ma_3_sim2, lwd = 3)
pacf(ma_3_sim1, lwd = 3)
pacf(ma_3_sim2, lwd = 3)
### From both models plot, we observe that ACF data goes to zero. For the PACF, we observe a cut off around lag value of 7.
ar_2_ma_2_sim1 = arima.sim(list(order =c(2,0,2), ar=c(0.7, 0.2), ma=c(-0.3, 0.5)), n = 200)
ar_2_ma_2_sim2 = arima.sim(list(order =c(2,0,2), ar=c(-0.3, 0.6), ma=c(-0.5, 0.1)), n = 200)
par(mfrow=c(1,2))
plot.ts(ar_2_ma_2_sim1)
plot.ts(ar_2_ma_2_sim2)
acf(ar_2_ma_2_sim1, lwd = 3)
acf(ar_2_ma_2_sim2, lwd = 3)
pacf(ar_2_ma_2_sim1, lwd = 3)
pacf(ar_2_ma_2_sim2, lwd = 3)
### From both models plot, we observe that ACF data goes to zero. For both PACF plot, we observe they have clear cut off and for first model cutoff is at lag value of 2 and that of second model cutoff is at lag value of 1.
ar_3_ma_3.sim1 = arima.sim(list(order =c(3,0,3), ar=c(0.3, -0.2, 0.4), ma=c(0.2, -0.7, 0.8)), n = 200)
ar_3_ma_3.sim2 = arima.sim(list(order =c(3,0,3), ar=c(0.4, -0.5, -0.1), ma=c(0.6, -0.2, -0.7)), n = 200)
par(mfrow=c(1,2))
plot.ts(ar_3_ma_3.sim1)
plot.ts(ar_3_ma_3.sim2)
acf(ar_3_ma_3.sim1, lwd = 3)
acf(ar_3_ma_3.sim2, lwd = 3)
pacf(ar_3_ma_3.sim1, lwd = 3)
pacf(ar_3_ma_3.sim2, lwd = 3)
### From both models plot, we objectively observe that ACF data goes to zero. For both PACF plot, we observe for first model cutoff is at lag value of 5 and that of second model cutoff is at lag value of 8.
library(astsa)
plot.ts(cmort)
acf(cmort, lwd = 3)
pacf(cmort, lwd = 3)
### It looks like there is no pattern as data is stationary with constant mean and constant variance. Also as observed in ACF plot it readily goes to zero which means data is stationary ## b) Use the data “gtemp” in astsa package – annual global mean land-ocean temperature deviations
plot.ts(gtemp)
acf(gtemp, lwd = 3)
pacf(gtemp, lwd = 3)
### In this case data looks like non stationary as trend is moving up and there is a non constant mean as well. ACF is relatively stable with not approaching clear zero.
diff.gtemp = diff(gtemp, lag=1)
plot.ts(diff.gtemp)
acf(diff.gtemp, lwd = 3)
pacf(diff.gtemp, lwd = 3)
### It looks like there is no pattern as data is stationary with constant mean and constant variance. Also as observed in ACF plot it readily goes to zero which means data is stationary
load("/Users/shamstabrez/Documents/Data_Driven_Decision/electricity.RData")
plot.ts(electricity)
acf(electricity, lwd = 3)
pacf(electricity, lwd = 3)
### From the time series plot we observe data as non stationary as there is an up trend which means non constant mean. We also observe non constant variance or heteroscedasticity. ## e) Based on the data “electricity” in electricity.RData – monthly US electricity generated, use the log transformed series, log(𝑌𝑡)
log_electricity = log(electricity)
plot.ts(log_electricity)
acf(log_electricity, lwd = 3)
pacf(log_electricity, lwd = 3)
### From the log transformed electricity time series plot we observe data as non stationary as there is an up trend which means non constant mean. We also observe constant variance or homoscedasticity. ## f) Based on the data “electricity” in electricity.RData – monthly US electricity generated, use 1 order differenced log transformed series, log(𝑌𝑡)′=log(𝑌𝑡)−log(𝑌𝑡−1)
diff_log_electricity = diff(log(electricity), lag=1)
plot.ts(diff_log_electricity)
acf(diff_log_electricity, lwd = 3)
pacf(diff_log_electricity, lwd = 3)
### From the first order difference log transformed electricity time series plot we observe data as stationary as there is no trend which means constant mean and constant variance or homoscedasticity. ## Exercise 3: Model Fit ## Use the data “cmort” in astsa package examined in Exercise 2
plot(cmort)
acf(cmort, lwd = 3)
pacf(cmort, lwd = 3)
### Both ACF and PACF plots have ACF dropping to zero and PACF has cutoff with lag value of 2 which shows an AR(2) model may be a fit for this data. ## b) Fit the model(s) which you chose in part (a) using sarima function in R and interpret the result.
fit.cmort = sarima(cmort, p=2, d=0, q=0)
## initial value 2.300097
## iter 2 value 2.148325
## iter 3 value 1.756362
## iter 4 value 1.751728
## iter 5 value 1.737837
## iter 6 value 1.737835
## iter 7 value 1.737834
## iter 8 value 1.737832
## iter 9 value 1.737831
## iter 10 value 1.737830
## iter 11 value 1.737829
## iter 12 value 1.737824
## iter 13 value 1.737818
## iter 14 value 1.737810
## iter 15 value 1.737805
## iter 16 value 1.737804
## iter 17 value 1.737804
## iter 18 value 1.737804
## iter 18 value 1.737804
## iter 18 value 1.737804
## final value 1.737804
## converged
## initial value 1.740035
## iter 2 value 1.740030
## iter 3 value 1.740016
## iter 4 value 1.740012
## iter 5 value 1.740006
## iter 6 value 1.740000
## iter 7 value 1.739986
## iter 8 value 1.739968
## iter 9 value 1.739959
## iter 10 value 1.739956
## iter 11 value 1.739954
## iter 12 value 1.739953
## iter 13 value 1.739952
## iter 14 value 1.739952
## iter 15 value 1.739952
## iter 16 value 1.739951
## iter 17 value 1.739951
## iter 18 value 1.739951
## iter 19 value 1.739950
## iter 20 value 1.739950
## iter 21 value 1.739949
## iter 22 value 1.739949
## iter 23 value 1.739949
## iter 24 value 1.739948
## iter 24 value 1.739948
## iter 24 value 1.739948
## final value 1.739948
## converged
### We can see that the p values of both AR coefficients are significant and AIC value is 6.333521 ## c) Check model diagnostics and explain if your chosen model seems appropriate. ### Model diagnostics plot above we can see that all residuals appear as white noise as within the boundaries. In Ljung-Box statistic, we observe all lags p-values is above 0.05. Hence we can say that model is appropriate. ## d) Make a prediction for next 4 values based on your final model.
sarima.for(cmort, 4, p=2, d=0, q=0)
## $pred
## Time Series:
## Start = c(1979, 41)
## End = c(1979, 44)
## Frequency = 52
## [1] 87.66207 86.85311 87.46615 87.37190
##
## $se
## Time Series:
## Start = c(1979, 41)
## End = c(1979, 44)
## Frequency = 52
## [1] 5.689543 6.193387 7.148343 7.612531
diff_gtemp = diff(gtemp, lag=1)
plot.ts(diff_gtemp)
acf(diff_gtemp, lwd = 3)
pacf(diff_gtemp, lwd = 3)
### In ACF and PACF plots, we observe ACF plot cutoff is at value 2 which shows an ma(2) model may be a fit for this data, And on PACF plot cutoff is at value 5 which shows ar(5) model may be a for for this data. We will fit data to both the models. ## b) Fit the model(s) which you chose in part (a) using sarima function and interpret the result. If you have multiple models, report all of them.
fit_diff_gtemp_ma2 = sarima(diff.gtemp, p=0, d=0, q=2)
## initial value -2.213853
## iter 2 value -2.315754
## iter 3 value -2.346899
## iter 4 value -2.347576
## iter 5 value -2.348789
## iter 6 value -2.348964
## iter 7 value -2.349074
## iter 8 value -2.349076
## iter 8 value -2.349076
## final value -2.349076
## converged
## initial value -2.346874
## iter 2 value -2.346888
## iter 3 value -2.346891
## iter 4 value -2.346892
## iter 5 value -2.346893
## iter 5 value -2.346893
## iter 5 value -2.346893
## final value -2.346893
## converged
fit_diff_gtemp_ma2
## $fit
##
## Call:
## stats::arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D,
## Q), period = S), xreg = xmean, include.mean = FALSE, transform.pars = trans,
## fixed = fixed, optim.control = list(trace = trc, REPORT = 1, reltol = tol))
##
## Coefficients:
## ma1 ma2 xmean
## -0.5134 -0.1915 0.0065
## s.e. 0.0830 0.0780 0.0025
##
## sigma^2 estimated as 0.00911: log likelihood = 119.71, aic = -231.41
##
## $degrees_of_freedom
## [1] 126
##
## $ttable
## Estimate SE t.value p.value
## ma1 -0.5134 0.0830 -6.1847 0.0000
## ma2 -0.1915 0.0780 -2.4552 0.0154
## xmean 0.0065 0.0025 2.5425 0.0122
##
## $AIC
## [1] -1.793892
##
## $AICc
## [1] -1.792404
##
## $BIC
## [1] -1.705216
fit_diff_gtemp_ar5 = sarima(diff.gtemp, p=5, d=0, q=0)
## initial value -2.197587
## iter 2 value -2.302454
## iter 3 value -2.338428
## iter 4 value -2.355919
## iter 5 value -2.356806
## iter 6 value -2.357706
## iter 7 value -2.358177
## iter 8 value -2.358216
## iter 9 value -2.358216
## iter 9 value -2.358216
## iter 9 value -2.358216
## final value -2.358216
## converged
## initial value -2.369568
## iter 2 value -2.369639
## iter 3 value -2.369649
## iter 4 value -2.369685
## iter 5 value -2.369688
## iter 6 value -2.369688
## iter 6 value -2.369688
## iter 6 value -2.369688
## final value -2.369688
## converged
fit_diff_gtemp_ar5
## $fit
##
## Call:
## stats::arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D,
## Q), period = S), xreg = xmean, include.mean = FALSE, transform.pars = trans,
## fixed = fixed, optim.control = list(trace = trc, REPORT = 1, reltol = tol))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 xmean
## -0.4592 -0.4483 -0.3914 -0.0844 -0.1955 0.0064
## s.e. 0.0862 0.0952 0.0971 0.0949 0.0862 0.0032
##
## sigma^2 estimated as 0.008693: log likelihood = 122.65, aic = -231.29
##
## $degrees_of_freedom
## [1] 123
##
## $ttable
## Estimate SE t.value p.value
## ar1 -0.4592 0.0862 -5.3243 0.0000
## ar2 -0.4483 0.0952 -4.7114 0.0000
## ar3 -0.3914 0.0971 -4.0290 0.0001
## ar4 -0.0844 0.0949 -0.8898 0.3753
## ar5 -0.1955 0.0862 -2.2664 0.0252
## xmean 0.0064 0.0032 1.9751 0.0505
##
## $AIC
## [1] -1.792972
##
## $AICc
## [1] -1.787634
##
## $BIC
## [1] -1.637788
sarima.for(diff_gtemp, 4, p=0, d=0, q=2)
## $pred
## Time Series:
## Start = 2010
## End = 2013
## Frequency = 1
## [1] -0.007650622 -0.007983511 0.006464234 0.006464234
##
## $se
## Time Series:
## Start = 2010
## End = 2013
## Frequency = 1
## [1] 0.09544714 0.10729062 0.10883624 0.10883624