library(astsa)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'forecast'
## The following object is masked from 'package:astsa':
##
## gas
##### PROBLEM 3
# Load GNP data
data(gnp)
gnp_diff <- diff(log(gnp)) # Differenced log GNP data
# Fit AR(1) model
ar1_model <- Arima(gnp_diff, order = c(1, 0, 0))
checkresiduals(ar1_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0) with non-zero mean
## Q* = 9.8979, df = 7, p-value = 0.1944
##
## Model df: 1. Total lags used: 8
# The residuals of the AR(1) model should ideally show no autocorrelation if the model fits well. We assess this by plotting the ACF of the residuals. If significant autocorrelations remain, it suggests the AR(1) model may be too simplistic
# Fit ARMA(1,2) model
arma12_model <- Arima(gnp_diff, order = c(1, 0, 2))
checkresiduals(arma12_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,2) with non-zero mean
## Q* = 6.0802, df = 5, p-value = 0.2985
##
## Model df: 3. Total lags used: 8
# The ARMA(1,2) model includes additional MA terms, potentially improving fit. We again check residual autocorrelation with the ACF plot. The residuals of this model should exhibit a better “white noise” structure compared to the AR(1) model.
# Compare model diagnostics
summary(ar1_model)
## Series: gnp_diff
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## 0.3467 0.0083
## s.e. 0.0627 0.0010
##
## sigma^2 = 9.112e-05: log likelihood = 718.61
## AIC=-1431.22 AICc=-1431.11 BIC=-1421.01
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 5.572162e-06 0.009502405 0.00713417 -Inf Inf 0.6207883
## ACF1
## Training set -0.02706632
summary(arma12_model)
## Series: gnp_diff
## ARIMA(1,0,2) with non-zero mean
##
## Coefficients:
## ar1 ma1 ma2 mean
## 0.2407 0.0761 0.1623 0.0083
## s.e. 0.2066 0.2026 0.0851 0.0010
##
## sigma^2 = 9.04e-05: log likelihood = 720.47
## AIC=-1430.95 AICc=-1430.67 BIC=-1413.93
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.005792e-05 0.00942203 0.007112098 -Inf Inf 0.6188677 0.00495519
# By comparing AIC/BIC values and residual diagnostics, we conclude which model better captures the patterns in the differenced log GNP data. Lower AIC/BIC values and reduced autocorrelation in residuals typically indicate a better fit. We write a short statement on which model appears preferable based on these diagnostics.
# ACF plots to compare residual autocorrelations
acf(residuals(ar1_model), main = "Residuals ACF - AR(1)")

acf(residuals(arma12_model), main = "Residuals ACF - ARMA(1,2)")

# Conclusion:
# Write a comparison based on the residual diagnostics output.
##### PROBLEM 4
# Load unemployment data from astsa
data(unemp)
# Differencing and plotting to inspect seasonality
plot(unemp, main = "Unemployment Data")

acf(unemp, main = "ACF of Unemployment Data")

# Fit seasonal ARIMA model based on observed seasonality
unemp_model <- auto.arima(unemp, seasonal = TRUE)
summary(unemp_model)
## Series: unemp
## ARIMA(5,0,0)(2,1,1)[12] with drift
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 sar1 sar2 sma1
## 1.1093 0.0739 -0.2046 0.0959 -0.1147 -0.1796 -0.1278 -0.5318
## s.e. 0.0530 0.0806 0.0782 0.0794 0.0529 0.1155 0.0889 0.1076
## drift
## 1.1571
## s.e. 0.8226
##
## sigma^2 = 437.9: log likelihood = -1606.32
## AIC=3232.64 AICc=3233.27 BIC=3271.5
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01297075 20.32682 15.55884 -0.2326888 4.421711 0.2140045
## ACF1
## Training set -0.004070416
# Forecasting the next 12 months
unemp_forecast <- forecast(unemp_model, h = 12)
plot(unemp_forecast, main = "12-Month Forecast for Unemployment")

# Display forecast details
unemp_forecast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1979 685.7619 658.9440 712.5797 644.7476 726.7762
## Feb 1979 694.0970 654.0442 734.1498 632.8415 755.3525
## Mar 1979 664.8749 611.6960 718.0538 583.5448 746.2050
## Apr 1979 601.6628 537.7136 665.6119 503.8610 699.4645
## May 1979 572.1428 497.5148 646.7708 458.0091 686.2765
## Jun 1979 690.1026 606.3946 773.8105 562.0823 818.1228
## Jul 1979 678.4570 586.7011 770.2129 538.1284 818.7855
## Aug 1979 647.2766 548.7077 745.8454 496.5286 798.0246
## Sep 1979 633.0690 528.6110 737.5270 473.3142 792.8237
## Oct 1979 611.4053 501.9925 720.8181 444.0729 778.7377
## Nov 1979 633.5862 519.9879 747.1846 459.8525 807.3199
## Dec 1979 630.0159 512.9300 747.1018 450.9484 809.0833
acf2(diff(diff(unemp),12), 50)

## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## ACF 0.21 0.33 0.15 0.17 0.10 0.06 -0.06 -0.02 -0.09 -0.17 -0.08 -0.48 -0.18
## PACF 0.21 0.29 0.05 0.05 0.01 -0.02 -0.12 -0.03 -0.05 -0.15 0.02 -0.43 -0.02
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
## ACF -0.16 -0.11 -0.15 -0.09 -0.09 0.03 -0.01 0.02 -0.02 0.01 -0.02 0.09
## PACF 0.15 0.03 -0.04 -0.01 0.00 0.01 0.01 -0.01 -0.16 0.01 -0.27 0.05
## [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37]
## ACF -0.05 -0.01 0.03 0.08 0.01 0.03 -0.05 0.01 0.02 -0.06 -0.02 -0.12
## PACF -0.01 -0.05 0.05 0.09 -0.04 0.02 -0.07 -0.01 -0.08 -0.08 -0.23 -0.08
## [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49]
## ACF 0.01 -0.03 -0.03 -0.10 -0.02 -0.13 0.00 -0.06 0.01 0.02 0.11 0.13
## PACF 0.06 -0.07 -0.01 0.03 -0.03 -0.11 -0.04 0.01 0.00 -0.03 -0.04 0.02
## [,50]
## ACF 0.10
## PACF 0.03
sarima(unemp, 2, 1, 0, 0, 1, 1, 12)
## initial value 3.340809
## iter 2 value 3.105512
## iter 3 value 3.086631
## iter 4 value 3.079778
## iter 5 value 3.069447
## iter 6 value 3.067659
## iter 7 value 3.067426
## iter 8 value 3.067418
## iter 8 value 3.067418
## final value 3.067418
## converged
## initial value 3.065481
## iter 2 value 3.065478
## iter 3 value 3.065477
## iter 3 value 3.065477
## iter 3 value 3.065477
## final value 3.065477
## converged
## <><><><><><><><><><><><><><>
##
## Coefficients:
## Estimate SE t.value p.value
## ar1 0.1351 0.0513 2.6326 0.0088
## ar2 0.2464 0.0515 4.7795 0.0000
## sma1 -0.6953 0.0381 -18.2362 0.0000
##
## sigma^2 estimated as 449.637 on 356 degrees of freedom
##
## AIC = 8.991114 AICc = 8.991303 BIC = 9.034383
##

sarima.for(unemp, 12, 2, 1, 0, 0, 1, 1, 12)

## $pred
## Jan Feb Mar Apr May Jun Jul Aug
## 1979 676.4664 685.1172 653.2388 585.6939 553.8813 664.4072 647.0657 611.0828
## Sep Oct Nov Dec
## 1979 594.6414 569.3997 587.5801 581.1833
##
## $se
## Jan Feb Mar Apr May Jun Jul
## 1979 21.20465 32.07710 43.70167 53.66329 62.85364 71.12881 78.73590
## Aug Sep Oct Nov Dec
## 1979 85.75096 92.28663 98.41329 104.19488 109.67935
#Model Fit:
# After fitting the model, we check diagnostic plots to confirm that the residuals resemble white noise, suggesting a good fit.
#Forecasting:
# We generate a 12-month forecast. If the forecasted values maintain seasonality, it confirms the model has effectively captured seasonal patterns in the data. The forecast plot shows predicted values with confidence intervals, which help assess forecast reliability.
#Interpretation:
# Based on the forecast, we interpret expected trends in unemployment over the next year, discussing any observed seasonal effects or overall trends in the forecasted values.
##### PROBLEM 5
# Load Johnson & Johnson earnings data
data(jj)
# Log-transform the data
log_jj <- log(jj)
plot(log_jj, main = "Log of Johnson & Johnson Earnings Data")

# Fit seasonal ARIMA model to log-transformed data
jj_model <- auto.arima(log_jj, seasonal = TRUE)
summary(jj_model)
## Series: log_jj
## ARIMA(2,0,0)(1,1,0)[4] with drift
##
## Coefficients:
## ar1 ar2 sar1 drift
## 0.2686 0.2855 -0.2695 0.0382
## s.e. 0.1137 0.1214 0.1212 0.0042
##
## sigma^2 = 0.007793: log likelihood = 82.47
## AIC=-154.95 AICc=-154.14 BIC=-143.04
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.002030151 0.08396647 0.06504478 NaN Inf 0.4058742 -0.03591245
# Forecast the next 4 quarters
jj_forecast <- forecast(jj_model, h = 4)
plot(jj_forecast, main = "4-Quarter Forecast for Johnson & Johnson Earnings (log-transformed)")

# Display forecasted values
jj_forecast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 1981 Q1 2.918596 2.805466 3.031725 2.745579 3.091612
## 1981 Q2 2.836761 2.719621 2.953900 2.657611 3.015910
## 1981 Q3 2.938476 2.814546 3.062405 2.748942 3.128010
## 1981 Q4 2.600120 2.474659 2.725581 2.408244 2.791996
plot(diff(diff(log(jj)),4))

acf2(diff(diff(log(jj)),4))

## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## ACF -0.44 0.15 -0.09 -0.21 0.11 -0.13 0.27 -0.07 -0.07 0.12 -0.21 0.21
## PACF -0.44 -0.05 -0.05 -0.33 -0.16 -0.19 0.13 0.08 -0.14 0.04 -0.03 0.14
## [,13] [,14] [,15] [,16] [,17] [,18] [,19]
## ACF -0.18 0.19 -0.09 -0.04 0.08 -0.14 0.14
## PACF -0.06 0.07 -0.01 -0.01 -0.01 -0.06 0.00
sarima(log(jj),1,1,0,1,1,0,4)
## initial value -2.232392
## iter 2 value -2.403794
## iter 3 value -2.409520
## iter 4 value -2.410263
## iter 5 value -2.410266
## iter 6 value -2.410266
## iter 6 value -2.410266
## final value -2.410266
## converged
## initial value -2.381009
## iter 2 value -2.381164
## iter 3 value -2.381165
## iter 3 value -2.381165
## iter 3 value -2.381165
## final value -2.381165
## converged
## <><><><><><><><><><><><><><>
##
## Coefficients:
## Estimate SE t.value p.value
## ar1 -0.5152 0.1009 -5.1064 0.000
## sar1 -0.3294 0.1109 -2.9697 0.004
##
## sigma^2 estimated as 0.008467914 on 77 degrees of freedom
##
## AIC = -1.848505 AICc = -1.846506 BIC = -1.758525
##

sarima.for(log(jj),4,1,1,0,1,1,0,4)

## $pred
## Qtr1 Qtr2 Qtr3 Qtr4
## 1981 2.902126 2.821452 2.919034 2.575784
##
## $se
## Qtr1 Qtr2 Qtr3 Qtr4
## 1981 0.09202127 0.10226343 0.12338542 0.13568573
#For the Johnson & Johnson earnings data, we log-transform the series to stabilize variance. Since the data are quarterly, we apply a seasonal ARIMA model using auto.arima().
#
#Model Selection:
# auto.arima() identifies the best seasonal model, which we check to ensure it aligns with the observed patterns in the data. The model output provides parameters for both seasonal and non-seasonal components.
#Forecasting:
# We forecast the next four quarters and plot the forecast along with the original data. The forecast plot includes confidence intervals, showing the likely range of future earnings.
#Interpretation:
# The seasonal ARIMA model captures both seasonal and trend components. Based on the 4-quarter forecast, we discuss expected quarterly earnings trends, potential seasonal effects, and the stability of the model as indicated by forecast intervals. This interpretation helps us understand short-term expectations for Johnson & Johnson's earnings trajectory.