library(astsa)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:astsa':
## 
##     gas
##### PROBLEM 3
# Load GNP data
data(gnp)
gnp_diff <- diff(log(gnp))  # Differenced log GNP data

# Fit AR(1) model
ar1_model <- Arima(gnp_diff, order = c(1, 0, 0))
checkresiduals(ar1_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,0) with non-zero mean
## Q* = 9.8979, df = 7, p-value = 0.1944
## 
## Model df: 1.   Total lags used: 8
# The residuals of the AR(1) model should ideally show no autocorrelation if the model fits well. We assess this by plotting the ACF of the residuals. If significant autocorrelations remain, it suggests the AR(1) model may be too simplistic

# Fit ARMA(1,2) model
arma12_model <- Arima(gnp_diff, order = c(1, 0, 2))
checkresiduals(arma12_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,2) with non-zero mean
## Q* = 6.0802, df = 5, p-value = 0.2985
## 
## Model df: 3.   Total lags used: 8
# The ARMA(1,2) model includes additional MA terms, potentially improving fit. We again check residual autocorrelation with the ACF plot. The residuals of this model should exhibit a better “white noise” structure compared to the AR(1) model.

# Compare model diagnostics
summary(ar1_model)
## Series: gnp_diff 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1    mean
##       0.3467  0.0083
## s.e.  0.0627  0.0010
## 
## sigma^2 = 9.112e-05:  log likelihood = 718.61
## AIC=-1431.22   AICc=-1431.11   BIC=-1421.01
## 
## Training set error measures:
##                        ME        RMSE        MAE  MPE MAPE      MASE
## Training set 5.572162e-06 0.009502405 0.00713417 -Inf  Inf 0.6207883
##                     ACF1
## Training set -0.02706632
summary(arma12_model)
## Series: gnp_diff 
## ARIMA(1,0,2) with non-zero mean 
## 
## Coefficients:
##          ar1     ma1     ma2    mean
##       0.2407  0.0761  0.1623  0.0083
## s.e.  0.2066  0.2026  0.0851  0.0010
## 
## sigma^2 = 9.04e-05:  log likelihood = 720.47
## AIC=-1430.95   AICc=-1430.67   BIC=-1413.93
## 
## Training set error measures:
##                        ME       RMSE         MAE  MPE MAPE      MASE       ACF1
## Training set 1.005792e-05 0.00942203 0.007112098 -Inf  Inf 0.6188677 0.00495519
# By comparing AIC/BIC values and residual diagnostics, we conclude which model better captures the patterns in the differenced log GNP data. Lower AIC/BIC values and reduced autocorrelation in residuals typically indicate a better fit. We write a short statement on which model appears preferable based on these diagnostics.

# ACF plots to compare residual autocorrelations
acf(residuals(ar1_model), main = "Residuals ACF - AR(1)")

acf(residuals(arma12_model), main = "Residuals ACF - ARMA(1,2)")

# Conclusion:
# Write a comparison based on the residual diagnostics output.


##### PROBLEM 4
# Load unemployment data from astsa
data(unemp)

# Differencing and plotting to inspect seasonality
plot(unemp, main = "Unemployment Data")

acf(unemp, main = "ACF of Unemployment Data")

# Fit seasonal ARIMA model based on observed seasonality
unemp_model <- auto.arima(unemp, seasonal = TRUE)
summary(unemp_model)
## Series: unemp 
## ARIMA(5,0,0)(2,1,1)[12] with drift 
## 
## Coefficients:
##          ar1     ar2      ar3     ar4      ar5     sar1     sar2     sma1
##       1.1093  0.0739  -0.2046  0.0959  -0.1147  -0.1796  -0.1278  -0.5318
## s.e.  0.0530  0.0806   0.0782  0.0794   0.0529   0.1155   0.0889   0.1076
##        drift
##       1.1571
## s.e.  0.8226
## 
## sigma^2 = 437.9:  log likelihood = -1606.32
## AIC=3232.64   AICc=3233.27   BIC=3271.5
## 
## Training set error measures:
##                       ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -0.01297075 20.32682 15.55884 -0.2326888 4.421711 0.2140045
##                      ACF1
## Training set -0.004070416
# Forecasting the next 12 months
unemp_forecast <- forecast(unemp_model, h = 12)
plot(unemp_forecast, main = "12-Month Forecast for Unemployment")

# Display forecast details
unemp_forecast
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Jan 1979       685.7619 658.9440 712.5797 644.7476 726.7762
## Feb 1979       694.0970 654.0442 734.1498 632.8415 755.3525
## Mar 1979       664.8749 611.6960 718.0538 583.5448 746.2050
## Apr 1979       601.6628 537.7136 665.6119 503.8610 699.4645
## May 1979       572.1428 497.5148 646.7708 458.0091 686.2765
## Jun 1979       690.1026 606.3946 773.8105 562.0823 818.1228
## Jul 1979       678.4570 586.7011 770.2129 538.1284 818.7855
## Aug 1979       647.2766 548.7077 745.8454 496.5286 798.0246
## Sep 1979       633.0690 528.6110 737.5270 473.3142 792.8237
## Oct 1979       611.4053 501.9925 720.8181 444.0729 778.7377
## Nov 1979       633.5862 519.9879 747.1846 459.8525 807.3199
## Dec 1979       630.0159 512.9300 747.1018 450.9484 809.0833
acf2(diff(diff(unemp),12), 50)

##      [,1] [,2] [,3] [,4] [,5]  [,6]  [,7]  [,8]  [,9] [,10] [,11] [,12] [,13]
## ACF  0.21 0.33 0.15 0.17 0.10  0.06 -0.06 -0.02 -0.09 -0.17 -0.08 -0.48 -0.18
## PACF 0.21 0.29 0.05 0.05 0.01 -0.02 -0.12 -0.03 -0.05 -0.15  0.02 -0.43 -0.02
##      [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
## ACF  -0.16 -0.11 -0.15 -0.09 -0.09  0.03 -0.01  0.02 -0.02  0.01 -0.02  0.09
## PACF  0.15  0.03 -0.04 -0.01  0.00  0.01  0.01 -0.01 -0.16  0.01 -0.27  0.05
##      [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37]
## ACF  -0.05 -0.01  0.03  0.08  0.01  0.03 -0.05  0.01  0.02 -0.06 -0.02 -0.12
## PACF -0.01 -0.05  0.05  0.09 -0.04  0.02 -0.07 -0.01 -0.08 -0.08 -0.23 -0.08
##      [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49]
## ACF   0.01 -0.03 -0.03 -0.10 -0.02 -0.13  0.00 -0.06  0.01  0.02  0.11  0.13
## PACF  0.06 -0.07 -0.01  0.03 -0.03 -0.11 -0.04  0.01  0.00 -0.03 -0.04  0.02
##      [,50]
## ACF   0.10
## PACF  0.03
sarima(unemp, 2, 1, 0, 0, 1, 1, 12)
## initial  value 3.340809 
## iter   2 value 3.105512
## iter   3 value 3.086631
## iter   4 value 3.079778
## iter   5 value 3.069447
## iter   6 value 3.067659
## iter   7 value 3.067426
## iter   8 value 3.067418
## iter   8 value 3.067418
## final  value 3.067418 
## converged
## initial  value 3.065481 
## iter   2 value 3.065478
## iter   3 value 3.065477
## iter   3 value 3.065477
## iter   3 value 3.065477
## final  value 3.065477 
## converged
## <><><><><><><><><><><><><><>
##  
## Coefficients: 
##      Estimate     SE  t.value p.value
## ar1    0.1351 0.0513   2.6326  0.0088
## ar2    0.2464 0.0515   4.7795  0.0000
## sma1  -0.6953 0.0381 -18.2362  0.0000
## 
## sigma^2 estimated as 449.637 on 356 degrees of freedom 
##  
## AIC = 8.991114  AICc = 8.991303  BIC = 9.034383 
## 

sarima.for(unemp, 12, 2, 1, 0, 0, 1, 1, 12)

## $pred
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 1979 676.4664 685.1172 653.2388 585.6939 553.8813 664.4072 647.0657 611.0828
##           Sep      Oct      Nov      Dec
## 1979 594.6414 569.3997 587.5801 581.1833
## 
## $se
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 1979  21.20465  32.07710  43.70167  53.66329  62.85364  71.12881  78.73590
##            Aug       Sep       Oct       Nov       Dec
## 1979  85.75096  92.28663  98.41329 104.19488 109.67935
#Model Fit:
#  After fitting the model, we check diagnostic plots to confirm that the residuals resemble white noise, suggesting a good fit.
#Forecasting:
#  We generate a 12-month forecast. If the forecasted values maintain seasonality, it confirms the model has effectively captured seasonal patterns in the data. The forecast plot shows predicted values with confidence intervals, which help assess forecast reliability.
#Interpretation:
#  Based on the forecast, we interpret expected trends in unemployment over the next year, discussing any observed seasonal effects or overall trends in the forecasted values.
##### PROBLEM 5
# Load Johnson & Johnson earnings data
data(jj)

# Log-transform the data
log_jj <- log(jj)
plot(log_jj, main = "Log of Johnson & Johnson Earnings Data")

# Fit seasonal ARIMA model to log-transformed data
jj_model <- auto.arima(log_jj, seasonal = TRUE)
summary(jj_model)
## Series: log_jj 
## ARIMA(2,0,0)(1,1,0)[4] with drift 
## 
## Coefficients:
##          ar1     ar2     sar1   drift
##       0.2686  0.2855  -0.2695  0.0382
## s.e.  0.1137  0.1214   0.1212  0.0042
## 
## sigma^2 = 0.007793:  log likelihood = 82.47
## AIC=-154.95   AICc=-154.14   BIC=-143.04
## 
## Training set error measures:
##                       ME       RMSE        MAE MPE MAPE      MASE        ACF1
## Training set 0.002030151 0.08396647 0.06504478 NaN  Inf 0.4058742 -0.03591245
# Forecast the next 4 quarters
jj_forecast <- forecast(jj_model, h = 4)
plot(jj_forecast, main = "4-Quarter Forecast for Johnson & Johnson Earnings (log-transformed)")

# Display forecasted values
jj_forecast
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 1981 Q1       2.918596 2.805466 3.031725 2.745579 3.091612
## 1981 Q2       2.836761 2.719621 2.953900 2.657611 3.015910
## 1981 Q3       2.938476 2.814546 3.062405 2.748942 3.128010
## 1981 Q4       2.600120 2.474659 2.725581 2.408244 2.791996
plot(diff(diff(log(jj)),4))

acf2(diff(diff(log(jj)),4))

##       [,1]  [,2]  [,3]  [,4]  [,5]  [,6] [,7]  [,8]  [,9] [,10] [,11] [,12]
## ACF  -0.44  0.15 -0.09 -0.21  0.11 -0.13 0.27 -0.07 -0.07  0.12 -0.21  0.21
## PACF -0.44 -0.05 -0.05 -0.33 -0.16 -0.19 0.13  0.08 -0.14  0.04 -0.03  0.14
##      [,13] [,14] [,15] [,16] [,17] [,18] [,19]
## ACF  -0.18  0.19 -0.09 -0.04  0.08 -0.14  0.14
## PACF -0.06  0.07 -0.01 -0.01 -0.01 -0.06  0.00
sarima(log(jj),1,1,0,1,1,0,4)
## initial  value -2.232392 
## iter   2 value -2.403794
## iter   3 value -2.409520
## iter   4 value -2.410263
## iter   5 value -2.410266
## iter   6 value -2.410266
## iter   6 value -2.410266
## final  value -2.410266 
## converged
## initial  value -2.381009 
## iter   2 value -2.381164
## iter   3 value -2.381165
## iter   3 value -2.381165
## iter   3 value -2.381165
## final  value -2.381165 
## converged
## <><><><><><><><><><><><><><>
##  
## Coefficients: 
##      Estimate     SE t.value p.value
## ar1   -0.5152 0.1009 -5.1064   0.000
## sar1  -0.3294 0.1109 -2.9697   0.004
## 
## sigma^2 estimated as 0.008467914 on 77 degrees of freedom 
##  
## AIC = -1.848505  AICc = -1.846506  BIC = -1.758525 
## 

sarima.for(log(jj),4,1,1,0,1,1,0,4)

## $pred
##          Qtr1     Qtr2     Qtr3     Qtr4
## 1981 2.902126 2.821452 2.919034 2.575784
## 
## $se
##            Qtr1       Qtr2       Qtr3       Qtr4
## 1981 0.09202127 0.10226343 0.12338542 0.13568573
#For the Johnson & Johnson earnings data, we log-transform the series to stabilize variance. Since the data are quarterly, we apply a seasonal ARIMA model using auto.arima().
#
#Model Selection:
 # auto.arima() identifies the best seasonal model, which we check to ensure it aligns with the observed patterns in the data. The model output provides parameters for both seasonal and non-seasonal components.
#Forecasting:
 # We forecast the next four quarters and plot the forecast along with the original data. The forecast plot includes confidence intervals, showing the likely range of future earnings.
#Interpretation:
 # The seasonal ARIMA model captures both seasonal and trend components. Based on the 4-quarter forecast, we discuss expected quarterly earnings trends, potential seasonal effects, and the stability of the model as indicated by forecast intervals. This interpretation helps us understand short-term expectations for Johnson & Johnson's earnings trajectory.