require(forecast)
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
require(tseries)
## Loading required package: tseries
require(tidyverse)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# set wd
setwd("/cloud/project")
CaseShiller <- read.csv("SPCS20RPSNSA.csv")
head(CaseShiller)
##         DATE SPCS20RPSNSA
## 1 2000-01-01        76273
## 2 2000-02-01        70994
## 3 2000-03-01        72795
## 4 2000-04-01        79561
## 5 2000-05-01        92584
## 6 2000-06-01       102644
names(CaseShiller)[2] <- "Units"
CSUnits <- CaseShiller$Units
tCaseShiller <- ts(CSUnits,start = c(2000,1),frequency =12)
tCaseShiller
##         Jan    Feb    Mar    Apr    May    Jun    Jul    Aug    Sep    Oct
## 2000  76273  70994  72795  79561  92584 102644 111101 115467 108515 105465
## 2001  82219  74830  79845  87896 104767 116105 128434 134301 124706 114915
## 2002  88055  86304  92158 102771 119604 128773 136760 137574 129074 121173
## 2003  98388  93151  91847  99957 115874 126973 140118 148022 152053 148106
## 2004 109956 103519 107453 120437 139938 156260 172124 181017 170892 157723
## 2005 128895 118615 121230 134260 152506 167019 174593 185773 178574 170738
## 2006 125286 111890 112609 120659 139293 149866 155827 154134 139104 131550
## 2007 103883  95847  97341 100334 111420 115492 120100 119970 106051  93938
## 2008  63340  58810  59843  67549  78031  87398  95594  98122  96276  91098
## 2009  60884  58571  59041  66930  76049  84761  94819 100020  97760  93178
## 2010  76091  64778  63241  73478  88513  96982  95506  89432  77664  71882
## 2011  62799  59633  61078  67030  75876  81813  85062  88907  85044  81202
## 2012  66290  62286  62933  69622  81327  90260  96362  98867  91446  87953
## 2013  71834  66303  67785  77177  93802 106251 117285 121585 116771 108089
## 2014  74457  68733  82253  97663 117655 135017 145504 146071 135660 126030
## 2015  90602  88397  94834 116237 137286 156020 168197 170974 157723 142132
## 2016 102089 100275 105039 125720 149999 170600 177849 180035 165421 154404
## 2017 115657 105929 113728 131765 160030 179299 187764 186846 167066 156461
## 2018 115835 106092 113330 132887 157783 173699 179221 176919 156849 144944
## 2019 102852  94807 103023 125583 154962 173422 184025 180512 167109 154186
## 2020 113523 107280 111681 119551 123320 133495 152182                     
##         Nov    Dec
## 2000  95800  89610
## 2001  98758  94180
## 2002 108518 107725
## 2003 133614 125061
## 2004 144715 139743
## 2005 151945 140717
## 2006 119093 113372
## 2007  77518  72062
## 2008  79649  72964
## 2009  89157  87548
## 2010  66990  66269
## 2011  74035  71070
## 2012  79986  78699
## 2013  92646  86339
## 2014 111213 105633
## 2015 123844 116677
## 2016 138048 129020
## 2017 140193 131554
## 2018 127983 119510
## 2019 137458 130486
## 2020
fit <- auto.arima(tCaseShiller)
fit
## Series: tCaseShiller 
## ARIMA(2,0,3)(0,1,2)[12] 
## 
## Coefficients:
##          ar1     ar2     ma1     ma2     ma3     sma1     sma2
##       0.1306  0.7665  1.5831  1.2141  0.5325  -0.5623  -0.2748
## s.e.  0.1137  0.1082  0.1154  0.1165  0.0676   0.0769   0.0747
## 
## sigma^2 = 11018636:  log likelihood = -2243.82
## AIC=4503.64   AICc=4504.27   BIC=4531.31
accuracy(fit)
##                   ME     RMSE      MAE      MPE     MAPE      MASE        ACF1
## Training set 169.732 3189.208 2320.011 0.107296 2.120731 0.1769278 -0.01372461
plot(forecast(fit,12),xlab = "Date" , ylab = "Units" , main = "Arima forecast for Case-Shiller Index")

pred_values <- forecast(fit,12)
pred_values
##          Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## Aug 2020       163979.6 159723.46 168235.8 157470.40 170488.8
## Sep 2020       158142.3 149697.92 166586.6 145227.74 171056.8
## Oct 2020       146863.1 134241.06 159485.1 127559.37 166166.8
## Nov 2020       131525.0 115975.87 147074.0 107744.68 155305.2
## Dec 2020       124575.8 106914.51 142237.1  97565.19 151586.4
## Jan 2021       110231.8  90821.11 129642.4  80545.73 139917.8
## Feb 2021       103452.6  82653.98 124251.3  71643.84 135261.4
## Mar 2021       107412.4  85419.82 129405.0  73777.65 141047.2
## Apr 2021       117945.7  94967.11 140924.3  82802.97 153088.5
## May 2021       129640.9 105801.65 153480.1  93181.93 166099.8
## Jun 2021       141424.8 116858.59 165991.0 103854.02 178995.6
## Jul 2021       153542.1 128336.94 178747.2 114994.16 192090.0
qqnorm(fit$residuals)
qqline(fit$residuals)

Box.test(fit$residuals,type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  fit$residuals
## X-squared = 0.047094, df = 1, p-value = 0.8282
ltCaseShiller <- log(tCaseShiller)
head(ltCaseShiller)
##           Jan      Feb      Mar      Apr      May      Jun
## 2000 11.24207 11.17035 11.19540 11.28428 11.43587 11.53902
fit2 <- stl(ltCaseShiller , s.window = "period")
plot(fit2,main = "Seasonal Decomposition of log(Case-Shiller Units")

ggseasonplot(tCaseShiller,year.labels = TRUE , col =rainbow(20))

fit3 <- auto.arima(ltCaseShiller)
fit3
## Series: ltCaseShiller 
## ARIMA(1,0,2)(0,1,1)[12] 
## 
## Coefficients:
##          ar1     ma1     ma2     sma1
##       0.9747  0.5696  0.4974  -0.8616
## s.e.  0.0154  0.0566  0.0625   0.0489
## 
## sigma^2 = 0.0008674:  log likelihood = 488.17
## AIC=-966.35   AICc=-966.09   BIC=-949.05
fitAccuracy <- data.frame(accuracy(fit))
fitAccuracy2 <-data.frame(accuracy(fit3))
fitAccuracyFinal <- rbind(fitAccuracy,fitAccuracy2)
fitAccuracyFinal
##                         ME         RMSE          MAE        MPE      MAPE
## Training set  1.697320e+02 3.189208e+03 2.320011e+03 0.10729604 2.1207306
## Training set1 1.756949e-03 2.848129e-02 2.035564e-02 0.01518637 0.1761878
##                    MASE         ACF1
## Training set  0.1769278 -0.013724614
## Training set1 0.1704253  0.002815914
plot(forecast(fit3,12),xlab="Date",ylab="Units",main="Arima Forecast for Case-Shiller Index")