require(forecast)
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
require(tseries)
## Loading required package: tseries
require(tidyverse)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# set wd
setwd("/cloud/project")
CaseShiller <- read.csv("SPCS20RPSNSA.csv")
head(CaseShiller)
## DATE SPCS20RPSNSA
## 1 2000-01-01 76273
## 2 2000-02-01 70994
## 3 2000-03-01 72795
## 4 2000-04-01 79561
## 5 2000-05-01 92584
## 6 2000-06-01 102644
names(CaseShiller)[2] <- "Units"
CSUnits <- CaseShiller$Units
tCaseShiller <- ts(CSUnits,start = c(2000,1),frequency =12)
tCaseShiller
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct
## 2000 76273 70994 72795 79561 92584 102644 111101 115467 108515 105465
## 2001 82219 74830 79845 87896 104767 116105 128434 134301 124706 114915
## 2002 88055 86304 92158 102771 119604 128773 136760 137574 129074 121173
## 2003 98388 93151 91847 99957 115874 126973 140118 148022 152053 148106
## 2004 109956 103519 107453 120437 139938 156260 172124 181017 170892 157723
## 2005 128895 118615 121230 134260 152506 167019 174593 185773 178574 170738
## 2006 125286 111890 112609 120659 139293 149866 155827 154134 139104 131550
## 2007 103883 95847 97341 100334 111420 115492 120100 119970 106051 93938
## 2008 63340 58810 59843 67549 78031 87398 95594 98122 96276 91098
## 2009 60884 58571 59041 66930 76049 84761 94819 100020 97760 93178
## 2010 76091 64778 63241 73478 88513 96982 95506 89432 77664 71882
## 2011 62799 59633 61078 67030 75876 81813 85062 88907 85044 81202
## 2012 66290 62286 62933 69622 81327 90260 96362 98867 91446 87953
## 2013 71834 66303 67785 77177 93802 106251 117285 121585 116771 108089
## 2014 74457 68733 82253 97663 117655 135017 145504 146071 135660 126030
## 2015 90602 88397 94834 116237 137286 156020 168197 170974 157723 142132
## 2016 102089 100275 105039 125720 149999 170600 177849 180035 165421 154404
## 2017 115657 105929 113728 131765 160030 179299 187764 186846 167066 156461
## 2018 115835 106092 113330 132887 157783 173699 179221 176919 156849 144944
## 2019 102852 94807 103023 125583 154962 173422 184025 180512 167109 154186
## 2020 113523 107280 111681 119551 123320 133495 152182
## Nov Dec
## 2000 95800 89610
## 2001 98758 94180
## 2002 108518 107725
## 2003 133614 125061
## 2004 144715 139743
## 2005 151945 140717
## 2006 119093 113372
## 2007 77518 72062
## 2008 79649 72964
## 2009 89157 87548
## 2010 66990 66269
## 2011 74035 71070
## 2012 79986 78699
## 2013 92646 86339
## 2014 111213 105633
## 2015 123844 116677
## 2016 138048 129020
## 2017 140193 131554
## 2018 127983 119510
## 2019 137458 130486
## 2020
fit <- auto.arima(tCaseShiller)
fit
## Series: tCaseShiller
## ARIMA(2,0,3)(0,1,2)[12]
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 sma1 sma2
## 0.1306 0.7665 1.5831 1.2141 0.5325 -0.5623 -0.2748
## s.e. 0.1137 0.1082 0.1154 0.1165 0.0676 0.0769 0.0747
##
## sigma^2 = 11018636: log likelihood = -2243.82
## AIC=4503.64 AICc=4504.27 BIC=4531.31
accuracy(fit)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 169.732 3189.208 2320.011 0.107296 2.120731 0.1769278 -0.01372461
plot(forecast(fit,12),xlab = "Date" , ylab = "Units" , main = "Arima forecast for Case-Shiller Index")

pred_values <- forecast(fit,12)
pred_values
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Aug 2020 163979.6 159723.46 168235.8 157470.40 170488.8
## Sep 2020 158142.3 149697.92 166586.6 145227.74 171056.8
## Oct 2020 146863.1 134241.06 159485.1 127559.37 166166.8
## Nov 2020 131525.0 115975.87 147074.0 107744.68 155305.2
## Dec 2020 124575.8 106914.51 142237.1 97565.19 151586.4
## Jan 2021 110231.8 90821.11 129642.4 80545.73 139917.8
## Feb 2021 103452.6 82653.98 124251.3 71643.84 135261.4
## Mar 2021 107412.4 85419.82 129405.0 73777.65 141047.2
## Apr 2021 117945.7 94967.11 140924.3 82802.97 153088.5
## May 2021 129640.9 105801.65 153480.1 93181.93 166099.8
## Jun 2021 141424.8 116858.59 165991.0 103854.02 178995.6
## Jul 2021 153542.1 128336.94 178747.2 114994.16 192090.0
qqnorm(fit$residuals)
qqline(fit$residuals)

Box.test(fit$residuals,type = "Ljung-Box")
##
## Box-Ljung test
##
## data: fit$residuals
## X-squared = 0.047094, df = 1, p-value = 0.8282
ltCaseShiller <- log(tCaseShiller)
head(ltCaseShiller)
## Jan Feb Mar Apr May Jun
## 2000 11.24207 11.17035 11.19540 11.28428 11.43587 11.53902
fit2 <- stl(ltCaseShiller , s.window = "period")
plot(fit2,main = "Seasonal Decomposition of log(Case-Shiller Units")

ggseasonplot(tCaseShiller,year.labels = TRUE , col =rainbow(20))

fit3 <- auto.arima(ltCaseShiller)
fit3
## Series: ltCaseShiller
## ARIMA(1,0,2)(0,1,1)[12]
##
## Coefficients:
## ar1 ma1 ma2 sma1
## 0.9747 0.5696 0.4974 -0.8616
## s.e. 0.0154 0.0566 0.0625 0.0489
##
## sigma^2 = 0.0008674: log likelihood = 488.17
## AIC=-966.35 AICc=-966.09 BIC=-949.05
fitAccuracy <- data.frame(accuracy(fit))
fitAccuracy2 <-data.frame(accuracy(fit3))
fitAccuracyFinal <- rbind(fitAccuracy,fitAccuracy2)
fitAccuracyFinal
## ME RMSE MAE MPE MAPE
## Training set 1.697320e+02 3.189208e+03 2.320011e+03 0.10729604 2.1207306
## Training set1 1.756949e-03 2.848129e-02 2.035564e-02 0.01518637 0.1761878
## MASE ACF1
## Training set 0.1769278 -0.013724614
## Training set1 0.1704253 0.002815914
plot(forecast(fit3,12),xlab="Date",ylab="Units",main="Arima Forecast for Case-Shiller Index")
