#1. Plot the data
df<-read.csv("INDPRO.csv")
#str(df)
#head(df)
#tail(df)
library(zoo)
## Warning: package 'zoo' was built under R version 4.1.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
df$DATE<-as.Date(df$DATE) #converting to date object
#str(df$DATE)
library(ggplot2)
#library("ggbreak")
p <- ggplot(df, aes(x=DATE, y=INDPRO)) +
geom_line() + xlab("")+ylab("INDPRO ")+ labs(title = "Industrial Production Index (INDPRO) 2007-2021")
p2<-p+scale_x_date(date_labels = "%b %y", breaks="3 month")+ scale_y_continuous(limits=c(0,120), breaks=seq(0,120, by = 20))+
theme(axis.text.x = element_text(angle = 90))+theme(axis.ticks.y = element_blank(),plot.title = element_text(hjust = 0.5))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "gray"))
p2
p2 + geom_smooth(method = "lm", se = FALSE, colour="#F2AA4CFF", linetypes=3, )
## Warning: Ignoring unknown parameters: linetypes
## `geom_smooth()` using formula 'y ~ x'
#First convert df to a TS-object
library(tsbox)
## Warning: package 'tsbox' was built under R version 4.1.2
dat<-ts_ts(df)
## [time]: 'DATE' [value]: 'INDPRO'
#dat
class(dat)
## [1] "ts"
summary(dat)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 84.20 95.97 99.41 98.11 101.64 104.17
#add trend line
# Calculate slope and intercept of line of best fit
#coef(lm(dat~time(dat)))
#p2 + geom_abline(intercept = -720.7929199, slope = 0.4065293)
From plots above, we can se that stationarity does not hold The mean is not constant and the variance is not equal So, we need to apply transformation before proceeding to apply time series
#decomposition
plot(decompose(dat))
#cyclical pattern
boxplot(dat~cycle(dat))
#making the series stationary
#variance
plot(dat)
abline(lm(dat~time(dat)))
plot(log(dat))
abline(lm(log(dat)~time(log(dat))))
#the distance from the mean line should be equal
#make the mean cosntant
#differentiation
plot(diff(log(dat)))
abline(lm(diff(log(dat))~time(diff(log(dat)))))
#there are still some issues with the variance however, the are still
library(astsa)
culer = c("cyan4", 4, 2, 6)
par(mfrow = c(4,1), cex.main=1)
x = window(diff(log(dat)), start=2007)
## Warning in window.default(x, ...): 'start' value not changed
out = stl(x, s.window=15)$time.series
tsplot(x, main=" ", ylab=" ", col=gray(.7))
text(x, labels=1:4, col=culer, cex=1.25)
tsplot(out[,1], main="Seasonal", ylab=" ",col=gray(.7))
text(out[,1], labels=1:4, col=culer, cex=1.25)
tsplot(out[,2], main="Trend", ylab=" ", col=gray(.7))
text(out[,2], labels=1:4, col=culer, cex=1.25)
tsplot(out[,3], main="Noise", ylab=" ", col=gray(.7))
text(out[,3], labels=1:4, col=culer, cex=1.25)
plot(decompose(diff(log(dat))))
#2. IV. Estimated ACF and PACF
#ACF to obtain q
acf(dat)
acf(diff(log(dat)))
#PACF to obtain p
pacf(dat)
pacf(diff(log(dat)))
FITING ARIMA MODELS
library(astsa)
sarima(log(dat), p=1,d=1,q=1) #to obtain coeeficients
## initial value -4.204797
## iter 2 value -4.222598
## iter 3 value -4.233443
## iter 4 value -4.234429
## iter 5 value -4.238291
## iter 6 value -4.240237
## iter 7 value -4.241495
## iter 8 value -4.242018
## iter 9 value -4.242023
## iter 10 value -4.242035
## iter 11 value -4.242054
## iter 12 value -4.242065
## iter 13 value -4.242067
## iter 14 value -4.242067
## iter 14 value -4.242067
## iter 14 value -4.242067
## final value -4.242067
## converged
## initial value -4.243294
## iter 2 value -4.243294
## iter 3 value -4.243302
## iter 4 value -4.243302
## iter 5 value -4.243303
## iter 6 value -4.243303
## iter 7 value -4.243303
## iter 8 value -4.243303
## iter 8 value -4.243303
## final value -4.243303
## converged
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## xreg = constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc,
## REPORT = 1, reltol = tol))
##
## Coefficients:
## ar1 ma1 constant
## -0.2285 0.4991 0.0001
## s.e. 0.1989 0.1729 0.0013
##
## sigma^2 estimated as 0.0002061: log likelihood = 499.91, aic = -991.83
##
## $degrees_of_freedom
## [1] 174
##
## $ttable
## Estimate SE t.value p.value
## ar1 -0.2285 0.1989 -1.1489 0.2522
## ma1 0.4991 0.1729 2.8872 0.0044
## constant 0.0001 0.0013 0.1055 0.9161
##
## $AIC
## [1] -5.603531
##
## $AICc
## [1] -5.602748
##
## $BIC
## [1] -5.531754
#printing psi-weights
round( ARMAtoMA(ar=-0.2285, ma=0.4991, 10), 3)#Convert ARMA Process to Infinite MA Process
## [1] 0.271 -0.062 0.014 -0.003 0.001 0.000 0.000 0.000 0.000 0.000
#5 terms before reaching 0: 0.271 -0.062 0.014 -0.003 0.001 0.000
round( ARMAtoAR(ar=-0.2285, ma=0.4991, 20), 3)#Print Convert ARMA Process to Infinite AR Process
## [1] -0.271 0.135 -0.067 0.034 -0.017 0.008 -0.004 0.002 -0.001 0.001
## [11] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
#10 terms before reaching 0: :-0.271 0.135 -0.067 0.034 -0.017 0.008 -0.004 0.002 -0.001 0.001
fit1<-sarima(log(dat), p=1,d=1,q=1)
## initial value -4.204797
## iter 2 value -4.222598
## iter 3 value -4.233443
## iter 4 value -4.234429
## iter 5 value -4.238291
## iter 6 value -4.240237
## iter 7 value -4.241495
## iter 8 value -4.242018
## iter 9 value -4.242023
## iter 10 value -4.242035
## iter 11 value -4.242054
## iter 12 value -4.242065
## iter 13 value -4.242067
## iter 14 value -4.242067
## iter 14 value -4.242067
## iter 14 value -4.242067
## final value -4.242067
## converged
## initial value -4.243294
## iter 2 value -4.243294
## iter 3 value -4.243302
## iter 4 value -4.243302
## iter 5 value -4.243303
## iter 6 value -4.243303
## iter 7 value -4.243303
## iter 8 value -4.243303
## iter 8 value -4.243303
## final value -4.243303
## converged
fit2<-sarima(log(dat), p=2,d=1,q=1)
## initial value -4.202001
## iter 2 value -4.232950
## iter 3 value -4.245571
## iter 4 value -4.245805
## iter 5 value -4.245828
## iter 6 value -4.246083
## iter 7 value -4.246206
## iter 8 value -4.246249
## iter 9 value -4.246253
## iter 9 value -4.246253
## iter 9 value -4.246253
## final value -4.246253
## converged
## initial value -4.250432
## iter 2 value -4.250439
## iter 3 value -4.250440
## iter 4 value -4.250440
## iter 4 value -4.250440
## iter 4 value -4.250440
## final value -4.250440
## converged
fit3<-sarima(log(dat), p=1,d=1,q=2)
## initial value -4.204797
## iter 2 value -4.231631
## iter 3 value -4.245545
## iter 4 value -4.246186
## iter 5 value -4.246227
## iter 6 value -4.246311
## iter 7 value -4.246555
## iter 8 value -4.246826
## iter 9 value -4.246987
## iter 10 value -4.247065
## iter 11 value -4.247067
## iter 12 value -4.247069
## iter 12 value -4.247069
## iter 12 value -4.247069
## final value -4.247069
## converged
## initial value -4.248034
## iter 2 value -4.248054
## iter 3 value -4.248057
## iter 4 value -4.248058
## iter 5 value -4.248058
## iter 6 value -4.248059
## iter 7 value -4.248059
## iter 8 value -4.248060
## iter 9 value -4.248060
## iter 10 value -4.248060
## iter 10 value -4.248060
## final value -4.248060
## converged
fit4<-sarima(log(dat), p=2,d=1,q=2)
## initial value -4.202001
## iter 2 value -4.219564
## iter 3 value -4.244131
## iter 4 value -4.245202
## iter 5 value -4.245338
## iter 6 value -4.245500
## iter 7 value -4.246043
## iter 8 value -4.246234
## iter 9 value -4.246286
## iter 10 value -4.246287
## iter 11 value -4.246288
## iter 12 value -4.246290
## iter 13 value -4.246291
## iter 14 value -4.246291
## iter 15 value -4.246291
## iter 16 value -4.246292
## iter 17 value -4.246294
## iter 18 value -4.246295
## iter 19 value -4.246296
## iter 20 value -4.246296
## iter 20 value -4.246296
## iter 20 value -4.246296
## final value -4.246296
## converged
## initial value -4.250544
## iter 2 value -4.250551
## iter 3 value -4.250557
## iter 4 value -4.250560
## iter 5 value -4.250564
## iter 6 value -4.250570
## iter 7 value -4.250577
## iter 8 value -4.250578
## iter 9 value -4.250578
## iter 10 value -4.250578
## iter 11 value -4.250579
## iter 12 value -4.250580
## iter 13 value -4.250581
## iter 14 value -4.250581
## iter 14 value -4.250581
## iter 14 value -4.250581
## final value -4.250581
## converged
fit1$AIC
## [1] -5.603531
fit2$AIC #this model offers a better fit, however not noticeable difference exits
## [1] -5.606506
fit3$AIC
## [1] -5.601747
fit4$AIC
## [1] -5.595488
fit1$BIC #this model offers a better fit, however not noticeable difference exits
## [1] -5.531754
fit2$BIC
## [1] -5.516785
fit3$BIC
## [1] -5.512025
fit4$BIC
## [1] -5.487822
sarima(log(dat), p=1,d=1,q=1)
## initial value -4.204797
## iter 2 value -4.222598
## iter 3 value -4.233443
## iter 4 value -4.234429
## iter 5 value -4.238291
## iter 6 value -4.240237
## iter 7 value -4.241495
## iter 8 value -4.242018
## iter 9 value -4.242023
## iter 10 value -4.242035
## iter 11 value -4.242054
## iter 12 value -4.242065
## iter 13 value -4.242067
## iter 14 value -4.242067
## iter 14 value -4.242067
## iter 14 value -4.242067
## final value -4.242067
## converged
## initial value -4.243294
## iter 2 value -4.243294
## iter 3 value -4.243302
## iter 4 value -4.243302
## iter 5 value -4.243303
## iter 6 value -4.243303
## iter 7 value -4.243303
## iter 8 value -4.243303
## iter 8 value -4.243303
## final value -4.243303
## converged
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## xreg = constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc,
## REPORT = 1, reltol = tol))
##
## Coefficients:
## ar1 ma1 constant
## -0.2285 0.4991 0.0001
## s.e. 0.1989 0.1729 0.0013
##
## sigma^2 estimated as 0.0002061: log likelihood = 499.91, aic = -991.83
##
## $degrees_of_freedom
## [1] 174
##
## $ttable
## Estimate SE t.value p.value
## ar1 -0.2285 0.1989 -1.1489 0.2522
## ma1 0.4991 0.1729 2.8872 0.0044
## constant 0.0001 0.0013 0.1055 0.9161
##
## $AIC
## [1] -5.603531
##
## $AICc
## [1] -5.602748
##
## $BIC
## [1] -5.531754
library(astsa)
#It might be useful at times to write an ARMA model in its causal or invertible forms.
sarima(log(dat), p=1,d=1,q=1) #to obtain coeeficients
## initial value -4.204797
## iter 2 value -4.222598
## iter 3 value -4.233443
## iter 4 value -4.234429
## iter 5 value -4.238291
## iter 6 value -4.240237
## iter 7 value -4.241495
## iter 8 value -4.242018
## iter 9 value -4.242023
## iter 10 value -4.242035
## iter 11 value -4.242054
## iter 12 value -4.242065
## iter 13 value -4.242067
## iter 14 value -4.242067
## iter 14 value -4.242067
## iter 14 value -4.242067
## final value -4.242067
## converged
## initial value -4.243294
## iter 2 value -4.243294
## iter 3 value -4.243302
## iter 4 value -4.243302
## iter 5 value -4.243303
## iter 6 value -4.243303
## iter 7 value -4.243303
## iter 8 value -4.243303
## iter 8 value -4.243303
## final value -4.243303
## converged
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## xreg = constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc,
## REPORT = 1, reltol = tol))
##
## Coefficients:
## ar1 ma1 constant
## -0.2285 0.4991 0.0001
## s.e. 0.1989 0.1729 0.0013
##
## sigma^2 estimated as 0.0002061: log likelihood = 499.91, aic = -991.83
##
## $degrees_of_freedom
## [1] 174
##
## $ttable
## Estimate SE t.value p.value
## ar1 -0.2285 0.1989 -1.1489 0.2522
## ma1 0.4991 0.1729 2.8872 0.0044
## constant 0.0001 0.0013 0.1055 0.9161
##
## $AIC
## [1] -5.603531
##
## $AICc
## [1] -5.602748
##
## $BIC
## [1] -5.531754
#printing psi-weights
round( ARMAtoMA(ar=-0.2285, ma=0.4991, 100), 3)#Convert ARMA Process to Infinite MA Process
## [1] 0.271 -0.062 0.014 -0.003 0.001 0.000 0.000 0.000 0.000 0.000
## [11] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [21] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [31] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [41] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [51] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [61] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [71] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [81] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [91] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
#5 terms before converging to 0: 0.271 -0.062 0.014 -0.003 0.001 0.000
#psi-weights are square summable
#we can say the model is causal
round( ARMAtoAR(ar=-0.2285, ma=0.4991, 100), 3)#Print Convert ARMA Process to Infinite AR Process
## [1] -0.271 0.135 -0.067 0.034 -0.017 0.008 -0.004 0.002 -0.001 0.001
## [11] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [21] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [31] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [41] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [51] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [61] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [71] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [81] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [91] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
#10 terms before convering to 0: :-0.271 0.135 -0.067 0.034 -0.017 0.008 -0.004 0.002 -0.001 0.001
#we can say the model is causal
#prediction Predict the number of months we want the predictin
set.seed(1998)
x <- ts(arima.sim(list(order = c(1,1,0), ar=.9), n=150)[-1])
y <- window(x, start=1, end=100)
sarima.for(dat, n.ahead = 50, p = 1, d = 1, q = 1, plot.all=TRUE)
## $pred
## Jan Feb Mar Apr May Jun Jul Aug
## 2021
## 2022 102.1383 102.1462 102.1614 102.1750 102.1889 102.2028 102.2166 102.2305
## 2023 102.2999 102.3138 102.3277 102.3415 102.3554 102.3693 102.3832 102.3971
## 2024 102.4665 102.4803 102.4942 102.5081 102.5220 102.5358 102.5497 102.5636
## 2025 102.6330 102.6469 102.6607 102.6746 102.6885 102.7024 102.7163 102.7301
## Sep Oct Nov Dec
## 2021 102.2099 102.0969
## 2022 102.2444 102.2583 102.2722 102.2860
## 2023 102.4109 102.4248 102.4387 102.4526
## 2024 102.5775 102.5914 102.6052 102.6191
## 2025 102.7440 102.7579 102.7718 102.7856
##
## $se
## Jan Feb Mar Apr May Jun Jul
## 2021
## 2022 2.683188 3.137831 3.532764 3.888120 4.213545 4.515592 4.798662
## 2023 6.232711 6.440751 6.642278 6.837868 7.028018 7.213156 7.393660
## 2024 8.395578 8.551159 8.703959 8.854123 9.001782 9.147058 9.290062
## 2025 10.105681 10.235300 10.363299 10.489735 10.614666 10.738143 10.860217
## Aug Sep Oct Nov Dec
## 2021 1.325703 2.146691
## 2022 5.065940 5.319806 5.562097 5.794265 6.017482
## 2023 7.569861 7.742053 7.910497 8.075429 8.237059
## 2024 9.430898 9.569662 9.706442 9.841321 9.974377
## 2025 10.980933 11.100337 11.218470 11.335372 11.451080
text(85, 205, "PAST"); text(115, 205, "FUTURE")
abline(v=100, lty=2, col=4)
lines(x)