#1. Plot the data

df<-read.csv("INDPRO.csv")
#str(df)
#head(df)
#tail(df)
library(zoo)
## Warning: package 'zoo' was built under R version 4.1.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
df$DATE<-as.Date(df$DATE) #converting to date object
#str(df$DATE)
library(ggplot2)
#library("ggbreak") 
p <- ggplot(df, aes(x=DATE, y=INDPRO)) +
  geom_line() +   xlab("")+ylab("INDPRO ")+ labs(title = "Industrial Production Index (INDPRO) 2007-2021")

p2<-p+scale_x_date(date_labels = "%b %y", breaks="3 month")+  scale_y_continuous(limits=c(0,120), breaks=seq(0,120, by = 20))+
  theme(axis.text.x = element_text(angle = 90))+theme(axis.ticks.y = element_blank(),plot.title = element_text(hjust = 0.5))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "gray"))
p2

p2 + geom_smooth(method = "lm", se = FALSE, colour="#F2AA4CFF", linetypes=3, )
## Warning: Ignoring unknown parameters: linetypes
## `geom_smooth()` using formula 'y ~ x'

#First convert df to a TS-object
library(tsbox)
## Warning: package 'tsbox' was built under R version 4.1.2
dat<-ts_ts(df)
## [time]: 'DATE' [value]: 'INDPRO'
#dat
class(dat)
## [1] "ts"
summary(dat)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   84.20   95.97   99.41   98.11  101.64  104.17
#add  trend line
# Calculate slope and intercept of line of best fit
#coef(lm(dat~time(dat)))
#p2 + geom_abline(intercept = -720.7929199, slope = 0.4065293)

From plots above, we can se that stationarity does not hold The mean is not constant and the variance is not equal So, we need to apply transformation before proceeding to apply time series

#decomposition
plot(decompose(dat))

#cyclical pattern
boxplot(dat~cycle(dat))

#making the series stationary
#variance
plot(dat)
abline(lm(dat~time(dat)))

plot(log(dat))
abline(lm(log(dat)~time(log(dat))))

#the distance from the mean line should be equal
#make the mean cosntant
#differentiation
plot(diff(log(dat)))
abline(lm(diff(log(dat))~time(diff(log(dat)))))

#there are still some issues with the variance however, the are still
library(astsa)
culer = c("cyan4", 4, 2, 6)
par(mfrow = c(4,1), cex.main=1)
x = window(diff(log(dat)), start=2007)
## Warning in window.default(x, ...): 'start' value not changed
out = stl(x, s.window=15)$time.series
tsplot(x, main=" ", ylab=" ", col=gray(.7))
text(x, labels=1:4, col=culer, cex=1.25)
tsplot(out[,1], main="Seasonal", ylab=" ",col=gray(.7))
text(out[,1], labels=1:4, col=culer, cex=1.25)
tsplot(out[,2], main="Trend", ylab=" ", col=gray(.7))
text(out[,2], labels=1:4, col=culer, cex=1.25)
tsplot(out[,3], main="Noise", ylab=" ", col=gray(.7))
text(out[,3], labels=1:4, col=culer, cex=1.25)

plot(decompose(diff(log(dat))))

#2. IV. Estimated ACF and PACF

#ACF to obtain q
acf(dat)

acf(diff(log(dat)))

#PACF to obtain p
pacf(dat)

pacf(diff(log(dat)))

FITING ARIMA MODELS

library(astsa)
sarima(log(dat), p=1,d=1,q=1) #to obtain coeeficients
## initial  value -4.204797 
## iter   2 value -4.222598
## iter   3 value -4.233443
## iter   4 value -4.234429
## iter   5 value -4.238291
## iter   6 value -4.240237
## iter   7 value -4.241495
## iter   8 value -4.242018
## iter   9 value -4.242023
## iter  10 value -4.242035
## iter  11 value -4.242054
## iter  12 value -4.242065
## iter  13 value -4.242067
## iter  14 value -4.242067
## iter  14 value -4.242067
## iter  14 value -4.242067
## final  value -4.242067 
## converged
## initial  value -4.243294 
## iter   2 value -4.243294
## iter   3 value -4.243302
## iter   4 value -4.243302
## iter   5 value -4.243303
## iter   6 value -4.243303
## iter   7 value -4.243303
## iter   8 value -4.243303
## iter   8 value -4.243303
## final  value -4.243303 
## converged

## $fit
## 
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S), 
##     xreg = constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc, 
##         REPORT = 1, reltol = tol))
## 
## Coefficients:
##           ar1     ma1  constant
##       -0.2285  0.4991    0.0001
## s.e.   0.1989  0.1729    0.0013
## 
## sigma^2 estimated as 0.0002061:  log likelihood = 499.91,  aic = -991.83
## 
## $degrees_of_freedom
## [1] 174
## 
## $ttable
##          Estimate     SE t.value p.value
## ar1       -0.2285 0.1989 -1.1489  0.2522
## ma1        0.4991 0.1729  2.8872  0.0044
## constant   0.0001 0.0013  0.1055  0.9161
## 
## $AIC
## [1] -5.603531
## 
## $AICc
## [1] -5.602748
## 
## $BIC
## [1] -5.531754
#printing psi-weights
round( ARMAtoMA(ar=-0.2285, ma=0.4991, 10), 3)#Convert ARMA Process to Infinite MA Process
##  [1]  0.271 -0.062  0.014 -0.003  0.001  0.000  0.000  0.000  0.000  0.000
#5 terms before reaching 0: 0.271 -0.062  0.014 -0.003  0.001  0.000 

round( ARMAtoAR(ar=-0.2285, ma=0.4991, 20), 3)#Print Convert ARMA Process to Infinite AR Process
##  [1] -0.271  0.135 -0.067  0.034 -0.017  0.008 -0.004  0.002 -0.001  0.001
## [11]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
#10 terms before reaching 0: :-0.271  0.135 -0.067  0.034 -0.017  0.008 -0.004  0.002 -0.001  0.001 
fit1<-sarima(log(dat), p=1,d=1,q=1)
## initial  value -4.204797 
## iter   2 value -4.222598
## iter   3 value -4.233443
## iter   4 value -4.234429
## iter   5 value -4.238291
## iter   6 value -4.240237
## iter   7 value -4.241495
## iter   8 value -4.242018
## iter   9 value -4.242023
## iter  10 value -4.242035
## iter  11 value -4.242054
## iter  12 value -4.242065
## iter  13 value -4.242067
## iter  14 value -4.242067
## iter  14 value -4.242067
## iter  14 value -4.242067
## final  value -4.242067 
## converged
## initial  value -4.243294 
## iter   2 value -4.243294
## iter   3 value -4.243302
## iter   4 value -4.243302
## iter   5 value -4.243303
## iter   6 value -4.243303
## iter   7 value -4.243303
## iter   8 value -4.243303
## iter   8 value -4.243303
## final  value -4.243303 
## converged

fit2<-sarima(log(dat), p=2,d=1,q=1)
## initial  value -4.202001 
## iter   2 value -4.232950
## iter   3 value -4.245571
## iter   4 value -4.245805
## iter   5 value -4.245828
## iter   6 value -4.246083
## iter   7 value -4.246206
## iter   8 value -4.246249
## iter   9 value -4.246253
## iter   9 value -4.246253
## iter   9 value -4.246253
## final  value -4.246253 
## converged
## initial  value -4.250432 
## iter   2 value -4.250439
## iter   3 value -4.250440
## iter   4 value -4.250440
## iter   4 value -4.250440
## iter   4 value -4.250440
## final  value -4.250440 
## converged

fit3<-sarima(log(dat), p=1,d=1,q=2)
## initial  value -4.204797 
## iter   2 value -4.231631
## iter   3 value -4.245545
## iter   4 value -4.246186
## iter   5 value -4.246227
## iter   6 value -4.246311
## iter   7 value -4.246555
## iter   8 value -4.246826
## iter   9 value -4.246987
## iter  10 value -4.247065
## iter  11 value -4.247067
## iter  12 value -4.247069
## iter  12 value -4.247069
## iter  12 value -4.247069
## final  value -4.247069 
## converged
## initial  value -4.248034 
## iter   2 value -4.248054
## iter   3 value -4.248057
## iter   4 value -4.248058
## iter   5 value -4.248058
## iter   6 value -4.248059
## iter   7 value -4.248059
## iter   8 value -4.248060
## iter   9 value -4.248060
## iter  10 value -4.248060
## iter  10 value -4.248060
## final  value -4.248060 
## converged

fit4<-sarima(log(dat), p=2,d=1,q=2)
## initial  value -4.202001 
## iter   2 value -4.219564
## iter   3 value -4.244131
## iter   4 value -4.245202
## iter   5 value -4.245338
## iter   6 value -4.245500
## iter   7 value -4.246043
## iter   8 value -4.246234
## iter   9 value -4.246286
## iter  10 value -4.246287
## iter  11 value -4.246288
## iter  12 value -4.246290
## iter  13 value -4.246291
## iter  14 value -4.246291
## iter  15 value -4.246291
## iter  16 value -4.246292
## iter  17 value -4.246294
## iter  18 value -4.246295
## iter  19 value -4.246296
## iter  20 value -4.246296
## iter  20 value -4.246296
## iter  20 value -4.246296
## final  value -4.246296 
## converged
## initial  value -4.250544 
## iter   2 value -4.250551
## iter   3 value -4.250557
## iter   4 value -4.250560
## iter   5 value -4.250564
## iter   6 value -4.250570
## iter   7 value -4.250577
## iter   8 value -4.250578
## iter   9 value -4.250578
## iter  10 value -4.250578
## iter  11 value -4.250579
## iter  12 value -4.250580
## iter  13 value -4.250581
## iter  14 value -4.250581
## iter  14 value -4.250581
## iter  14 value -4.250581
## final  value -4.250581 
## converged

fit1$AIC
## [1] -5.603531
fit2$AIC #this model offers a better fit, however not noticeable difference exits
## [1] -5.606506
fit3$AIC
## [1] -5.601747
fit4$AIC
## [1] -5.595488
fit1$BIC #this model offers a better fit, however not noticeable difference exits
## [1] -5.531754
fit2$BIC 
## [1] -5.516785
fit3$BIC
## [1] -5.512025
fit4$BIC 
## [1] -5.487822
sarima(log(dat), p=1,d=1,q=1)
## initial  value -4.204797 
## iter   2 value -4.222598
## iter   3 value -4.233443
## iter   4 value -4.234429
## iter   5 value -4.238291
## iter   6 value -4.240237
## iter   7 value -4.241495
## iter   8 value -4.242018
## iter   9 value -4.242023
## iter  10 value -4.242035
## iter  11 value -4.242054
## iter  12 value -4.242065
## iter  13 value -4.242067
## iter  14 value -4.242067
## iter  14 value -4.242067
## iter  14 value -4.242067
## final  value -4.242067 
## converged
## initial  value -4.243294 
## iter   2 value -4.243294
## iter   3 value -4.243302
## iter   4 value -4.243302
## iter   5 value -4.243303
## iter   6 value -4.243303
## iter   7 value -4.243303
## iter   8 value -4.243303
## iter   8 value -4.243303
## final  value -4.243303 
## converged

## $fit
## 
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S), 
##     xreg = constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc, 
##         REPORT = 1, reltol = tol))
## 
## Coefficients:
##           ar1     ma1  constant
##       -0.2285  0.4991    0.0001
## s.e.   0.1989  0.1729    0.0013
## 
## sigma^2 estimated as 0.0002061:  log likelihood = 499.91,  aic = -991.83
## 
## $degrees_of_freedom
## [1] 174
## 
## $ttable
##          Estimate     SE t.value p.value
## ar1       -0.2285 0.1989 -1.1489  0.2522
## ma1        0.4991 0.1729  2.8872  0.0044
## constant   0.0001 0.0013  0.1055  0.9161
## 
## $AIC
## [1] -5.603531
## 
## $AICc
## [1] -5.602748
## 
## $BIC
## [1] -5.531754
library(astsa)
#It might be useful at times to write an ARMA model in its causal or invertible forms.

sarima(log(dat), p=1,d=1,q=1) #to obtain coeeficients
## initial  value -4.204797 
## iter   2 value -4.222598
## iter   3 value -4.233443
## iter   4 value -4.234429
## iter   5 value -4.238291
## iter   6 value -4.240237
## iter   7 value -4.241495
## iter   8 value -4.242018
## iter   9 value -4.242023
## iter  10 value -4.242035
## iter  11 value -4.242054
## iter  12 value -4.242065
## iter  13 value -4.242067
## iter  14 value -4.242067
## iter  14 value -4.242067
## iter  14 value -4.242067
## final  value -4.242067 
## converged
## initial  value -4.243294 
## iter   2 value -4.243294
## iter   3 value -4.243302
## iter   4 value -4.243302
## iter   5 value -4.243303
## iter   6 value -4.243303
## iter   7 value -4.243303
## iter   8 value -4.243303
## iter   8 value -4.243303
## final  value -4.243303 
## converged

## $fit
## 
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S), 
##     xreg = constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc, 
##         REPORT = 1, reltol = tol))
## 
## Coefficients:
##           ar1     ma1  constant
##       -0.2285  0.4991    0.0001
## s.e.   0.1989  0.1729    0.0013
## 
## sigma^2 estimated as 0.0002061:  log likelihood = 499.91,  aic = -991.83
## 
## $degrees_of_freedom
## [1] 174
## 
## $ttable
##          Estimate     SE t.value p.value
## ar1       -0.2285 0.1989 -1.1489  0.2522
## ma1        0.4991 0.1729  2.8872  0.0044
## constant   0.0001 0.0013  0.1055  0.9161
## 
## $AIC
## [1] -5.603531
## 
## $AICc
## [1] -5.602748
## 
## $BIC
## [1] -5.531754
#printing psi-weights
round( ARMAtoMA(ar=-0.2285, ma=0.4991, 100), 3)#Convert ARMA Process to Infinite MA Process
##   [1]  0.271 -0.062  0.014 -0.003  0.001  0.000  0.000  0.000  0.000  0.000
##  [11]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
##  [21]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
##  [31]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
##  [41]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
##  [51]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
##  [61]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
##  [71]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
##  [81]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
##  [91]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
#5 terms before converging to 0: 0.271 -0.062  0.014 -0.003  0.001  0.000 
#psi-weights are square summable

#we can say the model is causal

round( ARMAtoAR(ar=-0.2285, ma=0.4991, 100), 3)#Print Convert ARMA Process to Infinite AR Process
##   [1] -0.271  0.135 -0.067  0.034 -0.017  0.008 -0.004  0.002 -0.001  0.001
##  [11]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
##  [21]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
##  [31]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
##  [41]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
##  [51]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
##  [61]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
##  [71]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
##  [81]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
##  [91]  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000
#10 terms before convering to 0: :-0.271  0.135 -0.067  0.034 -0.017  0.008 -0.004  0.002 -0.001  0.001 
#we can say the model is causal

#prediction Predict the number of months we want the predictin

set.seed(1998)
x <- ts(arima.sim(list(order = c(1,1,0), ar=.9), n=150)[-1])
y <- window(x, start=1, end=100)

sarima.for(dat, n.ahead = 50, p = 1, d = 1, q = 1, plot.all=TRUE)
## $pred
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 2021                                                                        
## 2022 102.1383 102.1462 102.1614 102.1750 102.1889 102.2028 102.2166 102.2305
## 2023 102.2999 102.3138 102.3277 102.3415 102.3554 102.3693 102.3832 102.3971
## 2024 102.4665 102.4803 102.4942 102.5081 102.5220 102.5358 102.5497 102.5636
## 2025 102.6330 102.6469 102.6607 102.6746 102.6885 102.7024 102.7163 102.7301
##           Sep      Oct      Nov      Dec
## 2021                   102.2099 102.0969
## 2022 102.2444 102.2583 102.2722 102.2860
## 2023 102.4109 102.4248 102.4387 102.4526
## 2024 102.5775 102.5914 102.6052 102.6191
## 2025 102.7440 102.7579 102.7718 102.7856
## 
## $se
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2021                                                                      
## 2022  2.683188  3.137831  3.532764  3.888120  4.213545  4.515592  4.798662
## 2023  6.232711  6.440751  6.642278  6.837868  7.028018  7.213156  7.393660
## 2024  8.395578  8.551159  8.703959  8.854123  9.001782  9.147058  9.290062
## 2025 10.105681 10.235300 10.363299 10.489735 10.614666 10.738143 10.860217
##            Aug       Sep       Oct       Nov       Dec
## 2021                                1.325703  2.146691
## 2022  5.065940  5.319806  5.562097  5.794265  6.017482
## 2023  7.569861  7.742053  7.910497  8.075429  8.237059
## 2024  9.430898  9.569662  9.706442  9.841321  9.974377
## 2025 10.980933 11.100337 11.218470 11.335372 11.451080
text(85, 205, "PAST"); text(115, 205, "FUTURE")
abline(v=100, lty=2, col=4)
lines(x)