I did a timeseries analysis on a data set and created arima models that could be used for forecasting.

mpt.1<-read.csv("~/1 UW Tacoma/560 data mining/rcode/time series/ts.csv")
mpt<-mpt.1[,-1]
#View(mpt.1)
library(tseries)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
library(TTR)
ts.mpt<-ts(mpt, frequency =12, start =c(1997,1))
summary(ts.mpt)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       8     562    1039    1277    1676    7546
#plots
plot(ts.mpt)

options(scipen = "999")

# check for white noise

Box.test(mpt,lag=6,type = c("Box-Pierce")) # p-value< 0.05, no white noise. Analysis can be carried out
## 
##  Box-Pierce test
## 
## data:  mpt
## X-squared = 304.7, df = 6, p-value < 0.00000000000000022
# breaking down into different components
sma<-SMA(ts.mpt)
decom<-decompose(sma)
#seasonal
plot(decom$seasonal)

#trend
plot(decom$trend)

#randomness
plot(decom$random)

#acf & pacf for original data

acf(ts.mpt)

pacf(ts.mpt)

#ADF & KPSS tests
kpss.test(ts.mpt)  #p-value less than .05, stochastic trend (randomness), need to detrend.
## Warning in kpss.test(ts.mpt): p-value smaller than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  ts.mpt
## KPSS Level = 3.5407, Truncation lag parameter = 3, p-value = 0.01
adf.test(ts.mpt) #p-value less than .05, stationary.
## Warning in adf.test(ts.mpt): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts.mpt
## Dickey-Fuller = -5.6708, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
#differencing

diff.mpt<-diff(ts.mpt)
plot(diff.mpt)

#acf &pacf
acf(diff.mpt, lag.max = 30)

pacf(diff.mpt, lag.max = 30)

# check for p
arima(mpt,c(1,1,1))
## 
## Call:
## arima(x = mpt, order = c(1, 1, 1))
## 
## Coefficients:
##          ar1      ma1
##       0.3335  -0.9320
## s.e.  0.0675   0.0253
## 
## sigma^2 estimated as 718816:  log likelihood = -2065.59,  aic = 4137.18
arima(mpt,c(2,1,1))
## 
## Call:
## arima(x = mpt, order = c(2, 1, 1))
## 
## Coefficients:
##          ar1     ar2      ma1
##       0.2907  0.1983  -0.9506
## s.e.  0.0654  0.0651   0.0193
## 
## sigma^2 estimated as 693427:  log likelihood = -2061.01,  aic = 4130.03
arima(mpt,c(3,1,1))
## 
## Call:
## arima(x = mpt, order = c(3, 1, 1))
## 
## Coefficients:
##          ar1     ar2      ar3      ma1
##       0.3059  0.2270  -0.1360  -0.9401
## s.e.  0.0657  0.0663   0.0657   0.0231
## 
## sigma^2 estimated as 681626:  log likelihood = -2058.9,  aic = 4127.81
arima(mpt,c(5,1,1))# AIC increases from this point
## 
## Call:
## arima(x = mpt, order = c(5, 1, 1))
## 
## Coefficients:
##          ar1     ar2      ar3      ar4    ar5      ma1
##       0.2851  0.2431  -0.1134  -0.0999  0.015  -0.9310
## s.e.  0.0693  0.0680   0.0679   0.0704  0.078   0.0292
## 
## sigma^2 estimated as 675956:  log likelihood = -2057.88,  aic = 4129.75
arima(mpt,c(6,1,1)) 
## 
## Call:
## arima(x = mpt, order = c(6, 1, 1))
## 
## Coefficients:
##          ar1     ar2      ar3      ar4      ar5     ar6      ma1
##       0.2921  0.2523  -0.1062  -0.1035  -0.0121  0.0738  -0.9384
## s.e.  0.0690  0.0685   0.0684   0.0704   0.0830  0.0789   0.0279
## 
## sigma^2 estimated as 673639:  log likelihood = -2057.44,  aic = 4130.87
# check for r
arima(mpt,c(1,1,1))
## 
## Call:
## arima(x = mpt, order = c(1, 1, 1))
## 
## Coefficients:
##          ar1      ma1
##       0.3335  -0.9320
## s.e.  0.0675   0.0253
## 
## sigma^2 estimated as 718816:  log likelihood = -2065.59,  aic = 4137.18
arima(mpt,c(1,1,2))
## 
## Call:
## arima(x = mpt, order = c(1, 1, 2))
## 
## Coefficients:
##          ar1      ma1     ma2
##       0.5989  -1.2176  0.2565
## s.e.  0.1128   0.1232  0.1120
## 
## sigma^2 estimated as 706014:  log likelihood = -2063.29,  aic = 4134.58
arima(mpt,c(2,1,4)) #convergence issue, stop it at here.
## Warning in arima(mpt, c(2, 1, 4)): possible convergence problem: optim gave
## code = 1
## 
## Call:
## arima(x = mpt, order = c(2, 1, 4))
## 
## Coefficients:
##           ar1     ar2     ma1      ma2      ma3      ma4
##       -0.7487  0.2510  0.1215  -0.6649  -0.0506  -0.2563
## s.e.   0.1596  0.1596  0.1497   0.0888   0.1326   0.0777
## 
## sigma^2 estimated as 661325:  log likelihood = -2055.72,  aic = 4125.44
arima(mpt,c(2,1,5))
## 
## Call:
## arima(x = mpt, order = c(2, 1, 5))
## 
## Coefficients:
##           ar1      ar2     ma1      ma2      ma3      ma4      ma5
##       -1.3674  -0.5444  0.7359  -0.2462  -0.4947  -0.4086  -0.2972
## s.e.   0.3425   0.4234  0.3317   0.2615   0.2614   0.1020   0.1347
## 
## sigma^2 estimated as 668270:  log likelihood = -2056.46,  aic = 4128.92
arima(mpt,c(2,1,2))
## 
## Call:
## arima(x = mpt, order = c(2, 1, 2))
## 
## Coefficients:
##           ar1     ar2      ma1      ma2
##       -0.0347  0.3190  -0.6128  -0.3176
## s.e.   0.2317  0.0895   0.2397   0.2231
## 
## sigma^2 estimated as 687487:  log likelihood = -2059.95,  aic = 4129.89
arima(mpt,c(3,1,2))
## 
## Call:
## arima(x = mpt, order = c(3, 1, 2))
## 
## Coefficients:
##          ar1     ar2     ar3     ma1      ma2
##       -0.678  0.4829  0.1611  0.0474  -0.9459
## s.e.   0.066  0.0797  0.0656  0.0207   0.0206
## 
## sigma^2 estimated as 674750:  log likelihood = -2058.3,  aic = 4128.59
arima(mpt,c(4,1,2))
## 
## Call:
## arima(x = mpt, order = c(4, 1, 2))
## 
## Coefficients:
##          ar1     ar2      ar3      ar4      ma1      ma2
##       0.2263  0.2578  -0.0987  -0.1040  -0.8728  -0.0530
## s.e.  0.4698  0.1550   0.1283   0.0923   0.4682   0.4396
## 
## sigma^2 estimated as 676007:  log likelihood = -2057.89,  aic = 4129.77
arima(mpt,c(5,1,2)) # #AIC =4126.53
## 
## Call:
## arima(x = mpt, order = c(5, 1, 2))
## 
## Coefficients:
##           ar1     ar2     ar3      ar4      ar5     ma1      ma2
##       -0.6924  0.5104  0.1261  -0.1984  -0.1217  0.0703  -0.9251
## s.e.   0.0683  0.0862  0.0866   0.0819   0.0671  0.0295   0.0294
## 
## sigma^2 estimated as 657710:  log likelihood = -2055.26,  aic = 4126.53
arima(mpt,c(1,1,1))
## 
## Call:
## arima(x = mpt, order = c(1, 1, 1))
## 
## Coefficients:
##          ar1      ma1
##       0.3335  -0.9320
## s.e.  0.0675   0.0253
## 
## sigma^2 estimated as 718816:  log likelihood = -2065.59,  aic = 4137.18
arima(mpt,c(2,1,1))
## 
## Call:
## arima(x = mpt, order = c(2, 1, 1))
## 
## Coefficients:
##          ar1     ar2      ma1
##       0.2907  0.1983  -0.9506
## s.e.  0.0654  0.0651   0.0193
## 
## sigma^2 estimated as 693427:  log likelihood = -2061.01,  aic = 4130.03
arima(mpt,c(3,1,1))
## 
## Call:
## arima(x = mpt, order = c(3, 1, 1))
## 
## Coefficients:
##          ar1     ar2      ar3      ma1
##       0.3059  0.2270  -0.1360  -0.9401
## s.e.  0.0657  0.0663   0.0657   0.0231
## 
## sigma^2 estimated as 681626:  log likelihood = -2058.9,  aic = 4127.81
arima(mpt,c(4,1,1))
## 
## Call:
## arima(x = mpt, order = c(4, 1, 1))
## 
## Coefficients:
##          ar1     ar2      ar3      ar4      ma1
##       0.2824  0.2408  -0.1119  -0.0959  -0.9292
## s.e.  0.0682  0.0670   0.0675   0.0673   0.0284
## 
## sigma^2 estimated as 676039:  log likelihood = -2057.89,  aic = 4127.79
arima(mpt,c(5,1,1))
## 
## Call:
## arima(x = mpt, order = c(5, 1, 1))
## 
## Coefficients:
##          ar1     ar2      ar3      ar4    ar5      ma1
##       0.2851  0.2431  -0.1134  -0.0999  0.015  -0.9310
## s.e.  0.0693  0.0680   0.0679   0.0704  0.078   0.0292
## 
## sigma^2 estimated as 675956:  log likelihood = -2057.88,  aic = 4129.75
arima(mpt,c(1,1,3)) #convergence issue.
## 
## Call:
## arima(x = mpt, order = c(1, 1, 3))
## 
## Coefficients:
##          ar1      ma1     ma2      ma3
##       0.1986  -0.8479  0.2188  -0.2918
## s.e.  0.1576   0.1470  0.1212   0.0722
## 
## sigma^2 estimated as 675774:  log likelihood = -2057.82,  aic = 4125.64
arima(mpt,c(2,1,3))
## Warning in arima(mpt, c(2, 1, 3)): possible convergence problem: optim gave
## code = 1
## 
## Call:
## arima(x = mpt, order = c(2, 1, 3))
## 
## Coefficients:
##           ar1     ar2      ma1      ma2     ma3
##       -0.4053  0.5933  -0.1911  -0.9501  0.2215
## s.e.   0.1122  0.1122   0.1236   0.0257  0.1115
## 
## sigma^2 estimated as 683731:  log likelihood = -2059.74,  aic = 4131.48
arima(mpt,c(3,1,3))
## 
## Call:
## arima(x = mpt, order = c(3, 1, 3))
## 
## Coefficients:
##           ar1     ar2     ar3     ma1      ma2      ma3
##       -0.9539  0.3173  0.2714  0.3308  -0.9302  -0.2658
## s.e.   0.2367  0.1823  0.0972  0.2392   0.0300   0.2227
## 
## sigma^2 estimated as 672210:  log likelihood = -2057.77,  aic = 4129.54
arima(mpt,c(4,1,3)) # AIC= 4126.32
## 
## Call:
## arima(x = mpt, order = c(4, 1, 3))
## 
## Coefficients:
##          ar1      ar2     ar3     ar4      ma1     ma2      ma3
##       0.0279  -0.6394  0.2877  0.1156  -0.6743  0.7432  -0.9458
## s.e.  0.0676   0.0659  0.0690  0.0684   0.0245  0.0284   0.0347
## 
## sigma^2 estimated as 649595:  log likelihood = -2055.16,  aic = 4126.32
arima(mpt,c(5,1,3)) # produces NaNs
## 
## Call:
## arima(x = mpt, order = c(5, 1, 3))
## 
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
##           ar1      ar2     ar3      ar4      ar5     ma1     ma2      ma3
##       -0.2515  -0.2326  0.2083  -0.0307  -0.1334  -0.395  0.1416  -0.5965
## s.e.      NaN   0.1258  0.0803   0.0778   0.0604     NaN     NaN   0.1602
## 
## sigma^2 estimated as 669089:  log likelihood = -2056.61,  aic = 4131.22
require(forecast)
## Loading required package: forecast
auto.arima(mpt) # gives ARIMA(5,1,3) #AIC= 4131.22
## Series: mpt 
## ARIMA(5,1,3) 
## 
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
##           ar1      ar2     ar3      ar4      ar5     ma1     ma2      ma3
##       -0.2515  -0.2326  0.2083  -0.0307  -0.1334  -0.395  0.1416  -0.5965
## s.e.      NaN   0.1258  0.0803   0.0778   0.0604     NaN     NaN   0.1602
## 
## sigma^2 estimated as 690936:  log likelihood=-2056.61
## AIC=4131.22   AICc=4131.96   BIC=4163.02
mod.1<-arima(mpt,order=c(5,1,2))
mod.2<-arima(mpt,order=c(4,1,3))
mod.3<-arima(mpt,order=c(5,1,3))
plot(mod.1)

a<-predict(mod.1,n.ahead=300)
a$pred[261] #Aug,2018
## [1] 2751.848
a$se[261]
## [1] 1655.946
b<-predict(mod.2,n.ahead=300) # standard error is low with this model
b$pred[261]
## [1] 2600.761
b$se[261]
## [1] 1626.982
c<-predict(mod.3,n.ahead=300)
c$pred[261]
## [1] 2626.479
c$se[261]
## [1] 1657.637
mpt.test<-window(mpt,start=130)
mpt.test
##   [1]  612  906  591 1694  712 1216  857 1243  965 1259 1075 1040  893  986
##  [15] 1040 1844  630 1528 1105 1268 1129 1278  876 1262  834  701  751  846
##  [29]  663 1397  825 1359 1072 1223 1182 1586 1739 1440  827  840  602 1162
##  [43]  786  946 1426 1232 1086 1453 1547 1882  892  688  816 1484  870  863
##  [57] 1222 1536 1038 1091 1697 1806 5118 7546 5327 4123 1701 1455 2042 1870
##  [71] 1412 1351 1808 2284 1260 2253 1674 2542 1872 1509 2564 2239 1431 6984
##  [85] 1812 1947 1564 1923 1853 2606 1736 1677 2938 2812 2585 2654 2514 2267
##  [99] 2016 2209 2737 3016 2075 1929 2808 2554 2124 2178 2108 2616 1423  399
## [113]   80 1353 3451 3246 4062 3534 2773 3001 2695 2855 3562 4403 2272
## attr(,"tsp")
## [1] 130 254   1