I did a timeseries analysis on a data set and created arima models that could be used for forecasting.
mpt.1<-read.csv("~/1 UW Tacoma/560 data mining/rcode/time series/ts.csv")
mpt<-mpt.1[,-1]
#View(mpt.1)
library(tseries)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
library(TTR)
ts.mpt<-ts(mpt, frequency =12, start =c(1997,1))
summary(ts.mpt)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8 562 1039 1277 1676 7546
#plots
plot(ts.mpt)

options(scipen = "999")
# check for white noise
Box.test(mpt,lag=6,type = c("Box-Pierce")) # p-value< 0.05, no white noise. Analysis can be carried out
##
## Box-Pierce test
##
## data: mpt
## X-squared = 304.7, df = 6, p-value < 0.00000000000000022
# breaking down into different components
sma<-SMA(ts.mpt)
decom<-decompose(sma)
#seasonal
plot(decom$seasonal)

#trend
plot(decom$trend)

#randomness
plot(decom$random)

#acf & pacf for original data
acf(ts.mpt)

pacf(ts.mpt)

#ADF & KPSS tests
kpss.test(ts.mpt) #p-value less than .05, stochastic trend (randomness), need to detrend.
## Warning in kpss.test(ts.mpt): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: ts.mpt
## KPSS Level = 3.5407, Truncation lag parameter = 3, p-value = 0.01
adf.test(ts.mpt) #p-value less than .05, stationary.
## Warning in adf.test(ts.mpt): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: ts.mpt
## Dickey-Fuller = -5.6708, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
#differencing
diff.mpt<-diff(ts.mpt)
plot(diff.mpt)

#acf &pacf
acf(diff.mpt, lag.max = 30)

pacf(diff.mpt, lag.max = 30)

# check for p
arima(mpt,c(1,1,1))
##
## Call:
## arima(x = mpt, order = c(1, 1, 1))
##
## Coefficients:
## ar1 ma1
## 0.3335 -0.9320
## s.e. 0.0675 0.0253
##
## sigma^2 estimated as 718816: log likelihood = -2065.59, aic = 4137.18
arima(mpt,c(2,1,1))
##
## Call:
## arima(x = mpt, order = c(2, 1, 1))
##
## Coefficients:
## ar1 ar2 ma1
## 0.2907 0.1983 -0.9506
## s.e. 0.0654 0.0651 0.0193
##
## sigma^2 estimated as 693427: log likelihood = -2061.01, aic = 4130.03
arima(mpt,c(3,1,1))
##
## Call:
## arima(x = mpt, order = c(3, 1, 1))
##
## Coefficients:
## ar1 ar2 ar3 ma1
## 0.3059 0.2270 -0.1360 -0.9401
## s.e. 0.0657 0.0663 0.0657 0.0231
##
## sigma^2 estimated as 681626: log likelihood = -2058.9, aic = 4127.81
arima(mpt,c(5,1,1))# AIC increases from this point
##
## Call:
## arima(x = mpt, order = c(5, 1, 1))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1
## 0.2851 0.2431 -0.1134 -0.0999 0.015 -0.9310
## s.e. 0.0693 0.0680 0.0679 0.0704 0.078 0.0292
##
## sigma^2 estimated as 675956: log likelihood = -2057.88, aic = 4129.75
arima(mpt,c(6,1,1))
##
## Call:
## arima(x = mpt, order = c(6, 1, 1))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ma1
## 0.2921 0.2523 -0.1062 -0.1035 -0.0121 0.0738 -0.9384
## s.e. 0.0690 0.0685 0.0684 0.0704 0.0830 0.0789 0.0279
##
## sigma^2 estimated as 673639: log likelihood = -2057.44, aic = 4130.87
# check for r
arima(mpt,c(1,1,1))
##
## Call:
## arima(x = mpt, order = c(1, 1, 1))
##
## Coefficients:
## ar1 ma1
## 0.3335 -0.9320
## s.e. 0.0675 0.0253
##
## sigma^2 estimated as 718816: log likelihood = -2065.59, aic = 4137.18
arima(mpt,c(1,1,2))
##
## Call:
## arima(x = mpt, order = c(1, 1, 2))
##
## Coefficients:
## ar1 ma1 ma2
## 0.5989 -1.2176 0.2565
## s.e. 0.1128 0.1232 0.1120
##
## sigma^2 estimated as 706014: log likelihood = -2063.29, aic = 4134.58
arima(mpt,c(2,1,4)) #convergence issue, stop it at here.
## Warning in arima(mpt, c(2, 1, 4)): possible convergence problem: optim gave
## code = 1
##
## Call:
## arima(x = mpt, order = c(2, 1, 4))
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 ma4
## -0.7487 0.2510 0.1215 -0.6649 -0.0506 -0.2563
## s.e. 0.1596 0.1596 0.1497 0.0888 0.1326 0.0777
##
## sigma^2 estimated as 661325: log likelihood = -2055.72, aic = 4125.44
arima(mpt,c(2,1,5))
##
## Call:
## arima(x = mpt, order = c(2, 1, 5))
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 ma4 ma5
## -1.3674 -0.5444 0.7359 -0.2462 -0.4947 -0.4086 -0.2972
## s.e. 0.3425 0.4234 0.3317 0.2615 0.2614 0.1020 0.1347
##
## sigma^2 estimated as 668270: log likelihood = -2056.46, aic = 4128.92
arima(mpt,c(2,1,2))
##
## Call:
## arima(x = mpt, order = c(2, 1, 2))
##
## Coefficients:
## ar1 ar2 ma1 ma2
## -0.0347 0.3190 -0.6128 -0.3176
## s.e. 0.2317 0.0895 0.2397 0.2231
##
## sigma^2 estimated as 687487: log likelihood = -2059.95, aic = 4129.89
arima(mpt,c(3,1,2))
##
## Call:
## arima(x = mpt, order = c(3, 1, 2))
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2
## -0.678 0.4829 0.1611 0.0474 -0.9459
## s.e. 0.066 0.0797 0.0656 0.0207 0.0206
##
## sigma^2 estimated as 674750: log likelihood = -2058.3, aic = 4128.59
arima(mpt,c(4,1,2))
##
## Call:
## arima(x = mpt, order = c(4, 1, 2))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2
## 0.2263 0.2578 -0.0987 -0.1040 -0.8728 -0.0530
## s.e. 0.4698 0.1550 0.1283 0.0923 0.4682 0.4396
##
## sigma^2 estimated as 676007: log likelihood = -2057.89, aic = 4129.77
arima(mpt,c(5,1,2)) # #AIC =4126.53
##
## Call:
## arima(x = mpt, order = c(5, 1, 2))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 ma2
## -0.6924 0.5104 0.1261 -0.1984 -0.1217 0.0703 -0.9251
## s.e. 0.0683 0.0862 0.0866 0.0819 0.0671 0.0295 0.0294
##
## sigma^2 estimated as 657710: log likelihood = -2055.26, aic = 4126.53
arima(mpt,c(1,1,1))
##
## Call:
## arima(x = mpt, order = c(1, 1, 1))
##
## Coefficients:
## ar1 ma1
## 0.3335 -0.9320
## s.e. 0.0675 0.0253
##
## sigma^2 estimated as 718816: log likelihood = -2065.59, aic = 4137.18
arima(mpt,c(2,1,1))
##
## Call:
## arima(x = mpt, order = c(2, 1, 1))
##
## Coefficients:
## ar1 ar2 ma1
## 0.2907 0.1983 -0.9506
## s.e. 0.0654 0.0651 0.0193
##
## sigma^2 estimated as 693427: log likelihood = -2061.01, aic = 4130.03
arima(mpt,c(3,1,1))
##
## Call:
## arima(x = mpt, order = c(3, 1, 1))
##
## Coefficients:
## ar1 ar2 ar3 ma1
## 0.3059 0.2270 -0.1360 -0.9401
## s.e. 0.0657 0.0663 0.0657 0.0231
##
## sigma^2 estimated as 681626: log likelihood = -2058.9, aic = 4127.81
arima(mpt,c(4,1,1))
##
## Call:
## arima(x = mpt, order = c(4, 1, 1))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1
## 0.2824 0.2408 -0.1119 -0.0959 -0.9292
## s.e. 0.0682 0.0670 0.0675 0.0673 0.0284
##
## sigma^2 estimated as 676039: log likelihood = -2057.89, aic = 4127.79
arima(mpt,c(5,1,1))
##
## Call:
## arima(x = mpt, order = c(5, 1, 1))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1
## 0.2851 0.2431 -0.1134 -0.0999 0.015 -0.9310
## s.e. 0.0693 0.0680 0.0679 0.0704 0.078 0.0292
##
## sigma^2 estimated as 675956: log likelihood = -2057.88, aic = 4129.75
arima(mpt,c(1,1,3)) #convergence issue.
##
## Call:
## arima(x = mpt, order = c(1, 1, 3))
##
## Coefficients:
## ar1 ma1 ma2 ma3
## 0.1986 -0.8479 0.2188 -0.2918
## s.e. 0.1576 0.1470 0.1212 0.0722
##
## sigma^2 estimated as 675774: log likelihood = -2057.82, aic = 4125.64
arima(mpt,c(2,1,3))
## Warning in arima(mpt, c(2, 1, 3)): possible convergence problem: optim gave
## code = 1
##
## Call:
## arima(x = mpt, order = c(2, 1, 3))
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3
## -0.4053 0.5933 -0.1911 -0.9501 0.2215
## s.e. 0.1122 0.1122 0.1236 0.0257 0.1115
##
## sigma^2 estimated as 683731: log likelihood = -2059.74, aic = 4131.48
arima(mpt,c(3,1,3))
##
## Call:
## arima(x = mpt, order = c(3, 1, 3))
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3
## -0.9539 0.3173 0.2714 0.3308 -0.9302 -0.2658
## s.e. 0.2367 0.1823 0.0972 0.2392 0.0300 0.2227
##
## sigma^2 estimated as 672210: log likelihood = -2057.77, aic = 4129.54
arima(mpt,c(4,1,3)) # AIC= 4126.32
##
## Call:
## arima(x = mpt, order = c(4, 1, 3))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3
## 0.0279 -0.6394 0.2877 0.1156 -0.6743 0.7432 -0.9458
## s.e. 0.0676 0.0659 0.0690 0.0684 0.0245 0.0284 0.0347
##
## sigma^2 estimated as 649595: log likelihood = -2055.16, aic = 4126.32
arima(mpt,c(5,1,3)) # produces NaNs
##
## Call:
## arima(x = mpt, order = c(5, 1, 3))
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 ar2 ar3 ar4 ar5 ma1 ma2 ma3
## -0.2515 -0.2326 0.2083 -0.0307 -0.1334 -0.395 0.1416 -0.5965
## s.e. NaN 0.1258 0.0803 0.0778 0.0604 NaN NaN 0.1602
##
## sigma^2 estimated as 669089: log likelihood = -2056.61, aic = 4131.22
require(forecast)
## Loading required package: forecast
auto.arima(mpt) # gives ARIMA(5,1,3) #AIC= 4131.22
## Series: mpt
## ARIMA(5,1,3)
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 ar2 ar3 ar4 ar5 ma1 ma2 ma3
## -0.2515 -0.2326 0.2083 -0.0307 -0.1334 -0.395 0.1416 -0.5965
## s.e. NaN 0.1258 0.0803 0.0778 0.0604 NaN NaN 0.1602
##
## sigma^2 estimated as 690936: log likelihood=-2056.61
## AIC=4131.22 AICc=4131.96 BIC=4163.02
mod.1<-arima(mpt,order=c(5,1,2))
mod.2<-arima(mpt,order=c(4,1,3))
mod.3<-arima(mpt,order=c(5,1,3))
plot(mod.1)

a<-predict(mod.1,n.ahead=300)
a$pred[261] #Aug,2018
## [1] 2751.848
a$se[261]
## [1] 1655.946
b<-predict(mod.2,n.ahead=300) # standard error is low with this model
b$pred[261]
## [1] 2600.761
b$se[261]
## [1] 1626.982
c<-predict(mod.3,n.ahead=300)
c$pred[261]
## [1] 2626.479
c$se[261]
## [1] 1657.637
mpt.test<-window(mpt,start=130)
mpt.test
## [1] 612 906 591 1694 712 1216 857 1243 965 1259 1075 1040 893 986
## [15] 1040 1844 630 1528 1105 1268 1129 1278 876 1262 834 701 751 846
## [29] 663 1397 825 1359 1072 1223 1182 1586 1739 1440 827 840 602 1162
## [43] 786 946 1426 1232 1086 1453 1547 1882 892 688 816 1484 870 863
## [57] 1222 1536 1038 1091 1697 1806 5118 7546 5327 4123 1701 1455 2042 1870
## [71] 1412 1351 1808 2284 1260 2253 1674 2542 1872 1509 2564 2239 1431 6984
## [85] 1812 1947 1564 1923 1853 2606 1736 1677 2938 2812 2585 2654 2514 2267
## [99] 2016 2209 2737 3016 2075 1929 2808 2554 2124 2178 2108 2616 1423 399
## [113] 80 1353 3451 3246 4062 3534 2773 3001 2695 2855 3562 4403 2272
## attr(,"tsp")
## [1] 130 254 1