The objective of this analysis is to estimate military expenditure over a period of 5 years and to determine which model performs best: ETS, HoltWinters, or ARIMA.

library(dplyr)
library(lubridate)
library(forecast)
library(TTR)
library(ggplot2)
library(tseries)

Accuracy ETS model

# Load the cleaned data set, drop the X, Year, hdi and Name columns, and
# convert what remains to a time-series object.
ds <- read.csv("d:/UPWORK-IRL-1/dn_clean_ts_1.csv")
ds02 <- ds %>%
  select(-c("X","Year","hdi","Name")) %>%
  as.ts()

# Hold-out split by time index.
# NOTE: the training window is stored in a variable called `ts`, the same name
# as the base function ts(); the name is kept because later code refers to it.
ts <- window(x = ds02, start = 2044, end = 2314) # training window
tr <- window(x = ds02, start = 2315, end = 2405) # test window
## Warning in window.default(x, ...): 'end' value not changed
# The warning above means the series stops before index 2405, so the test
# window simply ends at the last available observation.
plot(tr)

Mean method

# Forecast 475 steps ahead with the mean method and score the forecasts
# against the training window and the hold-out window `tr`.
# NOTE(review): despite the `accuracy_ETS` name, this evaluates meanf() rather
# than an ets() fit -- confirm which model is meant to be compared.
mean_fc <- meanf(ts, h = 475)
accuracy_ETS <- accuracy(mean_fc, tr)
accuracy_ETS
##                         ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -5.700792e-16 2.350760 1.832038 -1.450730 9.163121 0.7275384
## Test set      2.862396e-03 2.499219 1.907167 -1.679778 9.693489 0.7573733
##                    ACF1 Theil's U
## Training set 0.07622679        NA
## Test set     0.02075756 0.6372819

Accuracy HoltWinters model

# Reload the data and drop the X, Year, hdi and Name columns.
ds <- read.csv("d:/UPWORK-IRL-1/dn_clean_ts_1.csv")
ds02 <- select(ds, -c("X","Year","hdi","Name"))

# Hold-out split by row index. The data has fewer than 2405 rows (window()
# warns that 'end' cannot be honoured for the same data), so indexing up to
# 2405 would silently pad the test set with NA rows. Clamp the upper bound to
# the last row that actually exists.
n_obs <- nrow(ds02)
stopifnot(n_obs >= 2315) # the test range must start inside the data
test02 <- ds02[2315:min(2405, n_obs), ]
train02 <- ds02[2044:2314, ]

Model HoltWinters

# Fit Holt-Winters exponential smoothing without a seasonal component.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, which would silently change the model being fitted.
df_holt1 <- HoltWinters(x = train02, gamma = FALSE)

Forecast

# Project the fitted Holt-Winters model 475 steps ahead and draw the result.
f_me_holt <- df_holt1 %>%
  forecast(h = 475)
plot(f_me_holt)

Accuracy HoltWinters

# Score the Holt-Winters forecasts on the training data and on the hold-out
# values in `test02`, then print the resulting table.
accuracy_holt <- f_me_holt %>%
  accuracy(test02)
accuracy_holt
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.1687680 2.466238 1.931718 -2.289210 9.710092 0.7671232
## Test set     -0.1416884 2.514707 1.898007 -2.382097 9.721400 0.7537358
##                    ACF1
## Training set 0.08209031
## Test set             NA

Accuracy Arima model

Looking for military expenditure (me) in the period 2019-2023

# Reload the data and drop the X, Year, hdi and Name columns.
ds <- read.csv("d:/UPWORK-IRL-1/dn_clean_ts_1.csv")
ds02 <- select(ds, -c("X","Year","hdi","Name"))

# Hold-out split by row index. The data has fewer than 2405 rows (window()
# warns that 'end' cannot be honoured for the same data), so indexing up to
# 2405 would silently pad the test set with NA rows. Clamp the upper bound to
# the last row that actually exists.
n_obs <- nrow(ds02)
stopifnot(n_obs >= 2315) # the test range must start inside the data
testme <- ds02[2315:min(2405, n_obs), ]
trainme <- ds02[2044:2314, ]

# Augmented Dickey-Fuller test on the training series (alternative hypothesis:
# stationary).
adf.test(trainme)
## Warning in adf.test(trainme): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  trainme
## Dickey-Fuller = -5.9222, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# Fit an ARIMA(1,1,1) to the training series, forecast 475 steps ahead and
# plot the forecast.
# NOTE(review): the ADF output above reports the series as stationary
# (p-value = 0.01), yet the model differences once (d = 1) -- confirm the
# order is intended.
df_arima_me <- Arima(
  y = trainme,
  order = c(1,1,1)
)
f_arima_df_me <- df_arima_me %>%
  forecast(h = 475)
plot(f_arima_df_me)

Accuracy model Arima

# Score the ARIMA forecasts on the training data and on the hold-out values in
# `testme`, then print the resulting table.
accuracy_arima <- f_arima_df_me %>%
  accuracy(testme)
accuracy_arima
##                       ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.169583006 2.343930 1.830812 -0.619336 9.082284 0.7270513
## Test set     0.002419139 2.499097 1.906639 -1.681789 9.691013 0.7571639
##                    ACF1
## Training set -0.0137157
## Test set             NA

The best model

1. accuracy_ETS
##                         ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -5.700792e-16 2.350760 1.832038 -1.450730 9.163121 0.7275384
## Test set      2.862396e-03 2.499219 1.907167 -1.679778 9.693489 0.7573733
##                    ACF1 Theil's U
## Training set 0.07622679        NA
## Test set     0.02075756 0.6372819

2. accuracy_holt
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.1687680 2.466238 1.931718 -2.289210 9.710092 0.7671232
## Test set     -0.1416884 2.514707 1.898007 -2.382097 9.721400 0.7537358
##                    ACF1
## Training set 0.08209031
## Test set             NA

3. accuracy_arima
##                       ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.169583006 2.343930 1.830812 -0.619336 9.082284 0.7270513
## Test set     0.002419139 2.499097 1.906639 -1.681789 9.691013 0.7571639
##                    ACF1
## Training set -0.0137157
## Test set             NA

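It can be concluded that the best model is the ARIMA model: it has the smallest test-set MAPE (9.691013, versus 9.693489 for the mean method and 9.721400 for HoltWinters) as well as the smallest training-set MAPE (9.082284).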
MAPE

Looking for military expenditure (me) in the period 2019-2023

# Reload the data and drop the X, Year, hdi and Name columns.
ds <- read.csv("d:/UPWORK-IRL-1/dn_clean_ts_1.csv")
ds02 <- select(ds, -c("X","Year","hdi","Name"))

# Hold-out split by row index. The data has fewer than 2405 rows (window()
# warns that 'end' cannot be honoured for the same data), so indexing up to
# 2405 would silently pad the test set with NA rows. Clamp the upper bound to
# the last row that actually exists.
n_obs <- nrow(ds02)
stopifnot(n_obs >= 2315) # the test range must start inside the data
testme <- ds02[2315:min(2405, n_obs), ]
trainme <- ds02[2044:2314, ]

# Augmented Dickey-Fuller test on the training series (alternative hypothesis:
# stationary).
adf.test(trainme)
## Warning in adf.test(trainme): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  trainme
## Dickey-Fuller = -5.9222, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# Refit the ARIMA(1,1,1) on the training series and forecast 475 steps ahead;
# the forecast plot is intentionally left disabled here.
df_arima_me <- Arima(
  y = trainme,
  order = c(1,1,1)
)
f_arima_df_me <- df_arima_me %>%
  forecast(h = 475)
#plot(f_arima_df_me)

Looking for me > 0.8

# Collect the ARIMA point forecasts and keep those of at least 0.8.
# `$mean` holds the h-step-ahead point forecasts. `$fitted` holds the
# in-sample one-step fits for the training rows, so it does not describe the
# forecast period and must not be used for this question.
# NOTE: the rendered output shown below this chunk was produced from the
# fitted values and has to be regenerated.
ddme <- data.frame(me_forecast = as.numeric(f_arima_df_me$mean))
find_me <- ddme %>%
  filter(me_forecast >= 0.8)
head(find_me)
##   f_arima_df_me.fitted
## 1             19.99251
## 2             20.05176
## 3             20.00897
## 4             20.15686
## 5             20.68358
## 6             20.90445

What is the maximum military expenditure (me) in the period 2019-2023?

# Maximum forecast military expenditure over the 475-step forecast horizon.
# `$mean` holds the point forecasts. `$fitted` only contains in-sample fits
# for the training rows, so its maximum says nothing about the forecast period.
# NOTE: the value printed below and quoted in the conclusion was computed from
# the fitted values; both must be refreshed after re-running this chunk.
ddme <- data.frame(me_forecast = as.numeric(f_arima_df_me$mean))
max(ddme$me_forecast)
## [1] 21.64691
#[1] 21.64691

Conclusion

Maximal military expenditure(me) in period 2019-2023 is 21.64691