The objective of this analysis is to estimate military expenditure over a period of 5 years and to determine which model performs best: ETS, Holt-Winters, or ARIMA.
library(dplyr)
library(lubridate)
library(forecast)
library(TTR)
library(ggplot2)
library(tseries)
# Baseline: load the cleaned data, drop the non-series columns and evaluate a
# mean forecast on a hold-out window.
ds <- read.csv("d:/UPWORK-IRL-1/dn_clean_ts_1.csv")
# The remaining column(s) are converted to a ts object with the default time
# index, so the window() bounds below are plain observation positions.
ds02 <- as.ts(select(ds, -c("X", "Year", "hdi", "Name")))
ts <- ds02 %>% window(start = 2044, end = 2314) # training window
tr <- ds02 %>% window(start = 2315, end = 2405) # test window
## Warning in window.default(x, ...): 'end' value not changed
plot(tr)
# NOTE(review): despite its name, accuracy_ETS is computed from meanf()
# (a mean forecast); no ets() model is fitted in this block.
# NOTE(review): h = 475 is longer than the requested test window (91 rows) --
# confirm the intended horizon.
accuracy_ETS <- meanf(ts, h = 475) %>% accuracy(tr)
accuracy_ETS
## ME RMSE MAE MPE MAPE MASE
## Training set -5.700792e-16 2.350760 1.832038 -1.450730 9.163121 0.7275384
## Test set 2.862396e-03 2.499219 1.907167 -1.679778 9.693489 0.7573733
## ACF1 Theil's U
## Training set 0.07622679 NA
## Test set 0.02075756 0.6372819
# Holt exponential smoothing: reload the data, split it by row position, fit
# the model on the training rows and score the forecast against the test rows.
ds <- read.csv("d:/UPWORK-IRL-1/dn_clean_ts_1.csv")
ds02 <- select(ds, -c("X", "Year", "hdi", "Name"))
# presumably a single column remains after select(), so row indexing yields a
# plain numeric vector -- TODO confirm against the CSV schema
test02 <- ds02[2315:2405, ]
train02 <- ds02[2044:2314, ]
# gamma = FALSE switches off the seasonal component (non-seasonal fit).
# FALSE is spelled out because the alias `F` is an ordinary variable that can
# be reassigned elsewhere in a session.
df_holt1 <- HoltWinters(x = train02, gamma = FALSE)
f_me_holt <- forecast(df_holt1, h = 475)
plot(f_me_holt)
accuracy_holt <- accuracy(f_me_holt, test02)
accuracy_holt
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1687680 2.466238 1.931718 -2.289210 9.710092 0.7671232
## Test set -0.1416884 2.514707 1.898007 -2.382097 9.721400 0.7537358
## ACF1
## Training set 0.08209031
## Test set NA
Looking for military expenditure (me) in the period 2019-2023
# ARIMA preparation: reload the data, keep the series column(s), split by row
# position and test the training part for stationarity.
ds <- read.csv("d:/UPWORK-IRL-1/dn_clean_ts_1.csv")
ds02 <- ds %>% select(-c("X", "Year", "hdi", "Name"))
trainme <- ds02[2044:2314, ]
testme <- ds02[2315:2405, ]
# Augmented Dickey-Fuller test; the alternative hypothesis is stationarity.
adf.test(trainme)
## Warning in adf.test(trainme): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: trainme
## Dickey-Fuller = -5.9222, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# Fit ARIMA(1,1,1) on the training rows, forecast 475 steps ahead and score
# the forecast against the test rows.
# NOTE(review): the ADF test above rejects a unit root (p-value 0.01), yet the
# model still differences once (d = 1) -- confirm the chosen order.
df_arima_me <- Arima(trainme, order = c(1, 1, 1))
f_arima_df_me <- df_arima_me %>% forecast(h = 475)
plot(f_arima_df_me)
accuracy_arima <- accuracy(f_arima_df_me, testme)
accuracy_arima
## ME RMSE MAE MPE MAPE MASE
## Training set 0.169583006 2.343930 1.830812 -0.619336 9.082284 0.7270513
## Test set 0.002419139 2.499097 1.906639 -1.681789 9.691013 0.7571639
## ACF1
## Training set -0.0137157
## Test set NA
1. accuracy_ETS
## ME RMSE MAE MPE MAPE MASE
## Training set -5.700792e-16 2.350760 1.832038 -1.450730 9.163121 0.7275384
## Test set 2.862396e-03 2.499219 1.907167 -1.679778 9.693489 0.7573733
## ACF1 Theil's U
## Training set 0.07622679 NA
## Test set 0.02075756 0.6372819
2. accuracy_holt
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1687680 2.466238 1.931718 -2.289210 9.710092 0.7671232
## Test set -0.1416884 2.514707 1.898007 -2.382097 9.721400 0.7537358
## ACF1
## Training set 0.08209031
## Test set NA
3. accuracy_arima
## ME RMSE MAE MPE MAPE MASE
## Training set 0.169583006 2.343930 1.830812 -0.619336 9.082284 0.7270513
## Test set 0.002419139 2.499097 1.906639 -1.681789 9.691013 0.7571639
## ACF1
## Training set -0.0137157
## Test set NA
It can be concluded that the best model is the ARIMA model, which has the smallest MAPE (9.082284 on the training set and 9.691013 on the test set).
MAPE
# Rebuild the ARIMA inputs from scratch: reload the CSV, drop the non-series
# columns, split by row position and re-run the stationarity test.
ds <- read.csv("d:/UPWORK-IRL-1/dn_clean_ts_1.csv")
ds02 <- ds %>% select(-c("X", "Year", "hdi", "Name"))
trainme <- ds02[2044:2314, ]
testme <- ds02[2315:2405, ]
# Augmented Dickey-Fuller test; the alternative hypothesis is stationarity.
adf.test(trainme)
## Warning in adf.test(trainme): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: trainme
## Dickey-Fuller = -5.9222, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# Refit ARIMA(1,1,1), forecast 475 steps ahead and collect the fitted values
# in a data frame so they can be filtered.
df_arima_me <- Arima(trainme, order = c(1, 1, 1))
f_arima_df_me <- df_arima_me %>% forecast(h = 475)
# plot(f_arima_df_me)
# NOTE(review): `$fitted` holds the in-sample fitted values for the training
# rows; the h-step point forecasts live in `$mean` -- confirm which one is
# intended here.
ddme <- data.frame(f_arima_df_me$fitted)
# Keep the rows whose fitted value is at least 0.8 and preview the first few.
find_me <- filter(ddme, f_arima_df_me.fitted >= 0.8)
head(find_me)
## f_arima_df_me.fitted
## 1 19.99251
## 2 20.05176
## 3 20.00897
## 4 20.15686
## 5 20.68358
## 6 20.90445
# Largest fitted value of the ARIMA model.
# NOTE(review): this is the maximum over `$fitted` (in-sample values), not over
# the forecast horizon (`$mean`) -- confirm against the stated conclusion.
ddme <- data.frame(f_arima_df_me$fitted)
max(ddme[["f_arima_df_me.fitted"]])
## [1] 21.64691
#[1] 21.64691
The maximum military expenditure (me) reported for the period 2019-2023 is 21.64691 (computed above as the maximum of the ARIMA model's fitted values).