In this document, I reproduce the time series analysis of malaria in Afghanistan (Link to paper).
#graphics options
source("setPowerPointStyle.R")
setPowerPointStyle()
#reading data file
my_ts=read.csv('12936_2016_1602_MOESM2_ESM.csv')
#extracting malaria series
malaria=ts(my_ts$malaria_avg, start=c(2005, 1), end=c(2015, 9), frequency=12)
#log_transform and differencing
malaria_pp=diff(log(malaria))
#generating Fig 3a
plot(malaria_pp,main='Time series of malaria incidence',ylab='log transormed and differenced')
#generating Fig 3b
acf(malaria_pp,main="Autocorrelation function")
#generating Fig 3c
pacf(malaria_pp,main="Partial autocorrelation function")
#graphics options
source("setPowerPointStyle.R")
setPowerPointStyle()
#split series in training set (Jan 2005- Dec 2013) and test set (Jan 2014- Sept 2015)
train_series=window(malaria_pp, c(2005, 1), c(2013, 12))
test_series=window(malaria_pp, c(2014, 1), c(2015, 9))
#model 1
model1=arima(train_series,order=c(4,1,1),seasonal = list(order=c(1,0,1)))
#model 2
model2=arima(train_series,order=c(1,1,1),seasonal = list(order=c(1,0,1)))
library(forecast)
source("setPowerPointStyle.R")
setPowerPointStyle()
model1_pred=forecast(model1,h=21)
plot(model1_pred, ylab="malaria incidence (log and diff)")
points(test_series,col='orange',pch = 20)
model2_pred=forecast(model2,h=21)
plot(model2_pred,ylab="malaria incidence (log and diff)")
points(test_series,col='orange',pch = 20)
model1_MSE=mean((model1_pred$mean-test_series)^2)
print(model1_MSE)
## [1] 0.01610619
model2_MSE=mean((model2_pred$mean-test_series)^2)
print(model2_MSE)
## [1] 0.01716086