library(readxl)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tseries)
# Load data
setwd("C:/Users/favou/OneDrive/Documents")
Data <- read_excel("icammda_pricemeaslesdata22.xls")
head(Data)
## # A tibble: 6 × 4
## `Time(Month)` Year Month `Jere LGA, Borno State`
## <dbl> <dbl> <chr> <dbl>
## 1 1 2016 Jan 12
## 2 2 2016 Feb 7
## 3 3 2016 Mar 1
## 4 4 2016 Apr 17
## 5 5 2016 May 50
## 6 6 2016 Jun 2
# Extract series
Data_2 <- Data$`Jere LGA, Borno State`
Creates a monthly date sequence from January 2016 to match the number of observations in the dataset.
# Date sequence
dates_data <- seq(as.Date("2016/1/1"), by="month", length.out=nrow(Data))
Plots the original (raw) time series of measles cases over time to visualize trends and fluctuations.
# Plot raw data
plot(Data$`Jere LGA, Borno State` ~ dates_data, type='l', col='red',
xlab="Time (months)", ylab="Measles Cases",
main="Time Series Plot of Measles Cases in Jere LGA, Borno State")
Performs the Augmented Dickey-Fuller (ADF) test to check if the time
series is stationary (i.e., constant mean and variance over time).
# Stationarity test
adf.test(Data_2)
##
## Augmented Dickey-Fuller Test
##
## data: Data_2
## Dickey-Fuller = -3.8246, Lag order = 4, p-value = 0.02172
## alternative hypothesis: stationary
Removes trend (non-stationarity) by differencing: Takes first difference of the series (diff()). Tests stationarity again with adf.test(). Plots the differenced series.
# Differencing
Measles_case_d1 <- diff(Data_2)
adf.test(Measles_case_d1)
## Warning in adf.test(Measles_case_d1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: Measles_case_d1
## Dickey-Fuller = -5.0428, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
plot(Measles_case_d1, main="Differenced Measles Cases", type='l')
Plots ACF and PACF of differenced data to identify possible ARIMA model
parameters (AR, MA).
acf(Measles_case_d1, main="ACF - Differenced Series")
pacf(Measles_case_d1, main="PACF - Differenced Series")
#Converts the data into a time series object
ts_measles <- ts(Data_2, start=c(2016,1), frequency=12)
ts_measles
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2016 12 7 1 17 50 2 8 166 131 59 39 9
## 2017 0 4 0 1 1 0 0 0 1 0 2 6
## 2018 0 2 2 1 2 3 0 0 2 0 11 244
## 2019 343 474 658 468 56 34 0 3 0 1 1 1
## 2020 1 0 41 30 546 275 310 58 262 87 53 178
## 2021 26 205 123 138 174 352 242 271 46 79 90 73
## 2022 157 84 233 134 162 83 138 158 1 0 2 0
# Decomposition
decomposed <- decompose(ts_measles)
plot(decomposed)
auto.arima(sqrt(ts_measles))
## Series: sqrt(ts_measles)
## ARIMA(1,1,0)
##
## Coefficients:
## ar1
## -0.2335
## s.e. 0.1061
##
## sigma^2 = 21.87: log likelihood = -245.33
## AIC=494.66 AICc=494.81 BIC=499.5
Fits an ARIMA(1,1,0)(0,0,1) model to the square-root-transformed series:
(1,1,0) = AR(1), differenced once, no MA component.
Seasonal part (0,0,1) = one seasonal MA term.
The square root transformation stabilizes variance.
# ARIMA fit
cases_fit <- arima(x=sqrt(ts_measles), order=c(1,1,0),
seasonal=list(order=c(0,0,1)))
summary(cases_fit)
##
## Call:
## arima(x = sqrt(ts_measles), order = c(1, 1, 0), seasonal = list(order = c(0,
## 0, 1)))
##
## Coefficients:
## ar1 sma1
## -0.1856 -0.1842
## s.e. 0.1115 0.1256
##
## sigma^2 estimated as 20.98: log likelihood = -244.3, aic = 494.59
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.02615164 4.5526 3.078198 NaN Inf 0.9517179 0.01862956
# Forecast
cases_forecast <- forecast(cases_fit, h=24)
cases_forecast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2023 -0.20326985 -6.072703 5.666163 -9.179793 8.773253
## Feb 2023 0.05873514 -7.510793 7.628263 -11.517859 11.635329
## Mar 2023 -0.99065818 -10.052644 8.071328 -14.849770 12.868454
## Apr 2023 -0.32484884 -10.648064 9.998366 -16.112844 15.463146
## May 2023 -0.68124201 -12.130561 10.768077 -18.191464 16.828980
## Jun 2023 -0.15975139 -12.633413 12.313910 -19.236570 18.917067
## Jul 2023 -0.53597673 -13.956108 12.884155 -21.060297 19.988343
## Aug 2023 -0.65782193 -14.961919 13.646275 -22.534051 21.218407
## Sep 2023 1.74930633 -13.387224 16.885836 -21.400018 24.898631
## Oct 2023 1.90410117 -14.021409 17.829611 -22.451864 26.260067
## Nov 2023 1.63539480 -15.041811 18.312601 -23.870191 27.140980
## Dec 2023 1.88733016 -15.509121 19.283782 -24.718247 28.492907
## Jan 2024 1.84056413 -15.981054 19.662183 -25.415249 29.096378
## Feb 2024 1.84924517 -16.431257 20.129747 -26.108370 29.806860
## Mar 2024 1.84763373 -16.872452 20.567719 -26.782267 30.477534
## Apr 2024 1.84793286 -17.303106 20.998971 -27.441053 31.136919
## May 2024 1.84787733 -17.724363 21.420117 -28.085280 31.781035
## Jun 2024 1.84788764 -18.136726 21.832502 -28.715942 32.411717
## Jul 2024 1.84788573 -18.540755 22.236526 -29.333849 33.029621
## Aug 2024 1.84788608 -18.936930 22.632703 -29.939748 33.635520
## Sep 2024 1.84788602 -19.325695 23.021467 -30.534311 34.230083
## Oct 2024 1.84788603 -19.707448 23.403220 -31.118153 34.813925
## Nov 2024 1.84788603 -20.082558 23.778330 -31.691833 35.387606
## Dec 2024 1.84788603 -20.451358 24.147130 -32.255865 35.951637
plot(cases_forecast, ylab="√ of Cases", xlab="Time (years)", lwd=1)
legend("topleft", c("Measles Cases", "Forecast", "80% CI", "95% CI"),
cex=0.6, col=c(1,4,5,8), lty=c(1,1,1,2))
# Residual check
checkresiduals(cases_fit)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,0)(0,0,1)[12]
## Q* = 27.214, df = 15, p-value = 0.02704
##
## Model df: 2. Total lags used: 17