library(readxl)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(tseries)
# Load data
setwd("C:/Users/favou/OneDrive/Documents")
Data <- read_excel("icammda_pricemeaslesdata22.xls")
head(Data)
## # A tibble: 6 × 4
##   `Time(Month)`  Year Month `Jere LGA, Borno State`
##           <dbl> <dbl> <chr>                   <dbl>
## 1             1  2016 Jan                        12
## 2             2  2016 Feb                         7
## 3             3  2016 Mar                         1
## 4             4  2016 Apr                        17
## 5             5  2016 May                        50
## 6             6  2016 Jun                         2
# Extract series
Data_2 <- Data$`Jere LGA, Borno State`

Creates a monthly date sequence from January 2016 to match the number of observations in the dataset.

# Date sequence
dates_data <- seq(as.Date("2016/1/1"), by="month", length.out=nrow(Data))

Plots the original (raw) time series of measles cases over time to visualize trends and fluctuations.

# Plot raw data
plot(Data$`Jere LGA, Borno State` ~ dates_data, type='l', col='red',
     xlab="Time (months)", ylab="Measles Cases",
     main="Time Series Plot of Measles Cases in Jere LGA, Borno State")

Performs the Augmented Dickey-Fuller (ADF) test to check if the time series is stationary (i.e., constant mean and variance over time).

# Stationarity test
adf.test(Data_2)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Data_2
## Dickey-Fuller = -3.8246, Lag order = 4, p-value = 0.02172
## alternative hypothesis: stationary

Removes trend (non-stationarity) by differencing: Takes first difference of the series (diff()). Tests stationarity again with adf.test(). Plots the differenced series.

# Differencing
Measles_case_d1 <- diff(Data_2)
adf.test(Measles_case_d1)
## Warning in adf.test(Measles_case_d1): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Measles_case_d1
## Dickey-Fuller = -5.0428, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
plot(Measles_case_d1, main="Differenced Measles Cases", type='l')

Plots ACF and PACF of differenced data to identify possible ARIMA model parameters (AR, MA).

acf(Measles_case_d1, main="ACF - Differenced Series")

pacf(Measles_case_d1, main="PACF - Differenced Series")

#Converts the data into a time series object
ts_measles <- ts(Data_2, start=c(2016,1), frequency=12)
ts_measles
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2016  12   7   1  17  50   2   8 166 131  59  39   9
## 2017   0   4   0   1   1   0   0   0   1   0   2   6
## 2018   0   2   2   1   2   3   0   0   2   0  11 244
## 2019 343 474 658 468  56  34   0   3   0   1   1   1
## 2020   1   0  41  30 546 275 310  58 262  87  53 178
## 2021  26 205 123 138 174 352 242 271  46  79  90  73
## 2022 157  84 233 134 162  83 138 158   1   0   2   0
# Decomposition
decomposed <- decompose(ts_measles)
plot(decomposed)

auto.arima(sqrt(ts_measles))
## Series: sqrt(ts_measles) 
## ARIMA(1,1,0) 
## 
## Coefficients:
##           ar1
##       -0.2335
## s.e.   0.1061
## 
## sigma^2 = 21.87:  log likelihood = -245.33
## AIC=494.66   AICc=494.81   BIC=499.5

Fits an ARIMA(1,1,0)(0,0,1) model to the square-root-transformed series:

(1,1,0) = AR(1), differenced once, no MA component.

Seasonal part (0,0,1) = one seasonal MA term.

The square root transformation stabilizes variance.

# ARIMA fit
cases_fit <- arima(x=sqrt(ts_measles), order=c(1,1,0),
                   seasonal=list(order=c(0,0,1)))
summary(cases_fit)
## 
## Call:
## arima(x = sqrt(ts_measles), order = c(1, 1, 0), seasonal = list(order = c(0, 
##     0, 1)))
## 
## Coefficients:
##           ar1     sma1
##       -0.1856  -0.1842
## s.e.   0.1115   0.1256
## 
## sigma^2 estimated as 20.98:  log likelihood = -244.3,  aic = 494.59
## 
## Training set error measures:
##                       ME   RMSE      MAE MPE MAPE      MASE       ACF1
## Training set -0.02615164 4.5526 3.078198 NaN  Inf 0.9517179 0.01862956
# Forecast
cases_forecast <- forecast(cases_fit, h=24)
cases_forecast
##          Point Forecast      Lo 80     Hi 80      Lo 95     Hi 95
## Jan 2023    -0.20326985  -6.072703  5.666163  -9.179793  8.773253
## Feb 2023     0.05873514  -7.510793  7.628263 -11.517859 11.635329
## Mar 2023    -0.99065818 -10.052644  8.071328 -14.849770 12.868454
## Apr 2023    -0.32484884 -10.648064  9.998366 -16.112844 15.463146
## May 2023    -0.68124201 -12.130561 10.768077 -18.191464 16.828980
## Jun 2023    -0.15975139 -12.633413 12.313910 -19.236570 18.917067
## Jul 2023    -0.53597673 -13.956108 12.884155 -21.060297 19.988343
## Aug 2023    -0.65782193 -14.961919 13.646275 -22.534051 21.218407
## Sep 2023     1.74930633 -13.387224 16.885836 -21.400018 24.898631
## Oct 2023     1.90410117 -14.021409 17.829611 -22.451864 26.260067
## Nov 2023     1.63539480 -15.041811 18.312601 -23.870191 27.140980
## Dec 2023     1.88733016 -15.509121 19.283782 -24.718247 28.492907
## Jan 2024     1.84056413 -15.981054 19.662183 -25.415249 29.096378
## Feb 2024     1.84924517 -16.431257 20.129747 -26.108370 29.806860
## Mar 2024     1.84763373 -16.872452 20.567719 -26.782267 30.477534
## Apr 2024     1.84793286 -17.303106 20.998971 -27.441053 31.136919
## May 2024     1.84787733 -17.724363 21.420117 -28.085280 31.781035
## Jun 2024     1.84788764 -18.136726 21.832502 -28.715942 32.411717
## Jul 2024     1.84788573 -18.540755 22.236526 -29.333849 33.029621
## Aug 2024     1.84788608 -18.936930 22.632703 -29.939748 33.635520
## Sep 2024     1.84788602 -19.325695 23.021467 -30.534311 34.230083
## Oct 2024     1.84788603 -19.707448 23.403220 -31.118153 34.813925
## Nov 2024     1.84788603 -20.082558 23.778330 -31.691833 35.387606
## Dec 2024     1.84788603 -20.451358 24.147130 -32.255865 35.951637
plot(cases_forecast, ylab="√ of Cases", xlab="Time (years)", lwd=1)
legend("topleft", c("Measles Cases", "Forecast", "80% CI", "95% CI"),
       cex=0.6, col=c(1,4,5,8), lty=c(1,1,1,2))

# Residual check
checkresiduals(cases_fit)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,0)(0,0,1)[12]
## Q* = 27.214, df = 15, p-value = 0.02704
## 
## Model df: 2.   Total lags used: 17