1 Introduction

2 Causes and Symptoms

2.1 Causes

  • Airborne spread: via droplets from coughs or sneezes of an infected person
  • A person is contagious from 4 days before and after the rash appears.

2.2 Symptoms

Symptoms usually appear 10–14 days after exposure and this progress in stages

  • Early stage (Prodromal) High fever, Cough, Runny nose

  • Second satge (Rash Phase) Red, blotchy rash starts on the face and behind ears, then spreads down to trunk, arms, and legs

  • Third stage (Convalescent Phase) Person becomes less contagious after rash fades

3 Aims

The aim of this study is to examine historical measles surveillance data in Nigeria to identify key trends, patterns, and seasonal variations in incidence,

4 Objectives

5 Research Question

6 Reference

7 Time Series and Forecasting

7.1 Loading Data and Setup

# Require Package
library(zoo)
## Warning: package 'zoo' was built under R version 4.5.1
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(forecast)
## Warning: package 'forecast' was built under R version 4.5.1
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.1
jere_data <- read.csv(file="~/OLUWABUKUNMI_TAFA__Standard_Slip/icammda_pricemeaslesdata.csv")
jere_data
##    Time.Month. Jere
## 1            1   12
## 2            2    7
## 3            3    1
## 4            4   17
## 5            5   50
## 6            6    2
## 7            7    8
## 8            8  166
## 9            9  131
## 10          10   59
## 11          11   39
## 12          12    9
## 13          13    0
## 14          14    4
## 15          15    0
## 16          16    1
## 17          17    1
## 18          18    0
## 19          19    0
## 20          20    0
## 21          21    1
## 22          22    0
## 23          23    2
## 24          24    6
## 25          25    0
## 26          26    2
## 27          27    2
## 28          28    1
## 29          29    2
## 30          30    3
## 31          31    0
## 32          32    0
## 33          33    2
## 34          34    0
## 35          35   11
## 36          36  244
## 37          37  343
## 38          38  474
## 39          39  658
## 40          40  468
## 41          41   56
## 42          42   34
## 43          43    0
## 44          44    3
## 45          45    0
## 46          46    1
## 47          47    1
## 48          48    1
## 49          49    1
## 50          50    0
## 51          51   41
## 52          52   30
## 53          53  546
## 54          54  275
## 55          55  310
## 56          56   58
## 57          57  262
## 58          58   87
## 59          59   53
## 60          60  178
## 61          61   26
## 62          62  205
## 63          63  123
## 64          64  138
## 65          65  174
## 66          66  352
## 67          67  242
## 68          68  271
## 69          69   46
## 70          70   79
## 71          71   90
## 72          72   73
## 73          73  157
## 74          74   84
## 75          75  233
## 76          76  134
## 77          77  162
## 78          78   83
## 79          79  138
## 80          80  158
## 81          81    1
## 82          82    0
## 83          83    2
## 84          84    0
names(jere_data)
## [1] "Time.Month." "Jere"
names(jere_data) <- c("Years", "Cases")
names(jere_data)
## [1] "Years" "Cases"
# to inspect our data
head(jere_data)
##   Years Cases
## 1     1    12
## 2     2     7
## 3     3     1
## 4     4    17
## 5     5    50
## 6     6     2
tail(jere_data)
##    Years Cases
## 79    79   138
## 80    80   158
## 81    81     1
## 82    82     0
## 83    83     2
## 84    84     0
str(jere_data)
## 'data.frame':    84 obs. of  2 variables:
##  $ Years: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Cases: int  12 7 1 17 50 2 8 166 131 59 ...
# Extracts the `Cases` column
jere_data$Cases
##  [1]  12   7   1  17  50   2   8 166 131  59  39   9   0   4   0   1   1   0   0
## [20]   0   1   0   2   6   0   2   2   1   2   3   0   0   2   0  11 244 343 474
## [39] 658 468  56  34   0   3   0   1   1   1   1   0  41  30 546 275 310  58 262
## [58]  87  53 178  26 205 123 138 174 352 242 271  46  79  90  73 157  84 233 134
## [77] 162  83 138 158   1   0   2   0
Cases <- jere_data$Cases

setwd(“C:/Users/User/Desktop/Mahidol-Oxford Bangkok”)

7.2 Assign Monthly Dates

# Create a proper monthly time series index 
# date
jere_data$Years <- seq(as.Date("2016-01-01"), by = "month", length.out = 84)
# * Replaces `Years` column with **monthly dates** starting from Jan 2016, for 84 months (7 years).

Creating a proper monthly time series index

7.3 Plotting Raw Data

# **Basic Plot**
plot(Cases~jere_data$Years, data=jere_data, col="black",xlab="Years",
     type='l', lwd=3)

# Plot
plot(Cases~Years, data=jere_data,col='red', xlab = "Years",type='l', lwd=3, main="Time Series plot of Measles Cases in Jere LGA (Borno State)")

7.4 Stationarity Check

# Test
adf.test(jere_data$Cases)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  jere_data$Cases
## Dickey-Fuller = -3.8246, Lag order = 4, p-value = 0.02172
## alternative hypothesis: stationary
# adf.test means Augmented Dickey–Fuller test
# it's use to check if the series is stationary (p-value < 0.05 = stationary).
  • if the p-value is greater than 0.05 then it’s stationary(no need to difference)

  • if the p-value is less than 0.05 then it’s not stationary(difference needed)

7.5 Differences

# Mealse difference 1
Mealse_case<-diff(jere_data$Cases)
adf.test(Mealse_case)
## Warning in adf.test(Mealse_case): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Mealse_case
## Dickey-Fuller = -5.0428, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# If p-value < 0.05 the series is stationary.
# If p-value > 0.05 the series is non-stationary (you may need differencing)

To get second difference, you must difference again Second mealse difference Mealse_case_d2<-diff(jere_data$Cases) adf.test(Mealse_case_d2)

7.6 Plotting Difference and ACF/PACF

# plot
par(mfrow=c(1,1))
plot(Mealse_case, main="Differences of Mealse Cases", type='l')

acf(Mealse_case, main="ACF of  Measles cases in Jere LGA (Borno State)", lag.max=10, ylim=c(-1,1))

pacf(Mealse_case, main="PACF of  Measles cases in Jere LGA (Borno State)", lag.max=10, ylim=c(-1,1))

# ACF means (Autocorrelation Function)
# PACF means partial Autocorrelation Function
# lag.max: this tell r how many lags to calculate and displays
# it's use to sets the maximum lag (how many time steps back) to compute autocorrelation/partial autocorrelation`

We use this to guild ARIMA, where ACF is for Moving Average,MA(q) PACF for Auto Regressive,AR(p)

7.7 Time Series Object

jere_data$Year <- seq(as.Date("2016-01-01"), by = "month", length.out = 84)

# ARIMA fit
Cases_1<-ts(c(Cases), start = c(2016, 1), frequency = 12)
Cases_1
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2016  12   7   1  17  50   2   8 166 131  59  39   9
## 2017   0   4   0   1   1   0   0   0   1   0   2   6
## 2018   0   2   2   1   2   3   0   0   2   0  11 244
## 2019 343 474 658 468  56  34   0   3   0   1   1   1
## 2020   1   0  41  30 546 275 310  58 262  87  53 178
## 2021  26 205 123 138 174 352 242 271  46  79  90  73
## 2022 157  84 233 134 162  83 138 158   1   0   2   0

7.8 Manual ARIMA Fit

cases_fit<-arima(x=sqrt(Cases_1), order=c(1,1,0),           seasonal=list(order=c(0,0,1)))
cases_fit
## 
## Call:
## arima(x = sqrt(Cases_1), order = c(1, 1, 0), seasonal = list(order = c(0, 0, 
##     1)))
## 
## Coefficients:
##           ar1     sma1
##       -0.1856  -0.1842
## s.e.   0.1115   0.1256
## 
## sigma^2 estimated as 20.98:  log likelihood = -244.3,  aic = 494.59
# Square-root transformation helps stabilize variance.

fitted(cases_fit)
##              Jan         Feb         Mar         Apr         May         Jun
## 2016  3.46063752  3.43657951  2.76792944  1.35209397  3.60171389  6.43934270
## 2017  3.51832060  0.68215334  1.94786134 -0.12998953  0.18647282  1.90906526
## 2018  2.89469717  0.21173477  1.51021570  1.20622556  0.92716190  1.68870469
## 2019 13.86949987 17.76056438 21.18569735 24.96925895 22.28949147 10.10195182
## 2020  0.14353344  0.26133635 -0.63679996  5.82888185  8.37581337 20.83243222
## 2021 12.05874766  6.67720566 11.31008355 11.75436931  8.86469650 13.70549699
## 2022 10.00072749 10.38295996  9.83018380 14.13345773 11.46380443 11.58291625
##              Jul         Aug         Sep         Oct         Nov         Dec
## 2016  2.47030033  2.73655963 11.02454177 11.64605423  8.34372386  6.45833386
## 2017  0.12069789 -1.83685180 -0.07564563  1.53161365  0.56627637  1.77805729
## 2018  1.69523512 -0.01657220 -0.19795121  1.43357620  0.10650902  2.57762053
## 2019  6.44986109  1.07933127  1.11364256  0.58551938  0.22320719 -1.40194093
## 2020 19.03013974 17.29658622  9.67547099 14.51913960 10.45754713  7.21779561
## 2021 17.98969977 17.93416856 15.09488943  9.53527231  9.08244826  8.24793295
## 2022 10.23006530 11.52896022 13.94798088  3.26683586  0.11115530  1.09717255
# fitted it gives you the predicted values 

7.9 Auto ARIMA

# ARIMA Model
arima_model <- auto.arima(sqrt(Cases_1), ic = "aic", trace =T)
## 
##  ARIMA(2,1,2)(1,0,1)[12] with drift         : 499.4077
##  ARIMA(0,1,0)            with drift         : 499.3606
##  ARIMA(1,1,0)(1,0,0)[12] with drift         : 496.8347
##  ARIMA(0,1,1)(0,0,1)[12] with drift         : 497.1494
##  ARIMA(0,1,0)                               : 497.3669
##  ARIMA(1,1,0)            with drift         : 496.6547
##  ARIMA(1,1,0)(0,0,1)[12] with drift         : 496.5915
##  ARIMA(1,1,0)(1,0,1)[12] with drift         : 498.4702
##  ARIMA(1,1,0)(0,0,2)[12] with drift         : 498.4692
##  ARIMA(1,1,0)(1,0,2)[12] with drift         : Inf
##  ARIMA(0,1,0)(0,0,1)[12] with drift         : 497.3204
##  ARIMA(2,1,0)(0,0,1)[12] with drift         : 497.5704
##  ARIMA(1,1,1)(0,0,1)[12] with drift         : 497.8492
##  ARIMA(2,1,1)(0,0,1)[12] with drift         : 499.5704
##  ARIMA(1,1,0)(0,0,1)[12]                    : 494.5929
##  ARIMA(1,1,0)                               : 494.6626
##  ARIMA(1,1,0)(1,0,1)[12]                    : 496.4714
##  ARIMA(1,1,0)(0,0,2)[12]                    : 496.4702
##  ARIMA(1,1,0)(1,0,0)[12]                    : 494.8373
##  ARIMA(1,1,0)(1,0,2)[12]                    : 498.464
##  ARIMA(0,1,0)(0,0,1)[12]                    : 495.3209
##  ARIMA(2,1,0)(0,0,1)[12]                    : 495.5724
##  ARIMA(1,1,1)(0,0,1)[12]                    : 495.8514
##  ARIMA(0,1,1)(0,0,1)[12]                    : 495.1504
##  ARIMA(2,1,1)(0,0,1)[12]                    : 497.5724
## 
##  Best model: ARIMA(1,1,0)(0,0,1)[12]
arima_model
## Series: sqrt(Cases_1) 
## ARIMA(1,1,0)(0,0,1)[12] 
## 
## Coefficients:
##           ar1     sma1
##       -0.1856  -0.1842
## s.e.   0.1115   0.1256
## 
## sigma^2 = 21.49:  log likelihood = -244.3
## AIC=494.59   AICc=494.9   BIC=501.85
# Akaike Information criterion
# this is uae to find the best models

# ARIMA model with the lowest AIC
summary(arima_model)
## Series: sqrt(Cases_1) 
## ARIMA(1,1,0)(0,0,1)[12] 
## 
## Coefficients:
##           ar1     sma1
##       -0.1856  -0.1842
## s.e.   0.1115   0.1256
## 
## sigma^2 = 21.49:  log likelihood = -244.3
## AIC=494.59   AICc=494.9   BIC=501.85
## 
## Training set error measures:
##                       ME   RMSE      MAE MPE MAPE      MASE       ACF1
## Training set -0.02615164 4.5526 3.078198 NaN  Inf 0.4495227 0.01862956
# this tell how good the models is

# Compare the AIC and BIC values of different models
arima_model_aic <- auto.arima(sqrt(Cases_1), ic = "aic")
arima_model_bic <- auto.arima(sqrt(Cases_1), ic = "bic")
summary(arima_model_aic)
## Series: sqrt(Cases_1) 
## ARIMA(1,1,0)(0,0,1)[12] 
## 
## Coefficients:
##           ar1     sma1
##       -0.1856  -0.1842
## s.e.   0.1115   0.1256
## 
## sigma^2 = 21.49:  log likelihood = -244.3
## AIC=494.59   AICc=494.9   BIC=501.85
## 
## Training set error measures:
##                       ME   RMSE      MAE MPE MAPE      MASE       ACF1
## Training set -0.02615164 4.5526 3.078198 NaN  Inf 0.4495227 0.01862956
summary(arima_model_bic)
## Series: sqrt(Cases_1) 
## ARIMA(0,1,0) 
## 
## sigma^2 = 22.88:  log likelihood = -247.68
## AIC=497.37   AICc=497.42   BIC=499.79
## 
## Training set error measures:
##                       ME     RMSE      MAE  MPE MAPE      MASE       ACF1
## Training set -0.04119807 4.755071 3.195897 -Inf  Inf 0.4667107 -0.2360093
# Akaike Information criterion
# Bayesian Information Criterion

Testing Both AIC and BIC, for solid model comparison

7.10 Decomposition

# to decompose 
decomp <- stl(Cases_1, s.window="periodic")
plot(decomp)

This breaks down the series into trend, seasonal, and remainder

7.11 Forecasting

forecast<-forecast(cases_fit,h=36)
forecast
##          Point Forecast      Lo 80     Hi 80      Lo 95     Hi 95
## Jan 2023    -0.20326985  -6.072703  5.666163  -9.179793  8.773253
## Feb 2023     0.05873514  -7.510793  7.628263 -11.517859 11.635329
## Mar 2023    -0.99065818 -10.052644  8.071328 -14.849770 12.868454
## Apr 2023    -0.32484884 -10.648064  9.998366 -16.112844 15.463146
## May 2023    -0.68124201 -12.130561 10.768077 -18.191464 16.828980
## Jun 2023    -0.15975139 -12.633413 12.313910 -19.236570 18.917067
## Jul 2023    -0.53597673 -13.956108 12.884155 -21.060297 19.988343
## Aug 2023    -0.65782193 -14.961919 13.646275 -22.534051 21.218407
## Sep 2023     1.74930633 -13.387224 16.885836 -21.400018 24.898631
## Oct 2023     1.90410117 -14.021409 17.829611 -22.451864 26.260067
## Nov 2023     1.63539480 -15.041811 18.312601 -23.870191 27.140980
## Dec 2023     1.88733016 -15.509121 19.283782 -24.718247 28.492907
## Jan 2024     1.84056413 -15.981054 19.662183 -25.415249 29.096378
## Feb 2024     1.84924517 -16.431257 20.129747 -26.108370 29.806860
## Mar 2024     1.84763373 -16.872452 20.567719 -26.782267 30.477534
## Apr 2024     1.84793286 -17.303106 20.998971 -27.441053 31.136919
## May 2024     1.84787733 -17.724363 21.420117 -28.085280 31.781035
## Jun 2024     1.84788764 -18.136726 21.832502 -28.715942 32.411717
## Jul 2024     1.84788573 -18.540755 22.236526 -29.333849 33.029621
## Aug 2024     1.84788608 -18.936930 22.632703 -29.939748 33.635520
## Sep 2024     1.84788602 -19.325695 23.021467 -30.534311 34.230083
## Oct 2024     1.84788603 -19.707448 23.403220 -31.118153 34.813925
## Nov 2024     1.84788603 -20.082558 23.778330 -31.691833 35.387606
## Dec 2024     1.84788603 -20.451358 24.147130 -32.255865 35.951637
## Jan 2025     1.84788603 -20.814157 24.509929 -32.810718 36.506491
## Feb 2025     1.84788603 -21.171239 24.867011 -33.356828 37.052600
## Mar 2025     1.84788603 -21.522866 25.218638 -33.894595 37.590367
## Apr 2025     1.84788603 -21.869280 25.565052 -34.424390 38.120162
## May 2025     1.84788603 -22.210707 25.906479 -34.946557 38.642329
## Jun 2025     1.84788603 -22.547356 26.243128 -35.461417 39.157189
## Jul 2025     1.84788603 -22.879422 26.575194 -35.969268 39.665040
## Aug 2025     1.84788603 -23.207087 26.902859 -36.470389 40.166161
## Sep 2025     1.84788603 -23.530522 27.226294 -36.965040 40.660812
## Oct 2025     1.84788603 -23.849887 27.545659 -37.453466 41.149238
## Nov 2025     1.84788603 -24.165331 27.861103 -37.935896 41.631668
## Dec 2025     1.84788603 -24.476995 28.172767 -38.412545 42.108317
plot(forecast(cases_fit,h=36), ylab="Cases", xlab="Years",lwd=3)
legend("topleft", c("Measles cases","Forecast","80% EB","95% EB"), cex=0.5, col=c(1, 4,5,8),lty=c(1, 1, 1, 2))

+ The forecast is for 3 years ahead

  • The legend(80%EB and 95%EB) means Error Bands, for confidence intervals

7.12 Residual Check

# Check Model Residuals
# Plot the residuals of the ARIMA model
checkresiduals(arima_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,0)(0,0,1)[12]
## Q* = 27.214, df = 15, p-value = 0.02704
## 
## Model df: 2.   Total lags used: 17
# Check for autocorrelation in the residuals
acf(residuals(arima_model))

# Check for normality of the residuals
shapiro.test(residuals(arima_model))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(arima_model)
## W = 0.93917, p-value = 0.0006229