DATA - setting up database from step 2 script (result)
result <- read_excel("result_after_step2.xlsx")
result$date_ym <- as.Date(result$date_ym)
# filter out september (NA because of delta)
result <- result[!(format(result$date_ym, "%Y-%m-%d") == "2022-09-01"),]
#new data frame with national sums
monthly_sums_national <- result %>%
group_by(date_ym) %>%
summarise(
monthly_total_blue = sum(m_n_blue, na.rm = TRUE),
monthly_total_green = sum(m_n_green, na.rm = TRUE),
monthly_total_yellow = sum(m_n_yellow, na.rm = TRUE),
monthly_total_white = sum(m_n_white, na.rm = TRUE),
monthly_total_red = sum(m_n_red, na.rm = TRUE),
monthly_total_orange = sum(m_n_orange, na.rm = TRUE),
monthly_total_notriage = sum(m_n_nt, na.rm = TRUE),
monthly_national = sum(m_n_blue + m_n_green + m_n_yellow +
m_n_white + m_n_red + m_n_orange +
m_n_nt, na.rm = TRUE),
MNU_gb = sum(m_n_blue + m_n_green, na.rm = TRUE),
MNU_gbw = sum(m_n_blue + m_n_green + m_n_white, na.rm = TRUE),
MNU_bw = sum(m_n_blue + m_n_white, na.rm = TRUE)
)
monthly_sums_national$national_population <- 10639726 # resident population 2023 from INE
# normalise per inhabitant (for plots)
monthly_sums_national <- monthly_sums_national %>%
group_by(date_ym) %>%
mutate(
ALL_national = monthly_national/national_population*10^5,
MNUGr_nat = MNU_gbw/national_population*10^5,
MNU_nat = MNU_bw/national_population*10^5,
MNU_gb_nat = MNU_gb/national_population*10^5)
# variable time t = 1,n
monthly_sums_national <- monthly_sums_national %>%
ungroup() %>% # important step!!
arrange(date_ym) %>% # sort by date
mutate(t = row_number()) %>% # assign t starting from 1
ungroup()
monthly_sums_national <- monthly_sums_national %>%
mutate(
year = as.numeric(format(date_ym, "%Y")),
month = as.numeric(format(date_ym, "%m")))
## Seasonality: Serfling Model (sine cosine)
#Serfling Model - https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1915276/
monthly_sums_national <- monthly_sums_national %>%
ungroup() %>%
mutate(
sin12 = sin(2 * pi * t / 12),
cos12 = cos(2 * pi * t / 12))
# intervention and trend
monthly_sums_national$int <- ifelse(monthly_sums_national$year >= 2024, 1, 0)
monthly_sums_national$trend <- pmax(0, monthly_sums_national$t - 16 + 1)
models - green blue white
# model 1: simple LM
m_nat_ols <- lm(MNU_gbw~offset(log(national_population))+t+int+trend+sin12+cos12, monthly_sums_national)
summary(m_nat_ols)
##
## Call:
## lm(formula = MNU_gbw ~ offset(log(national_population)) + t +
## int + trend + sin12 + cos12, data = monthly_sums_national)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15156 -5959 -1066 7641 15392
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 225788.828 5363.050 42.101 < 2e-16 ***
## t -2339.260 591.464 -3.955 0.00059 ***
## int 14713.913 7369.259 1.997 0.05732 .
## trend 3.574 836.456 0.004 0.99663
## sin12 -3223.142 2598.888 -1.240 0.22689
## cos12 8526.998 2598.888 3.281 0.00315 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9604 on 24 degrees of freedom
## Multiple R-squared: 0.7655, Adjusted R-squared: 0.7166
## F-statistic: 15.67 on 5 and 24 DF, p-value: 6.967e-07
monthly_sums_national$pred.cases_gbw1 <- predict(m_nat_ols, type="response") #predict cases according to the model
monthly_sums_national$pred_gbw1 <- predict(m_nat_ols, type="response")/(monthly_sums_national$national_population)*10^5
# model 2: different way of doing neg bionomial (minimal AIC diff but v different results)
m_nat_nb2 <- glmmTMB(MNU_gbw ~ t + int + trend + sin12 + cos12
+ offset(log(national_population)),
data = monthly_sums_national,
family = nbinom2)
monthly_sums_national$pred.cases_gbw21 <- predict(m_nat_nb2, type="response") #predict cases according to the model
monthly_sums_national$pred_gbw21 <- predict(m_nat_nb2, type="response")/(monthly_sums_national$national_population)*10^5
# no need for lags' corrections for autocorrelation - ACF plots seem okay (I think)
# model 3: negative binomial w/ lead and lags (ll models)
monthly_sums_national <- monthly_sums_national %>%
mutate(
# Leads (anticipatory effects)
int_lead2 = ifelse(t >= 14, 1, 0), # t=14,15 before intervention
int_lead1 = ifelse(t >= 15, 1, 0), # t=15 just before intervention
# Main intervention
int = ifelse(t >= 16, 1, 0), # t=16 onwards
# Lags (delayed effects)
int_lag1 = ifelse(t >= 17, 1, 0), # t=17 onwards (1 month delay)
int_lag2 = ifelse(t >= 18, 1, 0), # t=18 onwards (2 month delay)
int_lag3 = ifelse(t >= 19, 1, 0), # t=19 onwards (3 month delay)
# Post-intervention trend
trend = ifelse(t >= 16, t - 16 + 1, 0)) # Counts months since intervention
m_nat_nbll <- glmmTMB(MNU_gbw ~ t + int + trend + sin12 + cos12
+ int_lead2 + int_lead1 + int_lag1 + int_lag2 + int_lag3
+ offset(log(national_population)),
data = monthly_sums_national,
family = nbinom2)
monthly_sums_national$pred.cases_gbw3 <- predict(m_nat_nbll, type="response") #predict cases according to the model
monthly_sums_national$pred_gbw3 <- predict(m_nat_nbll, type="response")/(monthly_sums_national$national_population)*10^5
summaries and ACF plots
summary(m_nat_ols)
##
## Call:
## lm(formula = MNU_gbw ~ offset(log(national_population)) + t +
## int + trend + sin12 + cos12, data = monthly_sums_national)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15156 -5959 -1066 7641 15392
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 225788.828 5363.050 42.101 < 2e-16 ***
## t -2339.260 591.464 -3.955 0.00059 ***
## int 14713.913 7369.259 1.997 0.05732 .
## trend 3.574 836.456 0.004 0.99663
## sin12 -3223.142 2598.888 -1.240 0.22689
## cos12 8526.998 2598.888 3.281 0.00315 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9604 on 24 degrees of freedom
## Multiple R-squared: 0.7655, Adjusted R-squared: 0.7166
## F-statistic: 15.67 on 5 and 24 DF, p-value: 6.967e-07
acf(residuals(m_nat_ols), main = "OLS Model - ACF of Residuals")

summary(m_nat_nb2)
## Family: nbinom2 ( log )
## Formula:
## MNU_gbw ~ t + int + trend + sin12 + cos12 + offset(log(national_population))
## Data: monthly_sums_national
##
## AIC BIC logLik -2*log(L) df.resid
## 642.9 652.7 -314.4 628.9 23
##
##
## Dispersion parameter for nbinom2 family (): 515
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.850694 0.024485 -157.27 < 2e-16 ***
## t -0.011207 0.002694 -4.16 3.19e-05 ***
## int 0.079708 0.033975 2.35 0.018972 *
## trend -0.002027 0.003853 -0.53 0.598755
## sin12 -0.016989 0.011939 -1.42 0.154732
## cos12 0.044919 0.011966 3.75 0.000174 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acf(residuals(m_nat_nb2, type = "pearson"), main = "Negative Binomial - ACF of Residuals")

summary(m_nat_nbll)
## Family: nbinom2 ( log )
## Formula:
## MNU_gbw ~ t + int + trend + sin12 + cos12 + int_lead2 + int_lead1 +
## int_lag1 + int_lag2 + int_lag3 + offset(log(national_population))
## Data: monthly_sums_national
##
## AIC BIC logLik -2*log(L) df.resid
## 641.5 658.3 -308.7 617.5 18
##
##
## Dispersion parameter for nbinom2 family (): 754
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.850843 0.025158 -153.07 < 2e-16 ***
## t -0.011800 0.003290 -3.59 0.000335 ***
## int -0.057226 0.051999 -1.10 0.271102
## trend -0.008204 0.005960 -1.38 0.168661
## sin12 -0.002726 0.014745 -0.18 0.853315
## cos12 0.040632 0.011135 3.65 0.000263 ***
## int_lead2 -0.009236 0.048290 -0.19 0.848318
## int_lead1 0.062488 0.052288 1.20 0.232057
## int_lag1 0.068794 0.052237 1.32 0.187858
## int_lag2 0.079803 0.052546 1.52 0.128828
## int_lag3 0.013770 0.048775 0.28 0.777706
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acf(residuals(m_nat_nbll, type = "pearson"), main = "NB with A/D effects - ACF of Residuals")

# MODEL 1 OLS
plot(monthly_sums_national$t, monthly_sums_national$MNUGr_nat, # green blue white normalised per population
ylab = "ED visits per 100000 residents",
ylim = c(500, 3000),
xlab = "months",
col = "steelblue",
xaxt = "n",
cex.lab = 0.8, cex.axis = 0.8, cex.main = 0.9)
ticks <- seq(1, max(monthly_sums_national$t), by = 12)
axis(1, at = ticks, labels = ticks, cex.axis = 0.7)
abline(v = 16, lty = 2)
lines(monthly_sums_national$t[1:16],monthly_sums_national$pred_gbw21[1:16],col="steelblue")
lines(monthly_sums_national$t[16:30],monthly_sums_national$pred_gbw21[16:30],col="steelblue")
# trend line if no intervention had occurred
newdata <- monthly_sums_national
newdata$int <- newdata$trend <- 0 #assumes no intervention
newdata$pred.cases_gbw21 <- predict(m_nat_ols,newdata, type="response")/(newdata$national_population)*10^5
lines(newdata$t[16:30],newdata$pred.cases_gbw21[16:30],col="grey",lty=2)

# MODEL 2 NB
plot(monthly_sums_national$t, monthly_sums_national$MNUGr_nat, # green blue white normalised per population
ylab = "ED visits per 100000 residents",
ylim = c(0, 3000),
xlab = "months",
col = "steelblue",
xaxt = "n",
cex.lab = 0.8, cex.axis = 0.8, cex.main = 0.9)
ticks <- seq(1, max(monthly_sums_national$t), by = 5)
axis(1, at = ticks, labels = ticks, cex.axis = 0.7)
abline(v = 16, lty = 2)
lines(monthly_sums_national$t[1:16],monthly_sums_national$pred_gbw21[1:16],col="steelblue")
lines(monthly_sums_national$t[16:30],monthly_sums_national$pred_gbw21[16:30],col="steelblue")
# trend line if no intervention had occurred
newdata <- monthly_sums_national
newdata$int <- newdata$trend <- 0 #assumes no intervention
newdata$pred.cases_gbw21 <- predict(m_nat_nb2,newdata, type="response")/(newdata$national_population)*10^5
lines(newdata$t[16:30],newdata$pred.cases_gbw21[16:30],col="grey",lty=2)

# MODEL 3 NB LagsLeads
plot(monthly_sums_national$t, monthly_sums_national$MNUGr_nat, # green blue white normalised per population
ylab = "ED visits per 100000 residents",
ylim = c(0, 3000),
xlab = "months",
col = "steelblue",
xaxt = "n",
cex.lab = 0.8, cex.axis = 0.8, cex.main = 0.9)
ticks <- seq(1, max(monthly_sums_national$t), by = 3)
axis(1, at = ticks, labels = ticks, cex.axis = 0.7)
abline(v = 16, lty = 2)
lines(monthly_sums_national$t[1:16],monthly_sums_national$pred_gbw3[1:16],col="steelblue")
lines(monthly_sums_national$t[16:30],monthly_sums_national$pred_gbw3[16:30],col="steelblue")
# trend line if no intervention had occurred
newdata <- monthly_sums_national
newdata$int <- newdata$trend <- 0 #assumes no intervention
newdata$pred.cases_gbw3 <- predict(m_nat_nbll,newdata, type="response")/(newdata$national_population)*10^5
lines(newdata$t[16:30],newdata$pred.cases_gbw3[16:30],col="grey",lty=2)
