# had to remove this from knitting, it was not working when I tried to knit, couldn't fix it. keeping it here just for reference.

clean_subset <- read_excel("clean_result_29LHU.xlsx")
# this file has 28 LHU - old LHU removed, as well as 24 (PPP) 11, 14, 19 ( > 5% missings))

demo <- read_excel("~/Library/CloudStorage/OneDrive-ensp.unl.pt/ENSP 2024/ULS_paper/LHU_demograph_SES.xlsx")
# this file has confounders - telephone triage, 65+, mpp_weighted.
demo$telephone <- ym(demo$telephone)  # "YYYY-MM" recognised

clean_result <- inner_join( clean_subset, demo,
                            by = c("id_number"))

clean_result$impl <- ifelse(
  clean_result$tel != 0 & clean_result$t >= clean_result$tel,
  1,0)

# additional gp coverage
# summarise the totals (check script4 for full code)
df_sums <- gp_cov %>%
  group_by(id_number, date_ym) %>%
  summarise(
    utentes_insc = sum(utentes_inscritos, na.rm = TRUE),
    utentes_gp = sum(utentes_mdf, na.rm = TRUE),
    .groups = "drop")

clean_result <- clean_result %>%
  left_join( # left join is better bc inner_join will drop unmatched vars 
    df_sums,
    by = c("id_number", "date_ym")
  )

# variables
clean_result <- clean_result %>%
  arrange(id_number, t) %>%
  group_by(id_number) %>%
  mutate(
    p_gp_cov = utentes_gp / utentes_insc,
    p_gv_cov_ca = utentes_gp / catch_area
  ) %>%
  ungroup()

# variables
clean_result <- clean_result %>%
  arrange(id_number, t) %>%
  group_by(id_number) %>%
  mutate(
    lag1 = lag(wbg, 1),
    lag2 = lag(wbg, 2),
    lag3 = lag(wbg, 3),
    lag4 = lag(wbg, 4),
    lag5 = lag(wbg, 5)) %>%
  ungroup()

# creating leads and lags
clean_result <- clean_result %>%
  mutate(
    # Leads (anticipatory effects)
    int_lead2 = ifelse(t >= 14, 1, 0),  # t=14,15 before intervention
    int_lead1 = ifelse(t >= 15, 1, 0),  # t=15 just before intervention
    # Lags (delayed effects)
    int_lag1 = ifelse(t >= 17, 1, 0),   # t=17 onwards (1 month delay)
    int_lag2 = ifelse(t >= 18, 1, 0),   # t=18 onwards (2 month delay)
    int_lag3 = ifelse(t >= 19, 1, 0))   # t=19 onwards (3 month delay)

# lags for autocorrelation
clean_result$month <- factor((clean_result$t - 1) %% 12 + 1) # different approach to seasonality
clean_result$lag1 <- dplyr::lag(clean_result$wbg, 1)
clean_result$lag2 <- dplyr::lag(clean_result$wbg, 2)
clean_result$lag3 <- dplyr::lag(clean_result$wbg, 3)
clean_result$lag4 <- dplyr::lag(clean_result$wbg, 4)
clean_result$lag5 <- dplyr::lag(clean_result$wbg, 5)

# I was getting convergence errors with the latest model, so Claude suggested rescaling time variables (t and lags)
clean_result$tz <- scale(clean_result$t)
clean_result$lag1_z <- scale(clean_result$lag1)
clean_result$lag2_z <- scale(clean_result$lag2)
clean_result$lag3_z <- scale(clean_result$lag3)
clean_result$lag4_z <- scale(clean_result$lag4)
clean_result$lag5_z <- scale(clean_result$lag5)

write_xlsx(clean_result, "clean_result_full.xlsx")

models

clean_result<- read_excel("clean_result_full.xlsx")

m1 <- glmmTMB(
  wbg ~ tz +
    int_lead2 + int_lead1 + # anticipatory effects
    int +                    # reform
    int_lag1 + int_lag2 + int_lag3 +  # delayed effects
    trend +                  # slope
    sin12 + cos12 + 
    mpp_weighted + sixfive + impl + p_gp_cov + # confounders       
    lag1_z + lag3_z + lag5_z +lag2_z + lag4_z + # lags for autocorrelation
    (1 + tz | id_number) +       # random slopes
    offset(log(catch_area)),    #normalisation for catchment area
  family = nbinom2,   
  data = clean_result)
summary(m1)
##  Family: nbinom2  ( log )
## Formula:          
## wbg ~ tz + int_lead2 + int_lead1 + int + int_lag1 + int_lag2 +  
##     int_lag3 + trend + sin12 + cos12 + mpp_weighted + sixfive +  
##     impl + p_gp_cov + lag1_z + lag3_z + lag5_z + lag2_z + lag4_z +  
##     (1 + tz | id_number) + offset(log(catch_area))
## Data: clean_result
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##   12755.8   12868.7   -6353.9   12707.8       793 
## 
## Random effects:
## 
## Conditional model:
##  Groups    Name        Variance Std.Dev. Corr  
##  id_number (Intercept) 0.052629 0.22941        
##            tz          0.004496 0.06705  -0.03 
## Number of obs: 817, groups:  id_number, 28
## 
## Dispersion parameter for nbinom2 family (): 94.2 
## 
## Conditional model:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -3.854e+00  5.150e-01  -7.483 7.24e-14 ***
## tz           -1.199e-01  2.057e-02  -5.830 5.55e-09 ***
## int_lead2    -4.947e-03  2.646e-02  -0.187 0.851657    
## int_lead1     5.648e-02  2.861e-02   1.974 0.048364 *  
## int          -8.454e-02  2.856e-02  -2.960 0.003073 ** 
## int_lag1      1.230e-01  2.877e-02   4.276 1.90e-05 ***
## int_lag2      1.128e-01  2.877e-02   3.921 8.80e-05 ***
## int_lag3     -3.936e-02  2.716e-02  -1.449 0.147272    
## trend         1.025e-03  3.527e-03   0.291 0.771386    
## sin12         8.972e-03  8.237e-03   1.089 0.276027    
## cos12         2.702e-02  6.412e-03   4.213 2.52e-05 ***
## mpp_weighted -4.550e-03  2.615e-03  -1.740 0.081917 .  
## sixfive       1.449e+00  1.454e+00   0.996 0.319216    
## impl         -1.543e-01  2.092e-02  -7.375 1.64e-13 ***
## p_gp_cov     -1.763e-01  1.855e-01  -0.951 0.341747    
## lag1_z        7.316e-02  1.299e-02   5.634 1.76e-08 ***
## lag3_z       -3.079e-02  1.340e-02  -2.298 0.021565 *  
## lag5_z        3.731e-02  1.068e-02   3.494 0.000475 ***
## lag2_z       -1.396e-03  1.370e-02  -0.102 0.918812    
## lag4_z       -5.817e-05  1.308e-02  -0.004 0.996452    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
diagnose(m1)
## Unusually large Z-statistics (|x|>5):
## 
##            (Intercept)                     tz                   impl 
##              -7.483399              -5.829646              -7.375385 
##                 lag1_z       disp~(Intercept) theta_1+tz|id_number.1 
##               5.634261              86.444265             -10.415132 
## theta_1+tz|id_number.2 
##             -18.483314 
## 
## Large Z-statistics (estimate/std err) suggest a *possible* failure of
## the Wald approximation - often also associated with parameters that are
## at or near the edge of their range (e.g. random-effects standard
## deviations approaching 0).  (Alternately, they may simply represent
## very well-estimated parameters; intercepts of non-centered models may
## fall in this category.) While the Wald p-values and standard errors
## listed in summary() may be unreliable, profile confidence intervals
## (see ?confint.glmmTMB) and likelihood ratio test p-values derived by
## comparing models (e.g. ?drop1) are probably still OK.  (Note that the
## LRT is conservative when the null value is on the boundary, e.g. a
## variance or zero-inflation value of 0 (Self and Liang 1987; Stram and
## Lee 1994; Goldman and Whelan 2000); in simple cases these p-values are
## approximately twice as large as they should be.)
clean_result <- clean_result %>%
  mutate(id_factor = factor(id_number))

m1_fe <- glmmTMB(
  wbg ~ tz +
    int_lead2 + int_lead1 + 
    int + int_lag1 + int_lag2 + int_lag3 +
    trend + sin12 + cos12 +
    impl + p_gp_cov +
    lag1_z + lag3_z + lag5_z + lag2_z + lag4_z +
    factor(id_number) +
    offset(log(catch_area)), 
  family = nbinom2,
  data = clean_result,
  control = glmmTMBControl(optCtrl = list(iter.max = 1e5, eval.max = 1e5))  # increases number of iterations (was getting convergence issues)
)

summary(m1_fe)
##  Family: nbinom2  ( log )
## Formula:          
## wbg ~ tz + int_lead2 + int_lead1 + int + int_lag1 + int_lag2 +  
##     int_lag3 + trend + sin12 + cos12 + impl + p_gp_cov + lag1_z +  
##     lag3_z + lag5_z + lag2_z + lag4_z + factor(id_number) + offset(log(catch_area))
## Data: clean_result
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##   12831.1   13047.6   -6369.6   12739.1       771 
## 
## 
## Dispersion parameter for nbinom2 family (): 69.8 
## 
## Conditional model:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -3.830276   0.114762  -33.38  < 2e-16 ***
## tz                  -0.117220   0.018609   -6.30 2.99e-10 ***
## int_lead2           -0.000409   0.030623   -0.01 0.989344    
## int_lead1            0.060011   0.033147    1.81 0.070227 .  
## int                 -0.095667   0.033055   -2.89 0.003801 ** 
## int_lag1             0.120364   0.033306    3.61 0.000302 ***
## int_lag2             0.104663   0.033334    3.14 0.001690 ** 
## int_lag3            -0.064372   0.031354   -2.05 0.040062 *  
## trend                0.007156   0.003957    1.81 0.070516 .  
## sin12                0.005362   0.009525    0.56 0.573505    
## cos12                0.023552   0.007376    3.19 0.001407 ** 
## impl                -0.207586   0.020484  -10.13  < 2e-16 ***
## p_gp_cov             0.069541   0.182609    0.38 0.703338    
## lag1_z               0.115994   0.014592    7.95 1.88e-15 ***
## lag3_z              -0.030364   0.015565   -1.95 0.051088 .  
## lag5_z               0.031157   0.011768    2.65 0.008109 ** 
## lag2_z               0.003621   0.015909    0.23 0.819939    
## lag4_z               0.001539   0.015150    0.10 0.919105    
## factor(id_number)2  -0.543987   0.039937  -13.62  < 2e-16 ***
## factor(id_number)3  -0.695331   0.050419  -13.79  < 2e-16 ***
## factor(id_number)4  -0.478956   0.038266  -12.52  < 2e-16 ***
## factor(id_number)5  -0.675948   0.062416  -10.83  < 2e-16 ***
## factor(id_number)6  -0.360783   0.074970   -4.81 1.49e-06 ***
## factor(id_number)7   0.117127   0.033571    3.49 0.000485 ***
## factor(id_number)8  -0.470516   0.058242   -8.08 6.55e-16 ***
## factor(id_number)9  -0.096275   0.073359   -1.31 0.189391    
## factor(id_number)10  0.107496   0.036872    2.92 0.003553 ** 
## factor(id_number)12 -0.745397   0.075648   -9.85  < 2e-16 ***
## factor(id_number)13 -0.322332   0.066454   -4.85 1.23e-06 ***
## factor(id_number)15  0.029337   0.055769    0.53 0.598856    
## factor(id_number)16 -0.877649   0.073793  -11.89  < 2e-16 ***
## factor(id_number)17 -0.678045   0.071451   -9.49  < 2e-16 ***
## factor(id_number)18 -0.262211   0.056707   -4.62 3.77e-06 ***
## factor(id_number)20 -0.287002   0.037720   -7.61 2.77e-14 ***
## factor(id_number)21 -0.402118   0.072382   -5.56 2.77e-08 ***
## factor(id_number)22 -0.161550   0.069612   -2.32 0.020301 *  
## factor(id_number)23 -0.089708   0.073590   -1.22 0.222834    
## factor(id_number)25 -0.404961   0.037200  -10.89  < 2e-16 ***
## factor(id_number)26 -0.201514   0.036939   -5.46 4.89e-08 ***
## factor(id_number)27 -0.119299   0.065481   -1.82 0.068474 .  
## factor(id_number)28 -0.133340   0.043451   -3.07 0.002150 ** 
## factor(id_number)29 -0.338326   0.060827   -5.56 2.67e-08 ***
## factor(id_number)30 -0.418004   0.049695   -8.41  < 2e-16 ***
## factor(id_number)31 -0.370597   0.045891   -8.08 6.72e-16 ***
## factor(id_number)32 -0.161854   0.074303   -2.18 0.029384 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
diagnose(m1_fe)
## Unusually large Z-statistics (|x|>5):
## 
##         (Intercept)                  tz                impl              lag1_z 
##          -33.375868           -6.299158          -10.133882            7.948882 
##  factor(id_number)2  factor(id_number)3  factor(id_number)4  factor(id_number)5 
##          -13.621158          -13.791071          -12.516528          -10.829782 
##  factor(id_number)8 factor(id_number)12 factor(id_number)16 factor(id_number)17 
##           -8.078626           -9.853518          -11.893310           -9.489608 
## factor(id_number)20 factor(id_number)21 factor(id_number)25 factor(id_number)26 
##           -7.608817           -5.555484          -10.885957           -5.455350 
## factor(id_number)29 factor(id_number)30 factor(id_number)31    disp~(Intercept) 
##           -5.562117           -8.411473           -8.075523           84.460148 
## 
## Large Z-statistics (estimate/std err) suggest a *possible* failure of
## the Wald approximation - often also associated with parameters that are
## at or near the edge of their range (e.g. random-effects standard
## deviations approaching 0).  (Alternately, they may simply represent
## very well-estimated parameters; intercepts of non-centered models may
## fall in this category.) While the Wald p-values and standard errors
## listed in summary() may be unreliable, profile confidence intervals
## (see ?confint.glmmTMB) and likelihood ratio test p-values derived by
## comparing models (e.g. ?drop1) are probably still OK.  (Note that the
## LRT is conservative when the null value is on the boundary, e.g. a
## variance or zero-inflation value of 0 (Self and Liang 1987; Stram and
## Lee 1994; Goldman and Whelan 2000); in simple cases these p-values are
## approximately twice as large as they should be.)

plots

# predicted values from model m1 (latest, above)
clean_result$predicted <- predict(m1_fe, newdata = clean_result, type = "response")

# averages and SEs for predictions
avg_predictions <- aggregate(predicted ~ t, data = clean_result, mean, na.rm = TRUE)
se_predictions <- aggregate(predicted ~ t, data = clean_result, 
                            function(x) sd(x, na.rm = TRUE)/sqrt(length(x)))
avg_with_se <- merge(avg_predictions, se_predictions, by = "t", suffixes = c("_mean", "_se"))

# averages and SEs for observed data
avg_observed <- aggregate(wbg ~ t, data = clean_result, mean, na.rm = TRUE)
se_observed <- aggregate(wbg ~ t, data = clean_result, 
                         function(x) sd(x, na.rm = TRUE)/sqrt(length(x)))
obs_with_se <- merge(avg_observed, se_observed, by = "t", suffixes = c("_mean", "_se"))

# CI with SE
avg_with_se$upper_ci <- avg_with_se$predicted_mean + 1.96 * avg_with_se$predicted_se
avg_with_se$lower_ci <- avg_with_se$predicted_mean - 1.96 * avg_with_se$predicted_se
obs_with_se$upper_ci <- obs_with_se$wbg_mean + 1.96 * obs_with_se$wbg_se
obs_with_se$lower_ci <- obs_with_se$wbg_mean - 1.96 * obs_with_se$wbg_se

# pre / post int
int <- 16 
pre_pred <- avg_with_se[avg_with_se$t <= int, ]
post_pred <- avg_with_se[avg_with_se$t > int, ]
pre_obs <- obs_with_se[obs_with_se$t <= int, ]
post_obs <- obs_with_se[obs_with_se$t > int, ]

# ggplot
plot_df <- avg_with_se %>%
  rename(pred_mean = predicted_mean,
         pred_se = predicted_se,
         pred_lower = lower_ci,
         pred_upper = upper_ci) %>%
  left_join(obs_with_se %>%
              rename(obs_mean = wbg_mean,
                     obs_se = wbg_se,
                     obs_lower = lower_ci,
                     obs_upper = upper_ci), by = "t")

ggplot(plot_df, aes(x = t)) +
  geom_ribbon(aes(ymin = pred_lower, ymax = pred_upper), fill = "lightgrey", alpha = 0.4) + # predicted CI
  geom_ribbon(aes(ymin = obs_lower, ymax = obs_upper), fill = "lightblue", alpha = 0.4) + # obsv CI
  geom_line(aes(y = pred_mean), color = "darkgrey", linetype = "dashed", linewidth = 1) + # predicted line
  geom_line(aes(y = obs_mean), color = "steelblue", size = 1) + # observed line
  geom_vline(xintercept = int, linetype = "dotted", color = "grey", size = 1) +
  scale_x_continuous(breaks = seq(min(plot_df$t), max(plot_df$t), by = 2)) +
  labs(
    title = "Observed vs Predicted Non-Urgent ED Visits",
    subtitle = "95% CI",
    x = "Month",
    y = "ED visits per 100 000 population") +
  theme_minimal(base_size = 14) +
  theme(
    panel.grid.minor = element_blank(),
    plot.title = element_text(face = "bold"),
    legend.position = "none")

# descriptive tables
summary_table <- clean_result %>%
  group_by(int) %>%
  summarise(
    mean_nonurgent = mean(wbg, na.rm = TRUE),  
    sd_nonurgent = sd(wbg, na.rm = TRUE),
    n = n())
summary_table
## # A tibble: 2 × 4
##     int mean_nonurgent sd_nonurgent     n
##   <dbl>          <dbl>        <dbl> <int>
## 1     0          5718.        2733.   420
## 2     1          5144.        2582.   420
summary_table_by_id <- clean_result %>%
  group_by(int, id_number) %>%
  summarise(
    mean_nonurgent = mean(wbg, na.rm = TRUE),  
    sd_nonurgent = sd(wbg, na.rm = TRUE),
    n = n())
print(as.data.frame(summary_table_by_id), row.names = FALSE)
##  int id_number mean_nonurgent sd_nonurgent  n
##    0         1       5901.133     584.0652 15
##    0         2       6154.200     562.1087 15
##    0         3       6041.067     591.0172 15
##    0         4       3340.200     538.0506 15
##    0         5       5417.733     495.1931 15
##    0         6       4282.267     356.2003 15
##    0         7       7083.267     684.1884 15
##    0         8       7322.067     737.1867 15
##    0         9       8007.667     863.6231 15
##    0        10       5563.533     568.0884 15
##    0        12       5970.333     841.5255 15
##    0        13       5134.733     363.9279 15
##    0        15       2008.467     260.8097 15
##    0        16       3368.800     367.8768 15
##    0        17       4007.000     412.3345 15
##    0        18      13780.467    1022.4401 15
##    0        20       6769.067     575.8930 15
##    0        21       5941.467     607.9660 15
##    0        22       6432.133     924.7451 15
##    0        23       7899.333     389.9102 15
##    0        25       4102.400    1090.6784 15
##    0        26       4606.133     672.0864 15
##    0        27       2299.333     310.8632 15
##    0        28       4523.800     640.8488 15
##    0        29       2023.933     368.3223 15
##    0        30       6225.533     674.7899 15
##    0        31      12697.467    1110.6897 15
##    0        32       3208.200     246.3993 15
##    1         1       5404.933     397.8506 15
##    1         2       4682.933     782.4967 15
##    1         3       5435.667     555.9902 15
##    1         4       3281.600     470.7083 15
##    1         5       4410.000     890.8028 15
##    1         6       3652.267     912.9976 15
##    1         7       6263.267    1303.9808 15
##    1         8       6315.533     650.3263 15
##    1         9       4624.467    1393.9224 15
##    1        10       5463.800     574.9205 15
##    1        12       5874.643    1130.6567 15
##    1        13       4715.429     442.0031 15
##    1        15       1901.400     191.1154 15
##    1        16       3218.067     376.4307 15
##    1        17       4038.400     525.3116 15
##    1        18      12890.400    1077.6298 15
##    1        20       6809.933     949.3547 15
##    1        21       4845.400     714.2521 15
##    1        22       6257.333     603.8131 15
##    1        23       7470.600     676.4018 15
##    1        25       2987.400     351.7556 15
##    1        26       4449.143     556.9231 15
##    1        27       1985.200     225.6607 15
##    1        28       3384.267     499.9343 15
##    1        29       3382.733     605.3661 15
##    1        30       5378.400     980.2645 15
##    1        31      12060.200    1132.7360 15
##    1        32       2810.933     452.1715 15
id_LHU<- read_excel("id_number_2024.xlsx")
print(as.data.frame(id_LHU), row.names = FALSE)
##  id_number                   institution
##          1           ULS Arco Ribeirinho
##          2                    ULS Leiria
##          3          ULS Lisboa Ocidental
##          4                  ULS Arrábida
##          5                    ULS Aveiro
##          6                 ULS Médio Ave
##          7                     ULS Oeste
##          8                   ULS Coimbra
##          9       ULS Entre Douro e Vouga
##         10                ULS Médio Tejo
##         11  ULS Póvoa Varzim/ Vila Conde
##         12            ULS Tâmega e Sousa
##         13                ULS Dão-Lafões
##         14 ULS Trás-os-Montes Alto Douro
##         15                ULS Cova Beira
##         16             ULS Santo António
##         17                  ULS São João
##         18                   ULS Algarve
##         19                  ULS São José
##         20               ULS Santa Maria
##         21   ULS Vila Nova Gaia/ Espinho
##         22                  ULS Alto Ave
##         23                     ULS Braga
##         24      Hospital de Cascais, PPP
##         25           ULS Loures-Odivelas
##         26             ULS Estuário Tejo
##         27             ULS Baixo Mondego
##         28                   ULS Lezíria
##         29          ULS Alentejo Central
##         30             ULS Almada-Seixal
##         31           ULS Amadora/ Sintra
##         32       ULS Barcelos/ Esposende
##         33             ULS Alto Alentejo
##         34                    ULS Guarda
##         35                ULS Matosinhos
##         36                ULS Alto Minho
##         37            ULS Baixo Alentejo
##         38        ULS Litoral Alentejano
##         39                  ULS Nordeste
##         40            ULS Castelo Branco