Setup

# load libraries
library(foreign)
library(dplyr)
library(stringr)
library(ggplot2)
library(ggpubr)
library(reshape2)
library(ggpubr)
library(sjmisc)
library(qwraps2)
library(stargazer)
library(rms)
library(lme4)
library(table1)
library(effects)
library(sjlabelled)
library(performance)
# load data
load(paste0(path1,"pyNE_2021_2023.Rdata"))
load(paste0(path2,"pyWest_2018_2020_2022.Rdata"))
# limit variables & merge
py <- rbind(east[,c("survey.year","region","county", "munic.area", "urban", "youthreach", "school.id",
                    "grade", "gender3", "age.group", "ethnicity3",
                    "Ma_edu","home.parents2","home.parents3", "home.parents4", "home.language",
                    "genMH","badMH", "sharm2", "sdqtot",
                    "dep.appetite", "dep.lonely", "dep.cry", "dep.sleep", 
                    "dep.sad" ,"dep.apathy" ,"dep.hopeless",
                           "dep.lim7", "suic.att",
                    "smokelife.bin", "smoke30days.bin", "eciglife.bin", "ecig30days.bin", "alclife.bin", "alc30days.bin", "cannabislife.bin", "cannabis30days.bin")],
            west[,c("survey.year","region","county","munic.area", "urban",  "youthreach", "school.id",
                    "grade", "gender3", "age.group", "ethnicity3",
                    "Ma_edu","home.parents2","home.parents3","home.parents4",  "home.language",
                    "genMH","badMH", "sharm2", "sdqtot",
                    "dep.appetite", "dep.lonely", "dep.cry", "dep.sleep", 
                    "dep.sad" ,"dep.apathy" ,"dep.hopeless",
                           "dep.lim7", "suic.att",
                    "smokelife.bin", "smoke30days.bin", "eciglife.bin", "ecig30days.bin", "alclife.bin", "alc30days.bin", "cannabislife.bin", "cannabis30days.bin")])
# enter urbanicity scores manually ; calculated separately (see "Urbanicity of counties barplot.keynote" and "Census data for PY regions.Rmd")
py$urbanicity <- NA
py$urbanicity[py$munic.area=="North Dublin"] <- 4.46
py$urbanicity[py$munic.area=="Cavan"] <- 2.36
py$urbanicity[py$munic.area=="Monaghan"] <- 2.44
py$urbanicity[py$munic.area=="Mayo"] <- 2.16
py$urbanicity[py$munic.area=="Roscommon"] <- 2.29
py$urbanicity[py$munic.area=="Galway"] <- 2.87
py$urbanicity[py$munic.area=="Galway_City"] <- 6.0

# make ref level 0 meaningful
py$urbanicity <- py$urbanicity-1 # now ranges from 0-5, increasing urbanicity

Sample selection

# limit to 15 years and up (but allow missings) [N 24,073 -> 23,068]
py <- py[is.na(py$age.group) | (py$age.group=="15" | py$age.group=="16" | py$age.group=="17 or older"), ]
py$age.group <- droplevels(py$age.group)

# keep the full dataframe here
py.full <- py

# remove Youthreach students
py.YR <- py[py$youthreach=="Yes",]
py <- py[py$youthreach=="No",]


# remove those in 3rd year (pre-Junior Cert completion)
py <- py[is.na(py$grade) | (
                              #py$grade=="3rd Year" | 
                              py$grade=="4th Year" | py$grade=="5th Year" 
                            | py$grade=="6th Year" | py$grade=="LCA/QQI"),] 
py$grade <- droplevels(py$grade)

Sample N for analysis = 21436

Sample characteristics

Sample characteristics for the full sample
Overall
(N=21436)
gender3
Male 10623 (49.6%)
Female 10243 (47.8%)
Other 480 (2.2%)
Missing 90 (0.4%)
age.group
15 6779 (31.6%)
16 12619 (58.9%)
17-19 1961 (9.1%)
Missing 77 (0.4%)
ethnicity3
White Irish 12909 (60.2%)
White non-Irish 1937 (9.0%)
Black, Asian or other minority ethnicity 1781 (8.3%)
Not Asked 4722 (22.0%)
Missing 87 (0.4%)
grade
4th Year 15450 (72.1%)
5th Year 5352 (25.0%)
LCA/QQI 221 (1.0%)
6th Year 77 (0.4%)
Missing 336 (1.6%)
Family make-up
2 Parents 16784 (78.3%)
1 Parent 2764 (12.9%)
1 Parent +other 980 (4.6%)
Other 811 (3.8%)
Missing 97 (0.5%)
Maternal education level
College or University 12628 (58.9%)
Secondary School 4968 (23.2%)
Primary School 802 (3.7%)
Dont know/Doesnt apply 2891 (13.5%)
Missing 147 (0.7%)
school.type
Mainstream 21436 (100%)
urbanicity
Mean (SD) 2.20 (1.25)
Median [Min, Max] 1.87 [1.16, 5.00]

Table notes:
* 82% of the 17-25 age-group are from mainstream schools, and are therefore likely 19 or younger
* ethnicity was not recorded in the West in 2018 (20% of sample)
* ‘White Irish’ ethnicity included Irish travellers
* ‘College/University’ level education included technical college, diploma and apprenticeship
* “another” language included Irish
* “Other” family type at home included living alone, foster care & exchange student host family

Let’s look at how demographics changed in each survey year. Note that this also reflects regional differences (even years = West; odd years = North East)

## Warning in table1.formula(~gender3 + age.group + ethnicity3 + grade +
## home.parents4 + : Terms to the right of '|' in formula 'x' define table columns
## and are expected to be factors with meaningful labels.
Sample characteristics split by survey year
2018
(N=4722)
2020
(N=4828)
2021
(N=3160)
2022
(N=5122)
2023
(N=3604)
Overall
(N=21436)
gender3
Male 2337 (49.5%) 2360 (48.9%) 1633 (51.7%) 2505 (48.9%) 1788 (49.6%) 10623 (49.6%)
Female 2358 (49.9%) 2374 (49.2%) 1377 (43.6%) 2469 (48.2%) 1665 (46.2%) 10243 (47.8%)
Other 0 (0%) 77 (1.6%) 130 (4.1%) 139 (2.7%) 134 (3.7%) 480 (2.2%)
Missing 27 (0.6%) 17 (0.4%) 20 (0.6%) 9 (0.2%) 17 (0.5%) 90 (0.4%)
age.group
15 1439 (30.5%) 1341 (27.8%) 1014 (32.1%) 1452 (28.3%) 1533 (42.5%) 6779 (31.6%)
16 3025 (64.1%) 3094 (64.1%) 1595 (50.5%) 3109 (60.7%) 1796 (49.8%) 12619 (58.9%)
17-19 240 (5.1%) 378 (7.8%) 531 (16.8%) 560 (10.9%) 252 (7.0%) 1961 (9.1%)
Missing 18 (0.4%) 15 (0.3%) 20 (0.6%) 1 (0.0%) 23 (0.6%) 77 (0.4%)
ethnicity3
White Irish 0 (0%) 4077 (84.4%) 2199 (69.6%) 3976 (77.6%) 2657 (73.7%) 12909 (60.2%)
White non-Irish 0 (0%) 408 (8.5%) 595 (18.8%) 525 (10.3%) 409 (11.3%) 1937 (9.0%)
Black, Asian or other minority ethnicity 0 (0%) 313 (6.5%) 340 (10.8%) 613 (12.0%) 515 (14.3%) 1781 (8.3%)
Not Asked 4722 (100%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 4722 (22.0%)
Missing 0 (0%) 30 (0.6%) 26 (0.8%) 8 (0.2%) 23 (0.6%) 87 (0.4%)
grade
4th Year 3288 (69.6%) 3546 (73.4%) 2098 (66.4%) 3743 (73.1%) 2775 (77.0%) 15450 (72.1%)
5th Year 1382 (29.3%) 1176 (24.4%) 960 (30.4%) 1253 (24.5%) 581 (16.1%) 5352 (25.0%)
LCA/QQI 0 (0%) 83 (1.7%) 81 (2.6%) 57 (1.1%) 0 (0%) 221 (1.0%)
6th Year 10 (0.2%) 0 (0%) 0 (0%) 61 (1.2%) 6 (0.2%) 77 (0.4%)
Missing 42 (0.9%) 23 (0.5%) 21 (0.7%) 8 (0.2%) 242 (6.7%) 336 (1.6%)
Family make-up
2 Parents 3749 (79.4%) 3845 (79.6%) 2438 (77.2%) 3973 (77.6%) 2779 (77.1%) 16784 (78.3%)
1 Parent 638 (13.5%) 627 (13.0%) 426 (13.5%) 634 (12.4%) 439 (12.2%) 2764 (12.9%)
1 Parent +other 233 (4.9%) 207 (4.3%) 188 (5.9%) 191 (3.7%) 161 (4.5%) 980 (4.6%)
Other 84 (1.8%) 126 (2.6%) 85 (2.7%) 312 (6.1%) 204 (5.7%) 811 (3.8%)
Missing 18 (0.4%) 23 (0.5%) 23 (0.7%) 12 (0.2%) 21 (0.6%) 97 (0.5%)
Maternal education level
College or University 2415 (51.1%) 2482 (51.4%) 1953 (61.8%) 3380 (66.0%) 2398 (66.5%) 12628 (58.9%)
Secondary School 1188 (25.2%) 1080 (22.4%) 767 (24.3%) 1233 (24.1%) 700 (19.4%) 4968 (23.2%)
Primary School 337 (7.1%) 261 (5.4%) 49 (1.6%) 94 (1.8%) 61 (1.7%) 802 (3.7%)
Dont know/Doesnt apply 751 (15.9%) 949 (19.7%) 369 (11.7%) 402 (7.8%) 420 (11.7%) 2891 (13.5%)
Missing 31 (0.7%) 56 (1.2%) 22 (0.7%) 13 (0.3%) 25 (0.7%) 147 (0.7%)
school.type
Mainstream 4722 (100%) 4828 (100%) 3160 (100%) 5122 (100%) 3604 (100%) 21436 (100%)
munic.area
North Dublin 0 (0%) 0 (0%) 1510 (47.8%) 0 (0%) 1858 (51.6%) 3368 (15.7%)
Cavan 0 (0%) 0 (0%) 834 (26.4%) 0 (0%) 822 (22.8%) 1656 (7.7%)
Monaghan 0 (0%) 0 (0%) 816 (25.8%) 0 (0%) 924 (25.6%) 1740 (8.1%)
Mayo 1463 (31.0%) 1479 (30.6%) 0 (0%) 1478 (28.9%) 0 (0%) 4420 (20.6%)
Roscommon 511 (10.8%) 629 (13.0%) 0 (0%) 672 (13.1%) 0 (0%) 1812 (8.5%)
Galway County 1975 (41.8%) 1966 (40.7%) 0 (0%) 2097 (40.9%) 0 (0%) 6038 (28.2%)
Galway City 773 (16.4%) 754 (15.6%) 0 (0%) 875 (17.1%) 0 (0%) 2402 (11.2%)

Check number of schools per region

d <- as.data.frame(table(py$school.id))

# count number of schools per county
schls <- data.frame(region=rep(unique(py$munic.area),2), 
                    school.type = c(rep("Mainstream",7),rep("YouthReach",7)))
for (x in unique(py$munic.area)) {
  for (y in unique(py$school.type)) {
    schls$num[schls$region==x & schls$school.type==y] <- 
                    length(unique(py$school.id[py$munic.area==x & py$school.type==y]))
    
    schls$pupils[schls$region==x & schls$school.type==y] <- 
                    length(py$school.id[py$munic.area==x & py$school.type==y])
  }
}

print(schls[schls$school.type=="Mainstream",])
##          region school.type num pupils
## 1         Cavan  Mainstream  11   1656
## 2      Monaghan  Mainstream  12   1740
## 3  North Dublin  Mainstream  16   3368
## 4     Roscommon  Mainstream   9   1812
## 5          Mayo  Mainstream  27   4420
## 6 Galway County  Mainstream  29   6038
## 7   Galway City  Mainstream  10   2402

Results

Split into 4 sections:
Q1. 5-year prevalence estimates for secondary school students in Ireland (N~20,000).
Q2. Time trends: how have prevalence estimates changed throughout the 5 years (descriptive only).
Q3. Which sub-groups in the adolescent population have the highest probability of reporting these outcomes?
Q4. How did prevalence estimates between COVID and non-COVID survey periods?

(Q1) Mental health of the sample

county.cols <- c( "deepskyblue1","dodgerblue4", "cornflowerblue", 
                  "tomato","firebrick4", "violetred")

munic.cols <- c( "deepskyblue1",  "dodgerblue4","cornflowerblue",
                 "goldenrod","firebrick4",  "violetred", "tomato" )

gender.colors <- c("steelblue3", "rosybrown3", "wheat3")


dvlist_nice <- c("Poor mental health", "Repetitive self-harm", "Suicide attempt")
# Descriptives table

py$selfharm.x5 <- factor(py$selfharm.x5, levels=c("No", "Yes"))
py$suic.att <- relevel(py$suic.att, "No")


table1(~ badMH.f +  selfharm.x5 + suic.att | covid.period, 
       data=py, c(left = "Overall"),
       caption = "Overall = 5-year National average")
Overall = 5-year National average
Overall
(N=21436)
Non-COVID years
(N=13448)
COVID-years
(N=7988)
badMH.f
Very good/Good/Okay 16971 (79.2%) 10922 (81.2%) 6049 (75.7%)
Bad/Very bad 4131 (19.3%) 2273 (16.9%) 1858 (23.3%)
Missing 334 (1.6%) 253 (1.9%) 81 (1.0%)
selfharm.x5
No 18159 (84.7%) 11330 (84.3%) 6829 (85.5%)
Yes 2490 (11.6%) 1559 (11.6%) 931 (11.7%)
Missing 787 (3.7%) 559 (4.2%) 228 (2.9%)
suic.att
No 18863 (88.0%) 11869 (88.3%) 6994 (87.6%)
Yes 1747 (8.1%) 1018 (7.6%) 729 (9.1%)
Missing 826 (3.9%) 561 (4.2%) 265 (3.3%)
# model-derived AVERAGE prevalence of each outcome

library(gmodels)

dvlist <- c("badMH", "selfharm.x5", "suic.att")

py$suic.att <- relevel(py$suic.att, "No")
py$suic.att <- relevel(py$suic.att, "No")

mod <- lapply(dvlist, function(x) {
glmer(eval(substitute(i ~ 1 + (1|munic.area/school.id),
                list(i = as.name(x)))), 
              data = py,
              family=binomial() )} )

# (a) what's the average rate of each outcome?

mod.avgs <- data.frame(outcome = dvlist, prob = NA)
mod.avgs.county <- c()
mod.avgs.school <- c()
for (DV in 1:3) {
  
  mod.avgs$prob[DV]= mean( predict(mod[[DV]], type="response") ) 
  mod.avgs$ci.lo[DV]=quantile(predict(mod[[DV]], type="response"), 0.025)
  mod.avgs$ci.hi[DV]=quantile(predict(mod[[DV]], type="response"), 0.975)
  
  mod.avgs$N_analysis[DV] = nrow(mod[[DV]]@frame)

  # predict probability of outcome for each school
  newdata <- unique(py[,c("munic.area", "school.id")])
  newdata$pred.resp <- predict(mod[[DV]], newdata=newdata, type="response",
                re.form=~(1|munic.area/school.id))
  
  mod.avgs.school <- rbind(mod.avgs.school,
                           cbind(newdata, dvlist[DV]))
  
  # average those within each county to get predicted prob on the county level
  mod.avgs.county <- rbind(mod.avgs.county, 
                           cbind(mod.avgs.school[mod.avgs.school$`dvlist[DV]`==dvlist[DV],]%>%
                                   group_by(munic.area)%>%
                                   summarise(mean=mean(pred.resp), 
                                             ci_lo=ci(pred.resp)[2], 
                                             ci_hi=ci(pred.resp)[3]),
                                 dvlist[DV]) )
  

}
mod.avgs[,c(2:4)] <- round(mod.avgs[,c(2:4)],3)
print(mod.avgs)
##       outcome  prob ci.lo ci.hi N_analysis
## 1       badMH 0.196 0.119 0.296      21102
## 2 selfharm.x5 0.120 0.053 0.184      20649
## 3    suic.att 0.084 0.048 0.130      20610
# plot model-derived mean prevalence for each outcome with variation by area and school

lims =  c(0.025,.325)

d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="badMH",]
d_co <- mod.avgs.county[mod.avgs.county$`dvlist[DV]`=="badMH",]

g1<-ggplot()+
  geom_hline(yintercept=mean(mod.avgs$prob[mod.avgs$outcome=="badMH"]), linetype="dashed", colour="blue")+
  geom_point(data=d_sch, aes(x=reorder(munic.area,-pred.resp), y=pred.resp), position=position_jitter(width=0.15),
                 shape=1, alpha=0.8, colour="grey")+
  geom_errorbar(data=d_co, aes(x=reorder(munic.area,-mean), y=mean, ymin=ci_lo, ymax=ci_hi),
                colour="grey30", width=0.3)+
  geom_point(data=d_co, aes(x=reorder(munic.area,-mean), y=mean),
                colour="grey30")+
   geom_text(stat="identity", aes(label= paste0("Average = ",round(mean(mod.avgs$prob[mod.avgs$outcome=="badMH"])*100, 
                                                      digits=1), "%"), 
                                  x=7, y=0.32, hjust=1),
             size=4, colour="blue")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims,
                     name="Probability")+
  labs(title ="Poor mental health", x="")+
  theme_bw()+
  theme(axis.text.x=element_text(angle=90))


d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="selfharm.x5",]
d_co <- mod.avgs.county[mod.avgs.county$`dvlist[DV]`=="selfharm.x5",]
g2<-ggplot()+
  geom_hline(yintercept=mean(mod.avgs$prob[mod.avgs$outcome=="selfharm.x5"]), linetype="dashed", colour="blue")+
  geom_point(data=d_sch, aes(x=reorder(munic.area,-pred.resp), y=pred.resp), position=position_jitter(width=0.15),
                 shape=1, alpha=0.8, colour="grey")+
  geom_errorbar(data=d_co, aes(x=reorder(munic.area,-mean), y=mean, ymin=ci_lo, ymax=ci_hi),
                colour="grey30", width=0.3)+
  geom_point(data=d_co, aes(x=reorder(munic.area,-mean), y=mean),
                colour="grey30")+
  geom_text(stat="identity", aes(label= paste0("Average = ",
                                               round(mean(mod.avgs$prob[mod.avgs$outcome=="selfharm.x5"])*100, 
                                                      digits=1), "%"), 
                                  x=7, y=0.32, hjust=1),
             size=4, colour="blue")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims,
                     name="Probability")+
  labs(title ="Frequent self-harm", x="")+
  theme_bw()+
  theme(axis.text.x=element_text(angle=90))


d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="suic.att",]
d_co <- mod.avgs.county[mod.avgs.county$`dvlist[DV]`=="suic.att",]
g3<-ggplot()+
  geom_hline(yintercept=mean(mod.avgs$prob[mod.avgs$outcome=="suic.att"]), linetype="dashed", colour="blue")+
  geom_point(data=d_sch, aes(x=reorder(munic.area,-pred.resp), y=pred.resp), position=position_jitter(width=0.15),
                 shape=1, alpha=0.8, colour="grey")+
  geom_errorbar(data=d_co, aes(x=reorder(munic.area,-mean), y=mean, ymin=ci_lo, ymax=ci_hi),
                colour="grey30", width=0.3)+
  geom_point(data=d_co,aes(x=reorder(munic.area,-mean), y=mean),
                colour="grey30")+
  geom_text(stat="identity", aes(label= paste0("Average = ",
                                               round(mean(mod.avgs$prob[mod.avgs$outcome=="suic.att"])*100, 
                                                      digits=1), "%"), 
                                  x=7, y=0.32, hjust=1),
             size=4, colour="blue")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims,
                     name=" ")+
  labs(title ="Suicide Attempt", x="")+
  theme_bw()+
  theme(axis.text.x=element_text(angle=90))

ggarrange(g1, g2, g3, nrow=1, ncol=3)

# plot model-derived mean prevalence for each outcome with variation by school only 

lims =  c(0.025,.325)

d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="badMH",]
subj.ave <- mean(mod.avgs$prob[mod.avgs$outcome=="badMH"])

g1<-ggplot()+
  geom_rect(aes(xmin=-Inf, xmax=Inf,
                ymin= mod.avgs$ci.lo[mod.avgs$outcome=="badMH"], ymax= mod.avgs$ci.hi[mod.avgs$outcome=="badMH"]), 
            fill="black", alpha=0.1)+
  geom_hline(yintercept=subj.ave, colour="blue")+
  geom_point(data=d_sch, aes(x= school.id, y=pred.resp),
                 shape=1, alpha=0.8, colour="black")+
   geom_text(stat="identity", aes(label= paste0("Sample Average = ", round(mean(mod.avgs$prob[mod.avgs$outcome=="badMH"])*100, digits=1), "%"), 
                                   x=60, y=.31), hjust=0.5, vjust=-1, size=4, colour="blue")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims, name="Probability")+
  labs(title ="Poor mental health", x="")+
  theme_bw()+
  theme(axis.text.x=element_blank(),
        axis.ticks.x = element_blank(),
        panel.grid.major.x=element_blank())


d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="selfharm.x5",]
subj.ave <- mean(mod.avgs$prob[mod.avgs$outcome=="selfharm.x5"])

g2<-ggplot()+
  geom_rect(aes(xmin=-Inf, xmax=Inf,
                ymin= mod.avgs$ci.lo[mod.avgs$outcome=="selfharm.x5"], ymax= mod.avgs$ci.hi[mod.avgs$outcome=="selfharm.x5"]), 
            fill="black", alpha=0.1)+
  geom_hline(yintercept=subj.ave, colour="blue")+
  geom_point(data=d_sch, aes(x= school.id, y=pred.resp),
                 shape=1, alpha=0.8, colour="black")+
   geom_text(stat="identity", aes(label= paste0("Sample Average = ", round(mean(mod.avgs$prob[mod.avgs$outcome=="selfharm.x5"])*100, digits=1), "%"), 
                                   x=60, y=.31), hjust=0.5, vjust=-1, size=4, colour="blue")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims, name=" ")+
  labs(title ="Repetitive Self-Harm", x="")+
  theme_bw()+
  theme(axis.text.x=element_blank(),
        axis.ticks.x = element_blank(),
        panel.grid.major.x=element_blank())


d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="suic.att",]
subj.ave <- mean(mod.avgs$prob[mod.avgs$outcome=="suic.att"])

g3<-ggplot()+
  geom_rect(aes(xmin=-Inf, xmax=Inf,
                ymin= mod.avgs$ci.lo[mod.avgs$outcome=="suic.att"], ymax= mod.avgs$ci.hi[mod.avgs$outcome=="suic.att"]), 
            fill="black", alpha=0.1)+
  geom_hline(yintercept=subj.ave, colour="blue")+
  geom_point(data=d_sch, aes(x= school.id, y=pred.resp),
                 shape=1, alpha=0.8, colour="black")+
  geom_text(stat="identity", aes(label= paste0("Sample Average = ", round(mean(mod.avgs$prob[mod.avgs$outcome=="suic.att"])*100, digits=1), "%"), 
                                   x=60, y=.31), hjust=0.5, vjust=-1, size=4, colour="blue")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims, name=" ")+
  labs(title ="Suicide Attempt", x="")+
  theme_bw()+
  theme(axis.text.x=element_blank(),
        axis.ticks.x = element_blank(),
        panel.grid.major.x=element_blank())

ggarrange(g1, g2, g3, nrow=1, ncol=3)

(Q3) Group differences in mental health

# set reference levels
py$age.group <- relevel(py$age.group, "16")
py$region <- relevel(py$region, "West")
py$selfharm.x5 <- factor(py$selfharm.x5, levels=c("No", "Yes"))
py$suic.att <- relevel(py$suic.att, "No")

# create a linear version of time with baseline (0) at 2018
py$linear.time <- py$survey.year-2018
py$suic.att <- relevel(py$suic.att, "No")

dvlist <- c("badMH", "selfharm.x5", "suic.att")


py$urbanicity.z <- scale(py$urbanicity,scale=F)

groupdiffs <- lapply(dvlist, function(x) {
glmer(eval(substitute(i ~  age.group + gender3 + urbanicity.z +
                         + home.parents3   + Ma_edu + covid.period
                         + (1|munic.area/school.id),
                list(i = as.name(x)))), 
              data = py,
              family=binomial() )} )


# Extract Odds ratios for a table
group.ORs <- c()
for (DV in 1:3){
res1out <- as.data.frame(summary(groupdiffs[[DV]])[["coefficients"]])
res1out$var <- rownames(res1out)
res1out$outcome <- dvlist[DV]
res1out$OR <- exp(res1out$Estimate)
res1out$CI.low<-exp(res1out$Estimate - (1.96*res1out$`Std. Error`))
res1out$CI.hi<-exp(res1out$Estimate + (1.96*res1out$`Std. Error`))
rownames(res1out) <- c()
res1out <- move_columns(res1out, c(outcome, var), .before="Estimate")
res1out[,3:length(res1out)] <- round(res1out[,3:length(res1out)], 3)
group.ORs <- rbind(group.ORs, res1out)
}

group.ORs.lim <- group.ORs[,c("outcome", "var", "OR", "CI.low", "CI.hi", "Pr(>|z|)")]
group.ORs.lim$str <- paste0(round(group.ORs.lim$OR,2),  " (", 
                            round(group.ORs.lim$CI.low,2), "-" , 
                            round(group.ORs.lim$CI.hi,2), ")")

print(group.ORs.lim[,c(1:6)])
##        outcome                          var     OR CI.low  CI.hi Pr(>|z|)
## 1        badMH                  (Intercept)  0.108  0.093  0.126    0.000
## 2        badMH                  age.group15  1.026  0.949  1.110    0.520
## 3        badMH               age.group17-19  1.085  0.956  1.231    0.206
## 4        badMH                gender3Female  2.155  1.993  2.331    0.000
## 5        badMH                 gender3Other  7.409  6.080  9.028    0.000
## 6        badMH                 urbanicity.z  1.071  0.967  1.187    0.187
## 7        badMH      home.parents3One Parent  1.703  1.562  1.857    0.000
## 8        badMH           home.parents3Other  1.036  0.856  1.255    0.714
## 9        badMH       Ma_eduSecondary School  1.206  1.107  1.314    0.000
## 10       badMH         Ma_eduPrimary School  1.577  1.325  1.877    0.000
## 11       badMH Ma_eduDont know/Doesnt apply  1.083  0.970  1.208    0.157
## 12       badMH      covid.periodCOVID-years  1.440  1.338  1.549    0.000
## 13 selfharm.x5                  (Intercept)  0.054  0.045  0.064    0.000
## 14 selfharm.x5                  age.group15  1.005  0.912  1.107    0.925
## 15 selfharm.x5               age.group17-19  1.174  1.008  1.368    0.040
## 16 selfharm.x5                gender3Female  3.025  2.730  3.352    0.000
## 17 selfharm.x5                 gender3Other 12.812 10.387 15.804    0.000
## 18 selfharm.x5                 urbanicity.z  1.090  0.975  1.219    0.131
## 19 selfharm.x5      home.parents3One Parent  1.844  1.662  2.044    0.000
## 20 selfharm.x5           home.parents3Other  1.369  1.103  1.700    0.004
## 21 selfharm.x5       Ma_eduSecondary School  1.082  0.973  1.203    0.146
## 22 selfharm.x5         Ma_eduPrimary School  1.664  1.359  2.038    0.000
## 23 selfharm.x5 Ma_eduDont know/Doesnt apply  0.970  0.843  1.118    0.677
## 24 selfharm.x5      covid.periodCOVID-years  0.946  0.862  1.037    0.236
## 25    suic.att                  (Intercept)  0.041  0.036  0.047    0.000
## 26    suic.att                  age.group15  0.919  0.821  1.029    0.143
## 27    suic.att               age.group17-19  1.287  1.088  1.522    0.003
## 28    suic.att                gender3Female  1.967  1.756  2.202    0.000
## 29    suic.att                 gender3Other  5.387  4.252  6.826    0.000
## 30    suic.att                 urbanicity.z  1.088  1.023  1.158    0.007
## 31    suic.att      home.parents3One Parent  1.964  1.750  2.205    0.000
## 32    suic.att           home.parents3Other  1.767  1.398  2.233    0.000
## 33    suic.att       Ma_eduSecondary School  1.278  1.134  1.441    0.000
## 34    suic.att         Ma_eduPrimary School  1.771  1.416  2.215    0.000
## 35    suic.att Ma_eduDont know/Doesnt apply  1.158  0.993  1.351    0.062
## 36    suic.att      covid.periodCOVID-years  1.190  1.072  1.320    0.001
# plot predicted probabilities of each outcome for each group
upper=0.6

for (DV in c(1:3)) {

mass.prob <- c()

var.n =0
for (x in c("gender3", "age.group", "urbanicity.z", "Ma_edu", "home.parents3")) { 
# not included in plots: covid.period, or mat ed not known, or intermediary urbanicity
  
var.n = var.n+1

# select just the highest & lowest levels of urbanicity (1 and 5) for visualisation
if (x=="urbanicity.z"){
pred.prob <- data.frame(effect(x, groupdiffs[[DV]],
                       se=TRUE, confidence.level = .95, type="response", 
                       xlevels= list(urbanicity.z = c(-1, 2.7))))  

} else  {
  pred.prob <- data.frame(effect(x, groupdiffs[[DV]],
                       se=TRUE, confidence.level = .95, type="response")) 
}

pred.prob$var <- x # variable name e.g. gender
names(pred.prob)[1] <- "group"
pred.prob$group <- as.character(pred.prob$group)
pred.prob <- move_columns(pred.prob, "var", .before=1)
pred.prob$group.full <- paste(pred.prob$var, pred.prob$group, sep="-") # combine var & level strings

mass.prob <- rbind(mass.prob, pred.prob)

}

mass.prob$order <- seq(1, length(mass.prob$group))

mass.prob$group[mass.prob$group=="Other" & mass.prob$var=="gender3"] <- "TGD"

mass.prob$group <- factor(mass.prob$group, levels=c("Male" , "Female" , "TGD",
                                                    "15","16" ,"17-19",
                                                    "College or University","Secondary School","Primary School","Dont know/Doesnt apply",
                                                    "Two Parents", "One Parent" , "Other",
                                                    "-1" ,"2.7"),
                          labels=c("Male" , "Female" , "Trans/gender-diverse",
                                                    "Aged 15","Aged 16" ,"Aged 17-19",
                                                    "Mother 3rd level edu","Mother secondary edu","Mother primary edu","Unknown mother edu",
                                                    "Two Parents", "One Parent" , "Other family make-up",
                                                     "Most rural" ,"Most urban"))

mass.prob$var <- factor(mass.prob$var,  levels=c("gender3" , "age.group" ,"Ma_edu", "home.parents3","urbanicity.z"), 
                        labels=c("Gender identity" , "Age" ,"Maternal education", "Family make-up","Urbanicity"))


# hide "unknown maternal education" from graph for simplicity
mass.prob <- mass.prob[!str_detect(mass.prob$group, "Unknown mother edu"),]


g<-ggplot(mass.prob, aes(x=group, y=fit, ymin=lower, ymax=upper, group=var, fill=var))+
  geom_hline(yintercept=mod.avgs$prob[DV], linetype="dashed", size=0.7)+
  geom_col(alpha=0.6)+
  labs(title=dvlist_nice[DV])+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), limits = c(0,upper),
                     breaks=seq(0,upper,0.10), minor_breaks = NULL,
                      name="")+
  geom_text( stat="identity", aes(label= paste0(round(fit*100, digits=0), "%"),y=0.02, colour=var),
              size=3, fontface="bold")+ #vjust=-1.2,
  geom_text(aes(label= paste0("- - Sample Average (",round(mod.avgs$prob[DV]*100, digits=1), "%)"),
              x=14.1, y=0.52), hjust=1.0, vjust=0, size=4.5, colour="black")+
  geom_errorbar(aes(colour=var),width=0)+
  theme_bw()+
  scale_fill_manual(values=c( "steelblue4", "lightblue3",  "darkseagreen3", "darkseagreen4","navajowhite3"))+ #"darkorange4"
  scale_colour_manual(values=c( "steelblue4", "lightblue3",  "darkseagreen3", "darkseagreen4", "navajowhite3"))+
  theme(axis.text.x =element_text(angle=30, hjust=1, vjust=1),
        axis.title.y=element_blank(),
        axis.title.x=element_blank(),
        legend.title=element_blank(),
        plot.title  = element_text(face="bold", size=14, hjust=0.5))
print(g)

}

Secondary school students Vs early school leavers

Note switch of dataframe from py to py.full

# descriptive table showing differences in demographics (and the need to control for them)
table1(~ gender3 + age.group + ethnicity3 + grade   
       + home.parents4  + Ma_edu # + home.language 
       + school.type + munic.area | school.type,
       data=py.full)
Mainstream
(N=22474)
YouthReach
(N=600)
Overall
(N=23074)
gender3
Male 11148 (49.6%) 289 (48.2%) 11437 (49.6%)
Female 10705 (47.6%) 289 (48.2%) 10994 (47.6%)
Other 531 (2.4%) 20 (3.3%) 551 (2.4%)
Missing 90 (0.4%) 2 (0.3%) 92 (0.4%)
age.group
15 7778 (34.6%) 30 (5.0%) 7808 (33.8%)
16 12647 (56.3%) 141 (23.5%) 12788 (55.4%)
17 or older 1972 (8.8%) 416 (69.3%) 2388 (10.3%)
Missing 77 (0.3%) 13 (2.2%) 90 (0.4%)
ethnicity3
White Irish 13598 (60.5%) 349 (58.2%) 13947 (60.4%)
White non-Irish 2097 (9.3%) 64 (10.7%) 2161 (9.4%)
Black, Asian or other minority ethnicity 1966 (8.7%) 66 (11.0%) 2032 (8.8%)
Not Asked 4726 (21.0%) 117 (19.5%) 4843 (21.0%)
Missing 87 (0.4%) 4 (0.7%) 91 (0.4%)
grade
2nd Year 7 (0.0%) 1 (0.2%) 8 (0.0%)
3rd Year 1031 (4.6%) 11 (1.8%) 1042 (4.5%)
4th Year 15450 (68.7%) 29 (4.8%) 15479 (67.1%)
5th Year 5352 (23.8%) 67 (11.2%) 5419 (23.5%)
LCA/QQI 221 (1.0%) 337 (56.2%) 558 (2.4%)
6th Year 77 (0.3%) 63 (10.5%) 140 (0.6%)
Missing 336 (1.5%) 92 (15.3%) 428 (1.9%)
home.parents4
2 Parents 17594 (78.3%) 306 (51.0%) 17900 (77.6%)
1 Parent 2915 (13.0%) 174 (29.0%) 3089 (13.4%)
1 Parent +other 1040 (4.6%) 44 (7.3%) 1084 (4.7%)
Other 826 (3.7%) 68 (11.3%) 894 (3.9%)
Missing 99 (0.4%) 8 (1.3%) 107 (0.5%)
Ma_edu
College or University 13296 (59.2%) 123 (20.5%) 13419 (58.2%)
Secondary School 5167 (23.0%) 175 (29.2%) 5342 (23.2%)
Primary School 825 (3.7%) 92 (15.3%) 917 (4.0%)
Dont know/Doesnt apply 3037 (13.5%) 198 (33.0%) 3235 (14.0%)
Missing 149 (0.7%) 12 (2.0%) 161 (0.7%)
school.type
Mainstream 22474 (100%) 0 (0%) 22474 (97.4%)
YouthReach 0 (0%) 600 (100%) 600 (2.6%)
munic.area
North Dublin 4402 (19.6%) 18 (3.0%) 4420 (19.2%)
Cavan 1656 (7.4%) 99 (16.5%) 1755 (7.6%)
Monaghan 1740 (7.7%) 52 (8.7%) 1792 (7.8%)
Mayo 4420 (19.7%) 125 (20.8%) 4545 (19.7%)
Roscommon 1812 (8.1%) 115 (19.2%) 1927 (8.4%)
Galway 6039 (26.9%) 133 (22.2%) 6172 (26.7%)
Galway_City 2405 (10.7%) 58 (9.7%) 2463 (10.7%)
# adjusted comparison of secondary school and early school leavers

py.full$suic.att <- relevel(py.full$suic.att, "No")

dvlist <- c("badMH", "selfharm.x5", "suic.att")

py.full$urbanicity.z <- scale(py.full$urbanicity,scale=F)

fullsamp <- lapply(dvlist, function(x) {
glmer(eval(substitute(i ~ school.type + age.group + gender3 + urbanicity.z +
                         + home.parents3   + Ma_edu + covid.period
                         + (1|munic.area/school.id),
                list(i = as.name(x)))), 
              data = py.full,
              family=binomial() )} )


# plot predicted probabilities

upper=0.6

mass.prob <- c()
for (DV in c(1:3)) {
    
    pred.prob <- data.frame(effect("school.type", fullsamp[[DV]],
                           se=TRUE, confidence.level = .95, type="response")) 
    
    pred.prob$var <- dvlist_nice[DV] # variable name e.g. gender
    mass.prob <- rbind(mass.prob, pred.prob)

g<- ggplot(pred.prob, aes(x=school.type, y=fit, ymin=lower, ymax=upper, fill=school.type))+
   geom_col(fill="grey40", alpha=0.4)+
   labs(title=dvlist_nice[DV])+
   scale_y_continuous(labels = scales::percent_format(accuracy = 5L), limits = c(0,upper),
                      breaks=seq(0,upper,0.10), minor_breaks = NULL,
                       name="")+
   geom_text( stat="identity", aes(label= paste0(round(fit*100, digits=0), "%"),y=0.02,),
               size=3, colour="grey40", fontface="bold")+ #vjust=-1.2,
   geom_errorbar(width=0, colour="grey40")+
   theme_bw()+
   theme(axis.text.x =element_text(angle=30, hjust=1, vjust=1),
         axis.title.y=element_blank(),
         axis.title.x=element_blank(),
        plot.title  = element_text(face="bold", size=14, hjust=0.5))
print(g)
}

# Extract Odds ratios for a table
group.ORs <- c()
for (DV in 1:3){
res1out <- as.data.frame(summary(fullsamp[[DV]])[["coefficients"]])
res1out$var <- rownames(res1out)
res1out$outcome <- dvlist[DV]
res1out$OR <- exp(res1out$Estimate)
res1out$CI.low<-exp(res1out$Estimate - (1.96*res1out$`Std. Error`))
res1out$CI.hi<-exp(res1out$Estimate + (1.96*res1out$`Std. Error`))
rownames(res1out) <- c()
res1out <- move_columns(res1out, c(outcome, var), .before="Estimate")
res1out[,3:length(res1out)] <- round(res1out[,3:length(res1out)], 3)
group.ORs <- rbind(group.ORs, res1out)
}

print(group.ORs[,c("outcome", "var", "OR", "CI.low", "CI.hi", "Pr(>|z|)")])
##        outcome                          var     OR CI.low  CI.hi Pr(>|z|)
## 1        badMH                  (Intercept)  0.112  0.095  0.133    0.000
## 2        badMH        school.typeYouthReach  1.557  1.210  2.004    0.001
## 3        badMH                  age.group16  0.973  0.902  1.049    0.470
## 4        badMH         age.group17 or older  1.051  0.927  1.192    0.434
## 5        badMH                gender3Female  2.198  2.041  2.367    0.000
## 6        badMH                 gender3Other  7.359  6.114  8.859    0.000
## 7        badMH                 urbanicity.z  1.077  0.964  1.204    0.188
## 8        badMH      home.parents3One Parent  1.718  1.583  1.864    0.000
## 9        badMH           home.parents3Other  1.103  0.924  1.318    0.277
## 10       badMH       Ma_eduSecondary School  1.221  1.125  1.325    0.000
## 11       badMH         Ma_eduPrimary School  1.509  1.280  1.779    0.000
## 12       badMH Ma_eduDont know/Doesnt apply  1.087  0.980  1.206    0.115
## 13       badMH      covid.periodCOVID-years  1.388  1.294  1.488    0.000
## 14 selfharm.x5                  (Intercept)  0.056  0.047  0.067    0.000
## 15 selfharm.x5        school.typeYouthReach  1.909  1.402  2.600    0.000
## 16 selfharm.x5                  age.group16  1.000  0.912  1.098    0.994
## 17 selfharm.x5         age.group17 or older  1.180  1.014  1.373    0.032
## 18 selfharm.x5                gender3Female  2.976  2.702  3.277    0.000
## 19 selfharm.x5                 gender3Other 12.155  9.983 14.799    0.000
## 20 selfharm.x5                 urbanicity.z  1.097  0.984  1.223    0.095
## 21 selfharm.x5      home.parents3One Parent  1.829  1.659  2.016    0.000
## 22 selfharm.x5           home.parents3Other  1.467  1.202  1.791    0.000
## 23 selfharm.x5       Ma_eduSecondary School  1.074  0.971  1.189    0.167
## 24 selfharm.x5         Ma_eduPrimary School  1.536  1.269  1.859    0.000
## 25 selfharm.x5 Ma_eduDont know/Doesnt apply  0.929  0.813  1.061    0.276
## 26 selfharm.x5      covid.periodCOVID-years  0.947  0.868  1.033    0.217
## 27    suic.att                  (Intercept)  0.038  0.033  0.044    0.000
## 28    suic.att        school.typeYouthReach  2.825  2.047  3.900    0.000
## 29    suic.att                  age.group16  1.097  0.985  1.222    0.092
## 30    suic.att         age.group17 or older  1.412  1.195  1.668    0.000
## 31    suic.att                gender3Female  2.040  1.834  2.270    0.000
## 32    suic.att                 gender3Other  5.747  4.617  7.154    0.000
## 33    suic.att                 urbanicity.z  1.103  1.039  1.170    0.001
## 34    suic.att      home.parents3One Parent  1.938  1.738  2.161    0.000
## 35    suic.att           home.parents3Other  1.883  1.523  2.329    0.000
## 36    suic.att       Ma_eduSecondary School  1.274  1.137  1.428    0.000
## 37    suic.att         Ma_eduPrimary School  1.666  1.354  2.050    0.000
## 38    suic.att Ma_eduDont know/Doesnt apply  1.116  0.966  1.290    0.135
## 39    suic.att      covid.periodCOVID-years  1.169  1.060  1.289    0.002

(Q4) Differences in the “COVID” period (2020, 2021) vs non-COVID years

First, look at changes post-COVID. Merge the two “COVID” timepoints together (2020, 2021) and the two post-COVID timepoints (2022, 2023) to simplify comparison and avoid regional bias.

dvlist <- c("badMH", "selfharm.x5", "suic.att")

trend <- lapply(dvlist, function(x) {
glmer(eval(substitute(i ~ survey.period 
                  + gender3 + age.group  + 
                  + home.parents3 +  Ma_edu
                  + (1|munic.area/school.id),
                list(i = as.name(x)))), 
              data = py[py$survey.year>2018,],
              family=binomial() )} )

trends_ORs<- c()
trend.prob <- c()
trend.plots <- vector("list", length = 3)
for (DV in 1:3) {
  
time.coef <- as.data.frame(summary(trend[[DV]])[["coefficients"]])
time.coef <- time.coef[2,]
time.coef$CI.low <- time.coef$Estimate - (1.96*time.coef$`Std. Error`)
time.coef$CI.hi <- time.coef$Estimate + (1.96*time.coef$`Std. Error`)
time.coef <- move_columns(time.coef, c(CI.low, CI.hi), .after="Estimate")
time.coef[,1:3] <- exp(time.coef[,1:3])
names(time.coef) <- c("OR", "CI.low", "CI.hi", "SE_logodds", "z", "p")
time.coef <- round(time.coef, 3)
time.coef$DV <- dvlist[DV]
time.coef$fixef <- rownames(time.coef)

trends_ORs <- rbind(trends_ORs, time.coef)


pred.prob <- data.frame(effect("survey.period", trend[[DV]],
                       se=TRUE, confidence.level = .95, type="response"))
pred.prob$DV <- dvlist[DV]
trend.prob <- rbind(trend.prob, pred.prob)

upper=.30
trend.plots[[DV]]<-ggplot(pred.prob, aes(x=survey.period, y=fit,ymin=lower, ymax=upper))+
  geom_col(alpha=0.5, fill="black")+
  geom_errorbar(width=0.06, colour="black")+
  geom_text(stat="identity", aes(label= paste0(round(fit*100, digits=0), "%")),  size=4, hjust=-0.3, vjust=-0.7)+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), limits = c(0,upper),
                     name="")+
  theme_classic()+
  theme(axis.text.x = element_text(size=11),
        axis.text.y = element_text(size=12),
        axis.title.x=element_blank(),
        plot.title = element_text(face="bold", hjust=0.5))


}

# plot predicted probability (& 95% cis of each outcome for 2020/1 and 2022/3)
ggarrange(trend.plots[[1]], trend.plots[[2]],trend.plots[[3]],
          nrow=1, ncol=3)

For next analysis, limit dataset to the West (has before, during and after COVID timepoints). This includes 15,085 individuals from 77 schools in the West of Ireland (including an urban centre, Galway city [16% of participants]).

pyw <- py[py$region=="West",]

# check demographics across years in the west
table1(~gender3 + age.group + ethnicity3 + grade + munic.area + home.parents3 + home.language + Ma_edu | survey.year, data=pyw, overall=F)
## Warning in table1.formula(~gender3 + age.group + ethnicity3 + grade +
## munic.area + : Terms to the right of '|' in formula 'x' define table columns
## and are expected to be factors with meaningful labels.
2018
(N=4722)
2020
(N=4828)
2022
(N=5122)
gender3
Male 2337 (49.5%) 2360 (48.9%) 2505 (48.9%)
Female 2358 (49.9%) 2374 (49.2%) 2469 (48.2%)
Other 0 (0%) 77 (1.6%) 139 (2.7%)
Missing 27 (0.6%) 17 (0.4%) 9 (0.2%)
age.group
16 3025 (64.1%) 3094 (64.1%) 3109 (60.7%)
15 1439 (30.5%) 1341 (27.8%) 1452 (28.3%)
17-19 240 (5.1%) 378 (7.8%) 560 (10.9%)
Missing 18 (0.4%) 15 (0.3%) 1 (0.0%)
ethnicity3
White Irish 0 (0%) 4077 (84.4%) 3976 (77.6%)
White non-Irish 0 (0%) 408 (8.5%) 525 (10.3%)
Black, Asian or other minority ethnicity 0 (0%) 313 (6.5%) 613 (12.0%)
Not Asked 4722 (100%) 0 (0%) 0 (0%)
Missing 0 (0%) 30 (0.6%) 8 (0.2%)
grade
4th Year 3288 (69.6%) 3546 (73.4%) 3743 (73.1%)
5th Year 1382 (29.3%) 1176 (24.4%) 1253 (24.5%)
LCA/QQI 0 (0%) 83 (1.7%) 57 (1.1%)
6th Year 10 (0.2%) 0 (0%) 61 (1.2%)
Missing 42 (0.9%) 23 (0.5%) 8 (0.2%)
munic.area
North Dublin 0 (0%) 0 (0%) 0 (0%)
Cavan 0 (0%) 0 (0%) 0 (0%)
Monaghan 0 (0%) 0 (0%) 0 (0%)
Mayo 1463 (31.0%) 1479 (30.6%) 1478 (28.9%)
Roscommon 511 (10.8%) 629 (13.0%) 672 (13.1%)
Galway County 1975 (41.8%) 1966 (40.7%) 2097 (40.9%)
Galway City 773 (16.4%) 754 (15.6%) 875 (17.1%)
home.parents3
Two Parents 3749 (79.4%) 3845 (79.6%) 3973 (77.6%)
One Parent 871 (18.4%) 834 (17.3%) 825 (16.1%)
Other 84 (1.8%) 126 (2.6%) 312 (6.1%)
Missing 18 (0.4%) 23 (0.5%) 12 (0.2%)
home.language
English only 3894 (82.5%) 3874 (80.2%) 3669 (71.6%)
English and another 621 (13.2%) 725 (15.0%) 978 (19.1%)
Non-English language only 193 (4.1%) 209 (4.3%) 462 (9.0%)
Missing 14 (0.3%) 20 (0.4%) 13 (0.3%)
Ma_edu
College or University 2415 (51.1%) 2482 (51.4%) 3380 (66.0%)
Secondary School 1188 (25.2%) 1080 (22.4%) 1233 (24.1%)
Primary School 337 (7.1%) 261 (5.4%) 94 (1.8%)
Dont know/Doesnt apply 751 (15.9%) 949 (19.7%) 402 (7.8%)
Missing 31 (0.7%) 56 (1.2%) 13 (0.3%)
# run multivariate models  
pyw$year <- as.factor(pyw$survey.year)
covid.eff <- c()

# model1: bad mental health
covid_mlm1 <- glmer(badMH.f ~ year 
                   + gender3 + age.group  + 
                  + home.parents3 +  Ma_edu
                 + (1|munic.area/school.id),  
                 data=pyw, family=binomial())
summary(covid_mlm1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: badMH.f ~ year + gender3 + age.group + +home.parents3 + Ma_edu +  
##     (1 | munic.area/school.id)
##    Data: pyw
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##   12659.4   12765.4   -6315.7   12631.4     14273 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4835 -0.4875 -0.3906 -0.3092  3.7031 
## 
## Random effects:
##  Groups               Name        Variance Std.Dev.
##  school.id:munic.area (Intercept) 0.020575 0.14344 
##  munic.area           (Intercept) 0.007042 0.08392 
## Number of obs: 14287, groups:  school.id:munic.area, 75; munic.area, 4
## 
## Fixed effects:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  -2.26821    0.07689 -29.500  < 2e-16 ***
## year2020                      0.27279    0.05513   4.948 7.48e-07 ***
## year2022                     -0.11750    0.05919  -1.985  0.04712 *  
## gender3Female                 0.74823    0.04980  15.023  < 2e-16 ***
## gender3Other                  2.07569    0.14881  13.948  < 2e-16 ***
## age.group15                  -0.01350    0.05116  -0.264  0.79180    
## age.group17-19                0.17459    0.08364   2.087  0.03684 *  
## home.parents3One Parent       0.55617    0.05493  10.124  < 2e-16 ***
## home.parents3Other            0.11840    0.12382   0.956  0.33897    
## Ma_eduSecondary School        0.17663    0.05473   3.228  0.00125 ** 
## Ma_eduPrimary School          0.32171    0.10079   3.192  0.00141 ** 
## Ma_eduDont know/Doesnt apply  0.06144    0.06914   0.889  0.37415    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) yr2020 yr2022 gndr3F gndr3O ag.g15 a.17-1 hm.3OP hm.p3O
## year2020    -0.402                                                        
## year2022    -0.406  0.518                                                 
## gender3Feml -0.402  0.004  0.007                                          
## gender3Othr -0.080 -0.059 -0.125  0.202                                   
## age.group15 -0.206  0.017  0.012 -0.048  0.002                            
## ag.grp17-19 -0.090 -0.031 -0.082  0.020 -0.003  0.193                     
## hm.prnts3OP -0.159  0.025  0.009 -0.001  0.005 -0.015 -0.035              
## hm.prnts3Ot -0.014 -0.015 -0.093 -0.038 -0.020 -0.084 -0.051  0.108       
## M_dScndrySc -0.249  0.026  0.050  0.002  0.002  0.027 -0.027 -0.031  0.033
## M_dPrmrySch -0.159  0.041  0.134 -0.003 -0.019  0.001 -0.052 -0.056 -0.010
## M_dDnknw/Da -0.234 -0.037  0.117  0.105 -0.009  0.005 -0.034 -0.075 -0.034
##             M_dScS M_dPrS
## year2020                 
## year2022                 
## gender3Feml              
## gender3Othr              
## age.group15              
## ag.grp17-19              
## hm.prnts3OP              
## hm.prnts3Ot              
## M_dScndrySc              
## M_dPrmrySch  0.188       
## M_dDnknw/Da  0.265  0.166
adj.cov <- summary(covid_mlm1)[["coefficients"]]
adj.cov <- adj.cov[rownames(adj.cov)=="year2020" | rownames(adj.cov)=="year2022",]
rownames(adj.cov) <- paste0("badMH_", rownames(adj.cov))
covid.eff <- rbind(covid.eff, adj.cov)


# model3: frequent self-harm 
covid_mlm2 <- glmer(selfharm.x5 ~ year 
                    + gender3 + age.group  + 
                  + home.parents4 +  Ma_edu
                 + (1|munic.area/school.id),  
                 data=pyw, family=binomial())
summary(covid_mlm2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: selfharm.x5 ~ year + gender3 + age.group + +home.parents4 + Ma_edu +  
##     (1 | munic.area/school.id)
##    Data: pyw
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    9093.0    9206.3   -4531.5    9063.0     14099 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6662 -0.3820 -0.2853 -0.2195  5.4269 
## 
## Random effects:
##  Groups               Name        Variance Std.Dev.
##  school.id:munic.area (Intercept) 0.055250 0.23505 
##  munic.area           (Intercept) 0.003843 0.06199 
## Number of obs: 14114, groups:  school.id:munic.area, 75; munic.area, 4
## 
## Fixed effects:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  -3.13974    0.09174 -34.223  < 2e-16 ***
## year2020                      0.15718    0.07106   2.212  0.02696 *  
## year2022                      0.23583    0.07196   3.277  0.00105 ** 
## gender3Female                 1.04103    0.06555  15.882  < 2e-16 ***
## gender3Other                  2.65872    0.15514  17.137  < 2e-16 ***
## age.group15                  -0.05246    0.06343  -0.827  0.40818    
## age.group17-19                0.19923    0.10055   1.981  0.04754 *  
## home.parents41 Parent         0.58047    0.07438   7.804 5.99e-15 ***
## home.parents41 Parent +other  0.88284    0.11044   7.994 1.31e-15 ***
## home.parents4Other            0.38367    0.13697   2.801  0.00509 ** 
## Ma_eduSecondary School        0.01913    0.06875   0.278  0.78081    
## Ma_eduPrimary School          0.48236    0.11796   4.089 4.33e-05 ***
## Ma_eduDont know/Doesnt apply  0.06444    0.08652   0.745  0.45642    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
adj.cov <- summary(covid_mlm2)[["coefficients"]]
adj.cov <- adj.cov[rownames(adj.cov)=="year2020" | rownames(adj.cov)=="year2022",]
rownames(adj.cov) <- paste0("selfharmX5_", rownames(adj.cov))
covid.eff <- rbind(covid.eff, adj.cov)

# model4: suicide attempt
covid_mlm3 <- glmer(suic.att ~ year 
                    + gender3 + age.group  + 
                  + home.parents4 +  Ma_edu
                 + (1|munic.area/school.id),  
                 data=pyw, family=binomial())
summary(covid_mlm3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: suic.att ~ year + gender3 + age.group + +home.parents4 + Ma_edu +  
##     (1 | munic.area/school.id)
##    Data: pyw
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    7708.3    7821.6   -3839.2    7678.3     14057 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.1402 -0.3164 -0.2687 -0.2111  5.9121 
## 
## Random effects:
##  Groups               Name        Variance Std.Dev.
##  school.id:munic.area (Intercept) 0.055148 0.23484 
##  munic.area           (Intercept) 0.002273 0.04767 
## Number of obs: 14072, groups:  school.id:munic.area, 75; munic.area, 4
## 
## Fixed effects:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  -3.17296    0.09774 -32.464  < 2e-16 ***
## year2020                      0.13833    0.07767   1.781 0.074923 .  
## year2022                      0.02825    0.08104   0.349 0.727385    
## gender3Female                 0.68287    0.07000   9.756  < 2e-16 ***
## gender3Other                  1.71715    0.17671   9.717  < 2e-16 ***
## age.group15                  -0.10420    0.07201  -1.447 0.147908    
## age.group17-19                0.35216    0.10609   3.319 0.000902 ***
## home.parents41 Parent         0.64218    0.08140   7.890 3.03e-15 ***
## home.parents41 Parent +other  0.83300    0.12183   6.837 8.08e-12 ***
## home.parents4Other            0.66865    0.14295   4.678 2.90e-06 ***
## Ma_eduSecondary School        0.15098    0.07582   1.991 0.046431 *  
## Ma_eduPrimary School          0.48145    0.12890   3.735 0.000188 ***
## Ma_eduDont know/Doesnt apply  0.12006    0.09461   1.269 0.204433    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
adj.cov <- summary(covid_mlm3)[["coefficients"]]
adj.cov <- adj.cov[rownames(adj.cov)=="year2020" | rownames(adj.cov)=="year2022",]
rownames(adj.cov) <- paste0("suicatt_", rownames(adj.cov))
covid.eff <- rbind(covid.eff, adj.cov)

covid.eff <- as.data.frame(covid.eff)

covid.eff$OR <- exp(covid.eff$Estimate)
covid.eff$CI.low<-exp(covid.eff$Estimate - (1.96*covid.eff$`Std. Error`))
covid.eff$CI.hi<-exp(covid.eff$Estimate + (1.96*covid.eff$`Std. Error`))
covid.eff <- round(covid.eff, 3)
# plot model-derived probability of each outcome before, during & after COVID (West data only)


predict1 = as.data.frame(effect("year", covid_mlm1,  
                       se=TRUE, confidence.level = .95
                      # typical="response", type="response" # these don't make a difference here it seems
                      ))
predict1$outcome <- "badMH"


predict2 = as.data.frame(effect("year", covid_mlm2,  
                       se=TRUE, confidence.level = .95
                      ))
predict2$outcome <- "self-harmX5"

predict3 = as.data.frame(effect("year", covid_mlm3,  
                       se=TRUE, confidence.level = .95
                      ))
predict3$outcome <- "suic.att"

predict <- rbind(predict1, predict2, predict3)
predict$year <- as.numeric(as.character(predict$year))

ggplot(data=predict, aes(x=year, y=fit, group=outcome, colour=outcome))+
  geom_smooth(method="loess", se=T)+
  geom_errorbar(aes(ymin=lower, ymax=upper), width=0.1)+
  labs(title="% of teens reporting various mental health outcomes", 
       x="Year of survey")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), limits = c(0,.35),
                     name="Predicted Probability")+
  theme_bw()+
  theme(panel.grid.minor.x=element_blank())