Setup

# load libraries
library(foreign)
library(dplyr)
library(stringr)
library(ggplot2)
library(ggpubr)
library(reshape2)
library(ggpubr)
library(sjmisc)
library(qwraps2)
library(stargazer)
library(rms)
library(lme4)
library(table1)
library(effects)
library(sjlabelled)
library(performance)

# load data
load(paste0(path1,"pyNE_2021_2023.Rdata"))
load(paste0(path2,"pyWest_2018_2020_2022.Rdata"))

# limit variables & merge
py <- rbind(east[,c("survey.year","region","county", "munic.area", "urban", "youthreach", "school.id",
                    "grade", "gender3", "age.group", "ethnicity3",
                    "Ma_edu","home.parents2","home.parents3", "home.parents4", "home.language",
                    "genMH","badMH", "sharm2", "sdqtot",
                    "dep.appetite", "dep.lonely", "dep.cry", "dep.sleep", 
                    "dep.sad" ,"dep.apathy" ,"dep.hopeless",
                           "dep.lim7", "suic.att",
                    "smokelife.bin", "smoke30days.bin", "eciglife.bin", "ecig30days.bin", "alclife.bin", "alc30days.bin", "cannabislife.bin", "cannabis30days.bin")],
            west[,c("survey.year","region","county","munic.area", "urban",  "youthreach", "school.id",
                    "grade", "gender3", "age.group", "ethnicity3",
                    "Ma_edu","home.parents2","home.parents3","home.parents4",  "home.language",
                    "genMH","badMH", "sharm2", "sdqtot",
                    "dep.appetite", "dep.lonely", "dep.cry", "dep.sleep", 
                    "dep.sad" ,"dep.apathy" ,"dep.hopeless",
                           "dep.lim7", "suic.att",
                    "smokelife.bin", "smoke30days.bin", "eciglife.bin", "ecig30days.bin", "alclife.bin", "alc30days.bin", "cannabislife.bin", "cannabis30days.bin")])

# enter urbanicity scores manually ; calculated separately (see "Urbanicity of counties barplot.keynote" and "Census data for PY regions.Rmd")
py$urbanicity <- NA
py$urbanicity[py$munic.area=="North Dublin"] <- 4.46
py$urbanicity[py$munic.area=="Cavan"] <- 2.36
py$urbanicity[py$munic.area=="Monaghan"] <- 2.44
py$urbanicity[py$munic.area=="Mayo"] <- 2.16
py$urbanicity[py$munic.area=="Roscommon"] <- 2.29
py$urbanicity[py$munic.area=="Galway"] <- 2.87
py$urbanicity[py$munic.area=="Galway_City"] <- 6.0

# make ref level 0 meaningful
py$urbanicity <- py$urbanicity-1 # now ranges from 0-5, increasing urbanicity

Sample selection

# limit to 15 years and up (but allow missings) [N 24,073 -> 23,068]
py <- py[is.na(py$age.group) | (py$age.group=="15" | py$age.group=="16" | py$age.group=="17 or older"), ]
py$age.group <- droplevels(py$age.group)

# keep the full dataframe here
py.full <- py

# remove Youthreach students
py.YR <- py[py$youthreach=="Yes",]
py <- py[py$youthreach=="No",]


# remove those in 3rd year (pre-Junior Cert completion)
py <- py[is.na(py$grade) | (
                              #py$grade=="3rd Year" | 
                              py$grade=="4th Year" | py$grade=="5th Year" 
                            | py$grade=="6th Year" | py$grade=="LCA/QQI"),] 
py$grade <- droplevels(py$grade)

Sample N for analysis = 21436

Sample characteristics

Sample characteristics for the full sample
	Overall (N=21436)
gender3
Male	10623 (49.6%)
Female	10243 (47.8%)
Other	480 (2.2%)
Missing	90 (0.4%)
age.group
15	6779 (31.6%)
16	12619 (58.9%)
17-19	1961 (9.1%)
Missing	77 (0.4%)
ethnicity3
White Irish	12909 (60.2%)
White non-Irish	1937 (9.0%)
Black, Asian or other minority ethnicity	1781 (8.3%)
Not Asked	4722 (22.0%)
Missing	87 (0.4%)
grade
4th Year	15450 (72.1%)
5th Year	5352 (25.0%)
LCA/QQI	221 (1.0%)
6th Year	77 (0.4%)
Missing	336 (1.6%)
Family make-up
2 Parents	16784 (78.3%)
1 Parent	2764 (12.9%)
1 Parent +other	980 (4.6%)
Other	811 (3.8%)
Missing	97 (0.5%)
Maternal education level
College or University	12628 (58.9%)
Secondary School	4968 (23.2%)
Primary School	802 (3.7%)
Dont know/Doesnt apply	2891 (13.5%)
Missing	147 (0.7%)
school.type
Mainstream	21436 (100%)
urbanicity
Mean (SD)	2.20 (1.25)
Median [Min, Max]	1.87 [1.16, 5.00]

Table notes:
* 82% of the 17-25 age-group are from mainstream schools, and are therefore likely 19 or younger
* ethnicity was not recorded in the West in 2018 (20% of sample)
* ‘White Irish’ ethnicity included Irish travellers
* ‘College/University’ level education included technical college, diploma and apprenticeship
* “another” language included Irish
* “Other” family type at home included living alone, foster care & exchange student host family

Let’s look at how demographics changed in each survey year. Note that this also reflects regional differences (even years = West; odd years = North East)

## Warning in table1.formula(~gender3 + age.group + ethnicity3 + grade +
## home.parents4 + : Terms to the right of '|' in formula 'x' define table columns
## and are expected to be factors with meaningful labels.

Sample characteristics split by survey year
	2018 (N=4722)	2020 (N=4828)	2021 (N=3160)	2022 (N=5122)	2023 (N=3604)	Overall (N=21436)
gender3
Male	2337 (49.5%)	2360 (48.9%)	1633 (51.7%)	2505 (48.9%)	1788 (49.6%)	10623 (49.6%)
Female	2358 (49.9%)	2374 (49.2%)	1377 (43.6%)	2469 (48.2%)	1665 (46.2%)	10243 (47.8%)
Other	0 (0%)	77 (1.6%)	130 (4.1%)	139 (2.7%)	134 (3.7%)	480 (2.2%)
Missing	27 (0.6%)	17 (0.4%)	20 (0.6%)	9 (0.2%)	17 (0.5%)	90 (0.4%)
age.group
15	1439 (30.5%)	1341 (27.8%)	1014 (32.1%)	1452 (28.3%)	1533 (42.5%)	6779 (31.6%)
16	3025 (64.1%)	3094 (64.1%)	1595 (50.5%)	3109 (60.7%)	1796 (49.8%)	12619 (58.9%)
17-19	240 (5.1%)	378 (7.8%)	531 (16.8%)	560 (10.9%)	252 (7.0%)	1961 (9.1%)
Missing	18 (0.4%)	15 (0.3%)	20 (0.6%)	1 (0.0%)	23 (0.6%)	77 (0.4%)
ethnicity3
White Irish	0 (0%)	4077 (84.4%)	2199 (69.6%)	3976 (77.6%)	2657 (73.7%)	12909 (60.2%)
White non-Irish	0 (0%)	408 (8.5%)	595 (18.8%)	525 (10.3%)	409 (11.3%)	1937 (9.0%)
Black, Asian or other minority ethnicity	0 (0%)	313 (6.5%)	340 (10.8%)	613 (12.0%)	515 (14.3%)	1781 (8.3%)
Not Asked	4722 (100%)	0 (0%)	0 (0%)	0 (0%)	0 (0%)	4722 (22.0%)
Missing	0 (0%)	30 (0.6%)	26 (0.8%)	8 (0.2%)	23 (0.6%)	87 (0.4%)
grade
4th Year	3288 (69.6%)	3546 (73.4%)	2098 (66.4%)	3743 (73.1%)	2775 (77.0%)	15450 (72.1%)
5th Year	1382 (29.3%)	1176 (24.4%)	960 (30.4%)	1253 (24.5%)	581 (16.1%)	5352 (25.0%)
LCA/QQI	0 (0%)	83 (1.7%)	81 (2.6%)	57 (1.1%)	0 (0%)	221 (1.0%)
6th Year	10 (0.2%)	0 (0%)	0 (0%)	61 (1.2%)	6 (0.2%)	77 (0.4%)
Missing	42 (0.9%)	23 (0.5%)	21 (0.7%)	8 (0.2%)	242 (6.7%)	336 (1.6%)
Family make-up
2 Parents	3749 (79.4%)	3845 (79.6%)	2438 (77.2%)	3973 (77.6%)	2779 (77.1%)	16784 (78.3%)
1 Parent	638 (13.5%)	627 (13.0%)	426 (13.5%)	634 (12.4%)	439 (12.2%)	2764 (12.9%)
1 Parent +other	233 (4.9%)	207 (4.3%)	188 (5.9%)	191 (3.7%)	161 (4.5%)	980 (4.6%)
Other	84 (1.8%)	126 (2.6%)	85 (2.7%)	312 (6.1%)	204 (5.7%)	811 (3.8%)
Missing	18 (0.4%)	23 (0.5%)	23 (0.7%)	12 (0.2%)	21 (0.6%)	97 (0.5%)
Maternal education level
College or University	2415 (51.1%)	2482 (51.4%)	1953 (61.8%)	3380 (66.0%)	2398 (66.5%)	12628 (58.9%)
Secondary School	1188 (25.2%)	1080 (22.4%)	767 (24.3%)	1233 (24.1%)	700 (19.4%)	4968 (23.2%)
Primary School	337 (7.1%)	261 (5.4%)	49 (1.6%)	94 (1.8%)	61 (1.7%)	802 (3.7%)
Dont know/Doesnt apply	751 (15.9%)	949 (19.7%)	369 (11.7%)	402 (7.8%)	420 (11.7%)	2891 (13.5%)
Missing	31 (0.7%)	56 (1.2%)	22 (0.7%)	13 (0.3%)	25 (0.7%)	147 (0.7%)
school.type
Mainstream	4722 (100%)	4828 (100%)	3160 (100%)	5122 (100%)	3604 (100%)	21436 (100%)
munic.area
North Dublin	0 (0%)	0 (0%)	1510 (47.8%)	0 (0%)	1858 (51.6%)	3368 (15.7%)
Cavan	0 (0%)	0 (0%)	834 (26.4%)	0 (0%)	822 (22.8%)	1656 (7.7%)
Monaghan	0 (0%)	0 (0%)	816 (25.8%)	0 (0%)	924 (25.6%)	1740 (8.1%)
Mayo	1463 (31.0%)	1479 (30.6%)	0 (0%)	1478 (28.9%)	0 (0%)	4420 (20.6%)
Roscommon	511 (10.8%)	629 (13.0%)	0 (0%)	672 (13.1%)	0 (0%)	1812 (8.5%)
Galway County	1975 (41.8%)	1966 (40.7%)	0 (0%)	2097 (40.9%)	0 (0%)	6038 (28.2%)
Galway City	773 (16.4%)	754 (15.6%)	0 (0%)	875 (17.1%)	0 (0%)	2402 (11.2%)

Check number of schools per region

d <- as.data.frame(table(py$school.id))

# count number of schools per county
schls <- data.frame(region=rep(unique(py$munic.area),2), 
                    school.type = c(rep("Mainstream",7),rep("YouthReach",7)))
for (x in unique(py$munic.area)) {
  for (y in unique(py$school.type)) {
    schls$num[schls$region==x & schls$school.type==y] <- 
                    length(unique(py$school.id[py$munic.area==x & py$school.type==y]))
    
    schls$pupils[schls$region==x & schls$school.type==y] <- 
                    length(py$school.id[py$munic.area==x & py$school.type==y])
  }
}

print(schls[schls$school.type=="Mainstream",])

##          region school.type num pupils
## 1         Cavan  Mainstream  11   1656
## 2      Monaghan  Mainstream  12   1740
## 3  North Dublin  Mainstream  16   3368
## 4     Roscommon  Mainstream   9   1812
## 5          Mayo  Mainstream  27   4420
## 6 Galway County  Mainstream  29   6038
## 7   Galway City  Mainstream  10   2402

Results

Split into 4 sections:
Q1. 5-year prevalence estimates for secondary school students in Ireland (N~20,000).
Q2. Time trends: how have prevalence estimates changed throughout the 5 years (descriptive only).
Q3. Which sub-groups in the adolescent population have the highest probability of reporting these outcomes?
Q4. How did prevalence estimates between COVID and non-COVID survey periods?

(Q1) Mental health of the sample

county.cols <- c( "deepskyblue1","dodgerblue4", "cornflowerblue", 
                  "tomato","firebrick4", "violetred")

munic.cols <- c( "deepskyblue1",  "dodgerblue4","cornflowerblue",
                 "goldenrod","firebrick4",  "violetred", "tomato" )

gender.colors <- c("steelblue3", "rosybrown3", "wheat3")


dvlist_nice <- c("Poor mental health", "Repetitive self-harm", "Suicide attempt")

# Descriptives table

py$selfharm.x5 <- factor(py$selfharm.x5, levels=c("No", "Yes"))
py$suic.att <- relevel(py$suic.att, "No")


table1(~ badMH.f +  selfharm.x5 + suic.att | covid.period, 
       data=py, c(left = "Overall"),
       caption = "Overall = 5-year National average")

Overall = 5-year National average
	Overall (N=21436)	Non-COVID years (N=13448)	COVID-years (N=7988)
badMH.f
Very good/Good/Okay	16971 (79.2%)	10922 (81.2%)	6049 (75.7%)
Bad/Very bad	4131 (19.3%)	2273 (16.9%)	1858 (23.3%)
Missing	334 (1.6%)	253 (1.9%)	81 (1.0%)
selfharm.x5
No	18159 (84.7%)	11330 (84.3%)	6829 (85.5%)
Yes	2490 (11.6%)	1559 (11.6%)	931 (11.7%)
Missing	787 (3.7%)	559 (4.2%)	228 (2.9%)
suic.att
No	18863 (88.0%)	11869 (88.3%)	6994 (87.6%)
Yes	1747 (8.1%)	1018 (7.6%)	729 (9.1%)
Missing	826 (3.9%)	561 (4.2%)	265 (3.3%)

# model-derived AVERAGE prevalence of each outcome

library(gmodels)

dvlist <- c("badMH", "selfharm.x5", "suic.att")

py$suic.att <- relevel(py$suic.att, "No")
py$suic.att <- relevel(py$suic.att, "No")

mod <- lapply(dvlist, function(x) {
glmer(eval(substitute(i ~ 1 + (1|munic.area/school.id),
                list(i = as.name(x)))), 
              data = py,
              family=binomial() )} )

# (a) what's the average rate of each outcome?

mod.avgs <- data.frame(outcome = dvlist, prob = NA)
mod.avgs.county <- c()
mod.avgs.school <- c()
for (DV in 1:3) {
  
  mod.avgs$prob[DV]= mean( predict(mod[[DV]], type="response") ) 
  mod.avgs$ci.lo[DV]=quantile(predict(mod[[DV]], type="response"), 0.025)
  mod.avgs$ci.hi[DV]=quantile(predict(mod[[DV]], type="response"), 0.975)
  
  mod.avgs$N_analysis[DV] = nrow(mod[[DV]]@frame)

  # predict probability of outcome for each school
  newdata <- unique(py[,c("munic.area", "school.id")])
  newdata$pred.resp <- predict(mod[[DV]], newdata=newdata, type="response",
                re.form=~(1|munic.area/school.id))
  
  mod.avgs.school <- rbind(mod.avgs.school,
                           cbind(newdata, dvlist[DV]))
  
  # average those within each county to get predicted prob on the county level
  mod.avgs.county <- rbind(mod.avgs.county, 
                           cbind(mod.avgs.school[mod.avgs.school$`dvlist[DV]`==dvlist[DV],]%>%
                                   group_by(munic.area)%>%
                                   summarise(mean=mean(pred.resp), 
                                             ci_lo=ci(pred.resp)[2], 
                                             ci_hi=ci(pred.resp)[3]),
                                 dvlist[DV]) )
  

}
mod.avgs[,c(2:4)] <- round(mod.avgs[,c(2:4)],3)
print(mod.avgs)

##       outcome  prob ci.lo ci.hi N_analysis
## 1       badMH 0.196 0.119 0.296      21102
## 2 selfharm.x5 0.120 0.053 0.184      20649
## 3    suic.att 0.084 0.048 0.130      20610

# plot model-derived mean prevalence for each outcome with variation by area and school

lims =  c(0.025,.325)

d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="badMH",]
d_co <- mod.avgs.county[mod.avgs.county$`dvlist[DV]`=="badMH",]

g1<-ggplot()+
  geom_hline(yintercept=mean(mod.avgs$prob[mod.avgs$outcome=="badMH"]), linetype="dashed", colour="blue")+
  geom_point(data=d_sch, aes(x=reorder(munic.area,-pred.resp), y=pred.resp), position=position_jitter(width=0.15),
                 shape=1, alpha=0.8, colour="grey")+
  geom_errorbar(data=d_co, aes(x=reorder(munic.area,-mean), y=mean, ymin=ci_lo, ymax=ci_hi),
                colour="grey30", width=0.3)+
  geom_point(data=d_co, aes(x=reorder(munic.area,-mean), y=mean),
                colour="grey30")+
   geom_text(stat="identity", aes(label= paste0("Average = ",round(mean(mod.avgs$prob[mod.avgs$outcome=="badMH"])*100, 
                                                      digits=1), "%"), 
                                  x=7, y=0.32, hjust=1),
             size=4, colour="blue")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims,
                     name="Probability")+
  labs(title ="Poor mental health", x="")+
  theme_bw()+
  theme(axis.text.x=element_text(angle=90))


d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="selfharm.x5",]
d_co <- mod.avgs.county[mod.avgs.county$`dvlist[DV]`=="selfharm.x5",]
g2<-ggplot()+
  geom_hline(yintercept=mean(mod.avgs$prob[mod.avgs$outcome=="selfharm.x5"]), linetype="dashed", colour="blue")+
  geom_point(data=d_sch, aes(x=reorder(munic.area,-pred.resp), y=pred.resp), position=position_jitter(width=0.15),
                 shape=1, alpha=0.8, colour="grey")+
  geom_errorbar(data=d_co, aes(x=reorder(munic.area,-mean), y=mean, ymin=ci_lo, ymax=ci_hi),
                colour="grey30", width=0.3)+
  geom_point(data=d_co, aes(x=reorder(munic.area,-mean), y=mean),
                colour="grey30")+
  geom_text(stat="identity", aes(label= paste0("Average = ",
                                               round(mean(mod.avgs$prob[mod.avgs$outcome=="selfharm.x5"])*100, 
                                                      digits=1), "%"), 
                                  x=7, y=0.32, hjust=1),
             size=4, colour="blue")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims,
                     name="Probability")+
  labs(title ="Frequent self-harm", x="")+
  theme_bw()+
  theme(axis.text.x=element_text(angle=90))


d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="suic.att",]
d_co <- mod.avgs.county[mod.avgs.county$`dvlist[DV]`=="suic.att",]
g3<-ggplot()+
  geom_hline(yintercept=mean(mod.avgs$prob[mod.avgs$outcome=="suic.att"]), linetype="dashed", colour="blue")+
  geom_point(data=d_sch, aes(x=reorder(munic.area,-pred.resp), y=pred.resp), position=position_jitter(width=0.15),
                 shape=1, alpha=0.8, colour="grey")+
  geom_errorbar(data=d_co, aes(x=reorder(munic.area,-mean), y=mean, ymin=ci_lo, ymax=ci_hi),
                colour="grey30", width=0.3)+
  geom_point(data=d_co,aes(x=reorder(munic.area,-mean), y=mean),
                colour="grey30")+
  geom_text(stat="identity", aes(label= paste0("Average = ",
                                               round(mean(mod.avgs$prob[mod.avgs$outcome=="suic.att"])*100, 
                                                      digits=1), "%"), 
                                  x=7, y=0.32, hjust=1),
             size=4, colour="blue")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims,
                     name=" ")+
  labs(title ="Suicide Attempt", x="")+
  theme_bw()+
  theme(axis.text.x=element_text(angle=90))

ggarrange(g1, g2, g3, nrow=1, ncol=3)

# plot model-derived mean prevalence for each outcome with variation by school only 

lims =  c(0.025,.325)

d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="badMH",]
subj.ave <- mean(mod.avgs$prob[mod.avgs$outcome=="badMH"])

g1<-ggplot()+
  geom_rect(aes(xmin=-Inf, xmax=Inf,
                ymin= mod.avgs$ci.lo[mod.avgs$outcome=="badMH"], ymax= mod.avgs$ci.hi[mod.avgs$outcome=="badMH"]), 
            fill="black", alpha=0.1)+
  geom_hline(yintercept=subj.ave, colour="blue")+
  geom_point(data=d_sch, aes(x= school.id, y=pred.resp),
                 shape=1, alpha=0.8, colour="black")+
   geom_text(stat="identity", aes(label= paste0("Sample Average = ", round(mean(mod.avgs$prob[mod.avgs$outcome=="badMH"])*100, digits=1), "%"), 
                                   x=60, y=.31), hjust=0.5, vjust=-1, size=4, colour="blue")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims, name="Probability")+
  labs(title ="Poor mental health", x="")+
  theme_bw()+
  theme(axis.text.x=element_blank(),
        axis.ticks.x = element_blank(),
        panel.grid.major.x=element_blank())


d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="selfharm.x5",]
subj.ave <- mean(mod.avgs$prob[mod.avgs$outcome=="selfharm.x5"])

g2<-ggplot()+
  geom_rect(aes(xmin=-Inf, xmax=Inf,
                ymin= mod.avgs$ci.lo[mod.avgs$outcome=="selfharm.x5"], ymax= mod.avgs$ci.hi[mod.avgs$outcome=="selfharm.x5"]), 
            fill="black", alpha=0.1)+
  geom_hline(yintercept=subj.ave, colour="blue")+
  geom_point(data=d_sch, aes(x= school.id, y=pred.resp),
                 shape=1, alpha=0.8, colour="black")+
   geom_text(stat="identity", aes(label= paste0("Sample Average = ", round(mean(mod.avgs$prob[mod.avgs$outcome=="selfharm.x5"])*100, digits=1), "%"), 
                                   x=60, y=.31), hjust=0.5, vjust=-1, size=4, colour="blue")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims, name=" ")+
  labs(title ="Repetitive Self-Harm", x="")+
  theme_bw()+
  theme(axis.text.x=element_blank(),
        axis.ticks.x = element_blank(),
        panel.grid.major.x=element_blank())


d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="suic.att",]
subj.ave <- mean(mod.avgs$prob[mod.avgs$outcome=="suic.att"])

g3<-ggplot()+
  geom_rect(aes(xmin=-Inf, xmax=Inf,
                ymin= mod.avgs$ci.lo[mod.avgs$outcome=="suic.att"], ymax= mod.avgs$ci.hi[mod.avgs$outcome=="suic.att"]), 
            fill="black", alpha=0.1)+
  geom_hline(yintercept=subj.ave, colour="blue")+
  geom_point(data=d_sch, aes(x= school.id, y=pred.resp),
                 shape=1, alpha=0.8, colour="black")+
  geom_text(stat="identity", aes(label= paste0("Sample Average = ", round(mean(mod.avgs$prob[mod.avgs$outcome=="suic.att"])*100, digits=1), "%"), 
                                   x=60, y=.31), hjust=0.5, vjust=-1, size=4, colour="blue")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims, name=" ")+
  labs(title ="Suicide Attempt", x="")+
  theme_bw()+
  theme(axis.text.x=element_blank(),
        axis.ticks.x = element_blank(),
        panel.grid.major.x=element_blank())

ggarrange(g1, g2, g3, nrow=1, ncol=3)

(Q2) Time trends 2018-2023

lims = c(.05,.325)

# badmh
d<-py[ (!is.na(py$badMH)) ,] %>%
  group_by(survey.year,munic.area, badMH) %>%
  summarise(n=n()) %>%
  mutate(freq=n/sum(n),
         totn=sum(n))
d <- d[d$badMH==1,]

for (x in 1:length(d$survey.year)){
d$ci.low[x] <- binom.test(d$n[x], d$totn[x])[["conf.int"]][1]
d$ci.hi[x] <- binom.test(d$n[x], d$totn[x])[["conf.int"]][2]
}

g1<-ggplot(data=d)+
  geom_rect(aes(xmin=2019.75, xmax=2021.25,
                ymin= -Inf, ymax= Inf), fill="grey92", alpha=0.1)+
  geom_ribbon(aes(x=survey.year, ymin=ci.low, ymax=ci.hi, fill=munic.area), alpha=0.1)+
  geom_point(aes(x=survey.year, y=freq, colour=munic.area),size=1.7)+
  geom_line(aes(x=survey.year, y=freq, colour=munic.area), size=1, alpha=0.7)+
  labs(title="Poor mental health", 
       x="")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     breaks=c(.05, .10, .15, .20, .25, .30),
                     minor_breaks = NULL,
                     limits = lims,
                     name="")+
  scale_colour_manual(values=munic.cols)+
  scale_fill_manual(values=munic.cols)+
  theme_bw()+
  theme(panel.grid.minor.x=element_blank(),
        axis.text.x = element_text(angle=90))

# frequent self-harm
d<-py[ (!is.na(py$selfharm.x5)) ,] %>%
  group_by(survey.year,munic.area, selfharm.x5) %>%
  summarise(n=n()) %>%
  mutate(freq=n/sum(n),
         totn=sum(n))
d <- d[d$selfharm.x5=="Yes",]
for (x in 1:length(d$survey.year)){
d$ci.low[x] <- binom.test(d$n[x], d$totn[x])[["conf.int"]][1]
d$ci.hi[x] <- binom.test(d$n[x], d$totn[x])[["conf.int"]][2]
}
g2<-ggplot(data=d)+
  #geom_smooth(aes(x=survey.year, y=freq), method="loess", colour="grey85", alpha=0.2)+
  geom_rect(aes(xmin=2019.75, xmax=2021.25,
                ymin= -Inf, ymax= Inf), fill="grey92", alpha=0.1)+
  geom_ribbon(aes(x=survey.year, ymin=ci.low, ymax=ci.hi, fill=munic.area), alpha=0.1)+
  geom_point(aes(x=survey.year, y=freq, colour=munic.area),size=1.7)+
  geom_line(aes(x=survey.year, y=freq, colour=munic.area), size=1, alpha=0.7)+
  labs(title="Repetitive self-harm", 
       x="")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     breaks=c(.05, .10, .15, .20, .25, .30),
                     minor_breaks = NULL,
                     limits = lims,name="")+
  scale_colour_manual(values=munic.cols)+
  scale_fill_manual(values=munic.cols)+
  theme_bw()+
  theme(panel.grid.minor.x=element_blank(),
        axis.text.x = element_text(angle=90))


# suicide attempt
d<-py[ (!is.na(py$suic.att)) ,] %>%
  group_by(survey.year,munic.area, suic.att) %>%
  summarise(n=n()) %>%
  mutate(freq=n/sum(n),
         totn=sum(n))
d <- d[d$suic.att=="Yes",]
for (x in 1:length(d$survey.year)){
d$ci.low[x] <- binom.test(d$n[x], d$totn[x])[["conf.int"]][1]
d$ci.hi[x] <- binom.test(d$n[x], d$totn[x])[["conf.int"]][2]
}

g3<-ggplot(data=d)+
  #geom_smooth(aes(x=survey.year, y=freq), method="loess", colour="grey85", alpha=0.2)+
  geom_rect(aes(xmin=2019.75, xmax=2021.25,
                ymin= -Inf, ymax= Inf), fill="grey92", alpha=0.1)+
  geom_ribbon(aes(x=survey.year, ymin=ci.low, ymax=ci.hi, fill=munic.area), alpha=0.1)+
  geom_point(aes(x=survey.year, y=freq, colour=munic.area),size=1.7)+
  geom_line(aes(x=survey.year, y=freq, colour=munic.area), size=1, alpha=0.7)+
  labs(title="Suicide attempt", 
       x="")+
  scale_y_continuous(labels = scales::percent_format(accuracy=5), 
                     breaks=c(.05, .10, .15, .20, .25, .30),
                     minor_breaks = NULL,
                     limits = lims, name="")+
  scale_colour_manual(values=munic.cols)+
  scale_fill_manual(values=munic.cols)+
  theme_bw()+
  theme(panel.grid.minor.x=element_blank(),
        axis.text.x = element_text(angle=90))


g<-ggarrange(g1,g2,g3, ncol=3,nrow=1, common.legend = T, legend="right")
annotate_figure(g, top=text_grob("Change in rates of mental health outcomes over time (secondary school students only)\n"))

Closer look at trends by gender

lims = c(.05,.80)

# badmh
d<-py[ (!is.na(py$badMH)) ,] %>%
  group_by(survey.year,gender3, badMH) %>%
  summarise(n=n()) %>%
  mutate(freq=n/sum(n))
d <- d[d$badMH==1 & !is.na(d$gender3),]

g1<-ggplot(data=d)+
  #geom_smooth(aes(x=survey.year, y=freq), method="loess", colour="grey85", alpha=0.2)+
  geom_point(aes(x=survey.year, y=freq, colour=gender3),size=1.7)+
  geom_line(aes(x=survey.year, y=freq, colour=gender3), size=1, alpha=0.7)+
  labs(title="Poor mental health", 
       x="Year of survey")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims,
                     name="")+
  scale_colour_manual(values=gender.colors)+
  theme_bw()+
  theme(panel.grid.minor.x=element_blank(),
        axis.text.x = element_text(angle=90))

# frequent self-harm
d<-py[ (!is.na(py$selfharm.x5)) ,] %>%
  group_by(survey.year,gender3, selfharm.x5) %>%
  summarise(n=n()) %>%
  mutate(freq=n/sum(n))
d <- d[d$selfharm.x5=="Yes" & !is.na(d$gender3),]

g2<-ggplot(data=d)+
  geom_point(aes(x=survey.year, y=freq, colour=gender3),size=1.7)+
  geom_line(aes(x=survey.year, y=freq, colour=gender3), size=1, alpha=0.7)+
  labs(title="Frequent self-harm", 
       x="Year of survey")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims,name="")+
  scale_colour_manual(values=gender.colors)+
  theme_bw()+
  theme(panel.grid.minor.x=element_blank(),
        axis.text.x = element_text(angle=90))


# suicide attempt
d<-py[ (!is.na(py$suic.att)) ,] %>%
  group_by(survey.year,gender3, suic.att) %>%
  summarise(n=n()) %>%
  mutate(freq=n/sum(n))
d <- d[d$suic.att=="Yes" & !is.na(d$gender3),]

g3<-ggplot(data=d)+
  geom_point(aes(x=survey.year, y=freq, colour=gender3),size=1.7)+
  geom_line(aes(x=survey.year, y=freq, colour=gender3), size=1, alpha=0.7)+
  labs(title="Lifetime suicide attempt", 
       x="Year of survey")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims,name="")+
  scale_colour_manual(values=gender.colors)+
  theme_bw()+
  theme(panel.grid.minor.x=element_blank(),
        axis.text.x = element_text(angle=90))


ggarrange(g1,g2,g3, ncol=3,nrow=1, common.legend = T, legend="right")

# the above plot conflates survey year with region (the peaks may reflect the region that only took part that year)
# so, merge certain years together so that each point reflects all regions. 

py$survey.period <- factor(py$survey.year, levels=c(2018, 2020, 2021, 2022, 2023), 
                           labels=c("2018", "2020-21","2020-21", "2022-23", "2022-23"))

lims = c(.05,.70)

# badmh
d<-py[ (!is.na(py$badMH) & py$survey.year>2018) ,] %>%
  group_by(survey.period,gender3, badMH) %>%
  summarise(n=n()) %>%
  mutate(freq=n/sum(n))
d <- d[d$badMH==1 & !is.na(d$gender3),]

g1<-ggplot(data=d)+
  #geom_smooth(aes(x=survey.period, y=freq), method="loess", colour="grey85", alpha=0.2)+
  geom_point(aes(x=  survey.period, y=freq, group=gender3,colour=gender3),size=1.7)+
  
  geom_line(aes(x=survey.period, y=freq, group=gender3,colour=gender3), size=1, alpha=0.7)+
  labs(title="Poor mental health", 
       x="")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims,
                     name="")+
  scale_colour_manual(values=gender.colors)+
  theme_bw()+
  theme(panel.grid.minor.x=element_blank(),
        axis.text.x = element_text(size=10))

# frequent self-harm
d<-py[ (!is.na(py$selfharm.x5))& py$survey.year>2018 ,] %>%
  group_by(survey.period,gender3, selfharm.x5) %>%
  summarise(n=n()) %>%
  mutate(freq=n/sum(n))
d <- d[d$selfharm.x5=="Yes" & !is.na(d$gender3),]

g2<-ggplot(data=d)+
  #geom_smooth(aes(x=survey.period, y=freq), method="loess", colour="grey85", alpha=0.2)+
  geom_point(aes(x=survey.period, y=freq, group=gender3,colour=gender3),size=1.7)+
  geom_line(aes(x=survey.period, y=freq, group=gender3,colour=gender3), size=1, alpha=0.7)+
  labs(title="Repetitive self-harm", 
       x="")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims,name="")+
  scale_colour_manual(values=gender.colors)+
  theme_bw()+
  theme(panel.grid.minor.x=element_blank(),
        axis.text.x = element_text(size=10))


# suicide attempt
d<-py[ (!is.na(py$suic.att))& py$survey.year>2018 ,] %>%
  group_by(survey.period,gender3, suic.att) %>%
  summarise(n=n()) %>%
  mutate(freq=n/sum(n))
d <- d[d$suic.att=="Yes" & !is.na(d$gender3),]


g3<-ggplot(data=d)+
  #geom_smooth(aes(x=survey.period, y=freq), method="loess", colour="grey85", alpha=0.2)+
  geom_point(aes(x=survey.period, y=freq, group=gender3, colour=gender3),size=1.7)+
  geom_line(aes(x=survey.period, y=freq,group=gender3, colour=gender3), size=1, alpha=0.7)+
  labs(title="Lifetime suicide attempt", 
       x="")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), 
                     limits = lims,name="")+
  scale_colour_manual(values=gender.colors)+
  theme_bw()+
  theme(panel.grid.minor.x=element_blank(),
        axis.text.x = element_text(size=10))


ggarrange(g1,g2,g3, ncol=3,nrow=1, common.legend = T, legend="right")

(Q3) Group differences in mental health

# set reference levels
py$age.group <- relevel(py$age.group, "16")
py$region <- relevel(py$region, "West")
py$selfharm.x5 <- factor(py$selfharm.x5, levels=c("No", "Yes"))
py$suic.att <- relevel(py$suic.att, "No")

# create a linear version of time with baseline (0) at 2018
py$linear.time <- py$survey.year-2018

py$suic.att <- relevel(py$suic.att, "No")

dvlist <- c("badMH", "selfharm.x5", "suic.att")


py$urbanicity.z <- scale(py$urbanicity,scale=F)

groupdiffs <- lapply(dvlist, function(x) {
glmer(eval(substitute(i ~  age.group + gender3 + urbanicity.z +
                         + home.parents3   + Ma_edu + covid.period
                         + (1|munic.area/school.id),
                list(i = as.name(x)))), 
              data = py,
              family=binomial() )} )


# Extract Odds ratios for a table
group.ORs <- c()
for (DV in 1:3){
res1out <- as.data.frame(summary(groupdiffs[[DV]])[["coefficients"]])
res1out$var <- rownames(res1out)
res1out$outcome <- dvlist[DV]
res1out$OR <- exp(res1out$Estimate)
res1out$CI.low<-exp(res1out$Estimate - (1.96*res1out$`Std. Error`))
res1out$CI.hi<-exp(res1out$Estimate + (1.96*res1out$`Std. Error`))
rownames(res1out) <- c()
res1out <- move_columns(res1out, c(outcome, var), .before="Estimate")
res1out[,3:length(res1out)] <- round(res1out[,3:length(res1out)], 3)
group.ORs <- rbind(group.ORs, res1out)
}

group.ORs.lim <- group.ORs[,c("outcome", "var", "OR", "CI.low", "CI.hi", "Pr(>|z|)")]
group.ORs.lim$str <- paste0(round(group.ORs.lim$OR,2),  " (", 
                            round(group.ORs.lim$CI.low,2), "-" , 
                            round(group.ORs.lim$CI.hi,2), ")")

print(group.ORs.lim[,c(1:6)])

##        outcome                          var     OR CI.low  CI.hi Pr(>|z|)
## 1        badMH                  (Intercept)  0.108  0.093  0.126    0.000
## 2        badMH                  age.group15  1.026  0.949  1.110    0.520
## 3        badMH               age.group17-19  1.085  0.956  1.231    0.206
## 4        badMH                gender3Female  2.155  1.993  2.331    0.000
## 5        badMH                 gender3Other  7.409  6.080  9.028    0.000
## 6        badMH                 urbanicity.z  1.071  0.967  1.187    0.187
## 7        badMH      home.parents3One Parent  1.703  1.562  1.857    0.000
## 8        badMH           home.parents3Other  1.036  0.856  1.255    0.714
## 9        badMH       Ma_eduSecondary School  1.206  1.107  1.314    0.000
## 10       badMH         Ma_eduPrimary School  1.577  1.325  1.877    0.000
## 11       badMH Ma_eduDont know/Doesnt apply  1.083  0.970  1.208    0.157
## 12       badMH      covid.periodCOVID-years  1.440  1.338  1.549    0.000
## 13 selfharm.x5                  (Intercept)  0.054  0.045  0.064    0.000
## 14 selfharm.x5                  age.group15  1.005  0.912  1.107    0.925
## 15 selfharm.x5               age.group17-19  1.174  1.008  1.368    0.040
## 16 selfharm.x5                gender3Female  3.025  2.730  3.352    0.000
## 17 selfharm.x5                 gender3Other 12.812 10.387 15.804    0.000
## 18 selfharm.x5                 urbanicity.z  1.090  0.975  1.219    0.131
## 19 selfharm.x5      home.parents3One Parent  1.844  1.662  2.044    0.000
## 20 selfharm.x5           home.parents3Other  1.369  1.103  1.700    0.004
## 21 selfharm.x5       Ma_eduSecondary School  1.082  0.973  1.203    0.146
## 22 selfharm.x5         Ma_eduPrimary School  1.664  1.359  2.038    0.000
## 23 selfharm.x5 Ma_eduDont know/Doesnt apply  0.970  0.843  1.118    0.677
## 24 selfharm.x5      covid.periodCOVID-years  0.946  0.862  1.037    0.236
## 25    suic.att                  (Intercept)  0.041  0.036  0.047    0.000
## 26    suic.att                  age.group15  0.919  0.821  1.029    0.143
## 27    suic.att               age.group17-19  1.287  1.088  1.522    0.003
## 28    suic.att                gender3Female  1.967  1.756  2.202    0.000
## 29    suic.att                 gender3Other  5.387  4.252  6.826    0.000
## 30    suic.att                 urbanicity.z  1.088  1.023  1.158    0.007
## 31    suic.att      home.parents3One Parent  1.964  1.750  2.205    0.000
## 32    suic.att           home.parents3Other  1.767  1.398  2.233    0.000
## 33    suic.att       Ma_eduSecondary School  1.278  1.134  1.441    0.000
## 34    suic.att         Ma_eduPrimary School  1.771  1.416  2.215    0.000
## 35    suic.att Ma_eduDont know/Doesnt apply  1.158  0.993  1.351    0.062
## 36    suic.att      covid.periodCOVID-years  1.190  1.072  1.320    0.001

# plot predicted probabilities of each outcome for each group
upper=0.6

for (DV in c(1:3)) {

mass.prob <- c()

var.n =0
for (x in c("gender3", "age.group", "urbanicity.z", "Ma_edu", "home.parents3")) { 
# not included in plots: covid.period, or mat ed not known, or intermediary urbanicity
  
var.n = var.n+1

# select just the highest & lowest levels of urbanicity (1 and 5) for visualisation
if (x=="urbanicity.z"){
pred.prob <- data.frame(effect(x, groupdiffs[[DV]],
                       se=TRUE, confidence.level = .95, type="response", 
                       xlevels= list(urbanicity.z = c(-1, 2.7))))  

} else  {
  pred.prob <- data.frame(effect(x, groupdiffs[[DV]],
                       se=TRUE, confidence.level = .95, type="response")) 
}

pred.prob$var <- x # variable name e.g. gender
names(pred.prob)[1] <- "group"
pred.prob$group <- as.character(pred.prob$group)
pred.prob <- move_columns(pred.prob, "var", .before=1)
pred.prob$group.full <- paste(pred.prob$var, pred.prob$group, sep="-") # combine var & level strings

mass.prob <- rbind(mass.prob, pred.prob)

}

mass.prob$order <- seq(1, length(mass.prob$group))

mass.prob$group[mass.prob$group=="Other" & mass.prob$var=="gender3"] <- "TGD"

mass.prob$group <- factor(mass.prob$group, levels=c("Male" , "Female" , "TGD",
                                                    "15","16" ,"17-19",
                                                    "College or University","Secondary School","Primary School","Dont know/Doesnt apply",
                                                    "Two Parents", "One Parent" , "Other",
                                                    "-1" ,"2.7"),
                          labels=c("Male" , "Female" , "Trans/gender-diverse",
                                                    "Aged 15","Aged 16" ,"Aged 17-19",
                                                    "Mother 3rd level edu","Mother secondary edu","Mother primary edu","Unknown mother edu",
                                                    "Two Parents", "One Parent" , "Other family make-up",
                                                     "Most rural" ,"Most urban"))

mass.prob$var <- factor(mass.prob$var,  levels=c("gender3" , "age.group" ,"Ma_edu", "home.parents3","urbanicity.z"), 
                        labels=c("Gender identity" , "Age" ,"Maternal education", "Family make-up","Urbanicity"))


# hide "unknown maternal education" from graph for simplicity
mass.prob <- mass.prob[!str_detect(mass.prob$group, "Unknown mother edu"),]


g<-ggplot(mass.prob, aes(x=group, y=fit, ymin=lower, ymax=upper, group=var, fill=var))+
  geom_hline(yintercept=mod.avgs$prob[DV], linetype="dashed", size=0.7)+
  geom_col(alpha=0.6)+
  labs(title=dvlist_nice[DV])+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), limits = c(0,upper),
                     breaks=seq(0,upper,0.10), minor_breaks = NULL,
                      name="")+
  geom_text( stat="identity", aes(label= paste0(round(fit*100, digits=0), "%"),y=0.02, colour=var),
              size=3, fontface="bold")+ #vjust=-1.2,
  geom_text(aes(label= paste0("- - Sample Average (",round(mod.avgs$prob[DV]*100, digits=1), "%)"),
              x=14.1, y=0.52), hjust=1.0, vjust=0, size=4.5, colour="black")+
  geom_errorbar(aes(colour=var),width=0)+
  theme_bw()+
  scale_fill_manual(values=c( "steelblue4", "lightblue3",  "darkseagreen3", "darkseagreen4","navajowhite3"))+ #"darkorange4"
  scale_colour_manual(values=c( "steelblue4", "lightblue3",  "darkseagreen3", "darkseagreen4", "navajowhite3"))+
  theme(axis.text.x =element_text(angle=30, hjust=1, vjust=1),
        axis.title.y=element_blank(),
        axis.title.x=element_blank(),
        legend.title=element_blank(),
        plot.title  = element_text(face="bold", size=14, hjust=0.5))
print(g)

}

Secondary school students Vs early school leavers

Note switch of dataframe from py to py.full

# descriptive table showing differences in demographics (and the need to control for them)
table1(~ gender3 + age.group + ethnicity3 + grade   
       + home.parents4  + Ma_edu # + home.language 
       + school.type + munic.area | school.type,
       data=py.full)

	Mainstream (N=22474)	YouthReach (N=600)	Overall (N=23074)
gender3
Male	11148 (49.6%)	289 (48.2%)	11437 (49.6%)
Female	10705 (47.6%)	289 (48.2%)	10994 (47.6%)
Other	531 (2.4%)	20 (3.3%)	551 (2.4%)
Missing	90 (0.4%)	2 (0.3%)	92 (0.4%)
age.group
15	7778 (34.6%)	30 (5.0%)	7808 (33.8%)
16	12647 (56.3%)	141 (23.5%)	12788 (55.4%)
17 or older	1972 (8.8%)	416 (69.3%)	2388 (10.3%)
Missing	77 (0.3%)	13 (2.2%)	90 (0.4%)
ethnicity3
White Irish	13598 (60.5%)	349 (58.2%)	13947 (60.4%)
White non-Irish	2097 (9.3%)	64 (10.7%)	2161 (9.4%)
Black, Asian or other minority ethnicity	1966 (8.7%)	66 (11.0%)	2032 (8.8%)
Not Asked	4726 (21.0%)	117 (19.5%)	4843 (21.0%)
Missing	87 (0.4%)	4 (0.7%)	91 (0.4%)
grade
2nd Year	7 (0.0%)	1 (0.2%)	8 (0.0%)
3rd Year	1031 (4.6%)	11 (1.8%)	1042 (4.5%)
4th Year	15450 (68.7%)	29 (4.8%)	15479 (67.1%)
5th Year	5352 (23.8%)	67 (11.2%)	5419 (23.5%)
LCA/QQI	221 (1.0%)	337 (56.2%)	558 (2.4%)
6th Year	77 (0.3%)	63 (10.5%)	140 (0.6%)
Missing	336 (1.5%)	92 (15.3%)	428 (1.9%)
home.parents4
2 Parents	17594 (78.3%)	306 (51.0%)	17900 (77.6%)
1 Parent	2915 (13.0%)	174 (29.0%)	3089 (13.4%)
1 Parent +other	1040 (4.6%)	44 (7.3%)	1084 (4.7%)
Other	826 (3.7%)	68 (11.3%)	894 (3.9%)
Missing	99 (0.4%)	8 (1.3%)	107 (0.5%)
Ma_edu
College or University	13296 (59.2%)	123 (20.5%)	13419 (58.2%)
Secondary School	5167 (23.0%)	175 (29.2%)	5342 (23.2%)
Primary School	825 (3.7%)	92 (15.3%)	917 (4.0%)
Dont know/Doesnt apply	3037 (13.5%)	198 (33.0%)	3235 (14.0%)
Missing	149 (0.7%)	12 (2.0%)	161 (0.7%)
school.type
Mainstream	22474 (100%)	0 (0%)	22474 (97.4%)
YouthReach	0 (0%)	600 (100%)	600 (2.6%)
munic.area
North Dublin	4402 (19.6%)	18 (3.0%)	4420 (19.2%)
Cavan	1656 (7.4%)	99 (16.5%)	1755 (7.6%)
Monaghan	1740 (7.7%)	52 (8.7%)	1792 (7.8%)
Mayo	4420 (19.7%)	125 (20.8%)	4545 (19.7%)
Roscommon	1812 (8.1%)	115 (19.2%)	1927 (8.4%)
Galway	6039 (26.9%)	133 (22.2%)	6172 (26.7%)
Galway_City	2405 (10.7%)	58 (9.7%)	2463 (10.7%)

# adjusted comparison of secondary school and early school leavers

py.full$suic.att <- relevel(py.full$suic.att, "No")

dvlist <- c("badMH", "selfharm.x5", "suic.att")

py.full$urbanicity.z <- scale(py.full$urbanicity,scale=F)

fullsamp <- lapply(dvlist, function(x) {
glmer(eval(substitute(i ~ school.type + age.group + gender3 + urbanicity.z +
                         + home.parents3   + Ma_edu + covid.period
                         + (1|munic.area/school.id),
                list(i = as.name(x)))), 
              data = py.full,
              family=binomial() )} )


# plot predicted probabilities

upper=0.6

mass.prob <- c()
for (DV in c(1:3)) {
    
    pred.prob <- data.frame(effect("school.type", fullsamp[[DV]],
                           se=TRUE, confidence.level = .95, type="response")) 
    
    pred.prob$var <- dvlist_nice[DV] # variable name e.g. gender
    mass.prob <- rbind(mass.prob, pred.prob)

g<- ggplot(pred.prob, aes(x=school.type, y=fit, ymin=lower, ymax=upper, fill=school.type))+
   geom_col(fill="grey40", alpha=0.4)+
   labs(title=dvlist_nice[DV])+
   scale_y_continuous(labels = scales::percent_format(accuracy = 5L), limits = c(0,upper),
                      breaks=seq(0,upper,0.10), minor_breaks = NULL,
                       name="")+
   geom_text( stat="identity", aes(label= paste0(round(fit*100, digits=0), "%"),y=0.02,),
               size=3, colour="grey40", fontface="bold")+ #vjust=-1.2,
   geom_errorbar(width=0, colour="grey40")+
   theme_bw()+
   theme(axis.text.x =element_text(angle=30, hjust=1, vjust=1),
         axis.title.y=element_blank(),
         axis.title.x=element_blank(),
        plot.title  = element_text(face="bold", size=14, hjust=0.5))
print(g)
}

# Extract Odds ratios for a table
group.ORs <- c()
for (DV in 1:3){
res1out <- as.data.frame(summary(fullsamp[[DV]])[["coefficients"]])
res1out$var <- rownames(res1out)
res1out$outcome <- dvlist[DV]
res1out$OR <- exp(res1out$Estimate)
res1out$CI.low<-exp(res1out$Estimate - (1.96*res1out$`Std. Error`))
res1out$CI.hi<-exp(res1out$Estimate + (1.96*res1out$`Std. Error`))
rownames(res1out) <- c()
res1out <- move_columns(res1out, c(outcome, var), .before="Estimate")
res1out[,3:length(res1out)] <- round(res1out[,3:length(res1out)], 3)
group.ORs <- rbind(group.ORs, res1out)
}

print(group.ORs[,c("outcome", "var", "OR", "CI.low", "CI.hi", "Pr(>|z|)")])

##        outcome                          var     OR CI.low  CI.hi Pr(>|z|)
## 1        badMH                  (Intercept)  0.112  0.095  0.133    0.000
## 2        badMH        school.typeYouthReach  1.557  1.210  2.004    0.001
## 3        badMH                  age.group16  0.973  0.902  1.049    0.470
## 4        badMH         age.group17 or older  1.051  0.927  1.192    0.434
## 5        badMH                gender3Female  2.198  2.041  2.367    0.000
## 6        badMH                 gender3Other  7.359  6.114  8.859    0.000
## 7        badMH                 urbanicity.z  1.077  0.964  1.204    0.188
## 8        badMH      home.parents3One Parent  1.718  1.583  1.864    0.000
## 9        badMH           home.parents3Other  1.103  0.924  1.318    0.277
## 10       badMH       Ma_eduSecondary School  1.221  1.125  1.325    0.000
## 11       badMH         Ma_eduPrimary School  1.509  1.280  1.779    0.000
## 12       badMH Ma_eduDont know/Doesnt apply  1.087  0.980  1.206    0.115
## 13       badMH      covid.periodCOVID-years  1.388  1.294  1.488    0.000
## 14 selfharm.x5                  (Intercept)  0.056  0.047  0.067    0.000
## 15 selfharm.x5        school.typeYouthReach  1.909  1.402  2.600    0.000
## 16 selfharm.x5                  age.group16  1.000  0.912  1.098    0.994
## 17 selfharm.x5         age.group17 or older  1.180  1.014  1.373    0.032
## 18 selfharm.x5                gender3Female  2.976  2.702  3.277    0.000
## 19 selfharm.x5                 gender3Other 12.155  9.983 14.799    0.000
## 20 selfharm.x5                 urbanicity.z  1.097  0.984  1.223    0.095
## 21 selfharm.x5      home.parents3One Parent  1.829  1.659  2.016    0.000
## 22 selfharm.x5           home.parents3Other  1.467  1.202  1.791    0.000
## 23 selfharm.x5       Ma_eduSecondary School  1.074  0.971  1.189    0.167
## 24 selfharm.x5         Ma_eduPrimary School  1.536  1.269  1.859    0.000
## 25 selfharm.x5 Ma_eduDont know/Doesnt apply  0.929  0.813  1.061    0.276
## 26 selfharm.x5      covid.periodCOVID-years  0.947  0.868  1.033    0.217
## 27    suic.att                  (Intercept)  0.038  0.033  0.044    0.000
## 28    suic.att        school.typeYouthReach  2.825  2.047  3.900    0.000
## 29    suic.att                  age.group16  1.097  0.985  1.222    0.092
## 30    suic.att         age.group17 or older  1.412  1.195  1.668    0.000
## 31    suic.att                gender3Female  2.040  1.834  2.270    0.000
## 32    suic.att                 gender3Other  5.747  4.617  7.154    0.000
## 33    suic.att                 urbanicity.z  1.103  1.039  1.170    0.001
## 34    suic.att      home.parents3One Parent  1.938  1.738  2.161    0.000
## 35    suic.att           home.parents3Other  1.883  1.523  2.329    0.000
## 36    suic.att       Ma_eduSecondary School  1.274  1.137  1.428    0.000
## 37    suic.att         Ma_eduPrimary School  1.666  1.354  2.050    0.000
## 38    suic.att Ma_eduDont know/Doesnt apply  1.116  0.966  1.290    0.135
## 39    suic.att      covid.periodCOVID-years  1.169  1.060  1.289    0.002

(Q4) Differences in the “COVID” period (2020, 2021) vs non-COVID years

First, look at changes post-COVID. Merge the two “COVID” timepoints together (2020, 2021) and the two post-COVID timepoints (2022, 2023) to simplify comparison and avoid regional bias.

dvlist <- c("badMH", "selfharm.x5", "suic.att")

trend <- lapply(dvlist, function(x) {
glmer(eval(substitute(i ~ survey.period 
                  + gender3 + age.group  + 
                  + home.parents3 +  Ma_edu
                  + (1|munic.area/school.id),
                list(i = as.name(x)))), 
              data = py[py$survey.year>2018,],
              family=binomial() )} )

trends_ORs<- c()
trend.prob <- c()
trend.plots <- vector("list", length = 3)
for (DV in 1:3) {
  
time.coef <- as.data.frame(summary(trend[[DV]])[["coefficients"]])
time.coef <- time.coef[2,]
time.coef$CI.low <- time.coef$Estimate - (1.96*time.coef$`Std. Error`)
time.coef$CI.hi <- time.coef$Estimate + (1.96*time.coef$`Std. Error`)
time.coef <- move_columns(time.coef, c(CI.low, CI.hi), .after="Estimate")
time.coef[,1:3] <- exp(time.coef[,1:3])
names(time.coef) <- c("OR", "CI.low", "CI.hi", "SE_logodds", "z", "p")
time.coef <- round(time.coef, 3)
time.coef$DV <- dvlist[DV]
time.coef$fixef <- rownames(time.coef)

trends_ORs <- rbind(trends_ORs, time.coef)


pred.prob <- data.frame(effect("survey.period", trend[[DV]],
                       se=TRUE, confidence.level = .95, type="response"))
pred.prob$DV <- dvlist[DV]
trend.prob <- rbind(trend.prob, pred.prob)

upper=.30
trend.plots[[DV]]<-ggplot(pred.prob, aes(x=survey.period, y=fit,ymin=lower, ymax=upper))+
  geom_col(alpha=0.5, fill="black")+
  geom_errorbar(width=0.06, colour="black")+
  geom_text(stat="identity", aes(label= paste0(round(fit*100, digits=0), "%")),  size=4, hjust=-0.3, vjust=-0.7)+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), limits = c(0,upper),
                     name="")+
  theme_classic()+
  theme(axis.text.x = element_text(size=11),
        axis.text.y = element_text(size=12),
        axis.title.x=element_blank(),
        plot.title = element_text(face="bold", hjust=0.5))


}

# plot predicted probability (& 95% cis of each outcome for 2020/1 and 2022/3)
ggarrange(trend.plots[[1]], trend.plots[[2]],trend.plots[[3]],
          nrow=1, ncol=3)

For next analysis, limit dataset to the West (has before, during and after COVID timepoints). This includes 15,085 individuals from 77 schools in the West of Ireland (including an urban centre, Galway city [16% of participants]).

pyw <- py[py$region=="West",]

# check demographics across years in the west
table1(~gender3 + age.group + ethnicity3 + grade + munic.area + home.parents3 + home.language + Ma_edu | survey.year, data=pyw, overall=F)

## Warning in table1.formula(~gender3 + age.group + ethnicity3 + grade +
## munic.area + : Terms to the right of '|' in formula 'x' define table columns
## and are expected to be factors with meaningful labels.

	2018 (N=4722)	2020 (N=4828)	2022 (N=5122)
gender3
Male	2337 (49.5%)	2360 (48.9%)	2505 (48.9%)
Female	2358 (49.9%)	2374 (49.2%)	2469 (48.2%)
Other	0 (0%)	77 (1.6%)	139 (2.7%)
Missing	27 (0.6%)	17 (0.4%)	9 (0.2%)
age.group
16	3025 (64.1%)	3094 (64.1%)	3109 (60.7%)
15	1439 (30.5%)	1341 (27.8%)	1452 (28.3%)
17-19	240 (5.1%)	378 (7.8%)	560 (10.9%)
Missing	18 (0.4%)	15 (0.3%)	1 (0.0%)
ethnicity3
White Irish	0 (0%)	4077 (84.4%)	3976 (77.6%)
White non-Irish	0 (0%)	408 (8.5%)	525 (10.3%)
Black, Asian or other minority ethnicity	0 (0%)	313 (6.5%)	613 (12.0%)
Not Asked	4722 (100%)	0 (0%)	0 (0%)
Missing	0 (0%)	30 (0.6%)	8 (0.2%)
grade
4th Year	3288 (69.6%)	3546 (73.4%)	3743 (73.1%)
5th Year	1382 (29.3%)	1176 (24.4%)	1253 (24.5%)
LCA/QQI	0 (0%)	83 (1.7%)	57 (1.1%)
6th Year	10 (0.2%)	0 (0%)	61 (1.2%)
Missing	42 (0.9%)	23 (0.5%)	8 (0.2%)
munic.area
North Dublin	0 (0%)	0 (0%)	0 (0%)
Cavan	0 (0%)	0 (0%)	0 (0%)
Monaghan	0 (0%)	0 (0%)	0 (0%)
Mayo	1463 (31.0%)	1479 (30.6%)	1478 (28.9%)
Roscommon	511 (10.8%)	629 (13.0%)	672 (13.1%)
Galway County	1975 (41.8%)	1966 (40.7%)	2097 (40.9%)
Galway City	773 (16.4%)	754 (15.6%)	875 (17.1%)
home.parents3
Two Parents	3749 (79.4%)	3845 (79.6%)	3973 (77.6%)
One Parent	871 (18.4%)	834 (17.3%)	825 (16.1%)
Other	84 (1.8%)	126 (2.6%)	312 (6.1%)
Missing	18 (0.4%)	23 (0.5%)	12 (0.2%)
home.language
English only	3894 (82.5%)	3874 (80.2%)	3669 (71.6%)
English and another	621 (13.2%)	725 (15.0%)	978 (19.1%)
Non-English language only	193 (4.1%)	209 (4.3%)	462 (9.0%)
Missing	14 (0.3%)	20 (0.4%)	13 (0.3%)
Ma_edu
College or University	2415 (51.1%)	2482 (51.4%)	3380 (66.0%)
Secondary School	1188 (25.2%)	1080 (22.4%)	1233 (24.1%)
Primary School	337 (7.1%)	261 (5.4%)	94 (1.8%)
Dont know/Doesnt apply	751 (15.9%)	949 (19.7%)	402 (7.8%)
Missing	31 (0.7%)	56 (1.2%)	13 (0.3%)

# run multivariate models  
pyw$year <- as.factor(pyw$survey.year)
covid.eff <- c()

# model1: bad mental health
covid_mlm1 <- glmer(badMH.f ~ year 
                   + gender3 + age.group  + 
                  + home.parents3 +  Ma_edu
                 + (1|munic.area/school.id),  
                 data=pyw, family=binomial())
summary(covid_mlm1)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: badMH.f ~ year + gender3 + age.group + +home.parents3 + Ma_edu +  
##     (1 | munic.area/school.id)
##    Data: pyw
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##   12659.4   12765.4   -6315.7   12631.4     14273 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4835 -0.4875 -0.3906 -0.3092  3.7031 
## 
## Random effects:
##  Groups               Name        Variance Std.Dev.
##  school.id:munic.area (Intercept) 0.020575 0.14344 
##  munic.area           (Intercept) 0.007042 0.08392 
## Number of obs: 14287, groups:  school.id:munic.area, 75; munic.area, 4
## 
## Fixed effects:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  -2.26821    0.07689 -29.500  < 2e-16 ***
## year2020                      0.27279    0.05513   4.948 7.48e-07 ***
## year2022                     -0.11750    0.05919  -1.985  0.04712 *  
## gender3Female                 0.74823    0.04980  15.023  < 2e-16 ***
## gender3Other                  2.07569    0.14881  13.948  < 2e-16 ***
## age.group15                  -0.01350    0.05116  -0.264  0.79180    
## age.group17-19                0.17459    0.08364   2.087  0.03684 *  
## home.parents3One Parent       0.55617    0.05493  10.124  < 2e-16 ***
## home.parents3Other            0.11840    0.12382   0.956  0.33897    
## Ma_eduSecondary School        0.17663    0.05473   3.228  0.00125 ** 
## Ma_eduPrimary School          0.32171    0.10079   3.192  0.00141 ** 
## Ma_eduDont know/Doesnt apply  0.06144    0.06914   0.889  0.37415    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) yr2020 yr2022 gndr3F gndr3O ag.g15 a.17-1 hm.3OP hm.p3O
## year2020    -0.402                                                        
## year2022    -0.406  0.518                                                 
## gender3Feml -0.402  0.004  0.007                                          
## gender3Othr -0.080 -0.059 -0.125  0.202                                   
## age.group15 -0.206  0.017  0.012 -0.048  0.002                            
## ag.grp17-19 -0.090 -0.031 -0.082  0.020 -0.003  0.193                     
## hm.prnts3OP -0.159  0.025  0.009 -0.001  0.005 -0.015 -0.035              
## hm.prnts3Ot -0.014 -0.015 -0.093 -0.038 -0.020 -0.084 -0.051  0.108       
## M_dScndrySc -0.249  0.026  0.050  0.002  0.002  0.027 -0.027 -0.031  0.033
## M_dPrmrySch -0.159  0.041  0.134 -0.003 -0.019  0.001 -0.052 -0.056 -0.010
## M_dDnknw/Da -0.234 -0.037  0.117  0.105 -0.009  0.005 -0.034 -0.075 -0.034
##             M_dScS M_dPrS
## year2020                 
## year2022                 
## gender3Feml              
## gender3Othr              
## age.group15              
## ag.grp17-19              
## hm.prnts3OP              
## hm.prnts3Ot              
## M_dScndrySc              
## M_dPrmrySch  0.188       
## M_dDnknw/Da  0.265  0.166

adj.cov <- summary(covid_mlm1)[["coefficients"]]
adj.cov <- adj.cov[rownames(adj.cov)=="year2020" | rownames(adj.cov)=="year2022",]
rownames(adj.cov) <- paste0("badMH_", rownames(adj.cov))
covid.eff <- rbind(covid.eff, adj.cov)


# model3: frequent self-harm 
covid_mlm2 <- glmer(selfharm.x5 ~ year 
                    + gender3 + age.group  + 
                  + home.parents4 +  Ma_edu
                 + (1|munic.area/school.id),  
                 data=pyw, family=binomial())
summary(covid_mlm2)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: selfharm.x5 ~ year + gender3 + age.group + +home.parents4 + Ma_edu +  
##     (1 | munic.area/school.id)
##    Data: pyw
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    9093.0    9206.3   -4531.5    9063.0     14099 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6662 -0.3820 -0.2853 -0.2195  5.4269 
## 
## Random effects:
##  Groups               Name        Variance Std.Dev.
##  school.id:munic.area (Intercept) 0.055250 0.23505 
##  munic.area           (Intercept) 0.003843 0.06199 
## Number of obs: 14114, groups:  school.id:munic.area, 75; munic.area, 4
## 
## Fixed effects:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  -3.13974    0.09174 -34.223  < 2e-16 ***
## year2020                      0.15718    0.07106   2.212  0.02696 *  
## year2022                      0.23583    0.07196   3.277  0.00105 ** 
## gender3Female                 1.04103    0.06555  15.882  < 2e-16 ***
## gender3Other                  2.65872    0.15514  17.137  < 2e-16 ***
## age.group15                  -0.05246    0.06343  -0.827  0.40818    
## age.group17-19                0.19923    0.10055   1.981  0.04754 *  
## home.parents41 Parent         0.58047    0.07438   7.804 5.99e-15 ***
## home.parents41 Parent +other  0.88284    0.11044   7.994 1.31e-15 ***
## home.parents4Other            0.38367    0.13697   2.801  0.00509 ** 
## Ma_eduSecondary School        0.01913    0.06875   0.278  0.78081    
## Ma_eduPrimary School          0.48236    0.11796   4.089 4.33e-05 ***
## Ma_eduDont know/Doesnt apply  0.06444    0.08652   0.745  0.45642    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

adj.cov <- summary(covid_mlm2)[["coefficients"]]
adj.cov <- adj.cov[rownames(adj.cov)=="year2020" | rownames(adj.cov)=="year2022",]
rownames(adj.cov) <- paste0("selfharmX5_", rownames(adj.cov))
covid.eff <- rbind(covid.eff, adj.cov)

# model4: suicide attempt
covid_mlm3 <- glmer(suic.att ~ year 
                    + gender3 + age.group  + 
                  + home.parents4 +  Ma_edu
                 + (1|munic.area/school.id),  
                 data=pyw, family=binomial())
summary(covid_mlm3)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: suic.att ~ year + gender3 + age.group + +home.parents4 + Ma_edu +  
##     (1 | munic.area/school.id)
##    Data: pyw
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    7708.3    7821.6   -3839.2    7678.3     14057 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.1402 -0.3164 -0.2687 -0.2111  5.9121 
## 
## Random effects:
##  Groups               Name        Variance Std.Dev.
##  school.id:munic.area (Intercept) 0.055148 0.23484 
##  munic.area           (Intercept) 0.002273 0.04767 
## Number of obs: 14072, groups:  school.id:munic.area, 75; munic.area, 4
## 
## Fixed effects:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  -3.17296    0.09774 -32.464  < 2e-16 ***
## year2020                      0.13833    0.07767   1.781 0.074923 .  
## year2022                      0.02825    0.08104   0.349 0.727385    
## gender3Female                 0.68287    0.07000   9.756  < 2e-16 ***
## gender3Other                  1.71715    0.17671   9.717  < 2e-16 ***
## age.group15                  -0.10420    0.07201  -1.447 0.147908    
## age.group17-19                0.35216    0.10609   3.319 0.000902 ***
## home.parents41 Parent         0.64218    0.08140   7.890 3.03e-15 ***
## home.parents41 Parent +other  0.83300    0.12183   6.837 8.08e-12 ***
## home.parents4Other            0.66865    0.14295   4.678 2.90e-06 ***
## Ma_eduSecondary School        0.15098    0.07582   1.991 0.046431 *  
## Ma_eduPrimary School          0.48145    0.12890   3.735 0.000188 ***
## Ma_eduDont know/Doesnt apply  0.12006    0.09461   1.269 0.204433    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

adj.cov <- summary(covid_mlm3)[["coefficients"]]
adj.cov <- adj.cov[rownames(adj.cov)=="year2020" | rownames(adj.cov)=="year2022",]
rownames(adj.cov) <- paste0("suicatt_", rownames(adj.cov))
covid.eff <- rbind(covid.eff, adj.cov)

covid.eff <- as.data.frame(covid.eff)

covid.eff$OR <- exp(covid.eff$Estimate)
covid.eff$CI.low<-exp(covid.eff$Estimate - (1.96*covid.eff$`Std. Error`))
covid.eff$CI.hi<-exp(covid.eff$Estimate + (1.96*covid.eff$`Std. Error`))
covid.eff <- round(covid.eff, 3)

# plot model-derived probability of each outcome before, during & after COVID (West data only)


predict1 = as.data.frame(effect("year", covid_mlm1,  
                       se=TRUE, confidence.level = .95
                      # typical="response", type="response" # these don't make a difference here it seems
                      ))
predict1$outcome <- "badMH"


predict2 = as.data.frame(effect("year", covid_mlm2,  
                       se=TRUE, confidence.level = .95
                      ))
predict2$outcome <- "self-harmX5"

predict3 = as.data.frame(effect("year", covid_mlm3,  
                       se=TRUE, confidence.level = .95
                      ))
predict3$outcome <- "suic.att"

predict <- rbind(predict1, predict2, predict3)
predict$year <- as.numeric(as.character(predict$year))

ggplot(data=predict, aes(x=year, y=fit, group=outcome, colour=outcome))+
  geom_smooth(method="loess", se=T)+
  geom_errorbar(aes(ymin=lower, ymax=upper), width=0.1)+
  labs(title="% of teens reporting various mental health outcomes", 
       x="Year of survey")+
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), limits = c(0,.35),
                     name="Predicted Probability")+
  theme_bw()+
  theme(panel.grid.minor.x=element_blank())

PY National Harmonisation - mental health trends

Niamh Dooley

2024-08-22